After reading all the articles, I decided to put everything in classes so I can reuse it. Here is starter code that takes care of all the points A to F.
Sample Code :
__author__ = "Soumil Shah"
__email__ = "shahsoumil519@gmail.com"
# Load every dependency up front so a missing package is reported once,
# clearly, at start-up.
try:
    import sys
    import os
    from fp.fp import FreeProxy
    from fake_useragent import UserAgent
    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver import Chrome
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import TimeoutException
    import time
    print('all module are loaded ')
except ImportError as e:
    # The classes below cannot work without these modules. Report the
    # failure and stop here rather than continuing into a confusing
    # NameError the first time a missing name is used.
    print("Error ->>>: {} ".format(e))
    raise
class Spoofer(object):
    """Pick a random User-Agent string and a free proxy address.

    Both values are fetched once, at construction time, and exposed as the
    ``userAgent`` and ``ip`` attributes. ``ip`` is the proxy address with
    any ``scheme://`` prefix removed.
    """

    def __init__(self, country_id=None, rand=True, anonym=True):
        """Store the proxy filters and fetch a User-Agent / proxy pair.

        Args:
            country_id: Value forwarded to ``FreeProxy`` as ``country_id``.
                Defaults to ``['US']`` when omitted.
            rand: Value forwarded to ``FreeProxy`` as ``rand``.
            anonym: Value forwarded to ``FreeProxy`` as ``anonym``.
        """
        # Build the default list per instance so instances never share (and
        # accidentally mutate) one module-level default list.
        self.country_id = ['US'] if country_id is None else country_id
        self.rand = rand
        self.anonym = anonym
        self.userAgent, self.ip = self.get()

    @staticmethod
    def _strip_scheme(proxy):
        """Return *proxy* without a leading ``scheme://`` prefix, if any."""
        # Taking the last piece of a single split also handles addresses
        # that carry no scheme at all, where indexing [1] would fail.
        return proxy.split("://", 1)[-1]

    def get(self):
        """Return a ``(user_agent, proxy_address)`` tuple.

        Raises:
            RuntimeError: If ``FreeProxy(...).get()`` returns an empty value.
        """
        ua = UserAgent()
        proxy = FreeProxy(
            country_id=self.country_id,
            rand=self.rand,
            anonym=self.anonym,
        ).get()
        if not proxy:
            raise RuntimeError(
                "FreeProxy returned no proxy for country_id={!r}".format(self.country_id)
            )
        return ua.random, self._strip_scheme(proxy)
class DriverOptions(object):
    """Build the Chrome ``Options`` used by the driver.

    After construction, ``options`` holds the configured ``Options`` object
    and ``helperSpoofer`` holds the ``Spoofer`` whose User-Agent and proxy
    address were applied to it.
    """

    def __init__(self):
        """Create the options and apply the spoofed User-Agent and proxy."""
        self.options = Options()

        # Plain command-line switches, in the order they are passed to Chrome.
        # '--disable-blink-features=AutomationControlled' is listed once; the
        # repeated second add was redundant.
        for flag in (
            '--no-sandbox',
            '--start-maximized',
            '--start-fullscreen',
            '--single-process',
            '--disable-dev-shm-usage',
            "--incognito",
            '--disable-blink-features=AutomationControlled',
        ):
            self.options.add_argument(flag)

        self.options.add_experimental_option('useAutomationExtension', False)
        self.options.add_experimental_option("excludeSwitches", ["enable-automation"])
        self.options.add_argument("disable-infobars")

        # Constructing the Spoofer fetches a User-Agent and a proxy address.
        self.helperSpoofer = Spoofer()
        self.options.add_argument('user-agent={}'.format(self.helperSpoofer.userAgent))
        self.options.add_argument('--proxy-server=%s' % self.helperSpoofer.ip)
class WebDriver(DriverOptions):
    """Start a Chrome driver configured with the spoofed options.

    The running driver is available as ``driver_instance``.
    """

    def __init__(self, path=''):
        """Build the options and start the driver.

        Args:
            path: Location of the chromedriver executable. When empty, the
                default ``../windowsDriver/chromedriver.exe`` relative to the
                current working directory is used.
        """
        DriverOptions.__init__(self)
        # Must be stored before get_driver() runs, because it reads it.
        self.path = path
        self.driver_instance = self.get_driver()

    def get_driver(self):
        """Start Chrome, apply the ``navigator.webdriver`` patches, return it.

        Returns:
            The started ``webdriver.Chrome`` instance.
        """
        print("""
IP:{}
UserAgent: {}
""".format(self.helperSpoofer.ip, self.helperSpoofer.userAgent))

        PROXY = self.helperSpoofer.ip
        # NOTE: this mutates the module-level DesiredCapabilities.CHROME dict,
        # so the proxy setting is shared by anything else that reads it.
        webdriver.DesiredCapabilities.CHROME['proxy'] = {
            "httpProxy": PROXY,
            "ftpProxy": PROXY,
            "sslProxy": PROXY,
            "noProxy": None,
            "proxyType": "MANUAL",
            "autodetect": False
        }
        webdriver.DesiredCapabilities.CHROME['acceptSslCerts'] = True

        # Honour the caller-supplied path; fall back to the bundled default.
        path = self.path or os.path.join(os.getcwd(), '../windowsDriver/chromedriver.exe')
        # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour
        # of a Service object -- confirm against the installed Selenium version.
        driver = webdriver.Chrome(executable_path=path, options=self.options)
        try:
            # Hide the flag on the document that is already loaded...
            driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            # ...and register a script that removes it on every new document.
            driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source":
                    "const newProto = navigator.__proto__;"
                    "delete newProto.webdriver;"
                    "navigator.__proto__ = newProto;"
            })
        except Exception:
            # Do not leave an orphaned browser behind if patching fails.
            driver.quit()
            raise
        return driver
def main():
    """Open an IP-check page through the spoofed driver, wait, then clean up."""
    driver = WebDriver()
    driverinstance = driver.driver_instance
    try:
        driverinstance.get("https://www.expressvpn.com/what-is-my-ip")
        time.sleep(5)
        print("done")
    finally:
        # Always shut the browser down so no Chrome/chromedriver process is
        # left running, even when the page load fails.
        driverinstance.quit()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Additional Note:
According to a Stack Overflow post, Selenium detection works by testing for specific JavaScript variables that appear when a page is driven by Selenium. Bot-detection scripts often check for the words "selenium" or "webdriver" in any of the variables on the window object, as well as document variables named $cdc_ and $wdc_. Of course, all of this depends on the browser you are using; different browsers expose different things.
Conclusion :
Using proxies and spoofed User-Agents, you will be able to use Selenium on the vast majority of websites without being banned.
References
“Can a Website Detect When You Are Using Selenium With Chromedriver?” Stack Overflow. Accessed September 11, 2021. https://stackoverflow.com/questions/33225947/can-a-website-detect-when-you-are-using-selenium-with-chromedriver.
“Selenium Webdriver: Modifying Navigator.webdriver Flag To Prevent Selenium Detection.” newbedev. Accessed September 11, 2021. https://newbedev.com/selenium-webdriver-modifying-navigator-webdriver-flag-to-prevent-selenium-detection.
“Can a Website Detect When You Are Using Selenium With Chromedriver?” Stack Overflow. Accessed September 11, 2021. https://stackoverflow.com/questions/33225947/can-a-website-detect-when-you-are-using-selenium-with-chromedriver.
No comments:
Post a Comment