-
Notifications
You must be signed in to change notification settings - Fork 0
/
selenium_scraper.py
44 lines (39 loc) · 1.68 KB
/
selenium_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.chrome.webdriver import WebDriver
from typing import List, Optional
class SeleniumScraper:
    """Small convenience wrapper around a Chrome WebDriver for XPath lookups.

    A Chrome session is started when the object is constructed and must be
    released with :meth:`teardown`. The class can also be used as a context
    manager, in which case the session is quit automatically on exit::

        with SeleniumScraper() as scraper:
            scraper.driver.get(url)
            rows = scraper.get_elements('//tr')
    """

    # User agent sent when the caller does not provide one.
    DEFAULT_USER_AGENT = (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
    )

    def __init__(self, user_agent: Optional[str] = None):
        """Store the requested user agent and start the Chrome session.

        Args:
            user_agent: User-agent string to present to web servers. ``None``
                or an empty string selects :attr:`DEFAULT_USER_AGENT`.
        """
        self.user_agent = user_agent
        self.driver = self.driver_setup()

    def __enter__(self) -> 'SeleniumScraper':
        """Return the scraper itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Quit the WebDriver when the ``with`` block ends, even on error."""
        self.teardown()

    @staticmethod
    def _user_agent_argument(user_agent: Optional[str]) -> str:
        """Build the Chrome command-line switch that sets the user agent.

        Chrome switches that carry a value use the ``--name=value`` form; a
        ``name: value`` string is not recognised as the ``user-agent`` switch
        and would leave the browser's stock user agent in place.
        """
        chosen = user_agent or SeleniumScraper.DEFAULT_USER_AGENT
        return f'--user-agent={chosen}'

    def driver_setup(self) -> WebDriver:
        """Create and return a Chrome WebDriver configured with the user agent."""
        options = Options()
        options.add_argument(self._user_agent_argument(self.user_agent))
        return webdriver.Chrome(options=options)

    def get_element(self, base: WebElement, xpath: str) -> Optional[WebElement]:
        """Return the first descendant of ``base`` matching ``xpath``.

        This is a best-effort lookup: any failure (for example no matching
        element) is reported on stdout and ``None`` is returned instead of
        raising.
        """
        try:
            return base.find_element(By.XPATH, xpath)
        except Exception as e:
            print(f'Error extracting element: {e}')
            return None

    def get_elements(self, xpath: str, timeout: int = 10) -> List[WebElement]:
        """Return all elements in the current page matching ``xpath``.

        Waits up to ``timeout`` seconds for matching elements to be present.
        This is a best-effort lookup: any failure (for example the wait
        timing out) is reported on stdout and an empty list is returned.
        """
        wait = WebDriverWait(self.driver, timeout)
        try:
            return wait.until(
                EC.presence_of_all_elements_located((By.XPATH, xpath))
            )
        except Exception as e:
            print(f'Error extracting elements: {e}')
            return []

    def teardown(self):
        """Quit the WebDriver, closing the browser session."""
        self.driver.quit()