[release-table] Implement render_js_click_selector parameter (#485)
The new `render_javascript_click_selector` parameter makes the fetcher click the element matching the given selector once the page has loaded; it only takes effect when `render_javascript` is enabled.
This commit is contained in:
@@ -54,6 +54,11 @@
|
||||
"name": "2.0",
|
||||
"releaseDate": "2012-04-27",
|
||||
"eol": "2014-12-31"
|
||||
},
|
||||
"10.0": {
|
||||
"name": "10.0",
|
||||
"releaseDate": "2024-07-19",
|
||||
"eol": "2028-07-31"
|
||||
}
|
||||
},
|
||||
"versions": {}
|
||||
|
||||
@@ -82,8 +82,9 @@ def fetch_markdown(url: str, data: any = None, user_agent: str = ENDOFLIFE_BOT_U
|
||||
return mwparserfromhell.parse(response.text)
|
||||
|
||||
# This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright.
|
||||
def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, wait_until: str = None, wait_for: str = None, select_wait_for: bool = False) -> str:
|
||||
logging.info(f"Fetching {url} with JavaScript (wait_until = {wait_until}, wait_for = {wait_for}, select_wait_for = {select_wait_for})")
|
||||
def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, wait_until: str = None, wait_for: str = None,
|
||||
select_wait_for: bool = False, click_selector: str = None) -> str:
|
||||
logging.info(f"Fetching {url} with JavaScript (wait_until = {wait_until}, wait_for = {wait_for}, select_wait_for = {select_wait_for}, click_selector = {click_selector})")
|
||||
with sync_playwright() as p:
|
||||
browser = p.chromium.launch()
|
||||
context = browser.new_context()
|
||||
@@ -94,22 +95,16 @@ def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, w
|
||||
page.goto(url, wait_until=wait_until)
|
||||
logging.info(f"Fetched {url}")
|
||||
|
||||
element_to_wait_for = None
|
||||
if wait_for:
|
||||
try:
|
||||
logging.info(f"Waiting for element with selector {wait_for}")
|
||||
element = page.wait_for_selector(selector=wait_for)
|
||||
logging.info(f"Waiting for element with selector {wait_for}")
|
||||
element_to_wait_for = page.wait_for_selector(selector=wait_for)
|
||||
|
||||
if element:
|
||||
logging.debug(f"Found element with selector {wait_for} on {url}")
|
||||
return element.inner_html() if select_wait_for else page.content()
|
||||
if click_selector:
|
||||
logging.info(f"Clicking on element with selector {click_selector}")
|
||||
page.click(selector=click_selector)
|
||||
page.wait_for_timeout(1000) # Wait for 1 second to allow the page to update after the click.
|
||||
|
||||
logging.error(f"No element found with selector {wait_for} on {url}, will return full page content")
|
||||
logging.debug(f"Full page content: {page.content()}")
|
||||
|
||||
except Exception as e: # noqa: BLE001
|
||||
logging.error(f"Error while waiting for element with selector {wait_for} on {url}: {e}")
|
||||
logging.debug(f"Full page content: {page.content()}")
|
||||
|
||||
return page.content()
|
||||
return element_to_wait_for.inner_html() if select_wait_for else page.content()
|
||||
finally:
|
||||
browser.close()
|
||||
|
||||
@@ -156,6 +156,7 @@ with ProductData(config.product) as product_data:
|
||||
render_js = config.data.get("render_javascript", False)
|
||||
render_js_wait_until = config.data.get("render_javascript_wait_until", None)
|
||||
render_js_wait_for = config.data.get("render_javascript_wait_for", None)
|
||||
render_js_click_selector = config.data.get("render_javascript_click_selector", None)
|
||||
header_row_selector = config.data.get("header_selector", "thead tr")
|
||||
rows_selector = config.data.get("rows_selector", "tbody tr")
|
||||
cells_selector = "td, th"
|
||||
@@ -164,7 +165,7 @@ with ProductData(config.product) as product_data:
|
||||
|
||||
if render_js:
|
||||
response_text = http.fetch_javascript_url(config.url, user_agent=user_agent, wait_until=render_js_wait_until,
|
||||
wait_for=render_js_wait_for)
|
||||
wait_for=render_js_wait_for, click_selector=render_js_click_selector)
|
||||
else:
|
||||
response_text = http.fetch_url(config.url, user_agent=user_agent).text
|
||||
soup = BeautifulSoup(response_text, features="html5lib")
|
||||
|
||||
Reference in New Issue
Block a user