[release-table] Implement render_js_click_selector parameter (#485)

This parameter allows clicking on an element of the page after the page has loaded when using `render_javascript`.
This commit is contained in:
Marc Wrobel
2025-08-03 21:43:16 +02:00
committed by GitHub
parent 6ab864d474
commit 1735954764
3 changed files with 18 additions and 17 deletions

View File

@@ -54,6 +54,11 @@
"name": "2.0", "name": "2.0",
"releaseDate": "2012-04-27", "releaseDate": "2012-04-27",
"eol": "2014-12-31" "eol": "2014-12-31"
},
"10.0": {
"name": "10.0",
"releaseDate": "2024-07-19",
"eol": "2028-07-31"
} }
}, },
"versions": {} "versions": {}

View File

@@ -82,8 +82,9 @@ def fetch_markdown(url: str, data: any = None, user_agent: str = ENDOFLIFE_BOT_U
return mwparserfromhell.parse(response.text) return mwparserfromhell.parse(response.text)
# This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright. # This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright.
def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, wait_until: str = None, wait_for: str = None, select_wait_for: bool = False) -> str: def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, wait_until: str = None, wait_for: str = None,
logging.info(f"Fetching {url} with JavaScript (wait_until = {wait_until}, wait_for = {wait_for}, select_wait_for = {select_wait_for})") select_wait_for: bool = False, click_selector: str = None) -> str:
logging.info(f"Fetching {url} with JavaScript (wait_until = {wait_until}, wait_for = {wait_for}, select_wait_for = {select_wait_for}, click_selector = {click_selector})")
with sync_playwright() as p: with sync_playwright() as p:
browser = p.chromium.launch() browser = p.chromium.launch()
context = browser.new_context() context = browser.new_context()
@@ -94,22 +95,16 @@ def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, w
page.goto(url, wait_until=wait_until) page.goto(url, wait_until=wait_until)
logging.info(f"Fetched {url}") logging.info(f"Fetched {url}")
element_to_wait_for = None
if wait_for: if wait_for:
try: logging.info(f"Waiting for element with selector {wait_for}")
logging.info(f"Waiting for element with selector {wait_for}") element_to_wait_for = page.wait_for_selector(selector=wait_for)
element = page.wait_for_selector(selector=wait_for)
if element: if click_selector:
logging.debug(f"Found element with selector {wait_for} on {url}") logging.info(f"Clicking on element with selector {click_selector}")
return element.inner_html() if select_wait_for else page.content() page.click(selector=click_selector)
page.wait_for_timeout(1000) # Wait for 1 second to allow the page to update after the click.
logging.error(f"No element found with selector {wait_for} on {url}, will return full page content") return element_to_wait_for.inner_html() if select_wait_for else page.content()
logging.debug(f"Full page content: {page.content()}")
except Exception as e: # noqa: BLE001
logging.error(f"Error while waiting for element with selector {wait_for} on {url}: {e}")
logging.debug(f"Full page content: {page.content()}")
return page.content()
finally: finally:
browser.close() browser.close()

View File

@@ -156,6 +156,7 @@ with ProductData(config.product) as product_data:
render_js = config.data.get("render_javascript", False) render_js = config.data.get("render_javascript", False)
render_js_wait_until = config.data.get("render_javascript_wait_until", None) render_js_wait_until = config.data.get("render_javascript_wait_until", None)
render_js_wait_for = config.data.get("render_javascript_wait_for", None) render_js_wait_for = config.data.get("render_javascript_wait_for", None)
render_js_click_selector = config.data.get("render_javascript_click_selector", None)
header_row_selector = config.data.get("header_selector", "thead tr") header_row_selector = config.data.get("header_selector", "thead tr")
rows_selector = config.data.get("rows_selector", "tbody tr") rows_selector = config.data.get("rows_selector", "tbody tr")
cells_selector = "td, th" cells_selector = "td, th"
@@ -164,7 +165,7 @@ with ProductData(config.product) as product_data:
if render_js: if render_js:
response_text = http.fetch_javascript_url(config.url, user_agent=user_agent, wait_until=render_js_wait_until, response_text = http.fetch_javascript_url(config.url, user_agent=user_agent, wait_until=render_js_wait_until,
wait_for=render_js_wait_for) wait_for=render_js_wait_for, click_selector=render_js_click_selector)
else: else:
response_text = http.fetch_url(config.url, user_agent=user_agent).text response_text = http.fetch_url(config.url, user_agent=user_agent).text
soup = BeautifulSoup(response_text, features="html5lib") soup = BeautifulSoup(response_text, features="html5lib")