[release-table] Implement render_js_click_selector parameter (#485)
This parameter (configured as `render_javascript_click_selector`) allows clicking on an element of the page after it has loaded when `render_javascript` is enabled.
This commit is contained in:
@@ -54,6 +54,11 @@
|
|||||||
"name": "2.0",
|
"name": "2.0",
|
||||||
"releaseDate": "2012-04-27",
|
"releaseDate": "2012-04-27",
|
||||||
"eol": "2014-12-31"
|
"eol": "2014-12-31"
|
||||||
|
},
|
||||||
|
"10.0": {
|
||||||
|
"name": "10.0",
|
||||||
|
"releaseDate": "2024-07-19",
|
||||||
|
"eol": "2028-07-31"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"versions": {}
|
"versions": {}
|
||||||
|
|||||||
@@ -82,8 +82,9 @@ def fetch_markdown(url: str, data: any = None, user_agent: str = ENDOFLIFE_BOT_U
|
|||||||
return mwparserfromhell.parse(response.text)
|
return mwparserfromhell.parse(response.text)
|
||||||
|
|
||||||
# This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright.
|
# This requires some setup, see https://playwright.dev/python/docs/intro#installing-playwright.
|
||||||
def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, wait_until: str = None, wait_for: str = None, select_wait_for: bool = False) -> str:
|
def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, wait_until: str = None, wait_for: str = None,
|
||||||
logging.info(f"Fetching {url} with JavaScript (wait_until = {wait_until}, wait_for = {wait_for}, select_wait_for = {select_wait_for})")
|
select_wait_for: bool = False, click_selector: str = None) -> str:
|
||||||
|
logging.info(f"Fetching {url} with JavaScript (wait_until = {wait_until}, wait_for = {wait_for}, select_wait_for = {select_wait_for}, click_selector = {click_selector})")
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
browser = p.chromium.launch()
|
browser = p.chromium.launch()
|
||||||
context = browser.new_context()
|
context = browser.new_context()
|
||||||
@@ -94,22 +95,16 @@ def fetch_javascript_url(url: str, user_agent: str = ENDOFLIFE_BOT_USER_AGENT, w
|
|||||||
page.goto(url, wait_until=wait_until)
|
page.goto(url, wait_until=wait_until)
|
||||||
logging.info(f"Fetched {url}")
|
logging.info(f"Fetched {url}")
|
||||||
|
|
||||||
|
element_to_wait_for = None
|
||||||
if wait_for:
|
if wait_for:
|
||||||
try:
|
logging.info(f"Waiting for element with selector {wait_for}")
|
||||||
logging.info(f"Waiting for element with selector {wait_for}")
|
element_to_wait_for = page.wait_for_selector(selector=wait_for)
|
||||||
element = page.wait_for_selector(selector=wait_for)
|
|
||||||
|
|
||||||
if element:
|
if click_selector:
|
||||||
logging.debug(f"Found element with selector {wait_for} on {url}")
|
logging.info(f"Clicking on element with selector {click_selector}")
|
||||||
return element.inner_html() if select_wait_for else page.content()
|
page.click(selector=click_selector)
|
||||||
|
page.wait_for_timeout(1000) # Wait for 1 second to allow the page to update after the click.
|
||||||
|
|
||||||
logging.error(f"No element found with selector {wait_for} on {url}, will return full page content")
|
return element_to_wait_for.inner_html() if select_wait_for else page.content()
|
||||||
logging.debug(f"Full page content: {page.content()}")
|
|
||||||
|
|
||||||
except Exception as e: # noqa: BLE001
|
|
||||||
logging.error(f"Error while waiting for element with selector {wait_for} on {url}: {e}")
|
|
||||||
logging.debug(f"Full page content: {page.content()}")
|
|
||||||
|
|
||||||
return page.content()
|
|
||||||
finally:
|
finally:
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|||||||
@@ -156,6 +156,7 @@ with ProductData(config.product) as product_data:
|
|||||||
render_js = config.data.get("render_javascript", False)
|
render_js = config.data.get("render_javascript", False)
|
||||||
render_js_wait_until = config.data.get("render_javascript_wait_until", None)
|
render_js_wait_until = config.data.get("render_javascript_wait_until", None)
|
||||||
render_js_wait_for = config.data.get("render_javascript_wait_for", None)
|
render_js_wait_for = config.data.get("render_javascript_wait_for", None)
|
||||||
|
render_js_click_selector = config.data.get("render_javascript_click_selector", None)
|
||||||
header_row_selector = config.data.get("header_selector", "thead tr")
|
header_row_selector = config.data.get("header_selector", "thead tr")
|
||||||
rows_selector = config.data.get("rows_selector", "tbody tr")
|
rows_selector = config.data.get("rows_selector", "tbody tr")
|
||||||
cells_selector = "td, th"
|
cells_selector = "td, th"
|
||||||
@@ -164,7 +165,7 @@ with ProductData(config.product) as product_data:
|
|||||||
|
|
||||||
if render_js:
|
if render_js:
|
||||||
response_text = http.fetch_javascript_url(config.url, user_agent=user_agent, wait_until=render_js_wait_until,
|
response_text = http.fetch_javascript_url(config.url, user_agent=user_agent, wait_until=render_js_wait_until,
|
||||||
wait_for=render_js_wait_for)
|
wait_for=render_js_wait_for, click_selector=render_js_click_selector)
|
||||||
else:
|
else:
|
||||||
response_text = http.fetch_url(config.url, user_agent=user_agent).text
|
response_text = http.fetch_url(config.url, user_agent=user_agent).text
|
||||||
soup = BeautifulSoup(response_text, features="html5lib")
|
soup = BeautifulSoup(response_text, features="html5lib")
|
||||||
|
|||||||
Reference in New Issue
Block a user