Fix scripts requiring rendering pages with javascript (#310)

Replace requests_html with playwright, as requests_html is [not maintained anymore](https://pypi.org/project/requests-html/) and scripts using it, such as artifactory.py, started to fail.
This commit is contained in:
Marc Wrobel
2024-02-16 22:51:21 +01:00
parent 1175756d11
commit 9cf243a10e
9 changed files with 69 additions and 48 deletions

View File

@@ -1,20 +1,20 @@
# NOTE(review): this is a flattened diff hunk. Removed and added lines appear back to back
# without +/- markers and with indentation stripped, so it is not runnable as shown.
# Presumably the HTMLSession / r.html lines are the pre-change version and the
# http.fetch_javascript_url / BeautifulSoup lines are their replacements; confirm against the commit.
from common import dates, releasedata
from requests_html import HTMLSession
from bs4 import BeautifulSoup
from common import dates, http, releasedata
# NOTE(review): the description below still names requests-html, while the replacement lines
# obtain the page through http.fetch_javascript_url; the wording looks stale after this change.
"""Fetch Java versions from https://www.java.com/releases/.
This script is using requests-html because the page needs JavaScript to render correctly."""
with releasedata.ProductData("oracle-jdk") as product_data:
# Old variant: request the page with requests-html and render it in place.
r = HTMLSession().get('https://www.java.com/releases/')
r.html.render(sleep=1, scrolldown=3)
# New variant: get the page content from the shared http helper (presumably the HTML after
# JavaScript rendering; verify in common/http) and parse it with BeautifulSoup.
content = http.fetch_javascript_url('https://www.java.com/releases/')
soup = BeautifulSoup(content, 'html.parser')
# Starts as None and is overwritten with each computed date further down.
previous_date = None
# Iterate over the elements matching '#released tr' (requests-html call, then its BeautifulSoup counterpart).
for row in r.html.find('#released tr'):
version_cell = row.find('td.anchor', first=True)
for row in soup.select('#released tr'):
version_cell = row.select_one('td.anchor')
# Rows without a 'td.anchor' cell do not pass this check.
if version_cell:
# The version string is taken from the id attribute of the 'td.anchor' cell.
version = version_cell.attrs['id']
# The date text is read from the second <td> of the row (index 1).
date_str = row.find('td')[1].text
date_str = row.select('td')[1].text
# An empty date string falls back to previous_date instead of being parsed.
date = dates.parse_date(date_str) if date_str else previous_date
product_data.declare_version(version, date)
previous_date = date