Migrate Unity scraper to use Editor Release API (#558)
Switch from HTML scraping to the official Unity Editor Release API: fetch versions from the paginated API endpoint, filter out ALPHA and BETA stream releases, and process all available versions (not just the first page). Closes https://github.com/endoflife-date/endoflife.date/issues/9418.
This commit is contained in:
48
src/unity.py
48
src/unity.py
@@ -1,27 +1,37 @@
|
|||||||
from common import dates, http
|
from common import dates, http
|
||||||
from common.releasedata import ProductData, config_from_argv
|
from common.releasedata import ProductData, config_from_argv
|
||||||
|
|
||||||
"""Fetches the Unity LTS releases from the Unity website. Non-LTS releases are not listed there, so this automation
|
"""Fetches Unity versions from the Unity Editor Release API.
|
||||||
is only partial.
|
|
||||||
|
|
||||||
This script is cumulative: only the first page is fetched (e.g. the first ten versions). This is because:
|
This script fetches releases from the Unity API, skipping those published in the ALPHA and BETA streams.
|
||||||
- it is too long to fetch all (at least 30s, usually more than a minute),
|
The API provides paginated results with all Unity versions across different streams (TECH, LTS, BETA, ALPHA).
|
||||||
- this generates too many requests to the unity.com servers,
|
"""
|
||||||
- fetching multiple pages in parallel raises a lot of errors and makes the overall process slower (this was tested
|
|
||||||
during https://github.com/endoflife-date/release-data/pull/194),
|
|
||||||
- and anyway oldest versions are never updated.
|
|
||||||
|
|
||||||
Note that it was assumed that:
|
|
||||||
- the script is run regularly enough to keep the versions up to date (once a day or week looks enough),
|
|
||||||
- there are never more than 10 new LTS versions at a time.
|
|
||||||
|
|
||||||
The script will need to be updated if someday those conditions are not met."""
|
|
||||||
|
|
||||||
config = config_from_argv()
|
config = config_from_argv()
|
||||||
with ProductData(config.product) as product_data:
|
with ProductData(config.product) as product_data:
|
||||||
html = http.fetch_html(config.url)
|
offset = 0
|
||||||
|
limit = 25
|
||||||
|
|
||||||
for release in html.find_all('div', class_='component-releases-item__show__inner-header'):
|
while True:
|
||||||
version = release.find('h4').find('span').text
|
url = f"{config.url}?limit={limit}&offset={offset}"
|
||||||
date = dates.parse_datetime(release.find('time').attrs['datetime'])
|
data = http.fetch_json(url)
|
||||||
product_data.declare_version(version, date)
|
|
||||||
|
if 'results' not in data:
|
||||||
|
break
|
||||||
|
|
||||||
|
for release in data['results']:
|
||||||
|
version = release['version']
|
||||||
|
|
||||||
|
# Skip pre-release streams (only ALPHA and BETA are filtered out)
|
||||||
|
stream = release.get('stream', '')
|
||||||
|
if stream in ('ALPHA', 'BETA'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
date = dates.parse_datetime(release['releaseDate'])
|
||||||
|
product_data.declare_version(version, date)
|
||||||
|
|
||||||
|
# Check if we've reached the end
|
||||||
|
total = data.get('total', 0)
|
||||||
|
offset += limit
|
||||||
|
if offset >= total:
|
||||||
|
break
|
||||||
|
|||||||
Reference in New Issue
Block a user