[firefox] Fix script (#284)
- Batch HTTP URL fetching in groups of 20 to reduce the number of unnecessary retries. - Search for the c-release-date element directly.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import re
import urllib.parse
from collections.abc import Iterable, Iterator
from itertools import islice

from bs4 import BeautifulSoup
from common import dates, http, releasedata
|
||||
@@ -8,24 +9,39 @@ from common import dates, http, releasedata
|
||||
|
||||
Versions lower than 10.0 are ignored because they are too difficult to parse."""
|
||||
|
||||
|
||||
# Will be replaced by itertools.batched in Python 3.12+.
# See https://docs.python.org/3/library/itertools.html#itertools.batched.
def batched(iterable: Iterable, n: int) -> Iterator[tuple]:
    """Yield successive tuples of at most ``n`` items taken from ``iterable``.

    Items are consumed lazily and in order. Every yielded tuple holds exactly
    ``n`` items except possibly the last one, which holds whatever remains.
    An empty iterable yields nothing.

    Args:
        iterable: Any iterable; it is consumed once, front to back.
        n: Maximum size of each batch; must be at least 1.

    Raises:
        ValueError: If ``n`` is lower than 1. Because this is a generator, the
            error is raised when iteration starts, not when ``batched`` is
            called.
    """
    if n < 1:
        msg = 'n must be at least one'
        raise ValueError(msg)

    # A single shared iterator lets each islice() call resume where the
    # previous batch stopped.
    it = iter(iterable)

    # islice() returns fewer than n items once the source runs dry; the empty
    # tuple is falsy and ends the loop.
    while batch := tuple(islice(it, n)):
        yield batch
|
||||
|
||||
|
||||
product = releasedata.Product("firefox")
|
||||
releases_page = http.fetch_url("https://www.mozilla.org/en-US/firefox/releases/")
|
||||
releases_soup = BeautifulSoup(releases_page.text, features="html5lib")
|
||||
releases_list = releases_soup.find_all("ol", class_="c-release-list")
|
||||
release_notes_urls = [urllib.parse.urljoin(releases_page.url, p.get("href")) for p in releases_list[0].find_all("a")]
|
||||
|
||||
for release_notes in http.fetch_urls(release_notes_urls):
|
||||
version = release_notes.url.split("/")[-3]
|
||||
for batch_release_notes_urls in batched(release_notes_urls, 20):
|
||||
for release_notes in http.fetch_urls(batch_release_notes_urls):
|
||||
version = release_notes.url.split("/")[-3]
|
||||
|
||||
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
|
||||
if release_notes_soup.find("div", class_="c-release-version"):
|
||||
date = dates.parse_date(release_notes_soup.find("p", class_="c-release-date").get_text())
|
||||
product.declare_version(version, date)
|
||||
release_notes_soup = BeautifulSoup(release_notes.text, features="html5lib")
|
||||
date_elt = release_notes_soup.find(class_="c-release-date")
|
||||
if date_elt:
|
||||
date = dates.parse_date(date_elt.get_text())
|
||||
product.declare_version(version, date)
|
||||
continue
|
||||
|
||||
elif release_notes_soup.find("small", string=re.compile("^.?First offered")):
|
||||
element = release_notes_soup.find("small", string=re.compile("^.?First offered"))
|
||||
date = dates.parse_date(' '.join(element.get_text().split(" ")[-3:])) # get last 3 words
|
||||
product.declare_version(version, date)
|
||||
# versions < 10.0 are ignored
|
||||
date_elt = release_notes_soup.find("small", string=re.compile("^.?First offered"))
|
||||
if date_elt:
|
||||
date = dates.parse_date(' '.join(date_elt.get_text().split(" ")[-3:])) # get last 3 words
|
||||
product.declare_version(version, date)
|
||||
# versions < 10.0 are ignored
|
||||
|
||||
product.write()
|
||||
|
||||
Reference in New Issue
Block a user