Improve HTTP handling in scripts (#449)

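Provide helper methods in the `http` module (e.g. `http.fetch_html`) that fetch a URL and return the parsed content, hiding the complexity of parsing HTML, JSON, YAML, XML or Markdown from the individual scripts.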
2025-06-28 11:46:04 +02:00
parent fda4967c38
commit 312ce078bb
43 changed files with 103 additions and 137 deletions
--- a/src/google-kubernetes-engine.py
+++ b/src/google-kubernetes-engine.py
@@ -1,6 +1,5 @@
 import re

-from bs4 import BeautifulSoup
 from common import dates, endoflife, http, releasedata

 # https://regex101.com/r/zPxBqT/1
@@ -15,10 +14,9 @@ URL_BY_PRODUCT = {
 for config in endoflife.list_configs_from_argv(): # noqa: B007 multiple JSON produced for historical reasons
    for product_name, url in URL_BY_PRODUCT.items():
        with releasedata.ProductData(product_name) as product_data:
-            relnotes = http.fetch_url(url)
-            relnotes_soup = BeautifulSoup(relnotes.text, features="html5lib")
+            html = http.fetch_html(url)

-            for section in relnotes_soup.find_all('section', class_='releases'):
+            for section in html.find_all('section', class_='releases'):
                for h2 in section.find_all('h2'):  # h2 contains the date
                    date = dates.parse_date(h2.get('data-text'))