Add Java automation

Fetch Java versions with their dates from https://www.java.com/releases/. This script uses a new dependency, requests-html (https://requests-html.kennethreitz.org/), because https://www.java.com/releases/ needs JavaScript to render correctly. For this to work, requests-html automatically downloads Chromium through its dependency on pyppeteer. Chromium is downloaded into PYPPETEER_HOME, so the workflow has been modified to set this directory to a subdirectory of ~/.cache. Note that setting the same environment variable from within the Python script does not work.
2023-01-02 21:34:37 +01:00
parent a061e41789
commit 082b165d5d
4 changed files with 285 additions and 0 deletions
--- a/src/java.py
+++ b/src/java.py
@@ -0,0 +1,51 @@
+import json
+
+from requests_html import HTMLSession
+
+"""Fetch Java versions with their dates from https://www.java.com/releases/.
+
+This script is using requests-html (https://requests-html.kennethreitz.org/)
+because https://www.java.com/releases/ needs JavaScript to render correctly.
+
+requests-html uses pyppeteer internally to execute JavaScript, and pyppeteer
+relies on Chromium, which the library automatically downloads to
+~/.local/share/pyppeteer. This path can be overridden by declaring a
+PYPPETEER_HOME environment variable. Unfortunately, exporting this variable
+from within the Python script does not work, so it has to be set before the
+script is executed.
+"""
+
+# Product name; used for the log group label and the output file name.
+PRODUCT = "java"
+# Page listing the Java releases and their dates.
+URL = "https://www.java.com/releases/"
+
+
+def fetch_releases(releases):
+    session = HTMLSession()
+    r = session.get('https://www.java.com/releases/')
+    r.html.render(sleep=1, scrolldown=3)
+
+    previous_date = None
+    for row in r.html.find('#released tr.shade'):
+        version = row.find('td.anchor', first=True).attrs['id']
+        date = row.find('td')[1].text
+        date = previous_date if not date else date
+        print(f"{version}: {date}")
+        releases[version] = date
+        previous_date = date
+
+
+def main():
+    print(f"::group::{PRODUCT}")
+    releases = {}
+    fetch_releases(releases)
+    print("::endgroup::")
+
+    with open(f"releases/{PRODUCT}.json", "w") as f:
+        f.write(json.dumps(
+            # sort by date desc
+            dict(sorted(releases.items(), key=lambda e: e[1], reverse=True)),
+            indent=2))
+
+
+# Run the scraper only when this file is executed as a script.
+if __name__ == '__main__':
+    main()