From 8f411b947964fadca0d583ca038e023ed2e2b760 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Mon, 24 Mar 2025 09:45:26 +0100 Subject: [PATCH] [atlassian] Improve scripts Replace the jira and confluence scripts with a single atlassian-versions script Also update the atlassian_eol script so that: - Pages are now fetched using JavaScript, - The regex also accepts EOS in addition to EOL, - The full identifier is now required as a parameter (this will make it easier if the name changes again). --- src/atlassian_eol.py | 14 +++++++------- src/atlassian_versions.py | 24 ++++++++++++++++++++++++ src/confluence.py | 15 --------------- src/jira.py | 15 --------------- 4 files changed, 31 insertions(+), 37 deletions(-) create mode 100644 src/atlassian_versions.py delete mode 100644 src/confluence.py delete mode 100644 src/jira.py diff --git a/src/atlassian_eol.py b/src/atlassian_eol.py index 99a92b55..84e588a7 100644 --- a/src/atlassian_eol.py +++ b/src/atlassian_eol.py @@ -5,24 +5,24 @@ import sys from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata -"""Fetches EOL dates Atlassian EOL page. +"""Fetches EOL dates from Atlassian EOL page. -The only needed argument is the last part of the product title identifier on the Atlassian EOL page, -such as `JiraSoftware` (from `AtlassianSupportEndofLifePolicy-JiraSoftware`). +This script takes a single argument which is the product title identifier on the Atlassian EOL page, such as +`AtlassianSupportEndofLifePolicy-JiraSoftware`. 
""" METHOD = "atlassian_eol" -REGEX = r"(?P\d+(\.\d+)+) \(EOL date: (?P.+)\).*$" +REGEX = r"(?P\d+(\.\d+)+) \(EO[SL] date: (?P.+)\).*$" PATTERN = re.compile(REGEX, re.MULTILINE) p_filter = sys.argv[1] if len(sys.argv) > 1 else None m_filter = sys.argv[2] if len(sys.argv) > 2 else None for config in endoflife.list_configs(p_filter, METHOD, m_filter): with releasedata.ProductData(config.product) as product_data: - response = http.fetch_url('https://confluence.atlassian.com/support/atlassian-support-end-of-life-policy-201851003.html') - soup = BeautifulSoup(response.text, features="html5lib") + content = http.fetch_javascript_url('https://confluence.atlassian.com/support/atlassian-support-end-of-life-policy-201851003.html') + soup = BeautifulSoup(content, features="html5lib") - for li in soup.select(f"#AtlassianSupportEndofLifePolicy-{config.url}+ul li"): + for li in soup.select(f"#{config.url}+ul li"): match = PATTERN.match(li.get_text(strip=True)) if not match: logging.warning(f"Failed to parse EOL date from '{li.get_text(strip=True)}'") diff --git a/src/atlassian_versions.py b/src/atlassian_versions.py new file mode 100644 index 00000000..475576cc --- /dev/null +++ b/src/atlassian_versions.py @@ -0,0 +1,24 @@ +import sys + +from bs4 import BeautifulSoup +from common import dates, endoflife, http, releasedata + +"""Fetches versions from Atlassian download-archives pages. + +This script takes a single argument which is the url of the product's download-archives URL, such as +`https://www.atlassian.com/software/confluence/download-archives`. 
+""" + +METHOD = "atlassian_versions" + +p_filter = sys.argv[1] if len(sys.argv) > 1 else None +m_filter = sys.argv[2] if len(sys.argv) > 2 else None +for config in endoflife.list_configs(p_filter, METHOD, m_filter): + with releasedata.ProductData(config.product) as product_data: + content = http.fetch_javascript_url(config.url, wait_until='networkidle') + soup = BeautifulSoup(content, 'html5lib') + + for version_block in soup.select('.versions-list'): + version = version_block.select_one('a.product-versions').attrs['data-version'] + date = dates.parse_date(version_block.select_one('.release-date').text) + product_data.declare_version(version, date) diff --git a/src/confluence.py b/src/confluence.py deleted file mode 100644 index 2f147b80..00000000 --- a/src/confluence.py +++ /dev/null @@ -1,15 +0,0 @@ -from bs4 import BeautifulSoup -from common import dates, http, releasedata - -"""Fetches Confluence versions from www.atlassian.com. - -Note that requests_html is used because JavaScript is needed to render the page.""" - -with releasedata.ProductData("confluence") as product_data: - content = http.fetch_javascript_url("https://www.atlassian.com/software/confluence/download-archives") - soup = BeautifulSoup(content, 'html.parser') - - for version_block in soup.select('.versions-list'): - version = version_block.select_one('a.product-versions').attrs['data-version'] - date = dates.parse_date(version_block.select_one('.release-date').text) - product_data.declare_version(version, date) diff --git a/src/jira.py b/src/jira.py deleted file mode 100644 index e61f3813..00000000 --- a/src/jira.py +++ /dev/null @@ -1,15 +0,0 @@ -from bs4 import BeautifulSoup -from common import dates, http, releasedata - -"""Fetches Jira versions from www.atlassian.com. 
- -Note that requests_html is used because JavaScript is needed to render the page.""" - -with releasedata.ProductData("jira") as product_data: - content = http.fetch_javascript_url("https://www.atlassian.com/software/jira/update") - soup = BeautifulSoup(content, 'html.parser') - - for version_block in soup.select('.versions-list'): - version = version_block.select_one('a.product-versions').attrs['data-version'] - date = dates.parse_date(version_block.select_one('.release-date').text) - product_data.declare_version(version, date)