From 8f411b947964fadca0d583ca038e023ed2e2b760 Mon Sep 17 00:00:00 2001 From: Marc Wrobel Date: Mon, 24 Mar 2025 09:45:26 +0100 Subject: [PATCH] [atlassian] Improve scripts Replace the jira and confluence scripts with a single atlassian-versions script Also update the atlassian_eol script so that: - Pages are now fetched using JavaScript, - The regex also accepts EOS in addition to EOL, - The full identifier is now required as a parameter (this will make it easier if the name changes again). --- src/atlassian_eol.py | 14 +++++++------- src/atlassian_versions.py | 24 ++++++++++++++++++++++++ src/confluence.py | 15 --------------- src/jira.py | 15 --------------- 4 files changed, 31 insertions(+), 37 deletions(-) create mode 100644 src/atlassian_versions.py delete mode 100644 src/confluence.py delete mode 100644 src/jira.py diff --git a/src/atlassian_eol.py b/src/atlassian_eol.py index 99a92b55..84e588a7 100644 --- a/src/atlassian_eol.py +++ b/src/atlassian_eol.py @@ -5,24 +5,24 @@ import sys from bs4 import BeautifulSoup from common import dates, endoflife, http, releasedata -"""Fetches EOL dates Atlassian EOL page. +"""Fetches EOL dates from Atlassian EOL page. -The only needed argument is the last part of the product title identifier on the Atlassian EOL page, -such as `JiraSoftware` (from `AtlassianSupportEndofLifePolicy-JiraSoftware`). +This script takes a single argument which is the product title identifier on the Atlassian EOL page, such as +`AtlassianSupportEndofLifePolicy-JiraSoftware`. 
""" METHOD = "atlassian_eol" -REGEX = r"(?P\d+(\.\d+)+) \(EOL date: (?P.+)\).*$" +REGEX = r"(?P\d+(\.\d+)+) \(EO[SL] date: (?P.+)\).*$" PATTERN = re.compile(REGEX, re.MULTILINE) p_filter = sys.argv[1] if len(sys.argv) > 1 else None m_filter = sys.argv[2] if len(sys.argv) > 2 else None for config in endoflife.list_configs(p_filter, METHOD, m_filter): with releasedata.ProductData(config.product) as product_data: - response = http.fetch_url('https://confluence.atlassian.com/support/atlassian-support-end-of-life-policy-201851003.html') - soup = BeautifulSoup(response.text, features="html5lib") + content = http.fetch_javascript_url('https://confluence.atlassian.com/support/atlassian-support-end-of-life-policy-201851003.html') + soup = BeautifulSoup(content, features="html5lib") - for li in soup.select(f"#AtlassianSupportEndofLifePolicy-{config.url}+ul li"): + for li in soup.select(f"#{config.url}+ul li"): match = PATTERN.match(li.get_text(strip=True)) if not match: logging.warning(f"Failed to parse EOL date from '{li.get_text(strip=True)}'") diff --git a/src/atlassian_versions.py b/src/atlassian_versions.py new file mode 100644 index 00000000..475576cc --- /dev/null +++ b/src/atlassian_versions.py @@ -0,0 +1,24 @@ +import sys + +from bs4 import BeautifulSoup +from common import dates, endoflife, http, releasedata + +"""Fetches versions from Atlassian download-archives pages. + +This script takes a single argument which is the url of the product's download-archives URL, such as +`https://www.atlassian.com/software/confluence/download-archives`. 
+""" + +METHOD = "atlassian_versions" + +p_filter = sys.argv[1] if len(sys.argv) > 1 else None +m_filter = sys.argv[2] if len(sys.argv) > 2 else None +for config in endoflife.list_configs(p_filter, METHOD, m_filter): + with releasedata.ProductData(config.product) as product_data: + content = http.fetch_javascript_url(config.url, wait_until='networkidle') + soup = BeautifulSoup(content, 'html5lib') + + for version_block in soup.select('.versions-list'): + version = version_block.select_one('a.product-versions').attrs['data-version'] + date = dates.parse_date(version_block.select_one('.release-date').text) + product_data.declare_version(version, date) diff --git a/src/confluence.py b/src/confluence.py deleted file mode 100644 index 2f147b80..00000000 --- a/src/confluence.py +++ /dev/null @@ -1,15 +0,0 @@ -from bs4 import BeautifulSoup -from common import dates, http, releasedata - -"""Fetches Confluence versions from www.atlassian.com. - -Note that requests_html is used because JavaScript is needed to render the page.""" - -with releasedata.ProductData("confluence") as product_data: - content = http.fetch_javascript_url("https://www.atlassian.com/software/confluence/download-archives") - soup = BeautifulSoup(content, 'html.parser') - - for version_block in soup.select('.versions-list'): - version = version_block.select_one('a.product-versions').attrs['data-version'] - date = dates.parse_date(version_block.select_one('.release-date').text) - product_data.declare_version(version, date) diff --git a/src/jira.py b/src/jira.py deleted file mode 100644 index e61f3813..00000000 --- a/src/jira.py +++ /dev/null @@ -1,15 +0,0 @@ -from bs4 import BeautifulSoup -from common import dates, http, releasedata - -"""Fetches Jira versions from www.atlassian.com. 
- -Note that requests_html is used because JavaScript is needed to render the page.""" - -with releasedata.ProductData("jira") as product_data: - content = http.fetch_javascript_url("https://www.atlassian.com/software/jira/update") - soup = BeautifulSoup(content, 'html.parser') - - for version_block in soup.select('.versions-list'): - version = version_block.select_one('a.product-versions').attrs['data-version'] - date = dates.parse_date(version_block.select_one('.release-date').text) - product_data.declare_version(version, date)