diff --git a/src/chef-inspec.py b/src/chef-inspec.py
index 46b456b7..0fac9b30 100644
--- a/src/chef-inspec.py
+++ b/src/chef-inspec.py
@@ -1,6 +1,5 @@
 from bs4 import BeautifulSoup
-from common import dates, http, releasedata
-from common.git import Git
+from common import dates, github, http, releasedata
 
 """Fetch released versions from docs.chef.io and retrieve their date from GitHub.
 docs.chef.io needs to be scraped because not all tagged versions are actually released.
@@ -13,12 +12,8 @@ with releasedata.ProductData("chef-inspec") as product_data:
     rn_soup = BeautifulSoup(rn_response.text, features="html5lib")
     released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
 
-    git = Git("https://github.com/inspec/inspec.git")
-    git.setup(bare=True)
-
-    versions = git.list_tags()
-    for version, date_str in versions:
-        sanitized_version = version.replace("v", "")
+    for release in github.fetch_releases("inspec/inspec"):
+        sanitized_version = release.tag_name.removeprefix("v")
         if sanitized_version in released_versions:
-            date = dates.parse_date(date_str)
+            date = dates.parse_datetime(release.published_at)
             product_data.declare_version(sanitized_version, date)
diff --git a/src/common/github.py b/src/common/github.py
new file mode 100644
index 00000000..1c65e348
--- /dev/null
+++ b/src/common/github.py
@@ -0,0 +1,85 @@
+import json
+import logging
+import subprocess
+
+# GraphQL query listing the releases of a repository, 100 per page. The repository owner and name
+# are GraphQL variables so that they are never interpolated into the query text, and $endCursor is
+# the variable `gh api --paginate` fills in to request the next page.
+RELEASES_QUERY = """
+query($owner: String!, $name: String!, $endCursor: String) {
+  repository(name: $name, owner: $owner) {
+    releases(
+      orderBy: {field: NAME, direction: ASC}
+      first: 100
+      after: $endCursor
+    ) {
+      pageInfo { hasNextPage, endCursor }
+      edges {
+        node {
+          name
+          publishedAt
+          isPrerelease
+          tagName
+        }
+      }
+    }
+  }
+}"""
+
+
+class Release:
+    """A GitHub release, with the fields requested by RELEASES_QUERY."""
+
+    def __init__(self, name: str, tag_name: str, published_at: str, is_prerelease: bool) -> None:
+        self.name: str = name
+        self.tag_name: str = tag_name
+        self.published_at: str = published_at
+        self.is_prerelease: bool = is_prerelease
+
+
+def _parse_pages(output: str) -> list[dict]:
+    """Decode the JSON documents printed by `gh api --paginate`.
+
+    There is one document per page, and consecutive documents may not be separated by anything.
+    """
+    decoder = json.JSONDecoder()
+    pages = []
+    position = 0
+    while position < len(output):
+        if output[position].isspace():
+            position += 1
+            continue
+        page, position = decoder.raw_decode(output, position)
+        pages.append(page)
+    return pages
+
+
+def fetch_releases(repo_id: str) -> list[Release]:
+    """Fetch all the releases of a GitHub repository using the GitHub CLI.
+
+    :param repo_id: repository identifier, formatted as "owner/repo"
+    :return: the releases, in the order returned by the API
+    :raises ValueError: if repo_id does not contain exactly one '/'
+    :raises subprocess.CalledProcessError: if gh exits with a non-zero status
+    :raises subprocess.TimeoutExpired: if gh does not complete within 300 seconds
+    """
+    logging.info(f"fetching {repo_id} GitHub releases")
+    (owner, repo) = repo_id.split('/')
+    # An argument list is used instead of a shell command line, and owner/repo are passed as raw
+    # string variables (-f), so that nothing in repo_id can alter the command nor the query.
+    child = subprocess.run([
+        "gh", "api", "graphql", "--paginate",
+        "-f", f"owner={owner}",
+        "-f", f"name={repo}",
+        "-f", f"query={RELEASES_QUERY}",
+    ], capture_output=True, timeout=300, check=True)
+    logging.info(f"fetched {repo_id} GitHub releases")
+
+    releases = []
+    for page in _parse_pages(child.stdout.decode("utf-8")):
+        for edge in page['data']['repository']['releases']['edges']:
+            release_data = edge['node']
+            releases.append(Release(release_data['name'], release_data['tagName'], release_data['publishedAt'],
+                                    release_data['isPrerelease']))
+
+    return releases
diff --git a/src/github_releases.py b/src/github_releases.py
index e3b0e0fc..4e19aa16 100644
--- a/src/github_releases.py
+++ b/src/github_releases.py
@@ -1,9 +1,6 @@
-import json
-import logging
-import subprocess
 import sys
 
-from common import dates, endoflife, releasedata
+from common import dates, endoflife, github, releasedata
 
 """Fetches versions from GitHub releases using the GraphQL API and the GitHub CLI.
 
@@ -13,47 +10,17 @@ Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manag
 METHOD = "github_releases"
 
 
-def fetch_releases(repo_id: str) -> list[dict]:
-    logging.info(f"fetching {repo_id} GitHub releases")
-    (owner, repo) = repo_id.split('/')
-    child = subprocess.run("""gh api graphql --paginate -f query='
-query($endCursor: String) {
-  repository(name: "%s", owner: "%s") {
-    releases(
-      orderBy: {field: NAME, direction: ASC}
-      first: 100
-      after: $endCursor
-    ) {
-      pageInfo { hasNextPage, endCursor }
-      edges {
-        node {
-          name
-          publishedAt
-          isPrerelease
-        }
-      }
-    }
-  }
-}'""" % (repo, owner), capture_output=True, timeout=300, check=True, shell=True)  # noqa: UP031
-    logging.info(f"fetched {repo_id} GitHub releases")
-
-    # splitting because response may contain multiple JSON objects on a single line
-    responses = child.stdout.decode("utf-8").strip().replace('}{', '}\n{').split("\n")
-    return [json.loads(response) for response in responses]
-
-
 p_filter = sys.argv[1] if len(sys.argv) > 1 else None
 m_filter = sys.argv[2] if len(sys.argv) > 2 else None
 for config in endoflife.list_configs(p_filter, METHOD, m_filter):
     with releasedata.ProductData(config.product) as product_data:
-        for page in fetch_releases(config.url):
-            releases = [edge['node'] for edge in (page['data']['repository']['releases']['edges'])]
+        for release in github.fetch_releases(config.url):
+            if release.is_prerelease:
+                continue
 
-            for release in releases:
-                if not release['isPrerelease']:
-                    version_str = release['name']
-                    version_match = config.first_match(version_str)
-                    if version_match:
-                        version = config.render(version_match)
-                        date = dates.parse_datetime(release['publishedAt'])
-                        product_data.declare_version(version, date)
+            version_str = release.name
+            version_match = config.first_match(version_str)
+            if version_match:
+                version = config.render(version_match)
+                date = dates.parse_datetime(release.published_at)
+                product_data.declare_version(version, date)