[chef-inspec] Use GitHub releases instead of git tags (#367)

GitHub releases give better dates than git tags.
This commit is contained in:
Marc Wrobel
2024-08-13 22:20:36 +02:00
committed by GitHub
parent d492c2d192
commit f238224681
3 changed files with 64 additions and 52 deletions

View File

@@ -1,6 +1,5 @@
from bs4 import BeautifulSoup
from common import dates, http, releasedata
from common.git import Git
from common import dates, github, http, releasedata
"""Fetch released versions from docs.chef.io and retrieve their date from GitHub.
docs.chef.io needs to be scraped because not all tagged versions are actually released.
@@ -13,12 +12,8 @@ with releasedata.ProductData("chef-inspec") as product_data:
rn_soup = BeautifulSoup(rn_response.text, features="html5lib")
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
git = Git("https://github.com/inspec/inspec.git")
git.setup(bare=True)
versions = git.list_tags()
for version, date_str in versions:
sanitized_version = version.replace("v", "")
for release in github.fetch_releases("inspec/inspec"):
sanitized_version = release.tag_name.replace("v", "")
if sanitized_version in released_versions:
date = dates.parse_date(date_str)
date = dates.parse_datetime(release.published_at)
product_data.declare_version(sanitized_version, date)

50
src/common/github.py Normal file
View File

@@ -0,0 +1,50 @@
import json
import logging
import subprocess
class Release:
    """A GitHub release, holding the fields requested by this module's GraphQL query.

    Attributes are stored exactly as passed to the constructor; no parsing or
    validation is done here (``published_at`` stays a string).
    """

    def __init__(self, name: str, tag_name: str, published_at: str, is_prerelease: bool) -> None:
        """Store the release fields.

        :param name: release name (the ``name`` GraphQL field).
        :param tag_name: name of the git tag (the ``tagName`` GraphQL field).
        :param published_at: publication timestamp (the ``publishedAt`` GraphQL field), unparsed.
        :param is_prerelease: pre-release flag (the ``isPrerelease`` GraphQL field).
        """
        self.name: str = name
        self.tag_name: str = tag_name
        self.published_at: str = published_at
        self.is_prerelease: bool = is_prerelease

    def __repr__(self) -> str:
        """Return a constructor-like representation, useful in logs and debuggers."""
        return (f"{type(self).__name__}(name={self.name!r}, tag_name={self.tag_name!r}, "
                f"published_at={self.published_at!r}, is_prerelease={self.is_prerelease!r})")
def fetch_releases(repo_id: str) -> list[Release]:
    """Fetch all releases of a GitHub repository using the GitHub CLI and the GraphQL API.

    The ``gh`` executable must be available on the PATH and authenticated.

    :param repo_id: repository identifier in the ``owner/repo`` form (e.g. ``inspec/inspec``).
    :return: one ``Release`` per release node, in the order returned by the API
        (ordered by name, ascending, page after page).
    :raises ValueError: if ``repo_id`` does not split into exactly two parts around ``/``.
    :raises subprocess.CalledProcessError: if ``gh`` exits with a non-zero status.
    :raises subprocess.TimeoutExpired: if ``gh`` has not finished after 300 seconds.
    """
    logging.info(f"fetching {repo_id} GitHub releases")
    (owner, repo) = repo_id.split('/')

    # owner and repo are passed as GraphQL variables rather than interpolated into the query,
    # so unusual characters in repo_id cannot alter the query.
    query = """
query($owner: String!, $name: String!, $endCursor: String) {
  repository(name: $name, owner: $owner) {
    releases(
      orderBy: {field: NAME, direction: ASC}
      first: 100
      after: $endCursor
    ) {
      pageInfo { hasNextPage, endCursor }
      edges {
        node {
          name
          publishedAt
          isPrerelease
          tagName
        }
      }
    }
  }
}"""
    # An argument list (no shell) avoids any shell quoting or injection issue.
    # -f (raw string field) is used instead of -F so values such as "123" or "true" are not type-converted.
    command = [
        "gh", "api", "graphql", "--paginate",
        "-f", f"owner={owner}",
        "-f", f"name={repo}",
        "-f", f"query={query}",
    ]
    child = subprocess.run(command, capture_output=True, timeout=300, check=True)
    logging.info(f"fetched {repo_id} GitHub releases")

    # With --paginate the output is a sequence of JSON documents, possibly concatenated on a single line.
    # Decode them one after the other instead of splitting on '}{', which could also match inside a string value.
    output = child.stdout.decode("utf-8").strip()
    decoder = json.JSONDecoder()
    pages = []
    position = 0
    while position < len(output):
        page, position = decoder.raw_decode(output, position)
        pages.append(page)
        # raw_decode does not accept leading whitespace: skip separators between documents
        while position < len(output) and output[position].isspace():
            position += 1

    return [
        Release(edge['node']['name'], edge['node']['tagName'], edge['node']['publishedAt'],
                edge['node']['isPrerelease'])
        for page in pages
        for edge in page['data']['repository']['releases']['edges']
    ]

View File

@@ -1,9 +1,6 @@
import json
import logging
import subprocess
import sys
from common import dates, endoflife, releasedata
from common import dates, endoflife, github, releasedata
"""Fetches versions from GitHub releases using the GraphQL API and the GitHub CLI.
@@ -13,47 +10,17 @@ Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manag
METHOD = "github_releases"
def fetch_releases(repo_id: str) -> list[dict]:
logging.info(f"fetching {repo_id} GitHub releases")
(owner, repo) = repo_id.split('/')
child = subprocess.run("""gh api graphql --paginate -f query='
query($endCursor: String) {
repository(name: "%s", owner: "%s") {
releases(
orderBy: {field: NAME, direction: ASC}
first: 100
after: $endCursor
) {
pageInfo { hasNextPage, endCursor }
edges {
node {
name
publishedAt
isPrerelease
}
}
}
}
}'""" % (repo, owner), capture_output=True, timeout=300, check=True, shell=True) # noqa: UP031
logging.info(f"fetched {repo_id} GitHub releases")
# splitting because response may contain multiple JSON objects on a single line
responses = child.stdout.decode("utf-8").strip().replace('}{', '}\n{').split("\n")
return [json.loads(response) for response in responses]
p_filter = sys.argv[1] if len(sys.argv) > 1 else None
m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data:
for page in fetch_releases(config.url):
releases = [edge['node'] for edge in (page['data']['repository']['releases']['edges'])]
for release in github.fetch_releases(config.url):
if release.is_prerelease:
continue
for release in releases:
if not release['isPrerelease']:
version_str = release['name']
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(release['publishedAt'])
product_data.declare_version(version, date)
version_str = release.name
version_match = config.first_match(version_str)
if version_match:
version = config.render(version_match)
date = dates.parse_datetime(release.published_at)
product_data.declare_version(version, date)