[chef-inspec] Use GitHub releases instead of git tags (#367)

GitHub releases give better dates than git tags.
This commit is contained in:
Marc Wrobel
2024-08-13 22:20:36 +02:00
committed by GitHub
parent d492c2d192
commit f238224681
3 changed files with 64 additions and 52 deletions

View File

@@ -1,6 +1,5 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from common import dates, http, releasedata from common import dates, github, http, releasedata
from common.git import Git
"""Fetch released versions from docs.chef.io and retrieve their date from GitHub. """Fetch released versions from docs.chef.io and retrieve their date from GitHub.
docs.chef.io needs to be scraped because not all tagged versions are actually released. docs.chef.io needs to be scraped because not all tagged versions are actually released.
@@ -13,12 +12,8 @@ with releasedata.ProductData("chef-inspec") as product_data:
rn_soup = BeautifulSoup(rn_response.text, features="html5lib") rn_soup = BeautifulSoup(rn_response.text, features="html5lib")
released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')] released_versions = [h2.get('id') for h2 in rn_soup.find_all('h2', id=True) if h2.get('id')]
git = Git("https://github.com/inspec/inspec.git") for release in github.fetch_releases("inspec/inspec"):
git.setup(bare=True) sanitized_version = release.tag_name.replace("v", "")
versions = git.list_tags()
for version, date_str in versions:
sanitized_version = version.replace("v", "")
if sanitized_version in released_versions: if sanitized_version in released_versions:
date = dates.parse_date(date_str) date = dates.parse_datetime(release.published_at)
product_data.declare_version(sanitized_version, date) product_data.declare_version(sanitized_version, date)

50
src/common/github.py Normal file
View File

@@ -0,0 +1,50 @@
import json
import logging
import subprocess
class Release:
    """A single GitHub release.

    Attributes:
        name: the release's display name.
        tag_name: the name of the git tag the release is attached to.
        published_at: the release's publication timestamp, kept as the raw string it was
            created with (no parsing is done here).
        is_prerelease: whether the release is flagged as a pre-release.
    """

    def __init__(self, name: str, tag_name: str, published_at: str, is_prerelease: bool) -> None:
        self.name: str = name
        self.tag_name: str = tag_name
        self.published_at: str = published_at
        self.is_prerelease: bool = is_prerelease

    def __repr__(self) -> str:
        # Explicit repr so releases are readable in logs and debuggers
        # instead of showing up as "<Release object at 0x...>".
        return (
            f"{type(self).__name__}(name={self.name!r}, tag_name={self.tag_name!r}, "
            f"published_at={self.published_at!r}, is_prerelease={self.is_prerelease!r})"
        )
def _iter_json_documents(text: str):
    """Yield every JSON document found in text, whether documents are separated by whitespace or not."""
    decoder = json.JSONDecoder()
    position, end = 0, len(text)
    while position < end:
        # raw_decode() does not accept leading whitespace, so skip it manually.
        if text[position].isspace():
            position += 1
            continue
        document, position = decoder.raw_decode(text, position)
        yield document


def fetch_releases(repo_id: str) -> list[Release]:
    """Fetch all the GitHub releases of a repository using the GitHub CLI and the GraphQL API.

    Args:
        repo_id: the repository identifier, in the "owner/repo" form.

    Returns:
        One Release per release returned by GitHub, in the order the API returned them
        (the query asks for releases ordered by name, ascending). The list is empty if
        the command printed nothing.

    Raises:
        ValueError: if repo_id does not contain exactly one '/'.
        subprocess.CalledProcessError: if the gh command exits with a non-zero status.
        subprocess.TimeoutExpired: if the gh command takes more than 300 seconds.
        json.JSONDecodeError: if the command output is not valid JSON.
    """
    logging.info(f"fetching {repo_id} GitHub releases")
    (owner, repo) = repo_id.split('/')

    # owner and name are GraphQL variables rather than text pasted into the query, so that
    # special characters in repo_id cannot alter the query.
    query = """
query($owner: String!, $name: String!, $endCursor: String) {
  repository(name: $name, owner: $owner) {
    releases(
      orderBy: {field: NAME, direction: ASC}
      first: 100
      after: $endCursor
    ) {
      pageInfo { hasNextPage, endCursor }
      edges {
        node {
          name
          publishedAt
          isPrerelease
          tagName
        }
      }
    }
  }
}"""

    # The command is an argument list executed without a shell: nothing coming from repo_id is
    # ever interpreted by a shell. -f (raw field) is used so values are always sent as strings.
    child = subprocess.run(
        ["gh", "api", "graphql", "--paginate",
         "-f", f"owner={owner}",
         "-f", f"name={repo}",
         "-f", f"query={query}"],
        capture_output=True, timeout=300, check=True)
    logging.info(f"fetched {repo_id} GitHub releases")

    # With --paginate the output may contain multiple JSON objects, possibly on a single line:
    # decode them one after the other instead of splitting the text on '}{', which would
    # corrupt any string value containing that sequence.
    releases = []
    for page in _iter_json_documents(child.stdout.decode("utf-8")):
        for edge in page['data']['repository']['releases']['edges']:
            release_data = edge['node']
            releases.append(Release(release_data['name'], release_data['tagName'], release_data['publishedAt'],
                                    release_data['isPrerelease']))
    return releases

View File

@@ -1,9 +1,6 @@
import json
import logging
import subprocess
import sys import sys
from common import dates, endoflife, releasedata from common import dates, endoflife, github, releasedata
"""Fetches versions from GitHub releases using the GraphQL API and the GitHub CLI. """Fetches versions from GitHub releases using the GraphQL API and the GitHub CLI.
@@ -13,47 +10,17 @@ Note: GraphQL API and GitHub CLI are used because it's simpler: no need to manag
METHOD = "github_releases" METHOD = "github_releases"
def fetch_releases(repo_id: str) -> list[dict]:
    """Fetch all the GitHub releases of a repository using the GitHub CLI and the GraphQL API.

    Args:
        repo_id: the repository identifier, in the "owner/repo" form.

    Returns:
        The decoded GraphQL response pages, one dict per page, in the order they were printed
        by the command. Releases are found under page['data']['repository']['releases']['edges'].
        The list is empty if the command printed nothing.

    Raises:
        ValueError: if repo_id does not contain exactly one '/'.
        subprocess.CalledProcessError: if the gh command exits with a non-zero status.
        subprocess.TimeoutExpired: if the gh command takes more than 300 seconds.
        json.JSONDecodeError: if the command output is not valid JSON.
    """
    logging.info(f"fetching {repo_id} GitHub releases")
    (owner, repo) = repo_id.split('/')

    # owner and name are GraphQL variables rather than text pasted into the query, so that
    # special characters in repo_id cannot alter the query.
    query = """
query($owner: String!, $name: String!, $endCursor: String) {
  repository(name: $name, owner: $owner) {
    releases(
      orderBy: {field: NAME, direction: ASC}
      first: 100
      after: $endCursor
    ) {
      pageInfo { hasNextPage, endCursor }
      edges {
        node {
          name
          publishedAt
          isPrerelease
        }
      }
    }
  }
}"""

    # The command is an argument list executed without a shell: nothing coming from repo_id is
    # ever interpreted by a shell. -f (raw field) is used so values are always sent as strings.
    child = subprocess.run(
        ["gh", "api", "graphql", "--paginate",
         "-f", f"owner={owner}",
         "-f", f"name={repo}",
         "-f", f"query={query}"],
        capture_output=True, timeout=300, check=True)
    logging.info(f"fetched {repo_id} GitHub releases")

    # With --paginate the output may contain multiple JSON objects, possibly on a single line:
    # decode them one after the other instead of splitting the text on '}{', which would
    # corrupt any string value containing that sequence.
    output = child.stdout.decode("utf-8")
    decoder = json.JSONDecoder()
    pages = []
    position, end = 0, len(output)
    while position < end:
        # raw_decode() does not accept leading whitespace, so skip it manually.
        if output[position].isspace():
            position += 1
            continue
        page, position = decoder.raw_decode(output, position)
        pages.append(page)
    return pages
p_filter = sys.argv[1] if len(sys.argv) > 1 else None p_filter = sys.argv[1] if len(sys.argv) > 1 else None
m_filter = sys.argv[2] if len(sys.argv) > 2 else None m_filter = sys.argv[2] if len(sys.argv) > 2 else None
for config in endoflife.list_configs(p_filter, METHOD, m_filter): for config in endoflife.list_configs(p_filter, METHOD, m_filter):
with releasedata.ProductData(config.product) as product_data: with releasedata.ProductData(config.product) as product_data:
for page in fetch_releases(config.url): for release in github.fetch_releases(config.url):
releases = [edge['node'] for edge in (page['data']['repository']['releases']['edges'])] if release.is_prerelease:
continue
for release in releases: version_str = release.name
if not release['isPrerelease']:
version_str = release['name']
version_match = config.first_match(version_str) version_match = config.first_match(version_str)
if version_match: if version_match:
version = config.render(version_match) version = config.render(version_match)
date = dates.parse_datetime(release['publishedAt']) date = dates.parse_datetime(release.published_at)
product_data.declare_version(version, date) product_data.declare_version(version, date)