[splunk] Fetch page with javascript
The site has been revamped and now requires JavaScript.
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from common import dates, http
|
from common import dates, http
|
||||||
from common.releasedata import ProductData, config_from_argv
|
from common.releasedata import ProductData, config_from_argv
|
||||||
|
|
||||||
@@ -31,8 +32,8 @@ def get_latest_minor_versions(versions: list[str]) -> list[str]:
|
|||||||
|
|
||||||
|
|
||||||
config = config_from_argv()
|
config = config_from_argv()
|
||||||
with ProductData(config.product) as product_data:
|
with (ProductData(config.product) as product_data):
|
||||||
html = http.fetch_html(config.url)
|
html = BeautifulSoup(http.fetch_javascript_url(config.url), features="html5lib")
|
||||||
|
|
||||||
all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")]
|
all_versions = [option.attrs['value'] for option in html.select("select#version-select > option")]
|
||||||
all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"]
|
all_versions = [v for v in all_versions if v != "DataMonitoringAppPreview"]
|
||||||
@@ -40,9 +41,8 @@ with ProductData(config.product) as product_data:
|
|||||||
# Latest minor release notes contains release notes for all previous minor versions.
|
# Latest minor release notes contains release notes for all previous minor versions.
|
||||||
# For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4.
|
# For example, 9.0.5 release notes also contains release notes for 9.0.0 to 9.0.4.
|
||||||
latest_minor_versions = get_latest_minor_versions(all_versions)
|
latest_minor_versions = get_latest_minor_versions(all_versions)
|
||||||
latest_minor_versions_urls = [f"{config.url}/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions]
|
for url in [f"{config.url}/{v}/ReleaseNotes/MeetSplunk" for v in latest_minor_versions]:
|
||||||
# Oddly using the endoflife.date user agent does not work for 9.0, 9.2 and 9.3.
|
response = BeautifulSoup(http.fetch_javascript_url(url, user_agent=http.FIREFOX_USER_AGENT), features="html5lib")
|
||||||
for response in http.fetch_urls(latest_minor_versions_urls, user_agent=http.FIREFOX_USER_AGENT):
|
|
||||||
for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text):
|
for (version_str, date_str) in VERSION_DATE_PATTERN.findall(response.text):
|
||||||
version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0
|
version_str = f"{version_str}.0" if len(version_str.split(".")) == 2 else version_str # convert x.y to x.y.0
|
||||||
date = dates.parse_date(date_str)
|
date = dates.parse_date(date_str)
|
||||||
|
|||||||
Reference in New Issue
Block a user