- add handling for pages where Patreon stores its metadata in 'window.patreon = wrapInProxy({"bootstrap":'

- refactor code to make it more readable
- output page content when the HTML structure is unknown (to make debugging easier)
This commit is contained in:
Tobi823
2023-12-10 16:10:46 +01:00
parent c55955db03
commit 5ff7106d4f

View File

@@ -249,23 +249,18 @@ class PatreonExtractor(Extractor):
return [genmap[ft] for ft in filetypes] return [genmap[ft] for ft in filetypes]
def _extract_bootstrap(self, page):
    """Return the "bootstrap" metadata object embedded in 'page'.

    Patreon has shipped this object in several JavaScript layouts.
    Each known layout is tried in turn; the first one that yields a
    non-empty extraction is parsed with util.json_loads() and returned.

    Raises exception.StopExtraction when no known layout matches.
    """
    # (begin, end) delimiters around the bootstrap object for each
    # known layout. Every end delimiter includes the object's own
    # closing brace, so a "}" has to be re-appended before parsing.
    layouts = (
        ("window.patreon.bootstrap,", "});"),
        ('window.patreon = {"bootstrap":', '},"apiServer"'),
        ('window.patreon = wrapInProxy({"bootstrap":', '},"apiServer"'),
    )
    for begin, end in layouts:
        bootstrap = text.extr(page, begin, end)
        # An empty result means the delimiters were not both found;
        # skip it instead of handing a lone "}" to the JSON parser.
        if bootstrap:
            return util.json_loads(bootstrap + "}")

    # Last resort: parse the whole 'window.patreon = {...};' assignment
    # and pick the "bootstrap" key out of it.
    data = text.extr(page, "window.patreon = {", "};\n")
    if data:
        try:
            return util.json_loads("{" + data + "}")["bootstrap"]
        except Exception:
            # Deliberate best-effort: fall through to the error below.
            pass

    # Include only a bounded excerpt of the page; the full HTML would
    # flood the error output.
    raise exception.StopExtraction(
        "Unable to extract bootstrap data (page starts with: "
        + repr(page[:200]) + ")")
class PatreonCreatorExtractor(PatreonExtractor): class PatreonCreatorExtractor(PatreonExtractor):