- add code for the situation when Patreon uses 'window.patreon = wrapInProxy({"bootstrap":' to store metadata
- refactor code to make it more readable
- output page content when the HTML structure is unknown (to make debugging easier)
This commit is contained in:
@@ -249,23 +249,18 @@ class PatreonExtractor(Extractor):
|
|||||||
return [genmap[ft] for ft in filetypes]
|
return [genmap[ft] for ft in filetypes]
|
||||||
|
|
||||||
def _extract_bootstrap(self, page):
|
def _extract_bootstrap(self, page):
|
||||||
bootstrap = text.extr(
|
if "window.patreon.bootstrap," in page:
|
||||||
page, 'window.patreon = {"bootstrap":', '},"apiServer"')
|
page_content = text.extr(page, "window.patreon.bootstrap,", "});")
|
||||||
if bootstrap:
|
json_string = page_content + "}"
|
||||||
return util.json_loads(bootstrap + "}")
|
elif 'window.patreon = {"bootstrap":' in page:
|
||||||
|
page_content = text.extr(page, 'window.patreon = {"bootstrap":', '},"apiServer"')
|
||||||
bootstrap = text.extr(page, "window.patreon.bootstrap,", "});")
|
json_string = page_content + "}"
|
||||||
if bootstrap:
|
elif 'window.patreon = wrapInProxy({"bootstrap":' in page:
|
||||||
return util.json_loads(bootstrap + "}")
|
page_content = text.extr(page, 'window.patreon = wrapInProxy({"bootstrap":', '},"apiServer"')
|
||||||
|
json_string = page_content + "}"
|
||||||
data = text.extr(page, "window.patreon = {", "};\n")
|
else:
|
||||||
if data:
|
raise Exception(f"Unknown HTML and JS structure. Page content is: {page}")
|
||||||
try:
|
return util.json_loads(json_string)
|
||||||
return util.json_loads("{" + data + "}")["bootstrap"]
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
raise exception.StopExtraction("Unable to extract bootstrap data")
|
|
||||||
|
|
||||||
|
|
||||||
class PatreonCreatorExtractor(PatreonExtractor):
|
class PatreonCreatorExtractor(PatreonExtractor):
|
||||||
|
|||||||
Reference in New Issue
Block a user