From 5ff7106d4f907775f8902c5aded0a68b0e4cf528 Mon Sep 17 00:00:00 2001 From: Tobi823 Date: Sun, 10 Dec 2023 16:10:46 +0100 Subject: [PATCH] - add code for the situation when Patreon is using 'window.patreon = wrapInProxy({"bootstrap":' to store metadata - refactor code to make it more readable - output page content when the HTML structure is unknown (to make debugging easier) --- gallery_dl/extractor/patreon.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 6aef9cbe..b89fddcf 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -249,23 +249,18 @@ class PatreonExtractor(Extractor): return [genmap[ft] for ft in filetypes] def _extract_bootstrap(self, page): - bootstrap = text.extr( - page, 'window.patreon = {"bootstrap":', '},"apiServer"') - if bootstrap: - return util.json_loads(bootstrap + "}") - - bootstrap = text.extr(page, "window.patreon.bootstrap,", "});") - if bootstrap: - return util.json_loads(bootstrap + "}") - - data = text.extr(page, "window.patreon = {", "};\n") - if data: - try: - return util.json_loads("{" + data + "}")["bootstrap"] - except Exception: - pass - - raise exception.StopExtraction("Unable to extract bootstrap data") + if "window.patreon.bootstrap," in page: + page_content = text.extr(page, "window.patreon.bootstrap,", "});") + json_string = page_content + "}" + elif 'window.patreon = {"bootstrap":' in page: + page_content = text.extr(page, 'window.patreon = {"bootstrap":', '},"apiServer"') + json_string = page_content + "}" + elif 'window.patreon = wrapInProxy({"bootstrap":' in page: + page_content = text.extr(page, 'window.patreon = wrapInProxy({"bootstrap":', '},"apiServer"') + json_string = page_content + "}" + else: + raise Exception(f"Unknown HTML and JS structure. Page content is: {page}") + return util.json_loads(json_string) class PatreonCreatorExtractor(PatreonExtractor):