From 5ff7106d4f907775f8902c5aded0a68b0e4cf528 Mon Sep 17 00:00:00 2001 From: Tobi823 Date: Sun, 10 Dec 2023 16:10:46 +0100 Subject: [PATCH 1/4] - add code for the situation when Patreon is using window.patreon = wrapInProxy({"bootstrap":' to store metadata - refactor code to make it more readable - output page content when the HTML structure is unknown (to make debugging easier) --- gallery_dl/extractor/patreon.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 6aef9cbe..b89fddcf 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -249,23 +249,18 @@ class PatreonExtractor(Extractor): return [genmap[ft] for ft in filetypes] def _extract_bootstrap(self, page): - bootstrap = text.extr( - page, 'window.patreon = {"bootstrap":', '},"apiServer"') - if bootstrap: - return util.json_loads(bootstrap + "}") - - bootstrap = text.extr(page, "window.patreon.bootstrap,", "});") - if bootstrap: - return util.json_loads(bootstrap + "}") - - data = text.extr(page, "window.patreon = {", "};\n") - if data: - try: - return util.json_loads("{" + data + "}")["bootstrap"] - except Exception: - pass - - raise exception.StopExtraction("Unable to extract bootstrap data") + if "window.patreon.bootstrap," in page: + page_content = text.extr(page, "window.patreon.bootstrap,", "});") + json_string = page_content + "}" + elif 'window.patreon = {"bootstrap":' in page: + page_content = text.extr(page, 'window.patreon = {"bootstrap":', '},"apiServer"') + json_string = page_content + "}" + elif 'window.patreon = wrapInProxy({"bootstrap":' in page: + page_content = text.extr(page, 'window.patreon = wrapInProxy({"bootstrap":', '},"apiServer"') + json_string = page_content + "}" + else: + raise Exception(f"Unknown HTML and JS structure. 
Page content is: {page}") + return util.json_loads(json_string) class PatreonCreatorExtractor(PatreonExtractor): From fd06255f93895e3d58ea5d62c5eb5666f1a29f35 Mon Sep 17 00:00:00 2001 From: Tobi823 Date: Sun, 10 Dec 2023 16:17:34 +0100 Subject: [PATCH 2/4] - reformat and refactor to pass tests --- gallery_dl/extractor/patreon.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index b89fddcf..2ff1e9e4 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -250,16 +250,19 @@ class PatreonExtractor(Extractor): def _extract_bootstrap(self, page): if "window.patreon.bootstrap," in page: - page_content = text.extr(page, "window.patreon.bootstrap,", "});") - json_string = page_content + "}" + content_begin = "window.patreon.bootstrap," + content_end = "});" + json_string = text.extr(page, content_begin, content_end) + "}" elif 'window.patreon = {"bootstrap":' in page: - page_content = text.extr(page, 'window.patreon = {"bootstrap":', '},"apiServer"') - json_string = page_content + "}" + content_begin = 'window.patreon = {"bootstrap":' + content_end = '},"apiServer"' + json_string = text.extr(page, content_begin, content_end) + "}" elif 'window.patreon = wrapInProxy({"bootstrap":' in page: - page_content = text.extr(page, 'window.patreon = wrapInProxy({"bootstrap":', '},"apiServer"') - json_string = page_content + "}" + content_begin = 'window.patreon = wrapInProxy({"bootstrap":' + content_end = '},"apiServer"' + json_string = text.extr(page, content_begin, content_end) + "}" else: - raise Exception(f"Unknown HTML and JS structure. Page content is: {page}") + raise Exception("Unknown HTML and JS structure. 
Page:" + page) return util.json_loads(json_string) From 244444b194ef7176530394fdff4e571ff8822528 Mon Sep 17 00:00:00 2001 From: Tobi823 Date: Sun, 10 Dec 2023 16:22:32 +0100 Subject: [PATCH 3/4] - adapt code to current code style --- gallery_dl/extractor/patreon.py | 37 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 2ff1e9e4..0b0e9ebd 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -249,21 +249,28 @@ class PatreonExtractor(Extractor): return [genmap[ft] for ft in filetypes] def _extract_bootstrap(self, page): - if "window.patreon.bootstrap," in page: - content_begin = "window.patreon.bootstrap," - content_end = "});" - json_string = text.extr(page, content_begin, content_end) + "}" - elif 'window.patreon = {"bootstrap":' in page: - content_begin = 'window.patreon = {"bootstrap":' - content_end = '},"apiServer"' - json_string = text.extr(page, content_begin, content_end) + "}" - elif 'window.patreon = wrapInProxy({"bootstrap":' in page: - content_begin = 'window.patreon = wrapInProxy({"bootstrap":' - content_end = '},"apiServer"' - json_string = text.extr(page, content_begin, content_end) + "}" - else: - raise Exception("Unknown HTML and JS structure. 
Page:" + page) - return util.json_loads(json_string) + bootstrap = text.extr( + page, 'window.patreon = {"bootstrap":', '},"apiServer"') + if bootstrap: + return util.json_loads(bootstrap + "}") + + bootstrap = text.extr( + page, 'window.patreon = wrapInProxy({"bootstrap":', '},"apiServer"') + if bootstrap: + return util.json_loads(bootstrap + "}") + + bootstrap = text.extr(page, "window.patreon.bootstrap,", "});") + if bootstrap: + return util.json_loads(bootstrap + "}") + + data = text.extr(page, "window.patreon = {", "};\n") + if data: + try: + return util.json_loads("{" + data + "}")["bootstrap"] + except Exception: + pass + + raise exception.StopExtraction("Unable to extract bootstrap data") class PatreonCreatorExtractor(PatreonExtractor): From 66cbe9da410ba08465d051fbfb7a06c093d6eeb0 Mon Sep 17 00:00:00 2001 From: Tobi823 Date: Sun, 10 Dec 2023 16:24:00 +0100 Subject: [PATCH 4/4] - fix style check failure "line too long" --- gallery_dl/extractor/patreon.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 0b0e9ebd..fb560e96 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -255,7 +255,9 @@ class PatreonExtractor(Extractor): return util.json_loads(bootstrap + "}") bootstrap = text.extr( - page, 'window.patreon = wrapInProxy({"bootstrap":', '},"apiServer"') + page, + 'window.patreon = wrapInProxy({"bootstrap":', + '},"apiServer"') if bootstrap: return util.json_loads(bootstrap + "}")