From df6f4e53076b9c2e0502cdc163de772565211c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 26 Jun 2025 23:35:53 +0200 Subject: [PATCH] [common] only auto-set page_url when first group starts with / --- gallery_dl/extractor/comick.py | 6 ------ gallery_dl/extractor/common.py | 14 ++++++++++++-- gallery_dl/extractor/dankefuerslesen.py | 6 ------ gallery_dl/extractor/speakerdeck.py | 12 +++++------- gallery_dl/extractor/weebcentral.py | 3 --- 5 files changed, 17 insertions(+), 24 deletions(-) diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py index 66fcd484..ea5eac6a 100644 --- a/gallery_dl/extractor/comick.py +++ b/gallery_dl/extractor/comick.py @@ -88,9 +88,6 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor): pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+-chapter-[^/?#]+)" example = "https://comick.io/comic/MANGA/ID-chapter-123-en" - def __init__(self, match): - ChapterExtractor.__init__(self, match, False) - def metadata(self, page): slug, chstr = self.groups manga = self._manga_info(slug) @@ -134,9 +131,6 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?" example = "https://comick.io/comic/MANGA" - def __init__(self, match): - MangaExtractor.__init__(self, match, False) - def items(self): slug = self.groups[0] manga = self._manga_info(slug) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 8b7bd67c..9cf07335 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -730,7 +730,12 @@ class GalleryExtractor(Extractor): def __init__(self, match, url=None): Extractor.__init__(self, match) - self.page_url = self.root + self.groups[0] if url is None else url + + if url is None: + path = self.groups[0] + self.page_url = self.root + path if path[0] == "/" else None + else: + self.page_url = url def items(self): self.login() @@ -823,7 +828,12 @@ class MangaExtractor(Extractor): def __init__(self, match, url=None): Extractor.__init__(self, match) - self.page_url = self.root + self.groups[0] if url is None else url + + if url is None: + path = self.groups[0] + self.page_url = self.root + path if path[0] == "/" else None + else: + self.page_url = url if self.config("chapter-reverse", False): self.reverse = not self.reverse diff --git a/gallery_dl/extractor/dankefuerslesen.py b/gallery_dl/extractor/dankefuerslesen.py index c87dc2e2..a2b0f426 100644 --- a/gallery_dl/extractor/dankefuerslesen.py +++ b/gallery_dl/extractor/dankefuerslesen.py @@ -31,9 +31,6 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor): pattern = BASE_PATTERN + r"/read/manga/([\w-]+)/([\w-]+)" example = "https://danke.moe/read/manga/TITLE/123/1/" - def __init__(self, match): - ChapterExtractor.__init__(self, match, False) - def _init(self): self.zip = self.config("zip", False) if self.zip: @@ -98,9 +95,6 @@ class DankefuerslesenMangaExtractor(DankefuerslesenBase, MangaExtractor): pattern = BASE_PATTERN + r"/read/manga/([^/?#]+)" example = "https://danke.moe/read/manga/TITLE/" - def __init__(self, match): - MangaExtractor.__init__(self, match, False) - def chapters(self, page): results = [] diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py index bae27262..c4d2984e 100644 --- a/gallery_dl/extractor/speakerdeck.py +++ b/gallery_dl/extractor/speakerdeck.py @@ -23,14 +23,12 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor): pattern = r"(?:https?://)?(?:www\.)?speakerdeck\.com/([^/?#]+)/([^/?#]+)" example = "https://speakerdeck.com/USER/PRESENTATION" - def __init__(self, match): - GalleryExtractor.__init__(self, match, "") - self.user, self.presentation = match.groups() - def metadata(self, _): + user, presentation = self.groups + url = self.root + "/oembed.json" params = { - "url": "{}/{}/{}".format(self.root, self.user, self.presentation), + "url": "{}/{}/{}".format(self.root, user, presentation), } data = self.request(url, params=params).json() @@ -38,8 +36,8 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor): data["html"], 'src="//speakerdeck.com/player/', '"') return { - "user": self.user, - "presentation": self.presentation, + "user": user, + "presentation": presentation, "presentation_id": self.presentation_id, "title": data["title"], "author": data["author_name"], diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py index 2acfabb1..b1364799 100644 --- a/gallery_dl/extractor/weebcentral.py +++ b/gallery_dl/extractor/weebcentral.py @@ -98,9 +98,6 @@ class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor): pattern = BASE_PATTERN + r"/series/(\w+)" example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE" - def __init__(self, match): - MangaExtractor.__init__(self, match, False) - def chapters(self, _): manga_id = self.groups[0] referer = "{}/series/{}".format(self.root, manga_id)