[common] only auto-set page_url when first group starts with /

2025-06-26 23:35:53 +02:00
parent 26e81e4162
commit df6f4e5307
5 changed files with 17 additions and 24 deletions
--- a/gallery_dl/extractor/comick.py
+++ b/gallery_dl/extractor/comick.py
@@ -88,9 +88,6 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+-chapter-[^/?#]+)"
    example = "https://comick.io/comic/MANGA/ID-chapter-123-en"

-    def __init__(self, match):
-        ChapterExtractor.__init__(self, match, False)
-
    def metadata(self, page):
        slug, chstr = self.groups
        manga = self._manga_info(slug)
@@ -134,9 +131,6 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
    pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?"
    example = "https://comick.io/comic/MANGA"

-    def __init__(self, match):
-        MangaExtractor.__init__(self, match, False)
-
    def items(self):
        slug = self.groups[0]
        manga = self._manga_info(slug)
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -730,7 +730,12 @@ class GalleryExtractor(Extractor):

    def __init__(self, match, url=None):
        Extractor.__init__(self, match)
-        self.page_url = self.root + self.groups[0] if url is None else url
+
+        if url is None:
+            path = self.groups[0]
+            self.page_url = self.root + path if path[0] == "/" else None
+        else:
+            self.page_url = url

    def items(self):
        self.login()
@@ -823,7 +828,12 @@ class MangaExtractor(Extractor):

    def __init__(self, match, url=None):
        Extractor.__init__(self, match)
-        self.page_url = self.root + self.groups[0] if url is None else url
+
+        if url is None:
+            path = self.groups[0]
+            self.page_url = self.root + path if path[0] == "/" else None
+        else:
+            self.page_url = url

        if self.config("chapter-reverse", False):
            self.reverse = not self.reverse
--- a/gallery_dl/extractor/dankefuerslesen.py
+++ b/gallery_dl/extractor/dankefuerslesen.py
@@ -31,9 +31,6 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
    pattern = BASE_PATTERN + r"/read/manga/([\w-]+)/([\w-]+)"
    example = "https://danke.moe/read/manga/TITLE/123/1/"

-    def __init__(self, match):
-        ChapterExtractor.__init__(self, match, False)
-
    def _init(self):
        self.zip = self.config("zip", False)
        if self.zip:
@@ -98,9 +95,6 @@ class DankefuerslesenMangaExtractor(DankefuerslesenBase, MangaExtractor):
    pattern = BASE_PATTERN + r"/read/manga/([^/?#]+)"
    example = "https://danke.moe/read/manga/TITLE/"

-    def __init__(self, match):
-        MangaExtractor.__init__(self, match, False)
-
    def chapters(self, page):
        results = []

--- a/gallery_dl/extractor/speakerdeck.py
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -23,14 +23,12 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor):
    pattern = r"(?:https?://)?(?:www\.)?speakerdeck\.com/([^/?#]+)/([^/?#]+)"
    example = "https://speakerdeck.com/USER/PRESENTATION"

-    def __init__(self, match):
-        GalleryExtractor.__init__(self, match, "")
-        self.user, self.presentation = match.groups()
-
    def metadata(self, _):
+        user, presentation = self.groups
+
        url = self.root + "/oembed.json"
        params = {
-            "url": "{}/{}/{}".format(self.root, self.user, self.presentation),
+            "url": "{}/{}/{}".format(self.root, user, presentation),
        }
        data = self.request(url, params=params).json()

@@ -38,8 +36,8 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor):
            data["html"], 'src="//speakerdeck.com/player/', '"')

        return {
-            "user": self.user,
-            "presentation": self.presentation,
+            "user": user,
+            "presentation": presentation,
            "presentation_id": self.presentation_id,
            "title": data["title"],
            "author": data["author_name"],
--- a/gallery_dl/extractor/weebcentral.py
+++ b/gallery_dl/extractor/weebcentral.py
@@ -98,9 +98,6 @@ class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
    pattern = BASE_PATTERN + r"/series/(\w+)"
    example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"

-    def __init__(self, match):
-        MangaExtractor.__init__(self, match, False)
-
    def chapters(self, _):
        manga_id = self.groups[0]
        referer = "{}/series/{}".format(self.root, manga_id)