[common] only auto-set page_url when first group starts with /

This commit is contained in:
Mike Fährmann
2025-06-26 23:35:53 +02:00
parent 26e81e4162
commit df6f4e5307
5 changed files with 17 additions and 24 deletions

View File

@@ -88,9 +88,6 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+-chapter-[^/?#]+)" pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+-chapter-[^/?#]+)"
example = "https://comick.io/comic/MANGA/ID-chapter-123-en" example = "https://comick.io/comic/MANGA/ID-chapter-123-en"
def __init__(self, match):
ChapterExtractor.__init__(self, match, False)
def metadata(self, page): def metadata(self, page):
slug, chstr = self.groups slug, chstr = self.groups
manga = self._manga_info(slug) manga = self._manga_info(slug)
@@ -134,9 +131,6 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?" pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?"
example = "https://comick.io/comic/MANGA" example = "https://comick.io/comic/MANGA"
def __init__(self, match):
MangaExtractor.__init__(self, match, False)
def items(self): def items(self):
slug = self.groups[0] slug = self.groups[0]
manga = self._manga_info(slug) manga = self._manga_info(slug)

View File

@@ -730,7 +730,12 @@ class GalleryExtractor(Extractor):
def __init__(self, match, url=None): def __init__(self, match, url=None):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.page_url = self.root + self.groups[0] if url is None else url
if url is None:
path = self.groups[0]
self.page_url = self.root + path if path[0] == "/" else None
else:
self.page_url = url
def items(self): def items(self):
self.login() self.login()
@@ -823,7 +828,12 @@ class MangaExtractor(Extractor):
def __init__(self, match, url=None): def __init__(self, match, url=None):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.page_url = self.root + self.groups[0] if url is None else url
if url is None:
path = self.groups[0]
self.page_url = self.root + path if path[0] == "/" else None
else:
self.page_url = url
if self.config("chapter-reverse", False): if self.config("chapter-reverse", False):
self.reverse = not self.reverse self.reverse = not self.reverse

View File

@@ -31,9 +31,6 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
pattern = BASE_PATTERN + r"/read/manga/([\w-]+)/([\w-]+)" pattern = BASE_PATTERN + r"/read/manga/([\w-]+)/([\w-]+)"
example = "https://danke.moe/read/manga/TITLE/123/1/" example = "https://danke.moe/read/manga/TITLE/123/1/"
def __init__(self, match):
ChapterExtractor.__init__(self, match, False)
def _init(self): def _init(self):
self.zip = self.config("zip", False) self.zip = self.config("zip", False)
if self.zip: if self.zip:
@@ -98,9 +95,6 @@ class DankefuerslesenMangaExtractor(DankefuerslesenBase, MangaExtractor):
pattern = BASE_PATTERN + r"/read/manga/([^/?#]+)" pattern = BASE_PATTERN + r"/read/manga/([^/?#]+)"
example = "https://danke.moe/read/manga/TITLE/" example = "https://danke.moe/read/manga/TITLE/"
def __init__(self, match):
MangaExtractor.__init__(self, match, False)
def chapters(self, page): def chapters(self, page):
results = [] results = []

View File

@@ -23,14 +23,12 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor):
pattern = r"(?:https?://)?(?:www\.)?speakerdeck\.com/([^/?#]+)/([^/?#]+)" pattern = r"(?:https?://)?(?:www\.)?speakerdeck\.com/([^/?#]+)/([^/?#]+)"
example = "https://speakerdeck.com/USER/PRESENTATION" example = "https://speakerdeck.com/USER/PRESENTATION"
def __init__(self, match):
GalleryExtractor.__init__(self, match, "")
self.user, self.presentation = match.groups()
def metadata(self, _): def metadata(self, _):
user, presentation = self.groups
url = self.root + "/oembed.json" url = self.root + "/oembed.json"
params = { params = {
"url": "{}/{}/{}".format(self.root, self.user, self.presentation), "url": "{}/{}/{}".format(self.root, user, presentation),
} }
data = self.request(url, params=params).json() data = self.request(url, params=params).json()
@@ -38,8 +36,8 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor):
data["html"], 'src="//speakerdeck.com/player/', '"') data["html"], 'src="//speakerdeck.com/player/', '"')
return { return {
"user": self.user, "user": user,
"presentation": self.presentation, "presentation": presentation,
"presentation_id": self.presentation_id, "presentation_id": self.presentation_id,
"title": data["title"], "title": data["title"],
"author": data["author_name"], "author": data["author_name"],

View File

@@ -98,9 +98,6 @@ class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
pattern = BASE_PATTERN + r"/series/(\w+)" pattern = BASE_PATTERN + r"/series/(\w+)"
example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE" example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"
def __init__(self, match):
MangaExtractor.__init__(self, match, False)
def chapters(self, _): def chapters(self, _):
manga_id = self.groups[0] manga_id = self.groups[0]
referer = "{}/series/{}".format(self.root, manga_id) referer = "{}/series/{}".format(self.root, manga_id)