diff --git a/docs/configuration.rst b/docs/configuration.rst index 7ffdb6b4..4c1cf631 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1459,6 +1459,29 @@ Description * ``false``: Get posts from "Latest Updates" pages +extractor.batoto.domain +----------------------- +Type + ``string`` +Default + ``"auto"`` +Example + ``"mangatoto.org"`` +Description + Specifies the domain used by ``batoto`` extractors. + + ``"auto"`` | ``"url"`` + Use the input URL's domain + ``"nolegacy"`` + Use the input URL's domain + - replace legacy domains with ``"xbato.org"`` + ``"nowarn"`` + Use the input URL's domain + - do not warn about legacy domains + any ``string`` + Use this domain + + extractor.bbc.width ------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index f907a5b5..44833760 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -117,6 +117,10 @@ "recursive": true }, + "batoto": + { + "domain": "auto" + }, "bbc": { "width": 1920 diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 4d192a49..a1ad3ae8 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -54,11 +54,23 @@ class BatotoBase(): """Base class for batoto extractors""" category = "batoto" root = "https://xbato.org" + _warn_legacy = True - def _init_root(self, match): - domain = match.group(1) - if domain not in LEGACY_DOMAINS: - self.root = "https://" + domain + def _init_root(self): + domain = self.config("domain") + if domain is None or domain in {"auto", "url"}: + domain = self.groups[0] + if domain in LEGACY_DOMAINS: + if self._warn_legacy: + BatotoBase._warn_legacy = False + self.log.warning("Legacy domain '%s'", domain) + elif domain == "nolegacy": + domain = self.groups[0] + if domain in LEGACY_DOMAINS: + domain = "xbato.org" + elif domain == "nowarn": + domain = self.groups[0] + self.root = "https://" + domain def request(self, url, **kwargs): kwargs["encoding"] = "utf-8" @@ -72,10 +84,10 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor): example = "https://xbato.org/title/12345-MANGA/54321" def __init__(self, match): - self._init_root(match) - self.chapter_id = match.group(2) - url = "{}/title/0/{}".format(self.root, self.chapter_id) - ChapterExtractor.__init__(self, match, url) + ChapterExtractor.__init__(self, match, False) + self._init_root() + self.chapter_id = self.groups[1] + self.gallery_url = "{}/title/0/{}".format(self.root, self.chapter_id) def metadata(self, page): extr = text.extract_from(page) @@ -133,10 +145,10 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor): example = "https://xbato.org/title/12345-MANGA/" def __init__(self, match): - self._init_root(match) - self.manga_id = match.group(2) or match.group(3) - url = "{}/title/{}".format(self.root, self.manga_id) - MangaExtractor.__init__(self, match, url) + MangaExtractor.__init__(self, match, False) + self._init_root() + self.manga_id = self.groups[1] or self.groups[2] + self.manga_url = "{}/title/{}".format(self.root, self.manga_id) def chapters(self, page): extr = text.extract_from(page) diff --git a/test/results/batoto.py b/test/results/batoto.py index a64d064e..f52adea2 100644 --- a/test/results/batoto.py +++ b/test/results/batoto.py @@ -109,6 +109,7 @@ __tests__ = ( "#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official", "#category": ("", "batoto", "manga"), "#class" : batoto.BatotoMangaExtractor, + "#options" : {"domain": "xbato.org"}, "#count" : ">= 21", "chapter" : int,