From 18fabb9605de6a3ab09b6dae27699d142989e59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 21 Jan 2026 20:10:38 +0100 Subject: [PATCH] [batoto] remove module (#8908) "Bato.to has shut down." There are mirror sites, but they are unscrapeable due to heavily obfuscated HTML and JS --- docs/configuration.rst | 22 --- docs/gallery-dl.conf | 4 - docs/supportedsites.md | 6 - gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/batoto.py | 214 -------------------- test/results/batoto.py | 326 ------------------------------- 6 files changed, 573 deletions(-) delete mode 100644 gallery_dl/extractor/batoto.py delete mode 100644 test/results/batoto.py diff --git a/docs/configuration.rst b/docs/configuration.rst index e34087d7..452c5152 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1760,28 +1760,6 @@ Description Get posts from "Latest Updates" pages -extractor.batoto.domain ------------------------ -Type - ``string`` -Default - ``"auto"`` -Example - ``"mangatoto.org"`` -Description - Specifies the domain used by ``batoto`` extractors. - - ``"auto"`` | ``"url"`` - Use the input URL's domain - ``"nolegacy"`` - Use the input URL's domain - - replace legacy domains with ``"xbato.org"`` - ``"nowarn"`` - Use the input URL's domain - - do not warn about legacy domains - any ``string`` - Use this domain - extractor.bbc.width ------------------- diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 93efad8c..f49d3e8e 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -146,10 +146,6 @@ "recursive": true }, - "batoto": - { - "domain": "auto" - }, "bbc": { "width": 1920 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5ec771c5..caf88c5c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -127,12 +127,6 @@ Consider all listed sites to potentially be NSFW. Audios, Collections, Search Results, User Profiles - - BATO.TO - https://bato.to/ - Chapters, Manga - - BBC https://bbc.co.uk/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 97a5089f..f017da58 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -31,7 +31,6 @@ modules = [ "artstation", "aryion", "audiochan", - "batoto", "bbc", "behance", "bellazon", diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py deleted file mode 100644 index 3a4bf500..00000000 --- a/gallery_dl/extractor/batoto.py +++ /dev/null @@ -1,214 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://bato.to/""" - -from .common import Extractor, ChapterExtractor, MangaExtractor -from .. import text, util -from ..cache import memcache - -BASE_PATTERN = (r"(?:https?://)?(" - r"(?:ba|d|f|h|j|m|w)to\.to|" - r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|" - r"comiko\.(?:net|org)|" - r"bat(?:otoo|o?two)\.com)") - -# https://rentry.co/batoto -DOMAINS = { - "dto.to", - "fto.to", - "hto.to", - "jto.to", - "mto.to", - "wto.to", - "xbato.com", - "xbato.net", - "xbato.org", - "zbato.com", - "zbato.net", - "zbato.org", - "readtoto.com", - "readtoto.net", - "readtoto.org", - "batocomic.com", - "batocomic.net", - "batocomic.org", - "batotoo.com", - "batotwo.com", - "comiko.net", - "comiko.org", - "battwo.com", -} -LEGACY_DOMAINS = { - "bato.to", - "mangatoto.com", - "mangatoto.net", - "mangatoto.org", -} - - -class BatotoBase(): - """Base class for batoto extractors""" - category = "batoto" - root = "https://xbato.org" - _warn_legacy = True - - def _init_root(self): - domain = self.config("domain") - if domain is None or domain in {"auto", "url"}: - domain = self.groups[0] - if domain in LEGACY_DOMAINS: - if self._warn_legacy: - BatotoBase._warn_legacy = False - self.log.warning("Legacy domain '%s'", domain) - elif domain == "nolegacy": - domain = self.groups[0] - if domain in LEGACY_DOMAINS: - domain = "xbato.org" - elif domain == "nowarn": - domain = self.groups[0] - self.root = "https://" + domain - - def request(self, url, **kwargs): - kwargs["encoding"] = "utf-8" - return Extractor.request(self, url, **kwargs) - - -class BatotoChapterExtractor(BatotoBase, ChapterExtractor): - """Extractor for batoto manga chapters""" - archive_fmt = "{chapter_id}_{page}" - pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)" - example = "https://xbato.org/title/12345-MANGA/54321" - - def __init__(self, match): - ChapterExtractor.__init__(self, match, False) - self._init_root() - self.chapter_id = self.groups[1] - self.page_url = f"{self.root}/title/0/{self.chapter_id}" - - def metadata(self, page): - extr = text.extract_from(page) - try: - manga, info, _ = extr("", "<").rsplit(" - ", 3) - except ValueError: - manga = info = None - - manga_id = text.extr( - extr('rel="canonical" href="', '"'), "/title/", "/") - - if not manga: - manga = extr('link-hover">', "<") - info = text.remove_html(extr('link-hover">', "</")) - info = text.unescape(info) - - match = text.re( - r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?" - r"(?:Chapter|Episode)\s*(\d+)([\w.]*)").match(info) - if match: - volume, chapter, minor = match.groups() - else: - volume = chapter = 0 - minor = "" - - return { - **_manga_info(self, manga_id), - "chapter_url" : extr(self.chapter_id + "-ch_", '"'), - "title" : text.unescape(text.remove_html(extr( - "selected>", "</option")).partition(" : ")[2]), - "volume" : text.parse_int(volume), - "chapter" : text.parse_int(chapter), - "chapter_minor" : minor, - "chapter_string": info, - "chapter_id" : text.parse_int(self.chapter_id), - "date" : self.parse_timestamp(extr(' time="', '"')[:-3]), - } - - def images(self, page): - container = text.unescape(text.extr(page, 'pageOpts', ':[0,0]}"')) - - return [ - ((url.replace("://k", "://n", 1) - if url.startswith("https://k") and ".mb" in url else - url), None) - for url in text.extract_iter(container, r"\"", r"\"") - ] - - -class BatotoMangaExtractor(BatotoBase, MangaExtractor): - """Extractor for batoto manga""" - reverse = False - chapterclass = BatotoChapterExtractor - pattern = (BASE_PATTERN + - r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$") - example = "https://xbato.org/title/12345-MANGA/" - - def __init__(self, match): - MangaExtractor.__init__(self, match, False) - self._init_root() - self.manga_id = self.groups[1] or self.groups[2] - self.page_url = f"{self.root}/title/{self.manga_id}" - - def chapters(self, page): - extr = text.extract_from(page) - if warning := extr(' class="alert alert-warning">', "</div>"): - self.log.warning("'%s'", text.remove_html(warning)) - extr('<div data-hk="0-0-0-0"', "") - data = _manga_info(self, self.manga_id, page) - - results = [] - while True: - href = extr('<a href="/title/', '"') - if not href: - break - - chapter = href.rpartition("-ch_")[2] - chapter, sep, minor = chapter.partition(".") - - data["chapter"] = text.parse_int(chapter) - data["chapter_minor"] = sep + minor - data["date"] = self.parse_datetime_iso(extr('time="', '"')) - - url = f"{self.root}/title/{href}" - results.append((url, data.copy())) - return results - - -@memcache(keyarg=1) -def _manga_info(self, manga_id, page=None): - if page is None: - url = f"{self.root}/title/{manga_id}" - page = self.request(url).text - - props = text.extract(page, 'props="', '"', page.find(' prefix="r20" '))[0] - data = util.json_loads(text.unescape(props))["data"][1] - - return { - "manga" : data["name"][1], - "manga_id" : text.parse_int(manga_id), - "manga_slug" : data["slug"][1], - "manga_date" : self.parse_timestamp( - data["dateCreate"][1] / 1000), - "manga_date_updated": self.parse_timestamp( - data["dateUpdate"][1] / 1000), - "author" : json_list(data["authors"]), - "artist" : json_list(data["artists"]), - "genre" : json_list(data["genres"]), - "lang" : data["tranLang"][1], - "lang_orig" : data["origLang"][1], - "status" : data["originalStatus"][1], - "published" : data["originalPubFrom"][1], - "description": data["summary"][1]["code"][1], - "cover" : data["urlCoverOri"][1], - "uploader" : data["userId"][1], - "score" : data["stat_score_avg"][1], - } - - -def json_list(value): - return [ - item[1].replace("_", " ") - for item in util.json_loads(value[1].replace('\\"', '"')) - ] diff --git a/test/results/batoto.py b/test/results/batoto.py deleted file mode 100644 index f20d50e7..00000000 --- a/test/results/batoto.py +++ /dev/null @@ -1,326 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -from gallery_dl.extractor import batoto - -__tests__ = ( -{ - "#url" : "https://battwo.com/title/86408-i-shall-master-this-family-official/1681030-ch_8", - "#class" : batoto.BatotoChapterExtractor, - "#pattern" : r"https://n\d+\.mb\w{3}\.org/media/7006/.+\.webp", - "#count" : 66, - - "artist" : ["Mon"], - "author" : ["Kim Roah"], - "chapter" : 8, - "chapter_id" : 1681030, - "chapter_minor" : "", - "chapter_string": "Chapter 8", - "chapter_url" : "8", - "count" : 66, - "page" : range(1, 66), - "cover" : "https://n24.mbhiz.org/media/mbim/476/4765b5482c87970ae18e3e335bc8a3f2f7a47f8b_400_600_43900.webp", - "date" : "dt:2021-05-15 18:51:37", - "description" : "The great Lombardi family was once at the top of the empire. After the death of its patriarch, the fate of the family and that of Firentia, born from a Lombardi and a peasant, fall to ruin at the hands of her useless and cruel cousins. But when she’s reincarnated as her seven-year-old self, she’ll work to protect the family’s honor, gain her grandpa Lulac’s favor, and prevent her own father’s death. In this lifetime, there’s only one way for her to win: become the head of their mighty household.", - "extension" : "webp", - "filename" : str, - "lang" : "en", - "lang_orig" : "ko", - "manga" : "I Shall Master this Family! [Official]", - "manga_date" : "dt:2021-05-10 20:18:58", - "manga_date_updated": "dt:2025-12-28 18:41:24", - "manga_id" : 86408, - "manga_slug" : "i-shall-master-this-family-official", - "published" : "2021", - "score" : range(8, 10), - "status" : "ongoing", - "title" : "Observing", - "uploader" : "677083", - "volume" : 0, - "genre" : [ - "drama", - "fantasy", - "full color", - "historical", - "manhwa", - "reincarnation", - "romance", - "shoujo", - "time travel", - "webtoon", - ], -}, - -{ - "#url" : "https://battwo.com/title/104929-86-eighty-six-official/1943513-vol_1-ch_5", - "#comment" : "volume (vol) in url", - "#class" : batoto.BatotoChapterExtractor, - "#count" : 7, - - "manga" : "86--EIGHTY-SIX (Official)", - "title" : "The Spearhead Squadron's Power", - "volume" : 1, - "chapter": 5, -}, - -{ - "#url" : "https://mto.to/chapter/2584460", - "#comment" : "'-' in manga title (#5200)", - "#class" : batoto.BatotoChapterExtractor, - - "chapter" : 9, - "chapter_id": 2584460, - "chapter_minor": "", - "chapter_url": "9", - "count" : 18, - "date" : "dt:2023-11-26 11:01:12", - "manga" : "Isekai Teni shitara Aiken ga Saikyou ni narimashita - Silver Fenrir to Ore ga Isekai Kurashi wo Hajimetara (Official)", - "manga_id" : 126793, - "title" : "", - "volume" : 0 -}, - -{ - "#url" : "https://battwo.com/title/90710-new-suitor-for-the-abandoned-wife/2089747-ch_76", - "#comment" : "duplicate info in chapter_minor / title (#5988)", - "#class" : batoto.BatotoChapterExtractor, - - "chapter" : 76, - "chapter_id" : 2089747, - "chapter_minor": "", - "chapter_url" : "76", - "title" : "Side Story 4 [END]", -}, - -{ - "#url" : "https://battwo.com/title/115494-today-with-you/2631897-ch_38", - "#class" : batoto.BatotoChapterExtractor, - - "chapter" : 37, - "chapter_id" : 2631897, - "chapter_minor" : "", - "chapter_string": "S1 Episode 37 (End of season)", - "chapter_url" : "38", - "count" : 69, - "date" : "dt:2023-12-20 17:31:18", - "manga" : "Today With You", - "manga_id" : 115494, - "title" : "", - "volume" : 1, -}, - -{ - "#url" : "https://battwo.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://battwo.com/chapter/1681030", - "#comment" : "v2 URL", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://battwo.com/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official", - "#class" : batoto.BatotoMangaExtractor, - "#pattern" : batoto.BatotoChapterExtractor.pattern, - "#count" : range(50, 80), - "#options" : {"domain": "xbato.org"}, - - "author" : ["Satsuki Nakamura"], - "artist" : ["Ei Ohitsuji", "Kana Yuki"], - "chapter" : int, - "chapter_minor": {"", ".5", ".6", ".7", ".8", ".9"}, - "cover" : "https://k02.mbimg.org/media/mbim/aa0/aa011e00e8354783114e1eb26beee624b98ab7f7_600_843_172402.webp", - "date" : "type:datetime", - "description" : "As the crown prince’s favored maiden at court, Kou Reirin’s future as the next empress is all but assured. That is, until her rival Shu Keigetsu, the court’s “sewer rat,” pushes her over a balcony! Reirin survives, but wakes up in Keigetsu’s body! Turns out, Keigetsu has used magic to swap bodies with Reirin in order to steal her position at court. After being sickly her whole life, Reirin is determined to use this new body to turn things around. She won’t let anything stop her, not even her impending execution!", - "lang" : "en", - "lang_orig" : "ja", - "manga" : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den", - "manga_date" : "dt:2022-11-07 09:10:20", - "manga_date_updated": "type:datetime", - "manga_id" : 113742, - "manga_slug" : "futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den", - "published" : "2020", - "score" : range(8, 10), - "status" : "ongoing", - "uploader" : "713741", - "genre" : [ - "adaptation", - "bodyswap", - "drama", - "fantasy", - "historical", - "josei", - "manga", - "romance", - "villainess", - ], -}, - -{ - "#url" : "https://battwo.com/title/104929-86-eighty-six-official", - "#comment" : "Manga with number in name", - "#class" : batoto.BatotoMangaExtractor, - "#count" : ">= 18", - - "manga": "86--EIGHTY-SIX (Official)", -}, - -{ - "#url" : "https://battwo.com/title/140046-the-grand-duke-s-fox-princess-mgchan", - "#comment" : "Non-English translation (Indonesian)", - "#class" : batoto.BatotoMangaExtractor, - "#count" : ">= 29", - - "manga": "The Grand Duke’s Fox Princess [cont by LUNABY]", -}, - -{ - "#url" : "https://battwo.com/title/134270-removed", - "#comment" : "Deleted/removed manga", - "#class" : batoto.BatotoMangaExtractor, - "#log" : "'This comic has been marked as deleted and the chapter list is not available.'", - "#count" : 0, -}, - -{ - "#url" : "https://mto.to/series/136193", - "#comment" : "uploader notice (#7657)", - "#category": ("", "batoto", "manga"), - "#class" : batoto.BatotoMangaExtractor, - "#log" : "'UPLOADER NOTICE - The comic was deleted off EbookRenta :/'", - "#results" : ( - "https://mto.to/title/136193-botsuraku-sunzen-desunode-konyakusha-o-furikiro-to-omoimasu-official/2456573-ch_1", - "https://mto.to/title/136193-botsuraku-sunzen-desunode-konyakusha-o-furikiro-to-omoimasu-official/2713985-ch_2", - "https://mto.to/title/136193-botsuraku-sunzen-desunode-konyakusha-o-furikiro-to-omoimasu-official/2739046-ch_3", - ), -}, - -{ - "#url" : "https://battwo.com/title/86408-i-shall-master-this-family-official", - "#class" : batoto.BatotoMangaExtractor, -}, - -{ - "#url" : "https://battwo.com/series/86408/i-shall-master-this-family-official", - "#comment" : "v2 URL", - "#class" : batoto.BatotoMangaExtractor, -}, - -{ - "#url" : "https://dto.to/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://fto.to/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://hto.to/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://jto.to/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://mto.to/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://wto.to/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://mangatoto.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://mangatoto.net/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://mangatoto.org/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://batocomic.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://batocomic.net/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://batocomic.org/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://readtoto.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://readtoto.net/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://readtoto.org/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://xbato.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://xbato.net/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://xbato.org/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://zbato.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://zbato.net/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://zbato.org/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://comiko.net/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://comiko.org/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -{ - "#url" : "https://batotoo.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://batotwo.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, -{ - "#url" : "https://battwo.com/title/86408/1681030", - "#class" : batoto.BatotoChapterExtractor, -}, - -)