diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 23459d0c..9aa51a08 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -97,6 +97,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles</td>
     <td></td>
 </tr>
+<tr>
+    <td>BATO.TO</td>
+    <td>https://bato.to/</td>
+    <td>Chapters, Manga</td>
+    <td></td>
+</tr>
 <tr>
     <td>BBC</td>
     <td>https://bbc.co.uk/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6fca0120..9e33f2c3 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ modules = [
     "architizer",
     "artstation",
     "aryion",
+    "batoto",
     "bbc",
     "behance",
     "blogger",
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
new file mode 100644
index 00000000..cd6302e6
--- /dev/null
+++ b/gallery_dl/extractor/batoto.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bato.to/"""
+
+from .common import Extractor, ChapterExtractor, MangaExtractor
+from .. import text, exception
+import re
+
+# Optional scheme followed by one of the supported domains
+BASE_PATTERN = (r"(?:https?://)?"
+                r"(?:(?:ba|d|w)to\.to|(?:batotoo|mangatoto)\.com)")
+
+
+class BatotoBase():
+    """Base class for batoto extractors"""
+    category = "batoto"
+    root = "https://bato.to"
+
+    def request(self, url, **kwargs):
+        """Request 'url' with the 'encoding' argument forced to UTF-8"""
+        kwargs["encoding"] = "utf-8"
+        return Extractor.request(self, url, **kwargs)
+
+
+class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
+    """Extractor for bato.to manga chapters"""
+    pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
+    example = "https://bato.to/title/12345-MANGA/54321"
+
+    def __init__(self, match):
+        # use the root of the matched URL instead of the class default
+        self.root = text.root_from_url(match.group(0))
+        self.chapter_id = match.group(1)
+        # only the chapter ID is known here; '0' fills the manga slot
+        url = "{}/title/0/{}".format(self.root, self.chapter_id)
+        ChapterExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        """Collect manga and chapter metadata from a chapter page"""
+        extr = text.extract_from(page)
+        # presumably "<manga> - <chapter info> - <site name>"; splitting
+        # at most twice keeps manga names containing " - " in one piece
+        manga, info, _ = extr("<title>", "<").rsplit(" - ", 2)
+        manga_id = extr("/title/", "/")
+
+        # optional "Volume N", then a word, a number, and a remainder
+        match = re.match(
+            r"(?:Volume\s+(\d+) )?"
+            r"\w+\s+(\d+)(.*)", info)
+        if match:
+            volume, chapter, minor = match.groups()
+            title = text.remove_html(extr(
+                "selected>", "</option")).partition(" : ")[2]
+        else:
+            # no chapter number found: use the info text as title
+            volume = chapter = 0
+            minor = ""
+            title = info
+
+        return {
+            "manga"        : text.unescape(manga),
+            "manga_id"     : text.parse_int(manga_id),
+            "title"        : text.unescape(title),
+            "volume"       : text.parse_int(volume),
+            "chapter"      : text.parse_int(chapter),
+            "chapter_minor": minor,
+            "chapter_id"   : text.parse_int(self.chapter_id),
+            # the last 3 characters (presumably milliseconds) are dropped
+            "date"         : text.parse_timestamp(extr(' time="', '"')[:-3]),
+        }
+
+    def images(self, page):
+        """Return a list of (url, None) tuples for the chapter's images"""
+        images_container = text.extr(page, 'pageOpts', ':[0,0]}"')
+        images_container = text.unescape(images_container)
+        # after unescaping, each URL sits between two \" sequences
+        return [
+            (url, None)
+            for url in text.extract_iter(images_container, r"\"", r"\"")
+        ]
+
+
+class BatotoMangaExtractor(BatotoBase, MangaExtractor):
+    """Extractor for bato.to manga"""
+    reverse = False
+    chapterclass = BatotoChapterExtractor
+    pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$"
+    example = "https://bato.to/title/12345-MANGA/"
+
+    def __init__(self, match):
+        # use the root of the matched URL instead of the class default
+        self.root = text.root_from_url(match.group(0))
+        self.manga_id = match.group(1)
+        url = "{}/title/{}".format(self.root, self.manga_id)
+        MangaExtractor.__init__(self, match, url)
+
+    def chapters(self, page):
+        """Return a list of (chapter URL, metadata) tuples for a manga"""
+        extr = text.extract_from(page)
+
+        # a warning box on the page aborts extraction with its message
+        warning = extr(' class="alert alert-warning">', "</div><")
+        if warning:
+            raise exception.StopExtraction("'%s'", text.remove_html(warning))
+
+        data = {
+            "manga_id": text.parse_int(self.manga_id),
+            "manga"   : text.unescape(extr(
+                "<title>", "<").rpartition(" - ")[0]),
+        }
+
+        # presumably skips ahead to the start of the chapter list
+        extr('<div data-hk="0-0-0-0"', "")
+        results = []
+        while True:
+            href = extr('<a href="/title/', '"')
+            if not href:
+                break
+
+            # e.g. "...-ch_8.5" yields chapter "8" and minor "5"
+            chapter = href.rpartition("-ch_")[2]
+            chapter, sep, minor = chapter.partition(".")
+
+            data["chapter"] = text.parse_int(chapter)
+            data["chapter_minor"] = sep + minor
+            data["date"] = text.parse_datetime(
+                extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
+
+            url = "{}/title/{}".format(self.root, href)
+            # copy, because 'data' is reused and updated on each iteration
+            results.append((url, data.copy()))
+        return results
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 2995a46f..798a6830 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -32,6 +32,7 @@ CATEGORY_MAP = {
     "atfbooru"       : "ATFBooru",
     "b4k"            : "arch.b4k.co",
     "baraag"         : "baraag",
+    "batoto"         : "BATO.TO",
     "bbc"            : "BBC",
     "comicvine"      : "Comic Vine",
     "coomerparty"    : "Coomer",
diff --git a/test/results/batoto.py b/test/results/batoto.py
new file mode 100644
index 00000000..f3853247
--- /dev/null
+++ b/test/results/batoto.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import batoto
+from gallery_dl import exception
+
+__tests__ = (
+{
+    "#url"     : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8",
+    "#category": ("", "batoto", "chapter"),
+    "#class"   : batoto.BatotoChapterExtractor,
+    "#count"   : 66,
+
+    "manga"    : "I Shall Master this Family! [Official]",
+    "title"    : "Observing",
+    "chapter"  : 8,
+},
+{
+    "#url"     : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5",
+    "#comment" : "volume (vol) in url",
+    "#category": ("", "batoto", "chapter"),
+    "#class"   : batoto.BatotoChapterExtractor,
+    "#count"   : 7,
+
+    "manga"    : "86--EIGHTY-SIX (Official)",
+    "title"    : "The Spearhead Squadron's Power",
+    "volume"   : 1,
+    "chapter"  : 5,
+},
+{
+    "#url"     : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official",
+    "#category": ("", "batoto", "manga"),
+    "#class"   : batoto.BatotoMangaExtractor,
+    "#count"   : ">= 21",
+
+    "manga"    : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den (Official)",
+},
+{
+    "#url"     : "https://bato.to/title/104929-86-eighty-six-official",
+    "#comment" : "Manga with number in name",
+    "#category": ("", "batoto", "manga"),
+    "#class"   : batoto.BatotoMangaExtractor,
+    "#count"   : ">= 18",
+
+    "manga"    : "86--EIGHTY-SIX (Official)",
+},
+{
+    "#url"     : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan",
+    "#comment" : "Non-English translation (Indonesian)",
+    "#category": ("", "batoto", "manga"),
+    "#class"   : batoto.BatotoMangaExtractor,
+    "#count"   : ">= 29",
+
+    "manga"    : "The Grand Duke’s Fox Princess ⎝⎝MGCHAN⎠⎠",
+},
+{
+    "#url"     : "https://bato.to/title/134270-removed",
+    "#category": ("", "batoto", "manga"),
+    "#class"   : batoto.BatotoMangaExtractor,
+    "#exception": exception.StopExtraction,
+}
+)