# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://bato.to/"""

from .common import Extractor, ChapterExtractor, MangaExtractor
from .. import text, exception
import re

# URL prefix shared by the extractor patterns in this module.
# Capture group 1 is the host name; the scheme is optional.
BASE_PATTERN = (r"(?:https?://)?("
                r"(?:ba|d|f|h|j|m|w)to\.to|"
                r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
                r"comiko\.(?:net|org)|"
                r"bat(?:otoo|o?two)\.com)")

# https://rentry.co/batoto
# NOTE(review): DOMAINS is not referenced in the visible part of this
# file; presumably it is used further down -- confirm before changing.
DOMAINS = {
    "dto.to",
    "fto.to",
    "hto.to",
    "jto.to",
    "mto.to",
    "wto.to",
    "xbato.com",
    "xbato.net",
    "xbato.org",
    "zbato.com",
    "zbato.net",
    "zbato.org",
    "readtoto.com",
    "readtoto.net",
    "readtoto.org",
    "batocomic.com",
    "batocomic.net",
    "batocomic.org",
    "batotoo.com",
    "batotwo.com",
    "comiko.net",
    "comiko.org",
    "battwo.com",
}
# Hosts that are never used as 'root'; URLs on these domains fall back
# to the class-level default root (see BatotoBase._init_root).
LEGACY_DOMAINS = {
    "bato.to",
    "mangatoto.com",
    "mangatoto.net",
    "mangatoto.org",
}


class BatotoBase():
    """Base class for batoto extractors"""
    category = "batoto"
    # Default root; kept whenever the matched host is a legacy domain.
    root = "https://xbato.org"

    def _init_root(self, match):
        """Set 'root' from the host captured in group 1 of 'match'.

        Hosts listed in LEGACY_DOMAINS are skipped, which leaves the
        class-level default 'root' in effect for those URLs.
        """
        domain = match.group(1)
        if domain not in LEGACY_DOMAINS:
            self.root = "https://" + domain

    def request(self, url, **kwargs):
        """Call Extractor.request() with 'encoding' forced to "utf-8".

        Any 'encoding' value supplied by the caller is overwritten.
        """
        kwargs["encoding"] = "utf-8"
        return Extractor.request(self, url, **kwargs)


class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
    """Extractor for batoto manga chapters"""
    archive_fmt = "{chapter_id}_{page}"
    # Group 1: host (from BASE_PATTERN); group 2: numeric chapter ID.
    pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
    example = "https://xbato.org/title/12345-MANGA/54321"

    def __init__(self, match):
        """Resolve the root and build the chapter URL from 'match'."""
        # 'root' must be settled first; the URL below is built from it.
        self._init_root(match)
        self.chapter_id = match.group(2)
        # The title segment is replaced by a literal "0" placeholder;
        # only the chapter ID from the matched URL is carried over.
        url = "{}/title/0/{}".format(self.root, self.chapter_id)
        ChapterExtractor.__init__(self, match, url)

    # NOTE(review): SOURCE is cut off inside ``metadata``. The visible
    # fragment is kept verbatim below (commented out, because it ends
    # in an unterminated string literal). Restore the full method from
    # the complete file rather than from this fragment.
    #
    # def metadata(self, page):
    #     extr = text.extract_from(page)
    #     try:
    #         manga, info, _ = extr("