From 1e9395517041e8283dff1e97ad8e76e99aa7d440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 23 Jan 2018 15:45:30 +0100 Subject: [PATCH] [batoto] remove module Site officially shut down on 2018.01.18 --- README.rst | 3 +- docs/supportedsites.rst | 1 - gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/batoto.py | 217 ------------------------------- test/test_cookies.py | 3 +- 5 files changed, 2 insertions(+), 223 deletions(-) delete mode 100644 gallery_dl/extractor/batoto.py diff --git a/README.rst b/README.rst index bb12870e..ab954780 100644 --- a/README.rst +++ b/README.rst @@ -112,7 +112,6 @@ Supported Sites * pixiv.net * seiga.nicovideo.jp * nijie.info -* bato.to * mangastream.com * kissmanga.com * readcomiconline.to @@ -160,7 +159,7 @@ Username & Password Some extractors require you to provide valid login-credentials in the form of a username & password pair. This is necessary for ``pixiv``, ``nijie`` and ``seiga`` and optional -(but strongly recommended) for ``exhentai``, ``batoto`` and ``sankaku``. +(but strongly recommended) for ``exhentai`` and ``sankaku``. You can set the necessary information in your configuration file (cf. gallery-dl.conf_) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 45116a94..c82d51ec 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -10,7 +10,6 @@ Site URL Capabilities arch.b4k.co https://arch.b4k.co/ Threads Archive of Sins https://archiveofsins.com/ Threads Archived.Moe https://archived.moe/ Threads -Batoto https://bato.to/ Chapters, Manga Optional Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Desuarchive https://desuarchive.org/ Threads DeviantArt https://www.deviantart.com/ |Collections, De-1| Optional (OAuth) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 33af1895..249778f5 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -18,7 +18,6 @@ modules = [ "archivedmoe", "archiveofsins", "b4k", - "batoto", "danbooru", "desuarchive", "deviantart", diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py deleted file mode 100644 index be74b7fb..00000000 --- a/gallery_dl/extractor/batoto.py +++ /dev/null @@ -1,217 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2014-2017 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract manga chapters from https://bato.to/""" - -from .common import MangaExtractor, AsynchronousExtractor, Message -from .. import text, util, cloudflare, exception -from ..cache import cache -import re - - -class BatotoExtractor(): - """Base class for batoto extractors""" - category = "batoto" - scheme = "https" - root = "https://bato.to" - cookienames = ("member_id", "pass_hash") - cookiedomain = ".bato.to" - - request = cloudflare.request_func - - def login(self): - """Login and set necessary cookies""" - if self._check_cookies(self.cookienames): - return - username, password = self._get_auth_info() - if username: - cookies = self._login_impl(username, password) - for key, value in cookies.items(): - self.session.cookies.set( - key, value, domain=self.cookiedomain) - - @cache(maxage=7*24*60*60, keyarg=1) - def _login_impl(self, username, password): - """Actual login implementation""" - self.log.info("Logging in as %s", username) - page = self.request(self.root).text - auth = text.extract(page, "name='auth_key' value='", "'")[0] - params = { - "app": "core", - "module": "global", - "section": "login", - "do": "process", - } - data = { - "auth_key": auth, - "referer": self.root, - "ips_username": username, - "ips_password": password, - "rememberMe": "1", - "anonymous": "1", - } - response = self.session.post(self.root + "/forums/index.php", - params=params, data=data) - if "Sign In - " in response.text: - raise exception.AuthenticationError() - return {c: response.cookies[c] for c in self.cookienames} - - @staticmethod - def parse_chapter_string(data): - """Parse 'chapter_string' value contained in 'data'""" - data["chapter_string"] = text.unescape(data["chapter_string"]) - pattern = r"(?:Vol\.(\d+) )?Ch\.([\d\w]+)([^ :]*)(?::? (.+))?" - match = re.match(pattern, data["chapter_string"]) - - volume, chapter, data["chapter_minor"], title = match.groups() - data["volume"] = util.safe_int(volume) - data["chapter"] = util.safe_int(chapter, chapter) - data["title"] = title if title and title != "Read Online" else "" - return data - - -class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): - """Extractor for manga from bato.to""" - pattern = [r"(?:https?://)?(?:www\.)?(bato\.to" - r"/comic/_(?:/comics)?/[^/?&#]*-r\d+)"] - test = [ - ("http://bato.to/comic/_/comics/aria-r2007", { - "url": "a38585b0339587666d772ee06f2a60abdbf42a97", - "keyword": "c33ea7b97e3714530384e2411fae62ae51aae50d", - }), - # non-numeric chapter ("Extra") - ("https://bato.to/comic/_/comics/cosplay-deka-r2563", { - "count": ">= 37", - }), - # short URL - ("https://bato.to/comic/_/aria-r2007", None), - ] - - def chapters(self, page): - pos = 0 - results = [] - page = text.extract( - page, '

Chapters

', '')[0] - - while True: - data, pos = text.extract_all(page, ( - ("language" , '', '<'), - (None , '', '<'), - ), pos) - - if not data["token"]: - return results - - self.parse_chapter_string(data) - data["lang"] = util.language_to_code(data["language"]) - data["group"] = text.unescape(data["group"]) - data["contributor"] = text.unescape(data["contributor"]) - url = self.root + "/reader#" + data["token"] - - results.append((url, data)) - - -class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor): - """Extractor for manga-chapters from bato.to""" - subcategory = "chapter" - directory_fmt = [ - "{category}", "{manga}", - "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}"] - filename_fmt = ( - "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") - pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] - test = [ - ("http://bato.to/reader#459878c8fda07502", { - "url": "432d7958506ad913b0a9e42664a89e46a63e9296", - "keyword": "96598b6f94d2b26d11c2780f8173cd6ab5fe9906", - }), - ("http://bato.to/reader#459878c8fda07502", { # error 10030 - "exception": exception.AuthorizationError, - "options": (("username", None),), - }), - ("https://bato.to/reader#528e7d7c4b1db6ff", { # error 10031 - "exception": exception.AuthorizationError, - }), - ("http://bato.to/reader#459878c8fda07503", { # error 10020 - "exception": exception.NotFoundError, - }), - ] - reader_url = "https://bato.to/areader" - - def __init__(self, match): - super().__init__() - self.token = match.group(1) - - def items(self): - self.login() - self.session.headers.update({ - "X-Requested-With": "XMLHttpRequest", - "Referer": self.root + "/reader", - }) - params = { - "id": self.token, - "p": 1, - "supress_webtoon": "t", - } - response = self.request(self.reader_url, params=params) - if response.status_code == 405: - error = text.extract(response.text, "ERROR [", "]")[0] - if error in ("10030", "10031"): - raise exception.AuthorizationError() - elif error == "10020": - raise exception.NotFoundError("chapter") - else: - raise Exception("error code: " + error) - page = response.text - data = self.get_job_metadata(page) - yield Message.Version, 1 - yield Message.Directory, data.copy() - for data["page"] in range(1, data["count"]+1): - next_url, image_url = self.get_page_urls(page) - text.nameext_from_url(image_url, data) - yield Message.Url, image_url, data.copy() - if next_url: - params["p"] += 1 - page = self.request(self.reader_url, params=params).text - - def get_job_metadata(self, page): - """Collect metadata for extractor-job""" - extr = text.extract - _ , pos = extr(page, '', ' - ', pos) - lang , pos = extr(page, '', '', pos) - _ , pos = extr(page, '