From 975a7cb6b99d4b673deecf31ba1483c3d9563bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 21 Sep 2016 00:08:00 +0200 Subject: [PATCH] [batoto] add (optional) login capabilities --- README.rst | 7 ++-- gallery-dl.conf | 11 ++++++ gallery_dl/extractor/batoto.py | 63 ++++++++++++++++++++++++++-------- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/README.rst b/README.rst index 208c4c16..4d6398c2 100644 --- a/README.rst +++ b/README.rst @@ -38,7 +38,7 @@ Supported Sites * Booru: behoimi.org, danbooru.donmai.us, e621.net, gelbooru.com, konachan.com, - safebooru.org, chan.sankakucomplex.com, yande.re + rule34.xxx, safebooru.org, chan.sankakucomplex.com, yande.re * Manga: bato.to, kissmanga.com, mangahere.co, mangamint.com, mangapanda.com, mangapark.me, mangareader.net, mangashare.com, mangastream.com, @@ -55,7 +55,7 @@ Supported Sites 4chan.org, 8ch.net * Image Hosts: chronos.to, coreimg.net, imagebam.com, imagetwist.com, img.yt, imgbox.com, - imgcandy.net, imgchili.net, imgtrex.com. turboimagehost.com + imgcandy.net, imgchili.net, imgtrex.com, turboimagehost.com Configuration @@ -77,7 +77,8 @@ Authentication ============== Some extractors require you to provide valid login-credentials. -This currently includes ``pixiv``, ``exhentai``, ``nijie`` and ``seiga``. +This currently includes ``pixiv``, ``exhentai``, ``nijie``, ``seiga`` +and ``batoto``. You can set the necessary information in your configuration file (cf. gallery-dl.conf_) diff --git a/gallery-dl.conf b/gallery-dl.conf index 9de9177f..68c52054 100644 --- a/gallery-dl.conf +++ b/gallery-dl.conf @@ -16,10 +16,16 @@ "username": null, "password": null }, + "batoto": + { + "username": null, + "password": null + }, "exhentai": { "wait-min": 3, "wait-max": 6, + "download-original": true, "username": null, "password": null }, @@ -28,6 +34,11 @@ "username": null, "password": null }, + "seiga": + { + "username": null, + "password": null + }, "gelbooru": { "filename_fmt": "{category}_{id:>07}_{md5}.{extension}" diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 37e78e6a..0eb9bb54 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -1,15 +1,16 @@ # -*- coding: utf-8 -*- -# Copyright 2014, 2015 Mike Fährmann +# Copyright 2014-2016 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga chapters from http://bato.to/""" +"""Extract manga chapters from https://bato.to/""" from .common import AsynchronousExtractor, Message -from .. import text, iso639_1 +from .. import text, iso639_1, config, exception +from ..cache import cache import re class BatotoChapterExtractor(AsynchronousExtractor): @@ -21,25 +22,27 @@ class BatotoChapterExtractor(AsynchronousExtractor): pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] test = [("http://bato.to/reader#459878c8fda07502", { "url": "432d7958506ad913b0a9e42664a89e46a63e9296", - "keyword": "e34a9184a51266e4f1ab3c2a652a4359bb7e3d30", + "keyword": "7a3e03c40c8b3c7137c4ebe723b1b9c95a303d81", })] - url = "https://bato.to/areader" + url = "https://bato.to/" + reader_url = "https://bato.to/areader" def __init__(self, match): AsynchronousExtractor.__init__(self) self.token = match.group(1) - self.session.headers.update({ - "X-Requested-With": "XMLHttpRequest", - "Referer": "https://bato.to/reader", - }) def items(self): + self.login() + self.session.headers.update({ + "X-Requested-With": "XMLHttpRequest", + "Referer": self.url + "reader", + }) params = { "id": self.token, "p": 1, "supress_webtoon": "t", } - page = self.request(self.url, params=params).text + page = self.request(self.reader_url, params=params).text data = self.get_job_metadata(page) yield Message.Version, 1 yield Message.Directory, data.copy() @@ -50,7 +53,7 @@ class BatotoChapterExtractor(AsynchronousExtractor): yield Message.Url, image_url, data.copy() if next_url: params["p"] += 1 - page = self.request(self.url, params=params).text + page = self.request(self.reader_url, params=params).text def get_job_metadata(self, page): """Collect metadata for extractor-job""" @@ -64,15 +67,14 @@ class BatotoChapterExtractor(AsynchronousExtractor): _ , pos = extr(page, '', '', pos) count, pos = extr(page, '>page ', '<', pos-35) manga, pos = extr(page, "document.title = '", " - ", pos) - match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo) + match = re.match(r"(Vol.(\d+) )?Ch\.([^:]+)(: (.+))?", cinfo) return { "category": self.category, "token": self.token, "manga": text.unescape(manga), "volume": match.group(2) or "", "chapter": match.group(3), - "chapter-extra": match.group(4), - "title": match.group(6) or "", + "title": match.group(5) or "", "group": group, "lang": iso639_1.language_to_code(lang), "language": lang, @@ -88,3 +90,36 @@ class BatotoChapterExtractor(AsynchronousExtractor): iurl, pos = text.extract(page, '