[batoto] add (optional) login capabilities

2016-09-21 00:08:00 +02:00
parent e3d156078c
commit 975a7cb6b9
3 changed files with 64 additions and 17 deletions
--- a/README.rst
+++ b/README.rst
@@ -38,7 +38,7 @@ Supported Sites

 * Booru:
    behoimi.org, danbooru.donmai.us, e621.net, gelbooru.com, konachan.com,
-    safebooru.org, chan.sankakucomplex.com, yande.re
+    rule34.xxx, safebooru.org, chan.sankakucomplex.com, yande.re
 * Manga:
    bato.to, kissmanga.com, mangahere.co, mangamint.com, mangapanda.com,
    mangapark.me, mangareader.net, mangashare.com, mangastream.com,
@@ -55,7 +55,7 @@ Supported Sites
    4chan.org, 8ch.net
 * Image Hosts:
    chronos.to, coreimg.net, imagebam.com, imagetwist.com, img.yt, imgbox.com,
-    imgcandy.net, imgchili.net, imgtrex.com. turboimagehost.com
+    imgcandy.net, imgchili.net, imgtrex.com, turboimagehost.com


 Configuration
@@ -77,7 +77,8 @@ Authentication
 ==============

 Some extractors require you to provide valid login-credentials.
-This currently includes ``pixiv``, ``exhentai``, ``nijie`` and ``seiga``.
+This currently includes ``pixiv``, ``exhentai``, ``nijie``, ``seiga``
+and ``batoto``.

 You can set the necessary information in your configuration file
 (cf. gallery-dl.conf_)
--- a/gallery-dl.conf
+++ b/gallery-dl.conf
@@ -16,10 +16,16 @@
            "username": null,
            "password": null
        },
+        "batoto":
+        {
+            "username": null,
+            "password": null
+        },
        "exhentai":
        {
            "wait-min": 3,
            "wait-max": 6,
+            "download-original": true,
            "username": null,
            "password": null
        },
@@ -28,6 +34,11 @@
            "username": null,
            "password": null
        },
+        "seiga":
+        {
+            "username": null,
+            "password": null
+        },
        "gelbooru":
        {
            "filename_fmt": "{category}_{id:>07}_{md5}.{extension}"
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -1,15 +1,16 @@
 # -*- coding: utf-8 -*-

-# Copyright 2014, 2015 Mike Fährmann
+# Copyright 2014-2016 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extract manga chapters from http://bato.to/"""
+"""Extract manga chapters from https://bato.to/"""

 from .common import AsynchronousExtractor, Message
-from .. import text, iso639_1
+from .. import text, iso639_1, config, exception
+from ..cache import cache
 import re

 class BatotoChapterExtractor(AsynchronousExtractor):
@@ -21,25 +22,27 @@ class BatotoChapterExtractor(AsynchronousExtractor):
    pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
    test = [("http://bato.to/reader#459878c8fda07502", {
        "url": "432d7958506ad913b0a9e42664a89e46a63e9296",
-        "keyword": "e34a9184a51266e4f1ab3c2a652a4359bb7e3d30",
+        "keyword": "7a3e03c40c8b3c7137c4ebe723b1b9c95a303d81",
    })]
-    url = "https://bato.to/areader"
+    url = "https://bato.to/"
+    reader_url = "https://bato.to/areader"

    def __init__(self, match):
        AsynchronousExtractor.__init__(self)
        self.token = match.group(1)
-        self.session.headers.update({
-            "X-Requested-With": "XMLHttpRequest",
-            "Referer": "https://bato.to/reader",
-        })

    def items(self):
+        self.login()
+        self.session.headers.update({
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": self.url + "reader",
+        })
        params = {
            "id": self.token,
            "p": 1,
            "supress_webtoon": "t",
        }
-        page = self.request(self.url, params=params).text
+        page = self.request(self.reader_url, params=params).text
        data = self.get_job_metadata(page)
        yield Message.Version, 1
        yield Message.Directory, data.copy()
@@ -50,7 +53,7 @@ class BatotoChapterExtractor(AsynchronousExtractor):
            yield Message.Url, image_url, data.copy()
            if next_url:
                params["p"] += 1
-                page = self.request(self.url, params=params).text
+                page = self.request(self.reader_url, params=params).text

    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
@@ -64,15 +67,14 @@ class BatotoChapterExtractor(AsynchronousExtractor):
        _    , pos = extr(page, '</select>', '', pos)
        count, pos = extr(page, '>page ', '<', pos-35)
        manga, pos = extr(page, "document.title = '", " - ", pos)
-        match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo)
+        match = re.match(r"(Vol.(\d+) )?Ch\.([^:]+)(: (.+))?", cinfo)
        return {
            "category": self.category,
            "token": self.token,
            "manga": text.unescape(manga),
            "volume": match.group(2) or "",
            "chapter": match.group(3),
-            "chapter-extra": match.group(4),
-            "title": match.group(6) or "",
+            "title": match.group(5) or "",
            "group": group,
            "lang": iso639_1.language_to_code(lang),
            "language": lang,
@@ -88,3 +90,36 @@ class BatotoChapterExtractor(AsynchronousExtractor):
        iurl, pos = text.extract(page, '<img src="', '"', pos)
        return nurl if "_" in nurl else None, iurl

+    def login(self):
+        """Login if credentials are configured and set the session cookies"""
+        username = config.interpolate(("extractor", "batoto", "username"))
+        password = config.interpolate(("extractor", "batoto", "password"))
+        if username and password:  # optional: skipped unless both are set
+            cookies = self._login_impl(username, password)  # name -> value
+            for key, value in cookies.items():
+                self.session.cookies.set(key, value, domain=".bato.to", path="/")
+
+    @cache(maxage=360*24*60*60, keyarg=1)  # 360 days if maxage is in seconds
+    def _login_impl(self, username, password):
+        """POST the login form; return the member_id and pass_hash cookies"""
+        page = self.request(self.url).text  # page carrying the auth_key field
+        auth = text.extract(page, "name='auth_key' value='", "'")[0]
+        params = {  # passed as 'params' to the POST request below
+            "app": "core",
+            "module": "global",
+            "section": "login",
+            "do": "process",
+        }
+        data = {  # passed as 'data' to the POST request below
+            "auth_key": auth,
+            "referer": self.url,
+            "ips_username": username,
+            "ips_password": password,
+            "rememberMe": "1",
+            "anonymous": "1",
+        }
+        response = self.request(self.url + "forums/index.php",
+                                 method="POST", params=params, data=data)
+        if "Sign In - " in response.text:  # treated as a failed login
+            raise exception.AuthenticationError()
+        return {c: response.cookies[c] for c in ("member_id", "pass_hash")}