From 975a7cb6b99d4b673deecf31ba1483c3d9563bba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Wed, 21 Sep 2016 00:08:00 +0200
Subject: [PATCH] [batoto] add (optional) login capabilities

---
 README.rst                     |  7 ++--
 gallery-dl.conf                | 11 ++++++
 gallery_dl/extractor/batoto.py | 63 ++++++++++++++++++++++++++--------
 3 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/README.rst b/README.rst
index 208c4c16..4d6398c2 100644
--- a/README.rst
+++ b/README.rst
@@ -38,7 +38,7 @@ Supported Sites
 
 * Booru:
     behoimi.org, danbooru.donmai.us, e621.net, gelbooru.com, konachan.com,
-    safebooru.org, chan.sankakucomplex.com, yande.re
+    rule34.xxx, safebooru.org, chan.sankakucomplex.com, yande.re
 * Manga:
     bato.to, kissmanga.com, mangahere.co, mangamint.com, mangapanda.com,
     mangapark.me, mangareader.net, mangashare.com, mangastream.com,
@@ -55,7 +55,7 @@ Supported Sites
     4chan.org, 8ch.net
 * Image Hosts:
     chronos.to, coreimg.net, imagebam.com, imagetwist.com, img.yt, imgbox.com,
-    imgcandy.net, imgchili.net, imgtrex.com. turboimagehost.com
+    imgcandy.net, imgchili.net, imgtrex.com, turboimagehost.com
 
 
 Configuration
@@ -77,7 +77,8 @@ Authentication
 ==============
 
 Some extractors require you to provide valid login-credentials.
-This currently includes ``pixiv``, ``exhentai``, ``nijie`` and ``seiga``.
+This currently includes ``pixiv``, ``exhentai``, ``nijie``, ``seiga``
+and ``batoto``.
 
 You can set the necessary information in your configuration file
 (cf. gallery-dl.conf_)
diff --git a/gallery-dl.conf b/gallery-dl.conf
index 9de9177f..68c52054 100644
--- a/gallery-dl.conf
+++ b/gallery-dl.conf
@@ -16,10 +16,16 @@
             "username": null,
             "password": null
         },
+        "batoto":
+        {
+            "username": null,
+            "password": null
+        },
         "exhentai":
         {
             "wait-min": 3,
             "wait-max": 6,
+            "download-original": true,
             "username": null,
             "password": null
         },
@@ -28,6 +34,11 @@
             "username": null,
             "password": null
         },
+        "seiga":
+        {
+            "username": null,
+            "password": null
+        },
         "gelbooru":
         {
             "filename_fmt": "{category}_{id:>07}_{md5}.{extension}"
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 37e78e6a..0eb9bb54 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -1,15 +1,16 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014, 2015 Mike Fährmann
+# Copyright 2014-2016 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract manga chapters from http://bato.to/"""
+"""Extract manga chapters from https://bato.to/"""
 
 from .common import AsynchronousExtractor, Message
-from .. import text, iso639_1
+from .. import text, iso639_1, config, exception
+from ..cache import cache
 import re
 
 class BatotoChapterExtractor(AsynchronousExtractor):
@@ -21,25 +22,27 @@ class BatotoChapterExtractor(AsynchronousExtractor):
     pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
     test = [("http://bato.to/reader#459878c8fda07502", {
         "url": "432d7958506ad913b0a9e42664a89e46a63e9296",
-        "keyword": "e34a9184a51266e4f1ab3c2a652a4359bb7e3d30",
+        "keyword": "7a3e03c40c8b3c7137c4ebe723b1b9c95a303d81",
     })]
-    url = "https://bato.to/areader"
+    url = "https://bato.to/"
+    reader_url = "https://bato.to/areader"
 
     def __init__(self, match):
         AsynchronousExtractor.__init__(self)
         self.token = match.group(1)
-        self.session.headers.update({
-            "X-Requested-With": "XMLHttpRequest",
-            "Referer": "https://bato.to/reader",
-        })
 
     def items(self):
+        self.login()
+        self.session.headers.update({
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": self.url + "reader",
+        })
         params = {
             "id": self.token,
             "p": 1,
             "supress_webtoon": "t",
         }
-        page = self.request(self.url, params=params).text
+        page = self.request(self.reader_url, params=params).text
         data = self.get_job_metadata(page)
         yield Message.Version, 1
         yield Message.Directory, data.copy()
@@ -50,7 +53,7 @@ class BatotoChapterExtractor(AsynchronousExtractor):
             yield Message.Url, image_url, data.copy()
             if next_url:
                 params["p"] += 1
-                page = self.request(self.url, params=params).text
+                page = self.request(self.reader_url, params=params).text
 
     def get_job_metadata(self, page):
         """Collect metadata for extractor-job"""
@@ -64,15 +67,14 @@ class BatotoChapterExtractor(AsynchronousExtractor):
         _    , pos = extr(page, '</select>', '', pos)
         count, pos = extr(page, '>page ', '<', pos-35)
         manga, pos = extr(page, "document.title = '", " - ", pos)
-        match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo)
+        match = re.match(r"(Vol.(\d+) )?Ch\.([^:]+)(: (.+))?", cinfo)
         return {
             "category": self.category,
             "token": self.token,
             "manga": text.unescape(manga),
             "volume": match.group(2) or "",
             "chapter": match.group(3),
-            "chapter-extra": match.group(4),
-            "title": match.group(6) or "",
+            "title": match.group(5) or "",
             "group": group,
             "lang": iso639_1.language_to_code(lang),
             "language": lang,
@@ -88,3 +90,36 @@ class BatotoChapterExtractor(AsynchronousExtractor):
         iurl, pos = text.extract(page, '<img src="', '"', pos)
         return nurl if "_" in nurl else None, iurl
 
+    def login(self):
+        """Log in and set the necessary session cookies"""
+        username = config.interpolate(("extractor", "batoto", "username"))
+        password = config.interpolate(("extractor", "batoto", "password"))
+        if username and password:
+            cookies = self._login_impl(username, password)
+            for key, value in cookies.items():
+                self.session.cookies.set(key, value, domain=".bato.to", path="/")
+
+    @cache(maxage=360*24*60*60, keyarg=1)
+    def _login_impl(self, username, password):
+        """Log in and return the 'member_id' and 'pass_hash' cookies"""
+        page = self.request(self.url).text
+        auth = text.extract(page, "name='auth_key' value='", "'")[0]
+        params = {
+            "app": "core",
+            "module": "global",
+            "section": "login",
+            "do": "process",
+        }
+        data = {
+            "auth_key": auth,
+            "referer": self.url,
+            "ips_username": username,
+            "ips_password": password,
+            "rememberMe": "1",
+            "anonymous": "1",
+        }
+        response = self.request(self.url + "forums/index.php",
+                                 method="POST", params=params, data=data)
+        if "Sign In - " in response.text:
+            raise exception.AuthenticationError()
+        return {c: response.cookies[c] for c in ("member_id", "pass_hash")}