diff --git a/docs/configuration.rst b/docs/configuration.rst index a7d5790a..3a86e2d7 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -224,7 +224,7 @@ Description The username to use when attempting to log in to another site. ``seiga`` modules and optional (but strongly recommended) for ``batoto`` and ``exhentai``. - This value can also be given via the ``-u/--username`` + This value can also be set via the ``-u/--username`` command-line option or by using a |.netrc|_ file. (see Authentication_) =========== ===== @@ -239,6 +239,20 @@ Description The password belonging to the username. =========== ===== +extractor.*.cookies +------------------- +=========== ===== +Type ``string`` or ``object`` +Default ``null`` +Description Source to read additional cookies from. + + * If this is a ``string``, it specifies the path of a + Mozilla/Netscape format cookies.txt file. + * If this is an ``object``, its key-value pairs, which should both + be ``strings``, will be used as cookie-names and -values. +=========== ===== + + Extractor-specific Options ========================== diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 33a6d55a..883265e8 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -6,10 +6,9 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Downloader module for http:// and https:// urls""" +"""Downloader module for http:// and https:// URLs""" import time -import requests import requests.exceptions as rexcepts import mimetypes import logging @@ -24,9 +23,9 @@ class Downloader(BasicDownloader): retries = config.interpolate(("downloader", "http", "retries",), 5) timeout = config.interpolate(("downloader", "http", "timeout",), None) - def __init__(self, output): + def __init__(self, session, output): BasicDownloader.__init__(self) - self.session = requests.session() + self.session = session self.out = output def download_impl(self, url, pathfmt): @@ -96,17 +95,3 @@ class Downloader(BasicDownloader): # output for unrecoverable errors self.out.error(pathfmt.path, msg, tries, 0) - - def set_headers(self, headers): - """Set headers for http requests""" - self.set_dict(self.session.headers, headers) - - def set_cookies(self, cookies): - """Set cookies for http requests""" - self.set_dict(self.session.cookies, cookies) - - @staticmethod - def set_dict(dest, src): - """Copy the contents of dictionary 'src' to 'dest'""" - dest.clear() - dest.update(src) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 08083f70..e91a5aeb 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -19,15 +19,19 @@ class BatotoExtractor(): category = "batoto" scheme = "https" root = "https://bato.to" + cookienames = ("member_id", "pass_hash") + cookiedomain = ".bato.to" def login(self): """Login and set necessary cookies""" + if self._check_cookies(self.cookienames): + return username, password = self.auth_info() if username: cookies = self._login_impl(username, password) for key, value in cookies.items(): self.session.cookies.set( - key, value, domain=".bato.to", path="/") + key, value, domain=self.cookiedomain) @cache(maxage=7*24*60*60, keyarg=1) def _login_impl(self, username, password): @@ -53,7 +57,7 @@ class BatotoExtractor(): method="POST", params=params, data=data) if "Sign In - " in response.text: raise exception.AuthenticationError() - return {c: response.cookies[c] for c in ("member_id", "pass_hash")} + return {c: response.cookies[c] for c in self.cookienames} class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 36fbfb15..f8013684 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -27,13 +27,13 @@ class BooruExtractor(Extractor): def __init__(self): Extractor.__init__(self) + self.session.headers.update(self.headers) self.params = {"limit": 50} self.setup() def items(self): yield Message.Version, 1 yield Message.Directory, self.get_job_metadata() - yield Message.Headers, self.headers for data in self.items_impl(): try: url = self.get_file_url(data) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index abd556ab..b43a2edd 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -9,12 +9,14 @@ """Common classes and constants used by extractor modules.""" import os +import re import time import netrc import queue import logging import requests import threading +import http.cookiejar from .message import Message from .. import config @@ -25,11 +27,26 @@ class Extractor(): subcategory = "" directory_fmt = ["{category}"] filename_fmt = "{filename}" + cookiedomain = "" def __init__(self): self.session = requests.Session() self.log = logging.getLogger(self.category) + cookies = self.config("cookies") + if cookies: + if isinstance(cookies, dict): + setcookie = self.session.cookies.set + for name, value in cookies.items(): + setcookie(name, value, domain=self.cookiedomain) + else: + try: + cj = http.cookiejar.MozillaCookieJar() + cj.load(cookies) + self.session.cookies.update(cj) + except OSError as exc: + self.log.warning("cookies: %s", exc) + def __iter__(self): return self.items() @@ -67,6 +84,17 @@ class Extractor(): response.encoding = encoding return response + def _check_cookies(self, cookienames, domain=None): + """Return True if all 'cookienames' exist in the current session""" + if not domain and self.cookiedomain: + domain = self.cookiedomain + for name in cookienames: + try: + self.session.cookies._find(name, domain) + except KeyError: + return False + return True + class AsynchronousExtractor(Extractor): @@ -159,6 +187,13 @@ def safe_request(session, url, method="GET", *args, **kwargs): return r +# Reduce strictness of the expected magic string in cookie jar files. +# (This allows the use of Wget-generated cookiejar files without modification) + +http.cookiejar.MozillaCookieJar.magic_re = re.compile( + "#( Netscape)? HTTP Cookie File", re.IGNORECASE) + + # The first import of requests happens inside this file. # If we are running on Windows and the from requests expected certificate file # is missing (which happens in a standalone executable from py2exe), the diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 9b0959bf..2acfa6c5 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -36,6 +36,8 @@ class ExhentaiGalleryExtractor(Extractor): }), ] root = "https://exhentai.org" + cookienames = ("ipb_member_id", "ipb_pass_hash") + cookiedomain = ".exhentai.org" def __init__(self, match): Extractor.__init__(self) @@ -50,9 +52,8 @@ class ExhentaiGalleryExtractor(Extractor): def items(self): self.login() + self.setup_headers() yield Message.Version, 1 - yield Message.Headers, self.setup_headers() - yield Message.Cookies, self.session.cookies url = "{}/g/{}/{}/".format(self.root, self.gid, self.token) response = self.session.get(url) @@ -76,14 +77,9 @@ class ExhentaiGalleryExtractor(Extractor): """Initialize headers""" self.session.headers.update({ "User-Agent": "Mozilla/5.0", - "Accept": "text/html,application/xhtml+xml," - "application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5", "Referer": self.root + "/", }) - headers = self.session.headers.copy() - headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5" - return headers def get_job_metadata(self, page): """Collect metadata for extractor-job""" @@ -182,6 +178,8 @@ class ExhentaiGalleryExtractor(Extractor): def login(self): """Login and set necessary cookies""" + if self._check_cookies(self.cookienames): + return username, password = self.auth_info() if not username: self.log.info("no username given; using e-hentai.org") @@ -191,21 +189,12 @@ class ExhentaiGalleryExtractor(Extractor): cookies = self._login_impl(username, password) for key, value in cookies.items(): self.session.cookies.set( - key, value, domain=".exhentai.org", path="/") + key, value, domain=self.cookiedomain) @cache(maxage=90*24*60*60, keyarg=1) def _login_impl(self, username, password): """Actual login implementation""" self.log.info("Logging in as %s", username) - cnames = ["ipb_member_id", "ipb_pass_hash"] - - try: - cookies = self.config("cookies") - if isinstance(cookies, dict) and all(c in cookies for c in cnames): - return cookies - except TypeError: - pass - url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01" params = { "CookieDate": "1", @@ -221,4 +210,4 @@ class ExhentaiGalleryExtractor(Extractor): if "You are now logged in as:" not in response.text: raise exception.AuthenticationError() - return {c: response.cookies[c] for c in cnames} + return {c: response.cookies[c] for c in self.cookienames} diff --git a/gallery_dl/extractor/imgchili.py b/gallery_dl/extractor/imgchili.py index 18501ef1..2c61b431 100644 --- a/gallery_dl/extractor/imgchili.py +++ b/gallery_dl/extractor/imgchili.py @@ -27,7 +27,6 @@ class ImgchiliExtractor(Extractor): page = self.request(self.url, encoding="utf-8").text data = self.get_job_metadata(page) yield Message.Version, 1 - yield Message.Headers, self.session.headers yield Message.Directory, data for url, image in self.get_images(page): data.update(image) diff --git a/gallery_dl/extractor/message.py b/gallery_dl/extractor/message.py index 91482bf2..12689997 100644 --- a/gallery_dl/extractor/message.py +++ b/gallery_dl/extractor/message.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 Mike Fährmann +# Copyright 2015-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -12,6 +12,4 @@ class Message(): Version = 1 Directory = 2 Url = 3 - Headers = 4 - Cookies = 5 Queue = 6 diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 8da28f72..b335e7fe 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -18,6 +18,7 @@ class NijieExtractor(AsynchronousExtractor): category = "nijie" directory_fmt = ["{category}", "{artist-id}"] filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}" + cookiedomain = "nijie.info" popup_url = "https://nijie.info/view_popup.php?id=" def __init__(self): @@ -62,6 +63,8 @@ class NijieExtractor(AsynchronousExtractor): def login(self): """Login and obtain session cookie""" + if self._check_cookies(("nemail", "nlogin")): + return username, password = self.auth_info() self.session.cookies = self._login_impl(username, password) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 3b7a8d91..5305f281 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -32,8 +32,6 @@ class PixivExtractor(Extractor): metadata = self.get_metadata() yield Message.Version, 1 - yield Message.Headers, self.session.headers - yield Message.Cookies, self.session.cookies yield Message.Directory, metadata for work in self.works(): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index e3fbc9a4..88b3ef7f 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014, 2015 Mike Fährmann +# Copyright 2014-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -35,7 +35,6 @@ class SankakuTagExtractor(AsynchronousExtractor): def items(self): data = self.get_job_metadata() yield Message.Version, 1 - yield Message.Headers, self.session.headers yield Message.Directory, data for image in self.get_images(): image.update(data) diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py index d2b2483d..b005f16c 100644 --- a/gallery_dl/extractor/seiga.py +++ b/gallery_dl/extractor/seiga.py @@ -17,6 +17,7 @@ from xml.etree import ElementTree class SeigaExtractor(Extractor): """Base class for seiga extractors""" category = "seiga" + cookiedomain = ".nicovideo.jp" def items(self): self.login() @@ -47,6 +48,8 @@ class SeigaExtractor(Extractor): def login(self): """Login and set necessary cookies""" + if self._check_cookies(("user_session",)): + return username, password = self.auth_info() self.session.cookies = self._login_impl(username, password) diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py index 384b4076..a82f735e 100644 --- a/gallery_dl/extractor/senmanga.py +++ b/gallery_dl/extractor/senmanga.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016 Mike Fährmann +# Copyright 2016-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor): data = self.get_job_metadata() yield Message.Version, 1 yield Message.Directory, data - yield Message.Headers, self.session.headers for i in range(int(data["count"])): page = str(i+1) data["page"] = page diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 162c5f37..d6c92ce5 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -79,12 +79,6 @@ class Job(): if self.pred_queue: self.handle_queue(msg[1]) - elif msg[0] == Message.Headers: - self.handle_headers(msg[1]) - - elif msg[0] == Message.Cookies: - self.handle_cookies(msg[1]) - elif msg[0] == Message.Version: if msg[1] != 1: raise "unsupported message-version ({}, {})".format( @@ -101,12 +95,6 @@ class Job(): def handle_queue(self, url): """Handle Message.Queue""" - def handle_headers(self, headers): - """Handle Message.Headers""" - - def handle_cookies(self, cookies): - """Handle Message.Cookies""" - def update_kwdict(self, kwdict): """Add 'category' and 'subcategory' keywords""" kwdict["category"] = self.extractor.category @@ -145,12 +133,6 @@ class DownloadJob(Job): except exception.NoExtractorError: self._write_unsupported(url) - def handle_headers(self, headers): - self.get_downloader("http:").set_headers(headers) - - def handle_cookies(self, cookies): - self.get_downloader("http:").set_cookies(cookies) - def get_downloader(self, url): """Return, and possibly construct, a downloader suitable for 'url'""" pos = url.find(":") @@ -160,7 +142,7 @@ class DownloadJob(Job): instance = self.downloaders.get(scheme) if instance is None: klass = downloader.find(scheme) - instance = klass(self.out) + instance = klass(self.extractor.session, self.out) self.downloaders[scheme] = instance return instance @@ -300,13 +282,10 @@ class DataJob(Job): # collect data try: for msg in self.extractor: - if msg[0] in (Message.Headers, Message.Cookies): - copy = (msg[0], dict(msg[1])) - else: - copy = [ - part.copy() if hasattr(part, "copy") else part - for part in msg - ] + copy = [ + part.copy() if hasattr(part, "copy") else part + for part in msg + ] self.data.append(copy) except Exception as exc: self.data.append((exc.__class__.__name__, str(exc))) diff --git a/gallery_dl/option.py b/gallery_dl/option.py index b0677647..f874177d 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -125,6 +125,11 @@ def build_parser(): metavar="SECONDS", action=ConfigAction, dest="timeout", type=float, help="Timeout for HTTP connections (defaut: no timeout)", ) + parser.add_argument( + "--cookies", + metavar="FILE", action=ConfigAction, dest="cookies", + help="File to load additional cookies from", + ) parser.add_argument( "-c", "--config", metavar="CFG", dest="cfgfiles", action="append", diff --git a/gallery_dl/util.py b/gallery_dl/util.py index c51c2349..9cd223b9 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -242,7 +242,7 @@ class OAuthSession(): self.session = session self.consumer_secret = consumer_secret self.token_secret = token_secret or "" - self.params = session.params + self.params = {} self.params["oauth_consumer_key"] = consumer_key self.params["oauth_token"] = token self.params["oauth_signature_method"] = "HMAC-SHA1" diff --git a/test/test_cookies.py b/test/test_cookies.py new file mode 100644 index 00000000..c85c09c8 --- /dev/null +++ b/test/test_cookies.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2017 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import unittest +from unittest import mock + +import logging +import tempfile +import http.cookiejar +from os.path import join + +import gallery_dl.config as config +import gallery_dl.extractor as extractor +from gallery_dl.extractor.message import Message + +CKEY = ("cookies",) + + +class TestCookiejar(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.path = tempfile.TemporaryDirectory() + + cls.cookiefile = join(cls.path.name, "cookies.txt") + with open(cls.cookiefile, "w") as file: + file.write("""# HTTP Cookie File +.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE +""") + + cls.invalid_cookiefile = join(cls.path.name, "invalid.txt") + with open(cls.invalid_cookiefile, "w") as file: + file.write("""# asd +.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE +""") + + @classmethod + def tearDownClass(cls): + cls.path.cleanup() + + def test_cookiefile(self): + config.set(CKEY, self.cookiefile) + + cookies = extractor.find("test:").session.cookies + self.assertEqual(len(cookies), 1) + + cookie = next(iter(cookies)) + self.assertEqual(cookie.domain, ".example.org") + self.assertEqual(cookie.path, "/") + self.assertEqual(cookie.name, "NAME") + self.assertEqual(cookie.value, "VALUE") + + def test_invalid_cookiefile(self): + self._test_warning(self.invalid_cookiefile, http.cookiejar.LoadError) + + def test_invalid_filename(self): + self._test_warning(join(self.path.name, "nothing"), FileNotFoundError) + + def _test_warning(self, filename, exc): + config.set(CKEY, filename) + log = logging.getLogger("test") + with mock.patch.object(log, "warning") as mock_warning: + cookies = extractor.find("test:").session.cookies + self.assertEqual(len(cookies), 0) + mock_warning.assert_called_once() + self.assertEqual(mock_warning.call_args[0][0], "cookies: %s") + self.assertIsInstance(mock_warning.call_args[0][1], exc) + + +class TestCookiedict(unittest.TestCase): + + def setUp(self): + self.cdict = {"NAME1": "VALUE1", "NAME2": "VALUE2"} + config.set(CKEY, self.cdict) + + def test_dict(self): + cookies = extractor.find("test:").session.cookies + self.assertEqual(len(cookies), len(self.cdict)) + self.assertEqual(sorted(cookies.keys()), sorted(self.cdict.keys())) + self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values())) + + def test_domain(self): + for category in ["batoto", "exhentai", "nijie", "seiga"]: + extr = _get_extractor(category) + cookies = extr.session.cookies + for key in self.cdict.keys(): + self.assertTrue(key in cookies) + for c in cookies: + self.assertEqual(c.domain, extr.cookiedomain) + + +class TestCookieLogin(unittest.TestCase): + + def test_cookie_login(self): + extr_cookies = { + "batoto": ("member_id", "pass_hash"), + "exhentai": ("ipb_member_id", "ipb_pass_hash"), + "nijie": ("nemail", "nlogin"), + "seiga": ("user_session",), + } + for category, cookienames in extr_cookies.items(): + cookies = {name: "value" for name in cookienames} + config.set(CKEY, cookies) + extr = _get_extractor(category) + with mock.patch.object(extr, "_login_impl") as mock_login: + extr.login() + mock_login.assert_not_called() + + +def _get_extractor(category): + for msg in extractor.find("test:" + category): + if msg[0] == Message.Queue: + return extractor.find(msg[1]) + + +if __name__ == "__main__": + unittest.main()