From 58e95a74874cf8a247cdf6ddd8a5c8ef68a52ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 30 Jun 2017 19:38:14 +0200 Subject: [PATCH 1/8] share extractor and downloader sessions There was never any "good" reason for the strict separation between extractors and downloaders. This change allows for reduced resource usage (probably unnoticeable) and less lines of code at the "cost" of tighter coupling. --- gallery_dl/downloader/http.py | 21 +++------------------ gallery_dl/extractor/booru.py | 2 +- gallery_dl/extractor/exhentai.py | 8 +------- gallery_dl/extractor/imgchili.py | 1 - gallery_dl/extractor/message.py | 4 +--- gallery_dl/extractor/pixiv.py | 2 -- gallery_dl/extractor/sankaku.py | 3 +-- gallery_dl/extractor/senmanga.py | 3 +-- gallery_dl/job.py | 31 +++++-------------------------- 9 files changed, 13 insertions(+), 62 deletions(-) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 33a6d55a..883265e8 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -6,10 +6,9 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Downloader module for http:// and https:// urls""" +"""Downloader module for http:// and https:// URLs""" import time -import requests import requests.exceptions as rexcepts import mimetypes import logging @@ -24,9 +23,9 @@ class Downloader(BasicDownloader): retries = config.interpolate(("downloader", "http", "retries",), 5) timeout = config.interpolate(("downloader", "http", "timeout",), None) - def __init__(self, output): + def __init__(self, session, output): BasicDownloader.__init__(self) - self.session = requests.session() + self.session = session self.out = output def download_impl(self, url, pathfmt): @@ -96,17 +95,3 @@ class Downloader(BasicDownloader): # output for unrecoverable errors self.out.error(pathfmt.path, msg, tries, 0) - - def set_headers(self, headers): - """Set headers for http requests""" - self.set_dict(self.session.headers, headers) - - def set_cookies(self, cookies): - """Set cookies for http requests""" - self.set_dict(self.session.cookies, cookies) - - @staticmethod - def set_dict(dest, src): - """Copy the contents of dictionary 'src' to 'dest'""" - dest.clear() - dest.update(src) diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 36fbfb15..f8013684 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -27,13 +27,13 @@ class BooruExtractor(Extractor): def __init__(self): Extractor.__init__(self) + self.session.headers.update(self.headers) self.params = {"limit": 50} self.setup() def items(self): yield Message.Version, 1 yield Message.Directory, self.get_job_metadata() - yield Message.Headers, self.headers for data in self.items_impl(): try: url = self.get_file_url(data) diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 9b0959bf..f5507989 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -50,9 +50,8 @@ class ExhentaiGalleryExtractor(Extractor): def items(self): self.login() + self.setup_headers() yield Message.Version, 1 - yield Message.Headers, self.setup_headers() - yield Message.Cookies, self.session.cookies url = "{}/g/{}/{}/".format(self.root, self.gid, self.token) response = self.session.get(url) @@ -76,14 +75,9 @@ class ExhentaiGalleryExtractor(Extractor): """Initialize headers""" self.session.headers.update({ "User-Agent": "Mozilla/5.0", - "Accept": "text/html,application/xhtml+xml," - "application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5", "Referer": self.root + "/", }) - headers = self.session.headers.copy() - headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5" - return headers def get_job_metadata(self, page): """Collect metadata for extractor-job""" diff --git a/gallery_dl/extractor/imgchili.py b/gallery_dl/extractor/imgchili.py index 18501ef1..2c61b431 100644 --- a/gallery_dl/extractor/imgchili.py +++ b/gallery_dl/extractor/imgchili.py @@ -27,7 +27,6 @@ class ImgchiliExtractor(Extractor): page = self.request(self.url, encoding="utf-8").text data = self.get_job_metadata(page) yield Message.Version, 1 - yield Message.Headers, self.session.headers yield Message.Directory, data for url, image in self.get_images(page): data.update(image) diff --git a/gallery_dl/extractor/message.py b/gallery_dl/extractor/message.py index 91482bf2..12689997 100644 --- a/gallery_dl/extractor/message.py +++ b/gallery_dl/extractor/message.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 Mike Fährmann +# Copyright 2015-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -12,6 +12,4 @@ class Message(): Version = 1 Directory = 2 Url = 3 - Headers = 4 - Cookies = 5 Queue = 6 diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index dbb3feef..5acd9873 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -31,8 +31,6 @@ class PixivExtractor(Extractor): metadata = self.get_metadata() yield Message.Version, 1 - yield Message.Headers, self.session.headers - yield Message.Cookies, self.session.cookies yield Message.Directory, metadata for work in self.works(): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index e3fbc9a4..88b3ef7f 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014, 2015 Mike Fährmann +# Copyright 2014-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -35,7 +35,6 @@ class SankakuTagExtractor(AsynchronousExtractor): def items(self): data = self.get_job_metadata() yield Message.Version, 1 - yield Message.Headers, self.session.headers yield Message.Directory, data for image in self.get_images(): image.update(data) diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py index 384b4076..a82f735e 100644 --- a/gallery_dl/extractor/senmanga.py +++ b/gallery_dl/extractor/senmanga.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016 Mike Fährmann +# Copyright 2016-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor): data = self.get_job_metadata() yield Message.Version, 1 yield Message.Directory, data - yield Message.Headers, self.session.headers for i in range(int(data["count"])): page = str(i+1) data["page"] = page diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 162c5f37..d6c92ce5 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -79,12 +79,6 @@ class Job(): if self.pred_queue: self.handle_queue(msg[1]) - elif msg[0] == Message.Headers: - self.handle_headers(msg[1]) - - elif msg[0] == Message.Cookies: - self.handle_cookies(msg[1]) - elif msg[0] == Message.Version: if msg[1] != 1: raise "unsupported message-version ({}, {})".format( @@ -101,12 +95,6 @@ class Job(): def handle_queue(self, url): """Handle Message.Queue""" - def handle_headers(self, headers): - """Handle Message.Headers""" - - def handle_cookies(self, cookies): - """Handle Message.Cookies""" - def update_kwdict(self, kwdict): """Add 'category' and 'subcategory' keywords""" kwdict["category"] = self.extractor.category @@ -145,12 +133,6 @@ class DownloadJob(Job): except exception.NoExtractorError: self._write_unsupported(url) - def handle_headers(self, headers): - self.get_downloader("http:").set_headers(headers) - - def handle_cookies(self, cookies): - self.get_downloader("http:").set_cookies(cookies) - def get_downloader(self, url): """Return, and possibly construct, a downloader suitable for 'url'""" pos = url.find(":") @@ -160,7 +142,7 @@ class DownloadJob(Job): instance = self.downloaders.get(scheme) if instance is None: klass = downloader.find(scheme) - instance = klass(self.out) + instance = klass(self.extractor.session, self.out) self.downloaders[scheme] = instance return instance @@ -300,13 +282,10 @@ class DataJob(Job): # collect data try: for msg in self.extractor: - if msg[0] in (Message.Headers, Message.Cookies): - copy = (msg[0], dict(msg[1])) - else: - copy = [ - part.copy() if hasattr(part, "copy") else part - for part in msg - ] + copy = [ + part.copy() if hasattr(part, "copy") else part + for part in msg + ] self.data.append(copy) except Exception as exc: self.data.append((exc.__class__.__name__, str(exc))) From a804a42e2306cd61c25fb3f0de9e13519b27c470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 3 Jul 2017 15:02:19 +0200 Subject: [PATCH 2/8] add '--cookies' command-line option --- gallery_dl/extractor/common.py | 10 ++++++++++ gallery_dl/option.py | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index abd556ab..23b39c1c 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -15,6 +15,7 @@ import queue import logging import requests import threading +import http.cookiejar from .message import Message from .. import config @@ -30,6 +31,15 @@ class Extractor(): self.session = requests.Session() self.log = logging.getLogger(self.category) + cookies = self.config("cookies") + if cookies: + try: + cj = http.cookiejar.MozillaCookieJar() + cj.load(cookies) + self.session.cookies = cj + except OSError as exc: + self.log.warning("cookies: %s", exc) + def __iter__(self): return self.items() diff --git a/gallery_dl/option.py b/gallery_dl/option.py index b0677647..f874177d 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -125,6 +125,11 @@ def build_parser(): metavar="SECONDS", action=ConfigAction, dest="timeout", type=float, help="Timeout for HTTP connections (defaut: no timeout)", ) + parser.add_argument( + "--cookies", + metavar="FILE", action=ConfigAction, dest="cookies", + help="File to load additional cookies from", + ) parser.add_argument( "-c", "--config", metavar="CFG", dest="cfgfiles", action="append", From 726c6f01ae18bb71211925fd83908be904d8a7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 7 Jul 2017 18:01:46 +0200 Subject: [PATCH 3/8] allow 'cookies' config option to be a dictionary --- gallery_dl/extractor/common.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 23b39c1c..d2355701 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -33,12 +33,15 @@ class Extractor(): cookies = self.config("cookies") if cookies: - try: - cj = http.cookiejar.MozillaCookieJar() - cj.load(cookies) - self.session.cookies = cj - except OSError as exc: - self.log.warning("cookies: %s", exc) + if isinstance(cookies, dict): + cj = cookies + else: + try: + cj = http.cookiejar.MozillaCookieJar() + cj.load(cookies) + except OSError as exc: + self.log.warning("cookies: %s", exc) + self.session.cookies.update(cj) def __iter__(self): return self.items() From 0610ae50006047eaeb49c7aecc6254d870fc3d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 17 Jul 2017 10:33:36 +0200 Subject: [PATCH 4/8] skip login if cookies are present --- gallery_dl/extractor/batoto.py | 5 ++++- gallery_dl/extractor/common.py | 9 +++++++++ gallery_dl/extractor/exhentai.py | 14 ++++---------- gallery_dl/extractor/nijie.py | 2 ++ gallery_dl/extractor/seiga.py | 2 ++ gallery_dl/util.py | 2 +- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 00d9a6d4..88ad06a9 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -19,9 +19,12 @@ class BatotoExtractor(): category = "batoto" scheme = "https" root = "https://bato.to" + cookienames = ("member_id", "pass_hash") def login(self): """Login and set necessary cookies""" + if self._check_cookies(self.cookienames, ".bato.to"): + return username, password = self.auth_info() if username: cookies = self._login_impl(username, password) @@ -53,7 +56,7 @@ class BatotoExtractor(): method="POST", params=params, data=data) if "Sign In - " in response.text: raise exception.AuthenticationError() - return {c: response.cookies[c] for c in ("member_id", "pass_hash")} + return {c: response.cookies[c] for c in self.cookienames} class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index d2355701..4926dbf9 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -80,6 +80,15 @@ class Extractor(): response.encoding = encoding return response + def _check_cookies(self, cookienames, domain=None): + """Return True if all 'cookienames' exist in the current session""" + for name in cookienames: + try: + self.session.cookies._find(name, domain) + except KeyError: + return False + return True + class AsynchronousExtractor(Extractor): diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index f5507989..30df13e8 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -36,6 +36,7 @@ class ExhentaiGalleryExtractor(Extractor): }), ] root = "https://exhentai.org" + cookienames = ("ipb_member_id", "ipb_pass_hash") def __init__(self, match): Extractor.__init__(self) @@ -176,6 +177,8 @@ class ExhentaiGalleryExtractor(Extractor): def login(self): """Login and set necessary cookies""" + if self._check_cookies(self.cookienames, ".exhentai.org"): + return username, password = self.auth_info() if not username: self.log.info("no username given; using e-hentai.org") @@ -191,15 +194,6 @@ class ExhentaiGalleryExtractor(Extractor): def _login_impl(self, username, password): """Actual login implementation""" self.log.info("Logging in as %s", username) - cnames = ["ipb_member_id", "ipb_pass_hash"] - - try: - cookies = self.config("cookies") - if isinstance(cookies, dict) and all(c in cookies for c in cnames): - return cookies - except TypeError: - pass - url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01" params = { "CookieDate": "1", @@ -215,4 +209,4 @@ class ExhentaiGalleryExtractor(Extractor): if "You are now logged in as:" not in response.text: raise exception.AuthenticationError() - return {c: response.cookies[c] for c in cnames} + return {c: response.cookies[c] for c in self.cookienames} diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 8da28f72..7a59f0c0 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -62,6 +62,8 @@ class NijieExtractor(AsynchronousExtractor): def login(self): """Login and obtain session cookie""" + if self._check_cookies(("nemail", "nlogin"), "nijie.info"): + return username, password = self.auth_info() self.session.cookies = self._login_impl(username, password) diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py index d2b2483d..adb65443 100644 --- a/gallery_dl/extractor/seiga.py +++ b/gallery_dl/extractor/seiga.py @@ -47,6 +47,8 @@ class SeigaExtractor(Extractor): def login(self): """Login and set necessary cookies""" + if self._check_cookies(("user_session",), ".nicovideo.jp"): + return username, password = self.auth_info() self.session.cookies = self._login_impl(username, password) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index c51c2349..9cd223b9 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -242,7 +242,7 @@ class OAuthSession(): self.session = session self.consumer_secret = consumer_secret self.token_secret = token_secret or "" - self.params = session.params + self.params = {} self.params["oauth_consumer_key"] = consumer_key self.params["oauth_token"] = token self.params["oauth_signature_method"] = "HMAC-SHA1" From 00a23fe2083a89ffee11ac88f665021a2e165ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 21 Jul 2017 18:32:56 +0200 Subject: [PATCH 5/8] update configuration.rst --- docs/configuration.rst | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 775b1d65..1432121e 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -224,7 +224,7 @@ Description The username to use when attempting to log in to another site. ``seiga`` modules and optional (but strongly recommended) for ``batoto`` and ``exhentai``. - This value can also be given via the ``-u/--username`` + This value can also be set via the ``-u/--username`` command-line option or by using a |.netrc|_ file. (see Authentication_) =========== ===== @@ -239,6 +239,20 @@ Description The password belonging to the username. =========== ===== +extractor.*.cookies +------------------- +=========== ===== +Type ``string`` or ``object`` +Default ``null`` +Description Source to read additional cookies from. + + * If this is a ``string``, it specifies the path of a + Mozilla/Netscape format cookies.txt file. + * If this is an ``object``, its key-value pairs, which should both + be ``strings``, will be used as cookie-names and -values. +=========== ===== + + Extractor-specific Options ========================== From 808f67ba7dbd2e4bfd8315fe4dc8990ba3551301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 22 Jul 2017 15:43:35 +0200 Subject: [PATCH 6/8] use 'cookiedomain' for cookies set by object-config-values otherwise these cookies would not be picked up by the _check_cookies() method. --- gallery_dl/extractor/batoto.py | 5 +++-- gallery_dl/extractor/common.py | 9 +++++++-- gallery_dl/extractor/exhentai.py | 5 +++-- gallery_dl/extractor/nijie.py | 3 ++- gallery_dl/extractor/seiga.py | 3 ++- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 88ad06a9..ea5f1824 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -20,17 +20,18 @@ class BatotoExtractor(): scheme = "https" root = "https://bato.to" cookienames = ("member_id", "pass_hash") + cookiedomain = ".bato.to" def login(self): """Login and set necessary cookies""" - if self._check_cookies(self.cookienames, ".bato.to"): + if self._check_cookies(self.cookienames): return username, password = self.auth_info() if username: cookies = self._login_impl(username, password) for key, value in cookies.items(): self.session.cookies.set( - key, value, domain=".bato.to", path="/") + key, value, domain=self.cookiedomain) @cache(maxage=7*24*60*60, keyarg=1) def _login_impl(self, username, password): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 4926dbf9..0fa40ef0 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -26,6 +26,7 @@ class Extractor(): subcategory = "" directory_fmt = ["{category}"] filename_fmt = "{filename}" + cookiedomain = "" def __init__(self): self.session = requests.Session() @@ -34,14 +35,16 @@ class Extractor(): cookies = self.config("cookies") if cookies: if isinstance(cookies, dict): - cj = cookies + setcookie = self.session.cookies.set + for name, value in cookies.items(): + setcookie(name, value, domain=self.cookiedomain) else: try: cj = http.cookiejar.MozillaCookieJar() cj.load(cookies) + self.session.cookies.update(cj) except OSError as exc: self.log.warning("cookies: %s", exc) - self.session.cookies.update(cj) def __iter__(self): return self.items() @@ -82,6 +85,8 @@ class Extractor(): def _check_cookies(self, cookienames, domain=None): """Return True if all 'cookienames' exist in the current session""" + if not domain and self.cookiedomain: + domain = self.cookiedomain for name in cookienames: try: self.session.cookies._find(name, domain) diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 30df13e8..2acfa6c5 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -37,6 +37,7 @@ class ExhentaiGalleryExtractor(Extractor): ] root = "https://exhentai.org" cookienames = ("ipb_member_id", "ipb_pass_hash") + cookiedomain = ".exhentai.org" def __init__(self, match): Extractor.__init__(self) @@ -177,7 +178,7 @@ class ExhentaiGalleryExtractor(Extractor): def login(self): """Login and set necessary cookies""" - if self._check_cookies(self.cookienames, ".exhentai.org"): + if self._check_cookies(self.cookienames): return username, password = self.auth_info() if not username: @@ -188,7 +189,7 @@ class ExhentaiGalleryExtractor(Extractor): cookies = self._login_impl(username, password) for key, value in cookies.items(): self.session.cookies.set( - key, value, domain=".exhentai.org", path="/") + key, value, domain=self.cookiedomain) @cache(maxage=90*24*60*60, keyarg=1) def _login_impl(self, username, password): diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 7a59f0c0..b335e7fe 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -18,6 +18,7 @@ class NijieExtractor(AsynchronousExtractor): category = "nijie" directory_fmt = ["{category}", "{artist-id}"] filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}" + cookiedomain = "nijie.info" popup_url = "https://nijie.info/view_popup.php?id=" def __init__(self): @@ -62,7 +63,7 @@ class NijieExtractor(AsynchronousExtractor): def login(self): """Login and obtain session cookie""" - if self._check_cookies(("nemail", "nlogin"), "nijie.info"): + if self._check_cookies(("nemail", "nlogin")): return username, password = self.auth_info() self.session.cookies = self._login_impl(username, password) diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py index adb65443..b005f16c 100644 --- a/gallery_dl/extractor/seiga.py +++ b/gallery_dl/extractor/seiga.py @@ -17,6 +17,7 @@ from xml.etree import ElementTree class SeigaExtractor(Extractor): """Base class for seiga extractors""" category = "seiga" + cookiedomain = ".nicovideo.jp" def items(self): self.login() @@ -47,7 +48,7 @@ class SeigaExtractor(Extractor): def login(self): """Login and set necessary cookies""" - if self._check_cookies(("user_session",), ".nicovideo.jp"): + if self._check_cookies(("user_session",)): return username, password = self.auth_info() self.session.cookies = self._login_impl(username, password) From 55f048d02b2d3cdaaef1e54a60f45e8737c7fe84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 24 Jul 2017 18:33:42 +0200 Subject: [PATCH 7/8] ignore case of cookiejar magic strings --- gallery_dl/extractor/common.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 0fa40ef0..b43a2edd 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -9,6 +9,7 @@ """Common classes and constants used by extractor modules.""" import os +import re import time import netrc import queue @@ -186,6 +187,13 @@ def safe_request(session, url, method="GET", *args, **kwargs): return r +# Reduce strictness of the expected magic string in cookie jar files. +# (This allows the use of Wget-generated cookiejar files without modification) + +http.cookiejar.MozillaCookieJar.magic_re = re.compile( + "#( Netscape)? HTTP Cookie File", re.IGNORECASE) + + # The first import of requests happens inside this file. # If we are running on Windows and the from requests expected certificate file # is missing (which happens in a standalone executable from py2exe), the From 6db93a8b9e18406e23c116b225454514f29eb8e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 25 Jul 2017 14:02:53 +0200 Subject: [PATCH 8/8] add cookie tests --- test/test_cookies.py | 123 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 test/test_cookies.py diff --git a/test/test_cookies.py b/test/test_cookies.py new file mode 100644 index 00000000..c85c09c8 --- /dev/null +++ b/test/test_cookies.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2017 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import unittest +from unittest import mock + +import logging +import tempfile +import http.cookiejar +from os.path import join + +import gallery_dl.config as config +import gallery_dl.extractor as extractor +from gallery_dl.extractor.message import Message + +CKEY = ("cookies",) + + +class TestCookiejar(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.path = tempfile.TemporaryDirectory() + + cls.cookiefile = join(cls.path.name, "cookies.txt") + with open(cls.cookiefile, "w") as file: + file.write("""# HTTP Cookie File +.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE +""") + + cls.invalid_cookiefile = join(cls.path.name, "invalid.txt") + with open(cls.invalid_cookiefile, "w") as file: + file.write("""# asd +.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE +""") + + @classmethod + def tearDownClass(cls): + cls.path.cleanup() + + def test_cookiefile(self): + config.set(CKEY, self.cookiefile) + + cookies = extractor.find("test:").session.cookies + self.assertEqual(len(cookies), 1) + + cookie = next(iter(cookies)) + self.assertEqual(cookie.domain, ".example.org") + self.assertEqual(cookie.path, "/") + self.assertEqual(cookie.name, "NAME") + self.assertEqual(cookie.value, "VALUE") + + def test_invalid_cookiefile(self): + self._test_warning(self.invalid_cookiefile, http.cookiejar.LoadError) + + def test_invalid_filename(self): + self._test_warning(join(self.path.name, "nothing"), FileNotFoundError) + + def _test_warning(self, filename, exc): + config.set(CKEY, filename) + log = logging.getLogger("test") + with mock.patch.object(log, "warning") as mock_warning: + cookies = extractor.find("test:").session.cookies + self.assertEqual(len(cookies), 0) + mock_warning.assert_called_once() + self.assertEqual(mock_warning.call_args[0][0], "cookies: %s") + self.assertIsInstance(mock_warning.call_args[0][1], exc) + + +class TestCookiedict(unittest.TestCase): + + def setUp(self): + self.cdict = {"NAME1": "VALUE1", "NAME2": "VALUE2"} + config.set(CKEY, self.cdict) + + def test_dict(self): + cookies = extractor.find("test:").session.cookies + self.assertEqual(len(cookies), len(self.cdict)) + self.assertEqual(sorted(cookies.keys()), sorted(self.cdict.keys())) + self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values())) + + def test_domain(self): + for category in ["batoto", "exhentai", "nijie", "seiga"]: + extr = _get_extractor(category) + cookies = extr.session.cookies + for key in self.cdict.keys(): + self.assertTrue(key in cookies) + for c in cookies: + self.assertEqual(c.domain, extr.cookiedomain) + + +class TestCookieLogin(unittest.TestCase): + + def test_cookie_login(self): + extr_cookies = { + "batoto": ("member_id", "pass_hash"), + "exhentai": ("ipb_member_id", "ipb_pass_hash"), + "nijie": ("nemail", "nlogin"), + "seiga": ("user_session",), + } + for category, cookienames in extr_cookies.items(): + cookies = {name: "value" for name in cookienames} + config.set(CKEY, cookies) + extr = _get_extractor(category) + with mock.patch.object(extr, "_login_impl") as mock_login: + extr.login() + mock_login.assert_not_called() + + +def _get_extractor(category): + for msg in extractor.find("test:" + category): + if msg[0] == Message.Queue: + return extractor.find(msg[1]) + + +if __name__ == "__main__": + unittest.main()