From c1a6862863bc1f1a3feab265e77bfe149566baa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 21 Jan 2020 21:59:36 +0100 Subject: [PATCH] implement functions to load/save cookies.txt files (closes #586) The methods of the standard library's MozillaCookieJar have several shortcomings (#HttpOnly_ cookies, 0 expiration timestamps, etc.) and require construction of an ultimately pointless CookieJar object. --- CHANGELOG.md | 2 ++ gallery_dl/extractor/common.py | 19 +++------- gallery_dl/util.py | 63 ++++++++++++++++++++++++++++++++++ gallery_dl/version.py | 2 +- 4 files changed, 70 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a367f853..7003ed57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## Unreleased + ## 1.12.3 - 2020-01-19 ### Additions - [hentaifoundry] extract more metadata ([#565](https://github.com/mikf/gallery-dl/issues/565)) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 55b15d44..20132b1f 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -16,7 +16,6 @@ import logging import datetime import requests import threading -import http.cookiejar from .message import Message from .. 
import config, text, util, exception, cloudflare @@ -197,13 +196,12 @@ class Extractor(): self._update_cookies_dict(cookies, self.cookiedomain) elif isinstance(cookies, str): cookiefile = util.expand_path(cookies) - cookiejar = http.cookiejar.MozillaCookieJar() try: - cookiejar.load(cookiefile) - except OSError as exc: + cookies = util.load_cookiestxt(cookiefile) + except Exception as exc: self.log.warning("cookies: %s", exc) else: - self._cookiejar.update(cookiejar) + self._update_cookies(cookies) self._cookiefile = cookiefile else: self.log.warning( @@ -218,11 +216,8 @@ class Extractor(): def _store_cookies(self): """Store the session's cookiejar in a cookies.txt file""" if self._cookiefile and self.config("cookies-update", True): - cookiejar = http.cookiejar.MozillaCookieJar() - for cookie in self._cookiejar: - cookiejar.set_cookie(cookie) try: - cookiejar.save(self._cookiefile) + util.save_cookiestxt(self._cookiefile, self._cookiejar) except OSError as exc: self.log.warning("cookies: %s", exc) @@ -491,12 +486,6 @@ def generate_extractors(extractor_data, symtable, classes): symtable[Extr.__name__] = prev = Extr -# Reduce strictness of the expected magic string in cookiejar files. -# (This allows the use of Wget-generated cookiejars without modification) -http.cookiejar.MozillaCookieJar.magic_re = re.compile( - "#( Netscape)? HTTP Cookie File", re.IGNORECASE) - - # Undo automatic pyOpenSSL injection by requests pyopenssl = config.get((), "pyopenssl", False) if not pyopenssl: diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 13bf80ed..6b247214 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -21,6 +21,7 @@ import datetime import operator import itertools import urllib.parse +from http.cookiejar import Cookie from email.utils import mktime_tz, parsedate_tz from . 
import text, exception @@ -135,6 +136,68 @@ def remove_directory(path): pass +def load_cookiestxt(path): + """Parse a Netscape cookies.txt file and return a list of its Cookies""" + cookies = [] + + with open(path) as fp: + for line in fp: + + # strip '#HttpOnly_' and trailing '\n' + if line.startswith("#HttpOnly_"): + line = line[10:] + if line[-1] == "\n": + line = line[:-1] + + # ignore empty lines and comments + if not line or line[0] in ("#", "$"): + continue + + domain, domain_specified, path, secure, expires, name, value = \ + line.split("\t") + if not name: + name = value + value = None + + cookies.append(Cookie( + 0, name, value, + None, False, + domain, + domain_specified == "TRUE", + domain.startswith("."), + path, False, + secure == "TRUE", + expires or None, + False, None, None, {}, + )) + + return cookies + + +def save_cookiestxt(path, cookies): + """Store 'cookies' in Netscape cookies.txt format""" + with open(path, "w") as fp: + fp.write("# Netscape HTTP Cookie File\n\n") + + for cookie in cookies: + if cookie.value is None: + name = "" + value = cookie.name + else: + name = cookie.name + value = cookie.value + + fp.write("\t".join(( + cookie.domain, + "TRUE" if cookie.domain.startswith(".") else "FALSE", + cookie.path, + "TRUE" if cookie.secure else "FALSE", + "0" if cookie.expires is None else str(cookie.expires), + name, + value, + )) + "\n") + + def code_to_language(code, default=None): """Map an ISO 639-1 language code to its actual name""" return CODES.get((code or "").lower(), default) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 36d729e5..d44d3ed4 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.12.3" +__version__ = "1.13.0-dev"