implement functions to load/save cookies.txt files (closes #586)
The methods of the standard library's MozillaCookieJar have several shortcomings (no support for #HttpOnly_ cookies, mishandling of 0 expiration timestamps, etc.) and require the construction of an ultimately pointless CookieJar object.
This commit is contained in:
@@ -1,5 +1,7 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
## 1.12.3 - 2020-01-19
|
## 1.12.3 - 2020-01-19
|
||||||
### Additions
|
### Additions
|
||||||
- [hentaifoundry] extract more metadata ([#565](https://github.com/mikf/gallery-dl/issues/565))
|
- [hentaifoundry] extract more metadata ([#565](https://github.com/mikf/gallery-dl/issues/565))
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ import logging
|
|||||||
import datetime
|
import datetime
|
||||||
import requests
|
import requests
|
||||||
import threading
|
import threading
|
||||||
import http.cookiejar
|
|
||||||
from .message import Message
|
from .message import Message
|
||||||
from .. import config, text, util, exception, cloudflare
|
from .. import config, text, util, exception, cloudflare
|
||||||
|
|
||||||
@@ -197,13 +196,12 @@ class Extractor():
|
|||||||
self._update_cookies_dict(cookies, self.cookiedomain)
|
self._update_cookies_dict(cookies, self.cookiedomain)
|
||||||
elif isinstance(cookies, str):
|
elif isinstance(cookies, str):
|
||||||
cookiefile = util.expand_path(cookies)
|
cookiefile = util.expand_path(cookies)
|
||||||
cookiejar = http.cookiejar.MozillaCookieJar()
|
|
||||||
try:
|
try:
|
||||||
cookiejar.load(cookiefile)
|
cookies = util.load_cookiestxt(cookiefile)
|
||||||
except OSError as exc:
|
except Exception as exc:
|
||||||
self.log.warning("cookies: %s", exc)
|
self.log.warning("cookies: %s", exc)
|
||||||
else:
|
else:
|
||||||
self._cookiejar.update(cookiejar)
|
self._update_cookies(cookies)
|
||||||
self._cookiefile = cookiefile
|
self._cookiefile = cookiefile
|
||||||
else:
|
else:
|
||||||
self.log.warning(
|
self.log.warning(
|
||||||
@@ -218,11 +216,8 @@ class Extractor():
|
|||||||
def _store_cookies(self):
|
def _store_cookies(self):
|
||||||
"""Store the session's cookiejar in a cookies.txt file"""
|
"""Store the session's cookiejar in a cookies.txt file"""
|
||||||
if self._cookiefile and self.config("cookies-update", True):
|
if self._cookiefile and self.config("cookies-update", True):
|
||||||
cookiejar = http.cookiejar.MozillaCookieJar()
|
|
||||||
for cookie in self._cookiejar:
|
|
||||||
cookiejar.set_cookie(cookie)
|
|
||||||
try:
|
try:
|
||||||
cookiejar.save(self._cookiefile)
|
util.save_cookiestxt(self._cookiefile, self._cookiejar)
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
self.log.warning("cookies: %s", exc)
|
self.log.warning("cookies: %s", exc)
|
||||||
|
|
||||||
@@ -491,12 +486,6 @@ def generate_extractors(extractor_data, symtable, classes):
|
|||||||
symtable[Extr.__name__] = prev = Extr
|
symtable[Extr.__name__] = prev = Extr
|
||||||
|
|
||||||
|
|
||||||
# Reduce strictness of the expected magic string in cookiejar files.
|
|
||||||
# (This allows the use of Wget-generated cookiejars without modification)
|
|
||||||
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
|
|
||||||
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
|
|
||||||
|
|
||||||
|
|
||||||
# Undo automatic pyOpenSSL injection by requests
|
# Undo automatic pyOpenSSL injection by requests
|
||||||
pyopenssl = config.get((), "pyopenssl", False)
|
pyopenssl = config.get((), "pyopenssl", False)
|
||||||
if not pyopenssl:
|
if not pyopenssl:
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import datetime
|
|||||||
import operator
|
import operator
|
||||||
import itertools
|
import itertools
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
from http.cookiejar import Cookie
|
||||||
from email.utils import mktime_tz, parsedate_tz
|
from email.utils import mktime_tz, parsedate_tz
|
||||||
from . import text, exception
|
from . import text, exception
|
||||||
|
|
||||||
@@ -135,6 +136,68 @@ def remove_directory(path):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def load_cookiestxt(path):
    """Parse a Netscape cookies.txt file and return a list of its Cookies

    Entries prefixed with '#HttpOnly_' are parsed like regular entries.
    Blank lines and lines starting with '#' or '$' are ignored.
    An empty or '0' expiration timestamp denotes a session cookie and
    results in a Cookie object with 'expires' set to None.

    Raises OSError if 'path' cannot be opened and ValueError if an entry
    does not consist of exactly 7 tab-separated fields.
    """
    cookies = []

    with open(path) as fp:
        for line in fp:

            # strip '#HttpOnly_' and trailing '\n'
            if line.startswith("#HttpOnly_"):
                line = line[10:]
            if line.endswith("\n"):
                line = line[:-1]

            # ignore empty lines and comments
            if not line or line[0] in ("#", "$"):
                continue

            fields = line.split("\t")
            if len(fields) != 7:
                raise ValueError(
                    "invalid cookies.txt entry: expected 7 tab-separated "
                    "fields, got {}".format(len(fields)))
            (domain, domain_specified, cookie_path,
             secure, expires, name, value) = fields

            # a value-less cookie is stored with an empty name field
            # and its actual name in the value field
            if not name:
                name = value
                value = None

            # "0" is a non-empty string and therefore truthy; it has to be
            # checked for explicitly, or the cookie would get an expiration
            # time of 0 and count as already expired
            if expires in ("", "0"):
                expires = None

            cookies.append(Cookie(
                0, name, value,
                None, False,
                domain,
                domain_specified == "TRUE",
                domain.startswith("."),
                cookie_path, False,
                secure == "TRUE",
                expires,
                False, None, None, {},
            ))

    return cookies
|
||||||
|
|
||||||
|
|
||||||
|
def save_cookiestxt(path, cookies):
    """Write 'cookies' to the file at 'path' in Netscape cookies.txt format

    'cookies' is any iterable of Cookie objects. Each one is written as a
    single line of 7 tab-separated fields after the file header.
    A cookie without expiration time gets a timestamp of '0'.
    """
    flags = ("FALSE", "TRUE")

    with open(path, "w") as fp:
        fp.write("# Netscape HTTP Cookie File\n\n")

        for cookie in cookies:
            # a value-less cookie is written with an empty name field
            # and its actual name in the value field
            if cookie.value is None:
                name, value = "", cookie.name
            else:
                name, value = cookie.name, cookie.value

            domain = cookie.domain
            expires = cookie.expires
            fields = (
                domain,
                flags[domain.startswith(".")],
                cookie.path,
                flags[bool(cookie.secure)],
                "0" if expires is None else str(expires),
                name,
                value,
            )
            fp.write("\t".join(fields) + "\n")
|
||||||
|
|
||||||
|
|
||||||
def code_to_language(code, default=None):
|
def code_to_language(code, default=None):
|
||||||
"""Map an ISO 639-1 language code to its actual name"""
|
"""Map an ISO 639-1 language code to its actual name"""
|
||||||
return CODES.get((code or "").lower(), default)
|
return CODES.get((code or "").lower(), default)
|
||||||
|
|||||||
@@ -6,4 +6,4 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
__version__ = "1.12.3"
|
__version__ = "1.13.0-dev"
|
||||||
|
|||||||
Reference in New Issue
Block a user