small code changes and cleanups

This commit is contained in:
Mike Fährmann
2019-03-12 21:02:11 +01:00
parent 6f57d44ec2
commit 591a07f20c
3 changed files with 38 additions and 38 deletions

View File

@@ -30,14 +30,16 @@ class Extractor():
filename_fmt = "{filename}.{extension}" filename_fmt = "{filename}.{extension}"
archive_fmt = "" archive_fmt = ""
cookiedomain = "" cookiedomain = ""
root = ""
test = None
def __init__(self, match): def __init__(self, match):
self.session = requests.Session() self.session = requests.Session()
self.log = logging.getLogger(self.category) self.log = logging.getLogger(self.category)
self.url = match.string self.url = match.string
self._set_headers() self._init_headers()
self._set_cookies() self._init_cookies()
self._set_proxies() self._init_proxies()
self._retries = self.config("retries", 5) self._retries = self.config("retries", 5)
self._timeout = self.config("timeout", 30) self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True) self._verify = self.config("verify", True)
@@ -121,14 +123,14 @@ class Extractor():
return username, password return username, password
def _set_headers(self): def _init_headers(self):
"""Set additional headers for the 'session' object""" """Set additional headers for the 'session' object"""
self.session.headers["Accept-Language"] = "en-US,en;q=0.5" self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
self.session.headers["User-Agent"] = self.config( self.session.headers["User-Agent"] = self.config(
"user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:62.0) " "user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:62.0) "
"Gecko/20100101 Firefox/62.0")) "Gecko/20100101 Firefox/62.0"))
def _set_proxies(self): def _init_proxies(self):
"""Update the session's proxy map""" """Update the session's proxy map"""
proxies = self.config("proxy") proxies = self.config("proxy")
if proxies: if proxies:
@@ -142,21 +144,40 @@ class Extractor():
else: else:
self.log.warning("invalid proxy specifier: %s", proxies) self.log.warning("invalid proxy specifier: %s", proxies)
def _set_cookies(self): def _init_cookies(self):
"""Populate the session's cookiejar""" """Populate the session's cookiejar"""
cookies = self.config("cookies") cookies = self.config("cookies")
if cookies: if cookies:
if isinstance(cookies, dict): if isinstance(cookies, dict):
setcookie = self.session.cookies.set self._update_cookies_dict(cookies, self.cookiedomain)
for name, value in cookies.items():
setcookie(name, value, domain=self.cookiedomain)
else: else:
cookiejar = http.cookiejar.MozillaCookieJar()
try: try:
cj = http.cookiejar.MozillaCookieJar() cookiejar.load(cookies)
cj.load(cookies)
self.session.cookies.update(cj)
except OSError as exc: except OSError as exc:
self.log.warning("cookies: %s", exc) self.log.warning("cookies: %s", exc)
else:
self.session.cookies.update(cookiejar)
def _update_cookies(self, cookies, *, domain=""):
"""Update the session's cookiejar with 'cookies'"""
if isinstance(cookies, dict):
self._update_cookies_dict(cookies, domain or self.cookiedomain)
else:
setcookie = self.session.cookies.set_cookie
try:
cookies = iter(cookies)
except TypeError:
setcookie(cookies)
else:
for cookie in cookies:
setcookie(cookie)
def _update_cookies_dict(self, cookiedict, domain):
"""Update cookiejar with name-value pairs from a dict"""
setcookie = self.session.cookies.set
for name, value in cookiedict.items():
setcookie(name, value, domain=domain)
def _check_cookies(self, cookienames, *, domain=""): def _check_cookies(self, cookienames, *, domain=""):
"""Check if all 'cookienames' are in the session's cookiejar""" """Check if all 'cookienames' are in the session's cookiejar"""
@@ -169,30 +190,13 @@ class Extractor():
return False return False
return True return True
def _update_cookies(self, cookies, *, domain=""):
"""Update the session's cookiejar with 'cookies'"""
if isinstance(cookies, dict):
if not domain:
domain = self.cookiedomain
setcookie = self.session.cookies.set
for name, value in cookies.items():
setcookie(name, value, domain=domain)
else:
try:
cookies = iter(cookies)
except TypeError:
cookies = (cookies,)
setcookie = self.session.cookies.set_cookie
for cookie in cookies:
setcookie(cookie)
@classmethod @classmethod
def _get_tests(cls): def _get_tests(cls):
"""Yield an extractor's test cases as (URL, RESULTS) tuples""" """Yield an extractor's test cases as (URL, RESULTS) tuples"""
if not hasattr(cls, "test") or not cls.test: tests = cls.test
if not tests:
return return
tests = cls.test
if len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)): if len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)):
tests = (tests,) tests = (tests,)
@@ -212,7 +216,6 @@ class ChapterExtractor(Extractor):
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}") "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
archive_fmt = ( archive_fmt = (
"{manga}_{chapter}{chapter_minor}_{page}") "{manga}_{chapter}{chapter_minor}_{page}")
root = ""
def __init__(self, match, url=None): def __init__(self, match, url=None):
Extractor.__init__(self, match) Extractor.__init__(self, match)
@@ -259,7 +262,6 @@ class MangaExtractor(Extractor):
categorytransfer = True categorytransfer = True
chapterclass = None chapterclass = None
reverse = True reverse = True
root = ""
def __init__(self, match, url=None): def __init__(self, match, url=None):
Extractor.__init__(self, match) Extractor.__init__(self, match)

View File

@@ -23,14 +23,13 @@ class FoolfuukaThreadExtractor(SharedConfigMixin, Extractor):
filename_fmt = "{media[media]}" filename_fmt = "{media[media]}"
archive_fmt = "{board[shortname]}_{num}_{timestamp}" archive_fmt = "{board[shortname]}_{num}_{timestamp}"
pattern_fmt = r"/([^/]+)/thread/(\d+)" pattern_fmt = r"/([^/]+)/thread/(\d+)"
resolve = "default" external = "default"
root = ""
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.board, self.thread = match.groups() self.board, self.thread = match.groups()
self.session.headers["Referer"] = self.root self.session.headers["Referer"] = self.root
if self.resolve == "direct": if self.external == "direct":
self.remote = self._remote_direct self.remote = self._remote_direct
def items(self): def items(self):
@@ -108,7 +107,7 @@ EXTRACTORS = {
}, },
"b4k": { "b4k": {
"root": "https://arch.b4k.co", "root": "https://arch.b4k.co",
"extra": {"resolve": "direct"}, "extra": {"external": "direct"},
"test-thread": ("https://arch.b4k.co/meta/thread/196/", { "test-thread": ("https://arch.b4k.co/meta/thread/196/", {
"url": "cdd4931ac1cd00264b0b54e2e3b0d8f6ae48957e", "url": "cdd4931ac1cd00264b0b54e2e3b0d8f6ae48957e",
}), }),

View File

@@ -19,7 +19,6 @@ class ShopifyExtractor(SharedConfigMixin, Extractor):
basecategory = "shopify" basecategory = "shopify"
filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}" filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
archive_fmt = "{id}" archive_fmt = "{id}"
root = ""
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) Extractor.__init__(self, match)