small code changes and cleanups
This commit is contained in:
@@ -30,14 +30,16 @@ class Extractor():
|
|||||||
filename_fmt = "{filename}.{extension}"
|
filename_fmt = "{filename}.{extension}"
|
||||||
archive_fmt = ""
|
archive_fmt = ""
|
||||||
cookiedomain = ""
|
cookiedomain = ""
|
||||||
|
root = ""
|
||||||
|
test = None
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.log = logging.getLogger(self.category)
|
self.log = logging.getLogger(self.category)
|
||||||
self.url = match.string
|
self.url = match.string
|
||||||
self._set_headers()
|
self._init_headers()
|
||||||
self._set_cookies()
|
self._init_cookies()
|
||||||
self._set_proxies()
|
self._init_proxies()
|
||||||
self._retries = self.config("retries", 5)
|
self._retries = self.config("retries", 5)
|
||||||
self._timeout = self.config("timeout", 30)
|
self._timeout = self.config("timeout", 30)
|
||||||
self._verify = self.config("verify", True)
|
self._verify = self.config("verify", True)
|
||||||
@@ -121,14 +123,14 @@ class Extractor():
|
|||||||
|
|
||||||
return username, password
|
return username, password
|
||||||
|
|
||||||
def _set_headers(self):
|
def _init_headers(self):
|
||||||
"""Set additional headers for the 'session' object"""
|
"""Set additional headers for the 'session' object"""
|
||||||
self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
|
self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
self.session.headers["User-Agent"] = self.config(
|
self.session.headers["User-Agent"] = self.config(
|
||||||
"user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:62.0) "
|
"user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:62.0) "
|
||||||
"Gecko/20100101 Firefox/62.0"))
|
"Gecko/20100101 Firefox/62.0"))
|
||||||
|
|
||||||
def _set_proxies(self):
|
def _init_proxies(self):
|
||||||
"""Update the session's proxy map"""
|
"""Update the session's proxy map"""
|
||||||
proxies = self.config("proxy")
|
proxies = self.config("proxy")
|
||||||
if proxies:
|
if proxies:
|
||||||
@@ -142,21 +144,40 @@ class Extractor():
|
|||||||
else:
|
else:
|
||||||
self.log.warning("invalid proxy specifier: %s", proxies)
|
self.log.warning("invalid proxy specifier: %s", proxies)
|
||||||
|
|
||||||
def _set_cookies(self):
|
def _init_cookies(self):
|
||||||
"""Populate the session's cookiejar"""
|
"""Populate the session's cookiejar"""
|
||||||
cookies = self.config("cookies")
|
cookies = self.config("cookies")
|
||||||
if cookies:
|
if cookies:
|
||||||
if isinstance(cookies, dict):
|
if isinstance(cookies, dict):
|
||||||
setcookie = self.session.cookies.set
|
self._update_cookies_dict(cookies, self.cookiedomain)
|
||||||
for name, value in cookies.items():
|
|
||||||
setcookie(name, value, domain=self.cookiedomain)
|
|
||||||
else:
|
else:
|
||||||
|
cookiejar = http.cookiejar.MozillaCookieJar()
|
||||||
try:
|
try:
|
||||||
cj = http.cookiejar.MozillaCookieJar()
|
cookiejar.load(cookies)
|
||||||
cj.load(cookies)
|
|
||||||
self.session.cookies.update(cj)
|
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
self.log.warning("cookies: %s", exc)
|
self.log.warning("cookies: %s", exc)
|
||||||
|
else:
|
||||||
|
self.session.cookies.update(cookiejar)
|
||||||
|
|
||||||
|
def _update_cookies(self, cookies, *, domain=""):
|
||||||
|
"""Update the session's cookiejar with 'cookies'"""
|
||||||
|
if isinstance(cookies, dict):
|
||||||
|
self._update_cookies_dict(cookies, domain or self.cookiedomain)
|
||||||
|
else:
|
||||||
|
setcookie = self.session.cookies.set_cookie
|
||||||
|
try:
|
||||||
|
cookies = iter(cookies)
|
||||||
|
except TypeError:
|
||||||
|
setcookie(cookies)
|
||||||
|
else:
|
||||||
|
for cookie in cookies:
|
||||||
|
setcookie(cookie)
|
||||||
|
|
||||||
|
def _update_cookies_dict(self, cookiedict, domain):
|
||||||
|
"""Update cookiejar with name-value pairs from a dict"""
|
||||||
|
setcookie = self.session.cookies.set
|
||||||
|
for name, value in cookiedict.items():
|
||||||
|
setcookie(name, value, domain=domain)
|
||||||
|
|
||||||
def _check_cookies(self, cookienames, *, domain=""):
|
def _check_cookies(self, cookienames, *, domain=""):
|
||||||
"""Check if all 'cookienames' are in the session's cookiejar"""
|
"""Check if all 'cookienames' are in the session's cookiejar"""
|
||||||
@@ -169,30 +190,13 @@ class Extractor():
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _update_cookies(self, cookies, *, domain=""):
|
|
||||||
"""Update the session's cookiejar with 'cookies'"""
|
|
||||||
if isinstance(cookies, dict):
|
|
||||||
if not domain:
|
|
||||||
domain = self.cookiedomain
|
|
||||||
setcookie = self.session.cookies.set
|
|
||||||
for name, value in cookies.items():
|
|
||||||
setcookie(name, value, domain=domain)
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
cookies = iter(cookies)
|
|
||||||
except TypeError:
|
|
||||||
cookies = (cookies,)
|
|
||||||
setcookie = self.session.cookies.set_cookie
|
|
||||||
for cookie in cookies:
|
|
||||||
setcookie(cookie)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _get_tests(cls):
|
def _get_tests(cls):
|
||||||
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
|
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
|
||||||
if not hasattr(cls, "test") or not cls.test:
|
tests = cls.test
|
||||||
|
if not tests:
|
||||||
return
|
return
|
||||||
|
|
||||||
tests = cls.test
|
|
||||||
if len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)):
|
if len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)):
|
||||||
tests = (tests,)
|
tests = (tests,)
|
||||||
|
|
||||||
@@ -212,7 +216,6 @@ class ChapterExtractor(Extractor):
|
|||||||
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
|
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
|
||||||
archive_fmt = (
|
archive_fmt = (
|
||||||
"{manga}_{chapter}{chapter_minor}_{page}")
|
"{manga}_{chapter}{chapter_minor}_{page}")
|
||||||
root = ""
|
|
||||||
|
|
||||||
def __init__(self, match, url=None):
|
def __init__(self, match, url=None):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
@@ -259,7 +262,6 @@ class MangaExtractor(Extractor):
|
|||||||
categorytransfer = True
|
categorytransfer = True
|
||||||
chapterclass = None
|
chapterclass = None
|
||||||
reverse = True
|
reverse = True
|
||||||
root = ""
|
|
||||||
|
|
||||||
def __init__(self, match, url=None):
|
def __init__(self, match, url=None):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|||||||
@@ -23,14 +23,13 @@ class FoolfuukaThreadExtractor(SharedConfigMixin, Extractor):
|
|||||||
filename_fmt = "{media[media]}"
|
filename_fmt = "{media[media]}"
|
||||||
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
|
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
|
||||||
pattern_fmt = r"/([^/]+)/thread/(\d+)"
|
pattern_fmt = r"/([^/]+)/thread/(\d+)"
|
||||||
resolve = "default"
|
external = "default"
|
||||||
root = ""
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.board, self.thread = match.groups()
|
self.board, self.thread = match.groups()
|
||||||
self.session.headers["Referer"] = self.root
|
self.session.headers["Referer"] = self.root
|
||||||
if self.resolve == "direct":
|
if self.external == "direct":
|
||||||
self.remote = self._remote_direct
|
self.remote = self._remote_direct
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
@@ -108,7 +107,7 @@ EXTRACTORS = {
|
|||||||
},
|
},
|
||||||
"b4k": {
|
"b4k": {
|
||||||
"root": "https://arch.b4k.co",
|
"root": "https://arch.b4k.co",
|
||||||
"extra": {"resolve": "direct"},
|
"extra": {"external": "direct"},
|
||||||
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
|
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
|
||||||
"url": "cdd4931ac1cd00264b0b54e2e3b0d8f6ae48957e",
|
"url": "cdd4931ac1cd00264b0b54e2e3b0d8f6ae48957e",
|
||||||
}),
|
}),
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ class ShopifyExtractor(SharedConfigMixin, Extractor):
|
|||||||
basecategory = "shopify"
|
basecategory = "shopify"
|
||||||
filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
|
filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
|
||||||
archive_fmt = "{id}"
|
archive_fmt = "{id}"
|
||||||
root = ""
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|||||||
Reference in New Issue
Block a user