simplify if statements by using walrus operators (#7671)
This commit is contained in:
@@ -121,14 +121,12 @@ def main():
|
|||||||
util.compile_expression = util.compile_expression_defaultdict
|
util.compile_expression = util.compile_expression_defaultdict
|
||||||
|
|
||||||
# format string separator
|
# format string separator
|
||||||
separator = config.get((), "format-separator")
|
if separator := config.get((), "format-separator"):
|
||||||
if separator:
|
|
||||||
from . import formatter
|
from . import formatter
|
||||||
formatter._SEPARATOR = separator
|
formatter._SEPARATOR = separator
|
||||||
|
|
||||||
# eval globals
|
# eval globals
|
||||||
path = config.get((), "globals")
|
if path := config.get((), "globals"):
|
||||||
if path:
|
|
||||||
util.GLOBALS.update(util.import_file(path).__dict__)
|
util.GLOBALS.update(util.import_file(path).__dict__)
|
||||||
|
|
||||||
# loglevels
|
# loglevels
|
||||||
@@ -140,13 +138,12 @@ def main():
|
|||||||
import platform
|
import platform
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
extra = ""
|
|
||||||
if util.EXECUTABLE:
|
if util.EXECUTABLE:
|
||||||
extra = f" - Executable ({version.__variant__})"
|
extra = f" - Executable ({version.__variant__})"
|
||||||
|
elif git_head := util.git_head():
|
||||||
|
extra = " - Git HEAD: " + git_head
|
||||||
else:
|
else:
|
||||||
git_head = util.git_head()
|
extra = ""
|
||||||
if git_head:
|
|
||||||
extra = " - Git HEAD: " + git_head
|
|
||||||
|
|
||||||
log.debug("Version %s%s", __version__, extra)
|
log.debug("Version %s%s", __version__, extra)
|
||||||
log.debug("Python %s - %s",
|
log.debug("Python %s - %s",
|
||||||
@@ -256,8 +253,7 @@ def main():
|
|||||||
))
|
))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
input_files = config.get((), "input-files")
|
if input_files := config.get((), "input-files"):
|
||||||
if input_files:
|
|
||||||
for input_file in input_files:
|
for input_file in input_files:
|
||||||
if isinstance(input_file, str):
|
if isinstance(input_file, str):
|
||||||
input_file = (input_file, None)
|
input_file = (input_file, None)
|
||||||
@@ -287,17 +283,15 @@ def main():
|
|||||||
input_manager.log = input_log = logging.getLogger("inputfile")
|
input_manager.log = input_log = logging.getLogger("inputfile")
|
||||||
|
|
||||||
# unsupported file logging handler
|
# unsupported file logging handler
|
||||||
handler = output.setup_logging_handler(
|
if handler := output.setup_logging_handler(
|
||||||
"unsupportedfile", fmt="{message}")
|
"unsupportedfile", fmt="{message}"):
|
||||||
if handler:
|
|
||||||
ulog = job.Job.ulog = logging.getLogger("unsupported")
|
ulog = job.Job.ulog = logging.getLogger("unsupported")
|
||||||
ulog.addHandler(handler)
|
ulog.addHandler(handler)
|
||||||
ulog.propagate = False
|
ulog.propagate = False
|
||||||
|
|
||||||
# error file logging handler
|
# error file logging handler
|
||||||
handler = output.setup_logging_handler(
|
if handler := output.setup_logging_handler(
|
||||||
"errorfile", fmt="{message}", mode="a")
|
"errorfile", fmt="{message}", mode="a"):
|
||||||
if handler:
|
|
||||||
elog = input_manager.err = logging.getLogger("errorfile")
|
elog = input_manager.err = logging.getLogger("errorfile")
|
||||||
elog.addHandler(handler)
|
elog.addHandler(handler)
|
||||||
elog.propagate = False
|
elog.propagate = False
|
||||||
@@ -319,8 +313,7 @@ def main():
|
|||||||
args.loglevel < logging.ERROR:
|
args.loglevel < logging.ERROR:
|
||||||
input_manager.progress(pformat)
|
input_manager.progress(pformat)
|
||||||
|
|
||||||
catmap = config.interpolate(("extractor",), "category-map")
|
if catmap := config.interpolate(("extractor",), "category-map"):
|
||||||
if catmap:
|
|
||||||
if catmap == "compat":
|
if catmap == "compat":
|
||||||
catmap = {
|
catmap = {
|
||||||
"coomer" : "coomerparty",
|
"coomer" : "coomerparty",
|
||||||
|
|||||||
@@ -103,14 +103,12 @@ def open_extern():
|
|||||||
openers = ("explorer", "notepad")
|
openers = ("explorer", "notepad")
|
||||||
else:
|
else:
|
||||||
openers = ("xdg-open", "open")
|
openers = ("xdg-open", "open")
|
||||||
editor = os.environ.get("EDITOR")
|
if editor := os.environ.get("EDITOR"):
|
||||||
if editor:
|
|
||||||
openers = (editor,) + openers
|
openers = (editor,) + openers
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
for opener in openers:
|
for opener in openers:
|
||||||
opener = shutil.which(opener)
|
if opener := shutil.which(opener):
|
||||||
if opener:
|
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
log.warning("Unable to find a program to open '%s' with", path)
|
log.warning("Unable to find a program to open '%s' with", path)
|
||||||
@@ -211,8 +209,7 @@ def load(files=None, strict=False, loads=util.json_loads):
|
|||||||
_files.append(pathfmt)
|
_files.append(pathfmt)
|
||||||
|
|
||||||
if "subconfigs" in conf:
|
if "subconfigs" in conf:
|
||||||
subconfigs = conf["subconfigs"]
|
if subconfigs := conf["subconfigs"]:
|
||||||
if subconfigs:
|
|
||||||
if isinstance(subconfigs, str):
|
if isinstance(subconfigs, str):
|
||||||
subconfigs = (subconfigs,)
|
subconfigs = (subconfigs,)
|
||||||
load(subconfigs, strict, loads)
|
load(subconfigs, strict, loads)
|
||||||
@@ -284,8 +281,7 @@ def accumulate(path, key, conf=_config):
|
|||||||
result = []
|
result = []
|
||||||
try:
|
try:
|
||||||
if key in conf:
|
if key in conf:
|
||||||
value = conf[key]
|
if value := conf[key]:
|
||||||
if value:
|
|
||||||
if isinstance(value, list):
|
if isinstance(value, list):
|
||||||
result.extend(value)
|
result.extend(value)
|
||||||
else:
|
else:
|
||||||
@@ -293,8 +289,7 @@ def accumulate(path, key, conf=_config):
|
|||||||
for p in path:
|
for p in path:
|
||||||
conf = conf[p]
|
conf = conf[p]
|
||||||
if key in conf:
|
if key in conf:
|
||||||
value = conf[key]
|
if value := conf[key]:
|
||||||
if value:
|
|
||||||
if isinstance(value, list):
|
if isinstance(value, list):
|
||||||
result[:0] = value
|
result[:0] = value
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -21,8 +21,7 @@ class DownloaderBase():
|
|||||||
extractor = job.extractor
|
extractor = job.extractor
|
||||||
self.log = job.get_logger("downloader." + self.scheme)
|
self.log = job.get_logger("downloader." + self.scheme)
|
||||||
|
|
||||||
opts = self._extractor_config(extractor)
|
if opts := self._extractor_config(extractor):
|
||||||
if opts:
|
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
self.config = self.config_opts
|
self.config = self.config_opts
|
||||||
|
|
||||||
@@ -60,8 +59,7 @@ class DownloaderBase():
|
|||||||
|
|
||||||
opts = {}
|
opts = {}
|
||||||
for cat, sub in reversed(path):
|
for cat, sub in reversed(path):
|
||||||
popts = self._extractor_opts(cat, sub)
|
if popts := self._extractor_opts(cat, sub):
|
||||||
if popts:
|
|
||||||
opts.update(popts)
|
opts.update(popts)
|
||||||
return opts
|
return opts
|
||||||
|
|
||||||
@@ -70,12 +68,10 @@ class DownloaderBase():
|
|||||||
if not cfg:
|
if not cfg:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
copts = cfg.get(self.scheme)
|
if copts := cfg.get(self.scheme):
|
||||||
if copts:
|
|
||||||
if subcategory in cfg:
|
if subcategory in cfg:
|
||||||
try:
|
try:
|
||||||
sopts = cfg[subcategory].get(self.scheme)
|
if sopts := cfg[subcategory].get(self.scheme):
|
||||||
if sopts:
|
|
||||||
opts = copts.copy()
|
opts = copts.copy()
|
||||||
opts.update(sopts)
|
opts.update(sopts)
|
||||||
return opts
|
return opts
|
||||||
|
|||||||
@@ -71,8 +71,7 @@ class HttpDownloader(DownloaderBase):
|
|||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
if self.rate:
|
if self.rate:
|
||||||
func = util.build_selection_func(self.rate, 0, text.parse_bytes)
|
func = util.build_selection_func(self.rate, 0, text.parse_bytes)
|
||||||
rmax = func.args[1] if hasattr(func, "args") else func()
|
if rmax := func.args[1] if hasattr(func, "args") else func():
|
||||||
if rmax:
|
|
||||||
if rmax < self.chunk_size:
|
if rmax < self.chunk_size:
|
||||||
# reduce chunk_size to allow for one iteration each second
|
# reduce chunk_size to allow for one iteration each second
|
||||||
self.chunk_size = rmax
|
self.chunk_size = rmax
|
||||||
@@ -141,15 +140,13 @@ class HttpDownloader(DownloaderBase):
|
|||||||
# collect HTTP headers
|
# collect HTTP headers
|
||||||
headers = {"Accept": "*/*"}
|
headers = {"Accept": "*/*"}
|
||||||
# file-specific headers
|
# file-specific headers
|
||||||
extra = kwdict.get("_http_headers")
|
if extra := kwdict.get("_http_headers"):
|
||||||
if extra:
|
|
||||||
headers.update(extra)
|
headers.update(extra)
|
||||||
# general headers
|
# general headers
|
||||||
if self.headers:
|
if self.headers:
|
||||||
headers.update(self.headers)
|
headers.update(self.headers)
|
||||||
# partial content
|
# partial content
|
||||||
file_size = pathfmt.part_size()
|
if file_size := pathfmt.part_size():
|
||||||
if file_size:
|
|
||||||
headers["Range"] = f"bytes={file_size}-"
|
headers["Range"] = f"bytes={file_size}-"
|
||||||
|
|
||||||
# connect to (remote) source
|
# connect to (remote) source
|
||||||
@@ -424,8 +421,7 @@ class HttpDownloader(DownloaderBase):
|
|||||||
if mtype in MIME_TYPES:
|
if mtype in MIME_TYPES:
|
||||||
return MIME_TYPES[mtype]
|
return MIME_TYPES[mtype]
|
||||||
|
|
||||||
ext = mimetypes.guess_extension(mtype, strict=False)
|
if ext := mimetypes.guess_extension(mtype, strict=False):
|
||||||
if ext:
|
|
||||||
return ext[1:]
|
return ext[1:]
|
||||||
|
|
||||||
self.log.warning("Unknown MIME type '%s'", mtype)
|
self.log.warning("Unknown MIME type '%s'", mtype)
|
||||||
|
|||||||
@@ -80,8 +80,7 @@ class YoutubeDLDownloader(DownloaderBase):
|
|||||||
if not info_dict:
|
if not info_dict:
|
||||||
url = url[5:]
|
url = url[5:]
|
||||||
try:
|
try:
|
||||||
manifest = kwdict.pop("_ytdl_manifest", None)
|
if manifest := kwdict.pop("_ytdl_manifest", None):
|
||||||
if manifest:
|
|
||||||
info_dict = self._extract_manifest(
|
info_dict = self._extract_manifest(
|
||||||
ytdl_instance, url, manifest,
|
ytdl_instance, url, manifest,
|
||||||
kwdict.pop("_ytdl_manifest_data", None),
|
kwdict.pop("_ytdl_manifest_data", None),
|
||||||
@@ -103,8 +102,7 @@ class YoutubeDLDownloader(DownloaderBase):
|
|||||||
else:
|
else:
|
||||||
info_dict = info_dict["entries"][index]
|
info_dict = info_dict["entries"][index]
|
||||||
|
|
||||||
extra = kwdict.get("_ytdl_extra")
|
if extra := kwdict.get("_ytdl_extra"):
|
||||||
if extra:
|
|
||||||
info_dict.update(extra)
|
info_dict.update(extra)
|
||||||
|
|
||||||
return self._download_video(ytdl_instance, pathfmt, info_dict)
|
return self._download_video(ytdl_instance, pathfmt, info_dict)
|
||||||
|
|||||||
@@ -40,8 +40,7 @@ class _2chThreadExtractor(Extractor):
|
|||||||
|
|
||||||
yield Message.Directory, thread
|
yield Message.Directory, thread
|
||||||
for post in posts:
|
for post in posts:
|
||||||
files = post.get("files")
|
if files := post.get("files"):
|
||||||
if files:
|
|
||||||
post["post_name"] = post["name"]
|
post["post_name"] = post["name"]
|
||||||
post["date"] = text.parse_timestamp(post["timestamp"])
|
post["date"] = text.parse_timestamp(post["timestamp"])
|
||||||
del post["files"]
|
del post["files"]
|
||||||
|
|||||||
@@ -37,8 +37,7 @@ class _8musesAlbumExtractor(Extractor):
|
|||||||
self.request(url).text,
|
self.request(url).text,
|
||||||
'id="ractive-public" type="text/plain">', '</script>'))
|
'id="ractive-public" type="text/plain">', '</script>'))
|
||||||
|
|
||||||
images = data.get("pictures")
|
if images := data.get("pictures"):
|
||||||
if images:
|
|
||||||
count = len(images)
|
count = len(images)
|
||||||
album = self._make_album(data["album"])
|
album = self._make_album(data["album"])
|
||||||
yield Message.Directory, {"album": album, "count": count}
|
yield Message.Directory, {"album": album, "count": count}
|
||||||
@@ -54,8 +53,7 @@ class _8musesAlbumExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
yield Message.Url, url, img
|
yield Message.Url, url, img
|
||||||
|
|
||||||
albums = data.get("albums")
|
if albums := data.get("albums"):
|
||||||
if albums:
|
|
||||||
for album in albums:
|
for album in albums:
|
||||||
permalink = album.get("permalink")
|
permalink = album.get("permalink")
|
||||||
if not permalink:
|
if not permalink:
|
||||||
|
|||||||
@@ -235,8 +235,7 @@ modules = [
|
|||||||
def find(url):
|
def find(url):
|
||||||
"""Find a suitable extractor for the given URL"""
|
"""Find a suitable extractor for the given URL"""
|
||||||
for cls in _list_classes():
|
for cls in _list_classes():
|
||||||
match = cls.pattern.match(url)
|
if match := cls.pattern.match(url):
|
||||||
if match:
|
|
||||||
return cls(match)
|
return cls(match)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -251,8 +250,7 @@ def add(cls):
|
|||||||
|
|
||||||
def add_module(module):
|
def add_module(module):
|
||||||
"""Add all extractors in 'module' to the list of available extractors"""
|
"""Add all extractors in 'module' to the list of available extractors"""
|
||||||
classes = _get_classes(module)
|
if classes := _get_classes(module):
|
||||||
if classes:
|
|
||||||
if isinstance(classes[0].pattern, str):
|
if isinstance(classes[0].pattern, str):
|
||||||
for cls in classes:
|
for cls in classes:
|
||||||
cls.pattern = re_compile(cls.pattern)
|
cls.pattern = re_compile(cls.pattern)
|
||||||
|
|||||||
@@ -205,8 +205,7 @@ class Ao3WorkExtractor(Ao3Extractor):
|
|||||||
}
|
}
|
||||||
data["language"] = util.code_to_language(data["lang"])
|
data["language"] = util.code_to_language(data["lang"])
|
||||||
|
|
||||||
series = data["series"]
|
if series := data["series"]:
|
||||||
if series:
|
|
||||||
extr = text.extract_from(series)
|
extr = text.extract_from(series)
|
||||||
data["series"] = {
|
data["series"] = {
|
||||||
"prev" : extr(' class="previous" href="/works/', '"'),
|
"prev" : extr(' class="previous" href="/works/', '"'),
|
||||||
|
|||||||
@@ -86,8 +86,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
|
|||||||
|
|
||||||
fallback = ()
|
fallback = ()
|
||||||
query = f"?type=orig&{query}"
|
query = f"?type=orig&{query}"
|
||||||
orig = text.extr(media, 'data-orig="', '"')
|
if orig := text.extr(media, 'data-orig="', '"'):
|
||||||
if orig:
|
|
||||||
path, _, ext = url.rpartition(".")
|
path, _, ext = url.rpartition(".")
|
||||||
if ext != orig:
|
if ext != orig:
|
||||||
fallback = (url + query,)
|
fallback = (url + query,)
|
||||||
|
|||||||
@@ -61,8 +61,7 @@ class AryionExtractor(Extractor):
|
|||||||
data = self.metadata()
|
data = self.metadata()
|
||||||
|
|
||||||
for post_id in self.posts():
|
for post_id in self.posts():
|
||||||
post = self._parse_post(post_id)
|
if post := self._parse_post(post_id):
|
||||||
if post:
|
|
||||||
if data:
|
if data:
|
||||||
post.update(data)
|
post.update(data)
|
||||||
yield Message.Directory, post
|
yield Message.Directory, post
|
||||||
|
|||||||
@@ -152,8 +152,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
|
|||||||
def chapters(self, page):
|
def chapters(self, page):
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
|
|
||||||
warning = extr(' class="alert alert-warning">', "</div>")
|
if warning := extr(' class="alert alert-warning">', "</div>"):
|
||||||
if warning:
|
|
||||||
self.log.warning("'%s'", text.remove_html(warning))
|
self.log.warning("'%s'", text.remove_html(warning))
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
|
|||||||
@@ -93,8 +93,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
|||||||
def _init(self):
|
def _init(self):
|
||||||
BehanceExtractor._init(self)
|
BehanceExtractor._init(self)
|
||||||
|
|
||||||
modules = self.config("modules")
|
if modules := self.config("modules"):
|
||||||
if modules:
|
|
||||||
if isinstance(modules, str):
|
if isinstance(modules, str):
|
||||||
modules = modules.split(",")
|
modules = modules.split(",")
|
||||||
self.modules = set(modules)
|
self.modules = set(modules)
|
||||||
@@ -208,8 +207,8 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
|||||||
break
|
break
|
||||||
|
|
||||||
elif mtype == "embed":
|
elif mtype == "embed":
|
||||||
embed = module.get("originalEmbed") or module.get("fluidEmbed")
|
if embed := (module.get("originalEmbed") or
|
||||||
if embed:
|
module.get("fluidEmbed")):
|
||||||
embed = text.unescape(text.extr(embed, 'src="', '"'))
|
embed = text.unescape(text.extr(embed, 'src="', '"'))
|
||||||
module["extension"] = "mp4"
|
module["extension"] = "mp4"
|
||||||
results.append(("ytdl:" + embed, module))
|
results.append(("ytdl:" + embed, module))
|
||||||
|
|||||||
@@ -26,8 +26,7 @@ class BlueskyExtractor(Extractor):
|
|||||||
root = "https://bsky.app"
|
root = "https://bsky.app"
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
meta = self.config("metadata") or ()
|
if meta := self.config("metadata") or ():
|
||||||
if meta:
|
|
||||||
if isinstance(meta, str):
|
if isinstance(meta, str):
|
||||||
meta = meta.replace(" ", "").split(",")
|
meta = meta.replace(" ", "").split(",")
|
||||||
elif not isinstance(meta, (list, tuple)):
|
elif not isinstance(meta, (list, tuple)):
|
||||||
|
|||||||
@@ -27,8 +27,7 @@ class BooruExtractor(BaseExtractor):
|
|||||||
notes = self.config("notes", False)
|
notes = self.config("notes", False)
|
||||||
fetch_html = tags or notes
|
fetch_html = tags or notes
|
||||||
|
|
||||||
url_key = self.config("url")
|
if url_key := self.config("url"):
|
||||||
if url_key:
|
|
||||||
if isinstance(url_key, (list, tuple)):
|
if isinstance(url_key, (list, tuple)):
|
||||||
self._file_url = self._file_url_list
|
self._file_url = self._file_url_list
|
||||||
self._file_url_keys = url_key
|
self._file_url_keys = url_key
|
||||||
|
|||||||
@@ -144,8 +144,7 @@ class BoostyExtractor(Extractor):
|
|||||||
url = block["url"]
|
url = block["url"]
|
||||||
sep = "&" if "?" in url else "?"
|
sep = "&" if "?" in url else "?"
|
||||||
|
|
||||||
signed_query = post.get("signedQuery")
|
if signed_query := post.get("signedQuery"):
|
||||||
if signed_query:
|
|
||||||
url += sep + signed_query[1:]
|
url += sep + signed_query[1:]
|
||||||
sep = "&"
|
sep = "&"
|
||||||
|
|
||||||
@@ -280,8 +279,7 @@ class BoostyAPI():
|
|||||||
}
|
}
|
||||||
|
|
||||||
if not access_token:
|
if not access_token:
|
||||||
auth = self.extractor.cookies.get("auth", domain=".boosty.to")
|
if auth := self.extractor.cookies.get("auth", domain=".boosty.to"):
|
||||||
if auth:
|
|
||||||
access_token = text.extr(
|
access_token = text.extr(
|
||||||
auth, "%22accessToken%22%3A%22", "%22")
|
auth, "%22accessToken%22%3A%22", "%22")
|
||||||
if access_token:
|
if access_token:
|
||||||
|
|||||||
@@ -35,8 +35,7 @@ class CivitaiExtractor(Extractor):
|
|||||||
self.log.debug("Using tRPC API")
|
self.log.debug("Using tRPC API")
|
||||||
self.api = CivitaiTrpcAPI(self)
|
self.api = CivitaiTrpcAPI(self)
|
||||||
|
|
||||||
quality = self.config("quality")
|
if quality := self.config("quality"):
|
||||||
if quality:
|
|
||||||
if not isinstance(quality, str):
|
if not isinstance(quality, str):
|
||||||
quality = ",".join(quality)
|
quality = ",".join(quality)
|
||||||
self._image_quality = quality
|
self._image_quality = quality
|
||||||
@@ -45,8 +44,7 @@ class CivitaiExtractor(Extractor):
|
|||||||
self._image_quality = "original=true"
|
self._image_quality = "original=true"
|
||||||
self._image_ext = "png"
|
self._image_ext = "png"
|
||||||
|
|
||||||
quality_video = self.config("quality-videos")
|
if quality_video := self.config("quality-videos"):
|
||||||
if quality_video:
|
|
||||||
if not isinstance(quality_video, str):
|
if not isinstance(quality_video, str):
|
||||||
quality_video = ",".join(quality_video)
|
quality_video = ",".join(quality_video)
|
||||||
if quality_video[0] == "+":
|
if quality_video[0] == "+":
|
||||||
@@ -59,8 +57,7 @@ class CivitaiExtractor(Extractor):
|
|||||||
self._video_quality = "quality=100"
|
self._video_quality = "quality=100"
|
||||||
self._video_ext = "webm"
|
self._video_ext = "webm"
|
||||||
|
|
||||||
metadata = self.config("metadata")
|
if metadata := self.config("metadata"):
|
||||||
if metadata:
|
|
||||||
if isinstance(metadata, str):
|
if isinstance(metadata, str):
|
||||||
metadata = metadata.split(",")
|
metadata = metadata.split(",")
|
||||||
elif not isinstance(metadata, (list, tuple)):
|
elif not isinstance(metadata, (list, tuple)):
|
||||||
@@ -73,16 +70,14 @@ class CivitaiExtractor(Extractor):
|
|||||||
False
|
False
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
models = self.models()
|
if models := self.models():
|
||||||
if models:
|
|
||||||
data = {"_extractor": CivitaiModelExtractor}
|
data = {"_extractor": CivitaiModelExtractor}
|
||||||
for model in models:
|
for model in models:
|
||||||
url = f"{self.root}/models/{model['id']}"
|
url = f"{self.root}/models/{model['id']}"
|
||||||
yield Message.Queue, url, data
|
yield Message.Queue, url, data
|
||||||
return
|
return
|
||||||
|
|
||||||
posts = self.posts()
|
if posts := self.posts():
|
||||||
if posts:
|
|
||||||
for post in posts:
|
for post in posts:
|
||||||
|
|
||||||
if "images" in post:
|
if "images" in post:
|
||||||
@@ -107,8 +102,7 @@ class CivitaiExtractor(Extractor):
|
|||||||
yield Message.Url, file["url"], file
|
yield Message.Url, file["url"], file
|
||||||
return
|
return
|
||||||
|
|
||||||
images = self.images()
|
if images := self.images():
|
||||||
if images:
|
|
||||||
for file in images:
|
for file in images:
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
@@ -230,8 +224,7 @@ class CivitaiExtractor(Extractor):
|
|||||||
|
|
||||||
def _extract_meta_version(self, item, is_post=True):
|
def _extract_meta_version(self, item, is_post=True):
|
||||||
try:
|
try:
|
||||||
version_id = self._extract_version_id(item, is_post)
|
if version_id := self._extract_version_id(item, is_post):
|
||||||
if version_id:
|
|
||||||
version = self.api.model_version(version_id).copy()
|
version = self.api.model_version(version_id).copy()
|
||||||
return version.pop("model", None), version
|
return version.pop("model", None), version
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
@@ -569,8 +562,7 @@ class CivitaiRestAPI():
|
|||||||
self.root = extractor.root + "/api"
|
self.root = extractor.root + "/api"
|
||||||
self.headers = {"Content-Type": "application/json"}
|
self.headers = {"Content-Type": "application/json"}
|
||||||
|
|
||||||
api_key = extractor.config("api-key")
|
if api_key := extractor.config("api-key"):
|
||||||
if api_key:
|
|
||||||
extractor.log.debug("Using api_key authentication")
|
extractor.log.debug("Using api_key authentication")
|
||||||
self.headers["Authorization"] = "Bearer " + api_key
|
self.headers["Authorization"] = "Bearer " + api_key
|
||||||
|
|
||||||
@@ -648,8 +640,7 @@ class CivitaiTrpcAPI():
|
|||||||
"x-client" : "web",
|
"x-client" : "web",
|
||||||
"x-fingerprint" : "undefined",
|
"x-fingerprint" : "undefined",
|
||||||
}
|
}
|
||||||
api_key = extractor.config("api-key")
|
if api_key := extractor.config("api-key"):
|
||||||
if api_key:
|
|
||||||
extractor.log.debug("Using api_key authentication")
|
extractor.log.debug("Using api_key authentication")
|
||||||
self.headers["Authorization"] = "Bearer " + api_key
|
self.headers["Authorization"] = "Bearer " + api_key
|
||||||
|
|
||||||
|
|||||||
@@ -136,11 +136,10 @@ class Extractor():
|
|||||||
if first:
|
if first:
|
||||||
first = False
|
first = False
|
||||||
values = config.accumulate(extr + path, key)
|
values = config.accumulate(extr + path, key)
|
||||||
else:
|
elif conf := config.get(extr, path[0]):
|
||||||
conf = config.get(extr, path[0])
|
values[:0] = config.accumulate(
|
||||||
if conf:
|
(self.subcategory,), key, conf=conf)
|
||||||
values[:0] = config.accumulate(
|
|
||||||
(self.subcategory,), key, conf=conf)
|
|
||||||
return values
|
return values
|
||||||
|
|
||||||
def request(self, url, method="GET", session=None,
|
def request(self, url, method="GET", session=None,
|
||||||
@@ -455,8 +454,7 @@ class Extractor():
|
|||||||
if ZSTD:
|
if ZSTD:
|
||||||
headers["Accept-Encoding"] += ", zstd"
|
headers["Accept-Encoding"] += ", zstd"
|
||||||
|
|
||||||
referer = self.config("referer", self.referer)
|
if referer := self.config("referer", self.referer):
|
||||||
if referer:
|
|
||||||
if isinstance(referer, str):
|
if isinstance(referer, str):
|
||||||
headers["Referer"] = referer
|
headers["Referer"] = referer
|
||||||
elif self.root:
|
elif self.root:
|
||||||
@@ -471,8 +469,7 @@ class Extractor():
|
|||||||
custom_ua is not config.get(("extractor",), "user-agent"):
|
custom_ua is not config.get(("extractor",), "user-agent"):
|
||||||
headers["User-Agent"] = custom_ua
|
headers["User-Agent"] = custom_ua
|
||||||
|
|
||||||
custom_headers = self.config("headers")
|
if custom_headers := self.config("headers"):
|
||||||
if custom_headers:
|
|
||||||
if isinstance(custom_headers, str):
|
if isinstance(custom_headers, str):
|
||||||
if custom_headers in HEADERS:
|
if custom_headers in HEADERS:
|
||||||
custom_headers = HEADERS[custom_headers]
|
custom_headers = HEADERS[custom_headers]
|
||||||
@@ -482,8 +479,7 @@ class Extractor():
|
|||||||
custom_headers = ()
|
custom_headers = ()
|
||||||
headers.update(custom_headers)
|
headers.update(custom_headers)
|
||||||
|
|
||||||
custom_ciphers = self.config("ciphers")
|
if custom_ciphers := self.config("ciphers"):
|
||||||
if custom_ciphers:
|
|
||||||
if isinstance(custom_ciphers, list):
|
if isinstance(custom_ciphers, list):
|
||||||
ssl_ciphers = ":".join(custom_ciphers)
|
ssl_ciphers = ":".join(custom_ciphers)
|
||||||
elif custom_ciphers in CIPHERS:
|
elif custom_ciphers in CIPHERS:
|
||||||
@@ -491,8 +487,7 @@ class Extractor():
|
|||||||
else:
|
else:
|
||||||
ssl_ciphers = custom_ciphers
|
ssl_ciphers = custom_ciphers
|
||||||
|
|
||||||
source_address = self.config("source-address")
|
if source_address := self.config("source-address"):
|
||||||
if source_address:
|
|
||||||
if isinstance(source_address, str):
|
if isinstance(source_address, str):
|
||||||
source_address = (source_address, 0)
|
source_address = (source_address, 0)
|
||||||
else:
|
else:
|
||||||
@@ -526,10 +521,8 @@ class Extractor():
|
|||||||
if self.cookies_domain is None:
|
if self.cookies_domain is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
cookies = self.config("cookies")
|
if cookies := self.config("cookies"):
|
||||||
if cookies:
|
if select := self.config("cookies-select"):
|
||||||
select = self.config("cookies-select")
|
|
||||||
if select:
|
|
||||||
if select == "rotate":
|
if select == "rotate":
|
||||||
cookies = cookies[self.cookies_index % len(cookies)]
|
cookies = cookies[self.cookies_index % len(cookies)]
|
||||||
Extractor.cookies_index += 1
|
Extractor.cookies_index += 1
|
||||||
@@ -975,8 +968,7 @@ class BaseExtractor(Extractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def update(cls, instances):
|
def update(cls, instances):
|
||||||
extra_instances = config.get(("extractor",), cls.basecategory)
|
if extra_instances := config.get(("extractor",), cls.basecategory):
|
||||||
if extra_instances:
|
|
||||||
for category, info in extra_instances.items():
|
for category, info in extra_instances.items():
|
||||||
if isinstance(info, dict) and "root" in info:
|
if isinstance(info, dict) and "root" in info:
|
||||||
instances[category] = info
|
instances[category] = info
|
||||||
@@ -984,8 +976,7 @@ class BaseExtractor(Extractor):
|
|||||||
pattern_list = []
|
pattern_list = []
|
||||||
instance_list = cls.instances = []
|
instance_list = cls.instances = []
|
||||||
for category, info in instances.items():
|
for category, info in instances.items():
|
||||||
root = info["root"]
|
if root := info["root"]:
|
||||||
if root:
|
|
||||||
root = root.rstrip("/")
|
root = root.rstrip("/")
|
||||||
instance_list.append((category, root, info))
|
instance_list.append((category, root, info))
|
||||||
|
|
||||||
|
|||||||
@@ -49,8 +49,7 @@ class DanbooruExtractor(BaseExtractor):
|
|||||||
def items(self):
|
def items(self):
|
||||||
# 'includes' initialization must be done here and not in '_init()'
|
# 'includes' initialization must be done here and not in '_init()'
|
||||||
# or it'll cause an exception with e621 when 'metadata' is enabled
|
# or it'll cause an exception with e621 when 'metadata' is enabled
|
||||||
includes = self.config("metadata")
|
if includes := self.config("metadata"):
|
||||||
if includes:
|
|
||||||
if isinstance(includes, (list, tuple)):
|
if isinstance(includes, (list, tuple)):
|
||||||
includes = ",".join(includes)
|
includes = ",".join(includes)
|
||||||
elif not isinstance(includes, str):
|
elif not isinstance(includes, str):
|
||||||
|
|||||||
@@ -55,8 +55,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
self.group = False
|
self.group = False
|
||||||
self._premium_cache = {}
|
self._premium_cache = {}
|
||||||
|
|
||||||
unwatch = self.config("auto-unwatch")
|
if self.config("auto-unwatch"):
|
||||||
if unwatch:
|
|
||||||
self.unwatch = []
|
self.unwatch = []
|
||||||
self.finalize = self._unwatch_premium
|
self.finalize = self._unwatch_premium
|
||||||
else:
|
else:
|
||||||
@@ -118,10 +117,8 @@ class DeviantartExtractor(Extractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
if self.user:
|
if self.user:
|
||||||
group = self.config("group", True)
|
if group := self.config("group", True):
|
||||||
if group:
|
if user := _user_details(self, self.user):
|
||||||
user = _user_details(self, self.user)
|
|
||||||
if user:
|
|
||||||
self.user = user["username"]
|
self.user = user["username"]
|
||||||
self.group = False
|
self.group = False
|
||||||
elif group == "skip":
|
elif group == "skip":
|
||||||
@@ -179,8 +176,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
yield self.commit(deviation, deviation["flash"])
|
yield self.commit(deviation, deviation["flash"])
|
||||||
|
|
||||||
if self.commit_journal:
|
if self.commit_journal:
|
||||||
journal = self._extract_journal(deviation)
|
if journal := self._extract_journal(deviation):
|
||||||
if journal:
|
|
||||||
if self.extra:
|
if self.extra:
|
||||||
deviation["_journal"] = journal["html"]
|
deviation["_journal"] = journal["html"]
|
||||||
deviation["is_original"] = True
|
deviation["is_original"] = True
|
||||||
@@ -389,8 +385,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
deviations = state["@@entities"]["deviation"]
|
deviations = state["@@entities"]["deviation"]
|
||||||
content = deviations.popitem()[1]["textContent"]
|
content = deviations.popitem()[1]["textContent"]
|
||||||
|
|
||||||
html = self._textcontent_to_html(deviation, content)
|
if html := self._textcontent_to_html(deviation, content):
|
||||||
if html:
|
|
||||||
return {"html": html}
|
return {"html": html}
|
||||||
return {"html": content["excerpt"].replace("\n", "<br />")}
|
return {"html": content["excerpt"].replace("\n", "<br />")}
|
||||||
|
|
||||||
@@ -432,8 +427,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
type = content["type"]
|
type = content["type"]
|
||||||
|
|
||||||
if type == "paragraph":
|
if type == "paragraph":
|
||||||
children = content.get("content")
|
if children := content.get("content"):
|
||||||
if children:
|
|
||||||
html.append('<p style="')
|
html.append('<p style="')
|
||||||
|
|
||||||
attrs = content["attrs"]
|
attrs = content["attrs"]
|
||||||
@@ -547,8 +541,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
self.log.warning("Unsupported content type '%s'", type)
|
self.log.warning("Unsupported content type '%s'", type)
|
||||||
|
|
||||||
def _tiptap_process_text(self, html, content):
|
def _tiptap_process_text(self, html, content):
|
||||||
marks = content.get("marks")
|
if marks := content.get("marks"):
|
||||||
if marks:
|
|
||||||
close = []
|
close = []
|
||||||
for mark in marks:
|
for mark in marks:
|
||||||
type = mark["type"]
|
type = mark["type"]
|
||||||
@@ -587,8 +580,7 @@ class DeviantartExtractor(Extractor):
|
|||||||
html.append(text.escape(content["text"]))
|
html.append(text.escape(content["text"]))
|
||||||
|
|
||||||
def _tiptap_process_children(self, html, content):
|
def _tiptap_process_children(self, html, content):
|
||||||
children = content.get("content")
|
if children := content.get("content"):
|
||||||
if children:
|
|
||||||
for block in children:
|
for block in children:
|
||||||
self._tiptap_process_content(html, block)
|
self._tiptap_process_content(html, block)
|
||||||
|
|
||||||
@@ -841,8 +833,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
|
|||||||
for fmt in media["types"]
|
for fmt in media["types"]
|
||||||
}
|
}
|
||||||
|
|
||||||
tokens = media.get("token") or ()
|
if tokens := media.get("token") or ():
|
||||||
if tokens:
|
|
||||||
if len(tokens) <= 1:
|
if len(tokens) <= 1:
|
||||||
fmt = formats[format]
|
fmt = formats[format]
|
||||||
if "c" in fmt:
|
if "c" in fmt:
|
||||||
@@ -1057,8 +1048,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
|
|||||||
page = self._limited_request(url).text
|
page = self._limited_request(url).text
|
||||||
|
|
||||||
if stash_id[0] == "0":
|
if stash_id[0] == "0":
|
||||||
uuid = text.extr(page, '//deviation/', '"')
|
if uuid := text.extr(page, '//deviation/', '"'):
|
||||||
if uuid:
|
|
||||||
deviation = self.api.deviation(uuid)
|
deviation = self.api.deviation(uuid)
|
||||||
deviation["_page"] = page
|
deviation["_page"] = page
|
||||||
deviation["index"] = text.parse_int(text.extr(
|
deviation["index"] = text.parse_int(text.extr(
|
||||||
@@ -1081,8 +1071,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
|
|||||||
yield deviation
|
yield deviation
|
||||||
return
|
return
|
||||||
|
|
||||||
stash_data = text.extr(page, ',\\"stash\\":', ',\\"@@')
|
if stash_data := text.extr(page, ',\\"stash\\":', ',\\"@@'):
|
||||||
if stash_data:
|
|
||||||
stash_data = util.json_loads(self._unescape_json(stash_data))
|
stash_data = util.json_loads(self._unescape_json(stash_data))
|
||||||
|
|
||||||
for sid in text.extract_iter(
|
for sid in text.extract_iter(
|
||||||
@@ -1461,8 +1450,7 @@ class DeviantartOAuthAPI():
|
|||||||
self.folders = extractor.config("folders", False)
|
self.folders = extractor.config("folders", False)
|
||||||
self.public = extractor.config("public", True)
|
self.public = extractor.config("public", True)
|
||||||
|
|
||||||
client_id = extractor.config("client-id")
|
if client_id := extractor.config("client-id"):
|
||||||
if client_id:
|
|
||||||
self.client_id = str(client_id)
|
self.client_id = str(client_id)
|
||||||
self.client_secret = extractor.config("client-secret")
|
self.client_secret = extractor.config("client-secret")
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -24,8 +24,7 @@ class E621Extractor(danbooru.DanbooruExtractor):
|
|||||||
request_interval_min = 1.0
|
request_interval_min = 1.0
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
includes = self.config("metadata") or ()
|
if includes := self.config("metadata") or ():
|
||||||
if includes:
|
|
||||||
if isinstance(includes, str):
|
if isinstance(includes, str):
|
||||||
includes = includes.split(",")
|
includes = includes.split(",")
|
||||||
elif not isinstance(includes, (list, tuple)):
|
elif not isinstance(includes, (list, tuple)):
|
||||||
|
|||||||
@@ -246,8 +246,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
def metadata_from_page(self, page):
|
def metadata_from_page(self, page):
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
|
|
||||||
api_url = extr('var api_url = "', '"')
|
if api_url := extr('var api_url = "', '"'):
|
||||||
if api_url:
|
|
||||||
self.api_url = api_url
|
self.api_url = api_url
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
@@ -419,8 +418,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
def _validate_signature(self, signature):
|
def _validate_signature(self, signature):
|
||||||
"""Return False if all file signature bytes are zero"""
|
"""Return False if all file signature bytes are zero"""
|
||||||
if signature:
|
if signature:
|
||||||
byte = signature[0]
|
if byte := signature[0]:
|
||||||
if byte:
|
|
||||||
# 60 == b"<"
|
# 60 == b"<"
|
||||||
if byte == 60 and b"<!doctype html".startswith(
|
if byte == 60 and b"<!doctype html".startswith(
|
||||||
signature[:14].lower()):
|
signature[:14].lower()):
|
||||||
|
|||||||
@@ -306,8 +306,7 @@ class FacebookSetExtractor(FacebookExtractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
set_id = self.groups[0] or self.groups[3]
|
set_id = self.groups[0] or self.groups[3]
|
||||||
path = self.groups[1]
|
if path := self.groups[1]:
|
||||||
if path:
|
|
||||||
post_url = self.root + "/" + path
|
post_url = self.root + "/" + path
|
||||||
post_page = self.request(post_url).text
|
post_page = self.request(post_url).text
|
||||||
set_id = self.parse_post_page(post_page)["set_id"]
|
set_id = self.parse_post_page(post_page)["set_id"]
|
||||||
|
|||||||
@@ -40,8 +40,7 @@ class FanboxExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
self.embeds = self.config("embeds", True)
|
self.embeds = self.config("embeds", True)
|
||||||
|
|
||||||
includes = self.config("metadata")
|
if includes := self.config("metadata"):
|
||||||
if includes:
|
|
||||||
if isinstance(includes, str):
|
if isinstance(includes, str):
|
||||||
includes = includes.split(",")
|
includes = includes.split(",")
|
||||||
elif not isinstance(includes, (list, tuple)):
|
elif not isinstance(includes, (list, tuple)):
|
||||||
@@ -141,8 +140,7 @@ class FanboxExtractor(Extractor):
|
|||||||
try:
|
try:
|
||||||
post["plan"] = plans[fee]
|
post["plan"] = plans[fee]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
fees = [f for f in plans if f >= fee]
|
if fees := [f for f in plans if f >= fee]:
|
||||||
if fees:
|
|
||||||
plan = plans[min(fees)]
|
plan = plans[min(fees)]
|
||||||
else:
|
else:
|
||||||
plan = plans[0].copy()
|
plan = plans[0].copy()
|
||||||
@@ -217,8 +215,7 @@ class FanboxExtractor(Extractor):
|
|||||||
|
|
||||||
def _get_urls_from_post(self, content_body, post):
|
def _get_urls_from_post(self, content_body, post):
|
||||||
num = 0
|
num = 0
|
||||||
cover_image = post.get("coverImageUrl")
|
if cover_image := post.get("coverImageUrl"):
|
||||||
if cover_image:
|
|
||||||
cover_image = util.re("/c/[0-9a-z_]+").sub("", cover_image)
|
cover_image = util.re("/c/[0-9a-z_]+").sub("", cover_image)
|
||||||
final_post = post.copy()
|
final_post = post.copy()
|
||||||
final_post["isCoverImage"] = True
|
final_post["isCoverImage"] = True
|
||||||
|
|||||||
@@ -58,8 +58,7 @@ class FapachiUserExtractor(Extractor):
|
|||||||
url = f"{self.root}/{self.user}/page/{self.num}"
|
url = f"{self.root}/{self.user}/page/{self.num}"
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
for post in text.extract_iter(page, 'model-media-prew">', ">"):
|
for post in text.extract_iter(page, 'model-media-prew">', ">"):
|
||||||
path = text.extr(post, '<a href="', '"')
|
if path := text.extr(post, '<a href="', '"'):
|
||||||
if path:
|
|
||||||
yield Message.Queue, self.root + path, data
|
yield Message.Queue, self.root + path, data
|
||||||
|
|
||||||
if '">Next page</a>' not in page:
|
if '">Next page</a>' not in page:
|
||||||
|
|||||||
@@ -457,8 +457,7 @@ class FlickrAPI(oauth.OAuth1API):
|
|||||||
def _pagination(self, method, params, key="photos"):
|
def _pagination(self, method, params, key="photos"):
|
||||||
extras = ("description,date_upload,tags,views,media,"
|
extras = ("description,date_upload,tags,views,media,"
|
||||||
"path_alias,owner_name,")
|
"path_alias,owner_name,")
|
||||||
includes = self.extractor.config("metadata")
|
if includes := self.extractor.config("metadata"):
|
||||||
if includes:
|
|
||||||
if isinstance(includes, (list, tuple)):
|
if isinstance(includes, (list, tuple)):
|
||||||
includes = ",".join(includes)
|
includes = ",".join(includes)
|
||||||
elif not isinstance(includes, str):
|
elif not isinstance(includes, str):
|
||||||
|
|||||||
@@ -164,8 +164,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
|||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
op = (self.data["op"],)
|
op = (self.data["op"],)
|
||||||
posts = self.data.get("posts")
|
if posts := self.data.get("posts"):
|
||||||
if posts:
|
|
||||||
posts = list(posts.values())
|
posts = list(posts.values())
|
||||||
posts.sort(key=lambda p: p["timestamp"])
|
posts.sort(key=lambda p: p["timestamp"])
|
||||||
return itertools.chain(op, posts)
|
return itertools.chain(op, posts)
|
||||||
|
|||||||
@@ -51,8 +51,7 @@ class FuraffinityExtractor(Extractor):
|
|||||||
def items(self):
|
def items(self):
|
||||||
metadata = self.metadata()
|
metadata = self.metadata()
|
||||||
for post_id in util.advance(self.posts(), self.offset):
|
for post_id in util.advance(self.posts(), self.offset):
|
||||||
post = self._parse_post(post_id)
|
if post := self._parse_post(post_id):
|
||||||
if post:
|
|
||||||
if metadata:
|
if metadata:
|
||||||
post.update(metadata)
|
post.update(metadata)
|
||||||
yield Message.Directory, post
|
yield Message.Directory, post
|
||||||
@@ -117,8 +116,7 @@ class FuraffinityExtractor(Extractor):
|
|||||||
data["folders"] = folders = []
|
data["folders"] = folders = []
|
||||||
for folder in extr(
|
for folder in extr(
|
||||||
"<h3>Listed in Folders</h3>", "</section>").split("</a>"):
|
"<h3>Listed in Folders</h3>", "</section>").split("</a>"):
|
||||||
folder = rh(folder)
|
if folder := rh(folder):
|
||||||
if folder:
|
|
||||||
folders.append(folder)
|
folders.append(folder)
|
||||||
else:
|
else:
|
||||||
# old site layout
|
# old site layout
|
||||||
@@ -280,8 +278,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
|
|||||||
return self._pagination_favorites()
|
return self._pagination_favorites()
|
||||||
|
|
||||||
def _parse_post(self, post_id):
|
def _parse_post(self, post_id):
|
||||||
post = FuraffinityExtractor._parse_post(self, post_id)
|
if post := FuraffinityExtractor._parse_post(self, post_id):
|
||||||
if post:
|
|
||||||
post["favorite_id"] = self._favorite_id
|
post["favorite_id"] = self._favorite_id
|
||||||
return post
|
return post
|
||||||
|
|
||||||
|
|||||||
@@ -73,8 +73,7 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
|
|||||||
|
|
||||||
metadata = self.metadata(page)
|
metadata = self.metadata(page)
|
||||||
|
|
||||||
url = text.extr(page, 'class="main-image" src="', '"')
|
if url := text.extr(page, 'class="main-image" src="', '"'):
|
||||||
if url:
|
|
||||||
metadata["type"] = "picture"
|
metadata["type"] = "picture"
|
||||||
else:
|
else:
|
||||||
url = text.extr(page, '<source src="', '"')
|
url = text.extr(page, '<source src="', '"')
|
||||||
|
|||||||
@@ -62,12 +62,10 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
|
|||||||
self.info = info = util.json_loads(page.partition("=")[2])
|
self.info = info = util.json_loads(page.partition("=")[2])
|
||||||
iget = info.get
|
iget = info.get
|
||||||
|
|
||||||
language = iget("language")
|
if language := iget("language"):
|
||||||
if language:
|
|
||||||
language = language.capitalize()
|
language = language.capitalize()
|
||||||
|
|
||||||
date = iget("date")
|
if date := iget("date"):
|
||||||
if date:
|
|
||||||
date += ":00"
|
date += ":00"
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
|
|||||||
@@ -108,8 +108,7 @@ class IdolcomplexExtractor(SankakuExtractor):
|
|||||||
pid = extr(">Post ID:", "<")
|
pid = extr(">Post ID:", "<")
|
||||||
created = extr(' title="', '"')
|
created = extr(' title="', '"')
|
||||||
|
|
||||||
file_url = extr('>Original:', 'id=')
|
if file_url := extr('>Original:', 'id='):
|
||||||
if file_url:
|
|
||||||
file_url = extr(' href="', '"')
|
file_url = extr(' href="', '"')
|
||||||
width = extr(">", "x")
|
width = extr(">", "x")
|
||||||
height = extr("", " ")
|
height = extr("", " ")
|
||||||
|
|||||||
@@ -75,8 +75,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
|
|||||||
paths += findall(page)
|
paths += findall(page)
|
||||||
pos = page.find('rel="next" aria-label="Next')
|
pos = page.find('rel="next" aria-label="Next')
|
||||||
if pos > 0:
|
if pos > 0:
|
||||||
url = text.rextr(page, 'href="', '"', pos)
|
if url := text.rextr(page, 'href="', '"', pos):
|
||||||
if url:
|
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
continue
|
continue
|
||||||
return paths
|
return paths
|
||||||
|
|||||||
@@ -28,8 +28,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
|
|||||||
GalleryExtractor.__init__(self, match, url)
|
GalleryExtractor.__init__(self, match, url)
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
access_token = self.config("access-token")
|
if access_token := self.config("access-token"):
|
||||||
if access_token:
|
|
||||||
self.api = ImagechestAPI(self, access_token)
|
self.api = ImagechestAPI(self, access_token)
|
||||||
self.page_url = None
|
self.page_url = None
|
||||||
self.metadata = self._metadata_api
|
self.metadata = self._metadata_api
|
||||||
|
|||||||
@@ -28,8 +28,7 @@ class ImagefapExtractor(Extractor):
|
|||||||
response = Extractor.request(self, url, **kwargs)
|
response = Extractor.request(self, url, **kwargs)
|
||||||
|
|
||||||
if response.history and response.url.endswith("/human-verification"):
|
if response.history and response.url.endswith("/human-verification"):
|
||||||
msg = text.extr(response.text, '<div class="mt-4', '<')
|
if msg := text.extr(response.text, '<div class="mt-4', '<'):
|
||||||
if msg:
|
|
||||||
msg = " ".join(msg.partition(">")[2].split())
|
msg = " ".join(msg.partition(">")[2].split())
|
||||||
raise exception.AbortExtraction(f"'{msg}'")
|
raise exception.AbortExtraction(f"'{msg}'")
|
||||||
self.log.warning("HTTP redirect to %s", response.url)
|
self.log.warning("HTTP redirect to %s", response.url)
|
||||||
|
|||||||
@@ -109,8 +109,7 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
pid = match[1]
|
if pid := match[1]:
|
||||||
if pid:
|
|
||||||
self.pool_id = pid
|
self.pool_id = pid
|
||||||
self.orderby = "pool_order"
|
self.orderby = "pool_order"
|
||||||
else:
|
else:
|
||||||
@@ -142,8 +141,7 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
InkbunnyExtractor.__init__(self, match)
|
InkbunnyExtractor.__init__(self, match)
|
||||||
uid = match[1]
|
if uid := match[1]:
|
||||||
if uid:
|
|
||||||
self.user_id = uid
|
self.user_id = uid
|
||||||
self.orderby = self.config("orderby", "fav_datetime")
|
self.orderby = self.config("orderby", "fav_datetime")
|
||||||
else:
|
else:
|
||||||
@@ -218,8 +216,7 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
|
|||||||
params["dayslimit"] = pop("days", None)
|
params["dayslimit"] = pop("days", None)
|
||||||
params["username"] = pop("artist", None)
|
params["username"] = pop("artist", None)
|
||||||
|
|
||||||
favsby = pop("favsby", None)
|
if favsby := pop("favsby", None):
|
||||||
if favsby:
|
|
||||||
# get user_id from user profile
|
# get user_id from user profile
|
||||||
url = f"{self.root}/{favsby}"
|
url = f"{self.root}/{favsby}"
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
|
|||||||
@@ -55,8 +55,7 @@ class InstagramExtractor(Extractor):
|
|||||||
self.login()
|
self.login()
|
||||||
|
|
||||||
data = self.metadata()
|
data = self.metadata()
|
||||||
videos = self.config("videos", True)
|
if videos := self.config("videos", True):
|
||||||
if videos:
|
|
||||||
videos_dash = (videos != "merged")
|
videos_dash = (videos != "merged")
|
||||||
videos_headers = {"User-Agent": "Mozilla/5.0"}
|
videos_headers = {"User-Agent": "Mozilla/5.0"}
|
||||||
previews = self.config("previews", False)
|
previews = self.config("previews", False)
|
||||||
@@ -91,8 +90,7 @@ class InstagramExtractor(Extractor):
|
|||||||
for file in files:
|
for file in files:
|
||||||
file.update(post)
|
file.update(post)
|
||||||
|
|
||||||
url = file.get("video_url")
|
if url := file.get("video_url"):
|
||||||
if url:
|
|
||||||
if videos:
|
if videos:
|
||||||
file["_http_headers"] = videos_headers
|
file["_http_headers"] = videos_headers
|
||||||
text.nameext_from_url(url, file)
|
text.nameext_from_url(url, file)
|
||||||
@@ -144,8 +142,7 @@ class InstagramExtractor(Extractor):
|
|||||||
if www_claim is not None:
|
if www_claim is not None:
|
||||||
self.www_claim = www_claim
|
self.www_claim = www_claim
|
||||||
|
|
||||||
csrf_token = response.cookies.get("csrftoken")
|
if csrf_token := response.cookies.get("csrftoken"):
|
||||||
if csrf_token:
|
|
||||||
self.csrf_token = csrf_token
|
self.csrf_token = csrf_token
|
||||||
|
|
||||||
return response
|
return response
|
||||||
@@ -164,8 +161,7 @@ class InstagramExtractor(Extractor):
|
|||||||
if "items" in post: # story or highlight
|
if "items" in post: # story or highlight
|
||||||
items = post["items"]
|
items = post["items"]
|
||||||
reel_id = str(post["id"]).rpartition(":")[2]
|
reel_id = str(post["id"]).rpartition(":")[2]
|
||||||
expires = post.get("expiring_at")
|
if expires := post.get("expiring_at"):
|
||||||
if expires:
|
|
||||||
post_url = f"{self.root}/stories/{post['user']['username']}/"
|
post_url = f"{self.root}/stories/{post['user']['username']}/"
|
||||||
else:
|
else:
|
||||||
post_url = f"{self.root}/stories/highlights/{reel_id}/"
|
post_url = f"{self.root}/stories/highlights/{reel_id}/"
|
||||||
@@ -193,20 +189,17 @@ class InstagramExtractor(Extractor):
|
|||||||
caption = post["caption"]
|
caption = post["caption"]
|
||||||
data["description"] = caption["text"] if caption else ""
|
data["description"] = caption["text"] if caption else ""
|
||||||
|
|
||||||
tags = self._find_tags(data["description"])
|
if tags := self._find_tags(data["description"]):
|
||||||
if tags:
|
|
||||||
data["tags"] = sorted(set(tags))
|
data["tags"] = sorted(set(tags))
|
||||||
|
|
||||||
location = post.get("location")
|
if location := post.get("location"):
|
||||||
if location:
|
|
||||||
slug = location["short_name"].replace(" ", "-").lower()
|
slug = location["short_name"].replace(" ", "-").lower()
|
||||||
data["location_id"] = location["pk"]
|
data["location_id"] = location["pk"]
|
||||||
data["location_slug"] = slug
|
data["location_slug"] = slug
|
||||||
data["location_url"] = \
|
data["location_url"] = \
|
||||||
f"{self.root}/explore/locations/{location['pk']}/{slug}/"
|
f"{self.root}/explore/locations/{location['pk']}/{slug}/"
|
||||||
|
|
||||||
coauthors = post.get("coauthor_producers")
|
if coauthors := post.get("coauthor_producers"):
|
||||||
if coauthors:
|
|
||||||
data["coauthors"] = [
|
data["coauthors"] = [
|
||||||
{"id" : user["pk"],
|
{"id" : user["pk"],
|
||||||
"username" : user["username"],
|
"username" : user["username"],
|
||||||
@@ -214,8 +207,7 @@ class InstagramExtractor(Extractor):
|
|||||||
for user in coauthors
|
for user in coauthors
|
||||||
]
|
]
|
||||||
|
|
||||||
items = post.get("carousel_media")
|
if items := post.get("carousel_media"):
|
||||||
if items:
|
|
||||||
data["sidecar_media_id"] = data["post_id"]
|
data["sidecar_media_id"] = data["post_id"]
|
||||||
data["sidecar_shortcode"] = data["post_shortcode"]
|
data["sidecar_shortcode"] = data["post_shortcode"]
|
||||||
else:
|
else:
|
||||||
@@ -237,8 +229,7 @@ class InstagramExtractor(Extractor):
|
|||||||
data["post_shortcode"])
|
data["post_shortcode"])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
video_versions = item.get("video_versions")
|
if video_versions := item.get("video_versions"):
|
||||||
if video_versions:
|
|
||||||
video = max(
|
video = max(
|
||||||
video_versions,
|
video_versions,
|
||||||
key=lambda x: (x["width"], x["height"], x["type"]),
|
key=lambda x: (x["width"], x["height"], x["type"]),
|
||||||
@@ -281,8 +272,7 @@ class InstagramExtractor(Extractor):
|
|||||||
"edge_sidecar_to_children" not in post:
|
"edge_sidecar_to_children" not in post:
|
||||||
post = self.api.media(post["id"])[0]
|
post = self.api.media(post["id"])[0]
|
||||||
|
|
||||||
pinned = post.get("pinned_for_users", ())
|
if pinned := post.get("pinned_for_users", ()):
|
||||||
if pinned:
|
|
||||||
for index, user in enumerate(pinned):
|
for index, user in enumerate(pinned):
|
||||||
pinned[index] = int(user["id"])
|
pinned[index] = int(user["id"])
|
||||||
|
|
||||||
@@ -306,19 +296,16 @@ class InstagramExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
data["date"] = data["post_date"]
|
data["date"] = data["post_date"]
|
||||||
|
|
||||||
tags = self._find_tags(data["description"])
|
if tags := self._find_tags(data["description"]):
|
||||||
if tags:
|
|
||||||
data["tags"] = sorted(set(tags))
|
data["tags"] = sorted(set(tags))
|
||||||
|
|
||||||
location = post.get("location")
|
if location := post.get("location"):
|
||||||
if location:
|
|
||||||
data["location_id"] = location["id"]
|
data["location_id"] = location["id"]
|
||||||
data["location_slug"] = location["slug"]
|
data["location_slug"] = location["slug"]
|
||||||
data["location_url"] = (f"{self.root}/explore/locations/"
|
data["location_url"] = (f"{self.root}/explore/locations/"
|
||||||
f"{location['id']}/{location['slug']}/")
|
f"{location['id']}/{location['slug']}/")
|
||||||
|
|
||||||
coauthors = post.get("coauthor_producers")
|
if coauthors := post.get("coauthor_producers"):
|
||||||
if coauthors:
|
|
||||||
data["coauthors"] = [
|
data["coauthors"] = [
|
||||||
{"id" : user["id"],
|
{"id" : user["id"],
|
||||||
"username": user["username"]}
|
"username": user["username"]}
|
||||||
@@ -365,32 +352,28 @@ class InstagramExtractor(Extractor):
|
|||||||
def _extract_tagged_users(self, src, dest):
|
def _extract_tagged_users(self, src, dest):
|
||||||
dest["tagged_users"] = tagged_users = []
|
dest["tagged_users"] = tagged_users = []
|
||||||
|
|
||||||
edges = src.get("edge_media_to_tagged_user")
|
if edges := src.get("edge_media_to_tagged_user"):
|
||||||
if edges:
|
|
||||||
for edge in edges["edges"]:
|
for edge in edges["edges"]:
|
||||||
user = edge["node"]["user"]
|
user = edge["node"]["user"]
|
||||||
tagged_users.append({"id" : user["id"],
|
tagged_users.append({"id" : user["id"],
|
||||||
"username" : user["username"],
|
"username" : user["username"],
|
||||||
"full_name": user["full_name"]})
|
"full_name": user["full_name"]})
|
||||||
|
|
||||||
usertags = src.get("usertags")
|
if usertags := src.get("usertags"):
|
||||||
if usertags:
|
|
||||||
for tag in usertags["in"]:
|
for tag in usertags["in"]:
|
||||||
user = tag["user"]
|
user = tag["user"]
|
||||||
tagged_users.append({"id" : user["pk"],
|
tagged_users.append({"id" : user["pk"],
|
||||||
"username" : user["username"],
|
"username" : user["username"],
|
||||||
"full_name": user["full_name"]})
|
"full_name": user["full_name"]})
|
||||||
|
|
||||||
mentions = src.get("reel_mentions")
|
if mentions := src.get("reel_mentions"):
|
||||||
if mentions:
|
|
||||||
for mention in mentions:
|
for mention in mentions:
|
||||||
user = mention["user"]
|
user = mention["user"]
|
||||||
tagged_users.append({"id" : user.get("pk"),
|
tagged_users.append({"id" : user.get("pk"),
|
||||||
"username" : user["username"],
|
"username" : user["username"],
|
||||||
"full_name": user["full_name"]})
|
"full_name": user["full_name"]})
|
||||||
|
|
||||||
stickers = src.get("story_bloks_stickers")
|
if stickers := src.get("story_bloks_stickers"):
|
||||||
if stickers:
|
|
||||||
for sticker in stickers:
|
for sticker in stickers:
|
||||||
sticker = sticker["bloks_sticker"]
|
sticker = sticker["bloks_sticker"]
|
||||||
if sticker["bloks_sticker_type"] == "mention":
|
if sticker["bloks_sticker_type"] == "mention":
|
||||||
@@ -698,8 +681,7 @@ class InstagramAvatarExtractor(InstagramExtractor):
|
|||||||
url = user.get("profile_pic_url_hd") or user["profile_pic_url"]
|
url = user.get("profile_pic_url_hd") or user["profile_pic_url"]
|
||||||
avatar = {"url": url, "width": 0, "height": 0}
|
avatar = {"url": url, "width": 0, "height": 0}
|
||||||
|
|
||||||
pk = user.get("profile_pic_id")
|
if pk := user.get("profile_pic_id"):
|
||||||
if pk:
|
|
||||||
pk = pk.partition("_")[0]
|
pk = pk.partition("_")[0]
|
||||||
code = shortcode_from_id(pk)
|
code = shortcode_from_id(pk)
|
||||||
else:
|
else:
|
||||||
@@ -753,8 +735,7 @@ class InstagramRestAPI():
|
|||||||
def highlights_media(self, user_id, chunk_size=5):
|
def highlights_media(self, user_id, chunk_size=5):
|
||||||
reel_ids = [hl["id"] for hl in self.highlights_tray(user_id)]
|
reel_ids = [hl["id"] for hl in self.highlights_tray(user_id)]
|
||||||
|
|
||||||
order = self.extractor.config("order-posts")
|
if order := self.extractor.config("order-posts"):
|
||||||
if order:
|
|
||||||
if order in ("desc", "reverse"):
|
if order in ("desc", "reverse"):
|
||||||
reel_ids.reverse()
|
reel_ids.reverse()
|
||||||
elif order in ("id", "id_asc"):
|
elif order in ("id", "id_asc"):
|
||||||
|
|||||||
@@ -39,8 +39,7 @@ class ItakuExtractor(Extractor):
|
|||||||
|
|
||||||
sections = []
|
sections = []
|
||||||
for s in post["sections"]:
|
for s in post["sections"]:
|
||||||
group = s["group"]
|
if group := s["group"]:
|
||||||
if group:
|
|
||||||
sections.append(group["title"] + "/" + s["title"])
|
sections.append(group["title"] + "/" + s["title"])
|
||||||
else:
|
else:
|
||||||
sections.append(s["title"])
|
sections.append(s["title"])
|
||||||
|
|||||||
@@ -41,8 +41,7 @@ class JschanThreadExtractor(JschanExtractor):
|
|||||||
|
|
||||||
yield Message.Directory, thread
|
yield Message.Directory, thread
|
||||||
for post in itertools.chain((thread,), posts):
|
for post in itertools.chain((thread,), posts):
|
||||||
files = post.pop("files", ())
|
if files := post.pop("files", ()):
|
||||||
if files:
|
|
||||||
thread.update(post)
|
thread.update(post)
|
||||||
thread["count"] = len(files)
|
thread["count"] = len(files)
|
||||||
for num, file in enumerate(files):
|
for num, file in enumerate(files):
|
||||||
|
|||||||
@@ -75,8 +75,7 @@ class KeenspotComicExtractor(Extractor):
|
|||||||
self._image = '<div id="comic">'
|
self._image = '<div id="comic">'
|
||||||
return "http://brawlinthefamily.keenspot.com/comic/theshowdown/"
|
return "http://brawlinthefamily.keenspot.com/comic/theshowdown/"
|
||||||
|
|
||||||
url = text.extr(page, '<link rel="first" href="', '"')
|
if url := text.extr(page, '<link rel="first" href="', '"'):
|
||||||
if url:
|
|
||||||
if self.comic == "porcelain":
|
if self.comic == "porcelain":
|
||||||
self._needle = 'id="porArchivetop_"'
|
self._needle = 'id="porArchivetop_"'
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -422,8 +422,7 @@ class KemonoDiscordExtractor(KemonoExtractor):
|
|||||||
find_hash = util.re(HASH_PATTERN).match
|
find_hash = util.re(HASH_PATTERN).match
|
||||||
|
|
||||||
posts = self.api.discord_channel(channel_id)
|
posts = self.api.discord_channel(channel_id)
|
||||||
max_posts = self.config("max-posts")
|
if max_posts := self.config("max-posts"):
|
||||||
if max_posts:
|
|
||||||
posts = itertools.islice(posts, max_posts)
|
posts = itertools.islice(posts, max_posts)
|
||||||
|
|
||||||
for post in posts:
|
for post in posts:
|
||||||
|
|||||||
@@ -26,8 +26,7 @@ class LivedoorExtractor(Extractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
images = self._images(post)
|
if images := self._images(post):
|
||||||
if images:
|
|
||||||
yield Message.Directory, {"post": post}
|
yield Message.Directory, {"post": post}
|
||||||
for image in images:
|
for image in images:
|
||||||
yield Message.Url, image["url"], image
|
yield Message.Url, image["url"], image
|
||||||
|
|||||||
@@ -58,8 +58,7 @@ class MangadexExtractor(Extractor):
|
|||||||
cattributes = chapter["attributes"]
|
cattributes = chapter["attributes"]
|
||||||
mattributes = manga["attributes"]
|
mattributes = manga["attributes"]
|
||||||
|
|
||||||
lang = cattributes.get("translatedLanguage")
|
if lang := cattributes.get("translatedLanguage"):
|
||||||
if lang:
|
|
||||||
lang = lang.partition("-")[0]
|
lang = lang.partition("-")[0]
|
||||||
|
|
||||||
if cattributes["chapter"]:
|
if cattributes["chapter"]:
|
||||||
@@ -279,8 +278,7 @@ class MangadexAPI():
|
|||||||
|
|
||||||
@cache(maxage=900, keyarg=1)
|
@cache(maxage=900, keyarg=1)
|
||||||
def _authenticate_impl_client(self, username, password):
|
def _authenticate_impl_client(self, username, password):
|
||||||
refresh_token = _refresh_token_cache((username, "personal"))
|
if refresh_token := _refresh_token_cache((username, "personal")):
|
||||||
if refresh_token:
|
|
||||||
self.extractor.log.info("Refreshing access token")
|
self.extractor.log.info("Refreshing access token")
|
||||||
data = {
|
data = {
|
||||||
"grant_type" : "refresh_token",
|
"grant_type" : "refresh_token",
|
||||||
@@ -317,8 +315,7 @@ class MangadexAPI():
|
|||||||
|
|
||||||
@cache(maxage=900, keyarg=1)
|
@cache(maxage=900, keyarg=1)
|
||||||
def _authenticate_impl_legacy(self, username, password):
|
def _authenticate_impl_legacy(self, username, password):
|
||||||
refresh_token = _refresh_token_cache(username)
|
if refresh_token := _refresh_token_cache(username):
|
||||||
if refresh_token:
|
|
||||||
self.extractor.log.info("Refreshing access token")
|
self.extractor.log.info("Refreshing access token")
|
||||||
url = self.root + "/auth/refresh"
|
url = self.root + "/auth/refresh"
|
||||||
json = {"token": refresh_token}
|
json = {"token": refresh_token}
|
||||||
@@ -389,8 +386,7 @@ class MangadexAPI():
|
|||||||
params["contentRating[]"] = ratings
|
params["contentRating[]"] = ratings
|
||||||
params["offset"] = 0
|
params["offset"] = 0
|
||||||
|
|
||||||
api_params = config("api-parameters")
|
if api_params := config("api-parameters"):
|
||||||
if api_params:
|
|
||||||
params.update(api_params)
|
params.update(api_params)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
@@ -145,8 +145,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
|
|||||||
yield Message.Queue, url, data
|
yield Message.Queue, url, data
|
||||||
|
|
||||||
def chapters(self):
|
def chapters(self):
|
||||||
source = self.config("source")
|
if source := self.config("source"):
|
||||||
if source:
|
|
||||||
source_id = self._select_source(source)
|
source_id = self._select_source(source)
|
||||||
self.log.debug("Requesting chapters for source_id %s", source_id)
|
self.log.debug("Requesting chapters for source_id %s", source_id)
|
||||||
chapters = self._extract_chapters_source(source_id)
|
chapters = self._extract_chapters_source(source_id)
|
||||||
|
|||||||
@@ -49,10 +49,8 @@ class MastodonExtractor(BaseExtractor):
|
|||||||
attachments.extend(status["reblog"]["media_attachments"])
|
attachments.extend(status["reblog"]["media_attachments"])
|
||||||
|
|
||||||
if self.cards:
|
if self.cards:
|
||||||
card = status.get("card")
|
if card := status.get("card"):
|
||||||
if card:
|
if url := card.get("image"):
|
||||||
url = card.get("image")
|
|
||||||
if url:
|
|
||||||
card["weburl"] = card.get("url")
|
card["weburl"] = card.get("url")
|
||||||
card["url"] = url
|
card["url"] = url
|
||||||
card["id"] = "card" + "".join(
|
card["id"] = "card" + "".join(
|
||||||
|
|||||||
@@ -33,15 +33,13 @@ class MisskeyExtractor(BaseExtractor):
|
|||||||
if "note" in note:
|
if "note" in note:
|
||||||
note = note["note"]
|
note = note["note"]
|
||||||
files = note.pop("files") or []
|
files = note.pop("files") or []
|
||||||
renote = note.get("renote")
|
if renote := note.get("renote"):
|
||||||
if renote:
|
|
||||||
if not self.renotes:
|
if not self.renotes:
|
||||||
self.log.debug("Skipping %s (renote)", note["id"])
|
self.log.debug("Skipping %s (renote)", note["id"])
|
||||||
continue
|
continue
|
||||||
files.extend(renote.get("files") or ())
|
files.extend(renote.get("files") or ())
|
||||||
|
|
||||||
reply = note.get("reply")
|
if reply := note.get("reply"):
|
||||||
if reply:
|
|
||||||
if not self.replies:
|
if not self.replies:
|
||||||
self.log.debug("Skipping %s (reply)", note["id"])
|
self.log.debug("Skipping %s (reply)", note["id"])
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -33,8 +33,7 @@ class NaverBlogPostExtractor(NaverBlogBase, GalleryExtractor):
|
|||||||
example = "https://blog.naver.com/BLOGID/12345"
|
example = "https://blog.naver.com/BLOGID/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
blog_id = match[1]
|
if blog_id := match[1]:
|
||||||
if blog_id:
|
|
||||||
self.blog_id = blog_id
|
self.blog_id = blog_id
|
||||||
self.post_id = match[2]
|
self.post_id = match[2]
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -228,12 +228,10 @@ class NewgroundsExtractor(Extractor):
|
|||||||
data["index"] = text.parse_int(index)
|
data["index"] = text.parse_int(index)
|
||||||
data["_index"] = index
|
data["_index"] = index
|
||||||
|
|
||||||
image_data = extr("let imageData =", "\n];")
|
if image_data := extr("let imageData =", "\n];"):
|
||||||
if image_data:
|
|
||||||
data["_multi"] = self._extract_images_multi(image_data)
|
data["_multi"] = self._extract_images_multi(image_data)
|
||||||
else:
|
else:
|
||||||
art_images = extr('<div class="art-images', '\n\t\t</div>')
|
if art_images := extr('<div class="art-images', '\n\t\t</div>'):
|
||||||
if art_images:
|
|
||||||
data["_multi"] = self._extract_images_art(art_images, data)
|
data["_multi"] = self._extract_images_art(art_images, data)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
@@ -544,8 +542,7 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
|
|||||||
self.query = text.parse_query(query)
|
self.query = text.parse_query(query)
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
suitabilities = self.query.get("suitabilities")
|
if suitabilities := self.query.get("suitabilities"):
|
||||||
if suitabilities:
|
|
||||||
data = {"view_suitability_" + s: "on"
|
data = {"view_suitability_" + s: "on"
|
||||||
for s in suitabilities.split(",")}
|
for s in suitabilities.split(",")}
|
||||||
self.request(self.root + "/suitabilities",
|
self.request(self.root + "/suitabilities",
|
||||||
|
|||||||
@@ -30,8 +30,7 @@ class NitterExtractor(BaseExtractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
retweets = self.config("retweets", False)
|
retweets = self.config("retweets", False)
|
||||||
videos = self.config("videos", True)
|
if videos := self.config("videos", True):
|
||||||
if videos:
|
|
||||||
ytdl = (videos == "ytdl")
|
ytdl = (videos == "ytdl")
|
||||||
videos = True
|
videos = True
|
||||||
self.cookies.set("hlsPlayback", "on", domain=self.cookies_domain)
|
self.cookies.set("hlsPlayback", "on", domain=self.cookies_domain)
|
||||||
@@ -42,8 +41,7 @@ class NitterExtractor(BaseExtractor):
|
|||||||
self.log.debug("Skipping %s (retweet)", tweet["tweet_id"])
|
self.log.debug("Skipping %s (retweet)", tweet["tweet_id"])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
attachments = tweet.pop("_attach", "")
|
if attachments := tweet.pop("_attach", ""):
|
||||||
if attachments:
|
|
||||||
files = []
|
files = []
|
||||||
for url in text.extract_iter(
|
for url in text.extract_iter(
|
||||||
attachments, 'href="', '"'):
|
attachments, 'href="', '"'):
|
||||||
|
|||||||
@@ -74,8 +74,7 @@ class OAuthBase(Extractor):
|
|||||||
"""Open 'url' in browser amd return response parameters"""
|
"""Open 'url' in browser amd return response parameters"""
|
||||||
url += "?" + urllib.parse.urlencode(params)
|
url += "?" + urllib.parse.urlencode(params)
|
||||||
|
|
||||||
browser = self.config("browser", True)
|
if browser := self.config("browser", True):
|
||||||
if browser:
|
|
||||||
try:
|
try:
|
||||||
import webbrowser
|
import webbrowser
|
||||||
browser = webbrowser.get()
|
browser = webbrowser.get()
|
||||||
@@ -83,8 +82,7 @@ class OAuthBase(Extractor):
|
|||||||
browser = None
|
browser = None
|
||||||
|
|
||||||
if browser and browser.open(url):
|
if browser and browser.open(url):
|
||||||
name = getattr(browser, "name", None)
|
if name := getattr(browser, "name", None):
|
||||||
if name:
|
|
||||||
self.log.info("Opening URL with %s:", name.capitalize())
|
self.log.info("Opening URL with %s:", name.capitalize())
|
||||||
else:
|
else:
|
||||||
self.log.info("Please open this URL in your browser:")
|
self.log.info("Please open this URL in your browser:")
|
||||||
|
|||||||
@@ -295,12 +295,10 @@ class PatreonExtractor(Extractor):
|
|||||||
if bootstrap:
|
if bootstrap:
|
||||||
return util.json_loads(bootstrap + "}")
|
return util.json_loads(bootstrap + "}")
|
||||||
|
|
||||||
bootstrap = text.extr(page, "window.patreon.bootstrap,", "});")
|
if bootstrap := text.extr(page, "window.patreon.bootstrap,", "});"):
|
||||||
if bootstrap:
|
|
||||||
return util.json_loads(bootstrap + "}")
|
return util.json_loads(bootstrap + "}")
|
||||||
|
|
||||||
data = text.extr(page, "window.patreon = {", "};\n")
|
if data := text.extr(page, "window.patreon = {", "};\n"):
|
||||||
if data:
|
|
||||||
try:
|
try:
|
||||||
return util.json_loads(f"{{{data}}}")["bootstrap"]
|
return util.json_loads(f"{{{data}}}")["bootstrap"]
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -340,8 +338,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
|
|||||||
if creator and creator.startswith("id:"):
|
if creator and creator.startswith("id:"):
|
||||||
return creator[3:]
|
return creator[3:]
|
||||||
|
|
||||||
campaign_id = query.get("c") or query.get("campaign_id")
|
if campaign_id := query.get("c") or query.get("campaign_id"):
|
||||||
if campaign_id:
|
|
||||||
return campaign_id
|
return campaign_id
|
||||||
|
|
||||||
if user_id := query.get("u"):
|
if user_id := query.get("u"):
|
||||||
|
|||||||
@@ -165,12 +165,10 @@ class PhilomenaAPI():
|
|||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
extr = self.extractor
|
extr = self.extractor
|
||||||
|
|
||||||
api_key = extr.config("api-key")
|
if api_key := extr.config("api-key"):
|
||||||
if api_key:
|
|
||||||
params["key"] = api_key
|
params["key"] = api_key
|
||||||
|
|
||||||
filter_id = extr.config("filter")
|
if filter_id := extr.config("filter"):
|
||||||
if filter_id:
|
|
||||||
params["filter_id"] = filter_id
|
params["filter_id"] = filter_id
|
||||||
elif not api_key:
|
elif not api_key:
|
||||||
params["filter_id"] = extr.config_instance("filter_id") or "2"
|
params["filter_id"] = extr.config_instance("filter_id") or "2"
|
||||||
|
|||||||
@@ -60,8 +60,7 @@ class PinterestExtractor(Extractor):
|
|||||||
"closeup_description",
|
"closeup_description",
|
||||||
"closeup_unified_description",
|
"closeup_unified_description",
|
||||||
):
|
):
|
||||||
value = pin.get(key)
|
if value := pin.get(key):
|
||||||
if value:
|
|
||||||
pin[key] = value.strip()
|
pin[key] = value.strip()
|
||||||
|
|
||||||
yield Message.Directory, pin
|
yield Message.Directory, pin
|
||||||
@@ -93,8 +92,7 @@ class PinterestExtractor(Extractor):
|
|||||||
if story_pin_data and self.stories:
|
if story_pin_data and self.stories:
|
||||||
return self._extract_story(pin, story_pin_data)
|
return self._extract_story(pin, story_pin_data)
|
||||||
|
|
||||||
carousel_data = pin.get("carousel_data")
|
if carousel_data := pin.get("carousel_data"):
|
||||||
if carousel_data:
|
|
||||||
return self._extract_carousel(pin, carousel_data)
|
return self._extract_carousel(pin, carousel_data)
|
||||||
|
|
||||||
videos = pin.get("videos")
|
videos = pin.get("videos")
|
||||||
@@ -270,8 +268,7 @@ class PinterestUserExtractor(PinterestExtractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for board in self.api.boards(self.user):
|
for board in self.api.boards(self.user):
|
||||||
url = board.get("url")
|
if url := board.get("url"):
|
||||||
if url:
|
|
||||||
board["_extractor"] = PinterestBoardExtractor
|
board["_extractor"] = PinterestBoardExtractor
|
||||||
yield Message.Queue, self.root + url, board
|
yield Message.Queue, self.root + url, board
|
||||||
|
|
||||||
|
|||||||
@@ -21,8 +21,7 @@ class PixeldrainExtractor(Extractor):
|
|||||||
archive_fmt = "{id}"
|
archive_fmt = "{id}"
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
api_key = self.config("api-key")
|
if api_key := self.config("api-key"):
|
||||||
if api_key:
|
|
||||||
self.session.auth = util.HTTPBasicAuth("", api_key)
|
self.session.auth = util.HTTPBasicAuth("", api_key)
|
||||||
|
|
||||||
def parse_datetime(self, date_string):
|
def parse_datetime(self, date_string):
|
||||||
|
|||||||
@@ -89,8 +89,7 @@ class PixivExtractor(Extractor):
|
|||||||
if tag["is_registered"]]
|
if tag["is_registered"]]
|
||||||
if self.meta_captions and not work.get("caption") and \
|
if self.meta_captions and not work.get("caption") and \
|
||||||
not work.get("_mypixiv") and not work.get("_ajax"):
|
not work.get("_mypixiv") and not work.get("_ajax"):
|
||||||
body = self._request_ajax("/illust/" + str(work["id"]))
|
if body := self._request_ajax("/illust/" + str(work["id"])):
|
||||||
if body:
|
|
||||||
work["caption"] = self._sanitize_ajax_caption(
|
work["caption"] = self._sanitize_ajax_caption(
|
||||||
body["illustComment"])
|
body["illustComment"])
|
||||||
|
|
||||||
@@ -293,8 +292,7 @@ class PixivExtractor(Extractor):
|
|||||||
|
|
||||||
def _extract_ajax_url(self, body):
|
def _extract_ajax_url(self, body):
|
||||||
try:
|
try:
|
||||||
original = body["urls"]["original"]
|
if original := body["urls"]["original"]:
|
||||||
if original:
|
|
||||||
return original
|
return original
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
@@ -699,8 +697,7 @@ class PixivRankingExtractor(PixivExtractor):
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
raise exception.AbortExtraction(f"Invalid mode '{mode}'")
|
raise exception.AbortExtraction(f"Invalid mode '{mode}'")
|
||||||
|
|
||||||
date = query.get("date")
|
if date := query.get("date"):
|
||||||
if date:
|
|
||||||
if len(date) == 8 and date.isdecimal():
|
if len(date) == 8 and date.isdecimal():
|
||||||
date = f"{date[0:4]}-{date[4:6]}-{date[6:8]}"
|
date = f"{date[0:4]}-{date[4:6]}-{date[6:8]}"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -33,8 +33,7 @@ class RedditExtractor(Extractor):
|
|||||||
previews = self.config("previews", True)
|
previews = self.config("previews", True)
|
||||||
embeds = self.config("embeds", True)
|
embeds = self.config("embeds", True)
|
||||||
|
|
||||||
videos = self.config("videos", True)
|
if videos := self.config("videos", True):
|
||||||
if videos:
|
|
||||||
if videos == "ytdl":
|
if videos == "ytdl":
|
||||||
self._extract_video = self._extract_video_ytdl
|
self._extract_video = self._extract_video_ytdl
|
||||||
elif videos == "dash":
|
elif videos == "dash":
|
||||||
@@ -139,8 +138,7 @@ class RedditExtractor(Extractor):
|
|||||||
)):
|
)):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
match = match_submission(url)
|
if match := match_submission(url):
|
||||||
if match:
|
|
||||||
extra.append(match[1])
|
extra.append(match[1])
|
||||||
elif not match_user(url) and not match_subreddit(url):
|
elif not match_user(url) and not match_subreddit(url):
|
||||||
if previews and "comment" not in data and \
|
if previews and "comment" not in data and \
|
||||||
@@ -181,8 +179,7 @@ class RedditExtractor(Extractor):
|
|||||||
submission["id"], item["media_id"], data.get("status"))
|
submission["id"], item["media_id"], data.get("status"))
|
||||||
continue
|
continue
|
||||||
src = data["s"]
|
src = data["s"]
|
||||||
url = src.get("u") or src.get("gif") or src.get("mp4")
|
if url := src.get("u") or src.get("gif") or src.get("mp4"):
|
||||||
if url:
|
|
||||||
yield url.partition("?")[0].replace("/preview.", "/i.", 1)
|
yield url.partition("?")[0].replace("/preview.", "/i.", 1)
|
||||||
else:
|
else:
|
||||||
self.log.error(
|
self.log.error(
|
||||||
@@ -202,8 +199,7 @@ class RedditExtractor(Extractor):
|
|||||||
submission["id"], mid, data.get("status"))
|
submission["id"], mid, data.get("status"))
|
||||||
continue
|
continue
|
||||||
src = data["s"]
|
src = data["s"]
|
||||||
url = src.get("u") or src.get("gif") or src.get("mp4")
|
if url := src.get("u") or src.get("gif") or src.get("mp4"):
|
||||||
if url:
|
|
||||||
yield url.partition("?")[0].replace("/preview.", "/i.", 1)
|
yield url.partition("?")[0].replace("/preview.", "/i.", 1)
|
||||||
else:
|
else:
|
||||||
self.log.error(
|
self.log.error(
|
||||||
@@ -242,8 +238,7 @@ class RedditExtractor(Extractor):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
for image in post["preview"]["images"]:
|
for image in post["preview"]["images"]:
|
||||||
variants = image.get("variants")
|
if variants := image.get("variants"):
|
||||||
if variants:
|
|
||||||
if "gif" in variants:
|
if "gif" in variants:
|
||||||
yield variants["gif"]["source"]["url"]
|
yield variants["gif"]["source"]["url"]
|
||||||
if "mp4" in variants:
|
if "mp4" in variants:
|
||||||
|
|||||||
@@ -40,8 +40,7 @@ class RedgifsExtractor(Extractor):
|
|||||||
|
|
||||||
for gif in self.gifs():
|
for gif in self.gifs():
|
||||||
|
|
||||||
gallery = gif.get("gallery")
|
if gallery := gif.get("gallery"):
|
||||||
if gallery:
|
|
||||||
gifs = self.api.gallery(gallery)["gifs"]
|
gifs = self.api.gallery(gallery)["gifs"]
|
||||||
enum = 1
|
enum = 1
|
||||||
cnt = len(gifs)
|
cnt = len(gifs)
|
||||||
@@ -71,8 +70,7 @@ class RedgifsExtractor(Extractor):
|
|||||||
def _formats(self, gif):
|
def _formats(self, gif):
|
||||||
urls = gif["urls"]
|
urls = gif["urls"]
|
||||||
for fmt in self.formats:
|
for fmt in self.formats:
|
||||||
url = urls.get(fmt)
|
if url := urls.get(fmt):
|
||||||
if url:
|
|
||||||
url = url.replace("//thumbs2.", "//thumbs3.", 1)
|
url = url.replace("//thumbs2.", "//thumbs3.", 1)
|
||||||
text.nameext_from_url(url, gif)
|
text.nameext_from_url(url, gif)
|
||||||
yield url
|
yield url
|
||||||
|
|||||||
@@ -39,8 +39,7 @@ class Rule34xyzExtractor(BooruExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
formats = self.config("format")
|
if formats := self.config("format"):
|
||||||
if formats:
|
|
||||||
if isinstance(formats, str):
|
if isinstance(formats, str):
|
||||||
formats = formats.split(",")
|
formats = formats.split(",")
|
||||||
self.formats = formats
|
self.formats = formats
|
||||||
|
|||||||
@@ -78,8 +78,7 @@ class SankakuExtractor(BooruExtractor):
|
|||||||
def _tags(self, post, page):
|
def _tags(self, post, page):
|
||||||
tags = collections.defaultdict(list)
|
tags = collections.defaultdict(list)
|
||||||
for tag in self.api.tags(post["id"]):
|
for tag in self.api.tags(post["id"]):
|
||||||
name = tag["name"]
|
if name := tag["name"]:
|
||||||
if name:
|
|
||||||
tags[tag["type"]].append(name.lower().replace(" ", "_"))
|
tags[tag["type"]].append(name.lower().replace(" ", "_"))
|
||||||
types = self.TAG_TYPES
|
types = self.TAG_TYPES
|
||||||
for type, values in tags.items():
|
for type, values in tags.items():
|
||||||
@@ -315,8 +314,7 @@ class SankakuAPI():
|
|||||||
params["lang"] = "en"
|
params["lang"] = "en"
|
||||||
params["limit"] = str(self.extractor.per_page)
|
params["limit"] = str(self.extractor.per_page)
|
||||||
|
|
||||||
refresh = self.extractor.config("refresh", False)
|
if refresh := self.extractor.config("refresh", False):
|
||||||
if refresh:
|
|
||||||
offset = expires = 0
|
offset = expires = 0
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
@@ -330,8 +328,7 @@ class SankakuAPI():
|
|||||||
|
|
||||||
for post in posts:
|
for post in posts:
|
||||||
if not expires:
|
if not expires:
|
||||||
url = post["file_url"]
|
if url := post["file_url"]:
|
||||||
if url:
|
|
||||||
expires = text.parse_int(
|
expires = text.parse_int(
|
||||||
text.extr(url, "e=", "&")) - 60
|
text.extr(url, "e=", "&")) - 60
|
||||||
|
|
||||||
|
|||||||
@@ -19,13 +19,11 @@ class Shimmie2Extractor(BaseExtractor):
|
|||||||
archive_fmt = "{id}"
|
archive_fmt = "{id}"
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
cookies = self.config_instance("cookies")
|
if cookies := self.config_instance("cookies"):
|
||||||
if cookies:
|
|
||||||
domain = self.root.rpartition("/")[2]
|
domain = self.root.rpartition("/")[2]
|
||||||
self.cookies_update_dict(cookies, domain=domain)
|
self.cookies_update_dict(cookies, domain=domain)
|
||||||
|
|
||||||
file_url = self.config_instance("file_url")
|
if file_url := self.config_instance("file_url"):
|
||||||
if file_url:
|
|
||||||
self.file_url_fmt = file_url
|
self.file_url_fmt = file_url
|
||||||
|
|
||||||
if self.category == "giantessbooru":
|
if self.category == "giantessbooru":
|
||||||
|
|||||||
@@ -90,8 +90,7 @@ class SimplyhentaiImageExtractor(Extractor):
|
|||||||
url = extr('"image":"' , '&')
|
url = extr('"image":"' , '&')
|
||||||
url = extr(""content":"", "&") or url
|
url = extr(""content":"", "&") or url
|
||||||
|
|
||||||
tags = text.extr(descr, " tagged with ", " online for free ")
|
if tags := text.extr(descr, " tagged with ", " online for free "):
|
||||||
if tags:
|
|
||||||
tags = tags.split(", ")
|
tags = tags.split(", ")
|
||||||
tags[-1] = tags[-1].partition(" ")[2]
|
tags[-1] = tags[-1].partition(" ")[2]
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -162,8 +162,7 @@ class SkebExtractor(Extractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
if self.article and "article_image_url" in resp:
|
if self.article and "article_image_url" in resp:
|
||||||
url = resp["article_image_url"]
|
if url := resp["article_image_url"]:
|
||||||
if url:
|
|
||||||
files.append({
|
files.append({
|
||||||
"content_category": "article",
|
"content_category": "article",
|
||||||
"file_id" : "article",
|
"file_id" : "article",
|
||||||
|
|||||||
@@ -115,8 +115,7 @@ class SubscribestarExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def check_errors(response):
|
def check_errors(response):
|
||||||
errors = response.json().get("errors")
|
if errors := response.json().get("errors"):
|
||||||
if errors:
|
|
||||||
self.log.debug(errors)
|
self.log.debug(errors)
|
||||||
try:
|
try:
|
||||||
msg = f'"{errors.popitem()[1]}"'
|
msg = f'"{errors.popitem()[1]}"'
|
||||||
@@ -146,8 +145,7 @@ class SubscribestarExtractor(Extractor):
|
|||||||
def _media_from_post(self, html):
|
def _media_from_post(self, html):
|
||||||
media = []
|
media = []
|
||||||
|
|
||||||
gallery = text.extr(html, 'data-gallery="', '"')
|
if gallery := text.extr(html, 'data-gallery="', '"'):
|
||||||
if gallery:
|
|
||||||
for item in util.json_loads(text.unescape(gallery)):
|
for item in util.json_loads(text.unescape(gallery)):
|
||||||
if "/previews" in item["url"]:
|
if "/previews" in item["url"]:
|
||||||
self._warn_preview()
|
self._warn_preview()
|
||||||
|
|||||||
@@ -26,10 +26,8 @@ class SzurubooruExtractor(booru.BooruExtractor):
|
|||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
}
|
}
|
||||||
|
|
||||||
username = self.config("username")
|
if username := self.config("username"):
|
||||||
if username:
|
if token := self.config("token"):
|
||||||
token = self.config("token")
|
|
||||||
if token:
|
|
||||||
value = username + ":" + token
|
value = username + ":" + token
|
||||||
self.headers["Authorization"] = "Token " + \
|
self.headers["Authorization"] = "Token " + \
|
||||||
binascii.b2a_base64(value.encode())[:-1].decode()
|
binascii.b2a_base64(value.encode())[:-1].decode()
|
||||||
|
|||||||
@@ -34,8 +34,7 @@ class TumblrExtractor(Extractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|
||||||
name = match[2]
|
if name := match[2]:
|
||||||
if name:
|
|
||||||
self.blog = name + ".tumblr.com"
|
self.blog = name + ".tumblr.com"
|
||||||
else:
|
else:
|
||||||
self.blog = match[1] or match[3]
|
self.blog = match[1] or match[3]
|
||||||
@@ -139,8 +138,7 @@ class TumblrExtractor(Extractor):
|
|||||||
if url and url.startswith("https://a.tumblr.com/"):
|
if url and url.startswith("https://a.tumblr.com/"):
|
||||||
posts.append(self._prepare(url, post.copy()))
|
posts.append(self._prepare(url, post.copy()))
|
||||||
|
|
||||||
url = post.get("video_url") # type "video"
|
if url := post.get("video_url"): # type "video"
|
||||||
if url:
|
|
||||||
posts.append(self._prepare(
|
posts.append(self._prepare(
|
||||||
self._original_video(url), post.copy()))
|
self._original_video(url), post.copy()))
|
||||||
|
|
||||||
@@ -160,8 +158,7 @@ class TumblrExtractor(Extractor):
|
|||||||
posts.append(self._prepare(url, post.copy()))
|
posts.append(self._prepare(url, post.copy()))
|
||||||
|
|
||||||
if self.external: # external links
|
if self.external: # external links
|
||||||
url = post.get("permalink_url") or post.get("url")
|
if url := post.get("permalink_url") or post.get("url"):
|
||||||
if url:
|
|
||||||
post["extension"] = None
|
post["extension"] = None
|
||||||
posts.append((Message.Queue, url, post.copy()))
|
posts.append((Message.Queue, url, post.copy()))
|
||||||
del post["extension"]
|
del post["extension"]
|
||||||
@@ -191,8 +188,7 @@ class TumblrExtractor(Extractor):
|
|||||||
types = types.split(",")
|
types = types.split(",")
|
||||||
types = frozenset(types)
|
types = frozenset(types)
|
||||||
|
|
||||||
invalid = types - POST_TYPES
|
if invalid := types - POST_TYPES:
|
||||||
if invalid:
|
|
||||||
types = types & POST_TYPES
|
types = types & POST_TYPES
|
||||||
self.log.warning("Invalid post types: '%s'",
|
self.log.warning("Invalid post types: '%s'",
|
||||||
"', '".join(sorted(invalid)))
|
"', '".join(sorted(invalid)))
|
||||||
@@ -504,8 +500,7 @@ class TumblrAPI(oauth.OAuth1API):
|
|||||||
f"{t.hour:02}:{t.minute:02}:{t.second:02}")
|
f"{t.hour:02}:{t.minute:02}:{t.second:02}")
|
||||||
|
|
||||||
# hourly rate limit
|
# hourly rate limit
|
||||||
reset = response.headers.get("x-ratelimit-perhour-reset")
|
if reset := response.headers.get("x-ratelimit-perhour-reset"):
|
||||||
if reset:
|
|
||||||
self.log.info("Hourly API rate limit exceeded")
|
self.log.info("Hourly API rate limit exceeded")
|
||||||
self.extractor.wait(seconds=reset)
|
self.extractor.wait(seconds=reset)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -159,12 +159,10 @@ class TwibooruAPI():
|
|||||||
def _pagination(self, endpoint, params):
|
def _pagination(self, endpoint, params):
|
||||||
extr = self.extractor
|
extr = self.extractor
|
||||||
|
|
||||||
api_key = extr.config("api-key")
|
if api_key := extr.config("api-key"):
|
||||||
if api_key:
|
|
||||||
params["key"] = api_key
|
params["key"] = api_key
|
||||||
|
|
||||||
filter_id = extr.config("filter")
|
if filter_id := extr.config("filter"):
|
||||||
if filter_id:
|
|
||||||
params["filter_id"] = filter_id
|
params["filter_id"] = filter_id
|
||||||
elif not api_key:
|
elif not api_key:
|
||||||
params["filter_id"] = "2"
|
params["filter_id"] = "2"
|
||||||
|
|||||||
@@ -278,8 +278,7 @@ class TwitterExtractor(Extractor):
|
|||||||
bvals = {bval["key"]: bval["value"]
|
bvals = {bval["key"]: bval["value"]
|
||||||
for bval in card["binding_values"]}
|
for bval in card["binding_values"]}
|
||||||
|
|
||||||
cbl = self.cards_blacklist
|
if cbl := self.cards_blacklist:
|
||||||
if cbl:
|
|
||||||
if name in cbl:
|
if name in cbl:
|
||||||
return
|
return
|
||||||
if "vanity_url" in bvals:
|
if "vanity_url" in bvals:
|
||||||
@@ -339,8 +338,8 @@ class TwitterExtractor(Extractor):
|
|||||||
response = self.request(url, fatal=False)
|
response = self.request(url, fatal=False)
|
||||||
if response.status_code >= 400:
|
if response.status_code >= 400:
|
||||||
continue
|
continue
|
||||||
url = text.extr(response.text, 'name="twitter:image" value="', '"')
|
if url := text.extr(
|
||||||
if url:
|
response.text, 'name="twitter:image" value="', '"'):
|
||||||
files.append({"url": url})
|
files.append({"url": url})
|
||||||
|
|
||||||
def _transform_tweet(self, tweet):
|
def _transform_tweet(self, tweet):
|
||||||
@@ -411,12 +410,10 @@ class TwitterExtractor(Extractor):
|
|||||||
content = tget("full_text") or tget("text") or ""
|
content = tget("full_text") or tget("text") or ""
|
||||||
entities = legacy["entities"]
|
entities = legacy["entities"]
|
||||||
|
|
||||||
hashtags = entities.get("hashtags")
|
if hashtags := entities.get("hashtags"):
|
||||||
if hashtags:
|
|
||||||
tdata["hashtags"] = [t["text"] for t in hashtags]
|
tdata["hashtags"] = [t["text"] for t in hashtags]
|
||||||
|
|
||||||
mentions = entities.get("user_mentions")
|
if mentions := entities.get("user_mentions"):
|
||||||
if mentions:
|
|
||||||
tdata["mentions"] = [{
|
tdata["mentions"] = [{
|
||||||
"id": text.parse_int(u["id_str"]),
|
"id": text.parse_int(u["id_str"]),
|
||||||
"name": u["screen_name"],
|
"name": u["screen_name"],
|
||||||
@@ -424,8 +421,7 @@ class TwitterExtractor(Extractor):
|
|||||||
} for u in mentions]
|
} for u in mentions]
|
||||||
|
|
||||||
content = text.unescape(content)
|
content = text.unescape(content)
|
||||||
urls = entities.get("urls")
|
if urls := entities.get("urls"):
|
||||||
if urls:
|
|
||||||
for url in urls:
|
for url in urls:
|
||||||
try:
|
try:
|
||||||
content = content.replace(url["url"], url["expanded_url"])
|
content = content.replace(url["url"], url["expanded_url"])
|
||||||
@@ -496,8 +492,7 @@ class TwitterExtractor(Extractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
descr = user["description"]
|
descr = user["description"]
|
||||||
urls = entities["description"].get("urls")
|
if urls := entities["description"].get("urls"):
|
||||||
if urls:
|
|
||||||
for url in urls:
|
for url in urls:
|
||||||
try:
|
try:
|
||||||
descr = descr.replace(url["url"], url["expanded_url"])
|
descr = descr.replace(url["url"], url["expanded_url"])
|
||||||
@@ -922,8 +917,7 @@ class TwitterTweetExtractor(TwitterExtractor):
|
|||||||
self.tweet_id = match[2]
|
self.tweet_id = match[2]
|
||||||
|
|
||||||
def tweets(self):
|
def tweets(self):
|
||||||
conversations = self.config("conversations")
|
if conversations := self.config("conversations"):
|
||||||
if conversations:
|
|
||||||
self._accessible = (conversations == "accessible")
|
self._accessible = (conversations == "accessible")
|
||||||
return self._tweets_conversation(self.tweet_id)
|
return self._tweets_conversation(self.tweet_id)
|
||||||
|
|
||||||
@@ -1563,8 +1557,7 @@ class TwitterAPI():
|
|||||||
headers=self.headers, fatal=None)
|
headers=self.headers, fatal=None)
|
||||||
|
|
||||||
# update 'x-csrf-token' header (#1170)
|
# update 'x-csrf-token' header (#1170)
|
||||||
csrf_token = response.cookies.get("ct0")
|
if csrf_token := response.cookies.get("ct0"):
|
||||||
if csrf_token:
|
|
||||||
self.headers["x-csrf-token"] = csrf_token
|
self.headers["x-csrf-token"] = csrf_token
|
||||||
|
|
||||||
remaining = int(response.headers.get("x-rate-limit-remaining", 6))
|
remaining = int(response.headers.get("x-rate-limit-remaining", 6))
|
||||||
@@ -1639,8 +1632,7 @@ class TwitterAPI():
|
|||||||
|
|
||||||
def _pagination_legacy(self, endpoint, params):
|
def _pagination_legacy(self, endpoint, params):
|
||||||
extr = self.extractor
|
extr = self.extractor
|
||||||
cursor = extr._init_cursor()
|
if cursor := extr._init_cursor():
|
||||||
if cursor:
|
|
||||||
params["cursor"] = cursor
|
params["cursor"] = cursor
|
||||||
original_retweets = (extr.retweets == "original")
|
original_retweets = (extr.retweets == "original")
|
||||||
bottom = ("cursor-bottom-", "sq-cursor-bottom")
|
bottom = ("cursor-bottom-", "sq-cursor-bottom")
|
||||||
@@ -1721,8 +1713,7 @@ class TwitterAPI():
|
|||||||
yield tweet
|
yield tweet
|
||||||
|
|
||||||
if "quoted_status_id_str" in tweet:
|
if "quoted_status_id_str" in tweet:
|
||||||
quoted = tweets.get(tweet["quoted_status_id_str"])
|
if quoted := tweets.get(tweet["quoted_status_id_str"]):
|
||||||
if quoted:
|
|
||||||
quoted = quoted.copy()
|
quoted = quoted.copy()
|
||||||
quoted["author"] = users[quoted["user_id_str"]]
|
quoted["author"] = users[quoted["user_id_str"]]
|
||||||
quoted["quoted_by"] = tweet["user"]["screen_name"]
|
quoted["quoted_by"] = tweet["user"]["screen_name"]
|
||||||
@@ -1742,8 +1733,7 @@ class TwitterAPI():
|
|||||||
pinned_tweet = extr.pinned
|
pinned_tweet = extr.pinned
|
||||||
|
|
||||||
params = {"variables": None}
|
params = {"variables": None}
|
||||||
cursor = extr._init_cursor()
|
if cursor := extr._init_cursor():
|
||||||
if cursor:
|
|
||||||
variables["cursor"] = cursor
|
variables["cursor"] = cursor
|
||||||
if features is None:
|
if features is None:
|
||||||
features = self.features_pagination
|
features = self.features_pagination
|
||||||
@@ -1792,8 +1782,7 @@ class TwitterAPI():
|
|||||||
except LookupError:
|
except LookupError:
|
||||||
extr.log.debug(data)
|
extr.log.debug(data)
|
||||||
|
|
||||||
user = extr._user_obj
|
if user := extr._user_obj:
|
||||||
if user:
|
|
||||||
user = user["legacy"]
|
user = user["legacy"]
|
||||||
if user.get("blocked_by"):
|
if user.get("blocked_by"):
|
||||||
if self.headers["x-twitter-auth-type"] and \
|
if self.headers["x-twitter-auth-type"] and \
|
||||||
@@ -1942,8 +1931,7 @@ class TwitterAPI():
|
|||||||
|
|
||||||
def _pagination_users(self, endpoint, variables, path=None):
|
def _pagination_users(self, endpoint, variables, path=None):
|
||||||
extr = self.extractor
|
extr = self.extractor
|
||||||
cursor = extr._init_cursor()
|
if cursor := extr._init_cursor():
|
||||||
if cursor:
|
|
||||||
variables["cursor"] = cursor
|
variables["cursor"] = cursor
|
||||||
params = {
|
params = {
|
||||||
"variables": None,
|
"variables": None,
|
||||||
@@ -2018,8 +2006,7 @@ def _login_impl(extr, username, password):
|
|||||||
method="POST", fatal=None)
|
method="POST", fatal=None)
|
||||||
|
|
||||||
# update 'x-csrf-token' header (#5945)
|
# update 'x-csrf-token' header (#5945)
|
||||||
csrf_token = response.cookies.get("ct0")
|
if csrf_token := response.cookies.get("ct0"):
|
||||||
if csrf_token:
|
|
||||||
headers["x-csrf-token"] = csrf_token
|
headers["x-csrf-token"] = csrf_token
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -25,8 +25,7 @@ class VipergirlsExtractor(Extractor):
|
|||||||
cookies_names = ("vg_userid", "vg_password")
|
cookies_names = ("vg_userid", "vg_password")
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
domain = self.config("domain")
|
if domain := self.config("domain"):
|
||||||
if domain:
|
|
||||||
pos = domain.find("://")
|
pos = domain.find("://")
|
||||||
if pos >= 0:
|
if pos >= 0:
|
||||||
self.root = domain.rstrip("/")
|
self.root = domain.rstrip("/")
|
||||||
@@ -45,8 +44,7 @@ class VipergirlsExtractor(Extractor):
|
|||||||
forum_title = root[1].attrib["title"]
|
forum_title = root[1].attrib["title"]
|
||||||
thread_title = root[2].attrib["title"]
|
thread_title = root[2].attrib["title"]
|
||||||
|
|
||||||
like = self.config("like")
|
if like := self.config("like"):
|
||||||
if like:
|
|
||||||
user_hash = root[0].get("hash")
|
user_hash = root[0].get("hash")
|
||||||
if len(user_hash) < 16:
|
if len(user_hash) < 16:
|
||||||
self.log.warning("Login required to like posts")
|
self.log.warning("Login required to like posts")
|
||||||
|
|||||||
@@ -112,8 +112,7 @@ class VkExtractor(Extractor):
|
|||||||
if offset_next >= total:
|
if offset_next >= total:
|
||||||
# the last chunk of photos also contains the first few photos
|
# the last chunk of photos also contains the first few photos
|
||||||
# again if 'total' is not a multiple of 10
|
# again if 'total' is not a multiple of 10
|
||||||
extra = total - offset_next
|
if extra := total - offset_next:
|
||||||
if extra:
|
|
||||||
del photos[extra:]
|
del photos[extra:]
|
||||||
|
|
||||||
yield from photos
|
yield from photos
|
||||||
|
|||||||
@@ -44,8 +44,7 @@ class WallpapercaveImageExtractor(Extractor):
|
|||||||
if path is None:
|
if path is None:
|
||||||
for wp in text.extract_iter(
|
for wp in text.extract_iter(
|
||||||
page, 'class="wallpaper" id="wp', '</picture>'):
|
page, 'class="wallpaper" id="wp', '</picture>'):
|
||||||
path = text.rextr(wp, ' src="', '"')
|
if path := text.rextr(wp, ' src="', '"'):
|
||||||
if path:
|
|
||||||
image = text.nameext_from_url(path)
|
image = text.nameext_from_url(path)
|
||||||
yield Message.Directory, image
|
yield Message.Directory, image
|
||||||
yield Message.Url, self.root + path, image
|
yield Message.Url, self.root + path, image
|
||||||
|
|||||||
@@ -94,8 +94,7 @@ class WarosuThreadExtractor(Extractor):
|
|||||||
"", "<").rstrip().rpartition(".")[0])
|
"", "<").rstrip().rpartition(".")[0])
|
||||||
extr("<br>", "")
|
extr("<br>", "")
|
||||||
|
|
||||||
url = extr("<a href=", ">")
|
if url := extr("<a href=", ">"):
|
||||||
if url:
|
|
||||||
if url[0] == "/":
|
if url[0] == "/":
|
||||||
data["image"] = self.root + url
|
data["image"] = self.root + url
|
||||||
elif "warosu." not in url:
|
elif "warosu." not in url:
|
||||||
|
|||||||
@@ -111,8 +111,7 @@ class WeiboExtractor(Extractor):
|
|||||||
self.log.warning("Unknown media type '%s'", type)
|
self.log.warning("Unknown media type '%s'", type)
|
||||||
return
|
return
|
||||||
|
|
||||||
pic_ids = status.get("pic_ids")
|
if pic_ids := status.get("pic_ids"):
|
||||||
if pic_ids:
|
|
||||||
pics = status["pic_infos"]
|
pics = status["pic_infos"]
|
||||||
for pic_id in pic_ids:
|
for pic_id in pic_ids:
|
||||||
pic = pics[pic_id]
|
pic = pics[pic_id]
|
||||||
@@ -187,17 +186,15 @@ class WeiboExtractor(Extractor):
|
|||||||
yield from statuses
|
yield from statuses
|
||||||
|
|
||||||
# videos, newvideo
|
# videos, newvideo
|
||||||
cursor = data.get("next_cursor")
|
if cursor := data.get("next_cursor"):
|
||||||
if cursor:
|
|
||||||
if cursor == -1:
|
if cursor == -1:
|
||||||
return
|
return
|
||||||
params["cursor"] = cursor
|
params["cursor"] = cursor
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# album
|
# album
|
||||||
since_id = data.get("since_id")
|
if since_id := data.get("since_id"):
|
||||||
if since_id:
|
params["sinceid"] = since_id
|
||||||
params["sinceid"] = data["since_id"]
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# home, article
|
# home, article
|
||||||
|
|||||||
@@ -82,8 +82,7 @@ class YoutubeDLExtractor(Extractor):
|
|||||||
ytdl_module, self, user_opts, extr_opts)
|
ytdl_module, self, user_opts, extr_opts)
|
||||||
|
|
||||||
# transfer cookies to ytdl
|
# transfer cookies to ytdl
|
||||||
cookies = self.cookies
|
if cookies := self.cookies:
|
||||||
if cookies:
|
|
||||||
set_cookie = ytdl_instance.cookiejar.set_cookie
|
set_cookie = ytdl_instance.cookiejar.set_cookie
|
||||||
for cookie in cookies:
|
for cookie in cookies:
|
||||||
set_cookie(cookie)
|
set_cookie(cookie)
|
||||||
|
|||||||
@@ -173,8 +173,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
|
|||||||
self.posts = self.posts_api
|
self.posts = self.posts_api
|
||||||
self.session.headers["User-Agent"] = util.USERAGENT
|
self.session.headers["User-Agent"] = util.USERAGENT
|
||||||
|
|
||||||
exts = self.config("extensions")
|
if exts := self.config("extensions"):
|
||||||
if exts:
|
|
||||||
if isinstance(exts, str):
|
if isinstance(exts, str):
|
||||||
exts = exts.split(",")
|
exts = exts.split(",")
|
||||||
self.exts = exts
|
self.exts = exts
|
||||||
|
|||||||
@@ -48,15 +48,13 @@ class Job():
|
|||||||
self.kwdict = {}
|
self.kwdict = {}
|
||||||
self.kwdict_eval = False
|
self.kwdict_eval = False
|
||||||
|
|
||||||
cfgpath = self._build_config_path(parent)
|
if cfgpath := self._build_config_path(parent):
|
||||||
if cfgpath:
|
|
||||||
if isinstance(cfgpath, list):
|
if isinstance(cfgpath, list):
|
||||||
extr.config = extr._config_shared
|
extr.config = extr._config_shared
|
||||||
extr.config_accumulate = extr._config_shared_accumulate
|
extr.config_accumulate = extr._config_shared_accumulate
|
||||||
extr._cfgpath = cfgpath
|
extr._cfgpath = cfgpath
|
||||||
|
|
||||||
actions = extr.config("actions")
|
if actions := extr.config("actions"):
|
||||||
if actions:
|
|
||||||
from .actions import LoggerAdapter, parse_logging
|
from .actions import LoggerAdapter, parse_logging
|
||||||
self._logger_adapter = LoggerAdapter
|
self._logger_adapter = LoggerAdapter
|
||||||
self._logger_actions = parse_logging(actions)
|
self._logger_actions = parse_logging(actions)
|
||||||
@@ -89,8 +87,7 @@ class Job():
|
|||||||
"current_git_head": util.git_head()
|
"current_git_head": util.git_head()
|
||||||
}
|
}
|
||||||
# user-supplied metadata
|
# user-supplied metadata
|
||||||
kwdict = extr.config("keywords")
|
if kwdict := extr.config("keywords"):
|
||||||
if kwdict:
|
|
||||||
if extr.config("keywords-eval"):
|
if extr.config("keywords-eval"):
|
||||||
self.kwdict_eval = []
|
self.kwdict_eval = []
|
||||||
for key, value in kwdict.items():
|
for key, value in kwdict.items():
|
||||||
@@ -260,8 +257,7 @@ class Job():
|
|||||||
if self.extractor.config(target + "-unique"):
|
if self.extractor.config(target + "-unique"):
|
||||||
predicates.append(util.UniquePredicate())
|
predicates.append(util.UniquePredicate())
|
||||||
|
|
||||||
pfilter = self.extractor.config(target + "-filter")
|
if pfilter := self.extractor.config(target + "-filter"):
|
||||||
if pfilter:
|
|
||||||
try:
|
try:
|
||||||
pred = util.FilterPredicate(pfilter, target)
|
pred = util.FilterPredicate(pfilter, target)
|
||||||
except (SyntaxError, ValueError, TypeError) as exc:
|
except (SyntaxError, ValueError, TypeError) as exc:
|
||||||
@@ -269,8 +265,7 @@ class Job():
|
|||||||
else:
|
else:
|
||||||
predicates.append(pred)
|
predicates.append(pred)
|
||||||
|
|
||||||
prange = self.extractor.config(target + "-range")
|
if prange := self.extractor.config(target + "-range"):
|
||||||
if prange:
|
|
||||||
try:
|
try:
|
||||||
pred = util.RangePredicate(prange)
|
pred = util.RangePredicate(prange)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
@@ -411,12 +406,10 @@ class DownloadJob(Job):
|
|||||||
return
|
return
|
||||||
self.visited.add(url)
|
self.visited.add(url)
|
||||||
|
|
||||||
cls = kwdict.get("_extractor")
|
if cls := kwdict.get("_extractor"):
|
||||||
if cls:
|
|
||||||
extr = cls.from_url(url)
|
extr = cls.from_url(url)
|
||||||
else:
|
else:
|
||||||
extr = extractor.find(url)
|
if extr := extractor.find(url):
|
||||||
if extr:
|
|
||||||
if self._extractor_filter is None:
|
if self._extractor_filter is None:
|
||||||
self._extractor_filter = self._build_extractor_filter()
|
self._extractor_filter = self._build_extractor_filter()
|
||||||
if not self._extractor_filter(extr):
|
if not self._extractor_filter(extr):
|
||||||
@@ -432,8 +425,7 @@ class DownloadJob(Job):
|
|||||||
else:
|
else:
|
||||||
extr._parentdir = pextr._parentdir
|
extr._parentdir = pextr._parentdir
|
||||||
|
|
||||||
pmeta = pextr.config2("parent-metadata", "metadata-parent")
|
if pmeta := pextr.config2("parent-metadata", "metadata-parent"):
|
||||||
if pmeta:
|
|
||||||
if isinstance(pmeta, str):
|
if isinstance(pmeta, str):
|
||||||
data = self.kwdict.copy()
|
data = self.kwdict.copy()
|
||||||
if kwdict:
|
if kwdict:
|
||||||
@@ -486,8 +478,7 @@ class DownloadJob(Job):
|
|||||||
self.archive.finalize()
|
self.archive.finalize()
|
||||||
self.archive.close()
|
self.archive.close()
|
||||||
|
|
||||||
pathfmt = self.pathfmt
|
if pathfmt := self.pathfmt:
|
||||||
if pathfmt:
|
|
||||||
hooks = self.hooks
|
hooks = self.hooks
|
||||||
if "post-after" in hooks:
|
if "post-after" in hooks:
|
||||||
for callback in hooks["post-after"]:
|
for callback in hooks["post-after"]:
|
||||||
@@ -523,8 +514,7 @@ class DownloadJob(Job):
|
|||||||
def download(self, url):
|
def download(self, url):
|
||||||
"""Download 'url'"""
|
"""Download 'url'"""
|
||||||
scheme = url.partition(":")[0]
|
scheme = url.partition(":")[0]
|
||||||
downloader = self.get_downloader(scheme)
|
if downloader := self.get_downloader(scheme):
|
||||||
if downloader:
|
|
||||||
try:
|
try:
|
||||||
return downloader.download(url, self.pathfmt)
|
return downloader.download(url, self.pathfmt)
|
||||||
except OSError as exc:
|
except OSError as exc:
|
||||||
@@ -570,8 +560,7 @@ class DownloadJob(Job):
|
|||||||
# monkey-patch method to do nothing and always return True
|
# monkey-patch method to do nothing and always return True
|
||||||
self.download = pathfmt.fix_extension
|
self.download = pathfmt.fix_extension
|
||||||
|
|
||||||
archive_path = cfg("archive")
|
if archive_path := cfg("archive"):
|
||||||
if archive_path:
|
|
||||||
archive_table = cfg("archive-table")
|
archive_table = cfg("archive-table")
|
||||||
archive_prefix = cfg("archive-prefix")
|
archive_prefix = cfg("archive-prefix")
|
||||||
if archive_prefix is None:
|
if archive_prefix is None:
|
||||||
@@ -608,8 +597,7 @@ class DownloadJob(Job):
|
|||||||
self._archive_write_file = ("file" in events)
|
self._archive_write_file = ("file" in events)
|
||||||
self._archive_write_skip = ("skip" in events)
|
self._archive_write_skip = ("skip" in events)
|
||||||
|
|
||||||
skip = cfg("skip", True)
|
if skip := cfg("skip", True):
|
||||||
if skip:
|
|
||||||
self._skipexc = None
|
self._skipexc = None
|
||||||
if skip == "enumerate":
|
if skip == "enumerate":
|
||||||
pathfmt.check_file = pathfmt._enum_file
|
pathfmt.check_file = pathfmt._enum_file
|
||||||
@@ -623,8 +611,7 @@ class DownloadJob(Job):
|
|||||||
self._skipexc = SystemExit
|
self._skipexc = SystemExit
|
||||||
self._skipmax = text.parse_int(smax)
|
self._skipmax = text.parse_int(smax)
|
||||||
|
|
||||||
skip_filter = cfg("skip-filter")
|
if skip_filter := cfg("skip-filter"):
|
||||||
if skip_filter:
|
|
||||||
self._skipftr = util.compile_filter(skip_filter)
|
self._skipftr = util.compile_filter(skip_filter)
|
||||||
else:
|
else:
|
||||||
self._skipftr = None
|
self._skipftr = None
|
||||||
@@ -637,8 +624,7 @@ class DownloadJob(Job):
|
|||||||
if not cfg("postprocess", True):
|
if not cfg("postprocess", True):
|
||||||
return
|
return
|
||||||
|
|
||||||
postprocessors = extr.config_accumulate("postprocessors")
|
if postprocessors := extr.config_accumulate("postprocessors"):
|
||||||
if postprocessors:
|
|
||||||
self.hooks = collections.defaultdict(list)
|
self.hooks = collections.defaultdict(list)
|
||||||
|
|
||||||
pp_log = self.get_logger("postprocessor")
|
pp_log = self.get_logger("postprocessor")
|
||||||
@@ -867,8 +853,7 @@ class UrlJob(Job):
|
|||||||
stdout_write("| " + url + "\n")
|
stdout_write("| " + url + "\n")
|
||||||
|
|
||||||
def handle_queue(self, url, kwdict):
|
def handle_queue(self, url, kwdict):
|
||||||
cls = kwdict.get("_extractor")
|
if cls := kwdict.get("_extractor"):
|
||||||
if cls:
|
|
||||||
extr = cls.from_url(url)
|
extr = cls.from_url(url)
|
||||||
else:
|
else:
|
||||||
extr = extractor.find(url)
|
extr = extractor.find(url)
|
||||||
@@ -981,8 +966,7 @@ class DataJob(Job):
|
|||||||
self.data.append((Message.Queue, url, self.filter(kwdict)))
|
self.data.append((Message.Queue, url, self.filter(kwdict)))
|
||||||
|
|
||||||
def handle_queue_resolve(self, url, kwdict):
|
def handle_queue_resolve(self, url, kwdict):
|
||||||
cls = kwdict.get("_extractor")
|
if cls := kwdict.get("_extractor"):
|
||||||
if cls:
|
|
||||||
extr = cls.from_url(url)
|
extr = cls.from_url(url)
|
||||||
else:
|
else:
|
||||||
extr = extractor.find(url)
|
extr = extractor.find(url)
|
||||||
|
|||||||
@@ -121,8 +121,7 @@ class PathfmtProxy():
|
|||||||
return pathfmt.__dict__.get(name) if pathfmt else None
|
return pathfmt.__dict__.get(name) if pathfmt else None
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
pathfmt = object.__getattribute__(self, "job").pathfmt
|
if pathfmt := object.__getattribute__(self, "job").pathfmt:
|
||||||
if pathfmt:
|
|
||||||
return pathfmt.path or pathfmt.directory
|
return pathfmt.path or pathfmt.directory
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
@@ -235,8 +234,7 @@ def configure_logging(loglevel):
|
|||||||
minlevel = handler.level
|
minlevel = handler.level
|
||||||
|
|
||||||
# file logging handler
|
# file logging handler
|
||||||
handler = setup_logging_handler("logfile", lvl=loglevel)
|
if handler := setup_logging_handler("logfile", lvl=loglevel):
|
||||||
if handler:
|
|
||||||
root.addHandler(handler)
|
root.addHandler(handler)
|
||||||
if minlevel > handler.level:
|
if minlevel > handler.level:
|
||||||
minlevel = handler.level
|
minlevel = handler.level
|
||||||
@@ -394,8 +392,7 @@ class PipeOutput(NullOutput):
|
|||||||
class TerminalOutput():
|
class TerminalOutput():
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
shorten = config.get(("output",), "shorten", True)
|
if shorten := config.get(("output",), "shorten", True):
|
||||||
if shorten:
|
|
||||||
func = shorten_string_eaw if shorten == "eaw" else shorten_string
|
func = shorten_string_eaw if shorten == "eaw" else shorten_string
|
||||||
limit = shutil.get_terminal_size().columns - OFFSET
|
limit = shutil.get_terminal_size().columns - OFFSET
|
||||||
sep = CHAR_ELLIPSIES
|
sep = CHAR_ELLIPSIES
|
||||||
@@ -460,8 +457,7 @@ class CustomOutput():
|
|||||||
if isinstance(fmt_success, list):
|
if isinstance(fmt_success, list):
|
||||||
off_success, fmt_success = fmt_success
|
off_success, fmt_success = fmt_success
|
||||||
|
|
||||||
shorten = config.get(("output",), "shorten", True)
|
if shorten := config.get(("output",), "shorten", True):
|
||||||
if shorten:
|
|
||||||
func = shorten_string_eaw if shorten == "eaw" else shorten_string
|
func = shorten_string_eaw if shorten == "eaw" else shorten_string
|
||||||
width = shutil.get_terminal_size().columns
|
width = shutil.get_terminal_size().columns
|
||||||
|
|
||||||
|
|||||||
@@ -202,8 +202,7 @@ class PathFormat():
|
|||||||
"""Build directory path and create it if necessary"""
|
"""Build directory path and create it if necessary"""
|
||||||
self.kwdict = kwdict
|
self.kwdict = kwdict
|
||||||
|
|
||||||
segments = self.build_directory(kwdict)
|
if segments := self.build_directory(kwdict):
|
||||||
if segments:
|
|
||||||
self.directory = directory = self.basedirectory + self.clean_path(
|
self.directory = directory = self.basedirectory + self.clean_path(
|
||||||
os.sep.join(segments) + os.sep)
|
os.sep.join(segments) + os.sep)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -22,8 +22,7 @@ class PostProcessor():
|
|||||||
return self.__class__.__name__
|
return self.__class__.__name__
|
||||||
|
|
||||||
def _init_archive(self, job, options, prefix=None):
|
def _init_archive(self, job, options, prefix=None):
|
||||||
archive_path = options.get("archive")
|
if archive_path := options.get("archive"):
|
||||||
if archive_path:
|
|
||||||
extr = job.extractor
|
extr = job.extractor
|
||||||
|
|
||||||
archive_table = options.get("archive-table")
|
archive_table = options.get("archive-table")
|
||||||
|
|||||||
@@ -21,8 +21,7 @@ class ComparePP(PostProcessor):
|
|||||||
self._compare = self._compare_size
|
self._compare = self._compare_size
|
||||||
self._equal_exc = self._equal_cnt = 0
|
self._equal_exc = self._equal_cnt = 0
|
||||||
|
|
||||||
equal = options.get("equal")
|
if equal := options.get("equal"):
|
||||||
if equal:
|
|
||||||
equal, _, emax = equal.partition(":")
|
equal, _, emax = equal.partition(":")
|
||||||
self._equal_max = text.parse_int(emax)
|
self._equal_max = text.parse_int(emax)
|
||||||
if equal == "abort":
|
if equal == "abort":
|
||||||
|
|||||||
@@ -55,8 +55,7 @@ class MetadataPP(PostProcessor):
|
|||||||
self._json_encode = self._make_encoder(options, 4).encode
|
self._json_encode = self._make_encoder(options, 4).encode
|
||||||
ext = "json"
|
ext = "json"
|
||||||
|
|
||||||
base_directory = options.get("base-directory")
|
if base_directory := options.get("base-directory"):
|
||||||
if base_directory:
|
|
||||||
if base_directory is True:
|
if base_directory is True:
|
||||||
self._base = lambda p: p.basedirectory
|
self._base = lambda p: p.basedirectory
|
||||||
else:
|
else:
|
||||||
@@ -181,8 +180,7 @@ class MetadataPP(PostProcessor):
|
|||||||
try:
|
try:
|
||||||
pathfmt.directory_formatters = self._directory_formatters
|
pathfmt.directory_formatters = self._directory_formatters
|
||||||
pathfmt.directory_conditions = ()
|
pathfmt.directory_conditions = ()
|
||||||
segments = pathfmt.build_directory(pathfmt.kwdict)
|
if segments := pathfmt.build_directory(pathfmt.kwdict):
|
||||||
if segments:
|
|
||||||
directory = pathfmt.clean_path(os.sep.join(segments) + os.sep)
|
directory = pathfmt.clean_path(os.sep.join(segments) + os.sep)
|
||||||
else:
|
else:
|
||||||
directory = "." + os.sep
|
directory = "." + os.sep
|
||||||
@@ -244,8 +242,7 @@ class MetadataPP(PostProcessor):
|
|||||||
fp.write(self._json_encode(kwdict) + "\n")
|
fp.write(self._json_encode(kwdict) + "\n")
|
||||||
|
|
||||||
def _make_filter(self, options):
|
def _make_filter(self, options):
|
||||||
include = options.get("include")
|
if include := options.get("include"):
|
||||||
if include:
|
|
||||||
if isinstance(include, str):
|
if isinstance(include, str):
|
||||||
include = include.split(",")
|
include = include.split(",")
|
||||||
return lambda d: {k: d[k] for k in include if k in d}
|
return lambda d: {k: d[k] for k in include if k in d}
|
||||||
|
|||||||
@@ -17,8 +17,7 @@ class MtimePP(PostProcessor):
|
|||||||
|
|
||||||
def __init__(self, job, options):
|
def __init__(self, job, options):
|
||||||
PostProcessor.__init__(self, job)
|
PostProcessor.__init__(self, job)
|
||||||
value = options.get("value")
|
if value := options.get("value"):
|
||||||
if value:
|
|
||||||
self._get = formatter.parse(value, None, util.identity).format_map
|
self._get = formatter.parse(value, None, util.identity).format_map
|
||||||
else:
|
else:
|
||||||
key = options.get("key", "date")
|
key = options.get("key", "date")
|
||||||
|
|||||||
@@ -296,8 +296,7 @@ class UgoiraPP(PostProcessor):
|
|||||||
def _exec(self, args):
|
def _exec(self, args):
|
||||||
self.log.debug(args)
|
self.log.debug(args)
|
||||||
out = None if self.output else subprocess.DEVNULL
|
out = None if self.output else subprocess.DEVNULL
|
||||||
retcode = util.Popen(args, stdout=out, stderr=out).wait()
|
if retcode := util.Popen(args, stdout=out, stderr=out).wait():
|
||||||
if retcode:
|
|
||||||
output.stderr_write("\n")
|
output.stderr_write("\n")
|
||||||
self.log.error("Non-zero exit status when running %s (%s)",
|
self.log.error("Non-zero exit status when running %s (%s)",
|
||||||
args, retcode)
|
args, retcode)
|
||||||
|
|||||||
@@ -320,20 +320,17 @@ def dump_response(response, fp, headers=False, content=True, hide_auth=True):
|
|||||||
res_headers = response.headers.copy()
|
res_headers = response.headers.copy()
|
||||||
|
|
||||||
if hide_auth:
|
if hide_auth:
|
||||||
authorization = req_headers.get("Authorization")
|
if authorization := req_headers.get("Authorization"):
|
||||||
if authorization:
|
|
||||||
atype, sep, _ = str(authorization).partition(" ")
|
atype, sep, _ = str(authorization).partition(" ")
|
||||||
req_headers["Authorization"] = f"{atype} ***" if sep else "***"
|
req_headers["Authorization"] = f"{atype} ***" if sep else "***"
|
||||||
|
|
||||||
cookie = req_headers.get("Cookie")
|
if cookie := req_headers.get("Cookie"):
|
||||||
if cookie:
|
|
||||||
req_headers["Cookie"] = ";".join(
|
req_headers["Cookie"] = ";".join(
|
||||||
c.partition("=")[0] + "=***"
|
c.partition("=")[0] + "=***"
|
||||||
for c in cookie.split(";")
|
for c in cookie.split(";")
|
||||||
)
|
)
|
||||||
|
|
||||||
set_cookie = res_headers.get("Set-Cookie")
|
if set_cookie := res_headers.get("Set-Cookie"):
|
||||||
if set_cookie:
|
|
||||||
res_headers["Set-Cookie"] = re(r"(^|, )([^ =]+)=[^,;]*").sub(
|
res_headers["Set-Cookie"] = re(r"(^|, )([^ =]+)=[^,;]*").sub(
|
||||||
r"\1\2=***", set_cookie)
|
r"\1\2=***", set_cookie)
|
||||||
|
|
||||||
@@ -377,14 +374,11 @@ def extract_headers(response):
|
|||||||
headers = response.headers
|
headers = response.headers
|
||||||
data = dict(headers)
|
data = dict(headers)
|
||||||
|
|
||||||
hcd = headers.get("content-disposition")
|
if hcd := headers.get("content-disposition"):
|
||||||
if hcd:
|
if name := text.extr(hcd, 'filename="', '"'):
|
||||||
name = text.extr(hcd, 'filename="', '"')
|
|
||||||
if name:
|
|
||||||
text.nameext_from_url(name, data)
|
text.nameext_from_url(name, data)
|
||||||
|
|
||||||
hlm = headers.get("last-modified")
|
if hlm := headers.get("last-modified"):
|
||||||
if hlm:
|
|
||||||
data["date"] = datetime.datetime(*parsedate_tz(hlm)[:6])
|
data["date"] = datetime.datetime(*parsedate_tz(hlm)[:6])
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|||||||
@@ -258,8 +258,7 @@ def parse_command_line(module, argv):
|
|||||||
None if opts.match_filter is None
|
None if opts.match_filter is None
|
||||||
else module.match_filter_func(opts.match_filter))
|
else module.match_filter_func(opts.match_filter))
|
||||||
|
|
||||||
cookiesfrombrowser = getattr(opts, "cookiesfrombrowser", None)
|
if cookiesfrombrowser := getattr(opts, "cookiesfrombrowser", None):
|
||||||
if cookiesfrombrowser:
|
|
||||||
pattern = util.re(r"""(?x)
|
pattern = util.re(r"""(?x)
|
||||||
(?P<name>[^+:]+)
|
(?P<name>[^+:]+)
|
||||||
(?:\s*\+\s*(?P<keyring>[^:]+))?
|
(?:\s*\+\s*(?P<keyring>[^:]+))?
|
||||||
|
|||||||
Reference in New Issue
Block a user