[pixiv] remove '/jump.php' from *all* AJAX caption links (#4327)

https://github.com/mikf/gallery-dl/issues/4327#issuecomment-2969765775
This commit is contained in:
Mike Fährmann
2025-06-13 20:59:18 +02:00
parent 40dedd7ce0
commit b583891df6

View File

@@ -43,7 +43,7 @@ class PixivExtractor(Extractor):
self.meta_comments = self.config("comments")
self.meta_captions = self.config("captions")
if self.meta_captions:
if self.sanity_workaround or self.meta_captions:
self.meta_captions_sub = util.re(
r'<a href="/jump\.php\?([^"]+)').sub
@@ -91,10 +91,8 @@ class PixivExtractor(Extractor):
not work.get("_mypixiv") and not work.get("_ajax"):
body = self._request_ajax("/illust/" + str(work["id"]))
if body:
caption = self.meta_captions_sub(
lambda m: '<a href="' + text.unquote(m.group(1)),
work["caption"] = self._sanitize_ajax_caption(
body["illustComment"])
work["caption"] = text.unescape(caption)
if transform_tags:
transform_tags(work)
@@ -279,7 +277,7 @@ class PixivExtractor(Extractor):
translated_name = None
tags.append({"name": name, "translated_name": translated_name})
work["caption"] = text.unescape(body["illustComment"])
work["caption"] = self._sanitize_ajax_caption(body["illustComment"])
work["page_count"] = count = body["pageCount"]
if count == 1:
return ({"url": url},)
@@ -318,6 +316,12 @@ class PixivExtractor(Extractor):
except exception.HttpError:
pass
def _sanitize_ajax_caption(self, caption):
if not caption:
return ""
return text.unescape(self.meta_captions_sub(
lambda m: '<a href="' + text.unquote(m.group(1)), caption))
def _fallback_image(self, src):
if isinstance(src, str):
urls = None