merge #9047: [foolfuuka] improve media link resolution

This commit is contained in:
Mike Fährmann
2026-02-12 19:40:39 +01:00

View File

@@ -33,13 +33,13 @@ class FoolfuukaExtractor(BaseExtractor):
def items(self): def items(self):
yield Message.Directory, "", self.metadata() yield Message.Directory, "", self.metadata()
for post in self.posts(): for post in self.posts():
media = post["media"] if not (media := post.get("media")):
if not media:
continue continue
board = post["board"]["shortname"]
url = media["media_link"] url = media["media_link"]
if not url and "remote_media_link" in media: if not url and "remote_media_link" in media:
url = self.remote(media) url = self.remote(board, media)
if url and url[0] == "/": if url and url[0] == "/":
url = self.root + url url = self.root + url
@@ -56,8 +56,10 @@ class FoolfuukaExtractor(BaseExtractor):
def posts(self): def posts(self):
"""Return an iterable with all relevant posts""" """Return an iterable with all relevant posts"""
def remote(self, media): def remote(self, board, media):
"""Resolve a remote media link""" """Resolve a remote media link"""
if board in {"wsg", "gif"}:
return f"https://i.4cdn.org/{board}/{media['media_orig']}"
page = self.request(media["remote_media_link"]).text page = self.request(media["remote_media_link"]).text
url = text.extr(page, 'http-equiv="Refresh" content="0; url=', '"') url = text.extr(page, 'http-equiv="Refresh" content="0; url=', '"')
@@ -84,7 +86,6 @@ class FoolfuukaExtractor(BaseExtractor):
"sci": "warosu.org", "sci": "warosu.org",
"tg" : "archive.4plebs.org", "tg" : "archive.4plebs.org",
} }
board = url.split("/", 4)[3]
if board in board_domains: if board in board_domains:
domain = board_domains[board] domain = board_domains[board]
url = f"https://{domain}/{board}/full_image/{filename}" url = f"https://{domain}/{board}/full_image/{filename}"
@@ -98,7 +99,7 @@ class FoolfuukaExtractor(BaseExtractor):
return url return url
def _remote_direct(self, media): def _remote_direct(self, board, media):
return media["remote_media_link"] return media["remote_media_link"]