From 96bb2b16303d8d9c14f3f8785cf4ef5df0a0fd1f Mon Sep 17 00:00:00 2001 From: NecRaul Date: Wed, 11 Jun 2025 04:06:16 +0400 Subject: [PATCH 1/5] Fix Archived.moe redirection issue Unless the board is /b/ (in which case redirection works fine), trim the filename portion of the URL down to 13 characters (the length of an epoch timestamp in milliseconds). --- gallery_dl/extractor/foolfuuka.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 5f90afc1..8a0b9e5d 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -36,6 +36,13 @@ class FoolfuukaExtractor(BaseExtractor): url = media["media_link"] if not url and "remote_media_link" in media: + if "/b/" not in media["remote_media_link"] and \ + media["remote_media_link"].startswith("https://archived.moe/"): + filename = media["media"].rpartition(".")[0] + if len(filename) > 13: + filename_remainder = len(filename) - 13 + remote_media_link = media["remote_media_link"].replace(filename, filename[:-filename_remainder]) + media["remote_media_link"] = remote_media_link url = self.remote(media) if url and url[0] == "/": url = self.root + url From cb74d0f2f3a33e4c01f3212dcc87c04c32ef9dd6 Mon Sep 17 00:00:00 2001 From: NecRaul Date: Wed, 11 Jun 2025 04:44:13 +0400 Subject: [PATCH 2/5] Lint with flake8 --- gallery_dl/extractor/foolfuuka.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 8a0b9e5d..250eab72 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -36,12 +36,15 @@ class FoolfuukaExtractor(BaseExtractor): url = media["media_link"] if not url and "remote_media_link" in media: - if "/b/" not in media["remote_media_link"] and \ - media["remote_media_link"].startswith("https://archived.moe/"): + remote_media_link = media["remote_media_link"] + if "/b/" 
not in remote_media_link and \ + remote_media_link.startswith("https://archived.moe/"): filename = media["media"].rpartition(".")[0] if len(filename) > 13: filename_remainder = len(filename) - 13 - remote_media_link = media["remote_media_link"].replace(filename, filename[:-filename_remainder]) + remote_media_link = media["remote_media_link"].replace( + filename, filename[:-filename_remainder] + ) media["remote_media_link"] = remote_media_link url = self.remote(media) if url and url[0] == "/": From 43706545322aaf0ababdcbece63d524267b49a01 Mon Sep 17 00:00:00 2001 From: NecRaul Date: Wed, 11 Jun 2025 04:49:21 +0400 Subject: [PATCH 3/5] Simplify remote_media_link assignment --- gallery_dl/extractor/foolfuuka.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 250eab72..8755086b 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -42,10 +42,9 @@ class FoolfuukaExtractor(BaseExtractor): filename = media["media"].rpartition(".")[0] if len(filename) > 13: filename_remainder = len(filename) - 13 - remote_media_link = media["remote_media_link"].replace( + media["remote_media_link"] = remote_media_link.replace( filename, filename[:-filename_remainder] ) - media["remote_media_link"] = remote_media_link url = self.remote(media) if url and url[0] == "/": url = self.root + url From 8645be8244078275a4ee2675afbc334a629e9f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 11 Jun 2025 12:10:18 +0200 Subject: [PATCH 4/5] [archivedmoe] add redirect URL fixup test --- test/results/archivedmoe.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/results/archivedmoe.py b/test/results/archivedmoe.py index 42aae181..c5ddef62 100644 --- a/test/results/archivedmoe.py +++ b/test/results/archivedmoe.py @@ -35,6 +35,16 @@ __tests__ = ( ), }, +{ + "#url" : "https://archived.moe/a/thread/279540316/", + "#comment" : 
"filename/timestamp fixup for redirect URL (#7652)", + "#category": ("foolfuuka", "archivedmoe", "thread"), + "#class" : foolfuuka.FoolfuukaThreadExtractor, + "#urls" : ( + "http://desuarchive.org/a/full_image/1749537017533.jpg", + ), +}, + { "#url" : "https://archived.moe/gd/", "#category": ("foolfuuka", "archivedmoe", "board"), From e3df99dbb9a77cd374a7095edf5dbe13abd32834 Mon Sep 17 00:00:00 2001 From: NecRaul Date: Wed, 11 Jun 2025 21:49:33 +0400 Subject: [PATCH 5/5] Apply mikf's diff regarding Archived.moe Moved (and refactored) code into remote() Added a check for fixup_timestamp --- gallery_dl/extractor/foolfuuka.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 8755086b..de6dadb7 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -26,6 +26,9 @@ class FoolfuukaExtractor(BaseExtractor): self.remote = self._remote_direct elif self.category == "archivedmoe": self.referer = False + self.fixup_timestamp = True + else: + self.fixup_timestamp = False def items(self): yield Message.Directory, self.metadata() @@ -36,15 +39,6 @@ class FoolfuukaExtractor(BaseExtractor): url = media["media_link"] if not url and "remote_media_link" in media: - remote_media_link = media["remote_media_link"] - if "/b/" not in remote_media_link and \ - remote_media_link.startswith("https://archived.moe/"): - filename = media["media"].rpartition(".")[0] - if len(filename) > 13: - filename_remainder = len(filename) - 13 - media["remote_media_link"] = remote_media_link.replace( - filename, filename[:-filename_remainder] - ) url = self.remote(media) if url and url[0] == "/": url = self.root + url @@ -66,9 +60,18 @@ class FoolfuukaExtractor(BaseExtractor): """Resolve a remote media link""" page = self.request(media["remote_media_link"]).text url = text.extr(page, 'http-equiv="Refresh" content="0; url=', '"') - if url.endswith(".webm") 
and \ - url.startswith("https://thebarchive.com/"): - return url[:-1] + + if url.startswith("https://thebarchive.com/"): + # '.webm' -> '.web' (#5116) + if url.endswith(".webm"): + url = url[:-1] + elif self.fixup_timestamp: + # trim filename/timestamp to 13 characters (#7652) + path, _, filename = url.rpartition("/") + name, _, ext = filename.rpartition(".") + if len(name) > 13: + url = "{}/{}.{}".format(path, name[:13], ext) + return url @staticmethod