[archivedmoe] fix thebarchive webm URLs (#5116)

This commit is contained in:
Mike Fährmann
2024-01-27 00:24:41 +01:00
parent 34a4ddc399
commit 1f7101d606
2 changed files with 19 additions and 2 deletions

View File

@@ -24,6 +24,8 @@ class FoolfuukaExtractor(BaseExtractor):
BaseExtractor.__init__(self, match)
if self.category == "b4k":
self.remote = self._remote_direct
elif self.category == "archivedmoe":
self.referer = False
def items(self):
yield Message.Directory, self.metadata()
@@ -53,9 +55,12 @@ class FoolfuukaExtractor(BaseExtractor):
def remote(self, media):
"""Resolve a remote media link"""
needle = '<meta http-equiv="Refresh" content="0; url='
page = self.request(media["remote_media_link"]).text
return text.extr(page, needle, '"')
url = text.extr(page, 'http-equiv="Refresh" content="0; url=', '"')
if url.endswith(".webm") and \
url.startswith("https://thebarchive.com/"):
return url[:-1]
return url
@staticmethod
def _remote_direct(media):

View File

@@ -23,6 +23,18 @@ __tests__ = (
"#sha1_url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
},
{
"#url" : "https://archived.moe/b/thread/912594917/",
"#comment" : "broken thebarchive .webm URLs (#5116)",
"#category": ("foolfuuka", "archivedmoe", "thread"),
"#class" : foolfuuka.FoolfuukaThreadExtractor,
"#urls" : (
"https://thebarchive.com/b/full_image/1705625299234839.gif",
"https://thebarchive.com/b/full_image/1705625431133806.web",
"https://thebarchive.com/b/full_image/1705626190307840.web",
),
},
{
"#url" : "https://archived.moe/gd/",
"#category": ("foolfuuka", "archivedmoe", "board"),