[subscribestar] improve 'filename' (#8416)

This commit is contained in:
Mike Fährmann
2025-10-15 11:52:39 +02:00
parent 17156ab7a2
commit 36a3fe45e4
2 changed files with 13 additions and 8 deletions

View File

@@ -50,10 +50,16 @@ class SubscribestarExtractor(Extractor):
for num, item in enumerate(media, 1):
item.update(data)
item["num"] = num
text.nameext_from_url(item.get("name") or item["url"], item)
if item["url"][0] == "/":
item["url"] = self.root + item["url"]
yield Message.Url, item["url"], item
url = item["url"]
if name := (item.get("name") or item.get("original_filename")):
text.nameext_from_name(name, item)
else:
text.nameext_from_url(url, item)
if url[0] == "/":
url = f"{self.root}{url}"
yield Message.Url, url, item
def posts(self):
"""Yield HTML content of all relevant posts"""

View File

@@ -62,7 +62,7 @@ __tests__ = (
"content" : r"re:<h1>Brand Guidelines and Assets</h1>",
"date" : "dt:2020-05-07 12:33:00",
"extension" : "jpg",
"filename" : "8ff61299-b249-47dc-880a-cdacc9081c62",
"filename" : "ss_page-brand",
"group" : "imgs_and_videos",
"height" : 291,
"id" : 203885,
@@ -81,8 +81,7 @@ __tests__ = (
"#category": ("", "subscribestar", "post"),
"#class" : subscribestar.SubscribestarPostExtractor,
"#range" : "2",
"#pattern" : r"https://ss-uploads-prod\.b-cdn\.net/uploads_v2/users/11/posts/920015/bc018a55-9668-47f4-a664-b5fd66b56aaa\.pdf",
"#pattern" : r"https://\w+.cloudfront.net/uploads_v2/users/11/posts/920015/bc018a55-9668-47f4-a664-b5fd66b56aaa.pdf\?filename=Training%2520for%2520freelancers%2520-%2520Fiverr.pdf&.+",
"date" : "dt:2023-05-30 09:20:00",
"extension": "pdf",
"filename" : "Training for freelancers - Fiverr",
@@ -106,7 +105,7 @@ __tests__ = (
"content" : "<h1>Listening to Sasquatch - Driving to the Rez - Episode 243 - Part One</h1>\n\n<p>Topics we cover:</p>\n\n<p>Tree breaks, Foot stomps, Tracks and trackways, Hoots/calls with answers, \nTree structures, nests, Portal Cracks, Shapeshifting, Shimmer/invisibility \ncloaking, direct physical interaction inside the cloaking field, manipulation \nof canoe while we are in it, face to face interactions with multiple individuals \nteen aged and adult, male and female, cloaked and not cloaked, \nand vocalizations like drops of water. Truly amazing stories.</p>\n\n<p><a href=\"https://www.subscribestar.com/posts/1853792\" data-href=\"https://www.subscribestar.com/posts/1853792\">Go To Part Two</a></p>\n\n<p><a href=\"/away?url=aHR0cHM6Ly92aWRlby5pbmVsaWFiZW56LmNvbS9saXN0ZW5pbmctdG8tc2Fz%0AcXVhdGNoLWRyaXZpbmctdG8tdGhlLXJlei1lcGlzb2RlLTI0My1wYXJ0LW9u%0AZQ==%0A\" data-href=\"https://video.ineliabenz.com/listening-to-sasquatch-driving-to-the-rez-episode-243-part-one\">Watch the Video</a></p>\n\n<p><a href=\"/away?url=aHR0cHM6Ly9pbmVsaWEuc3Vic3RhY2suY29tL3AvbGlzdGVuaW5nLXRvLXNh%0Ac3F1YXRjaA==%0A\" data-href=\"https://inelia.substack.com/p/listening-to-sasquatch\">Read the article</a></p>\n\n<p>Audio is attached to this post.</p>",
"date" : "dt:2025-05-07 13:23:00",
"extension" : {"mp3", "jpg"},
"filename" : {"dttr-243-sasquatch-part1", "38cba130-3a31-4d8d-b326-7e5d3704801f"},
"filename" : {"dttr-243-sasquatch-part1", "yt-243-pt1"},
"id" : {0, 4627253},
"num" : range(1, 2),
"post_id" : 1851025,