[bellazon] fix video attachments (#8239)

This commit is contained in:
Mike Fährmann
2025-09-19 12:25:47 +02:00
parent dbf6de2482
commit 713a65923a
2 changed files with 70 additions and 3 deletions

View File

@@ -25,7 +25,7 @@ class BellazonExtractor(Extractor):
def items(self):
extract_urls = text.re(r'<a ([^>]*?href="([^"]+)".*?)</a>').findall
native = f"{self.root}/"
native = (f"{self.root}/", f"{self.root[6:]}/")
for post in self.posts():
urls = extract_urls(post["content"])
@@ -41,10 +41,20 @@ class BellazonExtractor(Extractor):
name = url
else:
name = text.unescape(alt)
dc = text.nameext_from_url(name, data.copy())
dc["id"] = text.extr(info, 'data-fileid="', '"')
if ext := text.extr(info, 'data-fileext="', '"'):
dc["extension"] = ext
elif "/core/interface/file/attachment.php" in url:
if not dc["id"]:
dc["id"] = url.rpartition("?id=")[2]
if (pos := info.find(">")) >= 0 and \
(name := info[pos+1:].strip()):
text.nameext_from_url(name, dc)
if url[0] == "/":
url = f"https:{url}"
yield Message.Url, url, dc
else:
yield Message.Queue, url, data
@@ -88,7 +98,7 @@ class BellazonExtractor(Extractor):
"posts": stats[1]["userInteractionCount"],
"date" : text.parse_datetime(schema["datePublished"]),
"date_updated": text.parse_datetime(schema["dateModified"]),
"description" : text.unescape(schema["text"]),
"description" : text.unescape(schema["text"]).strip(),
"section" : path[-2],
"author" : author["name"],
"author_url" : url_a,

View File

@@ -56,7 +56,7 @@ __tests__ = (
"author_url" : "https://www.bellazon.com/main/profile/72476-shepherd/",
"date" : "dt:2015-06-20 21:34:31",
"date_updated": "dt:2017-06-29 04:32:43",
"description" : "Previously featured in the popular TV series, Mr Selfridge, emerging British born actress Millie Brady is set for huge success. \nMillie has just been confirmed as the lead role in The Clan of the Cave Bear which will begin filming in May 2015. The drama pilot is from Imagine TV, Allison Shearmur Productions, Fox 21 TV and Lionsgate TV. Millie is also due to appear in the eagerly awaited black comedy, 'Pride and Prejudice and Zombies', staring alongside Matt Smith, Sally Philiips, Douglas Booth, Lily james and Sam Riley. She is currently filming 'Knights of the Roundtable: King Arthur' directed by Guy Ritchie. \n  \n  \nFarfetch, Jun 2015 \nLinda Brownlee photos \n  \n \n",
"description" : "Previously featured in the popular TV series, Mr Selfridge, emerging British born actress Millie Brady is set for huge success. \nMillie has just been confirmed as the lead role in The Clan of the Cave Bear which will begin filming in May 2015. The drama pilot is from Imagine TV, Allison Shearmur Productions, Fox 21 TV and Lionsgate TV. Millie is also due to appear in the eagerly awaited black comedy, 'Pride and Prejudice and Zombies', staring alongside Matt Smith, Sally Philiips, Douglas Booth, Lily james and Sam Riley. She is currently filming 'Knights of the Roundtable: King Arthur' directed by Guy Ritchie. \n  \n  \nFarfetch, Jun 2015 \nLinda Brownlee photos",
"id" : "57872",
"posts" : 1,
"section" : "Actresses",
@@ -117,6 +117,63 @@ __tests__ = (
},
},
{
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4576614",
"#comment" : "video attachments (#8239)",
"#class" : bellazon.BellazonPostExtractor,
"#pattern" : r"https://www\.bellazon\.com/main/applications/core/interface/file/attachment\.php\?id=\d+$",
"#range" : "2-",
"#count" : 10,
"count" : 12,
"extension": "mp4",
"filename" : r"re:^\d+$",
"id" : r"re:6361\d\d\d",
"num" : range(3, 12),
"post" : {
"author_id" : "101807",
"author_slug": "rogerdanish",
"author_url" : "https://www.bellazon.com/main/profile/101807-rogerdanish/",
"count" : 12,
"date" : "dt:2018-04-06 19:06:06",
"id" : "4576614",
"content" : str
},
"thread" : {
"author" : "gtemt",
"author_id" : "29506",
"author_slug" : "gtemt",
"author_url" : "https://www.bellazon.com/main/profile/29506-gtemt/",
"date" : "dt:2017-12-19 12:18:46",
"date_updated": "type:datetime",
"description" : "VID",
"id" : "66334",
"posts" : range(750, 999),
"section" : "Other Females of Interest",
"slug" : "charly-jordan",
"title" : "Charly Jordan",
"url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/",
"views" : int,
"path" : [
"Females",
"Other Females of Interest",
"Charly Jordan",
],
},
},
{
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4571129",
"#comment" : "video attachment with '//www.bellazon.com/main/' as URL (#8239)",
"#class" : bellazon.BellazonPostExtractor,
"#results" : (
"https://www.bellazon.com/main/uploads/monthly_2018_03/charlyjordan10_Bg6mLKlFBuU.jpg.07b89fe216300157ff5dad0652df11cb.jpg",
"https://www.bellazon.com/main/uploads/monthly_2018_03/charlyjordan10_Bg6mLRzlFPz.jpg.3c846bc3d7a2ec4854012ca3bab0af99.jpg",
"https://www.bellazon.com/main/uploads/monthly_2018_03/charlyjordan10_Bg6mLVYlQUL.jpg.7e32ef45d5ba5270a330b250f83639dd.jpg",
"https://www.bellazon.com/main/applications/core/interface/file/attachment.php?id=6341394",
),
},
{
"#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/",
"#class" : bellazon.BellazonThreadExtractor,