[bellazon] fix video attachments (#8239)
This commit is contained in:
@@ -25,7 +25,7 @@ class BellazonExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
extract_urls = text.re(r'<a ([^>]*?href="([^"]+)".*?)</a>').findall
|
||||
native = f"{self.root}/"
|
||||
native = (f"{self.root}/", f"{self.root[6:]}/")
|
||||
|
||||
for post in self.posts():
|
||||
urls = extract_urls(post["content"])
|
||||
@@ -41,10 +41,20 @@ class BellazonExtractor(Extractor):
|
||||
name = url
|
||||
else:
|
||||
name = text.unescape(alt)
|
||||
|
||||
dc = text.nameext_from_url(name, data.copy())
|
||||
dc["id"] = text.extr(info, 'data-fileid="', '"')
|
||||
if ext := text.extr(info, 'data-fileext="', '"'):
|
||||
dc["extension"] = ext
|
||||
elif "/core/interface/file/attachment.php" in url:
|
||||
if not dc["id"]:
|
||||
dc["id"] = url.rpartition("?id=")[2]
|
||||
if (pos := info.find(">")) >= 0 and \
|
||||
(name := info[pos+1:].strip()):
|
||||
text.nameext_from_url(name, dc)
|
||||
|
||||
if url[0] == "/":
|
||||
url = f"https:{url}"
|
||||
yield Message.Url, url, dc
|
||||
else:
|
||||
yield Message.Queue, url, data
|
||||
@@ -88,7 +98,7 @@ class BellazonExtractor(Extractor):
|
||||
"posts": stats[1]["userInteractionCount"],
|
||||
"date" : text.parse_datetime(schema["datePublished"]),
|
||||
"date_updated": text.parse_datetime(schema["dateModified"]),
|
||||
"description" : text.unescape(schema["text"]),
|
||||
"description" : text.unescape(schema["text"]).strip(),
|
||||
"section" : path[-2],
|
||||
"author" : author["name"],
|
||||
"author_url" : url_a,
|
||||
|
||||
@@ -56,7 +56,7 @@ __tests__ = (
|
||||
"author_url" : "https://www.bellazon.com/main/profile/72476-shepherd/",
|
||||
"date" : "dt:2015-06-20 21:34:31",
|
||||
"date_updated": "dt:2017-06-29 04:32:43",
|
||||
"description" : "Previously featured in the popular TV series, Mr Selfridge, emerging British born actress Millie Brady is set for huge success. \nMillie has just been confirmed as the lead role in ‘The Clan of the Cave Bear’ which will begin filming in May 2015. The drama pilot is from Imagine TV, Allison Shearmur Productions, Fox 21 TV and Lionsgate TV. Millie is also due to appear in the eagerly awaited black comedy, 'Pride and Prejudice and Zombies', staring alongside Matt Smith, Sally Philiips, Douglas Booth, Lily james and Sam Riley. She is currently filming 'Knights of the Roundtable: King Arthur' directed by Guy Ritchie. \n \n \nFarfetch, Jun 2015 \nLinda Brownlee photos \n \n \n",
|
||||
"description" : "Previously featured in the popular TV series, Mr Selfridge, emerging British born actress Millie Brady is set for huge success. \nMillie has just been confirmed as the lead role in ‘The Clan of the Cave Bear’ which will begin filming in May 2015. The drama pilot is from Imagine TV, Allison Shearmur Productions, Fox 21 TV and Lionsgate TV. Millie is also due to appear in the eagerly awaited black comedy, 'Pride and Prejudice and Zombies', staring alongside Matt Smith, Sally Philiips, Douglas Booth, Lily james and Sam Riley. She is currently filming 'Knights of the Roundtable: King Arthur' directed by Guy Ritchie. \n \n \nFarfetch, Jun 2015 \nLinda Brownlee photos",
|
||||
"id" : "57872",
|
||||
"posts" : 1,
|
||||
"section" : "Actresses",
|
||||
@@ -117,6 +117,63 @@ __tests__ = (
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4576614",
|
||||
"#comment" : "video attachments (#8239)",
|
||||
"#class" : bellazon.BellazonPostExtractor,
|
||||
"#pattern" : r"https://www\.bellazon\.com/main/applications/core/interface/file/attachment\.php\?id=\d+$",
|
||||
"#range" : "2-",
|
||||
"#count" : 10,
|
||||
|
||||
"count" : 12,
|
||||
"extension": "mp4",
|
||||
"filename" : r"re:^\d+$",
|
||||
"id" : r"re:6361\d\d\d",
|
||||
"num" : range(3, 12),
|
||||
"post" : {
|
||||
"author_id" : "101807",
|
||||
"author_slug": "rogerdanish",
|
||||
"author_url" : "https://www.bellazon.com/main/profile/101807-rogerdanish/",
|
||||
"count" : 12,
|
||||
"date" : "dt:2018-04-06 19:06:06",
|
||||
"id" : "4576614",
|
||||
"content" : str
|
||||
},
|
||||
"thread" : {
|
||||
"author" : "gtemt",
|
||||
"author_id" : "29506",
|
||||
"author_slug" : "gtemt",
|
||||
"author_url" : "https://www.bellazon.com/main/profile/29506-gtemt/",
|
||||
"date" : "dt:2017-12-19 12:18:46",
|
||||
"date_updated": "type:datetime",
|
||||
"description" : "VID",
|
||||
"id" : "66334",
|
||||
"posts" : range(750, 999),
|
||||
"section" : "Other Females of Interest",
|
||||
"slug" : "charly-jordan",
|
||||
"title" : "Charly Jordan",
|
||||
"url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/",
|
||||
"views" : int,
|
||||
"path" : [
|
||||
"Females",
|
||||
"Other Females of Interest",
|
||||
"Charly Jordan",
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4571129",
|
||||
"#comment" : "video attachment with '//www.bellazon.com/main/' as URL (#8239)",
|
||||
"#class" : bellazon.BellazonPostExtractor,
|
||||
"#results" : (
|
||||
"https://www.bellazon.com/main/uploads/monthly_2018_03/charlyjordan10_Bg6mLKlFBuU.jpg.07b89fe216300157ff5dad0652df11cb.jpg",
|
||||
"https://www.bellazon.com/main/uploads/monthly_2018_03/charlyjordan10_Bg6mLRzlFPz.jpg.3c846bc3d7a2ec4854012ca3bab0af99.jpg",
|
||||
"https://www.bellazon.com/main/uploads/monthly_2018_03/charlyjordan10_Bg6mLVYlQUL.jpg.7e32ef45d5ba5270a330b250f83639dd.jpg",
|
||||
"https://www.bellazon.com/main/applications/core/interface/file/attachment.php?id=6341394",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/",
|
||||
"#class" : bellazon.BellazonThreadExtractor,
|
||||
|
||||
Reference in New Issue
Block a user