[bellazon] update (#8247)
- include 'filename' in default filename_fmt and archive_fmt, as 'id' alone is not guaranteed to be unique, even within the same post:
  https://www.bellazon.com/main/topic/3556-bipasha-basu/page/2/#findComment-2536060
- support 'inline' files
- ignore '/profile/' links
- do not increment 'num' on ignored files
This commit is contained in:
@@ -20,13 +20,16 @@ class BellazonExtractor(Extractor):
|
||||
root = "https://www.bellazon.com/main"
|
||||
directory_fmt = ("{category}", "{thread[section]}",
|
||||
"{thread[title]} ({thread[id]})")
|
||||
filename_fmt = "{post[id]}_{num:>02}_{id}.{extension}"
|
||||
archive_fmt = "{post[id]}/{filename}"
|
||||
filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}"
|
||||
archive_fmt = "{post[id]}/{id}_{filename}"
|
||||
|
||||
def items(self):
|
||||
native = (f"{self.root}/", f"{self.root[6:]}/")
|
||||
extract_urls = text.re(
|
||||
r'(?s)<((?:video .*?<source src|a [^>]*?href)="([^"]+).*?)</a>'
|
||||
r'(?s)<('
|
||||
r'(?:video .*?<source src|a [^>]*?href)="([^"]+).*?</a>'
|
||||
r'|img [^>]*?src="([^"]+)"[^>]*>'
|
||||
r')'
|
||||
).findall
|
||||
|
||||
if self.config("quoted", False):
|
||||
@@ -44,9 +47,14 @@ class BellazonExtractor(Extractor):
|
||||
post["count"] = data["count"] = len(urls)
|
||||
|
||||
yield Message.Directory, data
|
||||
for data["num"], (info, url) in enumerate(urls, 1):
|
||||
url = text.unescape(url)
|
||||
data["num"] = 0
|
||||
for info, url, url_img in urls:
|
||||
url = text.unescape(url or url_img)
|
||||
|
||||
if url.startswith(native):
|
||||
if "/uploads/emoticons/" in url or "/profile/" in url:
|
||||
continue
|
||||
data["num"] += 1
|
||||
if not (alt := text.extr(info, ' alt="', '"')) or (
|
||||
alt.startswith("post-") and "_thumb." in alt):
|
||||
name = url
|
||||
@@ -60,13 +68,13 @@ class BellazonExtractor(Extractor):
|
||||
elif "/core/interface/file/attachment.php" in url:
|
||||
if not dc["id"]:
|
||||
dc["id"] = url.rpartition("?id=")[2]
|
||||
if (pos := info.find(">")) >= 0 and \
|
||||
(name := info[pos+1:].strip()):
|
||||
if name := text.extr(info, ">", "<").strip():
|
||||
text.nameext_from_url(name, dc)
|
||||
|
||||
if url[0] == "/":
|
||||
url = f"https:{url}"
|
||||
yield Message.Url, url, dc
|
||||
|
||||
else:
|
||||
yield Message.Queue, url, data
|
||||
|
||||
|
||||
@@ -134,7 +134,7 @@ __tests__ = (
|
||||
"extension": "mp4",
|
||||
"filename" : r"re:^\d+$",
|
||||
"id" : r"re:6361\d\d\d",
|
||||
"num" : range(3, 12),
|
||||
"num" : range(2, 11),
|
||||
"post" : {
|
||||
"author_id" : "101807",
|
||||
"author_slug": "rogerdanish",
|
||||
@@ -190,6 +190,20 @@ __tests__ = (
|
||||
"id" : "10919171",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4602714",
|
||||
"#comment" : "'/profile/' link",
|
||||
"#class" : bellazon.BellazonPostExtractor,
|
||||
"#count" : 0,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4603172",
|
||||
"#comment" : "'inline' image",
|
||||
"#class" : bellazon.BellazonPostExtractor,
|
||||
"#results" : "https://www.bellazon.com/main/uploads/monthly_2018_04/30602369_1891291154222843_1650952189830496256_n.jpg.33e6ab78dd0e8723f790ad4f58f3761a.jpg",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/",
|
||||
"#class" : bellazon.BellazonThreadExtractor,
|
||||
@@ -244,7 +258,7 @@ __tests__ = (
|
||||
"#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
|
||||
"#class" : bellazon.BellazonThreadExtractor,
|
||||
"#range" : "1-5",
|
||||
"#options" : {"prder-posts": "asc"},
|
||||
"#options" : {"order-posts": "asc"},
|
||||
"#results" : (
|
||||
"http://img292.echo.cx/my.php?image=4moon011rk.jpg",
|
||||
"http://img294.echo.cx/my.php?image=heroclip3jb.jpg",
|
||||
|
||||
Reference in New Issue
Block a user