[bellazon] update (#8247)
- include 'filename' in default filename_fmt and archive_fmt,
  as 'id' alone is not guaranteed to be unique, even in the same post
  https://www.bellazon.com/main/topic/3556-bipasha-basu/page/2/#findComment-2536060
- support 'inline' files
- ignore '/profile/' links
- do not increment 'num' on ignored files
This commit is contained in:
@@ -20,13 +20,16 @@ class BellazonExtractor(Extractor):
|
|||||||
root = "https://www.bellazon.com/main"
|
root = "https://www.bellazon.com/main"
|
||||||
directory_fmt = ("{category}", "{thread[section]}",
|
directory_fmt = ("{category}", "{thread[section]}",
|
||||||
"{thread[title]} ({thread[id]})")
|
"{thread[title]} ({thread[id]})")
|
||||||
filename_fmt = "{post[id]}_{num:>02}_{id}.{extension}"
|
filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}"
|
||||||
archive_fmt = "{post[id]}/{filename}"
|
archive_fmt = "{post[id]}/{id}_{filename}"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
native = (f"{self.root}/", f"{self.root[6:]}/")
|
native = (f"{self.root}/", f"{self.root[6:]}/")
|
||||||
extract_urls = text.re(
|
extract_urls = text.re(
|
||||||
r'(?s)<((?:video .*?<source src|a [^>]*?href)="([^"]+).*?)</a>'
|
r'(?s)<('
|
||||||
|
r'(?:video .*?<source src|a [^>]*?href)="([^"]+).*?</a>'
|
||||||
|
r'|img [^>]*?src="([^"]+)"[^>]*>'
|
||||||
|
r')'
|
||||||
).findall
|
).findall
|
||||||
|
|
||||||
if self.config("quoted", False):
|
if self.config("quoted", False):
|
||||||
@@ -44,9 +47,14 @@ class BellazonExtractor(Extractor):
|
|||||||
post["count"] = data["count"] = len(urls)
|
post["count"] = data["count"] = len(urls)
|
||||||
|
|
||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
for data["num"], (info, url) in enumerate(urls, 1):
|
data["num"] = 0
|
||||||
url = text.unescape(url)
|
for info, url, url_img in urls:
|
||||||
|
url = text.unescape(url or url_img)
|
||||||
|
|
||||||
if url.startswith(native):
|
if url.startswith(native):
|
||||||
|
if "/uploads/emoticons/" in url or "/profile/" in url:
|
||||||
|
continue
|
||||||
|
data["num"] += 1
|
||||||
if not (alt := text.extr(info, ' alt="', '"')) or (
|
if not (alt := text.extr(info, ' alt="', '"')) or (
|
||||||
alt.startswith("post-") and "_thumb." in alt):
|
alt.startswith("post-") and "_thumb." in alt):
|
||||||
name = url
|
name = url
|
||||||
@@ -60,13 +68,13 @@ class BellazonExtractor(Extractor):
|
|||||||
elif "/core/interface/file/attachment.php" in url:
|
elif "/core/interface/file/attachment.php" in url:
|
||||||
if not dc["id"]:
|
if not dc["id"]:
|
||||||
dc["id"] = url.rpartition("?id=")[2]
|
dc["id"] = url.rpartition("?id=")[2]
|
||||||
if (pos := info.find(">")) >= 0 and \
|
if name := text.extr(info, ">", "<").strip():
|
||||||
(name := info[pos+1:].strip()):
|
|
||||||
text.nameext_from_url(name, dc)
|
text.nameext_from_url(name, dc)
|
||||||
|
|
||||||
if url[0] == "/":
|
if url[0] == "/":
|
||||||
url = f"https:{url}"
|
url = f"https:{url}"
|
||||||
yield Message.Url, url, dc
|
yield Message.Url, url, dc
|
||||||
|
|
||||||
else:
|
else:
|
||||||
yield Message.Queue, url, data
|
yield Message.Queue, url, data
|
||||||
|
|
||||||
|
|||||||
@@ -134,7 +134,7 @@ __tests__ = (
|
|||||||
"extension": "mp4",
|
"extension": "mp4",
|
||||||
"filename" : r"re:^\d+$",
|
"filename" : r"re:^\d+$",
|
||||||
"id" : r"re:6361\d\d\d",
|
"id" : r"re:6361\d\d\d",
|
||||||
"num" : range(3, 12),
|
"num" : range(2, 11),
|
||||||
"post" : {
|
"post" : {
|
||||||
"author_id" : "101807",
|
"author_id" : "101807",
|
||||||
"author_slug": "rogerdanish",
|
"author_slug": "rogerdanish",
|
||||||
@@ -190,6 +190,20 @@ __tests__ = (
|
|||||||
"id" : "10919171",
|
"id" : "10919171",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4602714",
|
||||||
|
"#comment" : "'/profile/' link",
|
||||||
|
"#class" : bellazon.BellazonPostExtractor,
|
||||||
|
"#count" : 0,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4603172",
|
||||||
|
"#comment" : "'inline' image",
|
||||||
|
"#class" : bellazon.BellazonPostExtractor,
|
||||||
|
"#results" : "https://www.bellazon.com/main/uploads/monthly_2018_04/30602369_1891291154222843_1650952189830496256_n.jpg.33e6ab78dd0e8723f790ad4f58f3761a.jpg",
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/",
|
"#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/",
|
||||||
"#class" : bellazon.BellazonThreadExtractor,
|
"#class" : bellazon.BellazonThreadExtractor,
|
||||||
@@ -244,7 +258,7 @@ __tests__ = (
|
|||||||
"#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
|
"#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
|
||||||
"#class" : bellazon.BellazonThreadExtractor,
|
"#class" : bellazon.BellazonThreadExtractor,
|
||||||
"#range" : "1-5",
|
"#range" : "1-5",
|
||||||
"#options" : {"prder-posts": "asc"},
|
"#options" : {"order-posts": "asc"},
|
||||||
"#results" : (
|
"#results" : (
|
||||||
"http://img292.echo.cx/my.php?image=4moon011rk.jpg",
|
"http://img292.echo.cx/my.php?image=4moon011rk.jpg",
|
||||||
"http://img294.echo.cx/my.php?image=heroclip3jb.jpg",
|
"http://img294.echo.cx/my.php?image=heroclip3jb.jpg",
|
||||||
|
|||||||
Reference in New Issue
Block a user