From 997e7422ff02492206f902cdb802bec03ccfaac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 21 Sep 2025 16:20:17 +0200 Subject: [PATCH] [bellazon] update (#8247) - include 'filename' in default filename_fmt and archive_fmt as 'id' alone is not guaranteed to be unique, even in the same post https://www.bellazon.com/main/topic/3556-bipasha-basu /page/2/#findComment-2536060 - support 'inline' files - ignore '/profile/' links - do not increment 'num' on ignored files --- gallery_dl/extractor/bellazon.py | 22 +++++++++++++++------- test/results/bellazon.py | 18 ++++++++++++++++-- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py index 3a811926..5dcb6a51 100644 --- a/gallery_dl/extractor/bellazon.py +++ b/gallery_dl/extractor/bellazon.py @@ -20,13 +20,16 @@ class BellazonExtractor(Extractor): root = "https://www.bellazon.com/main" directory_fmt = ("{category}", "{thread[section]}", "{thread[title]} ({thread[id]})") - filename_fmt = "{post[id]}_{num:>02}_{id}.{extension}" - archive_fmt = "{post[id]}/{filename}" + filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}" + archive_fmt = "{post[id]}/{id}_{filename}" def items(self): native = (f"{self.root}/", f"{self.root[6:]}/") extract_urls = text.re( - r'(?s)<((?:video .*?]*?href)="([^"]+).*?)' + r'(?s)<(' + r'(?:video .*?]*?href)="([^"]+).*?' + r'|img [^>]*?src="([^"]+)"[^>]*>' + r')' ).findall if self.config("quoted", False): @@ -44,9 +47,14 @@ class BellazonExtractor(Extractor): post["count"] = data["count"] = len(urls) yield Message.Directory, data - for data["num"], (info, url) in enumerate(urls, 1): - url = text.unescape(url) + data["num"] = 0 + for info, url, url_img in urls: + url = text.unescape(url or url_img) + if url.startswith(native): + if "/uploads/emoticons/" in url or "/profile/" in url: + continue + data["num"] += 1 if not (alt := text.extr(info, ' alt="', '"')) or ( alt.startswith("post-") and "_thumb." in alt): name = url @@ -60,13 +68,13 @@ class BellazonExtractor(Extractor): elif "/core/interface/file/attachment.php" in url: if not dc["id"]: dc["id"] = url.rpartition("?id=")[2] - if (pos := info.find(">")) >= 0 and \ - (name := info[pos+1:].strip()): + if name := text.extr(info, ">", "<").strip(): text.nameext_from_url(name, dc) if url[0] == "/": url = f"https:{url}" yield Message.Url, url, dc + else: yield Message.Queue, url, data diff --git a/test/results/bellazon.py b/test/results/bellazon.py index b340d01d..17b30577 100644 --- a/test/results/bellazon.py +++ b/test/results/bellazon.py @@ -134,7 +134,7 @@ __tests__ = ( "extension": "mp4", "filename" : r"re:^\d+$", "id" : r"re:6361\d\d\d", - "num" : range(3, 12), + "num" : range(2, 11), "post" : { "author_id" : "101807", "author_slug": "rogerdanish", @@ -190,6 +190,20 @@ __tests__ = ( "id" : "10919171", }, +{ + "#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4602714", + "#comment" : "'/profile/' link", + "#class" : bellazon.BellazonPostExtractor, + "#count" : 0, +}, + +{ + "#url" : "https://www.bellazon.com/main/topic/66334-charly-jordan/page/3/#findComment-4603172", + "#comment" : "'inline' image", + "#class" : bellazon.BellazonPostExtractor, + "#results" : "https://www.bellazon.com/main/uploads/monthly_2018_04/30602369_1891291154222843_1650952189830496256_n.jpg.33e6ab78dd0e8723f790ad4f58f3761a.jpg", +}, + { "#url" : "https://www.bellazon.com/main/topic/57872-millie-brady/", "#class" : bellazon.BellazonThreadExtractor, @@ -244,7 +258,7 @@ __tests__ = ( "#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/", "#class" : bellazon.BellazonThreadExtractor, "#range" : "1-5", - "#options" : {"prder-posts": "asc"}, + "#options" : {"order-posts": "asc"}, "#results" : ( "http://img292.echo.cx/my.php?image=4moon011rk.jpg", "http://img294.echo.cx/my.php?image=heroclip3jb.jpg",