From 88bfc0991c88ba9452c39c982a837fbca01fe847 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Fri, 12 Sep 2025 17:21:34 +0200
Subject: [PATCH] [bellazon] add initial support (#7480)

---
 docs/supportedsites.md           |   6 +
 gallery_dl/extractor/__init__.py |   1 +
 gallery_dl/extractor/bellazon.py | 165 ++++++++++++++++++++++++
 test/results/bellazon.py         | 213 +++++++++++++++++++++++++++++++
 4 files changed, 385 insertions(+)
 create mode 100644 gallery_dl/extractor/bellazon.py
 create mode 100644 test/results/bellazon.py
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index a913c6bc..e8e836eb 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -133,6 +133,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Collections, Galleries, User Profiles</td>
     <td></td>
 </tr>
+<tr id="bellazon" title="bellazon">
+    <td>Bellazon</td>
+    <td>https://www.bellazon.com/</td>
+    <td>Forums, Posts, Threads</td>
+    <td></td>
+</tr>
 <tr id="bilibili" title="bilibili">
     <td>Bilibili</td>
     <td>https://www.bilibili.com/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index fe61c428..b32fcd11 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -31,6 +31,7 @@ modules = [
     "batoto",
     "bbc",
     "behance",
+    "bellazon",
     "bilibili",
     "blogger",
     "bluesky",
diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py
new file mode 100644
index 00000000..5c9b9cd2
--- /dev/null
+++ b/gallery_dl/extractor/bellazon.py
@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.bellazon.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?bellazon\.com/main"  # URL prefix
+
+
+class BellazonExtractor(Extractor):
+    """Base class for bellazon extractors"""
+    category = "bellazon"
+    root = "https://www.bellazon.com/main"
+    directory_fmt = ("{category}", "{thread[section]}",
+                     "{thread[title]} ({thread[id]})")
+    filename_fmt = "{post[id]}_{num:>02}_{id}.{extension}"
+    archive_fmt = "{post[id]}/{filename}"
+
+    def items(self):
+        """Yield a Directory message per post and a message per link"""
+        extract_urls = text.re(r'<a ([^>]*?href="([^"]+)".*?)</a>').findall
+        native = f"{self.root}/"
+        for post in self.posts():
+            urls = extract_urls(post["content"])
+            data = {"post": post}
+            post["count"] = data["count"] = len(urls)
+
+            yield Message.Directory, data
+            for data["num"], (info, url) in enumerate(urls, 1):
+                url = text.unescape(url)
+                if url.startswith(native):
+                    # empty or 'post-*_thumb.*' alt text: name file by URL
+                    if not (alt := text.extr(info, ' alt="', '"')) or (
+                            alt.startswith("post-") and "_thumb." in alt):
+                        name = url
+                    else:
+                        name = text.unescape(alt)
+                    dc = text.nameext_from_url(name, data.copy())
+                    dc["id"] = text.extr(info, 'data-fileid="', '"')
+                    if ext := text.extr(info, 'data-fileext="', '"'):
+                        dc["extension"] = ext
+                    yield Message.Url, url, dc
+                else:
+                    # not below 'root': yield as Queue message
+                    yield Message.Queue, url, data
+
+    def _pagination(self, base, pnum=None):
+        """Yield the pages of 'base'; only page 'pnum' when given"""
+        base = f"{self.root}{base}"
+        if pnum is None:
+            url = f"{base}/"
+            pnum = 1
+        else:
+            url = f"{base}/page/{pnum}/"
+            pnum = None
+
+        while True:
+            page = self.request(url).text
+            yield page
+            # stop: single page, no 'next' link, or 'next' is the current page
+            if pnum is None or ' rel="next" ' not in page or text.extr(
+                    page, " rel=\"next\" data-page='", "'") == str(pnum):
+                return
+            pnum += 1
+            url = f"{base}/page/{pnum}/"
+
+    def _parse_thread(self, page):
+        """Collect 'thread' metadata from a topic page's HTML"""
+        schema = self._extract_jsonld(page)
+        author = schema["author"]
+        stats = schema["interactionStatistic"]
+        url_t = schema["url"]
+        url_a = author.get("url") or ""
+
+        path = text.split_html(text.extr(
+            page, '<nav class="ipsBreadcrumb', "</nav>"))[2:-1]
+        thread = {
+            "url"  : url_t,
+            "path" : path,
+            "title": schema["headline"],
+            "views": stats[0]["userInteractionCount"],
+            "posts": stats[1]["userInteractionCount"],
+            "date" : text.parse_datetime(schema["datePublished"]),
+            "date_updated": text.parse_datetime(schema["dateModified"]),
+            "description" : text.unescape(schema["text"]),
+            "section"     : path[-2],
+            "author"      : author["name"],
+            "author_url"  : url_a,
+        }
+        thread["id"], _, thread["slug"] = \
+            url_t.rsplit("/", 2)[1].partition("-")
+        # leave ID and slug empty for authors without profile URL
+        thread["author_id"], _, thread["author_slug"] = \
+            url_a.rsplit("/", 2)[1].partition("-") if url_a else ("", "", "")
+        return thread
+
+    def _parse_post(self, html):
+        """Collect 'post' metadata from the HTML of one <article>"""
+        extr = text.extract_from(html)
+        post = {
+            "id": extr('id="elComment_', '"'),
+            "author_url": extr(" href='", "'"),
+            "date": text.parse_datetime(extr("datetime='", "'")),
+            "content": extr("<!-- Post content -->", "\n\t\t</div>"),
+        }
+        # cut everything up to and including the first '>'
+        if (pos := post["content"].find(">")) >= 0:
+            post["content"] = post["content"][pos+1:].strip()
+        # leave ID and slug empty for authors without profile URL
+        url_a = post["author_url"]
+        post["author_id"], _, post["author_slug"] = \
+            url_a.rsplit("/", 2)[1].partition("-") if url_a else ("", "", "")
+        return post
+
+
+class BellazonPostExtractor(BellazonExtractor):
+    """Extractor for a single post of a bellazon topic"""
+    subcategory = "post"
+    pattern = (rf"{BASE_PATTERN}(/topic/\d+-[\w-]+(?:/page/\d+)?)"
+               rf"/?#findComment-(\d+)")
+    example = "https://www.bellazon.com/main/topic/123-SLUG/#findComment-12345"
+
+    def posts(self):
+        topic, comment_id = self.groups
+        page = self.request(f"{self.root}{topic}").text
+        idx = page.find(f'id="elComment_{comment_id}')
+        if idx < 0:
+            raise exception.NotFoundError("post")
+        # scan for '<article ' from 100 characters before the ID match
+        article = text.extract(page, "<article ", "</article>", idx-100)[0]
+        self.kwdict["thread"] = self._parse_thread(page)
+        return (self._parse_post(article),)
+
+
+class BellazonThreadExtractor(BellazonExtractor):
+    subcategory = "thread"
+    pattern = rf"{BASE_PATTERN}(/topic/\d+-[\w-]+)(?:/page/(\d+))?"
+    example = "https://www.bellazon.com/main/topic/123-SLUG/"
+
+    def posts(self):
+        for page in self._pagination(*self.groups):
+            if "thread" not in self.kwdict:  # parse at most once
+                self.kwdict["thread"] = self._parse_thread(page)
+            yield from map(self._parse_post, text.extract_iter(
+                page, "<article ", "</article>"))
+
+
+class BellazonForumExtractor(BellazonExtractor):
+    subcategory = "forum"
+    pattern = rf"{BASE_PATTERN}(/forum/\d+-[\w-]+)(?:/page/(\d+))?"
+    example = "https://www.bellazon.com/main/forum/123-SLUG/"
+
+    def items(self):
+        topic_row = '<li data-ips-hook="topicRow"'
+        thread = {"_extractor": BellazonThreadExtractor}
+        for page in self._pagination(*self.groups):
+            for row in text.extract_iter(page, topic_row, "</"):  # topic rows
+                yield Message.Queue, text.extr(row, 'href="', '"'), thread
diff --git a/test/results/bellazon.py b/test/results/bellazon.py
new file mode 100644
index 00000000..7fb0c3f0
--- /dev/null
+++ b/test/results/bellazon.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import bellazon
+
+
+__tests__ = (
+{
+    "#url"     : "https://www.bellazon.com/main/topic/57872-millie-brady/#findComment-4351049",
+    "#class"   : bellazon.BellazonPostExtractor,
+    "#results" : (
+        "https://www.bellazon.com/main/uploads/monthly_2017_06/595482b77fd89_millieb280617BZNImage101.jpg.10b91b9141b374e657a1a4c3d0c96b64.jpg",
+        "https://www.bellazon.com/main/uploads/monthly_2017_06/595482c3f2fa0_millieb280617BZNImage102.jpg.1b706048fc525151775cf4b7c734b283.jpg",
+        "https://www.bellazon.com/main/uploads/monthly_2017_06/595482cdc66ad_millieb280617BZNImage103.jpg.6fa2226a314d0f0e9f9426e7f90f4808.jpg",
+        "https://www.bellazon.com/main/uploads/monthly_2017_06/595482dac786c_millieb280617BZNImage104.jpg.e579be6b585cef90b965d4d09969a66a.jpg",
+        "https://www.bellazon.com/main/uploads/monthly_2017_06/595482e772acd_millieb280617BZNImage105.jpg.428df8a841957b48452a6a6ab64ddacb.jpg",
+    ),
+
+    "id"       : r"re:55\d+",
+    "filename" : str,
+    "extension": "jpg",
+    "count"    : 5,
+    "num"      : range(1, 5),
+    "post"     : {
+        "author_id"  : "72476",
+        "author_slug": "shepherd",
+        "author_url" : "https://www.bellazon.com/main/profile/72476-shepherd/",
+        "count"      : 5,
+        "date"       : "dt:2017-06-29 04:32:43",
+        "id"         : "4351049",
+        "content"    : """\
+<p>
+\tSerpentine Galleries Summer Party, London, Jun 28 '17
+</p>
+
+<p>
+\t 
+</p>
+
+<p>
+\t<a class="ipsAttachLink ipsAttachLink_image" href="https://www.bellazon.com/main/uploads/monthly_2017_06/595482b77fd89_millieb280617BZNImage101.jpg.10b91b9141b374e657a1a4c3d0c96b64.jpg" data-fileid="5550073" rel=""><img alt="millieb280617BZNImage101.jpg" class="ipsImage ipsImage_thumbnailed" data-fileid="5550073" src="https://www.bellazon.com/main/uploads/monthly_2017_06/595482b7c4730_millieb280617BZNImage101.thumb.jpg.5b5240deead09ec5546a6bbf68aff724.jpg" data-ratio="66.56" loading="lazy"></a> <a class="ipsAttachLink ipsAttachLink_image" href="https://www.bellazon.com/main/uploads/monthly_2017_06/595482c3f2fa0_millieb280617BZNImage102.jpg.1b706048fc525151775cf4b7c734b283.jpg" data-fileid="5550074" rel=""><img alt="millieb280617BZNImage102.jpg" class="ipsImage ipsImage_thumbnailed" data-fileid="5550074" src="https://www.bellazon.com/main/uploads/monthly_2017_06/595482c4529af_millieb280617BZNImage102.thumb.jpg.1b9f9ec5f002eaaaa80a174d1a7853d0.jpg" data-ratio="150" loading="lazy"></a> <a class="ipsAttachLink ipsAttachLink_image" href="https://www.bellazon.com/main/uploads/monthly_2017_06/595482cdc66ad_millieb280617BZNImage103.jpg.6fa2226a314d0f0e9f9426e7f90f4808.jpg" data-fileid="5550075" rel=""><img alt="millieb280617BZNImage103.jpg" class="ipsImage ipsImage_thumbnailed" data-fileid="5550075" src="https://www.bellazon.com/main/uploads/monthly_2017_06/595482ce268f7_millieb280617BZNImage103.thumb.jpg.580d38335424d6fa65bd5d476625864b.jpg" data-ratio="150.23" loading="lazy"></a>
+</p>
+
+<p>
+\t<a class="ipsAttachLink ipsAttachLink_image" href="https://www.bellazon.com/main/uploads/monthly_2017_06/595482dac786c_millieb280617BZNImage104.jpg.e579be6b585cef90b965d4d09969a66a.jpg" data-fileid="5550076" rel=""><img alt="millieb280617BZNImage104.jpg" class="ipsImage ipsImage_thumbnailed" data-fileid="5550076" src="https://www.bellazon.com/main/uploads/monthly_2017_06/595482db10e03_millieb280617BZNImage104.thumb.jpg.958eba72b585110a4b8c08f1efd9cfc8.jpg" title="" data-ratio="150.26" loading="lazy"></a> <a class="ipsAttachLink ipsAttachLink_image" href="https://www.bellazon.com/main/uploads/monthly_2017_06/595482e772acd_millieb280617BZNImage105.jpg.428df8a841957b48452a6a6ab64ddacb.jpg" data-fileid="5550077" rel=""><img alt="millieb280617BZNImage105.jpg" class="ipsImage ipsImage_thumbnailed" data-fileid="5550077" src="https://www.bellazon.com/main/uploads/monthly_2017_06/595482e7e6bc1_millieb280617BZNImage105.thumb.jpg.1e5ce2b85f7ceed7446d7f13caa9ce2b.jpg" data-ratio="150.22" loading="lazy"></a>
+</p>\
+""",
+    },
+    "thread"   : {
+        "author"      : "Shepherd",
+        "author_id"   : "72476",
+        "author_slug" : "shepherd",
+        "author_url"  : "https://www.bellazon.com/main/profile/72476-shepherd/",
+        "date"        : "dt:2015-06-20 21:34:31",
+        "date_updated": "dt:2017-06-29 04:32:43",
+        "description" : "Previously featured in the popular TV series, Mr Selfridge, emerging British born actress Millie Brady is set for huge success. \nMillie has just been confirmed as the lead role in ‘The Clan of the Cave Bear’ which will begin filming in May 2015. The drama pilot is from Imagine TV, Allison Shearmur Productions, Fox 21 TV and Lionsgate TV. Millie is also due to appear in the eagerly awaited black comedy, 'Pride and Prejudice and Zombies', staring alongside Matt Smith, Sally Philiips, Douglas Booth, Lily james and Sam Riley. She is currently filming 'Knights of the Roundtable: King Arthur' directed by Guy Ritchie. \n  \n  \nFarfetch, Jun 2015 \nLinda Brownlee photos \n  \n        \n",
+        "id"          : "57872",
+        "posts"       : 1,
+        "section"     : "Actresses",
+        "slug"        : "millie-brady",
+        "title"       : "Millie Brady",
+        "url"         : "https://www.bellazon.com/main/topic/57872-millie-brady/",
+        "views"       : range(3_800, 5_000),
+        "path"        : [
+            "Females",
+            "Actresses",
+            "Millie Brady",
+        ],
+    },
+},
+
+{
+    "#url"     : "https://www.bellazon.com/main/topic/3556-bipasha-basu/#findComment-2134610",
+    "#class"   : bellazon.BellazonPostExtractor,
+    "#results" : "https://www.bellazon.com/main/uploads/monthly_04_2010/post-35864-1270985307.jpg",
+
+    "id"       : "1002749",
+    "filename" : "post-35864-1270985307",
+    "extension": "jpg",
+    "count"    : 1,
+    "num"      : 1,
+    "post"     : {
+        "author_id"  : "35864",
+        "author_slug": "egluze",
+        "author_url" : "https://www.bellazon.com/main/profile/35864-egluze/",
+        "count"      : 1,
+        "date"       : "dt:2010-04-11 11:28:43",
+        "id"         : "2134610",
+        "content"    : """\
+<p><strong>Marie Claire India April 2010</strong></p>
+<p><a class="ipsAttachLink ipsAttachLink_image" href="https://www.bellazon.com/main/uploads/monthly_04_2010/post-35864-1270985307.jpg" rel="external nofollow"><img class="ipsImage ipsImage_thumbnailed" src="https://www.bellazon.com/main/uploads/monthly_04_2010/post-35864-1270985307_thumb.jpg" data-fileid="1002749" alt="post-35864-1270985307_thumb.jpg" data-ratio="133.67" loading="lazy"></a></p>\
+""",
+    },
+    "thread"   : {
+        "author"      : "SaBrIaNa",
+        "author_id"   : "1324",
+        "author_slug" : "sabriana",
+        "author_url"  : "https://www.bellazon.com/main/profile/1324-sabriana/",
+        "date"        : "dt:2005-12-26 20:31:33",
+        "date_updated": "dt:2017-06-17 05:19:09",
+        "description" : str,
+        "id"          : "3556",
+        "posts"       : 44,
+        "section"     : "Actresses",
+        "slug"        : "bipasha-basu",
+        "title"       : "Bipasha Basu",
+        "url"         : "https://www.bellazon.com/main/topic/3556-bipasha-basu/",
+        "views"       : range(20_000, 50_000),
+        "path"        : [
+            "Females",
+            "Actresses",
+            "Bipasha Basu",
+        ],
+    },
+},
+
+{
+    "#url"     : "https://www.bellazon.com/main/topic/57872-millie-brady/",
+    "#class"   : bellazon.BellazonThreadExtractor,
+    "#pattern" : r"https://www\.bellazon\.com/main/uploads/monthly_\d+_\d+/.+\.jpg",
+    "#count"   : 13,
+
+    "id"       : r"re:\d+",
+    "filename" : str,
+    "extension": "jpg",
+    "count"    : {5, 8},
+    "num"      : range(1, 8),
+    "post"     : {
+        "id"       : {"3721257", "4351049"},
+        "count"    : {5, 8},
+        "author_id": "72476",
+        "date"     : "type:datetime",
+    },
+    "thread"   : {
+        "id"       : "57872",
+        "title"    : "Millie Brady",
+        "author"   : "Shepherd",
+        "author_id": "72476",
+        "date"     : "dt:2015-06-20 21:34:31",
+    },
+},
+
+{
+    "#url"     : "https://www.bellazon.com/main/topic/3556-bipasha-basu/",
+    "#class"   : bellazon.BellazonThreadExtractor,
+    "#pattern" : r"https?://(www\.bellazon\.com/main/uploads/.+\.\w+|www\.[^.]+\.(com|ru)|img\d+.imagevenue.com|imagesion.com)",
+    "#count"   : 247,
+
+    "count"    : range(0, 30),
+    "num"      : range(0, 30),
+    "post"     : {
+        "id"       : r"re:\d+",
+        "author_id": r"re:\d+",
+        "count"    : range(0, 30),
+        "date"     : "type:datetime",
+    },
+    "thread"   : {
+        "id"          : "3556",
+        "title"       : "Bipasha Basu",
+        "author"      : "SaBrIaNa",
+        "author_id"   : "1324",
+        "date"        : "dt:2005-12-26 20:31:33",
+        "date_updated": "dt:2017-06-17 05:19:09",
+    },
+},
+
+{
+    "#url"     : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
+    "#class"   : bellazon.BellazonThreadExtractor,
+    "#range"   : "1-5",
+    "#results" : (
+        "http://img292.echo.cx/my.php?image=4moon011rk.jpg",
+        "http://img294.echo.cx/my.php?image=heroclip3jb.jpg",
+        "http://img294.echo.cx/my.php?image=heroclip29ut.jpg",
+        "http://img294.echo.cx/my.php?image=heroclip35lp.jpg",
+        "http://img36.echo.cx/my.php?image=895welzz4514nv.jpg",
+    ),
+
+    "thread": {
+        "author"      : "Hiro",
+        "author_id"   : "26",
+        "author_slug" : "hiro",
+        "author_url"  : "https://www.bellazon.com/main/profile/26-hiro/",
+        "date"        : "dt:2005-06-08 03:02:03",
+        "date_updated": "dt:2023-07-09 07:33:19",
+        "description" : str,
+        "id"          : "1774",
+        "posts"       : 480,
+        "section"     : "Actresses",
+        "slug"        : "zhang-ziyi",
+        "title"       : "Zhang Ziyi",
+        "url"         : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/",
+        "views"       : int,
+        "path"        : [
+            "Females",
+            "Actresses",
+            "Zhang Ziyi",
+        ],
+    },
+},
+
+{
+    "#url"     : "https://www.bellazon.com/main/forum/3-actresses/",
+    "#class"   : bellazon.BellazonForumExtractor,
+    "#pattern" : bellazon.BellazonThreadExtractor.pattern,
+    "#range"   : "1-100",
+    "#count"   : 100,
+},
+
+)