From 8796ad02bae6b7244b63848c6db911c9dff9161b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 20 Sep 2025 22:12:01 +0200 Subject: [PATCH] [bellazon] add 'order-posts' option (#8248) --- docs/configuration.rst | 16 ++++++++++++++ docs/gallery-dl.conf | 3 ++- gallery_dl/extractor/bellazon.py | 38 ++++++++++++++++++++++++++++++-- test/results/bellazon.py | 1 + 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index cd47dfa0..7825d071 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1600,6 +1600,22 @@ Description ``image``, ``video``, ``mediacollection``, ``embed``, ``text``. +extractor.bellazon.order-posts +------------------------------ +Type + ``string`` +Default + ``"desc"`` +Description + Controls the order in which + posts of a ``thread`` are processed. + + ``"asc"`` + Ascending order (oldest first) + ``"desc"`` | ``"reverse"`` + Descending order (newest first) + + extractor.bellazon.quoted ------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 1ce8e9f2..5a2f73f5 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -156,7 +156,8 @@ }, "bellazon": { - "quoted": false + "order-posts": "desc", + "quoted" : false }, "bilibili": { diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py index cae69663..3a811926 100644 --- a/gallery_dl/extractor/bellazon.py +++ b/gallery_dl/extractor/bellazon.py @@ -91,6 +91,28 @@ class BellazonExtractor(Extractor): pnum += 1 url = f"{base}/page/{pnum}/" + def _pagination_reverse(self, base, pnum=None): + base = f"{self.root}{base}" + + url = f"{base}/page/9999/" # force redirect to highest page number + with self.request(url) as response: + parts = response.url.rsplit("/", 3) + pnum = text.parse_int(parts[2]) if parts[1] == "page" else 1 + page = response.text + + while True: + yield page + + pnum -= 1 + if pnum > 1: + url = f"{base}/page/{pnum}/" + elif 
pnum == 1: + url = f"{base}/" + else: + return + + page = self.request(url).text + def _parse_thread(self, page): schema = self._extract_jsonld(page) author = schema["author"] @@ -166,10 +188,22 @@ class BellazonThreadExtractor(BellazonExtractor): example = "https://www.bellazon.com/main/topic/123-SLUG/" def posts(self): - for page in self._pagination(*self.groups): + if (order := self.config("order-posts")) and \ + order[0] not in ("d", "r"): + pages = self._pagination(*self.groups) + reverse = False + else: + pages = self._pagination_reverse(*self.groups) + reverse = True + + for page in pages: if "thread" not in self.kwdict: self.kwdict["thread"] = self._parse_thread(page) - for html in text.extract_iter(page, "
"): + posts = text.extract_iter(page, "
") + if reverse: + posts = list(posts) + posts.reverse() + for html in posts: yield self._parse_post(html) diff --git a/test/results/bellazon.py b/test/results/bellazon.py index 82f56ce3..b340d01d 100644 --- a/test/results/bellazon.py +++ b/test/results/bellazon.py @@ -244,6 +244,7 @@ __tests__ = ( "#url" : "https://www.bellazon.com/main/topic/1774-zhang-ziyi/", "#class" : bellazon.BellazonThreadExtractor, "#range" : "1-5", + "#options" : {"order-posts": "asc"}, "#results" : ( "http://img292.echo.cx/my.php?image=4moon011rk.jpg", "http://img294.echo.cx/my.php?image=heroclip3jb.jpg",