From 60104f20af7ae9d663d2dcf35473b1c717b4fb81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 9 Jan 2026 21:21:19 +0100 Subject: [PATCH] [xenforo] implement 'media' support (#8785) add 'media-user' and 'media-item' extractors TODO: 'media-category' extractor (?) --- docs/configuration.rst | 12 ++++ docs/gallery-dl.conf | 1 + docs/supportedsites.md | 6 +- gallery_dl/extractor/xenforo.py | 122 +++++++++++++++++++++++++++++++- scripts/supportedsites.py | 4 ++ test/results/atfforum.py | 31 ++++++++ 6 files changed, 172 insertions(+), 4 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index c93e933c..0e69eaea 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -7049,6 +7049,18 @@ Description For ``Category:`` pages, recursively descent into subcategories. +extractor.[xenforo].metadata +---------------------------- +Type + ``bool`` +Default + ``false`` +Description + Extract detailed metadata for `media` files. +Note + This requires 1 additional HTTP request per file. + + extractor.[xenforo].order-posts ------------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 16cce081..7c1e7ff5 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -1187,6 +1187,7 @@ "xenforo": { + "metadata" : false, "order-posts": "desc" }, "nudostarforum": diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8b7081e8..76b42503 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1888,19 +1888,19 @@ Consider all listed sites to potentially be NSFW. 
def _extract_media(self, path, file):
    """Build the download URL and basic metadata for a media file.

    'file' is split at its last "." into a name part and an ID; the
    name part is split at its last "-" into filename and extension.
    No HTTP request is made.

    Returns a (url, metadata) tuple, where 'url' is
    '{self.root_media}{path}/full'.
    """
    media = {}
    name, _, media["id"] = file.rpartition(".")
    media["filename"], _, media["extension"] = name.rpartition("-")
    return f"{self.root_media}{path}/full", media


def _extract_media_ex(self, path, file):
    """Fetch a media page and extract detailed metadata from its JSON-LD.

    Performs one HTTP request to '{self.root}{path}/' and reads the
    'mainEntity' object of the JSON-LD data returned by
    self._extract_jsonld().

    Returns a (url, metadata) tuple, where 'url' is the entity's
    'contentUrl'.
    """
    page = self.request(f"{self.root}{path}/").text

    schema = self._extract_jsonld(page)
    main = schema["mainEntity"]
    author = main["author"]
    url_a = author.get("url") or ""
    stats = main["interactionStatistic"]

    def _dimension(key):
        # 'width'/'height' are objects whose "name" starts with the
        # numeric value followed by a space; fall back to 0 when the
        # entry is missing or empty
        value = main.get(key)
        if not value:
            return 0
        return text.parse_int(value["name"].partition(" ")[0]) or 0

    media = text.nameext_from_name(main["name"], {
        "schema": schema,
        # only the part after the last "." is the ID
        # (same split as in _extract_media)
        "id"    : file.rpartition(".")[2],
        "size"  : main.get("contentSize"),
        "description": main.get("description"),
        "date"  : self.parse_datetime_iso(main.get("dateCreated")),
        "width" : _dimension("width"),
        "height": _dimension("height"),
        "author"    : author.get("name") or "",
        # with a profile URL: text between its last "." and the trailing
        # character; without one: the author name minus its first
        # 15 characters
        # NOTE(review): presumably strips a "Deleted member " prefix
        # - confirm
        "author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else
                       (author.get("name") or "")[15:]),
        "author_url": url_a,
    })

    if ext := main.get("encodingFormat"):
        media["extension"] = ext

    if isinstance(stats, list):
        # NOTE(review): assumes the counters are ordered
        # views, likes, comments - confirm against a live JSON-LD sample
        media["likes"] = stats[1]["userInteractionCount"]
        media["views"] = stats[0]["userInteractionCount"]
        media["comments"] = stats[2]["userInteractionCount"]

    return main["contentUrl"], media