From 812482e53eb6ed5dd8e972d5d4c0d40aba77f650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 13 Jan 2026 21:04:12 +0100 Subject: [PATCH] [xenforo] extract 'author_slug' metadata (#8785) --- gallery_dl/extractor/xenforo.py | 39 +++++++++++++++++++++------------ test/results/nudostarforum.py | 1 + test/results/simpcity.py | 21 +++++++++--------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/gallery_dl/extractor/xenforo.py b/gallery_dl/extractor/xenforo.py index c58fd23e..f1497c9b 100644 --- a/gallery_dl/extractor/xenforo.py +++ b/gallery_dl/extractor/xenforo.py @@ -245,7 +245,6 @@ class XenforoExtractor(BaseExtractor): author = schema["author"] stats = schema["interactionStatistic"] url_t = schema.get("url") or schema.get("@id") or "" - url_a = author.get("url") or "" thread = { "id" : url_t[url_t.rfind(".")+1:-1], @@ -254,13 +253,19 @@ class XenforoExtractor(BaseExtractor): "date" : self.parse_datetime_iso(schema["datePublished"]), "tags" : (schema["keywords"].split(", ") if "keywords" in schema else ()), - "section" : schema["articleSection"], - "author" : author.get("name") or "", - "author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else - (author.get("name") or "")[15:]), - "author_url": url_a, + "section": schema["articleSection"], + "author" : author.get("name") or "", } + if url_a := author.get("url"): + thread["author_url"] = url_a + thread["author_slug"], _, thread["author_id"] = \ + url_a[url_a.rfind("/", 0, -1)+1:-1].rpartition(".") + else: + thread["author_url"] = "" + thread["author_slug"] = text.slugify(thread["author"][:15]) + thread["author_id"] = thread["author"][15:] + if isinstance(stats, list): thread["views"] = stats[0]["userInteractionCount"] thread["posts"] = stats[1]["userInteractionCount"] @@ -286,7 +291,8 @@ class XenforoExtractor(BaseExtractor): } url_a = post["author_url"] - post["author_id"] = url_a[url_a.rfind(".")+1:-1] + post["author_slug"], _, post["author_id"] = \ + url_a[url_a.rfind("/", 0, -1)+1:-1].rpartition(".") con = post["content"] if (pos := con.find('
""", "count" : 3, "date" : "dt:2025-10-31 21:26:42", diff --git a/test/results/simpcity.py b/test/results/simpcity.py index aea6ba45..a833f645 100644 --- a/test/results/simpcity.py +++ b/test/results/simpcity.py @@ -22,25 +22,24 @@ __tests__ = ( "author" : "Zebrabobinn", "author_id" : "171827", "author_url": "https://simpcity.cr/members/zebrabobinn.171827/", + "author_slug": "zebrabobinn", "count" : 1, "date" : "dt:2023-03-08 12:59:10", "id" : "1753131", - "content" : """\ -
\ -""", + "content" : str, }, "thread": { "author" : "eula", "author_id" : "54987", "author_url": "https://simpcity.cr/members/eula.54987/", + "author_slug": "eula", "date" : "dt:2022-03-11 17:15:59", "id" : "10731", "posts" : range(320, 500), "section" : "Asians", "title" : "Ririkana | RR_loveit", "url" : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/", - "views" : range(790_000, 900_000), + "views" : range(900_000, 2_000_000), "tags" : [ "asian", "big ass", @@ -69,10 +68,10 @@ __tests__ = ( "#auth" : True, "#results" : ( "https://jpg6.su/img/NNFssUg", - "https://saint2.cr/embed/nPy1kG3w55V", - "https://saint2.cr/embed/c0KhPjU4-F3", - "https://saint2.cr/embed/sZWnVZ_mQsV", - "https://saint2.cr/embed/MEBiLx6DETQ", + "https://turbovid.cr/embed/nPy1kG3w55V", + "https://turbovid.cr/embed/c0KhPjU4-F3", + "https://turbovid.cr/embed/sZWnVZ_mQsV", + "https://turbovid.cr/embed/MEBiLx6DETQ", ), }, @@ -104,6 +103,7 @@ __tests__ = ( "author" : "Hexorium", "author_id" : "3715883", "author_url": "https://simpcity.cr/members/hexorium.3715883/", + "author_slug": "hexorium", "count" : 6, "date" : "dt:2024-12-15 21:37:05", "id" : "12065490", @@ -112,6 +112,7 @@ __tests__ = ( "author" : "Deleted member 166159", "author_id" : "166159", "author_url": "", + "author_slug": "deleted-member", "date" : "dt:2022-04-05 14:48:14", "id" : "36572", "section" : "Premium Asians", @@ -244,7 +245,7 @@ __tests__ = ( "#category": ("xenforo", "simpcity", "thread"), "#class" : xenforo.XenforoThreadExtractor, "#auth" : True, - "#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|saint2.cr/embed", + "#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbovid.cr/embed", "#count" : range(100, 300), "count" : int,