From ab2c03b39eacc897736046c7d67ea3f4a1d7efbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 11 Dec 2025 21:26:34 +0100 Subject: [PATCH] [xenforo] implement generic XenForo forum extractors support - https://simpcity.cr/ - https://nudostar.com/forum/ (#8333) --- docs/supportedsites.md | 22 +- gallery_dl/extractor/__init__.py | 3 +- gallery_dl/extractor/nudostarforum.py | 201 ------------------ .../extractor/{simpcity.py => xenforo.py} | 115 ++++++---- scripts/supportedsites.py | 3 + test/results/nudostarforum.py | 69 +++++- test/results/simpcity.py | 51 +++-- 7 files changed, 186 insertions(+), 278 deletions(-) delete mode 100644 gallery_dl/extractor/nudostarforum.py rename gallery_dl/extractor/{simpcity.py => xenforo.py} (75%) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3ed0aa40..a49e9be5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -973,12 +973,6 @@ Consider all listed sites to potentially be NSFW. Boards, Feed, Likes, Pins, User Pins, related Pins, Search Results - - SimpCity Forums - https://simpcity.cr/ - Forums, Posts, Threads - Supported - Simply Hentai https://www.simply-hentai.com/ @@ -1836,6 +1830,22 @@ Consider all listed sites to potentially be NSFW. + + XenForo Forums + + + SimpCity Forums + https://simpcity.cr/ + Forums, Posts, Threads + Supported + + + NudoStar Forums + https://nudostar.com/forum/ + Forums, Posts, Threads + Supported + + Moebooru and MyImouto diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index e7722837..7661a9aa 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -146,7 +146,6 @@ modules = [ "nozomi", "nsfwalbum", "nudostar", - "nudostarforum", "okporn", "paheal", "patreon", @@ -187,7 +186,6 @@ modules = [ "senmanga", "sexcom", "shimmie2", - "simpcity", "simplyhentai", "sizebooru", "skeb", @@ -235,6 +233,7 @@ modules = [ "wikifeet", "wikimedia", "xasiat", + "xenforo", "xfolio", "xhamster", "xvideos", diff --git a/gallery_dl/extractor/nudostarforum.py b/gallery_dl/extractor/nudostarforum.py deleted file mode 100644 index eb152e93..00000000 --- a/gallery_dl/extractor/nudostarforum.py +++ /dev/null @@ -1,201 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://nudostar.com/forum/""" - -from .common import Extractor, Message -from .. import text, exception -from ..cache import cache - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?nudostar\.com/forum" - - -class NudostarforumExtractor(Extractor): - """Base class for nudostar forum extractors""" - category = "nudostarforum" - cookies_domain = "nudostar.com" - cookies_names = ("xf_user",) - root = "https://nudostar.com/forum" - directory_fmt = ("{category}", "{thread[title]} ({thread[id]})") - filename_fmt = "{post[id]}_{num:>02}_{filename}.{extension}" - archive_fmt = "{post[id]}/{filename}" - - def items(self): - self.login() - - for post in self.posts(): - internal, external = self._extract_post_urls(post["content"]) - - data = {"post": post} - post["count"] = data["count"] = len(internal) + len(external) - yield Message.Directory, "", data - - data["num"] = 0 - for url in internal: - data["num"] += 1 - text.nameext_from_url(url, data) - yield Message.Url, url, data - - for url in external: - data["num"] += 1 - yield Message.Queue, url, data - - def _extract_post_urls(self, content): - """Extract image and video URLs from post content""" - internal = [] - external = [] - seen = set() - - # Extract URLs from both href= and src= attributes - for attr in ('href="', 'src="'): - for url in text.extract_iter(content, attr, '"'): - if url in seen: - continue - - # Internal attachments - if "/forum/attachments/" in url: - # Skip numeric-only IDs and non-file links - path = url.rstrip("/") - if path.split(".")[-1].isdigit() and "-" not in path: - continue - if "upload?" in url: - continue - seen.add(url) - # Normalize to full URL - if url.startswith("/"): - url = "https://nudostar.com" + url - internal.append(url) - - # External image hosts - elif url.startswith("http") and "nudostar.com" not in url: - seen.add(url) - external.append(url) - - return internal, external - - def request_page(self, url): - try: - return self.request(url) - except exception.HttpError as exc: - if exc.status == 403: - raise exception.AuthRequired( - ("username & password", "authenticated cookies"), None, - "Login required to view this content") - raise - - def login(self): - if self.cookies_check(self.cookies_names): - return - - username, password = self._get_auth_info() - if username: - self.cookies_update(self._login_impl(username, password)) - - @cache(maxage=365*86400, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - url = f"{self.root}/login/" - page = self.request(url).text - token = text.extr(page, 'name="_xfToken" value="', '"') - - url = f"{self.root}/login/login" - data = { - "_xfToken" : token, - "login" : username, - "password" : password, - "remember" : "1", - "_xfRedirect": self.root + "/", - } - response = self.request(url, method="POST", data=data) - - if not response.history or "xf_user" not in response.cookies: - raise exception.AuthenticationError() - - return { - cookie.name: cookie.value - for cookie in self.cookies - if cookie.domain.endswith(self.cookies_domain) - } - - def _pagination(self, base, pnum=None): - if pnum is None: - url = f"{self.root}{base}/" - pnum = 1 - else: - url = f"{self.root}{base}/page-{pnum}" - pnum = None - - while True: - page = self.request_page(url).text - yield page - - if pnum is None or "pageNav-jump--next" not in page: - return - pnum += 1 - url = f"{self.root}{base}/page-{pnum}" - - def _parse_thread(self, page): - extr = text.extract_from(page) - - title = text.unescape(extr("", "<")) - if " | " in title: - title = title.rpartition(" | ")[0] - - thread_id = extr('data-content-key="thread-', '"') - - return { - "id" : thread_id, - "title": title.strip(), - } - - def _parse_post(self, html): - extr = text.extract_from(html) - - return { - "author": extr('data-author="', '"'), - "id" : extr('data-content="post-', '"'), - "date" : extr('datetime="', '"'), - "content": html, # Pass full article HTML for URL extraction - } - - -class NudostarforumPostExtractor(NudostarforumExtractor): - """Extractor for individual posts on nudostar forum""" - subcategory = "post" - pattern = (rf"{BASE_PATTERN}" - rf"/threads/[^/?#]+\.(\d+)/post-(\d+)") - example = "https://nudostar.com/forum/threads/NAME.12345/post-67890" - - def posts(self): - thread_id, post_id = self.groups - url = f"{self.root}/posts/{post_id}/" - page = self.request_page(url).text - - pos = page.find(f'data-content="post-{post_id}"') - if pos < 0: - raise exception.NotFoundError("post") - html = text.extract(page, "<article ", "</article>", pos-200)[0] - - self.kwdict["thread"] = self._parse_thread(page) - return (self._parse_post(html),) - - -class NudostarforumThreadExtractor(NudostarforumExtractor): - """Extractor for threads on nudostar forum""" - subcategory = "thread" - pattern = rf"{BASE_PATTERN}(/threads/[^/?#]+\.(\d+))(?:/page-(\d+))?" - example = "https://nudostar.com/forum/threads/NAME.12345/" - - def posts(self): - path, thread_id, pnum = self.groups - - for page in self._pagination(path, pnum): - if "thread" not in self.kwdict: - self.kwdict["thread"] = self._parse_thread(page) - - for html in text.extract_iter(page, "<article ", "</article>"): - yield self._parse_post(html) diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/xenforo.py similarity index 75% rename from gallery_dl/extractor/simpcity.py rename to gallery_dl/extractor/xenforo.py index 4c0dd004..f9fe9d94 100644 --- a/gallery_dl/extractor/simpcity.py +++ b/gallery_dl/extractor/xenforo.py @@ -6,36 +6,38 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://simpcity.cr/""" +"""Extractors for XenForo forums""" -from .common import Extractor, Message +from .common import BaseExtractor, Message from .. import text, exception from ..cache import cache -BASE_PATTERN = r"(?:https?://)?(?:www\.)?simpcity\.(?:cr|su)" - -class SimpcityExtractor(Extractor): - """Base class for simpcity extractors""" - category = "simpcity" - cookies_domain = "simpcity.cr" +class XenforoExtractor(BaseExtractor): + """Base class for xenforo extractors""" + basecategory = "xenforo" + # cookies_domain = "simpcity.cr" cookies_names = ("ogaddgmetaprof_user",) - root = "https://simpcity.cr" directory_fmt = ("{category}", "{thread[section]}", "{thread[title]} ({thread[id]})") filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}" archive_fmt = "{post[id]}/{type[0]}{id}_{filename}" + def __init__(self, match): + BaseExtractor.__init__(self, match) + self.cookies_domain = "." + self.root.split("/")[2] + self.cookies_names = self.config_instance("cookies") + def items(self): self.login() extract_urls = text.re( r'(?s)(?:' r'<video (.*?\ssrc="[^"]+".*?)</video>' - r'|<a [^>]*?href="' - r'(?:https://[^"]+)?(/attachments/[^"]+".*?)</a>' - r'|<div [^>]*?data-src="' - r'(?:https://[^"]+)?(/attachments/[^"]+".*?)/>' + r'|<a [^>]*?href="[^"]*?' + r'(/attachments/[^"]+".*?)</a>' + r'|<div [^>]*?data-src="[^"]*?' + r'(/attachments/[^"]+".*?)/>' r'|(?:<a [^>]*?href="|<iframe [^>]*?src="|' r'''onclick="loadMedia\(this, ')([^"']+)''' r')' @@ -50,6 +52,7 @@ class SimpcityExtractor(Extractor): post["count"] = data["count"] = len(urls) yield Message.Directory, "", data + id_last = None data["num"] = data["num_internal"] = data["num_external"] = 0 for video, inl1, inl2, ext in urls: if ext: @@ -71,12 +74,14 @@ class SimpcityExtractor(Extractor): yield Message.Url, url, data elif (inline := inl1 or inl2): - data["num"] += 1 - data["num_internal"] += 1 - data["type"] = "inline" path = inline[:inline.find('"')] name, _, id = path[path.rfind("/", 0, -1):].strip( "/").rpartition(".") + if id == id_last: + id_last = None + continue + else: + id_last = id data["id"] = text.parse_int(id) if alt := text.extr(inline, 'alt="', '"'): text.nameext_from_name(alt, data) @@ -85,6 +90,9 @@ class SimpcityExtractor(Extractor): else: data["filename"], _, data["extension"] = \ name.rpartition("-") + data["num"] += 1 + data["num_internal"] += 1 + data["type"] = "inline" yield Message.Url, self.root + path, data def request_page(self, url): @@ -180,10 +188,15 @@ class SimpcityExtractor(Extractor): html, "blockMessage--error", "</").rpartition(">")[2].strip()) def _parse_thread(self, page): - schema = self._extract_jsonld(page)["mainEntity"] + try: + data = self._extract_jsonld(page) + except ValueError: + return {} + + schema = data.get("mainEntity", data) author = schema["author"] stats = schema["interactionStatistic"] - url_t = schema["url"] + url_t = schema.get("url") or schema.get("@id") or "" url_a = author.get("url") or "" thread = { @@ -191,8 +204,6 @@ class SimpcityExtractor(Extractor): "url" : url_t, "title": schema["headline"], "date" : self.parse_datetime_iso(schema["datePublished"]), - "views": stats[0]["userInteractionCount"], - "posts": stats[1]["userInteractionCount"], "tags" : (schema["keywords"].split(", ") if "keywords" in schema else ()), "section" : schema["articleSection"], @@ -202,6 +213,13 @@ class SimpcityExtractor(Extractor): "author_url": url_a, } + if isinstance(stats, list): + thread["views"] = stats[0]["userInteractionCount"] + thread["posts"] = stats[1]["userInteractionCount"] + else: + thread["views"] = -1 + thread["posts"] = stats["userInteractionCount"] + return thread def _parse_post(self, html): @@ -210,13 +228,11 @@ class SimpcityExtractor(Extractor): post = { "author": extr('data-author="', '"'), "id": extr('data-content="post-', '"'), - "author_url": extr('itemprop="url" content="', '"'), + "author_url": (extr('itemprop="url" content="', '"') or + extr('<a href="', '"')), "date": self.parse_datetime_iso(extr('datetime="', '"')), - "content": ( - extr('<div itemprop="text">', - '<div class="js-selectToQuote') or - extr('<div >', - '<div class="js-selectToQuote')).strip(), + "content": extr('class="message-body', + '<div class="js-selectToQuote'), "attachments": extr('<section class="message-attachments">', '</section>'), } @@ -224,16 +240,35 @@ class SimpcityExtractor(Extractor): url_a = post["author_url"] post["author_id"] = url_a[url_a.rfind(".")+1:-1] + con = post["content"] + if (pos := con.find('<div class="bbWrapper')) >= 0: + con = con[pos:] + post["content"] = con.strip() + return post -class SimpcityPostExtractor(SimpcityExtractor): +BASE_PATTERN = XenforoExtractor.update({ + "simpcity": { + "root": "https://simpcity.cr", + "pattern": r"(?:www\.)?simpcity\.(?:cr|su)", + "cookies": ("ogaddgmetaprof_user",), + }, + "nudostarforum": { + "root": "https://nudostar.com/forum", + "pattern": r"(?:www\.)?nudostar\.com/forum", + "cookies": ("xf_user",), + }, +}) + + +class XenforoPostExtractor(XenforoExtractor): subcategory = "post" pattern = rf"{BASE_PATTERN}/(?:threads/[^/?#]+/post-|posts/)(\d+)" example = "https://simpcity.cr/threads/TITLE.12345/post-54321" def posts(self): - post_id = self.groups[0] + post_id = self.groups[-1] url = f"{self.root}/posts/{post_id}/" page = self.request_page(url).text @@ -246,18 +281,21 @@ class SimpcityPostExtractor(SimpcityExtractor): return (self._parse_post(html),) -class SimpcityThreadExtractor(SimpcityExtractor): +class XenforoThreadExtractor(XenforoExtractor): subcategory = "thread" pattern = rf"{BASE_PATTERN}(/threads/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?" example = "https://simpcity.cr/threads/TITLE.12345/" def posts(self): + path = self.groups[-2] + pnum = self.groups[-1] + if (order := self.config("order-posts")) and \ order[0] not in ("d", "r"): - pages = self._pagination(*self.groups) + pages = self._pagination(path, pnum) reverse = False else: - pages = self._pagination_reverse(*self.groups) + pages = self._pagination_reverse(path, pnum) reverse = True for page in pages: @@ -271,13 +309,18 @@ class SimpcityThreadExtractor(SimpcityExtractor): yield self._parse_post(html) -class SimpcityForumExtractor(SimpcityExtractor): +class XenforoForumExtractor(XenforoExtractor): subcategory = "forum" - pattern = rf"{BASE_PATTERN}(/forums/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?" + pattern = rf"{BASE_PATTERN}(/forums/(?:[^/?#]+\.)?[^/?#]+)(?:/page-(\d+))?" example = "https://simpcity.cr/forums/TITLE.123/" def items(self): - data = {"_extractor": SimpcityThreadExtractor} - for page in self._pagination(*self.groups): - for path in text.extract_iter(page, ' uix-href="', '"'): + extract_threads = text.re( + r'(/threads/[^"]+)"[^>]+data-xf-init=').findall + + data = {"_extractor": XenforoThreadExtractor} + path = self.groups[-2] + pnum = self.groups[-1] + for page in self._pagination(path, pnum): + for path in extract_threads(page): yield Message.Queue, f"{self.root}{text.unquote(path)}", data diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 83dd6545..da195c5a 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -142,6 +142,7 @@ CATEGORY_MAP = { "nozrip" : "GaryC Booru", "nsfwalbum" : "NSFWalbum.com", "nudostar" : "NudoStar.TV", + "nudostarforum" : "NudoStar Forums", "okporn" : "OK.PORN", "paheal" : "Rule 34", "photovogue" : "PhotoVogue", @@ -472,6 +473,7 @@ BASE_MAP = { "szurubooru" : "szurubooru Instances", "urlshortener": "URL Shorteners", "vichan" : "vichan Imageboards", + "xenforo" : "XenForo Forums", } URL_MAP = { @@ -526,6 +528,7 @@ AUTH_MAP = { "mastodon.social": _OAUTH, "newgrounds" : "Supported", "nijie" : "Required", + "nudostarforum" : "Supported", "patreon" : _COOKIES, "pawoo" : _OAUTH, "pillowfort" : "Supported", diff --git a/test/results/nudostarforum.py b/test/results/nudostarforum.py index 79526480..d031a28a 100644 --- a/test/results/nudostarforum.py +++ b/test/results/nudostarforum.py @@ -4,26 +4,73 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from gallery_dl.extractor import nudostarforum +from gallery_dl.extractor import xenforo __tests__ = ( { - "#url" : "https://nudostar.com/forum/threads/aspen-rae.106714/", - "#category": ("", "nudostarforum", "thread"), - "#class" : nudostarforum.NudostarforumThreadExtractor, -}, + "#url" : "https://nudostar.com/forum/threads/tate-mcrae.109528/post-1919100", + "#category": ("xenforo", "nudostarforum", "post"), + "#class" : xenforo.XenforoPostExtractor, + "#auth" : True, + "#results" : ( + "https://imagetwist.com/bvolb8129fnm/v1.jpg", + "https://imagetwist.com/9pddder15iow/v2.jpg", + "https://imagetwist.com/zzonmk0gqqdv/v3.jpg", + ), -{ - "#url" : "https://nudostar.com/forum/threads/aspen-rae.106714/page-2", - "#category": ("", "nudostarforum", "thread"), - "#class" : nudostarforum.NudostarforumThreadExtractor, + "count" : 3, + "type" : "external", + "post" : { + "attachments": "", + "author" : "djokica", + "author_id" : "3471965", + "author_url" : "/forum/members/djokica.3471965/", + "content" : """<div class="bbWrapper"><a href="https://imagetwist.com/bvolb8129fnm/v1.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/bvolb8129fnm.jpg" data-url="https://s10.imagetwist.com/th/73048/bvolb8129fnm.jpg" class="bbImage " style="" alt="" title="" /></a> <a href="https://imagetwist.com/9pddder15iow/v2.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/9pddder15iow.jpg" data-url="https://s10.imagetwist.com/th/73048/9pddder15iow.jpg" class="bbImage " style="" alt="" title="" /></a> <a href="https://imagetwist.com/zzonmk0gqqdv/v3.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/zzonmk0gqqdv.jpg" data-url="https://s10.imagetwist.com/th/73048/zzonmk0gqqdv.jpg" class="bbImage " style="" alt="" title="" /></a></div>""", + "count" : 3, + "date" : "dt:2025-10-31 21:26:42", + "id" : "1919100", + }, + "thread" : { + "author" : "djokica", + "author_id" : "", + "author_url": "", + "date" : "dt:2024-06-05 00:00:00", + "id" : "109528", + "posts" : range(20, 80), + "section" : "Celebrity", + "tags" : (), + "title" : "Tate Mcrae", + "url" : "https://nudostar.com/forum/threads/tate-mcrae.109528/", + "views" : -1, + }, }, { "#url" : "https://nudostar.com/forum/threads/name.12345/post-67890", - "#category": ("", "nudostarforum", "post"), - "#class" : nudostarforum.NudostarforumPostExtractor, + "#category": ("xenforo", "nudostarforum", "post"), + "#class" : xenforo.XenforoPostExtractor, +}, + +{ + "#url" : "https://nudostar.com/forum/threads/aspen-rae.106714/", + "#category": ("xenforo", "nudostarforum", "thread"), + "#class" : xenforo.XenforoThreadExtractor, +}, + +{ + "#url" : "https://nudostar.com/forum/threads/aspen-rae.106714/page-2", + "#category": ("xenforo", "nudostarforum", "thread"), + "#class" : xenforo.XenforoThreadExtractor, +}, + +{ + "#url" : "https://nudostar.com/forum/forums/celebrity.14/", + "#category": ("xenforo", "nudostarforum", "forum"), + "#class" : xenforo.XenforoForumExtractor, + "#pattern" : xenforo.XenforoThreadExtractor.pattern, + "#range" : "1-100", + "#count" : 100, }, ) diff --git a/test/results/simpcity.py b/test/results/simpcity.py index 6baafa21..a731a736 100644 --- a/test/results/simpcity.py +++ b/test/results/simpcity.py @@ -4,14 +4,15 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from gallery_dl.extractor import simpcity +from gallery_dl.extractor import xenforo from gallery_dl import exception __tests__ = ( { "#url" : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : True, "#results" : "https://jpg6.su/img/coWRwo", @@ -54,7 +55,8 @@ __tests__ = ( { "#url" : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : False, "#exception": exception.AuthRequired, }, @@ -62,7 +64,8 @@ __tests__ = ( { "#url" : "https://simpcity.cr/threads/puutin_cos.219873/post-26053409", "#comment" : "iframe embeds (#8214)", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : True, "#results" : ( "https://jpg6.su/img/NNFssUg", @@ -76,7 +79,8 @@ __tests__ = ( { "#url" : "https://simpcity.cr/threads/shinhashimoto00-shinhashimoto01.184378/post-13389764", "#comment" : "quote in post content (#8214)", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : True, "#results" : ( "/goto/post?id=13358068", @@ -87,9 +91,11 @@ __tests__ = ( { "#url" : "https://simpcity.cr/threads/kayle-oralglory.36572/post-12065490", "#comment" : "deleted thread author (#8323)", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : True, "#results" : ( + "https://redgifs.com/ifr/trainedovercookedsquid", "https://jpg6.su/img/aKroBJp", "https://jpg6.su/img/aKroy2E", "https://jpg6.su/img/aKrofqa", @@ -101,7 +107,7 @@ __tests__ = ( "author" : "Hexorium", "author_id" : "3715883", "author_url": "https://simpcity.cr/members/hexorium.3715883/", - "count" : 5, + "count" : 6, "date" : "dt:2024-12-15 21:37:05", "id" : "12065490", }, @@ -119,7 +125,8 @@ __tests__ = ( { "#url" : "https://simpcity.cr/threads/sophia-diamond.10049/post-10891", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : True, "#results" : ( "https://brandarmy.com/SophiaDiamond", @@ -136,8 +143,8 @@ __tests__ = ( "post" : { "attachments": "", "author" : "inoncognito", - "author_id" : "", - "author_url" : "", + "author_id" : "53824", + "author_url" : "/members/inoncognito.53824/", "count" : 4, "date" : "dt:2022-03-11 00:41:28", "id" : "10891", @@ -170,7 +177,8 @@ __tests__ = ( { "#url" : "https://simpcity.cr/threads/sophia-diamond.10049/post-18744", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : True, "#results" : "https://simpcity.cr/attachments/sophiadiamondcancunbikiniwp-png.36179/", @@ -190,12 +198,7 @@ __tests__ = ( "date" : "dt:2022-03-11 22:39:06", "id" : "18744", "attachments": str, - "content" : """\ -<div class="bbWrapper">Collage</div> - - - </div>\ -""", + "content" : r're:<div class="bbWrapper">Collage</div>\s+</div>', }, "thread" : { "date" : "dt:2022-03-11 00:41:28", @@ -208,14 +211,16 @@ __tests__ = ( { "#url" : "https://simpcity.cr/threads/lustn4lexi-hot4lexi-lexi-2-legit-hott4lexi-lexi.175167/post-2512729", "#comment" : "'Click here to load redgifs media' (#8609)", - "#class" : simpcity.SimpcityPostExtractor, + "#category": ("xenforo", "simpcity", "post"), + "#class" : xenforo.XenforoPostExtractor, "#auth" : True, "#results" : "https://redgifs.com/ifr/unusedsubmissivemullet", }, { "#url" : "https://simpcity.cr/threads/alua-tatakai.89490/", - "#class" : simpcity.SimpcityThreadExtractor, + "#category": ("xenforo", "simpcity", "thread"), + "#class" : xenforo.XenforoThreadExtractor, "#auth" : True, "#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|/goto/post", "#count" : range(100, 300), @@ -252,13 +257,15 @@ __tests__ = ( { "#url" : "https://simpcity.su/threads/angel-chan-wlep-wlop-menruinyanko_.12948/", - "#class" : simpcity.SimpcityThreadExtractor, + "#category": ("xenforo", "simpcity", "thread"), + "#class" : xenforo.XenforoThreadExtractor, }, { "#url" : "https://simpcity.cr/forums/asians.48/", - "#class" : simpcity.SimpcityForumExtractor, - "#pattern" : simpcity.SimpcityThreadExtractor.pattern, + "#category": ("xenforo", "simpcity", "forum"), + "#class" : xenforo.XenforoForumExtractor, + "#pattern" : xenforo.XenforoThreadExtractor.pattern, "#range" : "1-100", "#count" : 100, },