[xenforo] implement generic XenForo forum extractors

support
- https://simpcity.cr/
- https://nudostar.com/forum/ (#8333)
2025-12-11 21:26:34 +01:00
parent 814085062a
commit ab2c03b39e
7 changed files with 186 additions and 278 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -973,12 +973,6 @@ Consider all listed sites to potentially be NSFW.
    <td>Boards, Feed, Likes, Pins, User Pins, related Pins, Search Results</td>
    <td></td>
 </tr>
-<tr id="simpcity" title="simpcity">
-    <td>SimpCity Forums</td>
-    <td>https://simpcity.cr/</td>
-    <td>Forums, Posts, Threads</td>
-    <td>Supported</td>
-</tr>
 <tr id="simplyhentai" title="simplyhentai">
    <td>Simply Hentai</td>
    <td>https://www.simply-hentai.com/</td>
@@ -1836,6 +1830,22 @@ Consider all listed sites to potentially be NSFW.
    <td></td>
 </tr>

+<tr id="xenforo" title="xenforo">
+    <td colspan="4"><strong>XenForo Forums</strong></td>
+</tr>
+<tr id="simpcity" title="simpcity">
+    <td>SimpCity Forums</td>
+    <td>https://simpcity.cr/</td>
+    <td>Forums, Posts, Threads</td>
+    <td>Supported</td>
+</tr>
+<tr id="nudostarforum" title="nudostarforum">
+    <td>NudoStar Forums</td>
+    <td>https://nudostar.com/forum/</td>
+    <td>Forums, Posts, Threads</td>
+    <td>Supported</td>
+</tr>
+
 <tr id="moebooru" title="moebooru">
    <td colspan="4"><strong>Moebooru and MyImouto</strong></td>
 </tr>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -146,7 +146,6 @@ modules = [
    "nozomi",
    "nsfwalbum",
    "nudostar",
-    "nudostarforum",
    "okporn",
    "paheal",
    "patreon",
@@ -187,7 +186,6 @@ modules = [
    "senmanga",
    "sexcom",
    "shimmie2",
-    "simpcity",
    "simplyhentai",
    "sizebooru",
    "skeb",
@@ -235,6 +233,7 @@ modules = [
    "wikifeet",
    "wikimedia",
    "xasiat",
+    "xenforo",
    "xfolio",
    "xhamster",
    "xvideos",
--- a/gallery_dl/extractor/nudostarforum.py
+++ b/gallery_dl/extractor/nudostarforum.py
@@ -1,201 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://nudostar.com/forum/"""
-
-from .common import Extractor, Message
-from .. import text, exception
-from ..cache import cache
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?nudostar\.com/forum"
-
-
-class NudostarforumExtractor(Extractor):
-    """Base class for nudostar forum extractors"""
-    category = "nudostarforum"
-    cookies_domain = "nudostar.com"
-    cookies_names = ("xf_user",)
-    root = "https://nudostar.com/forum"
-    directory_fmt = ("{category}", "{thread[title]} ({thread[id]})")
-    filename_fmt = "{post[id]}_{num:>02}_{filename}.{extension}"
-    archive_fmt = "{post[id]}/{filename}"
-
-    def items(self):
-        self.login()
-
-        for post in self.posts():
-            internal, external = self._extract_post_urls(post["content"])
-
-            data = {"post": post}
-            post["count"] = data["count"] = len(internal) + len(external)
-            yield Message.Directory, "", data
-
-            data["num"] = 0
-            for url in internal:
-                data["num"] += 1
-                text.nameext_from_url(url, data)
-                yield Message.Url, url, data
-
-            for url in external:
-                data["num"] += 1
-                yield Message.Queue, url, data
-
-    def _extract_post_urls(self, content):
-        """Extract image and video URLs from post content"""
-        internal = []
-        external = []
-        seen = set()
-
-        # Extract URLs from both href= and src= attributes
-        for attr in ('href="', 'src="'):
-            for url in text.extract_iter(content, attr, '"'):
-                if url in seen:
-                    continue
-
-                # Internal attachments
-                if "/forum/attachments/" in url:
-                    # Skip numeric-only IDs and non-file links
-                    path = url.rstrip("/")
-                    if path.split(".")[-1].isdigit() and "-" not in path:
-                        continue
-                    if "upload?" in url:
-                        continue
-                    seen.add(url)
-                    # Normalize to full URL
-                    if url.startswith("/"):
-                        url = "https://nudostar.com" + url
-                    internal.append(url)
-
-                # External image hosts
-                elif url.startswith("http") and "nudostar.com" not in url:
-                    seen.add(url)
-                    external.append(url)
-
-        return internal, external
-
-    def request_page(self, url):
-        try:
-            return self.request(url)
-        except exception.HttpError as exc:
-            if exc.status == 403:
-                raise exception.AuthRequired(
-                    ("username & password", "authenticated cookies"), None,
-                    "Login required to view this content")
-            raise
-
-    def login(self):
-        if self.cookies_check(self.cookies_names):
-            return
-
-        username, password = self._get_auth_info()
-        if username:
-            self.cookies_update(self._login_impl(username, password))
-
-    @cache(maxage=365*86400, keyarg=1)
-    def _login_impl(self, username, password):
-        self.log.info("Logging in as %s", username)
-
-        url = f"{self.root}/login/"
-        page = self.request(url).text
-        token = text.extr(page, 'name="_xfToken" value="', '"')
-
-        url = f"{self.root}/login/login"
-        data = {
-            "_xfToken" : token,
-            "login"    : username,
-            "password" : password,
-            "remember" : "1",
-            "_xfRedirect": self.root + "/",
-        }
-        response = self.request(url, method="POST", data=data)
-
-        if not response.history or "xf_user" not in response.cookies:
-            raise exception.AuthenticationError()
-
-        return {
-            cookie.name: cookie.value
-            for cookie in self.cookies
-            if cookie.domain.endswith(self.cookies_domain)
-        }
-
-    def _pagination(self, base, pnum=None):
-        if pnum is None:
-            url = f"{self.root}{base}/"
-            pnum = 1
-        else:
-            url = f"{self.root}{base}/page-{pnum}"
-            pnum = None
-
-        while True:
-            page = self.request_page(url).text
-            yield page
-
-            if pnum is None or "pageNav-jump--next" not in page:
-                return
-            pnum += 1
-            url = f"{self.root}{base}/page-{pnum}"
-
-    def _parse_thread(self, page):
-        extr = text.extract_from(page)
-
-        title = text.unescape(extr("<title>", "<"))
-        if " | " in title:
-            title = title.rpartition(" | ")[0]
-
-        thread_id = extr('data-content-key="thread-', '"')
-
-        return {
-            "id"   : thread_id,
-            "title": title.strip(),
-        }
-
-    def _parse_post(self, html):
-        extr = text.extract_from(html)
-
-        return {
-            "author": extr('data-author="', '"'),
-            "id"    : extr('data-content="post-', '"'),
-            "date"  : extr('datetime="', '"'),
-            "content": html,  # Pass full article HTML for URL extraction
-        }
-
-
-class NudostarforumPostExtractor(NudostarforumExtractor):
-    """Extractor for individual posts on nudostar forum"""
-    subcategory = "post"
-    pattern = (rf"{BASE_PATTERN}"
-               rf"/threads/[^/?#]+\.(\d+)/post-(\d+)")
-    example = "https://nudostar.com/forum/threads/NAME.12345/post-67890"
-
-    def posts(self):
-        thread_id, post_id = self.groups
-        url = f"{self.root}/posts/{post_id}/"
-        page = self.request_page(url).text
-
-        pos = page.find(f'data-content="post-{post_id}"')
-        if pos < 0:
-            raise exception.NotFoundError("post")
-        html = text.extract(page, "<article ", "</article>", pos-200)[0]
-
-        self.kwdict["thread"] = self._parse_thread(page)
-        return (self._parse_post(html),)
-
-
-class NudostarforumThreadExtractor(NudostarforumExtractor):
-    """Extractor for threads on nudostar forum"""
-    subcategory = "thread"
-    pattern = rf"{BASE_PATTERN}(/threads/[^/?#]+\.(\d+))(?:/page-(\d+))?"
-    example = "https://nudostar.com/forum/threads/NAME.12345/"
-
-    def posts(self):
-        path, thread_id, pnum = self.groups
-
-        for page in self._pagination(path, pnum):
-            if "thread" not in self.kwdict:
-                self.kwdict["thread"] = self._parse_thread(page)
-
-            for html in text.extract_iter(page, "<article ", "</article>"):
-                yield self._parse_post(html)
--- a/gallery_dl/extractor/simpcity.py
+++ b/gallery_dl/extractor/xenforo.py
@@ -6,36 +6,38 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for https://simpcity.cr/"""
+"""Extractors for XenForo forums"""

-from .common import Extractor, Message
+from .common import BaseExtractor, Message
 from .. import text, exception
 from ..cache import cache

-BASE_PATTERN = r"(?:https?://)?(?:www\.)?simpcity\.(?:cr|su)"

-
-class SimpcityExtractor(Extractor):
-    """Base class for simpcity extractors"""
-    category = "simpcity"
-    cookies_domain = "simpcity.cr"
+class XenforoExtractor(BaseExtractor):
+    """Base class for xenforo extractors"""
+    basecategory = "xenforo"
+    #  cookies_domain = "simpcity.cr"
    cookies_names = ("ogaddgmetaprof_user",)
-    root = "https://simpcity.cr"
    directory_fmt = ("{category}", "{thread[section]}",
                     "{thread[title]} ({thread[id]})")
    filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}"
    archive_fmt = "{post[id]}/{type[0]}{id}_{filename}"

+    def __init__(self, match):
+        BaseExtractor.__init__(self, match)
+        self.cookies_domain = "." + self.root.split("/")[2]
+        self.cookies_names = self.config_instance("cookies")
+
    def items(self):
        self.login()

        extract_urls = text.re(
            r'(?s)(?:'
            r'<video (.*?\ssrc="[^"]+".*?)</video>'
-            r'|<a [^>]*?href="'
-            r'(?:https://[^"]+)?(/attachments/[^"]+".*?)</a>'
-            r'|<div [^>]*?data-src="'
-            r'(?:https://[^"]+)?(/attachments/[^"]+".*?)/>'
+            r'|<a [^>]*?href="[^"]*?'
+            r'(/attachments/[^"]+".*?)</a>'
+            r'|<div [^>]*?data-src="[^"]*?'
+            r'(/attachments/[^"]+".*?)/>'
            r'|(?:<a [^>]*?href="|<iframe [^>]*?src="|'
            r'''onclick="loadMedia\(this, ')([^"']+)'''
            r')'
@@ -50,6 +52,7 @@ class SimpcityExtractor(Extractor):
            post["count"] = data["count"] = len(urls)
            yield Message.Directory, "", data

+            id_last = None
            data["num"] = data["num_internal"] = data["num_external"] = 0
            for video, inl1, inl2, ext in urls:
                if ext:
@@ -71,12 +74,14 @@ class SimpcityExtractor(Extractor):
                    yield Message.Url, url, data

                elif (inline := inl1 or inl2):
-                    data["num"] += 1
-                    data["num_internal"] += 1
-                    data["type"] = "inline"
                    path = inline[:inline.find('"')]
                    name, _, id = path[path.rfind("/", 0, -1):].strip(
                        "/").rpartition(".")
+                    if id == id_last:
+                        id_last = None
+                        continue
+                    else:
+                        id_last = id
                    data["id"] = text.parse_int(id)
                    if alt := text.extr(inline, 'alt="', '"'):
                        text.nameext_from_name(alt, data)
@@ -85,6 +90,9 @@ class SimpcityExtractor(Extractor):
                    else:
                        data["filename"], _, data["extension"] = \
                            name.rpartition("-")
+                    data["num"] += 1
+                    data["num_internal"] += 1
+                    data["type"] = "inline"
                    yield Message.Url, self.root + path, data

    def request_page(self, url):
@@ -180,10 +188,15 @@ class SimpcityExtractor(Extractor):
            html, "blockMessage--error", "</").rpartition(">")[2].strip())

    def _parse_thread(self, page):
-        schema = self._extract_jsonld(page)["mainEntity"]
+        try:
+            data = self._extract_jsonld(page)
+        except ValueError:
+            return {}
+
+        schema = data.get("mainEntity", data)
        author = schema["author"]
        stats = schema["interactionStatistic"]
-        url_t = schema["url"]
+        url_t = schema.get("url") or schema.get("@id") or ""
        url_a = author.get("url") or ""

        thread = {
@@ -191,8 +204,6 @@ class SimpcityExtractor(Extractor):
            "url"  : url_t,
            "title": schema["headline"],
            "date" : self.parse_datetime_iso(schema["datePublished"]),
-            "views": stats[0]["userInteractionCount"],
-            "posts": stats[1]["userInteractionCount"],
            "tags" : (schema["keywords"].split(", ")
                      if "keywords" in schema else ()),
            "section"   : schema["articleSection"],
@@ -202,6 +213,13 @@ class SimpcityExtractor(Extractor):
            "author_url": url_a,
        }

+        if isinstance(stats, list):
+            thread["views"] = stats[0]["userInteractionCount"]
+            thread["posts"] = stats[1]["userInteractionCount"]
+        else:
+            thread["views"] = -1
+            thread["posts"] = stats["userInteractionCount"]
+
        return thread

    def _parse_post(self, html):
@@ -210,13 +228,11 @@ class SimpcityExtractor(Extractor):
        post = {
            "author": extr('data-author="', '"'),
            "id": extr('data-content="post-', '"'),
-            "author_url": extr('itemprop="url" content="', '"'),
+            "author_url": (extr('itemprop="url" content="', '"') or
+                           extr('<a href="', '"')),
            "date": self.parse_datetime_iso(extr('datetime="', '"')),
-            "content": (
-                extr('<div itemprop="text">',
-                     '<div class="js-selectToQuote') or
-                extr('<div >',
-                     '<div class="js-selectToQuote')).strip(),
+            "content": extr('class="message-body',
+                            '<div class="js-selectToQuote'),
            "attachments": extr('<section class="message-attachments">',
                                '</section>'),
        }
@@ -224,16 +240,35 @@ class SimpcityExtractor(Extractor):
        url_a = post["author_url"]
        post["author_id"] = url_a[url_a.rfind(".")+1:-1]

+        con = post["content"]
+        if (pos := con.find('<div class="bbWrapper')) >= 0:
+            con = con[pos:]
+        post["content"] = con.strip()
+
        return post


-class SimpcityPostExtractor(SimpcityExtractor):
+BASE_PATTERN = XenforoExtractor.update({
+    "simpcity": {
+        "root": "https://simpcity.cr",
+        "pattern": r"(?:www\.)?simpcity\.(?:cr|su)",
+        "cookies": ("ogaddgmetaprof_user",),
+    },
+    "nudostarforum": {
+        "root": "https://nudostar.com/forum",
+        "pattern": r"(?:www\.)?nudostar\.com/forum",
+        "cookies": ("xf_user",),
+    },
+})
+
+
+class XenforoPostExtractor(XenforoExtractor):
    subcategory = "post"
    pattern = rf"{BASE_PATTERN}/(?:threads/[^/?#]+/post-|posts/)(\d+)"
    example = "https://simpcity.cr/threads/TITLE.12345/post-54321"

    def posts(self):
-        post_id = self.groups[0]
+        post_id = self.groups[-1]
        url = f"{self.root}/posts/{post_id}/"
        page = self.request_page(url).text

@@ -246,18 +281,21 @@ class SimpcityPostExtractor(SimpcityExtractor):
        return (self._parse_post(html),)


-class SimpcityThreadExtractor(SimpcityExtractor):
+class XenforoThreadExtractor(XenforoExtractor):
    subcategory = "thread"
    pattern = rf"{BASE_PATTERN}(/threads/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
    example = "https://simpcity.cr/threads/TITLE.12345/"

    def posts(self):
+        path = self.groups[-2]
+        pnum = self.groups[-1]
+
        if (order := self.config("order-posts")) and \
                order[0] not in ("d", "r"):
-            pages = self._pagination(*self.groups)
+            pages = self._pagination(path, pnum)
            reverse = False
        else:
-            pages = self._pagination_reverse(*self.groups)
+            pages = self._pagination_reverse(path, pnum)
            reverse = True

        for page in pages:
@@ -271,13 +309,18 @@ class SimpcityThreadExtractor(SimpcityExtractor):
                yield self._parse_post(html)


-class SimpcityForumExtractor(SimpcityExtractor):
+class XenforoForumExtractor(XenforoExtractor):
    subcategory = "forum"
-    pattern = rf"{BASE_PATTERN}(/forums/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
+    pattern = rf"{BASE_PATTERN}(/forums/(?:[^/?#]+\.)?[^/?#]+)(?:/page-(\d+))?"
    example = "https://simpcity.cr/forums/TITLE.123/"

    def items(self):
-        data = {"_extractor": SimpcityThreadExtractor}
-        for page in self._pagination(*self.groups):
-            for path in text.extract_iter(page, ' uix-href="', '"'):
+        extract_threads = text.re(
+            r'(/threads/[^"]+)"[^>]+data-xf-init=').findall
+
+        data = {"_extractor": XenforoThreadExtractor}
+        path = self.groups[-2]
+        pnum = self.groups[-1]
+        for page in self._pagination(path, pnum):
+            for path in extract_threads(page):
                yield Message.Queue, f"{self.root}{text.unquote(path)}", data
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -142,6 +142,7 @@ CATEGORY_MAP = {
    "nozrip"         : "GaryC Booru",
    "nsfwalbum"      : "NSFWalbum.com",
    "nudostar"       : "NudoStar.TV",
+    "nudostarforum"  : "NudoStar Forums",
    "okporn"         : "OK.PORN",
    "paheal"         : "Rule 34",
    "photovogue"     : "PhotoVogue",
@@ -472,6 +473,7 @@ BASE_MAP = {
    "szurubooru"  : "szurubooru Instances",
    "urlshortener": "URL Shorteners",
    "vichan"      : "vichan Imageboards",
+    "xenforo"     : "XenForo Forums",
 }

 URL_MAP = {
@@ -526,6 +528,7 @@ AUTH_MAP = {
    "mastodon.social": _OAUTH,
    "newgrounds"     : "Supported",
    "nijie"          : "Required",
+    "nudostarforum"  : "Supported",
    "patreon"        : _COOKIES,
    "pawoo"          : _OAUTH,
    "pillowfort"     : "Supported",
--- a/test/results/nudostarforum.py
+++ b/test/results/nudostarforum.py
@@ -4,26 +4,73 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-from gallery_dl.extractor import nudostarforum
+from gallery_dl.extractor import xenforo


 __tests__ = (
 {
-    "#url"     : "https://nudostar.com/forum/threads/aspen-rae.106714/",
-    "#category": ("", "nudostarforum", "thread"),
-    "#class"   : nudostarforum.NudostarforumThreadExtractor,
-},
+    "#url"     : "https://nudostar.com/forum/threads/tate-mcrae.109528/post-1919100",
+    "#category": ("xenforo", "nudostarforum", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
+    "#auth"    : True,
+    "#results" : (
+        "https://imagetwist.com/bvolb8129fnm/v1.jpg",
+        "https://imagetwist.com/9pddder15iow/v2.jpg",
+        "https://imagetwist.com/zzonmk0gqqdv/v3.jpg",
+    ),

-{
-    "#url"     : "https://nudostar.com/forum/threads/aspen-rae.106714/page-2",
-    "#category": ("", "nudostarforum", "thread"),
-    "#class"   : nudostarforum.NudostarforumThreadExtractor,
+    "count"       : 3,
+    "type"        : "external",
+    "post"        : {
+        "attachments": "",
+        "author"     : "djokica",
+        "author_id"  : "3471965",
+        "author_url" : "/forum/members/djokica.3471965/",
+        "content"    : """<div class="bbWrapper"><a href="https://imagetwist.com/bvolb8129fnm/v1.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/bvolb8129fnm.jpg" data-url="https://s10.imagetwist.com/th/73048/bvolb8129fnm.jpg" class="bbImage " style="" alt="" title="" /></a> <a href="https://imagetwist.com/9pddder15iow/v2.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/9pddder15iow.jpg" data-url="https://s10.imagetwist.com/th/73048/9pddder15iow.jpg" class="bbImage " style="" alt="" title="" /></a> <a href="https://imagetwist.com/zzonmk0gqqdv/v3.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/zzonmk0gqqdv.jpg" data-url="https://s10.imagetwist.com/th/73048/zzonmk0gqqdv.jpg" class="bbImage " style="" alt="" title="" /></a></div>""",
+        "count"      : 3,
+        "date"       : "dt:2025-10-31 21:26:42",
+        "id"         : "1919100",
+    },
+    "thread"      : {
+        "author"    : "djokica",
+        "author_id" : "",
+        "author_url": "",
+        "date"      : "dt:2024-06-05 00:00:00",
+        "id"        : "109528",
+        "posts"     : range(20, 80),
+        "section"   : "Celebrity",
+        "tags"      : (),
+        "title"     : "Tate Mcrae",
+        "url"       : "https://nudostar.com/forum/threads/tate-mcrae.109528/",
+        "views"     : -1,
+    },
 },

 {
    "#url"     : "https://nudostar.com/forum/threads/name.12345/post-67890",
-    "#category": ("", "nudostarforum", "post"),
-    "#class"   : nudostarforum.NudostarforumPostExtractor,
+    "#category": ("xenforo", "nudostarforum", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
+},
+
+{
+    "#url"     : "https://nudostar.com/forum/threads/aspen-rae.106714/",
+    "#category": ("xenforo", "nudostarforum", "thread"),
+    "#class"   : xenforo.XenforoThreadExtractor,
+},
+
+{
+    "#url"     : "https://nudostar.com/forum/threads/aspen-rae.106714/page-2",
+    "#category": ("xenforo", "nudostarforum", "thread"),
+    "#class"   : xenforo.XenforoThreadExtractor,
+},
+
+{
+    "#url"     : "https://nudostar.com/forum/forums/celebrity.14/",
+    "#category": ("xenforo", "nudostarforum", "forum"),
+    "#class"   : xenforo.XenforoForumExtractor,
+    "#pattern" : xenforo.XenforoThreadExtractor.pattern,
+    "#range"   : "1-100",
+    "#count"   : 100,
 },

 )
--- a/test/results/simpcity.py
+++ b/test/results/simpcity.py
@@ -4,14 +4,15 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-from gallery_dl.extractor import simpcity
+from gallery_dl.extractor import xenforo
 from gallery_dl import exception


 __tests__ = (
 {
    "#url"     : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"    : True,
    "#results" : "https://jpg6.su/img/coWRwo",

@@ -54,7 +55,8 @@ __tests__ = (

 {
    "#url"     : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"     : False,
    "#exception": exception.AuthRequired,
 },
@@ -62,7 +64,8 @@ __tests__ = (
 {
    "#url"     : "https://simpcity.cr/threads/puutin_cos.219873/post-26053409",
    "#comment" : "iframe embeds (#8214)",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"    : True,
    "#results" : (
        "https://jpg6.su/img/NNFssUg",
@@ -76,7 +79,8 @@ __tests__ = (
 {
    "#url"     : "https://simpcity.cr/threads/shinhashimoto00-shinhashimoto01.184378/post-13389764",
    "#comment" : "quote in post content (#8214)",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"    : True,
    "#results" : (
        "/goto/post?id=13358068",
@@ -87,9 +91,11 @@ __tests__ = (
 {
    "#url"     : "https://simpcity.cr/threads/kayle-oralglory.36572/post-12065490",
    "#comment" : "deleted thread author (#8323)",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"    : True,
    "#results" : (
+        "https://redgifs.com/ifr/trainedovercookedsquid",
        "https://jpg6.su/img/aKroBJp",
        "https://jpg6.su/img/aKroy2E",
        "https://jpg6.su/img/aKrofqa",
@@ -101,7 +107,7 @@ __tests__ = (
        "author"    : "Hexorium",
        "author_id" : "3715883",
        "author_url": "https://simpcity.cr/members/hexorium.3715883/",
-        "count"     : 5,
+        "count"     : 6,
        "date"      : "dt:2024-12-15 21:37:05",
        "id"        : "12065490",
    },
@@ -119,7 +125,8 @@ __tests__ = (

 {
    "#url"     : "https://simpcity.cr/threads/sophia-diamond.10049/post-10891",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"    : True,
    "#results" : (
        "https://brandarmy.com/SophiaDiamond",
@@ -136,8 +143,8 @@ __tests__ = (
    "post"        : {
        "attachments": "",
        "author"     : "inoncognito",
-        "author_id"  : "",
-        "author_url" : "",
+        "author_id"  : "53824",
+        "author_url" : "/members/inoncognito.53824/",
        "count"      : 4,
        "date"       : "dt:2022-03-11 00:41:28",
        "id"         : "10891",
@@ -170,7 +177,8 @@ __tests__ = (

 {
    "#url"     : "https://simpcity.cr/threads/sophia-diamond.10049/post-18744",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"    : True,
    "#results" : "https://simpcity.cr/attachments/sophiadiamondcancunbikiniwp-png.36179/",

@@ -190,12 +198,7 @@ __tests__ = (
        "date"       : "dt:2022-03-11 22:39:06",
        "id"         : "18744",
        "attachments": str,
-        "content"    : """\
-<div class="bbWrapper">Collage</div>
-
-
-            </div>\
-""",
+        "content"    : r're:<div class="bbWrapper">Collage</div>\s+</div>',
    },
    "thread"      : {
        "date"      : "dt:2022-03-11 00:41:28",
@@ -208,14 +211,16 @@ __tests__ = (
 {
    "#url"     : "https://simpcity.cr/threads/lustn4lexi-hot4lexi-lexi-2-legit-hott4lexi-lexi.175167/post-2512729",
    "#comment" : "'Click here to load redgifs media' (#8609)",
-    "#class"   : simpcity.SimpcityPostExtractor,
+    "#category": ("xenforo", "simpcity", "post"),
+    "#class"   : xenforo.XenforoPostExtractor,
    "#auth"    : True,
    "#results" : "https://redgifs.com/ifr/unusedsubmissivemullet",
 },

 {
    "#url"     : "https://simpcity.cr/threads/alua-tatakai.89490/",
-    "#class"   : simpcity.SimpcityThreadExtractor,
+    "#category": ("xenforo", "simpcity", "thread"),
+    "#class"   : xenforo.XenforoThreadExtractor,
    "#auth"    : True,
    "#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|/goto/post",
    "#count"   : range(100, 300),
@@ -252,13 +257,15 @@ __tests__ = (

 {
    "#url"     : "https://simpcity.su/threads/angel-chan-wlep-wlop-menruinyanko_.12948/",
-    "#class"   : simpcity.SimpcityThreadExtractor,
+    "#category": ("xenforo", "simpcity", "thread"),
+    "#class"   : xenforo.XenforoThreadExtractor,
 },

 {
    "#url"     : "https://simpcity.cr/forums/asians.48/",
-    "#class"   : simpcity.SimpcityForumExtractor,
-    "#pattern" : simpcity.SimpcityThreadExtractor.pattern,
+    "#category": ("xenforo", "simpcity", "forum"),
+    "#class"   : xenforo.XenforoForumExtractor,
+    "#pattern" : xenforo.XenforoThreadExtractor.pattern,
    "#range"   : "1-100",
    "#count"   : 100,
 },