[idolcomplex] update to new domain and interface (#7559 #8009)

2025-08-11 22:24:04 +02:00
parent d7f654c643
commit e491d56dc3
8 changed files with 149 additions and 297 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -446,7 +446,6 @@ Default
    * ``"3.0-6.0"``
        ``bilibili``,
        ``exhentai``,
-        ``idolcomplex``,
        ``[reactor]``,
        ``readcomiconline``
    * ``"6.0-6.1"``
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -381,8 +381,9 @@
        {
            "username": "",
            "password": "",
-            "referer" : false,
-            "sleep-request": "3.0-6.0"
+
+            "refresh" : false,
+            "tags"    : false
        },
        "imagechest":
        {
@@ -643,8 +644,8 @@
            "username": "",
            "password": "",

-            "refresh"  : false,
-            "tags"     : false
+            "refresh" : false,
+            "tags"    : false
        },
        "sankakucomplex":
        {
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -417,7 +417,7 @@ Consider all listed sites to potentially be NSFW.
 </tr>
 <tr>
    <td>Idol Complex</td>
-    <td>https://idol.sankakucomplex.com/</td>
+    <td>https://www.idolcomplex.com/</td>
    <td>Pools, Posts, Tag Searches</td>
    <td>Supported</td>
 </tr>
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -6,266 +6,39 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for https://idol.sankakucomplex.com/"""
+"""Extractors for https://www.idolcomplex.com/"""

-from .sankaku import SankakuExtractor
-from .common import Message
-from ..cache import cache
-from .. import text, util, exception
-import collections
-import re
+from . import sankaku

-BASE_PATTERN = r"(?:https?://)?idol\.sankakucomplex\.com(?:/[a-z]{2})?"
+BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
+                r"idol(?:\.sankaku)?complex\.com(?:/[a-z]{2})?")


-class IdolcomplexExtractor(SankakuExtractor):
+class IdolcomplexBase():
    """Base class for idolcomplex extractors"""
    category = "idolcomplex"
-    root = "https://idol.sankakucomplex.com"
-    cookies_domain = "idol.sankakucomplex.com"
-    cookies_names = ("_idolcomplex_session",)
-    referer = False
-    request_interval = (3.0, 6.0)
-
-    def __init__(self, match):
-        SankakuExtractor.__init__(self, match)
-        self.logged_in = True
-        self.start_page = 1
-        self.start_post = 0
+    root = "https://www.idolcomplex.com"
+    cookies_domain = ".idolcomplex.com"

    def _init(self):
-        self.find_pids = re.compile(
-            r" href=[\"#]/\w\w/posts/(\w+)"
-        ).findall
-        self.find_tags = re.compile(
-            r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
-        ).findall
-
-    def items(self):
-        self.login()
-        data = self.metadata()
-
-        for post_id in util.advance(self.post_ids(), self.start_post):
-            post = self._extract_post(post_id)
-            url = post["file_url"]
-            post.update(data)
-            text.nameext_from_url(url, post)
-            yield Message.Directory, post
-            yield Message.Url, url, post
-
-    def skip(self, num):
-        self.start_post += num
-        return num
-
-    def post_ids(self):
-        """Return an iterable containing all relevant post ids"""
-
-    def login(self):
-        if self.cookies_check(self.cookies_names):
-            return
-
-        username, password = self._get_auth_info()
-        if username:
-            return self.cookies_update(self._login_impl(username, password))
-
-        self.logged_in = False
-
-    @cache(maxage=90*86400, keyarg=1)
-    def _login_impl(self, username, password):
-        self.log.info("Logging in as %s", username)
-
-        url = self.root + "/users/login"
-        page = self.request(url).text
-
-        headers = {
-            "Referer": url,
-        }
-        url = self.root + (text.extr(page, '<form action="', '"') or
-                           "/en/user/authenticate")
-        data = {
-            "authenticity_token": text.unescape(text.extr(
-                page, 'name="authenticity_token" value="', '"')),
-            "url"           : "",
-            "user[name]"    : username,
-            "user[password]": password,
-            "commit"        : "Login",
-        }
-        self.sleep(10, "login")
-        response = self.request(url, method="POST", headers=headers, data=data)
-
-        if not response.history or response.url.endswith(
-                ("/users/login", "/user/home")):
-            raise exception.AuthenticationError()
-        return {c.name: c.value for c in response.history[0].cookies}
-
-    def _extract_post(self, post_id):
-        url = self.root + "/posts/" + post_id
-        page = self.request(url, retries=10).text
-        extr = text.extract_from(page)
-
-        vavg = extr('id="rating"', "</ul>")
-        vcnt = extr('>Votes</strong>:', "<")
-        pid = extr(">Post ID:", "<")
-        created = extr(' title="', '"')
-
-        if file_url := extr('>Original:', 'id='):
-            file_url = extr(' href="', '"')
-            width = extr(">", "x")
-            height = extr("", " ")
-        else:
-            width = extr('<object width=', ' ')
-            height = extr('height=', '>')
-            file_url = extr('<embed src="', '"')
-
-        rating = extr(">Rating:", "<br")
-
-        data = {
-            "id"          : pid.strip(),
-            "md5"         : file_url.rpartition("/")[2].partition(".")[0],
-            "vote_average": (1.0 * vavg.count('class="star-full"') +
-                             0.5 * vavg.count('class="star-half"')),
-            "vote_count"  : text.parse_int(vcnt),
-            "created_at"  : created,
-            "date"        : text.parse_datetime(
-                created, "%Y-%m-%d %H:%M:%S.%f"),
-            "rating"      : text.remove_html(rating).lower(),
-            "file_url"    : "https:" + text.unescape(file_url),
-            "width"       : text.parse_int(width),
-            "height"      : text.parse_int(height),
-        }
-
-        tags = collections.defaultdict(list)
-        tags_list = []
-        tags_html = text.extr(page, '<ul id="tag-sidebar"', '</ul>')
-        for tag_type, tag_name in self.find_tags(tags_html or ""):
-            tags[tag_type].append(text.unquote(tag_name))
-        for key, value in tags.items():
-            data["tags_" + key] = " ".join(value)
-            tags_list += value
-        data["tags"] = " ".join(tags_list)
-
-        return data
+        self.api = sankaku.SankakuAPI(self)
+        self.api.ROOT = "https://i.sankakuapi.com"
+        self.api.headers["Origin"] = self.root


-class IdolcomplexTagExtractor(IdolcomplexExtractor):
-    """Extractor for images from idol.sankakucomplex.com by search-tags"""
-    subcategory = "tag"
-    directory_fmt = ("{category}", "{search_tags}")
-    archive_fmt = "t_{search_tags}_{id}"
-    pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
-    example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
-    per_page = 20
-
-    def __init__(self, match):
-        IdolcomplexExtractor.__init__(self, match)
-        query = text.parse_query(match[1])
-        self.tags = text.unquote(query.get("tags", "").replace("+", " "))
-        self.start_page = text.parse_int(query.get("page"), 1)
-        self.next = text.parse_int(query.get("next"), 0)
-
-    def skip(self, num):
-        if self.next:
-            self.start_post += num
-        else:
-            pages, posts = divmod(num, self.per_page)
-            self.start_page += pages
-            self.start_post += posts
-        return num
-
-    def metadata(self):
-        if not self.next:
-            max_page = 50 if self.logged_in else 25
-            if self.start_page > max_page:
-                self.log.info("Traversing from page %d to page %d",
-                              max_page, self.start_page)
-                self.start_post += self.per_page * (self.start_page - max_page)
-                self.start_page = max_page
-
-        tags = self.tags.split()
-        if not self.logged_in and len(tags) > 4:
-            raise exception.AbortExtraction(
-                "Non-members can only search up to 4 tags at once")
-        return {"search_tags": " ".join(tags)}
-
-    def post_ids(self):
-        url = self.root + "/en/posts"
-
-        params = {"auto_page": "t"}
-        if self.next:
-            params["next"] = self.next
-        else:
-            params["page"] = self.start_page
-        params["tags"] = self.tags
-
-        while True:
-            response = self.request(url, params=params, retries=10)
-            if response.history and "/posts/premium" in response.url:
-                self.log.warning("HTTP redirect to %s", response.url)
-            page = response.text
-
-            yield from text.extract_iter(page, '"id":"', '"')
-
-            next_page_url = text.extr(page, 'next-page-url="', '"')
-            if not next_page_url:
-                return
-
-            url, _, next_params = text.unquote(
-                text.unescape(text.unescape(next_page_url))).partition("?")
-            next_params = text.parse_query(next_params)
-
-            if "next" in next_params:
-                # stop if the same "next" value occurs twice in a row (#265)
-                if "next" in params and params["next"] == next_params["next"]:
-                    return
-                next_params["page"] = "2"
-
-            if url[0] == "/":
-                url = self.root + url
-            params = next_params
+class IdolcomplexTagExtractor(IdolcomplexBase, sankaku.SankakuTagExtractor):
+    """Extractor for idolcomplex tag searches"""
+    pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)"
+    example = "https://www.idolcomplex.com/en/posts?tags=TAGS"


-class IdolcomplexPoolExtractor(IdolcomplexExtractor):
-    """Extractor for image-pools from idol.sankakucomplex.com"""
-    subcategory = "pool"
-    directory_fmt = ("{category}", "pool", "{pool}")
-    archive_fmt = "p_{pool}_{id}"
+class IdolcomplexPoolExtractor(IdolcomplexBase, sankaku.SankakuPoolExtractor):
+    """Extractor for idolcomplex pools"""
    pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
-    example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
-    per_page = 24
-
-    def skip(self, num):
-        pages, posts = divmod(num, self.per_page)
-        self.start_page += pages
-        self.start_post += posts
-        return num
-
-    def metadata(self):
-        return {"pool": self.groups[0]}
-
-    def post_ids(self):
-        if not self.logged_in:
-            self.log.warning("Login required")
-
-        url = self.root + "/pools/show/" + self.groups[0]
-        params = {"page": self.start_page}
-
-        while True:
-            page = self.request(url, params=params, retries=10).text
-            pos = page.find('id="pool-show"') + 1
-            post_ids = self.find_pids(page, pos)
-
-            yield from post_ids
-            if len(post_ids) < self.per_page:
-                return
-            params["page"] += 1
+    example = "https://www.idolcomplex.com/en/pools/0123456789abcdef"


-class IdolcomplexPostExtractor(IdolcomplexExtractor):
-    """Extractor for single images from idol.sankakucomplex.com"""
-    subcategory = "post"
-    archive_fmt = "{id}"
-    pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)"
-    example = "https://idol.sankakucomplex.com/posts/0123456789abcdef"
-
-    def post_ids(self):
-        return (self.groups[0],)
+class IdolcomplexPostExtractor(IdolcomplexBase, sankaku.SankakuPostExtractor):
+    """Extractor for individual idolcomplex posts"""
+    pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
+    example = "https://www.idolcomplex.com/en/posts/0123456789abcdef"
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -152,12 +152,8 @@ class SankakuPoolExtractor(SankakuExtractor):
    pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)"
    example = "https://sankaku.app/books/12345"

-    def __init__(self, match):
-        SankakuExtractor.__init__(self, match)
-        self.pool_id = match[1]
-
    def metadata(self):
-        pool = self.api.pools(self.pool_id)
+        pool = self.api.pools(self.groups[0])
        pool["tags"] = [tag["name"] for tag in pool["tags"]]
        pool["artist_tags"] = [tag["name"] for tag in pool["artist_tags"]]

@@ -178,12 +174,8 @@ class SankakuPostExtractor(SankakuExtractor):
    pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
    example = "https://sankaku.app/post/show/12345"

-    def __init__(self, match):
-        SankakuExtractor.__init__(self, match)
-        self.post_id = match[1]
-
    def posts(self):
-        return self.api.posts(self.post_id)
+        return self.api.posts(self.groups[0])


 class SankakuBooksExtractor(SankakuExtractor):
@@ -207,12 +199,14 @@ class SankakuBooksExtractor(SankakuExtractor):

 class SankakuAPI():
    """Interface for the sankaku.app API"""
+    ROOT = "https://sankakuapi.com"
+    VERSION = None

    def __init__(self, extractor):
        self.extractor = extractor
        self.headers = {
            "Accept"     : "application/vnd.sankaku.api+json;v=2",
-            "Api-Version": None,
+            "Api-Version": self.VERSION,
            "Origin"     : extractor.root,
        }

@@ -281,7 +275,7 @@ class SankakuAPI():
            _authenticate_impl(self.extractor, self.username, self.password)

    def _call(self, endpoint, params=None):
-        url = "https://sankakuapi.com" + endpoint
+        url = self.ROOT + endpoint
        for _ in range(5):
            self.authenticate()
            response = self.extractor.request(
@@ -357,12 +351,12 @@ class SankakuAPI():
 def _authenticate_impl(extr, username, password):
    extr.log.info("Logging in as %s", username)

-    url = "https://sankakuapi.com/auth/token"
-    headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
+    api = extr.api
+    url = api.ROOT + "/auth/token"
    data = {"login": username, "password": password}

    response = extr.request(
-        url, method="POST", headers=headers, json=data, fatal=False)
+        url, method="POST", headers=api.headers, json=data, fatal=False)
    data = response.json()

    if response.status_code >= 400 or not data.get("success"):
--- a/test/results/idolcomplex.py
+++ b/test/results/idolcomplex.py
@@ -5,16 +5,29 @@
 # published by the Free Software Foundation.

 from gallery_dl.extractor import idolcomplex
+from gallery_dl import exception


 __tests__ = (
+{
+    "#url"     : "https://www.idolcomplex.com/en/posts?tags=lyumos",
+    "#category": ("booru", "idolcomplex", "tag"),
+    "#class"   : idolcomplex.IdolcomplexTagExtractor,
+    "#pattern" : r"https://i[sv]\.sankakucomplex\.com/o/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
+    "#range"   : "18-22",
+    "#count"   : 5,
+},
+
+{
+    "#url"     : "https://idolcomplex.com/posts?tags=lyumos",
+    "#category": ("booru", "idolcomplex", "tag"),
+    "#class"   : idolcomplex.IdolcomplexTagExtractor,
+},
+
 {
    "#url"     : "https://idol.sankakucomplex.com/en/posts?tags=lyumos",
    "#category": ("booru", "idolcomplex", "tag"),
    "#class"   : idolcomplex.IdolcomplexTagExtractor,
-    "#pattern" : r"https://i[sv]\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
-    "#range"   : "18-22",
-    "#count"   : 5,
 },

 {
@@ -41,11 +54,22 @@ __tests__ = (
    "#class"   : idolcomplex.IdolcomplexTagExtractor,
 },

+{
+    "#url"     : "https://www.idolcomplex.com/en/pools/e9PMwnwRBK3",
+    "#category": ("booru", "idolcomplex", "pool"),
+    "#class"   : idolcomplex.IdolcomplexPoolExtractor,
+    "#auth"    : True,
+    "#pattern" : (  # hostname dots escaped so "." only matches a literal dot
+        r"https://is\.sankakucomplex\.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?e=\d+&m=.+",
+        r"https://is\.sankakucomplex\.com/o/cf/ae/cfae655b594634126bddc10ba7965485\.jpg\?e=\d+&m=.+",
+        r"https://is\.sankakucomplex\.com/o/53/b3/53b3d915a79ac72747455f4d0e843fc0\.jpg\?e=\d+&m=.+",
+    ),
+},
+
 {
    "#url"     : "https://idol.sankakucomplex.com/en/pools/e9PMwnwRBK3",
    "#category": ("booru", "idolcomplex", "pool"),
    "#class"   : idolcomplex.IdolcomplexPoolExtractor,
-    "#count"   : 3,
 },

 {
@@ -60,31 +84,92 @@ __tests__ = (
    "#class"   : idolcomplex.IdolcomplexPoolExtractor,
 },

+{
+    "#url"     : "https://www.idolcomplex.com/en/posts/vkr36qdOaZ4",
+    "#category": ("booru", "idolcomplex", "post"),
+    "#class"   : idolcomplex.IdolcomplexPostExtractor,
+    "#auth"    : True,
+    "#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
+
+    "audios"          : [],
+    "author"          : {
+        "avatar"       : str,
+        "avatar_rating": "q",
+        "display_name" : "kekal",
+        "id"           : "8YEa7e8RmD0",
+        "level"        : 20,
+        "name"         : "kekal",
+    },
+    "category"        : "idolcomplex",
+    "change"          : 2121180,
+    "comment_count"   : None,
+    "created_at"      : 1511560888,
+    "date"            : "dt:2017-11-24 22:01:28",
+    "extension"       : "jpg",
+    "fav_count"       : range(90, 120),
+    "file_ext"        : "jpg",
+    "file_size"       : 97521,
+    "file_type"       : "image/jpeg",
+    "file_url"        : r"re:https://is\.sankakucomplex\.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?e=\d+&m=.+",
+    "filename"        : "509eccbba54a43cea6b275a65b93c51d",
+    "generation_directives": None,
+    "gif_preview_url" : None,
+    "has_children"    : False,
+    "has_comments"    : False,
+    "has_notes"       : False,
+    "height"          : 683,
+    "id"              : "vkr36qdOaZ4",
+    "in_visible_pool" : True,
+    "is_anonymous"    : False,
+    "is_favorited"    : False,
+    "is_note_locked"  : False,
+    "is_premium"      : False,
+    "is_rating_locked": False,
+    "is_restricted_anonymous_upload": False,
+    "is_status_locked": False,
+    "md5"             : "509eccbba54a43cea6b275a65b93c51d",
+    "parent_id"       : None,
+    "preview_height"  : 400,
+    "preview_url"     : r"re:https://is\.sankakucomplex\.com/p/50/9e/509eccbba54a43cea6b275a65b93c51d\.avif\?e=\d+&m=.+",
+    "preview_width"   : 600,
+    "rating"          : "s",
+    "reactions"       : [],
+    "redirect_to_signup": False,
+    "sample_height"   : 683,
+    "sample_url"      : r"re:https://is\.sankakucomplex\.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?e=\d+&m=.+",
+    "sample_width"    : 1024,
+    "sequence"        : None,
+    "source"          : "removed",
+    "status"          : "active",
+    "subtitles"       : [],
+    "tag_string"      : "lyumos the_witcher shani_(the_witcher) cosplay waistcoat wreath female green_eyes non-asian red_hair 1girl 3:2_aspect_ratio tagme",
+    "tags"            : [
+        "lyumos",
+        "the_witcher",
+        "shani_(the_witcher)",
+        "cosplay",
+        "waistcoat",
+        "wreath",
+        "female",
+        "green_eyes",
+        "non-asian",
+        "red_hair",
+        "1girl",
+        "3:2_aspect_ratio",
+        "tagme",
+    ],
+    "total_score"     : range(120, 150),
+    "total_tags"      : 13,
+    "user_vote"       : None,
+    "video_duration"  : None,
+    "vote_count"      : range(25, 50),
+    "width"           : 1024,
+},
+
 {
    "#url"     : "https://idol.sankakucomplex.com/en/posts/vkr36qdOaZ4",
    "#category": ("booru", "idolcomplex", "post"),
    "#class"   : idolcomplex.IdolcomplexPostExtractor,
-    "#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
-
-    "created_at"    : "2017-11-24 17:01:27.696",
-    "date"          : "dt:2017-11-24 17:01:27",
-    "extension"     : "jpg",
-    "file_url"      : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
-    "filename"      : "509eccbba54a43cea6b275a65b93c51d",
-    "height"        : 683,
-    "id"            : "vkr36qdOaZ4",  # legacy ID: 694215
-    "md5"           : "509eccbba54a43cea6b275a65b93c51d",
-    "rating"        : "g",
-    "tags"          : "lyumos the_witcher shani_(the_witcher) 1girl green_eyes non-asian redhead waistcoat wreath cosplay 3:2_aspect_ratio",
-    "tags_character": "shani_(the_witcher)",
-    "tags_copyright": "the_witcher",
-    "tags_general"  : "1girl green_eyes non-asian redhead waistcoat wreath",
-    "tags_genre"    : "cosplay",
-    "tags_idol"     : "lyumos",
-    "tags_medium"   : "3:2_aspect_ratio",
-    "vote_average"  : range(4, 5),
-    "vote_count"    : range(25, 40),
-    "width"         : 1024,
 },

 {
@@ -109,6 +194,7 @@ __tests__ = (
    "#url"     : "https://idol.sankakucomplex.com/post/show/694215",
    "#category": ("booru", "idolcomplex", "post"),
    "#class"   : idolcomplex.IdolcomplexPostExtractor,
+    "#exception": exception.AbortExtraction,
    "#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",

    "id"            : "vkr36qdOaZ4",  # legacy ID: 694215
--- a/test/results/sankaku.py
+++ b/test/results/sankaku.py
@@ -572,6 +572,7 @@ __tests__ = (
    "#url"     : "https://sankaku.app/books?tags=aiue_oka",
    "#category": ("booru", "sankaku", "books"),
    "#class"   : sankaku.SankakuBooksExtractor,
+    "#auth"    : True,
    "#range"   : "1-20",
    "#count"   : 20,
 },
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -91,7 +91,7 @@ class TestCookiedict(unittest.TestCase):
        self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))

    def test_domain(self):
-        for category in ["exhentai", "idolcomplex", "nijie", "horne"]:
+        for category in ["exhentai", "nijie", "horne"]:
            extr = _get_extractor(category)
            cookies = extr.cookies
            for key in self.cdict:
@@ -108,7 +108,6 @@ class TestCookieLogin(unittest.TestCase):
    def test_cookie_login(self):
        extr_cookies = {
            "exhentai"   : ("ipb_member_id", "ipb_pass_hash"),
-            "idolcomplex": ("login", "pass_hash"),
            "nijie"      : ("nijie_tok",),
            "horne"      : ("horne_tok",),
        }
@@ -244,7 +243,6 @@ def _get_extractor(category):

 URLS = {
    "exhentai"   : "https://exhentai.org/g/1200119/d55c44d3d0/",
-    "idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
    "nijie"      : "https://nijie.info/view.php?id=1",
    "horne"      : "https://horne.red/view.php?id=1",
    "test"       : "generic:https://example.org/",