From e491d56dc353ebeb40ae2ec0716285ff3dfb0272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 11 Aug 2025 22:24:04 +0200 Subject: [PATCH] [idolcomplex] update to new domain and interface (#7559 #8009) --- docs/configuration.rst | 1 - docs/gallery-dl.conf | 9 +- docs/supportedsites.md | 2 +- gallery_dl/extractor/idolcomplex.py | 269 +++------------------------- gallery_dl/extractor/sankaku.py | 24 +-- test/results/idolcomplex.py | 136 +++++++++++--- test/results/sankaku.py | 1 + test/test_cookies.py | 4 +- 8 files changed, 149 insertions(+), 297 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 985696e9..1f3d3428 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -446,7 +446,6 @@ Default * ``"3.0-6.0"`` ``bilibili``, ``exhentai``, - ``idolcomplex``, ``[reactor]``, ``readcomiconline`` * ``"6.0-6.1"`` diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 715ec7ea..687155e4 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -381,8 +381,9 @@ { "username": "", "password": "", - "referer" : false, - "sleep-request": "3.0-6.0" + + "refresh" : false, + "tags" : false }, "imagechest": { @@ -643,8 +644,8 @@ "username": "", "password": "", - "refresh" : false, - "tags" : false + "refresh" : false, + "tags" : false }, "sankakucomplex": { diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e4cde17d..3c4aa68e 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -417,7 +417,7 @@ Consider all listed sites to potentially be NSFW. Idol Complex - https://idol.sankakucomplex.com/ + https://www.idolcomplex.com/ Pools, Posts, Tag Searches Supported diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 075e1f66..26fd5958 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -6,266 +6,39 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://idol.sankakucomplex.com/""" +"""Extractors for https://www.idolcomplex.com/""" -from .sankaku import SankakuExtractor -from .common import Message -from ..cache import cache -from .. import text, util, exception -import collections -import re +from . import sankaku -BASE_PATTERN = r"(?:https?://)?idol\.sankakucomplex\.com(?:/[a-z]{2})?" +BASE_PATTERN = (r"(?:https?://)?(?:www\.)?" + r"idol(?:\.sankaku)?complex\.com(?:/[a-z]{2})?") -class IdolcomplexExtractor(SankakuExtractor): +class IdolcomplexBase(): """Base class for idolcomplex extractors""" category = "idolcomplex" - root = "https://idol.sankakucomplex.com" - cookies_domain = "idol.sankakucomplex.com" - cookies_names = ("_idolcomplex_session",) - referer = False - request_interval = (3.0, 6.0) - - def __init__(self, match): - SankakuExtractor.__init__(self, match) - self.logged_in = True - self.start_page = 1 - self.start_post = 0 + root = "https://www.idolcomplex.com" + cookies_domain = ".idolcomplex.com" def _init(self): - self.find_pids = re.compile( - r" href=[\"#]/\w\w/posts/(\w+)" - ).findall - self.find_tags = re.compile( - r'tag-type-([^"]+)">\s*]*?href="/[^?]*\?tags=([^"]+)' - ).findall - - def items(self): - self.login() - data = self.metadata() - - for post_id in util.advance(self.post_ids(), self.start_post): - post = self._extract_post(post_id) - url = post["file_url"] - post.update(data) - text.nameext_from_url(url, post) - yield Message.Directory, post - yield Message.Url, url, post - - def skip(self, num): - self.start_post += num - return num - - def post_ids(self): - """Return an iterable containing all relevant post ids""" - - def login(self): - if self.cookies_check(self.cookies_names): - return - - username, password = self._get_auth_info() - if username: - return self.cookies_update(self._login_impl(username, password)) - - self.logged_in = False - - @cache(maxage=90*86400, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - url = self.root + "/users/login" - page = self.request(url).text - - headers = { - "Referer": url, - } - url = self.root + (text.extr(page, '
") - vcnt = extr('>Votes:', "<") - pid = extr(">Post ID:", "<") - created = extr(' title="', '"') - - if file_url := extr('>Original:', 'id='): - file_url = extr(' href="', '"') - width = extr(">", "x") - height = extr("", " ") - else: - width = extr('') - file_url = extr('Rating:", "') - for tag_type, tag_name in self.find_tags(tags_html or ""): - tags[tag_type].append(text.unquote(tag_name)) - for key, value in tags.items(): - data["tags_" + key] = " ".join(value) - tags_list += value - data["tags"] = " ".join(tags_list) - - return data + self.api = sankaku.SankakuAPI(self) + self.api.ROOT = "https://i.sankakuapi.com" + self.api.headers["Origin"] = self.root -class IdolcomplexTagExtractor(IdolcomplexExtractor): - """Extractor for images from idol.sankakucomplex.com by search-tags""" - subcategory = "tag" - directory_fmt = ("{category}", "{search_tags}") - archive_fmt = "t_{search_tags}_{id}" - pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)" - example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS" - per_page = 20 - - def __init__(self, match): - IdolcomplexExtractor.__init__(self, match) - query = text.parse_query(match[1]) - self.tags = text.unquote(query.get("tags", "").replace("+", " ")) - self.start_page = text.parse_int(query.get("page"), 1) - self.next = text.parse_int(query.get("next"), 0) - - def skip(self, num): - if self.next: - self.start_post += num - else: - pages, posts = divmod(num, self.per_page) - self.start_page += pages - self.start_post += posts - return num - - def metadata(self): - if not self.next: - max_page = 50 if self.logged_in else 25 - if self.start_page > max_page: - self.log.info("Traversing from page %d to page %d", - max_page, self.start_page) - self.start_post += self.per_page * (self.start_page - max_page) - self.start_page = max_page - - tags = self.tags.split() - if not self.logged_in and len(tags) > 4: - raise exception.AbortExtraction( - "Non-members can only search up to 4 tags at once") - return {"search_tags": " ".join(tags)} - - def post_ids(self): - url = self.root + "/en/posts" - - params = {"auto_page": "t"} - if self.next: - params["next"] = self.next - else: - params["page"] = self.start_page - params["tags"] = self.tags - - while True: - response = self.request(url, params=params, retries=10) - if response.history and "/posts/premium" in response.url: - self.log.warning("HTTP redirect to %s", response.url) - page = response.text - - yield from text.extract_iter(page, '"id":"', '"') - - next_page_url = text.extr(page, 'next-page-url="', '"') - if not next_page_url: - return - - url, _, next_params = text.unquote( - text.unescape(text.unescape(next_page_url))).partition("?") - next_params = text.parse_query(next_params) - - if "next" in next_params: - # stop if the same "next" value occurs twice in a row (#265) - if "next" in params and params["next"] == next_params["next"]: - return - next_params["page"] = "2" - - if url[0] == "/": - url = self.root + url - params = next_params +class IdolcomplexTagExtractor(IdolcomplexBase, sankaku.SankakuTagExtractor): + """Extractor for idolcomplex tag searches""" + pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)" + example = "https://www.idolcomplex.com/en/posts?tags=TAGS" -class IdolcomplexPoolExtractor(IdolcomplexExtractor): - """Extractor for image-pools from idol.sankakucomplex.com""" - subcategory = "pool" - directory_fmt = ("{category}", "pool", "{pool}") - archive_fmt = "p_{pool}_{id}" +class IdolcomplexPoolExtractor(IdolcomplexBase, sankaku.SankakuPoolExtractor): + """Extractor for idolcomplex pools""" pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)" - example = "https://idol.sankakucomplex.com/pools/0123456789abcdef" - per_page = 24 - - def skip(self, num): - pages, posts = divmod(num, self.per_page) - self.start_page += pages - self.start_post += posts - return num - - def metadata(self): - return {"pool": self.groups[0]} - - def post_ids(self): - if not self.logged_in: - self.log.warning("Login required") - - url = self.root + "/pools/show/" + self.groups[0] - params = {"page": self.start_page} - - while True: - page = self.request(url, params=params, retries=10).text - pos = page.find('id="pool-show"') + 1 - post_ids = self.find_pids(page, pos) - - yield from post_ids - if len(post_ids) < self.per_page: - return - params["page"] += 1 + example = "https://www.idolcomplex.com/en/pools/0123456789abcdef" -class IdolcomplexPostExtractor(IdolcomplexExtractor): - """Extractor for single images from idol.sankakucomplex.com""" - subcategory = "post" - archive_fmt = "{id}" - pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)" - example = "https://idol.sankakucomplex.com/posts/0123456789abcdef" - - def post_ids(self): - return (self.groups[0],) +class IdolcomplexPostExtractor(IdolcomplexBase, sankaku.SankakuPostExtractor): + """Extractor for individual idolcomplex posts""" + pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)" + example = "https://www.idolcomplex.com/en/posts/0123456789abcdef" diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 1c93cbfb..8317fe6d 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -152,12 +152,8 @@ class SankakuPoolExtractor(SankakuExtractor): pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)" example = "https://sankaku.app/books/12345" - def __init__(self, match): - SankakuExtractor.__init__(self, match) - self.pool_id = match[1] - def metadata(self): - pool = self.api.pools(self.pool_id) + pool = self.api.pools(self.groups[0]) pool["tags"] = [tag["name"] for tag in pool["tags"]] pool["artist_tags"] = [tag["name"] for tag in pool["artist_tags"]] @@ -178,12 +174,8 @@ class SankakuPostExtractor(SankakuExtractor): pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)" example = "https://sankaku.app/post/show/12345" - def __init__(self, match): - SankakuExtractor.__init__(self, match) - self.post_id = match[1] - def posts(self): - return self.api.posts(self.post_id) + return self.api.posts(self.groups[0]) class SankakuBooksExtractor(SankakuExtractor): @@ -207,12 +199,14 @@ class SankakuBooksExtractor(SankakuExtractor): class SankakuAPI(): """Interface for the sankaku.app API""" + ROOT = "https://sankakuapi.com" + VERSION = None def __init__(self, extractor): self.extractor = extractor self.headers = { "Accept" : "application/vnd.sankaku.api+json;v=2", - "Api-Version": None, + "Api-Version": self.VERSION, "Origin" : extractor.root, } @@ -281,7 +275,7 @@ class SankakuAPI(): _authenticate_impl(self.extractor, self.username, self.password) def _call(self, endpoint, params=None): - url = "https://sankakuapi.com" + endpoint + url = self.ROOT + endpoint for _ in range(5): self.authenticate() response = self.extractor.request( @@ -357,12 +351,12 @@ class SankakuAPI(): def _authenticate_impl(extr, username, password): extr.log.info("Logging in as %s", username) - url = "https://sankakuapi.com/auth/token" - headers = {"Accept": "application/vnd.sankaku.api+json;v=2"} + api = extr.api + url = api.ROOT + "/auth/token" data = {"login": username, "password": password} response = extr.request( - url, method="POST", headers=headers, json=data, fatal=False) + url, method="POST", headers=api.headers, json=data, fatal=False) data = response.json() if response.status_code >= 400 or not data.get("success"): diff --git a/test/results/idolcomplex.py b/test/results/idolcomplex.py index dfcff199..703d4c4e 100644 --- a/test/results/idolcomplex.py +++ b/test/results/idolcomplex.py @@ -5,16 +5,29 @@ # published by the Free Software Foundation. from gallery_dl.extractor import idolcomplex +from gallery_dl import exception __tests__ = ( +{ + "#url" : "https://www.idolcomplex.com/en/posts?tags=lyumos", + "#category": ("booru", "idolcomplex", "tag"), + "#class" : idolcomplex.IdolcomplexTagExtractor, + "#pattern" : r"https://i[sv]\.sankakucomplex\.com/o/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+", + "#range" : "18-22", + "#count" : 5, +}, + +{ + "#url" : "https://idolcomplex.com/posts?tags=lyumos", + "#category": ("booru", "idolcomplex", "tag"), + "#class" : idolcomplex.IdolcomplexTagExtractor, +}, + { "#url" : "https://idol.sankakucomplex.com/en/posts?tags=lyumos", "#category": ("booru", "idolcomplex", "tag"), "#class" : idolcomplex.IdolcomplexTagExtractor, - "#pattern" : r"https://i[sv]\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+", - "#range" : "18-22", - "#count" : 5, }, { @@ -41,11 +54,22 @@ __tests__ = ( "#class" : idolcomplex.IdolcomplexTagExtractor, }, +{ + "#url" : "https://www.idolcomplex.com/en/pools/e9PMwnwRBK3", + "#category": ("booru", "idolcomplex", "pool"), + "#class" : idolcomplex.IdolcomplexPoolExtractor, + "#auth" : True, + "#pattern" : ( + r"https://is.sankakucomplex.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?e=\d+&m=.+", + r"https://is.sankakucomplex.com/o/cf/ae/cfae655b594634126bddc10ba7965485\.jpg\?e=\d+&m=.+", + r"https://is.sankakucomplex.com/o/53/b3/53b3d915a79ac72747455f4d0e843fc0\.jpg\?e=\d+&m=.+", + ), +}, + { "#url" : "https://idol.sankakucomplex.com/en/pools/e9PMwnwRBK3", "#category": ("booru", "idolcomplex", "pool"), "#class" : idolcomplex.IdolcomplexPoolExtractor, - "#count" : 3, }, { @@ -60,31 +84,92 @@ __tests__ = ( "#class" : idolcomplex.IdolcomplexPoolExtractor, }, +{ + "#url" : "https://www.idolcomplex.com/en/posts/vkr36qdOaZ4", + "#category": ("booru", "idolcomplex", "post"), + "#class" : idolcomplex.IdolcomplexPostExtractor, + "#auth" : True, + "#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd", + + "audios" : [], + "author" : { + "avatar" : str, + "avatar_rating": "q", + "display_name" : "kekal", + "id" : "8YEa7e8RmD0", + "level" : 20, + "name" : "kekal", + }, + "category" : "idolcomplex", + "change" : 2121180, + "comment_count" : None, + "created_at" : 1511560888, + "date" : "dt:2017-11-24 22:01:28", + "extension" : "jpg", + "fav_count" : range(90, 120), + "file_ext" : "jpg", + "file_size" : 97521, + "file_type" : "image/jpeg", + "file_url" : r"re:https://is.sankakucomplex.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d.jpg\?e=\d+&m=.+", + "filename" : "509eccbba54a43cea6b275a65b93c51d", + "generation_directives": None, + "gif_preview_url" : None, + "has_children" : False, + "has_comments" : False, + "has_notes" : False, + "height" : 683, + "id" : "vkr36qdOaZ4", + "in_visible_pool" : True, + "is_anonymous" : False, + "is_favorited" : False, + "is_note_locked" : False, + "is_premium" : False, + "is_rating_locked": False, + "is_restricted_anonymous_upload": False, + "is_status_locked": False, + "md5" : "509eccbba54a43cea6b275a65b93c51d", + "parent_id" : None, + "preview_height" : 400, + "preview_url" : r"re:https://is.sankakucomplex.com/p/50/9e/509eccbba54a43cea6b275a65b93c51d.avif\?e=\d+&m=.+", + "preview_width" : 600, + "rating" : "s", + "reactions" : [], + "redirect_to_signup": False, + "sample_height" : 683, + "sample_url" : r"re:https://is.sankakucomplex.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d.jpg\?e=\d+&m=.+", + "sample_width" : 1024, + "sequence" : None, + "source" : "removed", + "status" : "active", + "subtitles" : [], + "tag_string" : "lyumos the_witcher shani_(the_witcher) cosplay waistcoat wreath female green_eyes non-asian red_hair 1girl 3:2_aspect_ratio tagme", + "tags" : [ + "lyumos", + "the_witcher", + "shani_(the_witcher)", + "cosplay", + "waistcoat", + "wreath", + "female", + "green_eyes", + "non-asian", + "red_hair", + "1girl", + "3:2_aspect_ratio", + "tagme", + ], + "total_score" : range(120, 150), + "total_tags" : 13, + "user_vote" : None, + "video_duration" : None, + "vote_count" : range(25, 50), + "width" : 1024, +}, + { "#url" : "https://idol.sankakucomplex.com/en/posts/vkr36qdOaZ4", "#category": ("booru", "idolcomplex", "post"), "#class" : idolcomplex.IdolcomplexPostExtractor, - "#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd", - - "created_at" : "2017-11-24 17:01:27.696", - "date" : "dt:2017-11-24 17:01:27", - "extension" : "jpg", - "file_url" : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?", - "filename" : "509eccbba54a43cea6b275a65b93c51d", - "height" : 683, - "id" : "vkr36qdOaZ4", # legacy ID: 694215 - "md5" : "509eccbba54a43cea6b275a65b93c51d", - "rating" : "g", - "tags" : "lyumos the_witcher shani_(the_witcher) 1girl green_eyes non-asian redhead waistcoat wreath cosplay 3:2_aspect_ratio", - "tags_character": "shani_(the_witcher)", - "tags_copyright": "the_witcher", - "tags_general" : "1girl green_eyes non-asian redhead waistcoat wreath", - "tags_genre" : "cosplay", - "tags_idol" : "lyumos", - "tags_medium" : "3:2_aspect_ratio", - "vote_average" : range(4, 5), - "vote_count" : range(25, 40), - "width" : 1024, }, { @@ -109,6 +194,7 @@ __tests__ = ( "#url" : "https://idol.sankakucomplex.com/post/show/694215", "#category": ("booru", "idolcomplex", "post"), "#class" : idolcomplex.IdolcomplexPostExtractor, + "#exception": exception.AbortExtraction, "#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd", "id" : "vkr36qdOaZ4", # legacy ID: 694215 diff --git a/test/results/sankaku.py b/test/results/sankaku.py index 1c2f3949..ce76af64 100644 --- a/test/results/sankaku.py +++ b/test/results/sankaku.py @@ -572,6 +572,7 @@ __tests__ = ( "#url" : "https://sankaku.app/books?tags=aiue_oka", "#category": ("booru", "sankaku", "books"), "#class" : sankaku.SankakuBooksExtractor, + "#auth" : True, "#range" : "1-20", "#count" : 20, }, diff --git a/test/test_cookies.py b/test/test_cookies.py index 5900473d..89a0a3cc 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -91,7 +91,7 @@ class TestCookiedict(unittest.TestCase): self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values())) def test_domain(self): - for category in ["exhentai", "idolcomplex", "nijie", "horne"]: + for category in ["exhentai", "nijie", "horne"]: extr = _get_extractor(category) cookies = extr.cookies for key in self.cdict: @@ -108,7 +108,6 @@ class TestCookieLogin(unittest.TestCase): def test_cookie_login(self): extr_cookies = { "exhentai" : ("ipb_member_id", "ipb_pass_hash"), - "idolcomplex": ("login", "pass_hash"), "nijie" : ("nijie_tok",), "horne" : ("horne_tok",), } @@ -244,7 +243,6 @@ def _get_extractor(category): URLS = { "exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/", - "idolcomplex": "https://idol.sankakucomplex.com/post/show/1", "nijie" : "https://nijie.info/view.php?id=1", "horne" : "https://horne.red/view.php?id=1", "test" : "generic:https://example.org/",