[idolcomplex] update to new domain and interface (#7559 #8009)

This commit is contained in:
Mike Fährmann
2025-08-11 22:24:04 +02:00
parent d7f654c643
commit e491d56dc3
8 changed files with 149 additions and 297 deletions

View File

@@ -446,7 +446,6 @@ Default
* ``"3.0-6.0"``
``bilibili``,
``exhentai``,
``idolcomplex``,
``[reactor]``,
``readcomiconline``
* ``"6.0-6.1"``

View File

@@ -381,8 +381,9 @@
{
"username": "",
"password": "",
"referer" : false,
"sleep-request": "3.0-6.0"
"refresh" : false,
"tags" : false
},
"imagechest":
{
@@ -643,8 +644,8 @@
"username": "",
"password": "",
"refresh" : false,
"tags" : false
"refresh" : false,
"tags" : false
},
"sankakucomplex":
{

View File

@@ -417,7 +417,7 @@ Consider all listed sites to potentially be NSFW.
</tr>
<tr>
<td>Idol Complex</td>
<td>https://idol.sankakucomplex.com/</td>
<td>https://www.idolcomplex.com/</td>
<td>Pools, Posts, Tag Searches</td>
<td>Supported</td>
</tr>

View File

@@ -6,266 +6,39 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://idol.sankakucomplex.com/"""
"""Extractors for https://www.idolcomplex.com/"""
from .sankaku import SankakuExtractor
from .common import Message
from ..cache import cache
from .. import text, util, exception
import collections
import re
from . import sankaku
BASE_PATTERN = r"(?:https?://)?idol\.sankakucomplex\.com(?:/[a-z]{2})?"
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
r"idol(?:\.sankaku)?complex\.com(?:/[a-z]{2})?")
class IdolcomplexExtractor(SankakuExtractor):
class IdolcomplexBase():
"""Base class for idolcomplex extractors"""
category = "idolcomplex"
root = "https://idol.sankakucomplex.com"
cookies_domain = "idol.sankakucomplex.com"
cookies_names = ("_idolcomplex_session",)
referer = False
request_interval = (3.0, 6.0)
# Constructor: delegates to SankakuExtractor.__init__ and then
# initializes the login and paging state of this extractor.
def __init__(self, match):
SankakuExtractor.__init__(self, match)
# optimistic default; login() sets this to False when no session
# cookies and no username are available
self.logged_in = True
# paging state; skip() and the subclasses adjust these values
self.start_page = 1
self.start_post = 0
root = "https://www.idolcomplex.com"
cookies_domain = ".idolcomplex.com"
def _init(self):
self.find_pids = re.compile(
r" href=[\"#]/\w\w/posts/(\w+)"
).findall
self.find_tags = re.compile(
r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
).findall
# Generator producing the messages for every post of this extractor.
# Logs in first, fetches the shared metadata once, and then yields one
# Message.Directory and one Message.Url per post ID from post_ids().
def items(self):
self.login()
data = self.metadata()
# NOTE(review): util.advance presumably drops the first 'start_post'
# IDs before iteration starts - confirm against util.advance
for post_id in util.advance(self.post_ids(), self.start_post):
post = self._extract_post(post_id)
url = post["file_url"]
# merge the extractor-wide metadata into the per-post metadata
post.update(data)
text.nameext_from_url(url, post)
yield Message.Directory, post
yield Message.Url, url, post
# Register 'num' additional posts to be skipped by increasing
# 'start_post'; returns 'num' unchanged.
def skip(self, num):
self.start_post += num
return num
# Hook without an implementation of its own; items() iterates over its
# result, and the tag, pool, and post extractors each define their own
# post_ids().
def post_ids(self):
"""Return an iterable containing all relevant post ids"""
# Establish a session if possible and record the outcome in
# 'self.logged_in'.
def login(self):
# nothing to do when cookies_check() reports success for the names in
# 'cookies_names' (presumably: the session cookie is already set)
if self.cookies_check(self.cookies_names):
return
username, password = self._get_auth_info()
if username:
# _login_impl() returns a name -> value cookie dict, which is handed
# to cookies_update()
return self.cookies_update(self._login_impl(username, password))
# no cookies and no username: continue without being logged in
self.logged_in = False
# Perform the form-based login and return the resulting cookies as a
# {name: value} dict.
# Raises exception.AuthenticationError when the login attempt is judged
# to have failed (see the check after the POST request).
# NOTE(review): maxage=90*86400 looks like 90 days expressed in seconds
# and keyarg=1 like "cache per username" - confirm against cache().
@cache(maxage=90*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
# fetch the login page to obtain the form target and the token
url = self.root + "/users/login"
page = self.request(url).text
headers = {
"Referer": url,
}
# use the form's action attribute as POST target and fall back to
# "/en/user/authenticate" when none is found in the page
url = self.root + (text.extr(page, '<form action="', '"') or
"/en/user/authenticate")
data = {
"authenticity_token": text.unescape(text.extr(
page, 'name="authenticity_token" value="', '"')),
"url" : "",
"user[name]" : username,
"user[password]": password,
"commit" : "Login",
}
# wait before submitting the form (10 is presumably seconds - confirm)
self.sleep(10, "login")
response = self.request(url, method="POST", headers=headers, data=data)
# a response without any redirect, or one whose final URL ends in
# "/users/login" or "/user/home", is treated as a failed login
if not response.history or response.url.endswith(
("/users/login", "/user/home")):
raise exception.AuthenticationError()
# cookies are taken from the first response of the redirect chain
return {c.name: c.value for c in response.history[0].cookies}
# Download the HTML page of a single post and build its metadata dict.
#   post_id: ID appended to "<root>/posts/" to form the page URL
#   returns: dict with id, md5, vote_average, vote_count, created_at,
#            date, rating, file_url, width, height, tags, and one
#            "tags_<type>" entry per tag type found in the tag sidebar
def _extract_post(self, post_id):
url = self.root + "/posts/" + post_id
page = self.request(url, retries=10).text
# NOTE(review): 'extr' presumably continues searching from the end of
# its previous match, which would make the order of the calls below
# significant - confirm against text.extract_from
extr = text.extract_from(page)
vavg = extr('id="rating"', "</ul>")
vcnt = extr('>Votes</strong>:', "<")
pid = extr(">Post ID:", "<")
created = extr(' title="', '"')
# a page with an "Original:" entry provides a link plus "WxH"
# dimensions; otherwise the values come from <object>/<embed> markup
if file_url := extr('>Original:', 'id='):
file_url = extr(' href="', '"')
width = extr(">", "x")
height = extr("", " ")
else:
width = extr('<object width=', ' ')
height = extr('height=', '>')
file_url = extr('<embed src="', '"')
rating = extr(">Rating:", "<br")
data = {
"id" : pid.strip(),
# md5: last path component of the file URL up to its first "."
"md5" : file_url.rpartition("/")[2].partition(".")[0],
# every full star counts as 1.0, every half star as 0.5
"vote_average": (1.0 * vavg.count('class="star-full"') +
0.5 * vavg.count('class="star-half"')),
"vote_count" : text.parse_int(vcnt),
"created_at" : created,
"date" : text.parse_datetime(
created, "%Y-%m-%d %H:%M:%S.%f"),
"rating" : text.remove_html(rating).lower(),
# the scheme is prepended here, so the URL found in the page is
# presumably protocol-relative ("//host/...") - confirm
"file_url" : "https:" + text.unescape(file_url),
"width" : text.parse_int(width),
"height" : text.parse_int(height),
}
# group the sidebar tags by type; find_tags yields
# (tag type, URL-quoted tag name) pairs
tags = collections.defaultdict(list)
tags_list = []
tags_html = text.extr(page, '<ul id="tag-sidebar"', '</ul>')
for tag_type, tag_name in self.find_tags(tags_html or ""):
tags[tag_type].append(text.unquote(tag_name))
# one space-separated "tags_<type>" string per type, plus a combined
# "tags" string with all tags
for key, value in tags.items():
data["tags_" + key] = " ".join(value)
tags_list += value
data["tags"] = " ".join(tags_list)
return data
self.api = sankaku.SankakuAPI(self)
self.api.ROOT = "https://i.sankakuapi.com"
self.api.headers["Origin"] = self.root
# HTML-based tag search extractor: walks the paginated search results
# and yields the post IDs found on each result page.
class IdolcomplexTagExtractor(IdolcomplexExtractor):
"""Extractor for images from idol.sankakucomplex.com by search-tags"""
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
# captures the query string (everything after "?" up to a "#")
pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
# page size used by skip() and metadata() to convert between pages
# and posts
per_page = 20
# Parse the captured query string into search tags, start page, and
# "next" value.
def __init__(self, match):
IdolcomplexExtractor.__init__(self, match)
query = text.parse_query(match[1])
# "+" separates tags in the URL; turn it into spaces, then unquote
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
self.start_page = text.parse_int(query.get("page"), 1)
self.next = text.parse_int(query.get("next"), 0)
# Skip 'num' posts: with a "next" value only the post offset is
# increased, otherwise 'num' is split into whole pages and a remainder.
def skip(self, num):
if self.next:
self.start_post += num
else:
pages, posts = divmod(num, self.per_page)
self.start_page += pages
self.start_post += posts
return num
# Return {"search_tags": ...} after clamping the start page and
# validating the number of tags.
# Raises exception.AbortExtraction when not logged in and more than
# 4 tags were given.
def metadata(self):
if not self.next:
# highest page that gets requested directly: 50 when logged in,
# 25 otherwise
max_page = 50 if self.logged_in else 25
if self.start_page > max_page:
self.log.info("Traversing from page %d to page %d",
max_page, self.start_page)
# convert the pages beyond 'max_page' into a post offset
self.start_post += self.per_page * (self.start_page - max_page)
self.start_page = max_page
tags = self.tags.split()
if not self.logged_in and len(tags) > 4:
raise exception.AbortExtraction(
"Non-members can only search up to 4 tags at once")
return {"search_tags": " ".join(tags)}
# Generator yielding the post IDs of all result pages, following the
# 'next-page-url' value embedded in each page.
def post_ids(self):
url = self.root + "/en/posts"
params = {"auto_page": "t"}
# start either from a "next" value or from a page number
if self.next:
params["next"] = self.next
else:
params["page"] = self.start_page
params["tags"] = self.tags
while True:
response = self.request(url, params=params, retries=10)
# a redirect to a "/posts/premium" URL is only reported, not
# treated as an error
if response.history and "/posts/premium" in response.url:
self.log.warning("HTTP redirect to %s", response.url)
page = response.text
yield from text.extract_iter(page, '"id":"', '"')
next_page_url = text.extr(page, 'next-page-url="', '"')
# no next-page URL: the last result page has been reached
if not next_page_url:
return
# the value is HTML-unescaped twice and URL-unquoted before being
# split into path and query string
url, _, next_params = text.unquote(
text.unescape(text.unescape(next_page_url))).partition("?")
next_params = text.parse_query(next_params)
if "next" in next_params:
# stop if the same "next" value occurs twice in a row (#265)
if "next" in params and params["next"] == next_params["next"]:
return
next_params["page"] = "2"
# turn a relative path into an absolute URL
if url[0] == "/":
url = self.root + url
params = next_params
# Tag search extractor built from IdolcomplexBase and
# sankaku.SankakuTagExtractor; it only overrides the URL pattern and
# the example URL.
class IdolcomplexTagExtractor(IdolcomplexBase, sankaku.SankakuTagExtractor):
"""Extractor for idolcomplex tag searches"""
# optional "/posts" and optional "/", then "?" followed by the captured
# query string (everything up to a "#")
pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)"
example = "https://www.idolcomplex.com/en/posts?tags=TAGS"
class IdolcomplexPoolExtractor(IdolcomplexExtractor):
"""Extractor for image-pools from idol.sankakucomplex.com"""
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
class IdolcomplexPoolExtractor(IdolcomplexBase, sankaku.SankakuPoolExtractor):
"""Extractor for idolcomplex pools"""
pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
per_page = 24
# Skip 'num' posts by splitting the amount into whole pages of
# 'per_page' posts and a remaining post offset; returns 'num'.
def skip(self, num):
pages, posts = divmod(num, self.per_page)
self.start_page += pages
self.start_post += posts
return num
# Return the metadata shared by all posts of this pool: the first
# capture group of the URL pattern, stored under "pool".
def metadata(self):
return {"pool": self.groups[0]}
# Generator yielding the post IDs of a pool, one result page at a time.
def post_ids(self):
# only a warning: the requests below are still attempted
if not self.logged_in:
self.log.warning("Login required")
url = self.root + "/pools/show/" + self.groups[0]
params = {"page": self.start_page}
while True:
page = self.request(url, params=params, retries=10).text
# begin matching right after the 'id="pool-show"' marker; when the
# marker is missing, find() returns -1 and 'pos' becomes 0, so the
# whole page is searched
pos = page.find('id="pool-show"') + 1
post_ids = self.find_pids(page, pos)
yield from post_ids
# fewer IDs than a full page: this was the last page
if len(post_ids) < self.per_page:
return
params["page"] += 1
example = "https://www.idolcomplex.com/en/pools/0123456789abcdef"
# HTML-based extractor for a single post.
class IdolcomplexPostExtractor(IdolcomplexExtractor):
"""Extractor for single images from idol.sankakucomplex.com"""
subcategory = "post"
archive_fmt = "{id}"
# "/post" or "/posts", an optional "show/" segment, then the captured
# post ID
pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)"
example = "https://idol.sankakucomplex.com/posts/0123456789abcdef"
# A single post: the only ID is the first capture group of the pattern.
def post_ids(self):
return (self.groups[0],)
# Single-post extractor built from IdolcomplexBase and
# sankaku.SankakuPostExtractor; it only overrides the URL pattern and
# the example URL.
class IdolcomplexPostExtractor(IdolcomplexBase, sankaku.SankakuPostExtractor):
"""Extractor for individual idolcomplex posts"""
# "/post" or "/posts", an optional "/show" segment, then the captured
# post ID
pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
example = "https://www.idolcomplex.com/en/posts/0123456789abcdef"

View File

@@ -152,12 +152,8 @@ class SankakuPoolExtractor(SankakuExtractor):
pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)"
example = "https://sankaku.app/books/12345"
# Constructor: stores the first capture group of the URL match as the
# pool ID.
def __init__(self, match):
SankakuExtractor.__init__(self, match)
self.pool_id = match[1]
def metadata(self):
pool = self.api.pools(self.pool_id)
pool = self.api.pools(self.groups[0])
pool["tags"] = [tag["name"] for tag in pool["tags"]]
pool["artist_tags"] = [tag["name"] for tag in pool["artist_tags"]]
@@ -178,12 +174,8 @@ class SankakuPostExtractor(SankakuExtractor):
pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
example = "https://sankaku.app/post/show/12345"
# Constructor: stores the first capture group of the URL match as the
# post ID.
def __init__(self, match):
SankakuExtractor.__init__(self, match)
self.post_id = match[1]
def posts(self):
return self.api.posts(self.post_id)
return self.api.posts(self.groups[0])
class SankakuBooksExtractor(SankakuExtractor):
@@ -207,12 +199,14 @@ class SankakuBooksExtractor(SankakuExtractor):
class SankakuAPI():
"""Interface for the sankaku.app API"""
ROOT = "https://sankakuapi.com"
VERSION = None
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
"Accept" : "application/vnd.sankaku.api+json;v=2",
"Api-Version": None,
"Api-Version": self.VERSION,
"Origin" : extractor.root,
}
@@ -281,7 +275,7 @@ class SankakuAPI():
_authenticate_impl(self.extractor, self.username, self.password)
def _call(self, endpoint, params=None):
url = "https://sankakuapi.com" + endpoint
url = self.ROOT + endpoint
for _ in range(5):
self.authenticate()
response = self.extractor.request(
@@ -357,12 +351,12 @@ class SankakuAPI():
def _authenticate_impl(extr, username, password):
extr.log.info("Logging in as %s", username)
url = "https://sankakuapi.com/auth/token"
headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
api = extr.api
url = api.ROOT + "/auth/token"
data = {"login": username, "password": password}
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=False)
url, method="POST", headers=api.headers, json=data, fatal=False)
data = response.json()
if response.status_code >= 400 or not data.get("success"):

View File

@@ -5,16 +5,29 @@
# published by the Free Software Foundation.
from gallery_dl.extractor import idolcomplex
from gallery_dl import exception
__tests__ = (
{
"#url" : "https://www.idolcomplex.com/en/posts?tags=lyumos",
"#category": ("booru", "idolcomplex", "tag"),
"#class" : idolcomplex.IdolcomplexTagExtractor,
"#pattern" : r"https://i[sv]\.sankakucomplex\.com/o/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
"#range" : "18-22",
"#count" : 5,
},
{
"#url" : "https://idolcomplex.com/posts?tags=lyumos",
"#category": ("booru", "idolcomplex", "tag"),
"#class" : idolcomplex.IdolcomplexTagExtractor,
},
{
"#url" : "https://idol.sankakucomplex.com/en/posts?tags=lyumos",
"#category": ("booru", "idolcomplex", "tag"),
"#class" : idolcomplex.IdolcomplexTagExtractor,
"#pattern" : r"https://i[sv]\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
"#range" : "18-22",
"#count" : 5,
},
{
@@ -41,11 +54,22 @@ __tests__ = (
"#class" : idolcomplex.IdolcomplexTagExtractor,
},
{
"#url" : "https://www.idolcomplex.com/en/pools/e9PMwnwRBK3",
"#category": ("booru", "idolcomplex", "pool"),
"#class" : idolcomplex.IdolcomplexPoolExtractor,
"#auth" : True,
"#pattern" : (
r"https://is.sankakucomplex.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?e=\d+&m=.+",
r"https://is.sankakucomplex.com/o/cf/ae/cfae655b594634126bddc10ba7965485\.jpg\?e=\d+&m=.+",
r"https://is.sankakucomplex.com/o/53/b3/53b3d915a79ac72747455f4d0e843fc0\.jpg\?e=\d+&m=.+",
),
},
{
"#url" : "https://idol.sankakucomplex.com/en/pools/e9PMwnwRBK3",
"#category": ("booru", "idolcomplex", "pool"),
"#class" : idolcomplex.IdolcomplexPoolExtractor,
"#count" : 3,
},
{
@@ -60,31 +84,92 @@ __tests__ = (
"#class" : idolcomplex.IdolcomplexPoolExtractor,
},
{
"#url" : "https://www.idolcomplex.com/en/posts/vkr36qdOaZ4",
"#category": ("booru", "idolcomplex", "post"),
"#class" : idolcomplex.IdolcomplexPostExtractor,
"#auth" : True,
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"audios" : [],
"author" : {
"avatar" : str,
"avatar_rating": "q",
"display_name" : "kekal",
"id" : "8YEa7e8RmD0",
"level" : 20,
"name" : "kekal",
},
"category" : "idolcomplex",
"change" : 2121180,
"comment_count" : None,
"created_at" : 1511560888,
"date" : "dt:2017-11-24 22:01:28",
"extension" : "jpg",
"fav_count" : range(90, 120),
"file_ext" : "jpg",
"file_size" : 97521,
"file_type" : "image/jpeg",
"file_url" : r"re:https://is.sankakucomplex.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d.jpg\?e=\d+&m=.+",
"filename" : "509eccbba54a43cea6b275a65b93c51d",
"generation_directives": None,
"gif_preview_url" : None,
"has_children" : False,
"has_comments" : False,
"has_notes" : False,
"height" : 683,
"id" : "vkr36qdOaZ4",
"in_visible_pool" : True,
"is_anonymous" : False,
"is_favorited" : False,
"is_note_locked" : False,
"is_premium" : False,
"is_rating_locked": False,
"is_restricted_anonymous_upload": False,
"is_status_locked": False,
"md5" : "509eccbba54a43cea6b275a65b93c51d",
"parent_id" : None,
"preview_height" : 400,
"preview_url" : r"re:https://is.sankakucomplex.com/p/50/9e/509eccbba54a43cea6b275a65b93c51d.avif\?e=\d+&m=.+",
"preview_width" : 600,
"rating" : "s",
"reactions" : [],
"redirect_to_signup": False,
"sample_height" : 683,
"sample_url" : r"re:https://is.sankakucomplex.com/o/50/9e/509eccbba54a43cea6b275a65b93c51d.jpg\?e=\d+&m=.+",
"sample_width" : 1024,
"sequence" : None,
"source" : "removed",
"status" : "active",
"subtitles" : [],
"tag_string" : "lyumos the_witcher shani_(the_witcher) cosplay waistcoat wreath female green_eyes non-asian red_hair 1girl 3:2_aspect_ratio tagme",
"tags" : [
"lyumos",
"the_witcher",
"shani_(the_witcher)",
"cosplay",
"waistcoat",
"wreath",
"female",
"green_eyes",
"non-asian",
"red_hair",
"1girl",
"3:2_aspect_ratio",
"tagme",
],
"total_score" : range(120, 150),
"total_tags" : 13,
"user_vote" : None,
"video_duration" : None,
"vote_count" : range(25, 50),
"width" : 1024,
},
{
"#url" : "https://idol.sankakucomplex.com/en/posts/vkr36qdOaZ4",
"#category": ("booru", "idolcomplex", "post"),
"#class" : idolcomplex.IdolcomplexPostExtractor,
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"created_at" : "2017-11-24 17:01:27.696",
"date" : "dt:2017-11-24 17:01:27",
"extension" : "jpg",
"file_url" : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
"filename" : "509eccbba54a43cea6b275a65b93c51d",
"height" : 683,
"id" : "vkr36qdOaZ4", # legacy ID: 694215
"md5" : "509eccbba54a43cea6b275a65b93c51d",
"rating" : "g",
"tags" : "lyumos the_witcher shani_(the_witcher) 1girl green_eyes non-asian redhead waistcoat wreath cosplay 3:2_aspect_ratio",
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_general" : "1girl green_eyes non-asian redhead waistcoat wreath",
"tags_genre" : "cosplay",
"tags_idol" : "lyumos",
"tags_medium" : "3:2_aspect_ratio",
"vote_average" : range(4, 5),
"vote_count" : range(25, 40),
"width" : 1024,
},
{
@@ -109,6 +194,7 @@ __tests__ = (
"#url" : "https://idol.sankakucomplex.com/post/show/694215",
"#category": ("booru", "idolcomplex", "post"),
"#class" : idolcomplex.IdolcomplexPostExtractor,
"#exception": exception.AbortExtraction,
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"id" : "vkr36qdOaZ4", # legacy ID: 694215

View File

@@ -572,6 +572,7 @@ __tests__ = (
"#url" : "https://sankaku.app/books?tags=aiue_oka",
"#category": ("booru", "sankaku", "books"),
"#class" : sankaku.SankakuBooksExtractor,
"#auth" : True,
"#range" : "1-20",
"#count" : 20,
},

View File

@@ -91,7 +91,7 @@ class TestCookiedict(unittest.TestCase):
self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))
def test_domain(self):
for category in ["exhentai", "idolcomplex", "nijie", "horne"]:
for category in ["exhentai", "nijie", "horne"]:
extr = _get_extractor(category)
cookies = extr.cookies
for key in self.cdict:
@@ -108,7 +108,6 @@ class TestCookieLogin(unittest.TestCase):
def test_cookie_login(self):
extr_cookies = {
"exhentai" : ("ipb_member_id", "ipb_pass_hash"),
"idolcomplex": ("login", "pass_hash"),
"nijie" : ("nijie_tok",),
"horne" : ("horne_tok",),
}
@@ -244,7 +243,6 @@ def _get_extractor(category):
URLS = {
"exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/",
"idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
"nijie" : "https://nijie.info/view.php?id=1",
"horne" : "https://horne.red/view.php?id=1",
"test" : "generic:https://example.org/",