[hitomi] fix negative tag searches (#7694)

This commit is contained in:
Mike Fährmann
2025-06-30 18:24:09 +02:00
parent 95338ff0ec
commit 8764f32ea7
2 changed files with 57 additions and 38 deletions

View File

@@ -21,6 +21,27 @@ class HitomiExtractor(Extractor):
root = "https://hitomi.la"
domain = "gold-usergeneratedcontent.net"
def load_nozomi(self, query, language="all", headers=None):
ns, _, tag = query.strip().partition(":")
if ns == "female" or ns == "male":
ns = "tag/"
tag = query
elif ns == "language":
ns = ""
language = tag
tag = "index"
else:
ns = f"{ns}/"
url = (f"https://ltn.{self.domain}/n/{ns}"
f"/{tag.replace('_', ' ')}-{language}.nozomi")
if headers is None:
headers = {}
headers["Origin"] = self.root
headers["Referer"] = f"{self.root}/"
return decode_nozomi(self.request(url, headers=headers).content)
class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
"""Extractor for hitomi.la galleries"""
@@ -186,58 +207,46 @@ class HitomiIndexExtractor(HitomiTagExtractor):
class HitomiSearchExtractor(HitomiExtractor):
"""Extractor for galleries from multiple tag searches on hitomi.la"""
subcategory = "search"
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^#]+)"
example = "https://hitomi.la/search.html?QUERY"
def __init__(self, match):
Extractor.__init__(self, match)
self.query = match[1]
self.tags = text.unquote(self.query)
def items(self):
tags = text.unquote(self.groups[0])
data = {
"_extractor": HitomiGalleryExtractor,
"search_tags": self.tags,
"search_tags": tags,
}
results = [self.get_nozomi_items(tag) for tag in self.tags.split(" ")]
intersects = set.intersection(*results)
for gallery_id in sorted(intersects, reverse=True):
for gallery_id in self.gallery_ids(tags):
gallery_url = f"{self.root}/galleries/{gallery_id}.html"
yield Message.Queue, gallery_url, data
def get_nozomi_items(self, full_tag):
area, tag, language = self.get_nozomi_args(full_tag)
base = f"https://ltn.{self.domain}/n/"
def gallery_ids(self, tags):
result = None
positive = []
negative = []
if area:
nozomi_url = f"{base}{area}/{tag}-{language}.nozomi"
else:
nozomi_url = f"{base}{tag}-{language}.nozomi"
for tag in tags.split():
if tag[0] == "-":
negative.append(tag[1:])
else:
positive.append(tag)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
"Referer": f"{self.root}/search.html?{self.query}",
}
for tag in positive:
ids = self.load_nozomi(tag)
if result is None:
result = set(ids)
else:
result.intersection_update(ids)
response = self.request(nozomi_url, headers=headers)
return set(decode_nozomi(response.content))
if result is None:
# result = set(self.load_nozomi("index"))
result = set(self.load_nozomi("language:all"))
for tag in negative:
result.difference_update(self.load_nozomi(tag))
def get_nozomi_args(self, query):
ns, _, tag = query.strip().partition(":")
area = ns
language = "all"
if ns == "female" or ns == "male":
area = "tag"
tag = query
elif ns == "language":
area = None
language = tag
tag = "index"
return area, tag.replace("_", " "), language
return sorted(result, reverse=True) if result else ()
@memcache(maxage=1800)

View File

@@ -223,6 +223,16 @@ __tests__ = (
"search_tags": "female:sole_female language:japanese artist:sumiya",
},
{
"#url" : "https://hitomi.la/search.html?language%3Ajapanese%20-tag%3Aanimated%20group%3Aparadiddle#2",
"#comment" : "negative search tag (#7694)",
"#class" : hitomi.HitomiSearchExtractor,
"#pattern" : hitomi.HitomiGalleryExtractor.pattern,
"#count" : 41,
"search_tags": "language:japanese -tag:animated group:paradiddle",
},
{
"#url" : "https://hitomi.la/search.html?group:initial_g",
"#class" : hitomi.HitomiSearchExtractor,