[hitomi] fix negative tag searches (#7694)
This commit is contained in:
@@ -21,6 +21,27 @@ class HitomiExtractor(Extractor):
|
|||||||
root = "https://hitomi.la"
|
root = "https://hitomi.la"
|
||||||
domain = "gold-usergeneratedcontent.net"
|
domain = "gold-usergeneratedcontent.net"
|
||||||
|
|
||||||
|
def load_nozomi(self, query, language="all", headers=None):
|
||||||
|
ns, _, tag = query.strip().partition(":")
|
||||||
|
|
||||||
|
if ns == "female" or ns == "male":
|
||||||
|
ns = "tag/"
|
||||||
|
tag = query
|
||||||
|
elif ns == "language":
|
||||||
|
ns = ""
|
||||||
|
language = tag
|
||||||
|
tag = "index"
|
||||||
|
else:
|
||||||
|
ns = f"{ns}/"
|
||||||
|
|
||||||
|
url = (f"https://ltn.{self.domain}/n/{ns}"
|
||||||
|
f"/{tag.replace('_', ' ')}-{language}.nozomi")
|
||||||
|
if headers is None:
|
||||||
|
headers = {}
|
||||||
|
headers["Origin"] = self.root
|
||||||
|
headers["Referer"] = f"{self.root}/"
|
||||||
|
return decode_nozomi(self.request(url, headers=headers).content)
|
||||||
|
|
||||||
|
|
||||||
class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
|
class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
|
||||||
"""Extractor for hitomi.la galleries"""
|
"""Extractor for hitomi.la galleries"""
|
||||||
@@ -186,58 +207,46 @@ class HitomiIndexExtractor(HitomiTagExtractor):
|
|||||||
class HitomiSearchExtractor(HitomiExtractor):
|
class HitomiSearchExtractor(HitomiExtractor):
|
||||||
"""Extractor for galleries from multiple tag searches on hitomi.la"""
|
"""Extractor for galleries from multiple tag searches on hitomi.la"""
|
||||||
subcategory = "search"
|
subcategory = "search"
|
||||||
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
|
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^#]+)"
|
||||||
example = "https://hitomi.la/search.html?QUERY"
|
example = "https://hitomi.la/search.html?QUERY"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
Extractor.__init__(self, match)
|
|
||||||
self.query = match[1]
|
|
||||||
self.tags = text.unquote(self.query)
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
tags = text.unquote(self.groups[0])
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"_extractor": HitomiGalleryExtractor,
|
"_extractor": HitomiGalleryExtractor,
|
||||||
"search_tags": self.tags,
|
"search_tags": tags,
|
||||||
}
|
}
|
||||||
results = [self.get_nozomi_items(tag) for tag in self.tags.split(" ")]
|
|
||||||
intersects = set.intersection(*results)
|
|
||||||
|
|
||||||
for gallery_id in sorted(intersects, reverse=True):
|
for gallery_id in self.gallery_ids(tags):
|
||||||
gallery_url = f"{self.root}/galleries/{gallery_id}.html"
|
gallery_url = f"{self.root}/galleries/{gallery_id}.html"
|
||||||
yield Message.Queue, gallery_url, data
|
yield Message.Queue, gallery_url, data
|
||||||
|
|
||||||
def get_nozomi_items(self, full_tag):
|
def gallery_ids(self, tags):
|
||||||
area, tag, language = self.get_nozomi_args(full_tag)
|
result = None
|
||||||
base = f"https://ltn.{self.domain}/n/"
|
positive = []
|
||||||
|
negative = []
|
||||||
|
|
||||||
if area:
|
for tag in tags.split():
|
||||||
nozomi_url = f"{base}{area}/{tag}-{language}.nozomi"
|
if tag[0] == "-":
|
||||||
else:
|
negative.append(tag[1:])
|
||||||
nozomi_url = f"{base}{tag}-{language}.nozomi"
|
else:
|
||||||
|
positive.append(tag)
|
||||||
|
|
||||||
headers = {
|
for tag in positive:
|
||||||
"Origin": self.root,
|
ids = self.load_nozomi(tag)
|
||||||
"Cache-Control": "max-age=0",
|
if result is None:
|
||||||
"Referer": f"{self.root}/search.html?{self.query}",
|
result = set(ids)
|
||||||
}
|
else:
|
||||||
|
result.intersection_update(ids)
|
||||||
|
|
||||||
response = self.request(nozomi_url, headers=headers)
|
if result is None:
|
||||||
return set(decode_nozomi(response.content))
|
# result = set(self.load_nozomi("index"))
|
||||||
|
result = set(self.load_nozomi("language:all"))
|
||||||
|
for tag in negative:
|
||||||
|
result.difference_update(self.load_nozomi(tag))
|
||||||
|
|
||||||
def get_nozomi_args(self, query):
|
return sorted(result, reverse=True) if result else ()
|
||||||
ns, _, tag = query.strip().partition(":")
|
|
||||||
area = ns
|
|
||||||
language = "all"
|
|
||||||
|
|
||||||
if ns == "female" or ns == "male":
|
|
||||||
area = "tag"
|
|
||||||
tag = query
|
|
||||||
elif ns == "language":
|
|
||||||
area = None
|
|
||||||
language = tag
|
|
||||||
tag = "index"
|
|
||||||
|
|
||||||
return area, tag.replace("_", " "), language
|
|
||||||
|
|
||||||
|
|
||||||
@memcache(maxage=1800)
|
@memcache(maxage=1800)
|
||||||
|
|||||||
@@ -223,6 +223,16 @@ __tests__ = (
|
|||||||
"search_tags": "female:sole_female language:japanese artist:sumiya",
|
"search_tags": "female:sole_female language:japanese artist:sumiya",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/search.html?language%3Ajapanese%20-tag%3Aanimated%20group%3Aparadiddle#2",
|
||||||
|
"#comment" : "negative search tag (#7694)",
|
||||||
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
"#pattern" : hitomi.HitomiGalleryExtractor.pattern,
|
||||||
|
"#count" : 41,
|
||||||
|
|
||||||
|
"search_tags": "language:japanese -tag:animated group:paradiddle",
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://hitomi.la/search.html?group:initial_g",
|
"#url" : "https://hitomi.la/search.html?group:initial_g",
|
||||||
"#class" : hitomi.HitomiSearchExtractor,
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
|||||||
Reference in New Issue
Block a user