[hitomi] update

- remove f-strings
- fix flake8 warnings
- move tests to test/results/hitomi.py
This commit is contained in:
Mike Fährmann
2024-10-29 16:51:19 +01:00
parent f170d73ffc
commit 6f54328a39
3 changed files with 121 additions and 101 deletions

View File

@@ -364,7 +364,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Hitomi.la</td>
<td>https://hitomi.la/</td>
<td>Galleries, Tag Searches</td>
<td>Galleries, Site Index, Search Results, Tag Searches</td>
<td></td>
</tr>
<tr>

View File

@@ -16,22 +16,6 @@ import string
import re
def get_nozomi_args(query):
ns, tag = query.strip().split(":")
area = ns
language = "all"
if ns == "female" or ns == "male":
area = "tag"
tag = query
elif "language" == ns:
area = None
language = tag
tag = "index"
return area, tag, language
class HitomiGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from hitomi.la"""
category = "hitomi"
@@ -119,61 +103,14 @@ class HitomiGalleryExtractor(GalleryExtractor):
return result
class HitomiIndexExtractor(Extractor):
"""Extractor for galleries from index searches on hitomi.la"""
category = "hitomi"
subcategory = "index"
root = "https://hitomi.la"
pattern = (r"(?:https?://)?hitomi\.la/"
r"([a-zA-Z0-9_]+)-([a-zA-Z0-9_]+)\.html")
test = (
("https://hitomi.la/index-japanese.html", {
"pattern": HitomiGalleryExtractor.pattern,
"count": ">= 35",
}),
)
def __init__(self, match):
Extractor.__init__(self, match)
self.tag, self.language = match.groups()
def items(self):
data = {"_extractor": HitomiGalleryExtractor}
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(self.tag, self.language)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
}
offset = 0
total = None
while True:
headers["Referer"] = "{}/{}-{}.html?page={}".format(
self.root, self.tag, self.language, offset // 100 + 1)
headers["Range"] = "bytes={}-{}".format(offset, offset+99)
response = self.request(nozomi_url, headers=headers)
for gallery_id in decode_nozomi(response.content):
gallery_url = "{}/galleries/{}.html".format(
self.root, gallery_id)
yield Message.Queue, gallery_url, data
offset += 100
if total is None:
total = text.parse_int(
response.headers["content-range"].rpartition("/")[2])
if offset >= total:
return
class HitomiTagExtractor(Extractor):
"""Extractor for galleries from tag searches on hitomi.la"""
category = "hitomi"
subcategory = "tag"
root = "https://hitomi.la"
pattern = (r"(?:https?://)?hitomi\.la/"
r"(tag|artist|group|series|type|character)/"
r"([^/?#]+)\.html")
pattern = (r"(?:https?://)?hitomi\.la"
r"/(tag|artist|group|series|type|character)"
r"/([^/?#]+)\.html")
example = "https://hitomi.la/tag/TAG-LANG.html"
def __init__(self, match):
@@ -214,50 +151,58 @@ class HitomiTagExtractor(Extractor):
return
class HitomiSearchExtractor(Extractor):
"""Extractor for galleries from multiple tag searches on hitomi.la"""
category = "hitomi"
subcategory = "search"
root = "https://hitomi.la"
pattern = (r"(?:https?://)?hitomi\.la/search.html"
r"\?([^/?#]+)")
test = (
("https://hitomi.la/search.html?tag%3Ascreenshots%20language%3Ajapanese", {
"pattern": HitomiGalleryExtractor.pattern,
"count": ">= 35",
}),
("https://hitomi.la/search.html?language%3Ajapanese%20artist%3Asumiya"),
("https://hitomi.la/search.html?group:initial_g"),
("https://hitomi.la/search.html?series:amnesia"),
("https://hitomi.la/search.html?type%3Adoujinshi"),
("https://hitomi.la/search.html?character%3Aa2"),
)
class HitomiIndexExtractor(HitomiTagExtractor):
"""Extractor for galleries from index searches on hitomi.la"""
subcategory = "index"
pattern = r"(?:https?://)?hitomi\.la/(\w+)-(\w+)\.html"
example = "https://hitomi.la/index-LANG.html"
def __init__(self, match):
Extractor.__init__(self, match)
self.query = match.group(1)
self.tags = text.unquote(self.query).split(" ")
def get_nozomi_items(self, full_tag):
area, tag, language = get_nozomi_args(full_tag)
if area:
referer_base = "{}/n/{}/{}-{}.html".format(self.root, area, tag, language)
nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(area, tag, language)
else:
referer_base = "{}/n/{}-{}.html".format(self.root, tag, language)
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(tag, language)
self.tag, self.language = match.groups()
def items(self):
data = {"_extractor": HitomiGalleryExtractor}
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
self.tag, self.language)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
}
headers["Referer"] = f"{referer_base}/search.html?{self.query}"
response = self.request(nozomi_url, headers=headers)
offset = 0
total = None
while True:
headers["Referer"] = "{}/{}-{}.html?page={}".format(
self.root, self.tag, self.language, offset // 100 + 1)
headers["Range"] = "bytes={}-{}".format(offset, offset+99)
response = self.request(nozomi_url, headers=headers)
result = set(decode_nozomi(response.content))
return result
for gallery_id in decode_nozomi(response.content):
gallery_url = "{}/galleries/{}.html".format(
self.root, gallery_id)
yield Message.Queue, gallery_url, data
offset += 100
if total is None:
total = text.parse_int(
response.headers["content-range"].rpartition("/")[2])
if offset >= total:
return
class HitomiSearchExtractor(Extractor):
"""Extractor for galleries from multiple tag searches on hitomi.la"""
category = "hitomi"
subcategory = "search"
root = "https://hitomi.la"
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
example = "https://hitomi.la/search.html?QUERY"
def __init__(self, match):
Extractor.__init__(self, match)
self.query = match.group(1)
self.tags = text.unquote(self.query).split(" ")
def items(self):
data = {"_extractor": HitomiGalleryExtractor}
@@ -270,6 +215,44 @@ class HitomiSearchExtractor(Extractor):
self.root, gallery_id)
yield Message.Queue, gallery_url, data
def get_nozomi_items(self, full_tag):
area, tag, language = self.get_nozomi_args(full_tag)
if area:
referer_base = "{}/n/{}/{}-{}.html".format(
self.root, area, tag, language)
nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
area, tag, language)
else:
referer_base = "{}/n/{}-{}.html".format(
self.root, tag, language)
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
tag, language)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
"Referer": "{}/search.html?{}".format(referer_base, self.query),
}
response = self.request(nozomi_url, headers=headers)
return set(decode_nozomi(response.content))
def get_nozomi_args(self, query):
ns, _, tag = query.strip().partition(":")
area = ns
language = "all"
if ns == "female" or ns == "male":
area = "tag"
tag = query
elif ns == "language":
area = None
language = tag
tag = "index"
return area, tag, language
@memcache(maxage=1800)
def _parse_gg(extr):

View File

@@ -194,4 +194,41 @@ __tests__ = (
"#class" : hitomi.HitomiTagExtractor,
},
{
"#url" : "https://hitomi.la/index-japanese.html",
"#class" : hitomi.HitomiIndexExtractor,
"#pattern" : hitomi.HitomiGalleryExtractor.pattern,
"#range" : "1-150",
"#count" : 150,
},
{
"#url" : "https://hitomi.la/search.html?tag%3Ascreenshots%20language%3Ajapanese",
"#class" : hitomi.HitomiSearchExtractor,
"#pattern" : hitomi.HitomiGalleryExtractor.pattern,
"#range" : "1-150",
"#count" : 150,
},
{
"#url" : "https://hitomi.la/search.html?language%3Ajapanese%20artist%3Asumiya",
"#class" : hitomi.HitomiSearchExtractor,
},
{
"#url" : "https://hitomi.la/search.html?group:initial_g",
"#class" : hitomi.HitomiSearchExtractor,
},
{
"#url" : "https://hitomi.la/search.html?series:amnesia",
"#class" : hitomi.HitomiSearchExtractor,
},
{
"#url" : "https://hitomi.la/search.html?type%3Adoujinshi",
"#class" : hitomi.HitomiSearchExtractor,
},
{
"#url" : "https://hitomi.la/search.html?character%3Aa2",
"#class" : hitomi.HitomiSearchExtractor,
},
)