[hitomi] update
- remove f-strings - fix flake8 warnings - move tests to test/results/hitomi.py
This commit is contained in:
@@ -364,7 +364,7 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr>
|
<tr>
|
||||||
<td>Hitomi.la</td>
|
<td>Hitomi.la</td>
|
||||||
<td>https://hitomi.la/</td>
|
<td>https://hitomi.la/</td>
|
||||||
<td>Galleries, Tag Searches</td>
|
<td>Galleries, Site Index, Search Results, Tag Searches</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
|
|||||||
@@ -16,22 +16,6 @@ import string
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
def get_nozomi_args(query):
|
|
||||||
ns, tag = query.strip().split(":")
|
|
||||||
area = ns
|
|
||||||
language = "all"
|
|
||||||
|
|
||||||
if ns == "female" or ns == "male":
|
|
||||||
area = "tag"
|
|
||||||
tag = query
|
|
||||||
elif "language" == ns:
|
|
||||||
area = None
|
|
||||||
language = tag
|
|
||||||
tag = "index"
|
|
||||||
|
|
||||||
return area, tag, language
|
|
||||||
|
|
||||||
|
|
||||||
class HitomiGalleryExtractor(GalleryExtractor):
|
class HitomiGalleryExtractor(GalleryExtractor):
|
||||||
"""Extractor for image galleries from hitomi.la"""
|
"""Extractor for image galleries from hitomi.la"""
|
||||||
category = "hitomi"
|
category = "hitomi"
|
||||||
@@ -119,61 +103,14 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
class HitomiIndexExtractor(Extractor):
|
|
||||||
"""Extractor for galleries from index searches on hitomi.la"""
|
|
||||||
category = "hitomi"
|
|
||||||
subcategory = "index"
|
|
||||||
root = "https://hitomi.la"
|
|
||||||
pattern = (r"(?:https?://)?hitomi\.la/"
|
|
||||||
r"([a-zA-Z0-9_]+)-([a-zA-Z0-9_]+)\.html")
|
|
||||||
test = (
|
|
||||||
("https://hitomi.la/index-japanese.html", {
|
|
||||||
"pattern": HitomiGalleryExtractor.pattern,
|
|
||||||
"count": ">= 35",
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
Extractor.__init__(self, match)
|
|
||||||
self.tag, self.language = match.groups()
|
|
||||||
|
|
||||||
def items(self):
|
|
||||||
data = {"_extractor": HitomiGalleryExtractor}
|
|
||||||
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(self.tag, self.language)
|
|
||||||
headers = {
|
|
||||||
"Origin": self.root,
|
|
||||||
"Cache-Control": "max-age=0",
|
|
||||||
}
|
|
||||||
|
|
||||||
offset = 0
|
|
||||||
total = None
|
|
||||||
while True:
|
|
||||||
headers["Referer"] = "{}/{}-{}.html?page={}".format(
|
|
||||||
self.root, self.tag, self.language, offset // 100 + 1)
|
|
||||||
headers["Range"] = "bytes={}-{}".format(offset, offset+99)
|
|
||||||
response = self.request(nozomi_url, headers=headers)
|
|
||||||
|
|
||||||
for gallery_id in decode_nozomi(response.content):
|
|
||||||
gallery_url = "{}/galleries/{}.html".format(
|
|
||||||
self.root, gallery_id)
|
|
||||||
yield Message.Queue, gallery_url, data
|
|
||||||
|
|
||||||
offset += 100
|
|
||||||
if total is None:
|
|
||||||
total = text.parse_int(
|
|
||||||
response.headers["content-range"].rpartition("/")[2])
|
|
||||||
if offset >= total:
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
class HitomiTagExtractor(Extractor):
|
class HitomiTagExtractor(Extractor):
|
||||||
"""Extractor for galleries from tag searches on hitomi.la"""
|
"""Extractor for galleries from tag searches on hitomi.la"""
|
||||||
category = "hitomi"
|
category = "hitomi"
|
||||||
subcategory = "tag"
|
subcategory = "tag"
|
||||||
root = "https://hitomi.la"
|
root = "https://hitomi.la"
|
||||||
pattern = (r"(?:https?://)?hitomi\.la/"
|
pattern = (r"(?:https?://)?hitomi\.la"
|
||||||
r"(tag|artist|group|series|type|character)/"
|
r"/(tag|artist|group|series|type|character)"
|
||||||
r"([^/?#]+)\.html")
|
r"/([^/?#]+)\.html")
|
||||||
example = "https://hitomi.la/tag/TAG-LANG.html"
|
example = "https://hitomi.la/tag/TAG-LANG.html"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -214,50 +151,58 @@ class HitomiTagExtractor(Extractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class HitomiSearchExtractor(Extractor):
|
class HitomiIndexExtractor(HitomiTagExtractor):
|
||||||
"""Extractor for galleries from multiple tag searches on hitomi.la"""
|
"""Extractor for galleries from index searches on hitomi.la"""
|
||||||
category = "hitomi"
|
subcategory = "index"
|
||||||
subcategory = "search"
|
pattern = r"(?:https?://)?hitomi\.la/(\w+)-(\w+)\.html"
|
||||||
root = "https://hitomi.la"
|
example = "https://hitomi.la/index-LANG.html"
|
||||||
pattern = (r"(?:https?://)?hitomi\.la/search.html"
|
|
||||||
r"\?([^/?#]+)")
|
|
||||||
test = (
|
|
||||||
("https://hitomi.la/search.html?tag%3Ascreenshots%20language%3Ajapanese", {
|
|
||||||
"pattern": HitomiGalleryExtractor.pattern,
|
|
||||||
"count": ">= 35",
|
|
||||||
}),
|
|
||||||
("https://hitomi.la/search.html?language%3Ajapanese%20artist%3Asumiya"),
|
|
||||||
("https://hitomi.la/search.html?group:initial_g"),
|
|
||||||
("https://hitomi.la/search.html?series:amnesia"),
|
|
||||||
("https://hitomi.la/search.html?type%3Adoujinshi"),
|
|
||||||
("https://hitomi.la/search.html?character%3Aa2"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self.query = match.group(1)
|
self.tag, self.language = match.groups()
|
||||||
self.tags = text.unquote(self.query).split(" ")
|
|
||||||
|
|
||||||
def get_nozomi_items(self, full_tag):
|
|
||||||
area, tag, language = get_nozomi_args(full_tag)
|
|
||||||
|
|
||||||
if area:
|
|
||||||
referer_base = "{}/n/{}/{}-{}.html".format(self.root, area, tag, language)
|
|
||||||
nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(area, tag, language)
|
|
||||||
else:
|
|
||||||
referer_base = "{}/n/{}-{}.html".format(self.root, tag, language)
|
|
||||||
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(tag, language)
|
|
||||||
|
|
||||||
|
def items(self):
|
||||||
|
data = {"_extractor": HitomiGalleryExtractor}
|
||||||
|
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
|
||||||
|
self.tag, self.language)
|
||||||
headers = {
|
headers = {
|
||||||
"Origin": self.root,
|
"Origin": self.root,
|
||||||
"Cache-Control": "max-age=0",
|
"Cache-Control": "max-age=0",
|
||||||
}
|
}
|
||||||
|
|
||||||
headers["Referer"] = f"{referer_base}/search.html?{self.query}"
|
offset = 0
|
||||||
response = self.request(nozomi_url, headers=headers)
|
total = None
|
||||||
|
while True:
|
||||||
|
headers["Referer"] = "{}/{}-{}.html?page={}".format(
|
||||||
|
self.root, self.tag, self.language, offset // 100 + 1)
|
||||||
|
headers["Range"] = "bytes={}-{}".format(offset, offset+99)
|
||||||
|
response = self.request(nozomi_url, headers=headers)
|
||||||
|
|
||||||
result = set(decode_nozomi(response.content))
|
for gallery_id in decode_nozomi(response.content):
|
||||||
return result
|
gallery_url = "{}/galleries/{}.html".format(
|
||||||
|
self.root, gallery_id)
|
||||||
|
yield Message.Queue, gallery_url, data
|
||||||
|
|
||||||
|
offset += 100
|
||||||
|
if total is None:
|
||||||
|
total = text.parse_int(
|
||||||
|
response.headers["content-range"].rpartition("/")[2])
|
||||||
|
if offset >= total:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
class HitomiSearchExtractor(Extractor):
|
||||||
|
"""Extractor for galleries from multiple tag searches on hitomi.la"""
|
||||||
|
category = "hitomi"
|
||||||
|
subcategory = "search"
|
||||||
|
root = "https://hitomi.la"
|
||||||
|
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
|
||||||
|
example = "https://hitomi.la/search.html?QUERY"
|
||||||
|
|
||||||
|
def __init__(self, match):
|
||||||
|
Extractor.__init__(self, match)
|
||||||
|
self.query = match.group(1)
|
||||||
|
self.tags = text.unquote(self.query).split(" ")
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = {"_extractor": HitomiGalleryExtractor}
|
data = {"_extractor": HitomiGalleryExtractor}
|
||||||
@@ -270,6 +215,44 @@ class HitomiSearchExtractor(Extractor):
|
|||||||
self.root, gallery_id)
|
self.root, gallery_id)
|
||||||
yield Message.Queue, gallery_url, data
|
yield Message.Queue, gallery_url, data
|
||||||
|
|
||||||
|
def get_nozomi_items(self, full_tag):
|
||||||
|
area, tag, language = self.get_nozomi_args(full_tag)
|
||||||
|
|
||||||
|
if area:
|
||||||
|
referer_base = "{}/n/{}/{}-{}.html".format(
|
||||||
|
self.root, area, tag, language)
|
||||||
|
nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
|
||||||
|
area, tag, language)
|
||||||
|
else:
|
||||||
|
referer_base = "{}/n/{}-{}.html".format(
|
||||||
|
self.root, tag, language)
|
||||||
|
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
|
||||||
|
tag, language)
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"Origin": self.root,
|
||||||
|
"Cache-Control": "max-age=0",
|
||||||
|
"Referer": "{}/search.html?{}".format(referer_base, self.query),
|
||||||
|
}
|
||||||
|
|
||||||
|
response = self.request(nozomi_url, headers=headers)
|
||||||
|
return set(decode_nozomi(response.content))
|
||||||
|
|
||||||
|
def get_nozomi_args(self, query):
|
||||||
|
ns, _, tag = query.strip().partition(":")
|
||||||
|
area = ns
|
||||||
|
language = "all"
|
||||||
|
|
||||||
|
if ns == "female" or ns == "male":
|
||||||
|
area = "tag"
|
||||||
|
tag = query
|
||||||
|
elif ns == "language":
|
||||||
|
area = None
|
||||||
|
language = tag
|
||||||
|
tag = "index"
|
||||||
|
|
||||||
|
return area, tag, language
|
||||||
|
|
||||||
|
|
||||||
@memcache(maxage=1800)
|
@memcache(maxage=1800)
|
||||||
def _parse_gg(extr):
|
def _parse_gg(extr):
|
||||||
|
|||||||
@@ -194,4 +194,41 @@ __tests__ = (
|
|||||||
"#class" : hitomi.HitomiTagExtractor,
|
"#class" : hitomi.HitomiTagExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/index-japanese.html",
|
||||||
|
"#class" : hitomi.HitomiIndexExtractor,
|
||||||
|
"#pattern" : hitomi.HitomiGalleryExtractor.pattern,
|
||||||
|
"#range" : "1-150",
|
||||||
|
"#count" : 150,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/search.html?tag%3Ascreenshots%20language%3Ajapanese",
|
||||||
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
"#pattern" : hitomi.HitomiGalleryExtractor.pattern,
|
||||||
|
"#range" : "1-150",
|
||||||
|
"#count" : 150,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/search.html?language%3Ajapanese%20artist%3Asumiya",
|
||||||
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/search.html?group:initial_g",
|
||||||
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/search.html?series:amnesia",
|
||||||
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/search.html?type%3Adoujinshi",
|
||||||
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#url" : "https://hitomi.la/search.html?character%3Aa2",
|
||||||
|
"#class" : hitomi.HitomiSearchExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user