diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 2d3f1774..ba5aed8f 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -364,7 +364,7 @@ Consider all listed sites to potentially be NSFW.
| Hitomi.la |
https://hitomi.la/ |
- Galleries, Tag Searches |
+ Galleries, Site Index, Search Results, Tag Searches |
|
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 384908d3..308b42c6 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -16,22 +16,6 @@ import string
import re
-def get_nozomi_args(query):
- ns, tag = query.strip().split(":")
- area = ns
- language = "all"
-
- if ns == "female" or ns == "male":
- area = "tag"
- tag = query
- elif "language" == ns:
- area = None
- language = tag
- tag = "index"
-
- return area, tag, language
-
-
class HitomiGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from hitomi.la"""
category = "hitomi"
@@ -119,61 +103,14 @@ class HitomiGalleryExtractor(GalleryExtractor):
return result
-class HitomiIndexExtractor(Extractor):
- """Extractor for galleries from index searches on hitomi.la"""
- category = "hitomi"
- subcategory = "index"
- root = "https://hitomi.la"
- pattern = (r"(?:https?://)?hitomi\.la/"
- r"([a-zA-Z0-9_]+)-([a-zA-Z0-9_]+)\.html")
- test = (
- ("https://hitomi.la/index-japanese.html", {
- "pattern": HitomiGalleryExtractor.pattern,
- "count": ">= 35",
- }),
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.tag, self.language = match.groups()
-
- def items(self):
- data = {"_extractor": HitomiGalleryExtractor}
- nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(self.tag, self.language)
- headers = {
- "Origin": self.root,
- "Cache-Control": "max-age=0",
- }
-
- offset = 0
- total = None
- while True:
- headers["Referer"] = "{}/{}-{}.html?page={}".format(
- self.root, self.tag, self.language, offset // 100 + 1)
- headers["Range"] = "bytes={}-{}".format(offset, offset+99)
- response = self.request(nozomi_url, headers=headers)
-
- for gallery_id in decode_nozomi(response.content):
- gallery_url = "{}/galleries/{}.html".format(
- self.root, gallery_id)
- yield Message.Queue, gallery_url, data
-
- offset += 100
- if total is None:
- total = text.parse_int(
- response.headers["content-range"].rpartition("/")[2])
- if offset >= total:
- return
-
-
class HitomiTagExtractor(Extractor):
"""Extractor for galleries from tag searches on hitomi.la"""
category = "hitomi"
subcategory = "tag"
root = "https://hitomi.la"
- pattern = (r"(?:https?://)?hitomi\.la/"
- r"(tag|artist|group|series|type|character)/"
- r"([^/?#]+)\.html")
+ pattern = (r"(?:https?://)?hitomi\.la"
+ r"/(tag|artist|group|series|type|character)"
+ r"/([^/?#]+)\.html")
example = "https://hitomi.la/tag/TAG-LANG.html"
def __init__(self, match):
@@ -214,50 +151,58 @@ class HitomiTagExtractor(Extractor):
return
-class HitomiSearchExtractor(Extractor):
- """Extractor for galleries from multiple tag searches on hitomi.la"""
- category = "hitomi"
- subcategory = "search"
- root = "https://hitomi.la"
- pattern = (r"(?:https?://)?hitomi\.la/search.html"
- r"\?([^/?#]+)")
- test = (
- ("https://hitomi.la/search.html?tag%3Ascreenshots%20language%3Ajapanese", {
- "pattern": HitomiGalleryExtractor.pattern,
- "count": ">= 35",
- }),
- ("https://hitomi.la/search.html?language%3Ajapanese%20artist%3Asumiya"),
- ("https://hitomi.la/search.html?group:initial_g"),
- ("https://hitomi.la/search.html?series:amnesia"),
- ("https://hitomi.la/search.html?type%3Adoujinshi"),
- ("https://hitomi.la/search.html?character%3Aa2"),
- )
+class HitomiIndexExtractor(HitomiTagExtractor):
+ """Extractor for galleries from index searches on hitomi.la"""
+ subcategory = "index"
+ pattern = r"(?:https?://)?hitomi\.la/(\w+)-(\w+)\.html"
+ example = "https://hitomi.la/index-LANG.html"
def __init__(self, match):
Extractor.__init__(self, match)
- self.query = match.group(1)
- self.tags = text.unquote(self.query).split(" ")
-
- def get_nozomi_items(self, full_tag):
- area, tag, language = get_nozomi_args(full_tag)
-
- if area:
- referer_base = "{}/n/{}/{}-{}.html".format(self.root, area, tag, language)
- nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(area, tag, language)
- else:
- referer_base = "{}/n/{}-{}.html".format(self.root, tag, language)
- nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(tag, language)
+ self.tag, self.language = match.groups()
+ def items(self):
+ data = {"_extractor": HitomiGalleryExtractor}
+ nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
+ self.tag, self.language)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
}
- headers["Referer"] = f"{referer_base}/search.html?{self.query}"
- response = self.request(nozomi_url, headers=headers)
+ offset = 0
+ total = None
+ while True:
+ headers["Referer"] = "{}/{}-{}.html?page={}".format(
+ self.root, self.tag, self.language, offset // 100 + 1)
+ headers["Range"] = "bytes={}-{}".format(offset, offset+99)
+ response = self.request(nozomi_url, headers=headers)
- result = set(decode_nozomi(response.content))
- return result
+ for gallery_id in decode_nozomi(response.content):
+ gallery_url = "{}/galleries/{}.html".format(
+ self.root, gallery_id)
+ yield Message.Queue, gallery_url, data
+
+ offset += 100
+ if total is None:
+ total = text.parse_int(
+ response.headers["content-range"].rpartition("/")[2])
+ if offset >= total:
+ return
+
+
+class HitomiSearchExtractor(Extractor):
+ """Extractor for galleries from multiple tag searches on hitomi.la"""
+ category = "hitomi"
+ subcategory = "search"
+ root = "https://hitomi.la"
+ pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
+ example = "https://hitomi.la/search.html?QUERY"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.query = match.group(1)
+ self.tags = text.unquote(self.query).split(" ")
def items(self):
data = {"_extractor": HitomiGalleryExtractor}
@@ -270,6 +215,44 @@ class HitomiSearchExtractor(Extractor):
self.root, gallery_id)
yield Message.Queue, gallery_url, data
+ def get_nozomi_items(self, full_tag):
+ area, tag, language = self.get_nozomi_args(full_tag)
+
+ if area:
+ referer_base = "{}/n/{}/{}-{}.html".format(
+ self.root, area, tag, language)
+ nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
+ area, tag, language)
+ else:
+ referer_base = "{}/n/{}-{}.html".format(
+ self.root, tag, language)
+ nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
+ tag, language)
+
+ headers = {
+ "Origin": self.root,
+ "Cache-Control": "max-age=0",
+ "Referer": "{}/search.html?{}".format(referer_base, self.query),
+ }
+
+ response = self.request(nozomi_url, headers=headers)
+ return set(decode_nozomi(response.content))
+
+ def get_nozomi_args(self, query):
+ ns, _, tag = query.strip().partition(":")
+ area = ns
+ language = "all"
+
+ if ns == "female" or ns == "male":
+ area = "tag"
+ tag = query
+ elif ns == "language":
+ area = None
+ language = tag
+ tag = "index"
+
+ return area, tag, language
+
@memcache(maxage=1800)
def _parse_gg(extr):
diff --git a/test/results/hitomi.py b/test/results/hitomi.py
index 78fa0799..1b0ffcba 100644
--- a/test/results/hitomi.py
+++ b/test/results/hitomi.py
@@ -194,4 +194,41 @@ __tests__ = (
"#class" : hitomi.HitomiTagExtractor,
},
+{
+ "#url" : "https://hitomi.la/index-japanese.html",
+ "#class" : hitomi.HitomiIndexExtractor,
+ "#pattern" : hitomi.HitomiGalleryExtractor.pattern,
+ "#range" : "1-150",
+ "#count" : 150,
+},
+
+{
+ "#url" : "https://hitomi.la/search.html?tag%3Ascreenshots%20language%3Ajapanese",
+ "#class" : hitomi.HitomiSearchExtractor,
+ "#pattern" : hitomi.HitomiGalleryExtractor.pattern,
+ "#range" : "1-150",
+ "#count" : 150,
+},
+
+{
+ "#url" : "https://hitomi.la/search.html?language%3Ajapanese%20artist%3Asumiya",
+ "#class" : hitomi.HitomiSearchExtractor,
+},
+{
+ "#url" : "https://hitomi.la/search.html?group:initial_g",
+ "#class" : hitomi.HitomiSearchExtractor,
+},
+{
+ "#url" : "https://hitomi.la/search.html?series:amnesia",
+ "#class" : hitomi.HitomiSearchExtractor,
+},
+{
+ "#url" : "https://hitomi.la/search.html?type%3Adoujinshi",
+ "#class" : hitomi.HitomiSearchExtractor,
+},
+{
+ "#url" : "https://hitomi.la/search.html?character%3Aa2",
+ "#class" : hitomi.HitomiSearchExtractor,
+},
+
)