diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 9e66cd66..e2a8dfa1 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -10,7 +10,6 @@
from .common import GalleryExtractor
from .. import text, util
-import string
import json
@@ -24,7 +23,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
test = (
("https://hitomi.la/galleries/867789.html", {
"pattern": r"https://[a-c]a.hitomi.la/images/./../[0-9a-f]+.jpg",
- "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2",
+ "keyword": "3314105a0b344ea1461c43257b14b0de415b88bb",
"count": 16,
}),
# download test
@@ -55,56 +54,43 @@ class HitomiGalleryExtractor(GalleryExtractor):
)
def __init__(self, match):
- self.gallery_id = match.group(1)
- url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gallery_id)
+ gid = match.group(1)
+ url = "https://ltn.hitomi.la/galleries/{}.js".format(gid)
GalleryExtractor.__init__(self, match, url)
+ self.data = None
self.session.headers["Referer"] = "{}/reader/{}.html".format(
- self.root, self.gallery_id)
+ self.root, gid)
- def metadata(self, _):
- # try galleries page first
- url = "{}/galleries/{}.html".format(self.root, self.gallery_id)
- response = self.request(url, fatal=False)
+ def metadata(self, page):
+ self.data = data = json.loads(page.partition("=")[2])
- # follow redirects
- while b"
Redirect" in response.content:
- url = text.extract(response.text, "href='", "'")[0]
- if not url.startswith("http"):
- url = text.urljoin(self.root, url)
- response = self.request(url, fatal=False)
+ language = data.get("language")
+ if language:
+ language = language.capitalize()
- # fallback to reader page
- if response.status_code >= 400:
- url = "{}/reader/{}.html".format(self.root, self.gallery_id)
- page = self.request(url).text
- return {
- "gallery_id": text.parse_int(self.gallery_id),
- "title": text.unescape(text.extract(
- page, "", "<")[0].rpartition(" | ")[0]),
- }
+ tags = []
+ for tinfo in data["tags"]:
+ tag = tinfo["tag"]
+ if tinfo.get("female"):
+ tag = "female:" + tag
+ elif tinfo.get("male"):
+ tag = "male:" + tag
+ tags.append(tag)
- page = response.text
- extr = text.extract_from(page, page.index('', '<').strip()),
- "artist" : self._prep(extr('', '
')),
- "group" : self._prep(extr('Group | ', ' | ')),
- "type" : self._prep_1(extr('Type | ', ' | ')),
- "language" : self._prep_1(extr('Language | ', ' | ')),
- "parody" : self._prep(extr('Series | ', ' | ')),
- "characters": self._prep(extr('Characters | ', ' | ')),
- "tags" : self._prep(extr('Tags | ', ' | ')),
- "date" : self._date(extr('', '')),
+ return {
+ "gallery_id": text.parse_int(data["id"]),
+ "title" : data["title"],
+ "type" : data["type"],
+ "language" : language,
+ "lang" : util.language_to_code(language),
+ "tags" : tags,
+ "date" : text.parse_datetime(
+ data["date"] + ":00", "%Y-%m-%d %H:%M:%S%z"),
}
- if data["language"] == "N/a":
- data["language"] = None
- data["lang"] = util.language_to_code(data["language"])
- return data
- def images(self, page):
+ def images(self, _):
result = []
- for image in json.loads(page.partition("=")[2])["files"]:
+ for image in self.data["files"]:
ihash = image["hash"]
idata = text.nameext_from_url(image["name"])
@@ -120,18 +106,3 @@ class HitomiGalleryExtractor(GalleryExtractor):
)
result.append((url, idata))
return result
-
- @staticmethod
- def _prep(value):
- return [
- text.unescape(string.capwords(v))
- for v in text.extract_iter(value or "", '.html">', '<')
- ]
-
- @staticmethod
- def _prep_1(value):
- return text.remove_html(value).capitalize()
-
- @staticmethod
- def _date(value):
- return text.parse_datetime(value + ":00", "%Y-%m-%d %H:%M:%S%z")