[hitomi] fix extractors (#7230)
This commit is contained in:
@@ -2793,9 +2793,6 @@ Description
|
|||||||
|
|
||||||
Available formats are ``"webp"`` and ``"avif"``.
|
Available formats are ``"webp"`` and ``"avif"``.
|
||||||
|
|
||||||
``"original"`` will try to download the original ``jpg`` or ``png`` versions,
|
|
||||||
but is most likely going to fail with ``403 Forbidden`` errors.
|
|
||||||
|
|
||||||
|
|
||||||
extractor.imagechest.access-token
|
extractor.imagechest.access-token
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|||||||
@@ -16,19 +16,25 @@ import string
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
class HitomiGalleryExtractor(GalleryExtractor):
|
class HitomiExtractor(Extractor):
|
||||||
"""Extractor for image galleries from hitomi.la"""
|
"""Base class for hitomi extractors"""
|
||||||
category = "hitomi"
|
category = "hitomi"
|
||||||
root = "https://hitomi.la"
|
root = "https://hitomi.la"
|
||||||
|
domain = "gold-usergeneratedcontent.net"
|
||||||
|
|
||||||
|
|
||||||
|
class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
|
||||||
|
"""Extractor for hitomi.la galleries"""
|
||||||
pattern = (r"(?:https?://)?hitomi\.la"
|
pattern = (r"(?:https?://)?hitomi\.la"
|
||||||
r"/(?:manga|doujinshi|cg|gamecg|imageset|galleries|reader)"
|
r"/(?:manga|doujinshi|cg|gamecg|imageset|galleries|reader)"
|
||||||
r"/(?:[^/?#]+-)?(\d+)")
|
r"/(?:[^/?#]+-)?(\d+)")
|
||||||
example = "https://hitomi.la/manga/TITLE-867789.html"
|
example = "https://hitomi.la/manga/TITLE-867789.html"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
self.gid = match.group(1)
|
GalleryExtractor.__init__(self, match, False)
|
||||||
url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gid)
|
self.gid = gid = self.groups[0]
|
||||||
GalleryExtractor.__init__(self, match, url)
|
self.gallery_url = "https://ltn.{}/galleries/{}.js".format(
|
||||||
|
self.domain, gid)
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.session.headers["Referer"] = "{}/reader/{}.html".format(
|
self.session.headers["Referer"] = "{}/reader/{}.html".format(
|
||||||
@@ -71,43 +77,34 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def images(self, _):
|
def images(self, _):
|
||||||
# see https://ltn.hitomi.la/gg.js
|
# https://ltn.gold-usergeneratedcontent.net/gg.js
|
||||||
gg_m, gg_b, gg_default = _parse_gg(self)
|
gg_m, gg_b, gg_default = _parse_gg(self)
|
||||||
|
|
||||||
fmt = self.config("format") or "webp"
|
fmt = ext = self.config("format") or "webp"
|
||||||
if fmt == "original":
|
check = (fmt != "webp")
|
||||||
subdomain, path, ext, check = "b", "images", None, False
|
|
||||||
else:
|
|
||||||
subdomain, path, ext, check = "a", fmt, fmt, (fmt != "webp")
|
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
for image in self.info["files"]:
|
for image in self.info["files"]:
|
||||||
if check:
|
if check:
|
||||||
if image.get("has" + fmt):
|
ext = fmt if image.get("has" + fmt) else "webp"
|
||||||
path = ext = fmt
|
|
||||||
else:
|
|
||||||
path = ext = "webp"
|
|
||||||
ihash = image["hash"]
|
ihash = image["hash"]
|
||||||
idata = text.nameext_from_url(image["name"])
|
idata = text.nameext_from_url(image["name"])
|
||||||
idata["extension_original"] = idata["extension"]
|
idata["extension_original"] = idata["extension"]
|
||||||
if ext:
|
idata["extension"] = ext
|
||||||
idata["extension"] = ext
|
|
||||||
|
|
||||||
# see https://ltn.hitomi.la/common.js
|
# https://ltn.gold-usergeneratedcontent.net/common.js
|
||||||
inum = int(ihash[-1] + ihash[-3:-1], 16)
|
inum = int(ihash[-1] + ihash[-3:-1], 16)
|
||||||
url = "https://{}{}.hitomi.la/{}/{}/{}/{}.{}".format(
|
url = "https://{}{}.{}/{}/{}/{}.{}".format(
|
||||||
chr(97 + gg_m.get(inum, gg_default)),
|
ext[0], gg_m.get(inum, gg_default) + 1, self.domain,
|
||||||
subdomain, path, gg_b, inum, ihash, idata["extension"],
|
gg_b, inum, ihash, ext,
|
||||||
)
|
)
|
||||||
result.append((url, idata))
|
result.append((url, idata))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
class HitomiTagExtractor(Extractor):
|
class HitomiTagExtractor(HitomiExtractor):
|
||||||
"""Extractor for galleries from tag searches on hitomi.la"""
|
"""Extractor for galleries from tag searches on hitomi.la"""
|
||||||
category = "hitomi"
|
|
||||||
subcategory = "tag"
|
subcategory = "tag"
|
||||||
root = "https://hitomi.la"
|
|
||||||
pattern = (r"(?:https?://)?hitomi\.la"
|
pattern = (r"(?:https?://)?hitomi\.la"
|
||||||
r"/(tag|artist|group|series|type|character)"
|
r"/(tag|artist|group|series|type|character)"
|
||||||
r"/([^/?#]+)\.html")
|
r"/([^/?#]+)\.html")
|
||||||
@@ -126,8 +123,8 @@ class HitomiTagExtractor(Extractor):
|
|||||||
"_extractor": HitomiGalleryExtractor,
|
"_extractor": HitomiGalleryExtractor,
|
||||||
"search_tags": text.unquote(self.tag.rpartition("-")[0]),
|
"search_tags": text.unquote(self.tag.rpartition("-")[0]),
|
||||||
}
|
}
|
||||||
nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format(
|
nozomi_url = "https://ltn.{}/{}/{}.nozomi".format(
|
||||||
self.type, self.tag)
|
self.domain, self.type, self.tag)
|
||||||
headers = {
|
headers = {
|
||||||
"Origin": self.root,
|
"Origin": self.root,
|
||||||
"Cache-Control": "max-age=0",
|
"Cache-Control": "max-age=0",
|
||||||
@@ -166,8 +163,8 @@ class HitomiIndexExtractor(HitomiTagExtractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = {"_extractor": HitomiGalleryExtractor}
|
data = {"_extractor": HitomiGalleryExtractor}
|
||||||
nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
|
nozomi_url = "https://ltn.{}/{}-{}.nozomi".format(
|
||||||
self.tag, self.language)
|
self.domain, self.tag, self.language)
|
||||||
headers = {
|
headers = {
|
||||||
"Origin": self.root,
|
"Origin": self.root,
|
||||||
"Cache-Control": "max-age=0",
|
"Cache-Control": "max-age=0",
|
||||||
@@ -194,11 +191,9 @@ class HitomiIndexExtractor(HitomiTagExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class HitomiSearchExtractor(Extractor):
|
class HitomiSearchExtractor(HitomiExtractor):
|
||||||
"""Extractor for galleries from multiple tag searches on hitomi.la"""
|
"""Extractor for galleries from multiple tag searches on hitomi.la"""
|
||||||
category = "hitomi"
|
|
||||||
subcategory = "search"
|
subcategory = "search"
|
||||||
root = "https://hitomi.la"
|
|
||||||
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
|
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
|
||||||
example = "https://hitomi.la/search.html?QUERY"
|
example = "https://hitomi.la/search.html?QUERY"
|
||||||
|
|
||||||
@@ -224,11 +219,11 @@ class HitomiSearchExtractor(Extractor):
|
|||||||
area, tag, language = self.get_nozomi_args(full_tag)
|
area, tag, language = self.get_nozomi_args(full_tag)
|
||||||
|
|
||||||
if area:
|
if area:
|
||||||
nozomi_url = "https://ltn.hitomi.la/n/{}/{}-{}.nozomi".format(
|
nozomi_url = "https://ltn.{}/n/{}/{}-{}.nozomi".format(
|
||||||
area, tag, language)
|
self.domain, area, tag, language)
|
||||||
else:
|
else:
|
||||||
nozomi_url = "https://ltn.hitomi.la/n/{}-{}.nozomi".format(
|
nozomi_url = "https://ltn.{}/n/{}-{}.nozomi".format(
|
||||||
tag, language)
|
self.domain, tag, language)
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Origin": self.root,
|
"Origin": self.root,
|
||||||
@@ -257,7 +252,7 @@ class HitomiSearchExtractor(Extractor):
|
|||||||
|
|
||||||
@memcache(maxage=1800)
|
@memcache(maxage=1800)
|
||||||
def _parse_gg(extr):
|
def _parse_gg(extr):
|
||||||
page = extr.request("https://ltn.hitomi.la/gg.js").text
|
page = extr.request("https://ltn.gold-usergeneratedcontent.net/gg.js").text
|
||||||
|
|
||||||
m = {}
|
m = {}
|
||||||
|
|
||||||
@@ -280,4 +275,4 @@ def _parse_gg(extr):
|
|||||||
d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
|
d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
|
||||||
b = re.search(r"b:\s*[\"'](.+)[\"']", page)
|
b = re.search(r"b:\s*[\"'](.+)[\"']", page)
|
||||||
|
|
||||||
return m, b.group(1).strip("/"), int(d.group(1)) if d else 1
|
return m, b.group(1).strip("/"), int(d.group(1)) if d else 0
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ __tests__ = (
|
|||||||
"#url" : "https://hitomi.la/galleries/867789.html",
|
"#url" : "https://hitomi.la/galleries/867789.html",
|
||||||
"#category": ("", "hitomi", "gallery"),
|
"#category": ("", "hitomi", "gallery"),
|
||||||
"#class" : hitomi.HitomiGalleryExtractor,
|
"#class" : hitomi.HitomiGalleryExtractor,
|
||||||
"#pattern" : r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+/[0-9a-f]{64}\.webp",
|
"#pattern" : r"https://w[1-3]\.gold-usergeneratedcontent\.net/\d+/\d+/[0-9a-f]{64}\.webp",
|
||||||
"#count" : 16,
|
"#count" : 16,
|
||||||
|
|
||||||
"artist" : ["morris"],
|
"artist" : ["morris"],
|
||||||
"characters": [],
|
"characters": [],
|
||||||
@@ -82,7 +82,7 @@ __tests__ = (
|
|||||||
"#category": ("", "hitomi", "gallery"),
|
"#category": ("", "hitomi", "gallery"),
|
||||||
"#class" : hitomi.HitomiGalleryExtractor,
|
"#class" : hitomi.HitomiGalleryExtractor,
|
||||||
"#options" : {"format": "avif"},
|
"#options" : {"format": "avif"},
|
||||||
"#pattern" : r"https://[a-c]a\.hitomi\.la/avif/\d+/\d+/[0-9a-f]{64}\.avif",
|
"#pattern" : r"https://a[1-3]\.gold-usergeneratedcontent\.net/\d+/\d+/[0-9a-f]{64}\.avif",
|
||||||
"#count" : 22,
|
"#count" : 22,
|
||||||
|
|
||||||
"artist" : ["sorairo len"],
|
"artist" : ["sorairo len"],
|
||||||
@@ -96,7 +96,7 @@ __tests__ = (
|
|||||||
"lang" : "ja",
|
"lang" : "ja",
|
||||||
"language" : "Japanese",
|
"language" : "Japanese",
|
||||||
"num" : range(1, 22),
|
"num" : range(1, 22),
|
||||||
"parody" : [],
|
"parody" : ["original"],
|
||||||
"tags" : [
|
"tags" : [
|
||||||
"Blowjob ♀",
|
"Blowjob ♀",
|
||||||
"Focus Blowjob ♀",
|
"Focus Blowjob ♀",
|
||||||
|
|||||||
Reference in New Issue
Block a user