From becc7f85a64af08c9aa43ea87c507ee24adf39f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 29 Dec 2021 21:09:28 +0100 Subject: [PATCH] [hitomi] fix image URLs --- gallery_dl/extractor/hitomi.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 693c43ea..88cf98c1 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -10,9 +10,11 @@ from .common import GalleryExtractor, Extractor, Message from .nozomi import decode_nozomi +from ..cache import memcache from .. import text, util import string import json +import re class HitomiGalleryExtractor(GalleryExtractor): @@ -24,7 +26,8 @@ class HitomiGalleryExtractor(GalleryExtractor): r"/(?:[^/?#]+-)?(\d+)") test = ( ("https://hitomi.la/galleries/867789.html", { - "pattern": r"https://[a-c]b.hitomi.la/images/./../[0-9a-f]+.jpg", + "pattern": r"https://[a-c]b.hitomi.la/images/1639745412/\d+" + r"/[0-9a-f]{64}\.jpg", "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae", "options": (("metadata", True),), "count": 16, @@ -36,12 +39,12 @@ class HitomiGalleryExtractor(GalleryExtractor): }), # Game CG with scenes (#321) ("https://hitomi.la/galleries/733697.html", { - "url": "0cb629ab2bfe93d994a7972f68ad2a5a64ecc161", + "url": "479d16fe92117a6a2ce81b4e702e6347922c81e3", "count": 210, }), # fallback for galleries only available through /reader/ URLs ("https://hitomi.la/galleries/1045954.html", { - "url": "b420755d56a1135104ca8ca0765f44e290db70c3", + "url": "ebc1415c5d7f634166ef7e2635b77735de1ea7a2", "count": 1413, }), # gallery with "broken" redirect @@ -134,19 +137,19 @@ class HitomiGalleryExtractor(GalleryExtractor): } def images(self, _): + # see https://ltn.hitomi.la/gg.js + gg_m, gg_b = _parse_gg(self) + result = [] for image in self.info["files"]: ihash = image["hash"] idata = text.nameext_from_url(image["name"]) # see https://ltn.hitomi.la/common.js - inum = int(ihash[-3:-1], 16) - offset = 1 if inum < 0x7c else 0 - + inum = int(ihash[-1] + ihash[-3:-1], 16) url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( - chr(97 + offset), - ihash[-1], ihash[-3:-1], ihash, - idata["extension"], + chr(97 + gg_m.get(inum, 0)), + gg_b, inum, ihash, idata["extension"], ) result.append((url, idata)) return result @@ -186,3 +189,16 @@ class HitomiTagExtractor(Extractor): for gallery_id in decode_nozomi(self.request(url).content): url = "https://hitomi.la/galleries/{}.html".format(gallery_id) yield Message.Queue, url, data + + +@memcache() +def _parse_gg(extr): + page = extr.request("https://ltn.hitomi.la/gg.js").text + + m = { + int(match.group(1)): int(match.group(2)) + for match in re.finditer(r"case (\d+): o = (\d+); break;", page) + } + b = re.search(r"b:\s*[\"'](.+)[\"']", page) + + return m, b.group(1).strip("/")