diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index e72dad9a..64ae1e12 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -8,77 +8,75 @@ """Extract images from https://hitomi.la/""" -from .common import Extractor, Message +from .common import ChapterExtractor from .. import text, util import string -class HitomiGalleryExtractor(Extractor): +class HitomiGalleryExtractor(ChapterExtractor): """Extractor for image galleries from hitomi.la""" category = "hitomi" subcategory = "gallery" directory_fmt = ["{category}", "{gallery_id} {title}"] - filename_fmt = "{category}_{gallery_id}_{num:>03}_{name}.{extension}" - archive_fmt = "{gallery_id}_{num}" - pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"] - test = [("https://hitomi.la/galleries/867789.html", { - "url": "e42a47dfadda93e4bf37e82b1dc9ad29edfa9130", - "keyword": "c007cd41229d727b2ced3b364350561444738351", - })] + filename_fmt = "{category}_{gallery_id}_{page:>03}_{name}.{extension}" + archive_fmt = "{gallery_id}_{page}" + pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)"] + test = [ + ("https://hitomi.la/galleries/867789.html", { + "url": "cb759868d090fe0e2655c3e29ebf146054322b6d", + "keyword": "b1e66ff971fc8cb80240a687f508f3b74053f799", + }), + ("https://hitomi.la/reader/867789.html", None), + ] def __init__(self, match): - Extractor.__init__(self) - self.gid = match.group(1) + self.gid = util.safe_int(match.group(1)) + url = "https://hitomi.la/galleries/{}.html".format(self.gid) + ChapterExtractor.__init__(self, url) - def items(self): - url = "https://hitomi.la/galleries/" + self.gid + ".html" - page = self.request(url).text - data = self.get_job_metadata(page) - images = self.get_image_urls(page) - data["count"] = len(images) - yield Message.Version, 1 - yield Message.Directory, data - for data["num"], url in enumerate(images, 1): - yield Message.Url, url, text.nameext_from_url(url, data) + def get_metadata(self, page, extr=text.extract): + pos = page.index('