diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 08601d32..0bcba729 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -36,6 +36,7 @@ Hentai Foundry https://www.hentai-foundry.com/ |hentaifoundry-C| Hentai2Read https://hentai2read.com/ Chapters, Manga HentaiFox https://hentaifox.com/ Galleries, Search Results HentaiHere https://hentaihere.com/ Chapters, Manga +Hentainexus https://hentainexus.com/ Galleries Hitomi.la https://hitomi.la/ Galleries Hypnohub https://hypnohub.net/ Pools, Popular Images, Posts, Tag-Searches Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 60d1f64d..c7294c95 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -34,6 +34,7 @@ modules = [ "hentaifoundry", "hentaifox", "hentaihere", + "hentainexus", "hitomi", "hypnohub", "idolcomplex", diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py new file mode 100644 index 00000000..a71134c2 --- /dev/null +++ b/gallery_dl/extractor/hentainexus.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://hentainexus.com/""" + +from .common import GalleryExtractor +from .. 
import text, util
import json


class HentainexusGalleryExtractor(GalleryExtractor):
    """Extractor for image galleries on hentainexus.com"""
    category = "hentainexus"
    root = "https://hentainexus.com"
    # Matches both the gallery overview (/view/<id>) and the reader
    # (/read/<id>) URL; the numeric gallery ID is the only capture group.
    pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com"
               r"/(?:view|read)/(\d+)")
    test = (
        ("https://hentainexus.com/view/5688", {
            "url": "57238d6e76a199298c9866bbcfaa111c0fa164b0",
            "keyword": "5b254937a180b5c2cef303324cd5f7f6fec98d55",
        }),
        ("https://hentainexus.com/read/5688"),
    )

    # Labels of the "viewcolumn" rows read by metadata(), in the order
    # they are looked up; each one is stored under its lowercase name.
    _info_fields = (
        "Artist",
        "Book",
        "Language",
        "Magazine",
        "Parody",
        "Publisher",
        "Description",
    )

    def __init__(self, match):
        """Store the gallery ID and initialise the base class.

        Whichever URL form matched, the base class is always given the
        '/view/<id>' URL of the gallery.
        """
        self.gallery_id = match.group(1)
        url = "{}/view/{}".format(self.root, self.gallery_id)
        GalleryExtractor.__init__(self, match, url)

    def metadata(self, page):
        """Return a dict of gallery metadata parsed from 'page'.

        'page' is expected to be the HTML text of the '/view/<id>' page
        (presumably fetched by GalleryExtractor from the URL given in
        __init__ -- confirm in .common).
        """
        # NOTE(review): 'extr' appears to consume the page sequentially,
        # so the lookups below are kept in their original order --
        # confirm against text.extract_from before reordering anything.
        extr = text.extract_from(page)

        # NOTE(review): the HTML markers '<h1 class="title">', '</h1>'
        # and '</td>' were restored by hand after the tags had been
        # stripped from this file -- verify them against the live markup.
        data = {
            "gallery_id": text.parse_int(self.gallery_id),
            "tags": extr('"og:description" content="', '"').split(", "),
            "thumbnail": extr('"og:image" content="', '"'),
            "title": extr('<h1 class="title">', '</h1>'),
        }

        # The extracted cell still contains markup (it starts inside the
        # label cell's class attribute), hence the remove_html() pass.
        for label in self._info_fields:
            cell = extr('viewcolumn">' + label, '</td>')
            data[label.lower()] = text.remove_html(cell)

        data["lang"] = util.language_to_code(data["language"])
        return data

    def images(self, page):
        """Return a list of (image URL, None) tuples for this gallery.

        The 'page' argument is not used; the image list is read from the
        separately requested '/read/<id>' page instead.
        """
        url = "{}/read/{}".format(self.root, self.gallery_id)
        extr = text.extract_from(self.request(url).text)

        # First argument of initReader(...) is parsed as a JSON array of
        # image names; the closing bracket is consumed as the end marker
        # and therefore has to be appended again before parsing.
        names = json.loads(extr("initReader(", "]") + "]")
        # The next double-quoted string after the array is prepended to
        # every image name to form its URL.
        base = extr('"', '"')

        return [(base + name, None) for name in names]