From db1738d451af7e35afae6b86a66e61c367389d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 1 Aug 2016 15:36:56 +0200 Subject: [PATCH] [luscious] add extractor --- README.rst | 2 +- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/luscious.py | 75 ++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/extractor/luscious.py diff --git a/README.rst b/README.rst index 38798096..875e3072 100644 --- a/README.rst +++ b/README.rst @@ -44,7 +44,7 @@ Supported Sites powermanga.org, thespectrum.net * Hentai: exhentai.org, hbrowse.com, hentai2read.com, hentai-foundry.com, hitomi.la, - nhentai.net, + luscious.net, nhentai.net * Japanese: pixiv.net, nijie.info * Western: diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 14bf28bf..2d264920 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -35,6 +35,7 @@ modules = [ "khinsider", "kissmanga", "konachan", + "luscious", "mangahere", "mangamint", "mangapanda", diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py new file mode 100644 index 00000000..ff9d9ef3 --- /dev/null +++ b/gallery_dl/extractor/luscious.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +# Copyright 2016 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://luscious.net/""" + +from .common import Extractor, Message +from .. 
import text, iso639_1
from urllib.parse import urljoin


class LusciousExtractor(Extractor):
    """Extractor for image albums from luscious.net.

    Albums are identified by a section name, an album slug and a numeric
    album id, all captured from the URL. Image metadata and file URLs are
    fetched page-by-page from the site's JSON API.
    """
    category = "luscious"
    directory_fmt = ["{category}", "{gallery-id} {title}"]
    filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
    pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/c/([^/]+)/"
                r"(?:pictures/album|albums)/([^/\d]+(\d+))")]
    test = [("https://luscious.net/c/incest_manga/albums/amazon-no-hiyaku-amazon-elixir-english-decensored_261127/view/", {
        "url": "319a70261de12620d123add9b519d15b8515b503",
        "keyword": "60cc15db2619b8aee47c1527b6326be5a54f5c2f",
    })]

    def __init__(self, match):
        Extractor.__init__(self)
        # pattern groups: (section, "<slug><id>", "<id>")
        self.section, self.gpart, self.gid = match.groups()

    def items(self):
        """Yield the message stream for this album: version, directory, urls."""
        data = self.get_job_metadata()
        yield Message.Version, 1
        yield Message.Directory, data
        for url, image in self.get_images():
            # merge per-image info with the album-wide metadata
            image.update(data)
            yield Message.Url, url, image

    def get_job_metadata(self):
        """Collect metadata for extractor-job.

        Scrapes the album's HTML view page for title, image count, section,
        language and artist, and derives a two-letter language code.
        """
        url = "https://luscious.net/c/{}/albums/{}/view/".format(
            self.section, self.gpart)
        # NOTE(review): the begin/end markers below were garbled in transit
        # (markup appears to have been stripped from the patch); the
        # reconstructed '\n...'-style markers are a best guess and must be
        # verified against the live album page before use.
        data = text.extract_all(self.request(url).text, (
            ("title"   , '"og:title" content="', '"'),
            (None      , '\n  \u2022 ', ''),
            ("count"   , '\n\n    ', ' '),
            (None      , '\n\n    Section:', ''),
            ("section" , '>', '<'),
            (None      , '\n\n    Language:', ''),
            ("language", '\n ', ' '),
            # matches both "Artist:" and "artist:" by dropping the first letter
            ("artist"  , 'rtist: ', '\n'),
        ), values={"category": self.category, "gallery-id": self.gid})[0]
        data["lang"] = iso639_1.language_to_code(data["language"])
        return data

    def get_images(self):
        """Yield (url, metadata) tuples for every image in the album.

        Walks the paginated JSON API until 'paginator_complete' is set;
        'num' is a running 1-based index across all pages.
        """
        pnum = 1
        inum = 1
        apiurl = ("https://luscious.net/c/{}/pictures/album/{}/page/{{}}/.json"
                  "/?style=default").format(self.section, self.gpart)
        while True:
            data = self.request(apiurl.format(pnum)).json()
            for doc in data["documents"]:
                # 'sizes' entries are (width, height, ?, url); take largest
                width, height, _, url = doc["sizes"][-1]
                # API urls are protocol-relative; force https
                yield urljoin("https:", url), {
                    "width": width,
                    "height": height,
                    "num": inum,
                    "name": doc["title"],
                    "extension": url[url.rfind(".")+1:],
                }
                inum += 1
            if data["paginator_complete"]:
                return
            pnum += 1