diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index 3318aa54..b3654723 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -67,6 +67,7 @@ Sankaku Channel      https://chan.sankakucomplex.com/    Pools, Posts, Tag-Searc
 Sea Otter Scans      https://reader.seaotterscans.com/   Chapters, Manga
 Sen Manga            http://raw.senmanga.com/            Chapters
 Sense-Scans          http://sensescans.com/              Chapters, Manga
+Simply Hentai        https://www.simply-hentai.com/      Galleries
 SlideShare           https://www.slideshare.net/         Presentations
 SmugMug              https://www.smugmug.com/            |Albums, individ-5|                                Optional (OAuth)
 Subapics             https://subapics.com/               Chapters, Manga
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 59213681..680c75a4 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -74,6 +74,7 @@ modules = [
     "seiga",
     "senmanga",
     "sensescans",
+    "simplyhentai",
     "slideshare",
     "smugmug",
     "subapics",
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
new file mode 100644
index 00000000..68d30774
--- /dev/null
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract hentai-manga from https://www.simply-hentai.com/"""
+
+from .common import ChapterExtractor
+from .. import text, util, exception
+
+
+class SimplyhentaiGalleryExtractor(ChapterExtractor):
+    """Extractor for image galleries from simply-hentai.com"""
+    category = "simplyhentai"
+    subcategory = "gallery"
+    directory_fmt = ["{category}", "{gallery_id} {title}"]
+    filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
+    archive_fmt = "{image_id}"
+    pattern = [r"(?:https?://)?(?!videos)[^.]+\.simply-hentai\.com"
+               r"(?:/(?!page|series|album|all-pages)[^/?&#]+)+"]
+    test = [
+        (("https://original-work.simply-hentai.com"
+          "/amazon-no-hiyaku-amazon-elixir"), {
+            "url": "35f3843d0ea83e6a618df7afaebd2b03f3628db9",
+            "keyword": "1e22ccbe66412eab844f135ad9cd3424b8b064e8",
+        }),
+        ("https://www.simply-hentai.com/notfound", {
+            "exception": exception.GalleryDLException,
+        }),
+        # custom subdomain
+        ("https://pokemon.simply-hentai.com/mao-friends-9bc39", None),
+        # www subdomain, two path segments
+        ("https://www.simply-hentai.com/vocaloid/black-magnet", None),
+    ]
+
+    def __init__(self, match):
+        """Pass the matched URL to the base class and send it as Referer"""
+        ChapterExtractor.__init__(self, match.group(0))
+        self.session.headers["Referer"] = match.group(0)
+
+    def get_metadata(self, page):
+        """Return a dict of gallery metadata extracted from 'page'"""
+        extr = text.extract
+        # NOTE(review): the title/gallery-id lookups, the not-found check
+        # and all begin/end markers below were reconstructed after markup
+        # stripping damaged this patch -- verify against upstream.
+        title , pos = extr(page, '<meta property="og:title" content="', '"')
+        if not title:
+            raise exception.NotFoundError("gallery")
+        gid   , pos = extr(page, '/Album/', '/', pos)
+        series, pos = extr(page, 'box-title">Series</div>', '</div>', pos)
+        lang  , pos = extr(page, 'box-title">Language</div>', '</div>', pos)
+        chars , pos = extr(page, 'box-title">Characters</div>', '</div>', pos)
+        tags  , pos = extr(page, 'box-title">Tags</div>', '</div>', pos)
+        artist, pos = extr(page, 'box-title">Artists</div>', '</div>', pos)
+        date  , pos = extr(page, 'Uploaded', '</div>', pos)
+        lang = text.remove_html(lang) if lang else None
+
+        return {
+            "gallery_id": text.parse_int(gid),
+            "title": text.unescape(title),
+            "series": text.remove_html(series),
+            "characters": ", ".join(text.split_html(chars)),
+            "tags": ", ".join(text.split_html(tags)),
+            "artist": ", ".join(text.split_html(artist)),
+            "lang": util.language_to_code(lang),
+            "language": lang,
+            "date": text.remove_html(date),
+        }
+
+    def get_images(self, _):
+        """Return a list of (url, metadata) tuples for all images"""
+        images = self.request(self.url + "/all-pages.json").json()
+        # JSON object keys are strings, so this sort is lexicographic
+        return [
+            (urls["full"], {"image_id": text.parse_int(image_id)})
+            for image_id, urls in sorted(images.items())
+        ]
diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py
index 16567b63..987f6293 100755
--- a/scripts/build_supportedsites.py
+++ b/scripts/build_supportedsites.py
@@ -52,6 +52,7 @@ CATEGORY_MAP = {
     "seiga"          : "Niconico Seiga",
     "senmanga"       : "Sen Manga",
     "sensescans"     : "Sense-Scans",
+    "simplyhentai"   : "Simply Hentai",
     "slideshare"     : "SlideShare",
     "smugmug"        : "SmugMug",
     "thebarchive"    : "The /b/ Archive",