diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 03479da7..925185c7 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -103,7 +103,7 @@ Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag-Searches Sen Manga https://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/reader/ Chapters, Manga Sex.com https://www.sex.com/ Boards, Pins, related Pins, Search Results -Simply Hentai https://www.simply-hentai.com/ Galleries +Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos SlickPic https://www.slickpic.com/ Images from Users, Albums SlideShare https://www.slideshare.net/ Presentations SmugMug https://www.smugmug.com/ |smugmug-C| Optional (OAuth) diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py index 85671553..a6a3da0c 100644 --- a/gallery_dl/extractor/simplyhentai.py +++ b/gallery_dl/extractor/simplyhentai.py @@ -8,16 +8,14 @@ """Extract hentai-manga from https://www.simply-hentai.com/""" -from .common import GalleryExtractor +from .common import GalleryExtractor, Extractor, Message from .. import text, util, exception -import json class SimplyhentaiGalleryExtractor(GalleryExtractor): """Extractor for image galleries from simply-hentai.com""" category = "simplyhentai" archive_fmt = "{image_id}" - root = "https://www.simply-hentai.com" pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com" r"(?!/(?:album|gifs?|images?|series)(?:/|$))" r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)") @@ -25,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): (("https://original-work.simply-hentai.com" "/amazon-no-hiyaku-amazon-elixir"), { "url": "258289249990502c3138719cb89e995a60861e49", - "keyword": "8b2400e4b466e8f46802fa5a6b917d2788bb7e8e", + "keyword": "eba83ccdbab3022a2280c77aa747f9458196138b", }), ("https://www.simply-hentai.com/notfound", { "exception": exception.GalleryDLException, @@ -42,30 +40,145 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): self.session.headers["Referer"] = url def metadata(self, page): - path = text.extract(page, 'Series', '')), + "language" : text.remove_html(extr( + 'box-title">Language', '')) or None, + "characters": split(extr('box-title">Characters', '')), + "tags" : split(extr('box-title">Tags', '')), + "artist" : split(extr('box-title">Artists', '')), + "date" : text.parse_datetime(text.remove_html( + extr('Uploaded', '')), "%d.%m.%Y"), } + data["lang"] = util.language_to_code(data["language"]) + return data def images(self, _): + url = self.chapter_url + "/all-pages" + headers = {"Accept": "application/json"} + images = self.request(url, headers=headers).json() return [ - (image["sizes"]["full"], {"image_id": image["id"]}) - for image in self.manga["images"] + (urls["full"], {"image_id": text.parse_int(image_id)}) + for image_id, urls in sorted(images.items()) ] + + +class SimplyhentaiImageExtractor(Extractor): + """Extractor for individual images from simply-hentai.com""" + category = "simplyhentai" + subcategory = "image" + directory_fmt = ("{category}", "{type}s") + filename_fmt = "{category}_{token}{title:?_//}.{extension}" + archive_fmt = "{token}" + pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com" + r"/(image|gif)/[^/?&#]+)") + test = ( + (("https://www.simply-hentai.com/image" + "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), { + "url": "0338eb137830ab6f81e5f410d3936ef785d063d9", + "keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2", + }), + ("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", { + "url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1", + "keyword": "dd97a4bb449c397d6fec9f43a1303c0fb168ae65", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.page_url = "https://www." + match.group(1) + self.type = match.group(2) + + def items(self): + extr = text.extract_from(self.request(self.page_url).text) + title = extr('"og:title" content="' , '"') + descr = extr('"og:description" content="', '"') + url = extr('"image":"' , '&') + url = extr(""content":"", "&") or url + + tags = text.extract(descr, " tagged with ", " online for free ")[0] + if tags: + tags = tags.split(", ") + tags[-1] = tags[-1].partition(" ")[2] + else: + tags = [] + + data = text.nameext_from_url(url, { + "title": text.unescape(title) if title else "", + "tags": tags, + "type": self.type, + }) + data["token"] = data["filename"].rpartition("_")[2] + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, url, data + + +class SimplyhentaiVideoExtractor(Extractor): + """Extractor for hentai videos from simply-hentai.com""" + category = "simplyhentai" + subcategory = "video" + directory_fmt = ("{category}", "{type}s") + filename_fmt = "{title}{episode:?_//>02}.{extension}" + archive_fmt = "{title}_{episode}" + pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)" + test = ( + ("https://videos.simply-hentai.com/creamy-pie-episode-02", { + "pattern": r"https://www\.googleapis\.com/drive/v3/files" + r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+", + "keyword": "706790708b14773efc1e075ddd3b738a375348a5", + "count": 1, + }), + (("https://videos.simply-hentai.com" + "/1715-tifa-in-hentai-gang-bang-3d-movie"), { + "url": "ad9a36ae06c601b6490e3c401834b4949d947eb0", + "keyword": "f9dad94fbde9c95859e631ff4f07297a9567b874", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.page_url = "https://" + match.group(1) + + def items(self): + page = self.request(self.page_url).text + + title, pos = text.extract(page, "", "") + tags , pos = text.extract(page, ">Tags", "", pos) + date , pos = text.extract(page, ">Upload Date", "", pos) + title = title.rpartition(" - ")[0] + + if "', pos) + embed_url = text.extract(page, 'src="', '"', pos)[0].replace( + "embedplayer.php?link=", "embed.php?name=") + embed_page = self.request(embed_url).text + video_url = text.extract(embed_page, '"file":"', '"')[0] + title, _, episode = title.rpartition(" Episode ") + + data = text.nameext_from_url(video_url, { + "title": text.unescape(title), + "episode": text.parse_int(episode), + "tags": text.split_html(tags)[::2], + "type": "video", + "date": text.parse_datetime(text.remove_html( + date), "%B %d, %Y %H:%M"), + }) + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, video_url, data