diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 0f65a2ff..273c092d 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -81,6 +81,7 @@ MangaPark https://mangapark.net/ Chapters, Manga Mangareader https://www.mangareader.net/ Chapters, Manga Mangoxo https://www.mangoxo.com/ Albums, Channels Optional mastodon.social https://mastodon.social/ Images from Statuses, User Profiles Optional (`OAuth `__) +My Hentai Gallery https://myhentaigallery.com/ Galleries Naver https://blog.naver.com/ Blogs, Posts Newgrounds https://www.newgrounds.com/ |newgrounds-C| Optional Ngomik http://ngomik.in/ Chapters diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index ccbfb3c1..4a43d57c 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann -# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. @@ -15,16 +13,30 @@ from .. import text, exception class MyhentaigalleryGalleryExtractor(GalleryExtractor): """Extractor for image galleries from myhentaigallery.com""" category = "myhentaigallery" - directory_fmt = ("{category}", "{gallery_id} [{artist}] {title}") - pattern = (r"(?:https?://)?(myhentaigallery\.com" - r"/gallery/thumbnails/[0-9]+)") + directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}") + pattern = (r"(?:https?://)?myhentaigallery\.com" + r"/gallery/(?:thumbnails|show)/(\d+)") test = ( - ("https://myhentaigallery.com/gallery/thumbnails/16247"), - ("https://myhentaigallery.com/gallery/thumbnails/15224"), + ("https://myhentaigallery.com/gallery/thumbnails/16247", { + "pattern": r"https://images.myhentaigrid.com/imagesgallery/images" + r"/[^/]+/original/\d+\.jpg", + "keyword": { + "artist" : list, + "count" : 11, + "gallery_id": 16247, + "group" : list, + "parodies" : list, + "tags" : ["Giantess"], + "title" : "Attack Of The 50ft Woman 1", + }, + }), + ("https://myhentaigallery.com/gallery/show/16247/1"), ) + root = "https://myhentaigallery.com" def __init__(self, match): - url = "https://" + match.group(1) + self.gallery_id = match.group(1) + url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) self.session.headers["Referer"] = url @@ -32,22 +44,22 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor): extr = text.extract_from(page) split = text.split_html - image = extr('
\n\n

', '

') if not title: raise exception.NotFoundError("gallery") - data = { + + return { "title" : text.unescape(title), - "gallery_id": text.parse_int(image.split("/")[-2]), + "gallery_id": text.parse_int(self.gallery_id), "tags" : split(extr('
\nCategories:', '
')), + "artist" : split(extr('
\nArtists:' , '
')), + "group" : split(extr('
\nGroups:' , '
')), + "parodies" : split(extr('
\nParodies:' , '
')), } - artists = split(extr('
\nArtists:', '
')) - data["artist"] = artists[0] if artists else "Unknown" - return data def images(self, page): - extr = text.extract_iter return [ - (text.unescape(url).replace("/thumbnail/", "/original/"), None) - for url in extr(page, 'class="comic-thumb">\n') ] diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 07240921..2a23c210 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -60,7 +60,7 @@ CATEGORY_MAP = { "mangapark" : "MangaPark", "mangastream" : "Manga Stream", "mastodon.social": "mastodon.social", - "myhentaigallery": "My Hentai Gallery" + "myhentaigallery": "My Hentai Gallery", "myportfolio" : "Adobe Portfolio", "nhentai" : "nhentai", "nijie" : "nijie",