diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bf6465ee..cb5049c5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -658,7 +658,7 @@ Consider all listed sites to potentially be NSFW. My Hentai Gallery https://myhentaigallery.com/ - Galleries + Galleries, Tag Searches diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index 2a39dc92..936f857a 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -6,17 +6,21 @@ """Extractors for https://myhentaigallery.com/""" -from .common import GalleryExtractor +from .common import Extractor, GalleryExtractor, Message from .. import text, exception +BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com" -class MyhentaigalleryGalleryExtractor(GalleryExtractor): - """Extractor for image galleries from myhentaigallery.com""" + +class MyhentaigalleryBase(): category = "myhentaigallery" root = "https://myhentaigallery.com" + + +class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor): + """Extractor for image galleries from myhentaigallery.com""" directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}") - pattern = (r"(?:https?://)?myhentaigallery\.com" - r"/g(?:allery/(?:thumbnails|show))?/(\d+)") + pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)" example = "https://myhentaigallery.com/g/12345" def __init__(self, match): @@ -53,3 +57,32 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor): "/thumbnail/", "/original/"), None) for url in text.extract_iter(page, 'class="comic-thumb"', '') ] + + +class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor): + """Extractor for myhentaigallery tag searches""" + subcategory = "tag" + pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)" + example = "https://myhentaigallery.com/g/category/123" + + def items(self): + data = {"_extractor": MyhentaigalleryGalleryExtractor} + for url in self.galleries(): + yield Message.Queue, url, data + + def galleries(self): + root = self.root + url = root + self.groups[0] + + while True: + page = self.request(url).text + + for inner in text.extract_iter( + page, '
', "Next<") + except ValueError: + return + url = root + text.rextr(page, 'href="', '"', pos) diff --git a/test/results/myhentaigallery.py b/test/results/myhentaigallery.py index a90e067a..80034d43 100644 --- a/test/results/myhentaigallery.py +++ b/test/results/myhentaigallery.py @@ -35,4 +35,26 @@ __tests__ = ( "#class" : myhentaigallery.MyhentaigalleryGalleryExtractor, }, +{ + "#url" : "https://myhentaigallery.com/g/artist/8084?sorting=favorite", + "#class" : myhentaigallery.MyhentaigalleryTagExtractor, + "#pattern" : myhentaigallery.MyhentaigalleryGalleryExtractor.pattern, + "#count" : 18, +}, + +{ + "#url" : "https://myhentaigallery.com/g/group/2", + "#class" : myhentaigallery.MyhentaigalleryTagExtractor, +}, + +{ + "#url" : "https://myhentaigallery.com/g/parody/8239", + "#class" : myhentaigallery.MyhentaigalleryTagExtractor, +}, + +{ + "#url" : "https://myhentaigallery.com/g/category/59", + "#class" : myhentaigallery.MyhentaigalleryTagExtractor, +}, + )