diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index bf6465ee..cb5049c5 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -658,7 +658,7 @@ Consider all listed sites to potentially be NSFW.
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 2a39dc92..936f857a 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -6,17 +6,21 @@
"""Extractors for https://myhentaigallery.com/"""
-from .common import GalleryExtractor
+from .common import Extractor, GalleryExtractor, Message
from .. import text, exception
+# URL prefix regex (optional http/https scheme + host) that the extractor
+# `pattern` attributes in this module interpolate via rf-strings.
+BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
-class MyhentaigalleryGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries from myhentaigallery.com"""
+
+class MyhentaigalleryBase():
+ """Shared category name and root URL for myhentaigallery extractors"""
category = "myhentaigallery"
root = "https://myhentaigallery.com"
+
+
+class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor):
+ """Extractor for image galleries from myhentaigallery.com"""
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
- pattern = (r"(?:https?://)?myhentaigallery\.com"
- r"/g(?:allery/(?:thumbnails|show))?/(\d+)")
+ pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)"
example = "https://myhentaigallery.com/g/12345"
def __init__(self, match):
@@ -53,3 +57,32 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
"/thumbnail/", "/original/"), None)
for url in text.extract_iter(page, 'class="comic-thumb"', '')
]
+
+
+class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor):
+ """Extractor for myhentaigallery tag searches"""
+ subcategory = "tag"
+ # The outer capture group holds the whole path from "/g/" onward,
+ # including anything after the numeric id (e.g. a "?sorting=..." query).
+ pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)"
+ example = "https://myhentaigallery.com/g/category/123"
+
+ def items(self):
+ # Emit one Message.Queue entry per URL produced by galleries(); the
+ # shared data dict names MyhentaigalleryGalleryExtractor under the
+ # "_extractor" key (presumably so queued URLs are handled by it).
+ data = {"_extractor": MyhentaigalleryGalleryExtractor}
+ for url in self.galleries():
+ yield Message.Queue, url, data
+
+ def galleries(self):
+ # Walks listing pages starting at root + self.groups[0]
+ # (presumably the outer path group of `pattern` - confirm).
+ root = self.root
+ url = root + self.groups[0]
+
+ while True:
+ page = self.request(url).text
+
+ # NOTE(review): the following lines look truncated in this copy of
+ # the patch - the loop body, the "try:" that pairs with the
+ # "except ValueError:" below, and the assignment of `pos` are not
+ # visible. Confirm against the upstream file before relying on them.
+ for inner in text.extract_iter(
+ page, '', "
Next<")
+ except ValueError:
+ # A ValueError ends the pagination loop.
+ return
+ # Next page URL: root + the href value text.rextr() returns for `pos`.
+ url = root + text.rextr(page, 'href="', '"', pos)
diff --git a/test/results/myhentaigallery.py b/test/results/myhentaigallery.py
index a90e067a..80034d43 100644
--- a/test/results/myhentaigallery.py
+++ b/test/results/myhentaigallery.py
@@ -35,4 +35,26 @@ __tests__ = (
"#class" : myhentaigallery.MyhentaigalleryGalleryExtractor,
},
+{
+ "#url" : "https://myhentaigallery.com/g/artist/8084?sorting=favorite",
+ "#class" : myhentaigallery.MyhentaigalleryTagExtractor,
+ "#pattern" : myhentaigallery.MyhentaigalleryGalleryExtractor.pattern,
+ "#count" : 18,
+},
+
+{
+ "#url" : "https://myhentaigallery.com/g/group/2",
+ "#class" : myhentaigallery.MyhentaigalleryTagExtractor,
+},
+
+{
+ "#url" : "https://myhentaigallery.com/g/parody/8239",
+ "#class" : myhentaigallery.MyhentaigalleryTagExtractor,
+},
+
+{
+ "#url" : "https://myhentaigallery.com/g/category/59",
+ "#class" : myhentaigallery.MyhentaigalleryTagExtractor,
+},
+
)