[myhentaigallery] add 'tag' extractor (#8537)
This commit is contained in:
@@ -658,7 +658,7 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr id="myhentaigallery" title="myhentaigallery">
|
<tr id="myhentaigallery" title="myhentaigallery">
|
||||||
<td>My Hentai Gallery</td>
|
<td>My Hentai Gallery</td>
|
||||||
<td>https://myhentaigallery.com/</td>
|
<td>https://myhentaigallery.com/</td>
|
||||||
<td>Galleries</td>
|
<td>Galleries, Tag Searches</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr id="naver-blog" title="naver-blog">
|
<tr id="naver-blog" title="naver-blog">
|
||||||
|
|||||||
@@ -6,17 +6,21 @@
|
|||||||
|
|
||||||
"""Extractors for https://myhentaigallery.com/"""
|
"""Extractors for https://myhentaigallery.com/"""
|
||||||
|
|
||||||
from .common import GalleryExtractor
|
from .common import Extractor, GalleryExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text, exception
|
||||||
|
|
||||||
|
BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
|
||||||
|
|
||||||
class MyhentaigalleryGalleryExtractor(GalleryExtractor):
|
|
||||||
"""Extractor for image galleries from myhentaigallery.com"""
|
class MyhentaigalleryBase():
|
||||||
category = "myhentaigallery"
|
category = "myhentaigallery"
|
||||||
root = "https://myhentaigallery.com"
|
root = "https://myhentaigallery.com"
|
||||||
|
|
||||||
|
|
||||||
|
class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor):
|
||||||
|
"""Extractor for image galleries from myhentaigallery.com"""
|
||||||
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
|
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
|
||||||
pattern = (r"(?:https?://)?myhentaigallery\.com"
|
pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)"
|
||||||
r"/g(?:allery/(?:thumbnails|show))?/(\d+)")
|
|
||||||
example = "https://myhentaigallery.com/g/12345"
|
example = "https://myhentaigallery.com/g/12345"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -53,3 +57,32 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
|
|||||||
"/thumbnail/", "/original/"), None)
|
"/thumbnail/", "/original/"), None)
|
||||||
for url in text.extract_iter(page, 'class="comic-thumb"', '</div>')
|
for url in text.extract_iter(page, 'class="comic-thumb"', '</div>')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor):
    """Extractor for myhentaigallery tag searches

    Matches artist, category, group, and parody listing URLs and queues
    every gallery link found on the listing pages for
    MyhentaigalleryGalleryExtractor.
    """
    subcategory = "tag"
    pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)"
    example = "https://myhentaigallery.com/g/category/123"

    def items(self):
        """Yield one Queue message per gallery URL of this listing"""
        data = {"_extractor": MyhentaigalleryGalleryExtractor}
        for url in self.galleries():
            yield Message.Queue, url, data

    def galleries(self):
        """Yield gallery URLs from every page of the listing

        Starts at the matched path (first pattern group, which includes
        any query string) and follows the link preceding the '>Next<'
        marker until a page has no such marker.
        """
        root = self.root
        url = root + self.groups[0]
        # URLs already requested; prevents an endless loop when a page's
        # "Next" link leads back to a page that was already fetched
        visited = set()

        while url not in visited:
            visited.add(url)
            page = self.request(url).text

            for inner in text.extract_iter(
                    page, '<div class="comic-inner">', "<div"):
                path = text.extr(inner, 'href="', '"')
                # skip entries without a usable href instead of
                # queueing the bare site root
                if path:
                    yield root + path

            try:
                pos = page.index(">Next<")
            except ValueError:
                # no "Next" marker: this was the last page
                return

            # the href closest before the ">Next<" marker
            path = text.rextr(page, 'href="', '"', pos)
            if not path:
                # no href could be extracted; stop rather than continue
                # from the site root
                return
            url = root + path
|
||||||
|
|||||||
@@ -35,4 +35,26 @@ __tests__ = (
|
|||||||
"#class" : myhentaigallery.MyhentaigalleryGalleryExtractor,
|
"#class" : myhentaigallery.MyhentaigalleryGalleryExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://myhentaigallery.com/g/artist/8084?sorting=favorite",
|
||||||
|
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
|
||||||
|
"#pattern" : myhentaigallery.MyhentaigalleryGalleryExtractor.pattern,
|
||||||
|
"#count" : 18,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://myhentaigallery.com/g/group/2",
|
||||||
|
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://myhentaigallery.com/g/parody/8239",
|
||||||
|
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://myhentaigallery.com/g/category/59",
|
||||||
|
"#class" : myhentaigallery.MyhentaigalleryTagExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user