add more extractor test-cases
This commit is contained in:
@@ -18,6 +18,10 @@ class BatotoExtractor(AsynchronousExtractor):
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
|
||||
test = [("http://bato.to/reader#df48fa98f7d41851", {
|
||||
"url": "ab0526091f65b8eda7a8866b937adbdb468d68b1",
|
||||
"keyword": "ef14d3230aa0872e8d9e4236ed9160755f78aeb3",
|
||||
})]
|
||||
url = "https://bato.to/areader"
|
||||
|
||||
def __init__(self, match):
|
||||
|
||||
@@ -21,7 +21,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"]
|
||||
test = [("https://danbooru.donmai.us/posts?tags=heath_ledger", {
|
||||
"url": "a261c33f117c7395f0eac54091075e67c8e66fca",
|
||||
"keyword": "86c1e0fc2879befe90cf8b4404264338b8063685",
|
||||
"keyword": "fc4685c98aedaf2383384d47af4f7bd257c40f32",
|
||||
})]
|
||||
|
||||
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
|
||||
|
||||
@@ -19,6 +19,10 @@ class DeviantArtUserExtractor(AsynchronousExtractor):
|
||||
directory_fmt = ["{category}", "{artist}"]
|
||||
filename_fmt = "{category}_{index}_{title}.{extension}"
|
||||
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
|
||||
test = [("http://shimoda7.deviantart.com/gallery/", {
|
||||
"url": "63bfa8efba199e27181943c9060f6770f91a8441",
|
||||
"keyword": "c0343b41c28c87254a3f0b3241222e94f780701e",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
AsynchronousExtractor.__init__(self)
|
||||
@@ -102,6 +106,10 @@ class DeviantArtImageExtractor(Extractor):
|
||||
directory_fmt = ["{category}", "{artist}"]
|
||||
filename_fmt = "{category}_{index}_{title}.{extension}"
|
||||
pattern = [r"(?:https?://)?[^\.]+\.deviantart\.com/art/.+-(\d+)"]
|
||||
test = [("http://shimoda7.deviantart.com/art/For-the-sake-of-a-memory-10073852", {
|
||||
"url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
|
||||
"keyword": "f2dfde276a39990097935ace092811c56bc0bfec",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
|
||||
@@ -11,12 +11,16 @@
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
class ImgthExtractor(Extractor):
|
||||
|
||||
class ImgthGalleryExtractor(Extractor):
|
||||
"""Extract all images of a gallery"""
|
||||
category = "imgth"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
|
||||
test = [("http://imgth.com/gallery/37/wallpaper-anime", {
|
||||
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
|
||||
"keyword": "1b15726d53bc2c08d845fa60ce538396380688df",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
@@ -31,7 +35,9 @@ class ImgthExtractor(Extractor):
|
||||
for num, url in enumerate(self.get_images(page), 1):
|
||||
data["num"] = num
|
||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
def get_images(self, page):
|
||||
"""Yield all image urls for this gallery"""
|
||||
pnum = 0
|
||||
while True:
|
||||
pos = 0
|
||||
@@ -48,15 +54,10 @@ class ImgthExtractor(Extractor):
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
}
|
||||
data, _ = text.extract_all(page, (
|
||||
return text.extract_all(page, (
|
||||
("title", '<h1>', '</h1>'),
|
||||
("count", 'total of images in this gallery: ', ' '),
|
||||
("date" , 'created on ', ' by <'),
|
||||
(None , 'href="/users/', ''),
|
||||
("user" , '>', '<'),
|
||||
), values=data)
|
||||
return data
|
||||
), values={"category": self.category, "gallery-id": self.gid})[0]
|
||||
|
||||
@@ -18,6 +18,10 @@ class ImgurExtractor(Extractor):
|
||||
directory_fmt = ["{category}", "{album-key} - {title}"]
|
||||
filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
|
||||
test = [("https://imgur.com/a/TcBmP", {
|
||||
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
|
||||
"keyword": "5c96eee4df5938ed37f1f95f5c4ef64444bddeb4",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
|
||||
@@ -28,6 +28,9 @@ class KissmangaMangaExtractor(KissmangaExtractor):
|
||||
"""Extract all manga-chapters from kissmanga"""
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/[^/]+/?$"]
|
||||
test = [("http://kissmanga.com/Manga/Dropout", {
|
||||
"url": "992befdd64e178fe5af67de53f8b510860d968ca",
|
||||
})]
|
||||
|
||||
def items(self):
|
||||
cloudflare.bypass_ddos_protection(self.session, self.url_base)
|
||||
@@ -47,6 +50,16 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
"""Extract a single manga-chapter from kissmanga"""
|
||||
subcategory = "chapter"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"]
|
||||
test = [
|
||||
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
|
||||
"url": "d9ce171d3d4c0493dbb169a5641ac4cd45b0cb3a",
|
||||
"keyword": "892c3e4df03a575a282a5695add986a49623d746",
|
||||
}),
|
||||
("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
|
||||
"url": "b8dbf9fca1d8c942ca5fefd10299da49e8399081",
|
||||
"keyword": "0a98952984941cc2a11892b1cd7b237ffb20adaa",
|
||||
})
|
||||
]
|
||||
|
||||
def items(self):
|
||||
cloudflare.bypass_ddos_protection(self.session, "http://kissmanga.com")
|
||||
|
||||
@@ -18,6 +18,10 @@ class NhentaiExtractor(Extractor):
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
|
||||
test = [("http://nhentai.net/g/147850/", {
|
||||
"url": "199ddd07dded0f69282e09a372710698ea21ab8e",
|
||||
"keyword": "926be7e9c6684a0d477fce9b48b76df747937037",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
|
||||
@@ -17,6 +17,10 @@ class NijieUserExtractor(AsynchronousExtractor):
|
||||
directory_fmt = ["{category}", "{artist-id}"]
|
||||
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"]
|
||||
test = [("https://nijie.info/members_illust.php?id=44", {
|
||||
"url": "585d821df4716b1098660a0be426d01db4b65f2a",
|
||||
"keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4",
|
||||
})]
|
||||
popup_url = "https://nijie.info/view_popup.php?id="
|
||||
|
||||
def __init__(self, match):
|
||||
|
||||
@@ -24,6 +24,10 @@ class PowerMangaExtractor(Extractor):
|
||||
r"(.+/([a-z]{2})/\d+/\d+)(?:/page)?"),
|
||||
(r"(?:https?://)?(?:www\.)?(p)owermanga\.org/((?:[^-]+-)+[^-]+/?)"),
|
||||
]
|
||||
test = [("http://read.powermanga.org/read/one_piece/en/0/803/page/1", {
|
||||
"url": "e6179c1565068f99180620281f86bdd25be166b4",
|
||||
"keyword": "ef17bbc6a9ab0390a31f1508e825ddce35f2d2b1",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
|
||||
@@ -16,6 +16,9 @@ class SpectrumNexusMangaExtractor(Extractor):
|
||||
category = "spectrumnexus"
|
||||
subcategory = "manga"
|
||||
pattern = [r"(?:https?://)?view\.thespectrum\.net/series/([^\.]+)\.html$"]
|
||||
test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", {
|
||||
"url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb",
|
||||
})]
|
||||
url_base = "http://view.thespectrum.net/series/"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -45,6 +48,10 @@ class SpectrumNexusChapterExtractor(AsynchronousExtractor):
|
||||
r"\?ch=(Chapter\+(\d+)|Volume\+(\d+))"),
|
||||
(r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)"),
|
||||
]
|
||||
test = [("http://view.thespectrum.net/series/toriko.html?ch=Chapter+343&page=1", {
|
||||
"url": "c0fc7dc594841217cc622a67edd79f06e9900333",
|
||||
"keyword": "bde9c95a2d0feca0574c7248ed06f1684f86b2ac",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
AsynchronousExtractor.__init__(self)
|
||||
|
||||
Reference in New Issue
Block a user