add more extractor test-cases

This commit is contained in:
Mike Fährmann
2015-12-13 04:36:44 +01:00
parent f0e9e8e0dc
commit 2449532132
10 changed files with 59 additions and 10 deletions

View File

@@ -18,6 +18,10 @@ class BatotoExtractor(AsynchronousExtractor):
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
test = [("http://bato.to/reader#df48fa98f7d41851", {
"url": "ab0526091f65b8eda7a8866b937adbdb468d68b1",
"keyword": "ef14d3230aa0872e8d9e4236ed9160755f78aeb3",
})]
url = "https://bato.to/areader"
def __init__(self, match):

View File

@@ -21,7 +21,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"]
test = [("https://danbooru.donmai.us/posts?tags=heath_ledger", {
"url": "a261c33f117c7395f0eac54091075e67c8e66fca",
"keyword": "86c1e0fc2879befe90cf8b4404264338b8063685",
"keyword": "fc4685c98aedaf2383384d47af4f7bd257c40f32",
})]
class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):

View File

@@ -19,6 +19,10 @@ class DeviantArtUserExtractor(AsynchronousExtractor):
directory_fmt = ["{category}", "{artist}"]
filename_fmt = "{category}_{index}_{title}.{extension}"
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
test = [("http://shimoda7.deviantart.com/gallery/", {
"url": "63bfa8efba199e27181943c9060f6770f91a8441",
"keyword": "c0343b41c28c87254a3f0b3241222e94f780701e",
})]
def __init__(self, match):
AsynchronousExtractor.__init__(self)
@@ -102,6 +106,10 @@ class DeviantArtImageExtractor(Extractor):
directory_fmt = ["{category}", "{artist}"]
filename_fmt = "{category}_{index}_{title}.{extension}"
pattern = [r"(?:https?://)?[^\.]+\.deviantart\.com/art/.+-(\d+)"]
test = [("http://shimoda7.deviantart.com/art/For-the-sake-of-a-memory-10073852", {
"url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
"keyword": "f2dfde276a39990097935ace092811c56bc0bfec",
})]
def __init__(self, match):
Extractor.__init__(self)

View File

@@ -11,12 +11,16 @@
from .common import Extractor, Message
from .. import text
class ImgthExtractor(Extractor):
class ImgthGalleryExtractor(Extractor):
"""Extract all images of a gallery"""
category = "imgth"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
test = [("http://imgth.com/gallery/37/wallpaper-anime", {
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
"keyword": "1b15726d53bc2c08d845fa60ce538396380688df",
})]
def __init__(self, match):
Extractor.__init__(self)
@@ -31,7 +35,9 @@ class ImgthExtractor(Extractor):
for num, url in enumerate(self.get_images(page), 1):
data["num"] = num
yield Message.Url, url, text.nameext_from_url(url, data)
def get_images(self, page):
"""Yield all image urls for this gallery"""
pnum = 0
while True:
pos = 0
@@ -48,15 +54,10 @@ class ImgthExtractor(Extractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"category": self.category,
"gallery-id": self.gid,
}
data, _ = text.extract_all(page, (
return text.extract_all(page, (
("title", '<h1>', '</h1>'),
("count", 'total of images in this gallery: ', ' '),
("date" , 'created on ', ' by <'),
(None , 'href="/users/', ''),
("user" , '>', '<'),
), values=data)
return data
), values={"category": self.category, "gallery-id": self.gid})[0]

View File

@@ -18,6 +18,10 @@ class ImgurExtractor(Extractor):
directory_fmt = ["{category}", "{album-key} - {title}"]
filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
test = [("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": "5c96eee4df5938ed37f1f95f5c4ef64444bddeb4",
})]
def __init__(self, match):
Extractor.__init__(self)

View File

@@ -28,6 +28,9 @@ class KissmangaMangaExtractor(KissmangaExtractor):
"""Extract all manga-chapters from kissmanga"""
subcategory = "manga"
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/[^/]+/?$"]
test = [("http://kissmanga.com/Manga/Dropout", {
"url": "992befdd64e178fe5af67de53f8b510860d968ca",
})]
def items(self):
cloudflare.bypass_ddos_protection(self.session, self.url_base)
@@ -47,6 +50,16 @@ class KissmangaChapterExtractor(KissmangaExtractor):
"""Extract a single manga-chapter from kissmanga"""
subcategory = "chapter"
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"]
test = [
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
"url": "d9ce171d3d4c0493dbb169a5641ac4cd45b0cb3a",
"keyword": "892c3e4df03a575a282a5695add986a49623d746",
}),
("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
"url": "b8dbf9fca1d8c942ca5fefd10299da49e8399081",
"keyword": "0a98952984941cc2a11892b1cd7b237ffb20adaa",
})
]
def items(self):
cloudflare.bypass_ddos_protection(self.session, "http://kissmanga.com")

View File

@@ -18,6 +18,10 @@ class NhentaiExtractor(Extractor):
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
test = [("http://nhentai.net/g/147850/", {
"url": "199ddd07dded0f69282e09a372710698ea21ab8e",
"keyword": "926be7e9c6684a0d477fce9b48b76df747937037",
})]
def __init__(self, match):
Extractor.__init__(self)

View File

@@ -17,6 +17,10 @@ class NijieUserExtractor(AsynchronousExtractor):
directory_fmt = ["{category}", "{artist-id}"]
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"]
test = [("https://nijie.info/members_illust.php?id=44", {
"url": "585d821df4716b1098660a0be426d01db4b65f2a",
"keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4",
})]
popup_url = "https://nijie.info/view_popup.php?id="
def __init__(self, match):

View File

@@ -24,6 +24,10 @@ class PowerMangaExtractor(Extractor):
r"(.+/([a-z]{2})/\d+/\d+)(?:/page)?"),
(r"(?:https?://)?(?:www\.)?(p)owermanga\.org/((?:[^-]+-)+[^-]+/?)"),
]
test = [("http://read.powermanga.org/read/one_piece/en/0/803/page/1", {
"url": "e6179c1565068f99180620281f86bdd25be166b4",
"keyword": "ef17bbc6a9ab0390a31f1508e825ddce35f2d2b1",
})]
def __init__(self, match):
Extractor.__init__(self)

View File

@@ -16,6 +16,9 @@ class SpectrumNexusMangaExtractor(Extractor):
category = "spectrumnexus"
subcategory = "manga"
pattern = [r"(?:https?://)?view\.thespectrum\.net/series/([^\.]+)\.html$"]
test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", {
"url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb",
})]
url_base = "http://view.thespectrum.net/series/"
def __init__(self, match):
@@ -45,6 +48,10 @@ class SpectrumNexusChapterExtractor(AsynchronousExtractor):
r"\?ch=(Chapter\+(\d+)|Volume\+(\d+))"),
(r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)"),
]
test = [("http://view.thespectrum.net/series/toriko.html?ch=Chapter+343&page=1", {
"url": "c0fc7dc594841217cc622a67edd79f06e9900333",
"keyword": "bde9c95a2d0feca0574c7248ed06f1684f86b2ac",
})]
def __init__(self, match):
AsynchronousExtractor.__init__(self)