add more extractor test-cases

2015-12-13 04:36:44 +01:00
parent f0e9e8e0dc
commit 2449532132
10 changed files with 59 additions and 10 deletions
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -18,6 +18,10 @@ class BatotoExtractor(AsynchronousExtractor):
    directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
    filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
    pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
+    test = [("http://bato.to/reader#df48fa98f7d41851", {
+        "url": "ab0526091f65b8eda7a8866b937adbdb468d68b1",
+        "keyword": "ef14d3230aa0872e8d9e4236ed9160755f78aeb3",
+    })]
    url = "https://bato.to/areader"

    def __init__(self, match):
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -21,7 +21,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor):
    pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"]
    test = [("https://danbooru.donmai.us/posts?tags=heath_ledger", {
        "url": "a261c33f117c7395f0eac54091075e67c8e66fca",
-        "keyword": "86c1e0fc2879befe90cf8b4404264338b8063685",
+        "keyword": "fc4685c98aedaf2383384d47af4f7bd257c40f32",
    })]

 class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor):
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -19,6 +19,10 @@ class DeviantArtUserExtractor(AsynchronousExtractor):
    directory_fmt = ["{category}", "{artist}"]
    filename_fmt = "{category}_{index}_{title}.{extension}"
    pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
+    test = [("http://shimoda7.deviantart.com/gallery/", {
+        "url": "63bfa8efba199e27181943c9060f6770f91a8441",
+        "keyword": "c0343b41c28c87254a3f0b3241222e94f780701e",
+    })]

    def __init__(self, match):
        AsynchronousExtractor.__init__(self)
@@ -102,6 +106,10 @@ class DeviantArtImageExtractor(Extractor):
    directory_fmt = ["{category}", "{artist}"]
    filename_fmt = "{category}_{index}_{title}.{extension}"
    pattern = [r"(?:https?://)?[^\.]+\.deviantart\.com/art/.+-(\d+)"]
+    test = [("http://shimoda7.deviantart.com/art/For-the-sake-of-a-memory-10073852", {
+        "url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
+        "keyword": "f2dfde276a39990097935ace092811c56bc0bfec",
+    })]

    def __init__(self, match):
        Extractor.__init__(self)
--- a/gallery_dl/extractor/imgth.py
+++ b/gallery_dl/extractor/imgth.py
@@ -11,12 +11,16 @@
 from .common import Extractor, Message
 from .. import text

-class ImgthExtractor(Extractor):
-
+class ImgthGalleryExtractor(Extractor):
+    """Extract all images of a gallery"""
    category = "imgth"
    directory_fmt = ["{category}", "{gallery-id} {title}"]
    filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
    pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
+    test = [("http://imgth.com/gallery/37/wallpaper-anime", {
+        "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
+        "keyword": "1b15726d53bc2c08d845fa60ce538396380688df",
+    })]

    def __init__(self, match):
        Extractor.__init__(self)
@@ -31,7 +35,9 @@ class ImgthExtractor(Extractor):
        for num, url in enumerate(self.get_images(page), 1):
            data["num"] = num
            yield Message.Url, url, text.nameext_from_url(url, data)
+
    def get_images(self, page):
+        """Yield all image urls for this gallery"""
        pnum = 0
        while True:
            pos = 0
@@ -48,15 +54,10 @@ class ImgthExtractor(Extractor):

    def get_job_metadata(self, page):
        """Collect metadata for extractor-job"""
-        data = {
-            "category": self.category,
-            "gallery-id": self.gid,
-        }
-        data, _ = text.extract_all(page, (
+        return text.extract_all(page, (
            ("title", '<h1>', '</h1>'),
            ("count", 'total of images in this gallery: ', ' '),
            ("date" , 'created on ', ' by <'),
            (None   , 'href="/users/', ''),
            ("user" , '>', '<'),
-        ), values=data)
-        return data
+        ), values={"category": self.category, "gallery-id": self.gid})[0]
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -18,6 +18,10 @@ class ImgurExtractor(Extractor):
    directory_fmt = ["{category}", "{album-key} - {title}"]
    filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}"
    pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
+    test = [("https://imgur.com/a/TcBmP", {
+        "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
+        "keyword": "5c96eee4df5938ed37f1f95f5c4ef64444bddeb4",
+    })]

    def __init__(self, match):
        Extractor.__init__(self)
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -28,6 +28,9 @@ class KissmangaMangaExtractor(KissmangaExtractor):
    """Extract all manga-chapters from kissmanga"""
    subcategory = "manga"
    pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/[^/]+/?$"]
+    test = [("http://kissmanga.com/Manga/Dropout", {
+        "url": "992befdd64e178fe5af67de53f8b510860d968ca",
+    })]

    def items(self):
        cloudflare.bypass_ddos_protection(self.session, self.url_base)
@@ -47,6 +50,16 @@ class KissmangaChapterExtractor(KissmangaExtractor):
    """Extract a single manga-chapter from kissmanga"""
    subcategory = "chapter"
    pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"]
+    test = [
+        ("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
+            "url": "d9ce171d3d4c0493dbb169a5641ac4cd45b0cb3a",
+            "keyword": "892c3e4df03a575a282a5695add986a49623d746",
+        }),
+        ("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
+            "url": "b8dbf9fca1d8c942ca5fefd10299da49e8399081",
+            "keyword": "0a98952984941cc2a11892b1cd7b237ffb20adaa",
+        })
+    ]

    def items(self):
        cloudflare.bypass_ddos_protection(self.session, "http://kissmanga.com")
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -18,6 +18,10 @@ class NhentaiExtractor(Extractor):
    directory_fmt = ["{category}", "{gallery-id} {title}"]
    filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
    pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
+    test = [("http://nhentai.net/g/147850/", {
+        "url": "199ddd07dded0f69282e09a372710698ea21ab8e",
+        "keyword": "926be7e9c6684a0d477fce9b48b76df747937037",
+    })]

    def __init__(self, match):
        Extractor.__init__(self)
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -17,6 +17,10 @@ class NijieUserExtractor(AsynchronousExtractor):
    directory_fmt = ["{category}", "{artist-id}"]
    filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
    pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"]
+    test = [("https://nijie.info/members_illust.php?id=44", {
+        "url": "585d821df4716b1098660a0be426d01db4b65f2a",
+        "keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4",
+    })]
    popup_url = "https://nijie.info/view_popup.php?id="

    def __init__(self, match):
--- a/gallery_dl/extractor/powermanga.py
+++ b/gallery_dl/extractor/powermanga.py
@@ -24,6 +24,10 @@ class PowerMangaExtractor(Extractor):
         r"(.+/([a-z]{2})/\d+/\d+)(?:/page)?"),
        (r"(?:https?://)?(?:www\.)?(p)owermanga\.org/((?:[^-]+-)+[^-]+/?)"),
    ]
+    test = [("http://read.powermanga.org/read/one_piece/en/0/803/page/1", {
+        "url": "e6179c1565068f99180620281f86bdd25be166b4",
+        "keyword": "ef17bbc6a9ab0390a31f1508e825ddce35f2d2b1",
+    })]

    def __init__(self, match):
        Extractor.__init__(self)
--- a/gallery_dl/extractor/spectrumnexus.py
+++ b/gallery_dl/extractor/spectrumnexus.py
@@ -16,6 +16,9 @@ class SpectrumNexusMangaExtractor(Extractor):
    category = "spectrumnexus"
    subcategory = "manga"
    pattern = [r"(?:https?://)?view\.thespectrum\.net/series/([^\.]+)\.html$"]
+    test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", {
+        "url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb",
+    })]
    url_base = "http://view.thespectrum.net/series/"

    def __init__(self, match):
@@ -45,6 +48,10 @@ class SpectrumNexusChapterExtractor(AsynchronousExtractor):
         r"\?ch=(Chapter\+(\d+)|Volume\+(\d+))"),
        (r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)"),
    ]
+    test = [("http://view.thespectrum.net/series/toriko.html?ch=Chapter+343&page=1", {
+        "url": "c0fc7dc594841217cc622a67edd79f06e9900333",
+        "keyword": "bde9c95a2d0feca0574c7248ed06f1684f86b2ac",
+    })]

    def __init__(self, match):
        AsynchronousExtractor.__init__(self)