[nijie] add image-extractor

2016-09-19 08:51:49 +02:00
parent d324c58038
commit fea3be0aed
1 changed files with 67 additions and 20 deletions
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -12,27 +12,17 @@ from .common import AsynchronousExtractor, Message
 from .. import config, text, exception
 from ..cache import cache

-class NijieUserExtractor(AsynchronousExtractor):
-    """Extractor for works of a nijie-user"""
+class NijieExtractor(AsynchronousExtractor):
+    """Base class for nijie extractors"""
    category = "nijie"
-    subcategory = "user"
    directory_fmt = ["{category}", "{artist-id}"]
    filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
-    pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"]
-    test = [("https://nijie.info/members_illust.php?id=44", {
-        "url": "585d821df4716b1098660a0be426d01db4b65f2a",
-        "keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4",
-    })]
    popup_url = "https://nijie.info/view_popup.php?id="

-    def __init__(self, match):
+    def __init__(self):
        AsynchronousExtractor.__init__(self)
-        self.artist_id = match.group(1)
-        self.artist_url = (
-            "https://nijie.info/members_illust.php?id="
-            + self.artist_id
-        )
-        self.session.headers["Referer"] = self.artist_url
+        self.session.headers["Referer"] = "https://nijie.info/"
+        self.artist_id = ""

    def items(self):
        self.session.cookies = self.login(
@@ -57,14 +47,16 @@ class NijieUserExtractor(AsynchronousExtractor):

    def get_image_ids(self):
        """Collect all image-ids for a specific artist"""
-        response = self.session.get(self.artist_url)
-        if response.status_code == 404:
-            raise exception.NotFoundError("artist")
-        return list(text.extract_iter(response.text, ' illust_id="', '"'))
+        return []

    def get_image_data(self, image_id):
        """Get URL and metadata for images specified by 'image_id'"""
        page = self.request(self.popup_url + image_id).text
+        return self.extract_image_data(page, image_id)
+
+    @staticmethod
+    def extract_image_data(page, image_id):
+        """Get URL and metadata for images from 'page'"""
        images = list(text.extract_iter(page, '<img src="//pic', '"'))
        for index, url in enumerate(images):
            yield "https://pic" + url, text.nameext_from_url(url, {
@@ -77,7 +69,62 @@ class NijieUserExtractor(AsynchronousExtractor):
    def login(self, username, password):
        """Login and obtain session cookie"""
        params = {"email": username, "password": password}
-        page = self.session.post("https://nijie.info/login_int.php", data=params).text
+        page = self.session.post("https://nijie.info/login_int.php",
+                                 data=params).text
        if "//nijie.info/login.php" in page:
            raise exception.AuthenticationError()
        return self.session.cookies
+
+
+class NijieUserExtractor(NijieExtractor):
+    """Extractor for works of a nijie-user"""
+    subcategory = "user"
+    pattern = [(r"(?:https?://)?(?:www\.)?nijie\.info/"
+                r"members(?:_illust)?\.php\?id=(\d+)")]
+    test = [("https://nijie.info/members_illust.php?id=44", {
+        "url": "585d821df4716b1098660a0be426d01db4b65f2a",
+        "keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4",
+    })]
+
+    def __init__(self, match):
+        NijieExtractor.__init__(self)
+        self.artist_id = match.group(1)
+        self.artist_url = ("https://nijie.info/members_illust.php?id="
+                           + self.artist_id)
+
+    def get_image_ids(self):
+        response = self.request(self.artist_url)
+        if response.status_code == 404:
+            raise exception.NotFoundError("artist")
+        return list(text.extract_iter(response.text, ' illust_id="', '"'))
+
+
+class NijieImageExtractor(NijieExtractor):
+    """Extractor for a work/image from nijie.info"""
+    subcategory = "image"
+    pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/view\.php\?id=(\d+)"]
+    test = [("https://nijie.info/view.php?id=70720", {
+        "url": "a10d4995645b5f260821e32c60a35f73546c2699",
+        "keyword": "1c0b1a2e447d8e1cd4f93c21f71d7fe7de0eeed3",
+        "content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
+    })]
+
+    def __init__(self, match):
+        NijieExtractor.__init__(self)
+        self.image_id = match.group(1)
+        self.page = ""
+
+    def get_job_metadata(self):
+        response = self.session.get(self.popup_url + self.image_id,
+                                    allow_redirects=False)
+        if 300 <= response.status_code < 400:
+            raise exception.NotFoundError("image")
+        self.page = response.text
+        self.artist_id = text.extract(self.page, "/nijie_picture/sp/", "_")[0]
+        return NijieExtractor.get_job_metadata(self)
+
+    def get_image_ids(self):
+        return (self.image_id,)
+
+    def get_image_data(self, _):
+        return self.extract_image_data(self.page, self.image_id)