[pixiv] transition to pixiv public api

2015-05-14 19:08:20 +02:00
parent c8a942a77a
commit 68c4306040
2 changed files with 122 additions and 112 deletions
--- a/7
+++ b/7
@@ -1,5 +1,6 @@
-[pixiv-cookies]
+[pixiv]
-PHPSESSID = XXXXX
+username = XXXXX
 password = XXXXX
 [exhentai-cookies]
 ipb_member_id = XXXXX
@@ -14,4 +15,4 @@ nijie_login_hash = XXXXX
 regex0 = d(?:anbooru)?[.:-_](\w.+)
 [gelbooru]
-regex0 = g(?:elbooru)?[.:-_](\w.+)
+regex0 = g(?:elbooru)?[.:-_](\w.+)
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -8,115 +8,96 @@
 """Extract images and ugoira from http://www.pixiv.net/"""
-from .common import AsynchronousExtractor
+from .common import SequentialExtractor
 from .common import Message
 from .common import safe_request
 import re
-import csv
+import json
 import requests
 info = {
    "category": "pixiv",
    "extractor": "PixivExtractor",
-    "directory": ["{category}", "{artist-id}"],
+    "directory": ["{category}", "{artist-id}-{artist-nick}"],
-    "filename": "{category}_{artist-id}_{illust-id}{num}.{extension}",
+    "filename": "{category}_{artist-id}_{id}{num}.{extension}",
    "pattern": [
        r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)",
    ],
 }
-class PixivExtractor(AsynchronousExtractor):
+class PixivExtractor(SequentialExtractor):
    member_url = "http://www.pixiv.net/member_illust.php"
    illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"
    singl_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}"
                    "/img/{artist-nick}/{illust-id}.{extension}")
    manga_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}"
                    "/img/{artist-nick}/{illust-id}{big}_p{index}.{extension}")
    singl_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img"
                    "/{url-date}/{illust-id}_p0.{extension}")
    manga_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img"
                    "/{url-date}/{illust-id}_p{index}.{extension}")
    def __init__(self, match, config):
-        AsynchronousExtractor.__init__(self, config)
+        SequentialExtractor.__init__(self, config)
        self.config = config
        self.artist_id = match.group(1)
-        self.api = PixivAPI(config["pixiv-cookies"]["PHPSESSID"])
+        self.api = PixivAPI(self.session)
        self.session.headers.update({"Referer": "http://www.pixiv.net/"})
        self.session.cookies.update(self.config["pixiv-cookies"])
    def items(self):
        self.api.login(
            self.config.get("pixiv", "username"),
            self.config.get("pixiv", "password"),
        )
        metadata = self.get_job_metadata()
        yield Message.Version, 1
        yield Message.Headers, self.session.headers
        yield Message.Cookies, self.session.cookies
-        yield Message.Directory, self.get_job_metadata()
+        yield Message.Directory, metadata
-        for illust_id in self.get_illust_ids():
+        for work in self.get_works():
-            data = self.api.request(illust_id)
+            work.update(metadata)
            # debug
            # for i, value in enumerate(data):
                # print("{:02}: {}".format(i, value))
            # return
            # debug end
-            # if "うごイラ" in data["tags"]:
+            if work["type"] == "ugoira":
-                # ugoira / animations
+                url, framelist = self.parse_ugoira(work["id"])
-                    # url, framelist = self.parse_ugoira(img)
+                work["extension"] = "zip"
-                    # data[2] = "zip"
+                yield Message.Url, url, work.copy()
-                    # yield (url, sname_fmt.format(*data))
+                work["extension"] = "txt"
-                    # data[2] = "txt"
+                yield Message.Url, "text://"+framelist, work
-                    # yield (framelist, sname_fmt.format(*data))
+
-                    # continue
+            elif work["page_count"] == 1:
                yield Message.Url, work["url"], work
            # images
            if illust_id > 46270949:
                big = ""
                url_s_fmt = self.singl_v2_fmt
                url_m_fmt = self.manga_v2_fmt
            else:
-                big = "_big" if illust_id > 11319935 else ""
+                url = work["url"]
-                url_s_fmt = self.singl_v1_fmt
+                ext = work["extension"]
-                url_m_fmt = self.manga_v1_fmt
+                if work["id"] > 11319935 and "/img-original/" not in url:
                    big = "_big"
                else:
                    big = ""
                if url[-6] == "p":
                    part = url[:-7]
                else:
                    part = url[:-4]
                for i in range(work["page_count"]):
                    work["num"] = "_p{:02}".format(i)
                    url = "{}{}_p{}.{}".format(part, big, i, ext)
                    yield Message.Url, url, work.copy()
-            if not data["count"]:
+    def get_works(self):
-                yield Message.Url, url_s_fmt.format(**data), data
+        """Yield all work-items for a pixiv-member"""
-            else:
+        page = 1
                for i in range(0, int(data["count"])):
                    data["num"] = "_p{:02}".format(i)
                    yield (Message.Url,
                           url_m_fmt.format(index=i, big=big, **data),
                           data.copy())
    def get_illust_ids(self):
        """Yield all illust-ids for a pixiv-member"""
        needle = ('<li class="image-item "><a href="'
                  '/member_illust.php?mode=medium&amp;illust_id=')
        params = {"id": self.artist_id, "p": 1}
        while True:
-            text = self.request(self.member_url, params=params).text
+            data = self.api.user_works(self.artist_id, page)
-            pos = 0
+            for work in data["response"]:
-            found = 0
+                url = work["image_urls"]["large"]
-            while True:
+                work["num"] = ""
-                illust_id, pos = self.extract(text, needle, '"', pos)
+                work["url"] = url
-                if illust_id is None:
+                work["extension"] = url[url.rfind(".")+1:]
-                    break
+                yield work
-                found += 1
+            pinfo = data["pagination"]
-                yield int(illust_id)
+            if pinfo["current"] == pinfo["pages"]:
            if found != 20:
                return
-            params["p"] += 1
+            page = pinfo["next"]
    def parse_ugoira(self, illust_id):
        """Parse ugoira data"""
        # get illust page
        text = self.request(
-            self.illust_url,
+            self.illust_url, params={"illust_id": illust_id},
            params={"illust_id": illust_id},
        ).text
        # parse page
@@ -127,57 +108,85 @@ class PixivExtractor(AsynchronousExtractor):
        url = url.replace("\\/", "/")
        # build framelist
-        framelist = "text://" + re.sub(
+        framelist = re.sub(
            r'\{"file":"([^"]+)","delay":(\d+)\},?',
-            r'\1 \2\n',
+            r'\1 \2\n', frames
            frames
        )
        return url, framelist
    def get_job_metadata(self):
        """Build the metadata dict shared by all items of this job

        Queries the API for the user given by 'self.artist_id' and uses
        the first entry of the "response" list of the result.
        """
        user = self.api.user(self.artist_id)["response"][0]
        metadata = {"category": info["category"]}
        metadata["artist-id"] = self.artist_id
        metadata["artist-name"] = user["name"]
        metadata["artist-nick"] = user["account"]
        return metadata
 class PixivAPI():
-    api_url = "http://spapi.pixiv.net/iphone/illust.php"
+    """Minimal interface for the Pixiv Public-API for mobile devices
-    def __init__(self, session_id):
+    For a better and more complete implementation, see
-        self.session = requests.Session()
+    - https://github.com/upbit/pixivpy
-        self.session.params["PHPSESSID"] = session_id
+    For in-depth information regarding the Pixiv Public-API, see
    - http://blog.imaou.com/opensource/2014/10/09/pixiv_api_for_ios_update.html
    """
-    def request(self, illust_id):
+    def __init__(self, session=None):
-        data = next(csv.reader(
+        self.session = session or requests.Session()
-            [self.api_call(illust_id)]
+        self.session.headers.update({
-        ))
+            "Referer": "http://www.pixiv.net/",
-        return {
+            "User-Agent": "PixivIOSApp/5.1.1",
-            "category": info["category"],
+            # "Authorization": "Bearer 8mMXXWT9iuwdJvsVIvQsFYDwuZpRCMePeyagSh30ZdU",
-            "illust-id": data[0],
+        })
-            "artist-id": data[1],
+
-            "extension": data[2],
+    def login(self, username, password):
-            "title": data[3],
+        """Login and gain a Pixiv Public-API access token"""
-            "directory": data[4],
+        data = {
-            "artist-name": data[5],
+            "username": username,
-            "thumbnail-url": data[6],
+            "password": password,
-            "url-date": data[6][45:64],
+            "grant_type": "password",
-            # "thumbnail-mobile-url": data[9],
+            "client_id": "bYGKuGVw91e0NMfPGp44euvGt59s",
-            "date": data[12],
+            "client_secret": "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK",
            "tags": data[13],
            # "description": data[18],
            "count": data[19],
            "artist-nick": data[24],
            # "artist-avatar-url": data[29],
            "num": "",
        }
        response = self.session.post(
            "https://oauth.secure.pixiv.net/auth/token", data=data
        )
        if response.status_code not in (200, 301, 302):
            raise Exception("login() failed! check username and password.\n"
                            "HTTP %s: %s" % (response.status_code, response.text))
        try:
            token = self._parse(response)
            self.session.headers["Authorization"] = (
                "Bearer " + token["response"]["access_token"]
            )
        except:
            raise Exception("Get access_token error! Response: %s" % (token))
-    def api_call(self, illust_id):
+    def user(self, user_id):
-        text = ""
+        """Query information about a pixiv user"""
-        while len(text) < 32:
+        response = self.session.get(
-            text = safe_request(
+            "https://public-api.secure.pixiv.net/v1/users/"
-                self.session, self.api_url,
+            "{user}.json".format(user=user_id)
-                params={"illust_id": illust_id}
+        )
-            ).text
+        return self._parse(response)
-        return text
+
    def user_works(self, user_id, page, per_page=20):
        """Fetch one page of the works-listing of a pixiv user

        Sends a GET request through 'self.session' with the query
        parameters 'page', 'per_page' and 'image_sizes' (fixed to
        'large') and returns the result of 'self._parse' on the response.
        """
        url = (
            "https://public-api.secure.pixiv.net/v1/users/"
            "{user}/works.json"
        ).format(user=user_id)
        query = dict(page=page, per_page=per_page, image_sizes="large")
        return self._parse(self.session.get(url, params=query))
    @staticmethod
    def _parse(response):
        """Parse a Pixiv Public-API response"""
        return json.loads(response.text)