From 68c4306040f1e17e0145b72bdd4aed1248d26a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 14 May 2015 19:08:20 +0200 Subject: [PATCH] [pixiv] transition to pixiv public api --- config | 7 +- gallery_dl/extractor/pixiv.py | 227 ++++++++++++++++++---------------- 2 files changed, 122 insertions(+), 112 deletions(-) diff --git a/config b/config index 88f088b4..dec8b374 100644 --- a/config +++ b/config @@ -1,5 +1,6 @@ -[pixiv-cookies] -PHPSESSID = XXXXX +[pixiv] +username = XXXXX +password = XXXXX [exhentai-cookies] ipb_member_id = XXXXX @@ -14,4 +15,4 @@ nijie_login_hash = XXXXX regex0 = d(?:anbooru)?[.:-_](\w.+) [gelbooru] -regex0 = g(?:elbooru)?[.:-_](\w.+) \ No newline at end of file +regex0 = g(?:elbooru)?[.:-_](\w.+) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 2941dc19..828ee9d2 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -8,115 +8,96 @@ """Extract images and ugoira from http://www.pixiv.net/""" -from .common import AsynchronousExtractor +from .common import SequentialExtractor from .common import Message -from .common import safe_request import re -import csv +import json import requests info = { "category": "pixiv", "extractor": "PixivExtractor", - "directory": ["{category}", "{artist-id}"], - "filename": "{category}_{artist-id}_{illust-id}{num}.{extension}", + "directory": ["{category}", "{artist-id}-{artist-nick}"], + "filename": "{category}_{artist-id}_{id}{num}.{extension}", "pattern": [ r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)", ], } -class PixivExtractor(AsynchronousExtractor): +class PixivExtractor(SequentialExtractor): member_url = "http://www.pixiv.net/member_illust.php" illust_url = "http://www.pixiv.net/member_illust.php?mode=medium" - singl_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}" - "/img/{artist-nick}/{illust-id}.{extension}") - manga_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}" - "/img/{artist-nick}/{illust-id}{big}_p{index}.{extension}") - - singl_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img" - "/{url-date}/{illust-id}_p0.{extension}") - manga_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img" - "/{url-date}/{illust-id}_p{index}.{extension}") - def __init__(self, match, config): - AsynchronousExtractor.__init__(self, config) + SequentialExtractor.__init__(self, config) self.config = config self.artist_id = match.group(1) - self.api = PixivAPI(config["pixiv-cookies"]["PHPSESSID"]) - self.session.headers.update({"Referer": "http://www.pixiv.net/"}) - self.session.cookies.update(self.config["pixiv-cookies"]) + self.api = PixivAPI(self.session) def items(self): + self.api.login( + self.config.get("pixiv", "username"), + self.config.get("pixiv", "password"), + ) + metadata = self.get_job_metadata() + yield Message.Version, 1 yield Message.Headers, self.session.headers yield Message.Cookies, self.session.cookies - yield Message.Directory, self.get_job_metadata() + yield Message.Directory, metadata - for illust_id in self.get_illust_ids(): - data = self.api.request(illust_id) - # debug - # for i, value in enumerate(data): - # print("{:02}: {}".format(i, value)) - # return - # debug end + for work in self.get_works(): + work.update(metadata) - # if "うごイラ" in data["tags"]: - # ugoira / animations - # url, framelist = self.parse_ugoira(img) - # data[2] = "zip" - # yield (url, sname_fmt.format(*data)) - # data[2] = "txt" - # yield (framelist, sname_fmt.format(*data)) - # continue + if work["type"] == "ugoira": + url, framelist = self.parse_ugoira(work["id"]) + work["extension"] = "zip" + yield Message.Url, url, work.copy() + work["extension"] = "txt" + yield Message.Url, "text://"+framelist, work + + elif work["page_count"] == 1: + yield Message.Url, work["url"], work - # images - if illust_id > 46270949: - big = "" - url_s_fmt = self.singl_v2_fmt - url_m_fmt = self.manga_v2_fmt else: - big = "_big" if illust_id > 11319935 else "" - url_s_fmt = self.singl_v1_fmt - url_m_fmt = self.manga_v1_fmt + url = work["url"] + ext = work["extension"] + if work["id"] > 11319935 and "/img-original/" not in url: + big = "_big" + else: + big = "" + if url[-6] == "p": + part = url[:-7] + else: + part = url[:-4] + for i in range(work["page_count"]): + work["num"] = "_p{:02}".format(i) + url = "{}{}_p{}.{}".format(part, big, i, ext) + yield Message.Url, url, work.copy() - if not data["count"]: - yield Message.Url, url_s_fmt.format(**data), data - else: - for i in range(0, int(data["count"])): - data["num"] = "_p{:02}".format(i) - yield (Message.Url, - url_m_fmt.format(index=i, big=big, **data), - data.copy()) - - def get_illust_ids(self): - """Yield all illust-ids for a pixiv-member""" - needle = ('