# -*- coding: utf-8 -*-

# Copyright 2014, 2015 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images and ugoira from http://www.pixiv.net/"""

import csv
import re

import requests

from .common import AsynchronousExtractor, Message, safe_request

# Module metadata: category name, extractor class name, directory and
# filename templates, and the URL pattern(s) this module responds to.
info = {
    "category": "pixiv",
    "extractor": "PixivExtractor",
    "directory": ["{category}", "{artist-id}"],
    "filename": "{category}_{artist-id}_{illust-id}{num}.{extension}",
    "pattern": [
        # member.php?id=N or member_illust.php?id=N; group 1 is the
        # numeric member id.
        r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)",
    ],
}


class PixivExtractor(AsynchronousExtractor):
    """Emit image URLs for the works of a single pixiv member."""

    member_url = "http://www.pixiv.net/member_illust.php"
    illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"

    # URL templates, filled from the per-illustration metadata dict.
    # "{thumbnail-url[8]}" takes index 8 of the thumbnail URL value --
    # presumably the digit in its "http://iN." host prefix; confirm
    # against the API data.
    #
    # v1: layout used for illust-ids up to and including 46270949.
    singl_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}"
                    "/img/{artist-nick}/{illust-id}.{extension}")
    manga_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}"
                    "/img/{artist-nick}/{illust-id}{big}_p{index}.{extension}")
    # v2: "img-original" layout used for illust-ids above 46270949.
    singl_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img"
                    "/{url-date}/{illust-id}_p0.{extension}")
    manga_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img"
                    "/{url-date}/{illust-id}_p{index}.{extension}")

    def __init__(self, match, config):
        """Set up the extractor for the member id captured by `match`.

        `match` is the regex match for one of the patterns in `info`
        (group 1 holds the member id). `config` must provide a
        "pixiv-cookies" mapping that contains a "PHPSESSID" entry.
        """
        AsynchronousExtractor.__init__(self, config)
        self.config = config
        self.artist_id = match.group(1)

        cookies = config["pixiv-cookies"]
        self.api = PixivAPI(cookies["PHPSESSID"])
        self.session.headers.update({"Referer": "http://www.pixiv.net/"})
        self.session.cookies.update(cookies)

    def items(self):
        """Yield the message stream for this member.

        Emits Version, Headers, Cookies and Directory messages first,
        followed by one Url message per image: a single message for
        works with an empty "count", otherwise one message per page.
        """
        yield Message.Version, 1
        yield Message.Headers, self.session.headers
        yield Message.Cookies, self.session.cookies
        yield Message.Directory, self.get_job_metadata()

        for illust_id in self.get_illust_ids():
            data = self.api.request(illust_id)

            # TODO: ugoira (animation) support is disabled; the earlier
            # draft checked for "うごイラ" in data["tags"] and yielded a
            # zip archive plus a frame list.

            # Pick the URL layout based on the illustration id.
            if illust_id > 46270949:
                big = ""
                single_fmt = self.singl_v2_fmt
                manga_fmt = self.manga_v2_fmt
            else:
                # Within the v1 layout, multi-page works above this id
                # carry a "_big" infix in their page URLs.
                big = "_big" if illust_id > 11319935 else ""
                single_fmt = self.singl_v1_fmt
                manga_fmt = self.manga_v1_fmt

            page_count = data["count"]
            if not page_count:
                # Single image: exactly one URL, metadata passed as-is.
                yield Message.Url, single_fmt.format(**data), data
                continue

            for index in range(int(page_count)):
                # "num" feeds the "{num}" field of the filename template.
                data["num"] = "_p{:02}".format(index)
                url = manga_fmt.format(index=index, big=big, **data)
                # Hand out a copy because "num" is overwritten on the
                # next iteration.
                yield Message.Url, url, data.copy()