[pixiv] add extractor for 'pixivision' articles (#1672)

This commit is contained in:
Mike Fährmann
2021-07-07 02:22:44 +02:00
parent 312a28e78a
commit 8ecca3af58
3 changed files with 46 additions and 3 deletions

View File

@@ -6,10 +6,10 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images and ugoira from https://www.pixiv.net/"""
"""Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message
from .. import text, exception
from .. import text, util, exception
from ..cache import cache
from datetime import datetime, timedelta
import itertools
@@ -517,6 +517,48 @@ class PixivFollowExtractor(PixivExtractor):
return {"user_follow": self.api.user}
class PixivPixivisionExtractor(PixivExtractor):
"""Extractor for illustrations from a pixivision article"""
subcategory = "pixivision"
directory_fmt = ("{category}", "pixivision",
"{pixivision_id} {pixivision_title}")
archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
test = (
("https://www.pixivision.net/en/a/2791"),
("https://pixivision.net/a/2791", {
"count": 7,
"keyword": {
"pixivision_id": "2791",
"pixivision_title": "What's your favorite music? Editors "
"picks featuring: “CD Covers”!",
},
}),
)
def __init__(self, match):
PixivExtractor.__init__(self, match)
self.pixivision_id = match.group(1)
def works(self):
return (
self.api.illust_detail(illust_id)
for illust_id in util.unique_sequence(text.extract_iter(
self.page, '<a href="https://www.pixiv.net/en/artworks/', '"'))
)
def metadata(self):
url = "https://www.pixivision.net/en/a/" + self.pixivision_id
headers = {"User-Agent": "Mozilla/5.0"}
self.page = self.request(url, headers=headers).text
title = text.extract(self.page, '<title>', ' - pixivision<')[0]
return {
"pixivision_id" : self.pixivision_id,
"pixivision_title": text.unescape(title),
}
class PixivAppAPI():
"""Minimal interface for the Pixiv App API for mobile devices