[wikiart] add extractor for single paintings (closes #1233)
There is no API endpoint for single paintings from what I can tell, so this uses the site's search.
This commit is contained in:
@@ -133,7 +133,7 @@ Warosu https://warosu.org/ Threads
|
||||
Weasyl https://www.weasyl.com/ Favorites, Folders, Journals, Submissions `API Key <configuration.rst#extractorweasylapi-key>`__
|
||||
Webtoon https://www.webtoons.com/ Comics, Episodes
|
||||
Weibo https://www.weibo.com/ Images from Statuses, User Profiles
|
||||
WikiArt.org https://www.wikiart.org/ Artists, Artist Listings, Artworks
|
||||
WikiArt.org https://www.wikiart.org/ |wikiart-C|
|
||||
xHamster https://xhamster.com/ Galleries, User Profiles
|
||||
XVideos https://www.xvideos.com/ Galleries, User Profiles
|
||||
Yandere https://yande.re/ Pools, Popular Images, Posts, Tag Searches
|
||||
@@ -166,4 +166,5 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
|
||||
.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles
|
||||
.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
|
||||
.. |twitter-C| replace:: Bookmarks, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets
|
||||
.. |wikiart-C| replace:: Artists, Artist Listings, Artworks, individual Images
|
||||
.. |yuki-S| replace:: yuki.la 4chan archive
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2020 Mike Fährmann
|
||||
# Copyright 2019-2021 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -40,7 +40,7 @@ class WikiartExtractor(Extractor):
|
||||
def paintings(self):
|
||||
"""Return an iterable containing all relevant 'painting' objects"""
|
||||
|
||||
def _pagination(self, url, extra_params=None, key="Paintings"):
|
||||
def _pagination(self, url, extra_params=None, key="Paintings", stop=False):
|
||||
headers = {
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
"Referer": url,
|
||||
@@ -60,6 +60,8 @@ class WikiartExtractor(Extractor):
|
||||
if not items:
|
||||
return
|
||||
yield from items
|
||||
if stop:
|
||||
return
|
||||
params["page"] += 1
|
||||
|
||||
|
||||
@@ -67,7 +69,7 @@ class WikiartArtistExtractor(WikiartExtractor):
|
||||
"""Extractor for an artist's paintings on wikiart.org"""
|
||||
subcategory = "artist"
|
||||
directory_fmt = ("{category}", "{artist[artistName]}")
|
||||
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)"
|
||||
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
|
||||
test = ("https://www.wikiart.org/en/thomas-cole", {
|
||||
"url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98",
|
||||
"keyword": "6d92913c55675e05553f000cfee5daff0b4107cf",
|
||||
@@ -75,18 +77,50 @@ class WikiartArtistExtractor(WikiartExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
WikiartExtractor.__init__(self, match)
|
||||
self.artist = match.group(2)
|
||||
self.artist_name = match.group(2)
|
||||
self.artist = None
|
||||
|
||||
def metadata(self):
|
||||
url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist)
|
||||
return {"artist": self.request(url).json()}
|
||||
url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist_name)
|
||||
self.artist = self.request(url).json()
|
||||
return {"artist": self.artist}
|
||||
|
||||
def paintings(self):
|
||||
url = "{}/{}/{}/mode/all-paintings".format(
|
||||
self.root, self.lang, self.artist)
|
||||
self.root, self.lang, self.artist_name)
|
||||
return self._pagination(url)
|
||||
|
||||
|
||||
class WikiartImageExtractor(WikiartArtistExtractor):
    """Extractor for individual paintings on wikiart.org

    The painting is looked up through the site's search page, using the
    artist name and the painting title taken from the URL slug.
    """
    subcategory = "image"
    pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)"
    test = (
        ("https://www.wikiart.org/en/thomas-cole/the-departure-1838", {
            "url": "4d9fd87680a2620eaeaf1f13e3273475dec93231",
            "keyword": "a1b083d500ce2fd364128e35b026e4ca526000cc",
        }),
        # no year or '-' in slug
        ("https://www.wikiart.org/en/huang-shen/summer", {
            "url": "d7f60118c34067b2b37d9577e412dc1477b94207",
        }),
    )

    def __init__(self, match):
        """Store the painting slug (third regex group) from the matched URL"""
        WikiartArtistExtractor.__init__(self, match)
        self.title = match.group(3)

    def paintings(self):
        """Return the first page of search results for this painting"""
        # Drop a trailing '-<digits>' suffix (e.g. a year) from the slug;
        # keep the slug unchanged when there is no such suffix.
        title, sep, year = self.title.rpartition("-")
        if not sep or not year.isdecimal():
            title = self.title

        # 'self.artist' is None until metadata() has been called; fall back
        # to the artist slug from the URL instead of raising AttributeError.
        artist = self.artist or {}
        url = "{}/{}/Search/{} {}".format(
            self.root, self.lang,
            artist.get("artistName") or self.artist_name, title,
        )
        # stop=True: only the first page of results is yielded
        return self._pagination(url, stop=True)
|
||||
|
||||
|
||||
class WikiartArtworksExtractor(WikiartExtractor):
|
||||
"""Extractor for artwork collections on wikiart.org"""
|
||||
subcategory = "artworks"
|
||||
|
||||
Reference in New Issue
Block a user