From 20bd9cd296fd27c7c25a27aab7f3f8d6c0bd15e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 8 Jan 2021 23:15:20 +0100 Subject: [PATCH] [wikiart] add extractor for single paintings (closes #1233) There is no API endpoint for single paintings from what I can tell, so this uses the site's search. --- docs/supportedsites.rst | 3 ++- gallery_dl/extractor/wikiart.py | 48 ++++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 320fb1b9..312f215b 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -133,7 +133,7 @@ Warosu https://warosu.org/ Threads Weasyl https://www.weasyl.com/ Favorites, Folders, Journals, Submissions `API Key `__ Webtoon https://www.webtoons.com/ Comics, Episodes Weibo https://www.weibo.com/ Images from Statuses, User Profiles -WikiArt.org https://www.wikiart.org/ Artists, Artist Listings, Artworks +WikiArt.org https://www.wikiart.org/ |wikiart-C| xHamster https://xhamster.com/ Galleries, User Profiles XVideos https://www.xvideos.com/ Galleries, User Profiles Yandere https://yande.re/ Pools, Popular Images, Posts, Tag Searches @@ -166,4 +166,5 @@ Turboimagehost https://www.turboimagehost.com/ individual Images .. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles .. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders .. |twitter-C| replace:: Bookmarks, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets +.. |wikiart-C| replace:: Artists, Artist Listings, Artworks, individual Images .. |yuki-S| replace:: yuki.la 4chan archive diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index 92385900..428c6b5b 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -40,7 +40,7 @@ class WikiartExtractor(Extractor): def paintings(self): """Return an iterable containing all relevant 'painting' objects""" - def _pagination(self, url, extra_params=None, key="Paintings"): + def _pagination(self, url, extra_params=None, key="Paintings", stop=False): headers = { "X-Requested-With": "XMLHttpRequest", "Referer": url, @@ -60,6 +60,8 @@ class WikiartExtractor(Extractor): if not items: return yield from items + if stop: + return params["page"] += 1 @@ -67,7 +69,7 @@ class WikiartArtistExtractor(WikiartExtractor): """Extractor for an artist's paintings on wikiart.org""" subcategory = "artist" directory_fmt = ("{category}", "{artist[artistName]}") - pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)" + pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$" test = ("https://www.wikiart.org/en/thomas-cole", { "url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98", "keyword": "6d92913c55675e05553f000cfee5daff0b4107cf", @@ -75,18 +77,50 @@ class WikiartArtistExtractor(WikiartExtractor): def __init__(self, match): WikiartExtractor.__init__(self, match) - self.artist = match.group(2) + self.artist_name = match.group(2) + self.artist = None def metadata(self): - url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist) - return {"artist": self.request(url).json()} + url = "{}/{}/{}?json=2".format(self.root, self.lang, self.artist_name) + self.artist = self.request(url).json() + return {"artist": self.artist} def paintings(self): url = "{}/{}/{}/mode/all-paintings".format( - self.root, self.lang, self.artist) + self.root, self.lang, self.artist_name) return self._pagination(url) +class WikiartImageExtractor(WikiartArtistExtractor): + """Extractor for individual paintings on wikiart.org""" + subcategory = "image" + pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)" + test = ( + ("https://www.wikiart.org/en/thomas-cole/the-departure-1838", { + "url": "4d9fd87680a2620eaeaf1f13e3273475dec93231", + "keyword": "a1b083d500ce2fd364128e35b026e4ca526000cc", + }), + # no year or '-' in slug + ("https://www.wikiart.org/en/huang-shen/summer", { + "url": "d7f60118c34067b2b37d9577e412dc1477b94207", + }), + ) + + def __init__(self, match): + WikiartArtistExtractor.__init__(self, match) + self.title = match.group(3) + + def paintings(self): + title, sep, year = self.title.rpartition("-") + if not sep or not year.isdecimal(): + title = self.title + url = "{}/{}/Search/{} {}".format( + self.root, self.lang, + self.artist.get("artistName") or self.artist_name, title, + ) + return self._pagination(url, stop=True) + + class WikiartArtworksExtractor(WikiartExtractor): """Extractor for artwork collections on wikiart.org""" subcategory = "artworks"