From 534194bf922acead8a8496a195f97c0b176c85bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Tue, 19 Jan 2021 02:23:39 +0100
Subject: [PATCH] [unsplash] add extractors (#1197)

for
- single photos (/photos/ID)
- user profiles (/@USER)
- user likes (/@USER/likes)
- search results (/s/photos/SEARCH)
---
 docs/supportedsites.rst          |   2 +
 gallery_dl/extractor/__init__.py |   1 +
 gallery_dl/extractor/unsplash.py | 183 +++++++++++++++++++++++++++++++
 3 files changed, 186 insertions(+)
 create mode 100644 gallery_dl/extractor/unsplash.py

diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index 312f215b..22ac113d 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -127,6 +127,7 @@ The /b/ Archive https://thebarchive.com/ Boards, Search Results,
 Tsumino https://www.tsumino.com/ Galleries, Search Results Supported
 Tumblr https://www.tumblr.com/ Likes, Posts, Tag Searches, User Profiles `OAuth `__
 Twitter https://twitter.com/ |twitter-C| Supported
+Unsplash https://unsplash.com/ |unsplash-C|
 VSCO https://vsco.co/ Collections, individual Images, User Profiles
 Wallhaven https://wallhaven.cc/ individual Images, Search Results `API Key `__
 Warosu https://warosu.org/ Threads
@@ -166,5 +167,6 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
 .. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles
 .. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
 .. |twitter-C| replace:: Bookmarks, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets
+.. |unsplash-C| replace:: Favorites, individual Images, Search Results, User Profiles
 .. |wikiart-C| replace:: Artists, Artist Listings, Artworks, individual Images
 .. |yuki-S| replace:: yuki.la 4chan archive
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 94b379f2..8f7bf507 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -109,6 +109,7 @@ modules = [
     "tsumino",
     "tumblr",
     "twitter",
+    "unsplash",
     "vanillarock",
     "vsco",
     "wallhaven",
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
new file mode 100644
index 00000000..f69155df
--- /dev/null
+++ b/gallery_dl/extractor/unsplash.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://unsplash.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?unsplash\.com"
+
+
+class UnsplashExtractor(Extractor):
+    """Base class for unsplash extractors"""
+    category = "unsplash"
+    directory_fmt = ("{category}", "{user[username]}")
+    filename_fmt = "{id}.{extension}"
+    archive_fmt = "{id}"
+    root = "https://unsplash.com"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.item = match.group(1)  # photo ID, username or search term
+
+    def items(self):
+        for photo in self.photos():
+            util.delete_items(  # NOTE(review): "" key looks unintended
+                photo, ("", "related_collections"))
+            url = photo["urls"]["raw"]  # emitted below via Message.Url
+            text.nameext_from_url(url, photo)
+
+            photo["extension"] = "jpg"  # set unconditionally
+            photo["date"] = text.parse_datetime(photo["created_at"])
+            if "tags" in photo:  # keep only each tag's "title"
+                photo["tags"] = [t["title"] for t in photo["tags"]]
+
+            yield Message.Directory, photo
+            yield Message.Url, url, photo
+
+    def _pagination(self, url, params, results=False):
+        params["per_page"] = "20"  # page size; see len() check below
+        params["page"] = 1
+
+        while True:
+            photos = self.request(url, params=params).json()
+            if results:  # list is nested under "results"
+                photos = photos["results"]
+            yield from photos
+
+            if len(photos) < 20:  # short page is treated as the last
+                return
+            params["page"] += 1
+
+
+class UnsplashImageExtractor(UnsplashExtractor):
+    """Extractor for a single unsplash photo"""
+    subcategory = "image"
+    pattern = BASE_PATTERN + r"/photos/(\w+)"
+    test = ("https://unsplash.com/photos/lsoogGC_5dg", {
+        "url": "00accb0a64d5a0df0db911f8b425892718dce524",
+        "keyword": {
+            "alt_description": "re:silhouette of trees near body of water ",
+            "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
+            "categories": list,
+            "color": "#f3c08c",
+            "created_at": "2020-04-08T08:29:42-04:00",
+            "date": "dt:2020-04-08 12:29:42",
+            "description": "The Island",
+            "downloads": int,
+            "exif": {
+                "aperture": "11",
+                "exposure_time": "30",
+                "focal_length": "70.0",
+                "iso": 200,
+                "make": "Canon",
+                "model": "Canon EOS 5D Mark IV"
+            },
+            "extension": "jpg",
+            "filename": "photo-1586348943529-beaae6c28db9",
+            "height": 6272,
+            "id": "lsoogGC_5dg",
+            "liked_by_user": False,
+            "likes": int,
+            "location": {
+                "city": "Beaver Dam",
+                "country": "United States",
+                "name": "Beaver Dam, WI 53916, USA",
+                "position": {
+                    "latitude": 43.457769,
+                    "longitude": -88.837329
+                },
+                "title": "Beaver Dam, WI 53916, USA"
+            },
+            "promoted_at": "2020-04-08T11:12:03-04:00",
+            "sponsorship": None,
+            "tags": list,
+            "updated_at": "2021-01-13T07:15:42-05:00",
+            "user": {
+                "accepted_tos": True,
+                "bio": str,
+                "first_name": "Dave",
+                "id": "uMJXuywXLiU",
+                "instagram_username": "just_midwest_rock",
+                "last_name": "Hoefler",
+                "location": "Madison, WI",
+                "name": "Dave Hoefler",
+                "portfolio_url": str,
+                "total_collections": 1,
+                "total_likes": 178,
+                "total_photos": 687,
+                "twitter_username": None,
+                "updated_at": "2021-01-13T21:50:35-05:00",
+                "username": "johnwestrock"
+            },
+            "views": int,
+            "width": 4480,
+        },
+    })
+
+    def photos(self):
+        url = "{}/napi/photos/{}".format(self.root, self.item)
+        return (self.request(url).json(),)  # 1-tuple for items() loop
+
+
+class UnsplashUserExtractor(UnsplashExtractor):
+    """Extractor for all photos of an unsplash user"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/@(\w+)/?$"  # "$" rejects /@USER/likes
+    test = ("https://unsplash.com/@johnwestrock", {
+        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+        "range": "1-30",
+        "count": 30,
+    })
+
+    def photos(self):
+        url = "{}/napi/users/{}/photos".format(self.root, self.item)
+        params = {"order_by": "latest"}
+        return self._pagination(url, params)
+
+
+class UnsplashFavoriteExtractor(UnsplashExtractor):
+    """Extractor for all likes of an unsplash user"""
+    subcategory = "favorite"
+    pattern = BASE_PATTERN + r"/@(\w+)/likes"
+    test = ("https://unsplash.com/@johnwestrock/likes", {
+        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+        "range": "1-30",
+        "count": 30,
+    })
+
+    def photos(self):
+        url = "{}/napi/users/{}/likes".format(self.root, self.item)
+        params = {"order_by": "latest"}
+        return self._pagination(url, params)
+
+
+class UnsplashSearchExtractor(UnsplashExtractor):
+    """Extractor for unsplash search results"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
+    test = ("https://unsplash.com/s/photos/nature", {
+        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+        "range": "1-30",
+        "count": 30,
+    })
+
+    def __init__(self, match):
+        UnsplashExtractor.__init__(self, match)
+        self.query = match.group(2)  # URL query string, None if absent
+
+    def photos(self):
+        url = self.root + "/napi/search/photos"
+        params = {"query": text.unquote(self.item)}
+        if self.query:  # pass URL query parameters along as well
+            params.update(text.parse_query(self.query))
+        return self._pagination(url, params, True)  # results=True