[unsplash] add extractors (#1197)

for
- single photos (/photos/ID)
- user profiles (/@USER)
- user likes (/@USER/likes)
- search results (/s/photos/SEARCH)
2021-01-19 02:23:39 +01:00
parent 1fc16cb8c5
commit 534194bf92
3 changed files with 186 additions and 0 deletions
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -109,6 +109,7 @@ modules = [
    "tsumino",
    "tumblr",
    "twitter",
+    "unsplash",
    "vanillarock",
    "vsco",
    "wallhaven",
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://unsplash.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+# Common URL prefix of all extractor patterns in this module:
+# an optional "http://" or "https://" scheme followed by "unsplash.com"
+BASE_PATTERN = r"(?:https?://)?unsplash\.com"
+
+
+class UnsplashExtractor(Extractor):
+    """Shared logic for all unsplash extractors
+
+    Subclasses implement photos(), which returns an iterable of photo
+    metadata dicts; items() turns each of them into download messages.
+    """
+    category = "unsplash"
+    directory_fmt = ("{category}", "{user[username]}")
+    filename_fmt = "{id}.{extension}"
+    archive_fmt = "{id}"
+    root = "https://unsplash.com"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        # first capture group of the matching subclass pattern
+        self.item = match.group(1)
+
+    def items(self):
+        """Yield a Directory and a Url message for every photo"""
+        # NOTE(review): the empty-string key looks unintended - confirm
+        discard = ("", "related_collections")
+
+        for post in self.photos():
+            util.delete_items(post, discard)
+
+            src = post["urls"]["raw"]
+            text.nameext_from_url(src, post)
+
+            # the extension is always forced to "jpg"
+            post["extension"] = "jpg"
+            post["date"] = text.parse_datetime(post["created_at"])
+
+            # reduce tag objects to a plain list of their titles
+            if "tags" in post:
+                post["tags"] = [tag["title"] for tag in post["tags"]]
+
+            yield Message.Directory, post
+            yield Message.Url, src, post
+
+    def _pagination(self, url, params, results=False):
+        """Yield photo objects from all pages of an API endpoint
+
+        'params' is modified in place: "per_page" and "page" are
+        overwritten. With 'results' enabled, the photo list is taken
+        from the "results" key of each response.
+        """
+        params.update({"per_page": "20", "page": 1})
+
+        while True:
+            data = self.request(url, params=params).json()
+            batch = data["results"] if results else data
+            yield from batch
+
+            # stop once a page holds fewer than 20 entries
+            if len(batch) < 20:
+                break
+            params["page"] += 1
+
+
+class UnsplashImageExtractor(UnsplashExtractor):
+    """Extractor for a single unsplash photo"""
+    subcategory = "image"
+    # Photo IDs may contain '-', which '\w' does not match;
+    # capture everything up to the next '/', '?', or '#' instead.
+    pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
+    test = ("https://unsplash.com/photos/lsoogGC_5dg", {
+        "url": "00accb0a64d5a0df0db911f8b425892718dce524",
+        "keyword": {
+            "alt_description": "re:silhouette of trees near body of water ",
+            "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
+            "categories": list,
+            "color": "#f3c08c",
+            "created_at": "2020-04-08T08:29:42-04:00",
+            "date": "dt:2020-04-08 12:29:42",
+            "description": "The Island",
+            "downloads": int,
+            "exif": {
+                "aperture": "11",
+                "exposure_time": "30",
+                "focal_length": "70.0",
+                "iso": 200,
+                "make": "Canon",
+                "model": "Canon EOS 5D Mark IV"
+            },
+            "extension": "jpg",
+            "filename": "photo-1586348943529-beaae6c28db9",
+            "height": 6272,
+            "id": "lsoogGC_5dg",
+            "liked_by_user": False,
+            "likes": int,
+            "location": {
+                "city": "Beaver Dam",
+                "country": "United States",
+                "name": "Beaver Dam, WI 53916, USA",
+                "position": {
+                    "latitude": 43.457769,
+                    "longitude": -88.837329
+                },
+                "title": "Beaver Dam, WI 53916, USA"
+            },
+            "promoted_at": "2020-04-08T11:12:03-04:00",
+            "sponsorship": None,
+            "tags": list,
+            "updated_at": "2021-01-13T07:15:42-05:00",
+            "user": {
+                "accepted_tos": True,
+                "bio": str,
+                "first_name": "Dave",
+                "id": "uMJXuywXLiU",
+                "instagram_username": "just_midwest_rock",
+                "last_name": "Hoefler",
+                "location": "Madison, WI",
+                "name": "Dave Hoefler",
+                "portfolio_url": str,
+                "total_collections": 1,
+                "total_likes": 178,
+                "total_photos": 687,
+                "twitter_username": None,
+                "updated_at": "2021-01-13T21:50:35-05:00",
+                "username": "johnwestrock"
+            },
+            "views": int,
+            "width": 4480,
+        },
+    })
+
+    def photos(self):
+        """Return a 1-tuple with the metadata of the requested photo"""
+        url = "{}/napi/photos/{}".format(self.root, self.item)
+        return (self.request(url).json(),)
+
+
+class UnsplashUserExtractor(UnsplashExtractor):
+    """Extractor for all photos of an unsplash user"""
+    subcategory = "user"
+    # Accept end of string, a query string, or a fragment after the
+    # user name, so profile URLs like '/@USER?key=value' still match.
+    # Sub-pages such as '/@USER/likes' stay excluded.
+    pattern = BASE_PATTERN + r"/@(\w+)/?(?:$|[?#])"
+    test = ("https://unsplash.com/@johnwestrock", {
+        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+        "range": "1-30",
+        "count": 30,
+    })
+
+    def photos(self):
+        """Return an iterator over the user's photos, newest first"""
+        url = "{}/napi/users/{}/photos".format(self.root, self.item)
+        params = {"order_by": "latest"}
+        return self._pagination(url, params)
+
+
+class UnsplashFavoriteExtractor(UnsplashExtractor):
+    """Extractor for the photos an unsplash user has liked"""
+    subcategory = "favorite"
+    pattern = BASE_PATTERN + r"/@(\w+)/likes"
+    test = ("https://unsplash.com/@johnwestrock/likes", {
+        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+        "range": "1-30",
+        "count": 30,
+    })
+
+    def photos(self):
+        """Return an iterator over all pages of the user's likes"""
+        endpoint = "{}/napi/users/{}/likes".format(self.root, self.item)
+        return self._pagination(endpoint, {"order_by": "latest"})
+
+
+class UnsplashSearchExtractor(UnsplashExtractor):
+    """Extractor for the results of an unsplash photo search"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
+    test = ("https://unsplash.com/s/photos/nature", {
+        "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+                   r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+        "range": "1-30",
+        "count": 30,
+    })
+
+    def __init__(self, match):
+        UnsplashExtractor.__init__(self, match)
+        # optional query string of the search URL (None if absent)
+        self.query = match.group(2)
+
+    def photos(self):
+        """Return an iterator over all pages of search results"""
+        search_params = {"query": text.unquote(self.item)}
+
+        # parameters from the URL's query string are merged on top
+        extra = self.query
+        if extra:
+            search_params.update(text.parse_query(extra))
+
+        endpoint = self.root + "/napi/search/photos"
+        return self._pagination(endpoint, search_params, True)