# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

# NOTE(review): this module is the main part of a patch that also
#   - lists ``pexels`` under the ``"1.0-2.0"`` defaults in
#     docs/configuration.rst (between ``flickr`` and ``weibo``),
#   - adds a "pexels" section with "sleep-request": "1.0-2.0" to
#     docs/gallery-dl.conf and changes bilibili's "sleep-request"
#     from "2.0-4.0" to "3.0-6.0",
#   - adds a "Pexels" row (https://pexels.com/ - Collections,
#     individual Images, Search Results, User Profiles) to
#     docs/supportedsites.md,
#   - registers "pexels" in the 'modules' list of
#     gallery_dl/extractor/__init__.py, after "patreon".

"""Extractors for https://pexels.com/"""

from .common import Extractor, Message
from .. import text, exception

BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com"


class PexelsExtractor(Extractor):
    """Base class for pexels extractors"""
    category = "pexels"
    root = "https://www.pexels.com"
    archive_fmt = "{id}"
    request_interval = (1.0, 2.0)
    request_interval_min = 0.5

    def _init(self):
        self.api = PexelsAPI(self)

    def items(self):
        """Yield a Directory and a Url message for each supported post

        Posts come from posts(); the dict returned by metadata() is
        merged into every post before it is yielded.
        """
        metadata = self.metadata()

        for post in self.posts():
            if "attributes" in post:
                # the actual post data is nested in 'attributes';
                # flatten it and carry the wrapper's 'type' over
                attr = post
                post = post["attributes"]
                post["type"] = attr["type"]

            post.update(metadata)
            # strip the last 5 characters so the rest matches the
            # seconds-precision format (presumably a fractional-seconds
            # + timezone suffix -- TODO confirm against API output)
            post["date"] = text.parse_datetime(
                post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")

            if "image" in post:
                url, _, query = post["image"]["download_link"].partition("?")
                # take the filename from the 'dl' query parameter and
                # fall back to the URL itself when that yields nothing,
                # so the file never ends up with an empty name
                name = text.extr(query, "&dl=", "&") or url
            elif "video" in post:
                video = post["video"]
                name = video["src"]
                url = video["download_link"]
            else:
                self.log.warning("%s: Unsupported post type", post.get("id"))
                continue

            yield Message.Directory, post
            yield Message.Url, url, text.nameext_from_url(name, post)

    def posts(self):
        """Return an iterable of post objects (overridden by subclasses)"""
        return ()

    def metadata(self):
        """Return extra metadata to merge into every post"""
        return {}


class PexelsCollectionExtractor(PexelsExtractor):
    """Extractor for a pexels.com collection"""
    subcategory = "collection"
    directory_fmt = ("{category}", "Collections", "{collection}")
    pattern = BASE_PATTERN + r"/collections/((?:[^/?#]*-)?(\w+))"
    example = "https://www.pexels.com/collections/SLUG-a1b2c3/"

    def metadata(self):
        # groups: (full 'SLUG-ID' path segment, trailing ID)
        cname, cid = self.groups
        return {"collection": cname, "collection_id": cid}

    def posts(self):
        return self.api.collections_media(self.groups[1])


class PexelsSearchExtractor(PexelsExtractor):
    """Extractor for pexels.com search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Searches", "{search_tags}")
    pattern = BASE_PATTERN + r"/search/([^/?#]+)"
    example = "https://www.pexels.com/search/QUERY/"

    def metadata(self):
        # the URL path segment is percent-encoded; decode it so the
        # metadata holds the query as typed (e.g. 'red car')
        return {"search_tags": text.unquote(self.groups[0])}

    def posts(self):
        # decode before handing the value over as a request parameter;
        # otherwise an encoded query like 'red%20car' would get encoded
        # a second time when the request URL is built
        return self.api.search_photos(text.unquote(self.groups[0]))


class PexelsUserExtractor(PexelsExtractor):
    """Extractor for pexels.com user galleries"""
    subcategory = "user"
    directory_fmt = ("{category}", "@{user[slug]}")
    pattern = BASE_PATTERN + r"/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))"
    example = "https://www.pexels.com/@USER-12345/"

    def posts(self):
        # use the numeric ID when the URL has one,
        # the whole '@NAME' path segment otherwise
        return self.api.users_media_recent(self.groups[1] or self.groups[0])


class PexelsImageExtractor(PexelsExtractor):
    """Extractor for individual images on pexels.com"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/photo/((?:[^/?#]*-)?\d+)"
    example = "https://www.pexels.com/photo/SLUG-12345/"

    def posts(self):
        # no API call here: the post object is read from
        # the data embedded in the photo's web page
        url = "{}/photo/{}/".format(self.root, self.groups[0])
        page = self.request(url).text
        return (self._extract_nextdata(page)["props"]["pageProps"]["medium"],)


class PexelsAPI():
    """Interface for the Pexels Web API"""

    def __init__(self, extractor):
        self.extractor = extractor
        self.root = "https://www.pexels.com/en-us/api"
        # headers sent with every API request
        self.headers = {
            "Accept"        : "*/*",
            "Content-Type"  : "application/json",
            "secret-key"    : "H2jk9uKnhRmL6WPwh89zBezWvr",
            "Authorization" : "",
            "X-Forwarded-CF-Connecting-IP" : "",
            "X-Forwarded-HTTP_CF_IPCOUNTRY": "",
            "X-Forwarded-CF-IPRegionCode"  : "",
            "X-Client-Type" : "react",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Priority"      : "u=4",
        }

    def collections_media(self, collection_id):
        """Yield all media objects of a collection"""
        endpoint = "/v3/collections/{}/media".format(collection_id)
        params = {
            "page"    : "1",
            "per_page": "24",
        }
        return self._pagination(endpoint, params)

    def search_photos(self, query):
        """Yield all photo results for 'query'"""
        endpoint = "/v3/search/photos"
        params = {
            "query"      : query,
            "page"       : "1",
            "per_page"   : "24",
            "orientation": "all",
            "size"       : "all",
            "color"      : "all",
            "sort"       : "popular",
        }
        return self._pagination(endpoint, params)

    def users_media_recent(self, user_id):
        """Yield a user's media objects from the 'recent' listing"""
        endpoint = "/v3/users/{}/media/recent".format(user_id)
        params = {
            "page"    : "1",
            "per_page": "24",
        }
        return self._pagination(endpoint, params)

    def _call(self, endpoint, params):
        """Request 'endpoint' and return the decoded JSON response

        Retries for as long as the server answers with status 429 and
        raises StopExtraction for any other status code >= 300.
        """
        url = self.root + endpoint

        while True:
            response = self.extractor.request(
                url, params=params, headers=self.headers, fatal=None)

            if response.status_code < 300:
                return response.json()

            elif response.status_code == 429:
                # wait 600 seconds, then try the same request again
                self.extractor.wait(seconds=600)

            else:
                self.extractor.log.debug(response.text)
                raise exception.StopExtraction("API request failed")

    def _pagination(self, endpoint, params):
        """Yield the 'data' entries of every result page of 'endpoint'

        Updates params["page"] in place while advancing.
        """
        while True:
            data = self._call(endpoint, params)

            yield from data["data"]

            pagination = data["pagination"]
            if pagination["current_page"] >= pagination["total_pages"]:
                return
            params["page"] = pagination["current_page"] + 1


# -- header of test/results/pexels.py (new file in the same patch) --
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import pexels


# Expected results for the pexels extractors.
# Keys starting with '#' describe the test itself (input URL, extractor
# class, expected file URLs, ...); the remaining keys are expectations
# for the metadata of the extracted posts.
__tests__ = (
# search results, with the full set of expected metadata fields
{
    "#url"     : "https://www.pexels.com/search/garden/",
    "#class"   : pexels.PexelsSearchExtractor,
    "#pattern" : r"https://images\.pexels\.com/photos/\d+/[\w-]+\.jpe?g",
    "#range"   : "1-40",
    "#count"   : 40,

    "alt"           : str,
    "aspect_ratio"  : float,
    "collection_ids": list,
    "colors"        : list,
    "created_at"    : str,
    "date"          : "type:datetime",
    "description"   : str,
    "extension"     : "jpg",
    "feed_at"       : str,
    "filename"      : r"re:pexels-[\w-]+-\d+",
    "width"         : int,
    "height"        : int,
    "id"            : int,
    "image"         : dict,
    "license"       : str,
    "liked"         : False,
    "main_color"    : "len:3",
    "pending"       : False,
    "publish_at"    : str,
    "published"     : True,
    "reactions"     : dict,
    "search_tags"   : "garden",
    "slug"          : str,
    "starred"       : bool,
    "status"        : "approved",
    "tags"          : list,
    "title"         : str,
    "type"          : "photo",
    "updated_at"    : str,
    "user"          : {
        "avatar"    : dict,
        "first_name": str,
        "following" : False,
        "hero"      : bool,
        "id"        : int,
        "slug"      : str,
    },
},

# collection; the pattern accepts photo URLs as well as video downloads
{
    "#url"     : "https://www.pexels.com/collections/summer-solstice-j2zdph3/",
    "#class"   : pexels.PexelsCollectionExtractor,
    "#pattern" : r"https://(images\.pexels\.com/photos/\d+/[\w-]+\.jpe?g|www\.pexels\.com/download/video/\d+/)",
    "#range"   : "1-40",
    "#count"   : 40,

    "collection"   : "summer-solstice-j2zdph3",
    "collection_id": "j2zdph3",
},

# user URL in 'NAME-ID' form
{
    "#url"     : "https://www.pexels.com/@ehioma-osih-109764575",
    "#class"   : pexels.PexelsUserExtractor,
    "#pattern" : r"https://(images\.pexels\.com/photos/\d+/[\w-]+\.jpe?g|www\.pexels\.com/download/video/\d+/)",
    "#range"   : "1-40",
    "#count"   : 40,

    "user": {
        "id": 109764575,
    },
},

{
    "#url"     : "https://www.pexels.com/@azizico/",
    "#comment" : "user URL without ID",
    "#class"   : pexels.PexelsUserExtractor,
    "#range"   : "1-10",
    "#count"   : 10,

    "user": {
        "id": 423972809,
    },
},

{
    "#url"     : "https://www.pexels.com/@109764575",
    "#comment" : "user URL with only ID",
    "#class"   : pexels.PexelsUserExtractor,
    "#range"   : "1-10",
    "#count"   : 10,

    "user": {
        "id": 109764575,
    },
},

# single photo page
{
    "#url"     : "https://www.pexels.com/photo/sun-shining-between-the-trees-in-the-forest-onto-an-asphalt-road-17213600/",
    "#class"   : pexels.PexelsImageExtractor,
    "#urls"    : "https://images.pexels.com/photos/17213600/pexels-photo-17213600.jpeg",
},

)