[pexels] add support (#2286, #4214, #6769)

This commit is contained in:
Mike Fährmann
2025-01-12 16:50:12 +01:00
parent 1ae3ac5e39
commit 91bd3e37f2
6 changed files with 312 additions and 1 deletions

View File

@@ -404,6 +404,7 @@ Default
``zerochan``
* ``"1.0-2.0"``
``flickr``,
``pexels``,
``weibo``,
``[wikimedia]``
* ``"1.4"``

View File

@@ -128,7 +128,7 @@
},
"bilibili":
{
"sleep-request": "2.0-4.0"
"sleep-request": "3.0-6.0"
},
"bluesky":
{
@@ -438,6 +438,10 @@
"files" : ["images", "image_large", "attachments", "postfile", "content"],
"format-images": "download_url"
},
"pexels":
{
"sleep-request": "1.0-2.0"
},
"pillowfort":
{
"username": "",

View File

@@ -667,6 +667,12 @@ Consider all listed sites to potentially be NSFW.
<td>Creators, Posts, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr>
<tr>
<td>Pexels</td>
<td>https://pexels.com/</td>
<td>Collections, individual Images, Search Results, User Profiles</td>
<td></td>
</tr>
<tr>
<td>PhotoVogue</td>
<td>https://www.vogue.com/photovogue/</td>

View File

@@ -124,6 +124,7 @@ modules = [
"nsfwalbum",
"paheal",
"patreon",
"pexels",
"philomena",
"photovogue",
"picarto",

View File

@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://pexels.com/"""
from .common import Extractor, Message
from .. import text, exception
# Regex prefix shared by every extractor pattern in this module:
# an optional http(s) scheme and optional 'www.' in front of 'pexels.com'
BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com"
class PexelsExtractor(Extractor):
    """Common functionality shared by all pexels extractors"""
    category = "pexels"
    root = "https://www.pexels.com"
    archive_fmt = "{id}"
    request_interval = (1.0, 2.0)
    request_interval_min = 0.5

    def _init(self):
        # subclasses reach the site through this API helper
        self.api = PexelsAPI(self)

    def items(self):
        """Yield a Directory and a Url message for every usable post"""
        extra = self.metadata()

        for entry in self.posts():
            # some results keep their fields under 'attributes', with
            # 'type' stored next to it on the wrapping object
            if "attributes" in entry:
                post = entry["attributes"]
                post["type"] = entry["type"]
            else:
                post = entry

            post.update(extra)
            # the last 5 characters of 'created_at' are cut off so that the
            # remainder matches the format string below
            timestamp = post["created_at"][:-5]
            post["date"] = text.parse_datetime(timestamp, "%Y-%m-%dT%H:%M:%S")

            # neither an image nor a video: nothing to download
            if "image" not in post and "video" not in post:
                self.log.warning("%s: Unsupported post type", post.get("id"))
                continue

            if "image" in post:
                link = post["image"]["download_link"]
                url, _, query = link.partition("?")
                # filename is taken from the link's 'dl' query parameter
                name = text.extr(query, "&dl=", "&")
            else:
                media = post["video"]
                name = media["src"]
                url = media["download_link"]

            yield Message.Directory, post
            yield Message.Url, url, text.nameext_from_url(name, post)

    def posts(self):
        """Return the posts to process; empty unless overridden"""
        return ()

    def metadata(self):
        """Return extra fields merged into every post; empty by default"""
        return {}
class PexelsCollectionExtractor(PexelsExtractor):
    """Extractor for a pexels.com collection"""
    subcategory = "collection"
    directory_fmt = ("{category}", "Collections", "{collection}")
    pattern = BASE_PATTERN + r"/collections/((?:[^/?#]*-)?(\w+))"
    example = "https://www.pexels.com/collections/SLUG-a1b2c3/"

    def metadata(self):
        """Expose the full collection path segment and its trailing ID"""
        # group 1 is the whole 'SLUG-ID' segment, group 2 only the ID part
        full_name, ident = self.groups
        return {
            "collection"   : full_name,
            "collection_id": ident,
        }

    def posts(self):
        """Return the collection's media, looked up by collection ID"""
        collection_id = self.groups[1]
        return self.api.collections_media(collection_id)
class PexelsSearchExtractor(PexelsExtractor):
    """Extractor for pexels.com search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Searches", "{search_tags}")
    pattern = BASE_PATTERN + r"/search/([^/?#]+)"
    example = "https://www.pexels.com/search/QUERY/"

    def _query(self):
        """Return the search term from the URL with percent-escapes decoded

        The term is captured straight from the URL path, so a search for
        several words arrives as e.g. 'red%20rose'. Passing that on as-is
        would put the escapes into the directory name and have them encoded
        a second time ('%2520') when sent as a request parameter.
        """
        from urllib.parse import unquote
        return unquote(self.groups[0])

    def metadata(self):
        """Provide the decoded search term as 'search_tags'"""
        return {"search_tags": self._query()}

    def posts(self):
        """Return photo search results for the decoded search term"""
        return self.api.search_photos(self._query())
class PexelsUserExtractor(PexelsExtractor):
    """Extractor for pexels.com user galleries"""
    subcategory = "user"
    directory_fmt = ("{category}", "@{user[slug]}")
    pattern = BASE_PATTERN + r"/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))"
    example = "https://www.pexels.com/@USER-12345/"

    def posts(self):
        """Return the user's recent media"""
        # group 1 is the whole '@...' path segment, group 2 the numeric ID
        # when the URL ends in one; prefer the ID and fall back to the
        # full segment otherwise
        handle, numeric_id = self.groups[0], self.groups[1]
        return self.api.users_media_recent(numeric_id or handle)
class PexelsImageExtractor(PexelsExtractor):
    """Extractor for individual pexels.com photo pages"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/photo/((?:[^/?#]*-)?\d+)"
    example = "https://www.pexels.com/photo/SLUG-12345/"

    def posts(self):
        """Return a 1-tuple holding the photo described by the page"""
        photo_url = "{}/photo/{}/".format(self.root, self.groups[0])
        html = self.request(photo_url).text
        # the post object is read from the page's embedded "next data"
        # under props -> pageProps -> medium
        page_props = self._extract_nextdata(html)["props"]["pageProps"]
        return (page_props["medium"],)
class PexelsAPI:
    """Interface for the Pexels Web API"""

    def __init__(self, extractor):
        self.extractor = extractor
        self.root = "https://www.pexels.com/en-us/api"
        # sent unchanged with every API request
        self.headers = {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "secret-key": "H2jk9uKnhRmL6WPwh89zBezWvr",
            "Authorization": "",
            "X-Forwarded-CF-Connecting-IP": "",
            "X-Forwarded-HTTP_CF_IPCOUNTRY": "",
            "X-Forwarded-CF-IPRegionCode": "",
            "X-Client-Type": "react",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Priority": "u=4",
        }

    def collections_media(self, collection_id):
        """Iterate over all media of the collection 'collection_id'"""
        params = {"page": "1", "per_page": "24"}
        return self._pagination(
            "/v3/collections/{}/media".format(collection_id), params)

    def search_photos(self, query):
        """Iterate over all photo search results for 'query'"""
        params = {
            "query": query,
            "page": "1",
            "per_page": "24",
            "orientation": "all",
            "size": "all",
            "color": "all",
            "sort": "popular",
        }
        return self._pagination("/v3/search/photos", params)

    def users_media_recent(self, user_id):
        """Iterate over the recent media of the user 'user_id'"""
        params = {"page": "1", "per_page": "24"}
        return self._pagination(
            "/v3/users/{}/media/recent".format(user_id), params)

    def _call(self, endpoint, params):
        """Request 'endpoint' and return its decoded JSON response

        A 429 response triggers a 600 second wait followed by a retry;
        any other status of 300 or above aborts the extraction.
        """
        url = self.root + endpoint

        while True:
            response = self.extractor.request(
                url, params=params, headers=self.headers, fatal=None)
            status = response.status_code

            if status < 300:
                return response.json()
            if status != 429:
                self.extractor.log.debug(response.text)
                raise exception.StopExtraction("API request failed")

            # rate limited: wait, then repeat the same request
            self.extractor.wait(seconds=600)

    def _pagination(self, endpoint, params):
        """Yield the 'data' entries of every result page of 'endpoint'

        'params' is updated in place with the next page number.
        """
        while True:
            result = self._call(endpoint, params)
            yield from result["data"]

            info = result["pagination"]
            current = info["current_page"]
            if current >= info["total_pages"]:
                return
            params["page"] = current + 1

110
test/results/pexels.py Normal file
View File

@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import pexels
# Expected results for the pexels extractors, one dict per test case.
# Keys starting with '#' configure the test run itself (URL, expected
# extractor class, URL pattern, item range, item count); every other key
# is compared against the metadata of the extracted files.
__tests__ = (
# search results: the first 40 files must all be JPEG image URLs
{
"#url" : "https://www.pexels.com/search/garden/",
"#class" : pexels.PexelsSearchExtractor,
"#pattern" : r"https://images\.pexels\.com/photos/\d+/[\w-]+\.jpe?g",
"#range" : "1-40",
"#count" : 40,
"alt" : str,
"aspect_ratio" : float,
"collection_ids": list,
"colors" : list,
"created_at" : str,
"date" : "type:datetime",
"description" : str,
"extension" : "jpg",
"feed_at" : str,
"filename" : r"re:pexels-[\w-]+-\d+",
"width" : int,
"height" : int,
"id" : int,
"image" : dict,
"license" : str,
"liked" : False,
"main_color" : "len:3",
"pending" : False,
"publish_at" : str,
"published" : True,
"reactions" : dict,
"search_tags" : "garden",
"slug" : str,
"starred" : bool,
"status" : "approved",
"tags" : list,
"title" : str,
"type" : "photo",
"updated_at" : str,
"user" : {
"avatar" : dict,
"first_name": str,
"following" : False,
"hero" : bool,
"id" : int,
"slug" : str,
},
},
# collection: files may be image URLs or video download URLs
{
"#url" : "https://www.pexels.com/collections/summer-solstice-j2zdph3/",
"#class" : pexels.PexelsCollectionExtractor,
"#pattern" : r"https://(images\.pexels\.com/photos/\d+/[\w-]+\.jpe?g|www\.pexels\.com/download/video/\d+/)",
"#range" : "1-40",
"#count" : 40,
"collection" : "summer-solstice-j2zdph3",
"collection_id": "j2zdph3",
},
# user URL containing both a name and a numeric ID
{
"#url" : "https://www.pexels.com/@ehioma-osih-109764575",
"#class" : pexels.PexelsUserExtractor,
"#pattern" : r"https://(images\.pexels\.com/photos/\d+/[\w-]+\.jpe?g|www\.pexels\.com/download/video/\d+/)",
"#range" : "1-40",
"#count" : 40,
"user": {
"id": 109764575,
},
},
# user URL consisting of a name only
{
"#url" : "https://www.pexels.com/@azizico/",
"#comment" : "user URL without ID",
"#class" : pexels.PexelsUserExtractor,
"#range" : "1-10",
"#count" : 10,
"user": {
"id": 423972809,
},
},
# user URL consisting of a numeric ID only
{
"#url" : "https://www.pexels.com/@109764575",
"#comment" : "user URL with only ID",
"#class" : pexels.PexelsUserExtractor,
"#range" : "1-10",
"#count" : 10,
"user": {
"id": 109764575,
},
},
# single photo page: must yield exactly this one image URL
{
"#url" : "https://www.pexels.com/photo/sun-shining-between-the-trees-in-the-forest-onto-an-asphalt-road-17213600/",
"#class" : pexels.PexelsImageExtractor,
"#urls" : "https://images.pexels.com/photos/17213600/pexels-photo-17213600.jpeg",
},
)