From 2f3265a8aefb4091a9e3b1daaa37cdb774d1a02f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Mon, 3 Mar 2025 17:48:22 +0100
Subject: [PATCH] [tenor] add initial support (#6075)

---
Reviewer notes (text between the "---" line and the first "diff --git"
is not part of the commit):

* TenorExtractor._init() reads the "format" option; a string value is
  split on ",", and the default is ("gif", "mp4", "webm", "webp").
* TenorExtractor.items() uses the first configured format that is present
  in a GIF's "media_formats"; if none is present the GIF is skipped with a
  warning.
* "title" is computed as h1_title[:-4]; for the sticker used in the test
  data ("Moving Sticker") this produces "Moving Sti".
* TenorImageExtractor.gifs() parses the JSON payload of the page element
  carrying id="store-cache".
* TenorSearchExtractor.gifs() pages through the v2 search endpoint, feeding
  each response's "next" value back as "pos" until it is empty.

 docs/configuration.rst           |  30 +++++++
 docs/gallery-dl.conf             |   4 +
 docs/supportedsites.md           |   6 ++
 gallery_dl/extractor/__init__.py |   1 +
 gallery_dl/extractor/tenor.py    | 135 +++++++++++++++++++++++++++++++
 test/results/tenor.py            |  91 +++++++++++++++++++++
 6 files changed, 267 insertions(+)
 create mode 100644 gallery_dl/extractor/tenor.py
 create mode 100644 test/results/tenor.py

diff --git a/docs/configuration.rst b/docs/configuration.rst
index 2ce6eada..b8ae95d6 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -4335,6 +4335,36 @@ Description
     and click ``Create Token``.
 
 
+extractor.tenor.format
+----------------------
+Type
+    * ``string``
+    * ``list`` of ``strings``
+Default
+    ``["gif", "mp4", "webm", "webp"]``
+Description
+    List of names of the preferred animation format.
+
+    If a selected format is not available, the next one in the list will be
+    tried until a format is found.
+
+    Possible formats include
+
+    * ``"gif"``
+    * ``"gif_transparent"``
+    * ``"gifpreview"``
+    * ``"mediumgif"``
+    * ``"tinygif"``
+    * ``"tinygif_transparent"``
+    * ``"mp4"``
+    * ``"tinymp4"``
+    * ``"webm"``
+    * ``"webp"``
+    * ``"webp_transparent"``
+    * ``"tinywebp"``
+    * ``"tinywebp_transparent"``
+
+
 extractor.tiktok.audio
 ----------------------
 Type
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 3564e86a..16d1f604 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -593,6 +593,10 @@
             "username": "",
             "password": ""
         },
+        "tenor":
+        {
+            "format": ["gif", "mp4", "webm", "webp"]
+        },
         "tiktok":
         {
             "audio" : true,
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 325acd29..1a4d5959 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -937,6 +937,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Galleries</td>
     <td></td>
 </tr>
+<tr>
+    <td>Tenor</td>
+    <td>https://tenor.com/</td>
+    <td>individual Images, Search Results</td>
+    <td></td>
+</tr>
 <tr>
     <td>TikTok</td>
     <td>https://www.tiktok.com/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 00b22d4e..8208241e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -171,6 +171,7 @@ modules = [
     "tapas",
     "tcbscans",
     "telegraph",
+    "tenor",
     "tiktok",
     "tmohentai",
     "toyhouse",
diff --git a/gallery_dl/extractor/tenor.py b/gallery_dl/extractor/tenor.py
new file mode 100644
index 00000000..3ea15e8f
--- /dev/null
+++ b/gallery_dl/extractor/tenor.py
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://tenor.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?tenor\.com"
+
+
+class TenorExtractor(Extractor):
+    """Base class for tenor extractors"""
+    category = "tenor"
+    root = "https://tenor.com"
+    filename_fmt = "{id}{title:? //}.{extension}"
+    archive_fmt = "{id}"
+    request_interval = (0.5, 1.5)
+
+    def _init(self):
+        formats = self.config("format")
+        if formats is None:
+            self.formats = ("gif", "mp4", "webm", "webp")
+        else:
+            if isinstance(formats, str):
+                formats = formats.split(",")
+            self.formats = formats
+
+    def items(self):
+        meta = self.metadata()
+
+        for gif in self.gifs():
+            fmt = self._extract_format(gif)
+            if not fmt:
+                self.log.warning("%s: Selected format(s) not available",
+                                 gif.get("id"))
+                continue
+
+            url = fmt["url"]
+            gif["width"], gif["height"] = fmt["dims"]
+            gif["title"] = gif["h1_title"][:-4]
+            gif["date"] = text.parse_timestamp(gif["created"])
+            if meta:
+                gif.update(meta)
+
+            yield Message.Directory, gif
+            yield Message.Url, url, text.nameext_from_url(url, gif)
+
+    def _extract_format(self, gif):
+        media_formats = gif["media_formats"]
+        for fmt in self.formats:
+            if fmt in media_formats:
+                return media_formats[fmt]
+
+    def metadata(self):
+        return False
+
+    def gifs(self):
+        return ()
+
+
+class TenorImageExtractor(TenorExtractor):
+    subcategory = "image"
+    pattern = BASE_PATTERN + r"/view/(?:[\w-]*-)?(\d+)"
+    example = "https://tenor.com/view/SLUG-1234567890"
+
+    def gifs(self):
+        url = "{}/view/{}".format(self.root, self.groups[0])
+        page = self.request(url).text
+        pos = page.index('id="store-cache"')
+        data = util.json_loads(text.extract(page, ">", "</script>", pos)[0])
+        return (data["gifs"]["byId"].popitem()[1]["results"][0],)
+
+
+class TenorSearchExtractor(TenorExtractor):
+    subcategory = "search"
+    directory_fmt = ("{category}", "{search_tags}")
+    pattern = BASE_PATTERN + r"/search/([^/?#]+)"
+    example = "https://tenor.com/search/QUERY"
+
+    def metadata(self):
+        query = text.unquote(self.groups[0])
+        rest, _, last = query.rpartition("-")
+        if last == "gifs":
+            query = rest
+        self.search_tags = query.replace("-", " ")
+
+        return {"search_tags": self.search_tags}
+
+    def gifs(self):
+        url = "https://tenor.googleapis.com/v2/search"
+        params = {
+            "appversion": "browser-r20250225-1",
+            "prettyPrint": "false",
+            "key": "AIzaSyC-P6_qz3FzCoXGLk6tgitZo4jEJ5mLzD8",
+            "client_key": "tenor_web",
+            "locale": "en",
+            "anon_id": "",
+            "q": self.search_tags,
+            "limit": "50",
+            "contentfilter": "low",
+            "media_filter": "gif,gif_transparent,mediumgif,tinygif,"
+                            "tinygif_transparent,webp,webp_transparent,"
+                            "tinywebp,tinywebp_transparent,tinymp4,mp4,webm,"
+                            "originalgif,gifpreview",
+            "fields": "next,results.id,results.media_formats,results.title,"
+                      "results.h1_title,results.long_title,results.itemurl,"
+                      "results.url,results.created,results.user,"
+                      "results.shares,results.embed,results.hasaudio,"
+                      "results.policy_status,results.source_id,results.flags,"
+                      "results.tags,results.content_rating,results.bg_color,"
+                      "results.legacy_info,results.geographic_restriction,"
+                      "results.content_description",
+            "searchfilter": "none",
+            "pos": None,
+            "component": "web_desktop",
+        }
+        headers = {
+            "Referer": self.root + "/",
+            "Origin" : self.root,
+        }
+
+        while True:
+            data = self.request(url, params=params, headers=headers).json()
+
+            yield from data["results"]
+
+            params["pos"] = data.get("next")
+            if not params["pos"]:
+                return
diff --git a/test/results/tenor.py b/test/results/tenor.py
new file mode 100644
index 00000000..fb216822
--- /dev/null
+++ b/test/results/tenor.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import tenor
+
+
+__tests__ = (
+{
+    "#url"  : "https://tenor.com/view/moving-gif-8525772382434057283",
+    "#class": tenor.TenorImageExtractor,
+    "#urls" : "https://media1.tenor.com/m/dlGgz3LRXEMAAAAC/moving.gif",
+
+    "bg_color" : "",
+    "content_description": "an illustration of a tree with green leaves",
+    "created"  : 1687512768.687436,
+    "date"     : "dt:2023-06-23 09:32:48",
+    "embed"    : str,  # HTML embed snippet; only its type is checked here
+    "extension": "gif",
+    "filename" : "moving",
+    "h1_title" : "Moving Sticker",
+    "hasaudio" : False,
+    "width"    : 467,
+    "height"   : 498,
+    "id"       : "8525772382434057283",
+    "index"    : 0,
+    "itemurl"  : "https://tenor.com/view/moving-gif-8525772382434057283",
+    "long_title": "Moving Sticker - Moving Stickers",
+    "media_formats": dict,
+    "policy_status": "POLICY_STATUS_UNSPECIFIED",
+    "shares"   : 42528,
+    "source_id": "",
+    "title"    : "Moving Sti",
+    "url"      : "https://tenor.com/kjYh53rdMGt.gif",
+    "flags"    : [
+        "static",
+        "sticker",
+    ],
+    "legacy_info": {
+        "post_id": "200777050"
+    },
+    "tags": [
+        "moving",
+    ],
+    "user": {
+        "avatars"     : {},
+        "flags"       : [],
+        "partnerbanner": {},
+        "partnercategories": [],
+        "partnerlinks": [],
+        "partnername" : "",
+        "profile_id"  : "11989898659889539214",
+        "tagline"     : "",
+        "url"         : "https://tenor.com/users/imenabdelmalek",
+        "userid"      : "0",
+        "username"    : "imenabdelmalek",
+        "usertype"    : "user",
+    },
+},
+
+{
+    "#url"    : "https://tenor.com/view/moving-gif-8525772382434057283",
+    "#comment": "'format' option",
+    "#class"  : tenor.TenorImageExtractor,
+    "#options": {"format": ["mkv", "foobar", "webp"]},
+    "#urls"   : "https://media.tenor.com/dlGgz3LRXEMAAAAx/moving.webp",
+},
+
+{
+    "#url"    : "https://tenor.com/search/trees-gifs",
+    "#class"  : tenor.TenorSearchExtractor,
+    "#pattern": r"https://media\d+\.tenor\.com/m/[\w-]+/[\w%-]+\.gif",
+    "#range"  : "1-80",
+    "#count"  : 80,
+
+    "search_tags": "trees",
+},
+
+{
+    "#url"    : "https://tenor.com/search/trees-water-wind-sun-%3C&%3E-gifs",
+    "#class"  : tenor.TenorSearchExtractor,
+    "#pattern": r"https://media\d+\.tenor\.com/m/[\w-]+/[\w%-]+\.gif",
+    "#range"  : "1-80",
+    "#count"  : 80,
+
+    "search_tags": "trees water wind sun <&>",
+},
+
+)