[tenor] add initial support (#6075)

This commit is contained in:
Mike Fährmann
2025-03-03 17:48:22 +01:00
parent 4d2037f6c6
commit 2f3265a8ae
6 changed files with 267 additions and 0 deletions

View File

@@ -4335,6 +4335,36 @@ Description
and click ``Create Token``.
extractor.tenor.format
----------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``["gif", "mp4", "webm", "webp"]``
Description
Name(s) of the preferred animation format(s), in order of preference.
A single string is treated as a comma-separated list of format names.
If a selected format is not available, the next one in the list will be
tried until a format is found.
Possible formats include
* ``"gif"``
* ``"gif_transparent"``
* ``"gifpreview"``
* ``"mediumgif"``
* ``"tinygif"``
* ``"tinygif_transparent"``
* ``"mp4"``
* ``"tinymp4"``
* ``"webm"``
* ``"webp"``
* ``"webp_transparent"``
* ``"tinywebp"``
* ``"tinywebp_transparent"``
extractor.tiktok.audio
----------------------
Type

View File

@@ -593,6 +593,10 @@
"username": "",
"password": ""
},
"tenor":
{
"format": ["gif", "mp4", "webm", "webp"]
},
"tiktok":
{
"audio" : true,

View File

@@ -937,6 +937,12 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries</td>
<td></td>
</tr>
<tr>
<td>Tenor</td>
<td>https://tenor.com/</td>
<td>individual Images, Search Results</td>
<td></td>
</tr>
<tr>
<td>TikTok</td>
<td>https://www.tiktok.com/</td>

View File

@@ -171,6 +171,7 @@ modules = [
"tapas",
"tcbscans",
"telegraph",
"tenor",
"tiktok",
"tmohentai",
"toyhouse",

View File

@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://tenor.com/"""
from .common import Extractor, Message
from .. import text, util
# URL prefix shared by the extractor patterns below; the scheme is optional
BASE_PATTERN = r"(?:https?://)?tenor\.com"
class TenorExtractor(Extractor):
    """Base class for tenor extractors

    Subclasses override gifs() to supply the GIF objects to process and
    may override metadata() to add extra fields to every yielded object.
    """
    category = "tenor"
    root = "https://tenor.com"
    filename_fmt = "{id}{title:? //}.{extension}"
    archive_fmt = "{id}"
    request_interval = (0.5, 1.5)

    def _init(self):
        """Store the 'format' option as a sequence in self.formats

        An unset option selects the built-in default order. A string is
        split on commas; surrounding whitespace is removed and empty
        entries are dropped, so "gif, mp4" selects "gif" and "mp4".
        Any other value (e.g. a list) is used unchanged.
        """
        formats = self.config("format")
        if formats is None:
            self.formats = ("gif", "mp4", "webm", "webp")
        elif isinstance(formats, str):
            self.formats = [
                name
                for name in (part.strip() for part in formats.split(","))
                if name
            ]
        else:
            self.formats = formats

    def items(self):
        """Yield a Directory and a Url message for each available GIF

        GIFs for which none of the selected formats exists are skipped
        with a warning.
        """
        meta = self.metadata()

        for gif in self.gifs():
            fmt = self._extract_format(gif)
            if not fmt:
                self.log.warning("%s: Selected format(s) not available",
                                 gif.get("id"))
                continue

            url = fmt["url"]
            # fall back to 0x0 when the format entry has a missing or
            # empty 'dims' value instead of raising
            gif["width"], gif["height"] = fmt.get("dims") or (0, 0)
            # NOTE(review): drops the last four characters of 'h1_title';
            # presumably meant to strip a trailing " GIF", but it also
            # truncates other endings ("Moving Sticker" -> "Moving Sti")
            gif["title"] = gif["h1_title"][:-4]
            gif["date"] = text.parse_timestamp(gif["created"])
            if meta:
                gif.update(meta)

            yield Message.Directory, gif
            yield Message.Url, url, text.nameext_from_url(url, gif)

    def _extract_format(self, gif):
        """Return the first entry of gif["media_formats"] whose name is
        listed in self.formats, or None if there is no match
        """
        media_formats = gif["media_formats"]
        for fmt in self.formats:
            if fmt in media_formats:
                return media_formats[fmt]
        return None

    def metadata(self):
        """Return extra metadata for every GIF (none by default)"""
        return False

    def gifs(self):
        """Return an iterable of GIF objects (empty by default)"""
        return ()
class TenorImageExtractor(TenorExtractor):
    """Extractor for a single tenor.com /view/ page"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/view/(?:[\w-]*-)?(\d+)"
    example = "https://tenor.com/view/SLUG-1234567890"

    def gifs(self):
        """Return a 1-tuple with the GIF object embedded in the page"""
        gif_id = self.groups[0]
        html = self.request(self.root + "/view/" + gif_id).text

        # the JSON payload sits between the end of the tag carrying
        # id="store-cache" and the closing </script>
        marker = html.index('id="store-cache"')
        payload = text.extract(html, ">", "</script>", marker)[0]
        store = util.json_loads(payload)

        _, entry = store["gifs"]["byId"].popitem()
        return (entry["results"][0],)
class TenorSearchExtractor(TenorExtractor):
    """Extractor for tenor.com search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = BASE_PATTERN + r"/search/([^/?#]+)"
    example = "https://tenor.com/search/QUERY"

    def metadata(self):
        """Derive the search terms from the URL and return them

        The URL component is percent-decoded, a trailing "gifs" segment
        is removed, and the remaining dashes become spaces. The result
        is also stored in self.search_tags for use by gifs().
        """
        terms = text.unquote(self.groups[0])
        if terms == "gifs" or terms.endswith("-gifs"):
            # cut off "-gifs" (or the whole string when it is just "gifs")
            terms = terms[:-5]

        self.search_tags = terms.replace("-", " ")
        return {"search_tags": self.search_tags}

    def gifs(self):
        """Yield search results page by page

        Follows the 'next' value of each response until it is empty.
        Reads self.search_tags, which metadata() sets.
        """
        endpoint = "https://tenor.googleapis.com/v2/search"

        media_filter = ",".join((
            "gif", "gif_transparent", "mediumgif", "tinygif",
            "tinygif_transparent", "webp", "webp_transparent",
            "tinywebp", "tinywebp_transparent", "tinymp4", "mp4", "webm",
            "originalgif", "gifpreview",
        ))
        result_fields = (
            "id", "media_formats", "title",
            "h1_title", "long_title", "itemurl",
            "url", "created", "user",
            "shares", "embed", "hasaudio",
            "policy_status", "source_id", "flags",
            "tags", "content_rating", "bg_color",
            "legacy_info", "geographic_restriction",
            "content_description",
        )
        fields = "next," + ",".join(
            "results." + name for name in result_fields)

        params = {
            "appversion"   : "browser-r20250225-1",
            "prettyPrint"  : "false",
            "key"          : "AIzaSyC-P6_qz3FzCoXGLk6tgitZo4jEJ5mLzD8",
            "client_key"   : "tenor_web",
            "locale"       : "en",
            "anon_id"      : "",
            "q"            : self.search_tags,
            "limit"        : "50",
            "contentfilter": "low",
            "media_filter" : media_filter,
            "fields"       : fields,
            "searchfilter" : "none",
            "pos"          : None,
            "component"    : "web_desktop",
        }
        headers = {
            "Referer": self.root + "/",
            "Origin" : self.root,
        }

        while True:
            response = self.request(url=endpoint, params=params,
                                    headers=headers)
            data = response.json()

            yield from data["results"]

            cursor = data.get("next")
            params["pos"] = cursor
            if not cursor:
                return

91
test/results/tenor.py Normal file
View File

@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import tenor
# Expected results for the tenor extractors: keys starting with '#' carry
# test directives, all other keys are expected metadata values.
__tests__ = (
# Single /view/ page without options: the expected URL is the '.gif' variant
{
"#url" : "https://tenor.com/view/moving-gif-8525772382434057283",
"#class": tenor.TenorImageExtractor,
"#urls" : "https://media1.tenor.com/m/dlGgz3LRXEMAAAAC/moving.gif",
"bg_color" : "",
"content_description": "an illustration of a tree with green leaves",
"created" : 1687512768.687436,
"date" : "dt:2023-06-23 09:32:48",
"embed" : "<div class=\"tenor-gif-embed\" data-postid=\"8525772382434057283\" data-share-method=\"host\" data-aspect-ratio=\"0.937751\" data-width=\"100%\"><a href=\"https://tenor.com/view/moving-gif-8525772382434057283\">Moving Sticker</a>from <a href=\"https://tenor.com/search/moving-stickers\">Moving Stickers</a></div> <script type=\"text/javascript\" async src=\"https://tenor.com/embed.js\"></script>",
"extension": "gif",
"filename" : "moving",
"h1_title" : "Moving Sticker",
"hasaudio" : False,
"width" : 467,
"height" : 498,
"id" : "8525772382434057283",
"index" : 0,
"itemurl" : "https://tenor.com/view/moving-gif-8525772382434057283",
"long_title": "Moving Sticker - Moving Stickers",
"media_formats": dict,
"policy_status": "POLICY_STATUS_UNSPECIFIED",
"shares" : 42528,
"source_id": "",
# NOTE(review): 'title' equals 'h1_title' without its last four
# characters, so "Moving Sticker" becomes "Moving Sti" - confirm that
# this truncation is intended for non-" GIF" titles
"title" : "Moving Sti",
"url" : "https://tenor.com/kjYh53rdMGt.gif",
"flags" : [
"static",
"sticker",
],
"legacy_info": {
"post_id": "200777050"
},
"tags": [
"moving",
],
"user": {
"avatars" : {},
"flags" : [],
"partnerbanner": {},
"partnercategories": [],
"partnerlinks": [],
"partnername" : "",
"profile_id" : "11989898659889539214",
"tagline" : "",
"url" : "https://tenor.com/users/imenabdelmalek",
"userid" : "0",
"username" : "imenabdelmalek",
"usertype" : "user",
},
},
# 'format' fallback: "mkv" and "foobar" precede "webp" in the option list,
# and the expected URL is the '.webp' variant
{
"#url" : "https://tenor.com/view/moving-gif-8525772382434057283",
"#comment": "'format' option",
"#class" : tenor.TenorImageExtractor,
"#options": {"format": ["mkv", "foobar", "webp"]},
"#urls" : "https://media.tenor.com/dlGgz3LRXEMAAAAx/moving.webp",
},
# Search URL with a trailing "-gifs" segment, which is absent from
# 'search_tags'. Presumably the 80-item range is meant to span more than
# one result page of the search extractor - TODO confirm
{
"#url" : "https://tenor.com/search/trees-gifs",
"#class" : tenor.TenorSearchExtractor,
"#pattern": r"https://media\d+\.tenor\.com/m/[\w-]+/[\w%-]+\.gif",
"#range" : "1-80",
"#count" : 80,
"search_tags": "trees",
},
# Percent-encoded query: "%3C&%3E" is expected as "<&>" and the dashes
# between words as spaces in 'search_tags'
{
"#url" : "https://tenor.com/search/trees-water-wind-sun-%3C&%3E-gifs",
"#class" : tenor.TenorSearchExtractor,
"#pattern": r"https://media\d+\.tenor\.com/m/[\w-]+/[\w%-]+\.gif",
"#range" : "1-80",
"#count" : 80,
"search_tags": "trees water wind sun <&>",
},
)