Files
gallery-dl/test/results/tiktok.py
bassberry fd5f5611f6 [tiktok] extract subtitles and all cover types (#8805)
* Make sure that `img_id`, `audio_id` and `cover_id` fields are always available.
    The values are set '' where they are not applicable.
    Having `img_id` is necessary for the default `archive_fmt`, the other fields are handled for consistency.
* Allow downloading more than one cover.
    The previous behavior is kept as-is, but setting the "covers" option to "all" now grabs all available covers.
* Add support for downloading subtitles
    Allows filtering subtitles by source type (ASR, MT) and language.
* Ensure archive uniqueness for covers and subtitles.
* Update the URL test pattern to include the `image` extension.
    Although Tiktok may serve the covers with jpeg content, the file ending can be `.image`.
    The test before 0c14b164 failed because the asserted URL did not match all cover types, but the now used pattern needs the mentioned file ending.
* Add support for "creator_caption" subtitles in "LC" format.
    These subtitles have the keys "Format" set to "creator_caption" and "Source" to "LC".
* Add "LC" (Local Captions) as a subtitle source type in the documentation
* Code deduplication and renaming subtitle metadata
    Changed the item type from singular `subtitle` to `subtitles`.
    Removed the wrong descriptor `cover` from the subtitles fallback title.
* Refactor subtitle filtering
    The filter is now prepared in `_init` to prevent parsing the same config parameter for every item.
    The `_extract_subtitles` function will still extract if either filter (source or language) matches.
* Generate a `file_id` for subtitles
    Subtitles have multiple fields that determine the unique file, so these are simply concatenated.
    This is similar to the cover types, only with more variations.
* Added tests for subtitles
* fix docs entries
* fix '"covers": "all"'
* simplify some code
* Fix fallback title for subtitles
    Added the missing "f" to the f-string and added "subtitle" to the title.
    The resulting title will look like "TikTok video subtitle #1234567"
2026-01-30 21:01:06 +01:00

412 lines
14 KiB
Python

# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import tiktok
PATTERN = r"https://p1[69]-[^/?#.]+\.tiktokcdn[^/?#.]*\.com/[^/?#]+/\w+~.*\.(jpe?g|image)"
PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r"|https://v\d+m?\.tiktokcdn[^/?#.]*\.com/[^?#]+\?[^/?#]+)"
VIDEO_PATTERN = r"https://v1[69]-webapp-prime.tiktok.com/video/tos/[^?#]+\?[^/?#]+"
OLD_VIDEO_PATTERN = r"https://www.tiktok.com/aweme/v1/play/\?[^/?#]+"
COMBINED_VIDEO_PATTERN = r"(?:" + VIDEO_PATTERN + r")|(?:" + OLD_VIDEO_PATTERN + r")"
USER_PATTERN = r"(https://www.tiktok.com/@([\w_.-]+)/video/(\d+)|" + PATTERN + r")"
SUBTITLE_PATTERN = r"https://v1[69]-[^/?#.]+\.tiktokcdn[^/?#.]*\.com/[^/?#]+/.*"
__tests__ = (
{
"#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630",
"#comment" : "/photo/ link: many photos",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
"#comment" : "/video/ link: many photos",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktokv.com/share/video/7240568259186019630",
"#comment" : "www.tiktokv.com link: many photos",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktok.com/@hullcity/photo/7557376330036153622",
"#comment" : "/photo/ link: single photo",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktok.com/@hullcity/video/7557376330036153622",
"#comment" : "/video/ link: single photo",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktokv.com/share/video/7557376330036153622",
"#comment" : "www.tiktokv.com link: single photo",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktok.com/@hullcity/photo/7553302113757990166",
"#comment" : "/photo/ link: few photos",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktok.com/@hullcity/video/7553302113757990166",
"#comment" : "/video/ link: few photos",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktokv.com/share/video/7553302113757990166",
"#comment" : "www.tiktokv.com link: few photos",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "audio": False},
},
{
"#url" : "https://www.tiktok.com/@ughuwhguweghw/video/1",
"#comment" : "deleted post",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#options" : {"videos": False, "audio": False},
"#count" : 0,
},
{
"#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208",
"#comment" : "Video post",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : COMBINED_VIDEO_PATTERN,
"#options" : {"videos": True, "audio": True},
},
{
"#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208",
"#comment" : "Video post (via yt-dlp)",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#results" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
"#options" : {"videos": "ytdl", "audio": True},
},
{
"#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208",
"#comment" : "video post cover image",
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#count" : 1,
"#options" : {"videos": False, "covers": True},
},
{
"#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208",
"#comment" : "all video post cover images",
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN,
"#count" : 3,
"#options" : {"videos": False, "covers": "all"},
},
{
"#url" : "https://www.tiktok.com/@memezar/photo/7449708266168274208",
"#comment" : "Video post as a /photo/ link",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : COMBINED_VIDEO_PATTERN,
"#options" : {"videos": True, "audio": True},
},
{
"#url" : "https://www.tiktokv.com/share/video/7240568259186019630",
"#comment" : "www.tiktokv.com link: many photos with audio",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#options" : {"audio": True},
"#pattern" : PATTERN_WITH_AUDIO,
"#count" : 17,
},
{
"#url" : "https://www.tiktokv.com/share/video/7240568259186019630",
"#comment" : "www.tiktokv.com link: many photos with audio disabled",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#options" : {"audio": False},
"#pattern" : PATTERN,
"#count" : 16,
},
{
"#url" : "https://www.tiktokv.com/share/video/7449708266168274208",
"#comment" : "Video post as a share link",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : COMBINED_VIDEO_PATTERN,
"#options" : {"videos": True},
},
{
"#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208",
"#comment" : "Skipping video post",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#results" : (),
"#options" : {"videos": False},
},
{
"#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630",
"#comment" : "/photo/ link: many photos with audio",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN_WITH_AUDIO,
"#options" : {"videos": True},
},
{
"#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
"#comment" : "/video/ link: many photos with audio",
"#category" : ("", "tiktok", "post"),
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : PATTERN_WITH_AUDIO,
"#options" : {"videos": True},
},
{
"#url" : "https://www.tiktok.com/@/video/7240568259186019630",
"#class" : tiktok.TiktokPostExtractor,
},
{
"#url" : "https://www.tiktok.com/@veronicaperasso_1/video/7212008840433274118",
"#comment" : "no 'author' (#8189)",
"#class" : tiktok.TiktokPostExtractor,
"#results" : "ytdl:https://www.tiktok.com/@veronicaperasso_1/video/7212008840433274118",
"#options" : {"videos": "ytdl"},
},
{
"#url" : "https://www.tiktok.com/@memezar/video/7588916452304997635",
"#comment" : "default subtitles",
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : SUBTITLE_PATTERN,
"#count" : 1,
"#options" : {"videos": False, "covers": False, "subtitles": True}
},
{
"#url" : "https://www.tiktok.com/@memezar/video/7588916452304997635",
"#comment" : "english subtitles",
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : SUBTITLE_PATTERN,
"#count" : 1,
"#options" : {"videos": False, "covers": False, "subtitles": "eng-US"}
},
# This test is prone to break when more translation agents are added!
{
"#url" : "https://www.tiktok.com/@memezar/video/7588916452304997635",
"#comment" : "combined subtitle filter",
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : SUBTITLE_PATTERN,
"#count" : 6,
"#options" : {"videos": False, "covers": False, "subtitles": "ASR,deu-DE"}
},
# This test is prone to break when new languages or more translation agents are added!
{
"#url" : "https://www.tiktok.com/@memezar/video/7588916452304997635",
"#comment" : "all subtitles",
"#class" : tiktok.TiktokPostExtractor,
"#pattern" : SUBTITLE_PATTERN,
"#count" : 64,
"#options" : {"videos": False, "covers": False, "subtitles": "all"}
},
{
"#url" : "https://vm.tiktok.com/ZGdh4WUhr/",
"#comment" : "vm.tiktok.com link: many photos",
"#category" : ("", "tiktok", "vmpost"),
"#class" : tiktok.TiktokVmpostExtractor,
"#pattern" : tiktok.TiktokPostExtractor.pattern,
},
{
"#url" : "https://vm.tiktok.com/ZGdhVtER2/",
"#comment" : "vm.tiktok.com link: single photo",
"#category" : ("", "tiktok", "vmpost"),
"#class" : tiktok.TiktokVmpostExtractor,
"#pattern" : tiktok.TiktokPostExtractor.pattern,
},
{
"#url" : "https://vm.tiktok.com/ZGdhVW3cu/",
"#comment" : "vm.tiktok.com link: few photos",
"#category" : ("", "tiktok", "vmpost"),
"#class" : tiktok.TiktokVmpostExtractor,
"#pattern" : tiktok.TiktokPostExtractor.pattern,
},
{
"#url" : "https://vm.tiktok.com/ZGdht7cjp/",
"#comment" : "Video post as a VM link",
"#category" : ("", "tiktok", "vmpost"),
"#class" : tiktok.TiktokVmpostExtractor,
"#pattern" : tiktok.TiktokPostExtractor.pattern,
},
{
"#url" : "https://vm.tiktok.com/ZGdh4WUhr/",
"#comment" : "vm.tiktok.com link: many photos with audio",
"#category" : ("", "tiktok", "vmpost"),
"#class" : tiktok.TiktokVmpostExtractor,
"#pattern" : tiktok.TiktokPostExtractor.pattern,
},
{
"#url" : "https://vt.tiktok.com/ZGdhVtER2",
"#comment" : "vt.tiktok.com link: single photo",
"#category" : ("", "tiktok", "vmpost"),
"#class" : tiktok.TiktokVmpostExtractor,
"#pattern" : tiktok.TiktokPostExtractor.pattern,
},
{
"#url" : "https://www.tiktok.com/t/ZGdhVtER2//",
"#comment" : "www.tiktok.com/t/ link: single photo",
"#category" : ("", "tiktok", "vmpost"),
"#class" : tiktok.TiktokVmpostExtractor,
"#pattern" : tiktok.TiktokPostExtractor.pattern,
},
{
"#url" : "https://www.tiktok.com/@chillezy",
"#comment" : "User profile",
"#category" : ("", "tiktok", "user"),
"#class" : tiktok.TiktokUserExtractor,
"#pattern" : USER_PATTERN,
"#count" : 11, # 10 posts + 1 avatar
"#options" : {"videos": True, "audio": True, "tiktok-range": "1-10"},
},
# order-posts currently has no effect if logged-in cookies aren't used.
# {
# "#url" : "https://www.tiktok.com/@chillezy",
# "#comment" : "User profile ascending order",
# "#category" : ("", "tiktok", "user"),
# "#class" : tiktok.TiktokUserExtractor,
# "#results" : "https://www.tiktok.com/@chillezy/video/7112145009356344622",
# "#options" : {"videos": True, "audio": True, "avatar": False, "tiktok-range": "1", "order-posts": "asc"},
# },
# {
# "#url" : "https://www.tiktok.com/@chillezy",
# "#comment" : "User profile popular order",
# "#category" : ("", "tiktok", "user"),
# "#class" : tiktok.TiktokUserExtractor,
# "#results" : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
# "#options" : {"videos": True, "audio": True, "avatar": False, "tiktok-range": "1", "order-posts": "popular"},
# },
{
"#url" : "https://www.tiktok.com/@chillezy",
"#comment" : "User profile via yt-dlp",
"#category" : ("", "tiktok", "user"),
"#class" : tiktok.TiktokUserExtractor,
"#pattern" : USER_PATTERN,
"#count" : 11, # 10 posts + 1 avatar
"#options" : {"videos": True, "audio": True, "tiktok-range": "1-10", "tiktok-user-extractor": "ytdl"},
},
{
"#url" : "https://www.tiktok.com/@chillezy",
"#comment" : "User profile without avatar",
"#category" : ("", "tiktok", "user"),
"#class" : tiktok.TiktokUserExtractor,
"#pattern" : USER_PATTERN,
"#count" : 10, # 10 posts
"#options" : {"videos": True, "audio": True, "avatar": False, "tiktok-range": "1-10"},
},
{
"#url" : "https://www.tiktok.com/@joeysc14/",
"#comment" : "Public user profile with no content",
"#category" : ("", "tiktok", "user"),
"#class" : tiktok.TiktokUserExtractor,
"#pattern" : PATTERN,
"#options" : {"videos": False, "tiktok-range": "1"},
"#count" : 1, # 1 avatar
},
{
"#url" : "https://www.tiktok.com/@chillezy/avatar",
"#class" : tiktok.TiktokAvatarExtractor,
},
{
"#url" : "https://www.tiktok.com/@chillezy/posts",
"#class" : tiktok.TiktokPostsExtractor,
},
{
"#url" : "https://www.tiktok.com/@chillezy/reposts",
"#class" : tiktok.TiktokRepostsExtractor,
},
{
"#url" : "https://www.tiktok.com/@chillezy/stories",
"#class" : tiktok.TiktokStoriesExtractor,
},
{
"#url" : "https://www.tiktok.com/@chillezy/likes",
"#class" : tiktok.TiktokLikesExtractor,
},
{
"#url" : "https://www.tiktok.com/@chillezy/saved",
"#class" : tiktok.TiktokSavedExtractor,
},
)