[tumblr] support URLs without subdomain (#7358)

This commit is contained in:
Mike Fährmann
2025-04-13 09:33:51 +02:00
parent feacdd5d87
commit 21160a8b08
2 changed files with 28 additions and 2 deletions

View File

@@ -17,7 +17,7 @@ import re
BASE_PATTERN = (
r"(?:tumblr:(?:https?://)?([^/]+)|"
r"(?:https?://)?"
r"(?:www\.tumblr\.com/(?:blog/(?:view/)?)?([\w-]+)|"
r"(?:(?:www\.)?tumblr\.com/(?:blog/(?:view/)?)?([\w-]+)|"
r"([\w-]+\.tumblr\.com)))"
)
@@ -357,7 +357,7 @@ class TumblrLikesExtractor(TumblrExtractor):
class TumblrSearchExtractor(TumblrExtractor):
"""Extractor for a Tumblr search"""
subcategory = "search"
pattern = (BASE_PATTERN + r"/search/([^/?#]+)"
pattern = (r"(?:https?://)?(?:www\.)?tumblr\.com/search/([^/?#]+)"
r"(?:/([^/?#]+)(?:/([^/?#]+))?)?(?:/?\?([^#]+))?")
example = "https://www.tumblr.com/search/QUERY"

View File

@@ -92,6 +92,13 @@ __tests__ = (
"#count" : 4,
},
{
"#url" : "https://tumblr.com/mikf123",
"#comment" : "no 'www.' subdomain (#7358)",
"#category": ("", "tumblr", "user"),
"#class" : tumblr.TumblrUserExtractor,
},
{
"#url" : "https://donttrustthetits.tumblr.com/",
"#comment" : "pagination with 'date-max' (#2191) and 'api-key'",
@@ -273,6 +280,12 @@ __tests__ = (
"#class" : tumblr.TumblrPostExtractor,
},
{
"#url" : "https://tumblr.com/smarties-art/686047436641353728",
"#category": ("", "tumblr", "post"),
"#class" : tumblr.TumblrPostExtractor,
},
{
"#url" : "http://demo.tumblr.com/tagged/Times%20Square",
"#category": ("", "tumblr", "tag"),
@@ -360,6 +373,12 @@ __tests__ = (
"#class" : tumblr.TumblrLikesExtractor,
},
{
"#url" : "https://tumblr.com/mikf123/likes",
"#category": ("", "tumblr", "likes"),
"#class" : tumblr.TumblrLikesExtractor,
},
{
"#url" : "https://www.tumblr.com/search/nathan fielder",
"#category": ("", "tumblr", "search"),
@@ -377,4 +396,11 @@ __tests__ = (
"#category": ("", "tumblr", "search"),
"#class" : tumblr.TumblrSearchExtractor,
},
{
"#url" : "https://tumblr.com/search/nathan%20fielder?t=90",
"#category": ("", "tumblr", "search"),
"#class" : tumblr.TumblrSearchExtractor,
},
)