[directlink] use domain as 'subcategory' (#6703)

This commit is contained in:
Mike Fährmann
2024-12-22 17:19:56 +01:00
parent 18491a4ce6
commit de9442ba75
2 changed files with 70 additions and 38 deletions

View File

@@ -25,7 +25,8 @@ class DirectlinkExtractor(Extractor):
def __init__(self, match):
    """Initialize the extractor from a URL regex match.

    Stores the match's named groups in ``self.data`` and sets
    ``self.subcategory`` to the last two dot-separated labels of the
    matched ``domain`` group.
    """
    Extractor.__init__(self, match)
    # Single assignment; the named groups are needed both on the
    # instance and locally for the subcategory computation below.
    self.data = data = match.groupdict()
    # Keep at most the last two labels of the host name, e.g.
    # "en.wikipedia.org" -> "wikipedia.org"; a host with fewer than
    # three labels is used unchanged.
    # NOTE(review): multi-part suffixes are not special-cased, so
    # "example.co.uk" yields "co.uk" - confirm this is acceptable.
    self.subcategory = ".".join(data["domain"].rsplit(".", 2)[-2:])
def items(self):
data = self.data

View File

@@ -10,164 +10,195 @@ from gallery_dl.extractor import directlink
__tests__ = (
{
"#url" : "https://en.wikipedia.org/static/images/project-logos/enwiki.png",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "wikipedia.org"),
"#class" : directlink.DirectlinkExtractor,
"#sha1_url" : "18c5d00077332e98e53be9fed2ee4be66154b88d",
"#sha1_metadata": "105770a3f4393618ab7b811b731b22663b5d3794",
"#sha1_metadata": "326ac83735d3a103ccd71f2aeea831f6d62e7836",
"#sha1_content" : "e6f58aaec8f31eb222f9e10fa9e9f64b79ae888c",
"category" : "directlink",
"subcategory": "wikipedia.org",
"domain" : "en.wikipedia.org",
"path" : "static/images/project-logos",
"filename" : "enwiki",
"extension" : "png",
"query" : None,
"fragment" : None,
},
{
"#url" : "https://example.org/file.webm",
"#comment" : "empty path",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
"#sha1_url" : "2d807ed7059d1b532f1bb71dc24b510b80ff943f",
"#sha1_metadata": "29dad729c40fb09349f83edafa498dba1297464a",
"#urls" : "https://example.org/file.webm",
"domain" : "example.org",
"path" : "",
"filename" : "file",
"extension": "webm",
},
{
"#url" : "https://example.org/path/to/file.webm?que=1?&ry=2/#fragment",
"#comment" : "more complex example",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
"#sha1_url" : "6fb1061390f8aada3db01cb24b51797c7ee42b31",
"#sha1_metadata": "3d7abc31d45ba324e59bc599c3b4862452d5f29c",
"#urls" : "https://example.org/path/to/file.webm?que=1?&ry=2/#fragment",
"domain" : "example.org",
"path" : "path/to",
"filename" : "file",
"extension": "webm",
"query" : "que=1?&ry=2/",
"fragment" : "fragment",
},
{
"#url" : "https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E",
"#comment" : "percent-encoded characters",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
"#sha1_url" : "2627e8140727fdf743f86fe18f69f99a052c9718",
"#sha1_metadata": "831790fddda081bdddd14f96985ab02dc5b5341f",
"#urls" : "https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E",
"domain" : "example.org",
"path" : "'<#",
"filename" : "#>'",
"extension": "jpg",
"query" : "key=<&>",
"fragment" : None,
},
{
"#url" : "https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw.JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpPmZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG",
"#comment" : "upper case file extension (#296)",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "pstatic.net"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://räksmörgås.josefsson.org/raksmorgas.jpg",
"#comment" : "internationalized domain name",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "josefsson.org"),
"#class" : directlink.DirectlinkExtractor,
"#sha1_url" : "a65667f670b194afbd1e3ea5e7a78938d36747da",
"#sha1_metadata": "fd5037fe86eebd4764e176cbaf318caec0f700be",
"#urls" : "https://räksmörgås.josefsson.org/raksmorgas.jpg",
"domain" : "räksmörgås.josefsson.org",
"path" : "",
"filename" : "raksmorgas",
"extension": "jpg",
"query" : None,
"fragment" : None,
},
{
"#url" : "https://example.org/file.gif",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.bmp",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.svg",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.webp",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.avif",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.heic",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.psd",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.mp4",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.m4v",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.mov",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.mkv",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.ogg",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.ogm",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.ogv",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.wav",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.mp3",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.opus",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.zip",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.rar",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.7z",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.pdf",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},
{
"#url" : "https://example.org/file.swf",
"#category": ("", "directlink", ""),
"#category": ("", "directlink", "example.org"),
"#class" : directlink.DirectlinkExtractor,
},