[bunkr] extract correct 'filename' data (#6824)
This commit is contained in:
@@ -70,6 +70,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
self.root = "https://" + domain
|
||||
|
||||
def request(self, url, **kwargs):
|
||||
kwargs["encoding"] = "utf-8"
|
||||
kwargs["allow_redirects"] = False
|
||||
|
||||
while True:
|
||||
@@ -114,8 +115,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
|
||||
def fetch_album(self, album_id):
|
||||
# album metadata
|
||||
page = self.request(
|
||||
self.root + "/a/" + album_id, encoding="utf-8").text
|
||||
page = self.request(self.root + "/a/" + album_id).text
|
||||
title = text.unescape(text.unescape(text.extr(
|
||||
page, 'property="og:title" content="', '"')))
|
||||
|
||||
@@ -140,7 +140,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
|
||||
file = self._extract_file(url)
|
||||
info = text.split_html(item)
|
||||
file["name"] = info[-3]
|
||||
if not file["name"]:
|
||||
file["name"] = info[-3]
|
||||
file["size"] = info[-2]
|
||||
file["date"] = text.parse_datetime(
|
||||
info[-1], "%H:%M:%S %d/%m/%Y")
|
||||
@@ -157,6 +158,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
page = response.text
|
||||
file_url = (text.extr(page, '<source src="', '"') or
|
||||
text.extr(page, '<img src="', '"'))
|
||||
file_name = (text.extr(page, 'property="og:title" content="', '"') or
|
||||
text.extr(page, "<title>", " | Bunkr<"))
|
||||
|
||||
if not file_url:
|
||||
webpage_url = text.unescape(text.rextract(
|
||||
@@ -166,6 +169,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
|
||||
return {
|
||||
"file" : text.unescape(file_url),
|
||||
"name" : text.unescape(file_name),
|
||||
"_http_headers" : {"Referer": response.url},
|
||||
"_http_validate": self._validate,
|
||||
}
|
||||
|
||||
@@ -53,7 +53,13 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
|
||||
if "name" in file:
|
||||
name = file["name"]
|
||||
file["name"] = name.rpartition(".")[0] or name
|
||||
file["id"] = file["filename"].rpartition("-")[2]
|
||||
fid = file["filename"].rpartition("-")[2]
|
||||
if len(fid) == 12:
|
||||
file["id"] = ""
|
||||
file["filename"] = file["name"]
|
||||
else:
|
||||
file["id"] = fid
|
||||
file["filename"] = file["name"] + "-" + fid
|
||||
elif "id" in file:
|
||||
file["name"] = file["filename"]
|
||||
file["filename"] = "{}-{}".format(file["name"], file["id"])
|
||||
|
||||
@@ -224,4 +224,17 @@ __tests__ = (
|
||||
"#urls" : "https://meatballs.bunkr.ru/27-03-2024-Rp-0FfrropA.mp4",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bunkr.site/f/wYGCKbGhSvuAW",
|
||||
"#comment" : "correct 'name' from HTML (#6790)",
|
||||
"#category": ("lolisafe", "bunkr", "media"),
|
||||
"#class" : bunkr.BunkrMediaExtractor,
|
||||
"#urls" : "https://kebab.bunkr.ru/80ca5405-8b8d-4f9f-8167-8b046bb9dc67.mp4",
|
||||
|
||||
"id" : "",
|
||||
"name" : "0hwndshtfmj7hcbut1nd4_source",
|
||||
"filename" : "0hwndshtfmj7hcbut1nd4_source",
|
||||
"extension": "mp4",
|
||||
},
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user