[bunkr] fix metadata extraction (#6805)

This commit is contained in:
Mike Fährmann
2025-01-10 23:30:13 +01:00
parent 1d75c8308c
commit 88f1ef7c3c
2 changed files with 20 additions and 20 deletions

View File

@@ -114,20 +114,20 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def fetch_album(self, album_id):
# album metadata
page = self.request(self.root + "/a/" + album_id).text
title, size = text.split_html(text.extr(
page, "<h1", "</span>").partition(">")[2])
if "&" in title:
title = title.replace(
"&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
page = self.request(
self.root + "/a/" + album_id, encoding="utf-8").text
title = text.unescape(text.unescape(text.extr(
page, 'property="og:title" content="', '"')))
# files
items = list(text.extract_iter(
page, '<div class="grid-images_box', "</a>"))
return self._extract_files(items), {
"album_id" : album_id,
"album_name" : title,
"album_size" : text.extr(size, "(", ")"),
"album_size" : text.extr(
page, '<span class="font-semibold">(', ')'),
"count" : len(items),
}
@@ -140,8 +140,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
file = self._extract_file(url)
info = text.split_html(item)
file["name"] = info[0]
file["size"] = info[2]
file["name"] = info[-3]
file["size"] = info[-2]
file["date"] = text.parse_datetime(
info[-1], "%H:%M:%S %d/%m/%Y")

View File

@@ -12,15 +12,15 @@ __tests__ = (
"#url" : "https://bunkr.sk/a/Lktg9Keq",
"#category": ("lolisafe", "bunkr", "album"),
"#class" : bunkr.BunkrAlbumExtractor,
"#urls" : "https://i-burger.bunkr.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png?download=true",
"#urls" : "https://brg-bk.cdn.gigachad-cdn.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png",
"#sha1_content": "0c8768055e4e20e7c7259608b67799171b691140",
"album_id" : "Lktg9Keq",
"album_name" : "test テスト \"&>",
"album_size" : "182 B",
"album_size" : "182 bytes",
"count" : 1,
"extension" : "png",
"file" : "https://i-burger.bunkr.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png?download=true",
"file" : "https://brg-bk.cdn.gigachad-cdn.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png",
"filename" : "test-テスト-\"&>-QjgneIQv",
"id" : "QjgneIQv",
"name" : "test-テスト-\"&>",
@@ -32,13 +32,13 @@ __tests__ = (
"#category": ("lolisafe", "bunkr", "album"),
"#class" : bunkr.BunkrAlbumExtractor,
"#urls" : (
"https://i-milkshake.bunkr.ru/image-sZrQUeOx.jpg?download=true",
"https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
),
"#sha1_content": "caf7c3d3439d94e83b3c24ddaf5a3a48aa057519",
"#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8",
"album_id" : "iXTTc1o2",
"album_name" : "test2",
"album_size" : "534.6 KB",
"album_size" : "534.61 KB",
"count" : 1,
"filename" : r"image-sZrQUeOx",
"id" : r"sZrQUeOx",
@@ -182,12 +182,12 @@ __tests__ = (
"#url" : "https://bunkr.black/i/image-sZrQUeOx.jpg",
"#category": ("lolisafe", "bunkr", "media"),
"#class" : bunkr.BunkrMediaExtractor,
"#urls" : "https://i-milkshake.bunkr.ru/image-sZrQUeOx.jpg?download=true",
"#sha1_content": "caf7c3d3439d94e83b3c24ddaf5a3a48aa057519",
"#urls" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
"#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8",
"count" : 1,
"extension": "jpg",
"file" : "https://i-milkshake.bunkr.ru/image-sZrQUeOx.jpg?download=true",
"file" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
"filename" : "image-sZrQUeOx",
"id" : "sZrQUeOx",
"name" : "image",
@@ -205,12 +205,12 @@ __tests__ = (
"#url" : "https://bunkrrr.org/d/dJuETSzKLrUps",
"#category": ("lolisafe", "bunkr", "media"),
"#class" : bunkr.BunkrMediaExtractor,
"#urls" : "https://burger.bunkr.ru/file-r5fmwjdd.zip",
"#urls" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip",
"#sha1_content": "102ddd7894fe39b3843098fc51f972a0af938f45",
"count" : 1,
"extension": "zip",
"file" : "https://burger.bunkr.ru/file-r5fmwjdd.zip",
"file" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip",
"filename" : "file-r5fmwjdd",
"id" : "r5fmwjdd",
"name" : "file",