[bunkr] fix metadata extraction (#6805)
This commit is contained in:
@@ -114,20 +114,20 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
|
|
||||||
def fetch_album(self, album_id):
|
def fetch_album(self, album_id):
|
||||||
# album metadata
|
# album metadata
|
||||||
page = self.request(self.root + "/a/" + album_id).text
|
page = self.request(
|
||||||
title, size = text.split_html(text.extr(
|
self.root + "/a/" + album_id, encoding="utf-8").text
|
||||||
page, "<h1", "</span>").partition(">")[2])
|
title = text.unescape(text.unescape(text.extr(
|
||||||
if "&" in title:
|
page, 'property="og:title" content="', '"')))
|
||||||
title = title.replace(
|
|
||||||
"&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
|
|
||||||
|
|
||||||
# files
|
# files
|
||||||
items = list(text.extract_iter(
|
items = list(text.extract_iter(
|
||||||
page, '<div class="grid-images_box', "</a>"))
|
page, '<div class="grid-images_box', "</a>"))
|
||||||
|
|
||||||
return self._extract_files(items), {
|
return self._extract_files(items), {
|
||||||
"album_id" : album_id,
|
"album_id" : album_id,
|
||||||
"album_name" : title,
|
"album_name" : title,
|
||||||
"album_size" : text.extr(size, "(", ")"),
|
"album_size" : text.extr(
|
||||||
|
page, '<span class="font-semibold">(', ')'),
|
||||||
"count" : len(items),
|
"count" : len(items),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -140,8 +140,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
|||||||
|
|
||||||
file = self._extract_file(url)
|
file = self._extract_file(url)
|
||||||
info = text.split_html(item)
|
info = text.split_html(item)
|
||||||
file["name"] = info[0]
|
file["name"] = info[-3]
|
||||||
file["size"] = info[2]
|
file["size"] = info[-2]
|
||||||
file["date"] = text.parse_datetime(
|
file["date"] = text.parse_datetime(
|
||||||
info[-1], "%H:%M:%S %d/%m/%Y")
|
info[-1], "%H:%M:%S %d/%m/%Y")
|
||||||
|
|
||||||
|
|||||||
@@ -12,15 +12,15 @@ __tests__ = (
|
|||||||
"#url" : "https://bunkr.sk/a/Lktg9Keq",
|
"#url" : "https://bunkr.sk/a/Lktg9Keq",
|
||||||
"#category": ("lolisafe", "bunkr", "album"),
|
"#category": ("lolisafe", "bunkr", "album"),
|
||||||
"#class" : bunkr.BunkrAlbumExtractor,
|
"#class" : bunkr.BunkrAlbumExtractor,
|
||||||
"#urls" : "https://i-burger.bunkr.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png?download=true",
|
"#urls" : "https://brg-bk.cdn.gigachad-cdn.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png",
|
||||||
"#sha1_content": "0c8768055e4e20e7c7259608b67799171b691140",
|
"#sha1_content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||||
|
|
||||||
"album_id" : "Lktg9Keq",
|
"album_id" : "Lktg9Keq",
|
||||||
"album_name" : "test テスト \"&>",
|
"album_name" : "test テスト \"&>",
|
||||||
"album_size" : "182 B",
|
"album_size" : "182 bytes",
|
||||||
"count" : 1,
|
"count" : 1,
|
||||||
"extension" : "png",
|
"extension" : "png",
|
||||||
"file" : "https://i-burger.bunkr.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png?download=true",
|
"file" : "https://brg-bk.cdn.gigachad-cdn.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png",
|
||||||
"filename" : "test-テスト-\"&>-QjgneIQv",
|
"filename" : "test-テスト-\"&>-QjgneIQv",
|
||||||
"id" : "QjgneIQv",
|
"id" : "QjgneIQv",
|
||||||
"name" : "test-テスト-\"&>",
|
"name" : "test-テスト-\"&>",
|
||||||
@@ -32,13 +32,13 @@ __tests__ = (
|
|||||||
"#category": ("lolisafe", "bunkr", "album"),
|
"#category": ("lolisafe", "bunkr", "album"),
|
||||||
"#class" : bunkr.BunkrAlbumExtractor,
|
"#class" : bunkr.BunkrAlbumExtractor,
|
||||||
"#urls" : (
|
"#urls" : (
|
||||||
"https://i-milkshake.bunkr.ru/image-sZrQUeOx.jpg?download=true",
|
"https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
|
||||||
),
|
),
|
||||||
"#sha1_content": "caf7c3d3439d94e83b3c24ddaf5a3a48aa057519",
|
"#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8",
|
||||||
|
|
||||||
"album_id" : "iXTTc1o2",
|
"album_id" : "iXTTc1o2",
|
||||||
"album_name" : "test2",
|
"album_name" : "test2",
|
||||||
"album_size" : "534.6 KB",
|
"album_size" : "534.61 KB",
|
||||||
"count" : 1,
|
"count" : 1,
|
||||||
"filename" : r"image-sZrQUeOx",
|
"filename" : r"image-sZrQUeOx",
|
||||||
"id" : r"sZrQUeOx",
|
"id" : r"sZrQUeOx",
|
||||||
@@ -182,12 +182,12 @@ __tests__ = (
|
|||||||
"#url" : "https://bunkr.black/i/image-sZrQUeOx.jpg",
|
"#url" : "https://bunkr.black/i/image-sZrQUeOx.jpg",
|
||||||
"#category": ("lolisafe", "bunkr", "media"),
|
"#category": ("lolisafe", "bunkr", "media"),
|
||||||
"#class" : bunkr.BunkrMediaExtractor,
|
"#class" : bunkr.BunkrMediaExtractor,
|
||||||
"#urls" : "https://i-milkshake.bunkr.ru/image-sZrQUeOx.jpg?download=true",
|
"#urls" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
|
||||||
"#sha1_content": "caf7c3d3439d94e83b3c24ddaf5a3a48aa057519",
|
"#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8",
|
||||||
|
|
||||||
"count" : 1,
|
"count" : 1,
|
||||||
"extension": "jpg",
|
"extension": "jpg",
|
||||||
"file" : "https://i-milkshake.bunkr.ru/image-sZrQUeOx.jpg?download=true",
|
"file" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
|
||||||
"filename" : "image-sZrQUeOx",
|
"filename" : "image-sZrQUeOx",
|
||||||
"id" : "sZrQUeOx",
|
"id" : "sZrQUeOx",
|
||||||
"name" : "image",
|
"name" : "image",
|
||||||
@@ -205,12 +205,12 @@ __tests__ = (
|
|||||||
"#url" : "https://bunkrrr.org/d/dJuETSzKLrUps",
|
"#url" : "https://bunkrrr.org/d/dJuETSzKLrUps",
|
||||||
"#category": ("lolisafe", "bunkr", "media"),
|
"#category": ("lolisafe", "bunkr", "media"),
|
||||||
"#class" : bunkr.BunkrMediaExtractor,
|
"#class" : bunkr.BunkrMediaExtractor,
|
||||||
"#urls" : "https://burger.bunkr.ru/file-r5fmwjdd.zip",
|
"#urls" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip",
|
||||||
"#sha1_content": "102ddd7894fe39b3843098fc51f972a0af938f45",
|
"#sha1_content": "102ddd7894fe39b3843098fc51f972a0af938f45",
|
||||||
|
|
||||||
"count" : 1,
|
"count" : 1,
|
||||||
"extension": "zip",
|
"extension": "zip",
|
||||||
"file" : "https://burger.bunkr.ru/file-r5fmwjdd.zip",
|
"file" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip",
|
||||||
"filename" : "file-r5fmwjdd",
|
"filename" : "file-r5fmwjdd",
|
||||||
"id" : "r5fmwjdd",
|
"id" : "r5fmwjdd",
|
||||||
"name" : "file",
|
"name" : "file",
|
||||||
|
|||||||
Reference in New Issue
Block a user