[bunkr] fix albums with more than 100 files (#8150)
This commit is contained in:
@@ -62,7 +62,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
root = "https://bunkr.si"
|
||||
root_dl = "https://get.bunkrr.su"
|
||||
root_api = "https://apidl.bunkr.ru"
|
||||
archive_fmt = "{album_id}_{id|id_url}"
|
||||
archive_fmt = "{album_id}_{id|id_url|slug}"
|
||||
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
|
||||
example = "https://bunkr.si/a/ID"
|
||||
|
||||
@@ -134,13 +134,13 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
|
||||
def fetch_album(self, album_id):
|
||||
# album metadata
|
||||
page = self.request(self.root + "/a/" + album_id).text
|
||||
page = self.request(f"{self.root}/a/{album_id}?advanced=1").text
|
||||
title = text.unescape(text.unescape(text.extr(
|
||||
page, 'property="og:title" content="', '"')))
|
||||
|
||||
# files
|
||||
items = list(text.extract_iter(
|
||||
page, '<div class="grid-images_box', "</a>"))
|
||||
items = text.extr(
|
||||
page, "window.albumFiles = [", "</script>").split("\n},\n")
|
||||
|
||||
return self._extract_files(items), {
|
||||
"album_id" : album_id,
|
||||
@@ -156,17 +156,19 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
|
||||
for item in items:
|
||||
try:
|
||||
url = text.unescape(text.extr(item, ' href="', '"'))
|
||||
if url[0] == "/":
|
||||
url = self.root + url
|
||||
data_id = text.extr(item, " id: ", ",").strip()
|
||||
file = self._extract_file(data_id)
|
||||
|
||||
file = self._extract_file(url)
|
||||
info = text.split_html(item)
|
||||
if not file["name"]:
|
||||
file["name"] = info[-3]
|
||||
file["size"] = info[-2]
|
||||
file["date"] = text.parse_datetime(
|
||||
info[-1], "%H:%M:%S %d/%m/%Y")
|
||||
file["name"] = util.json_loads(text.extr(
|
||||
item, 'original:', ',\n'))
|
||||
file["slug"] = util.json_loads(text.extr(
|
||||
item, 'slug: ', ',\n'))
|
||||
file["uuid"] = text.extr(
|
||||
item, 'name: "', ".")
|
||||
file["size"] = text.parse_int(text.extr(
|
||||
item, "size: ", " ,\n"))
|
||||
file["date"] = text.parse_datetime(text.extr(
|
||||
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
|
||||
|
||||
yield file
|
||||
except exception.ControlException:
|
||||
@@ -175,11 +177,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
self.log.error("%s: %s", exc.__class__.__name__, exc)
|
||||
self.log.debug("", exc_info=exc)
|
||||
|
||||
def _extract_file(self, webpage_url):
|
||||
page = self.request(webpage_url).text
|
||||
data_id = text.extr(page, 'data-file-id="', '"')
|
||||
referer = self.root_dl + "/file/" + data_id
|
||||
|
||||
def _extract_file(self, data_id):
|
||||
referer = f"{self.root_dl}/file/{data_id}"
|
||||
headers = {"Referer": referer, "Origin": self.root_dl}
|
||||
data = self.request_json(self.endpoint, method="POST", headers=headers,
|
||||
json={"id": data_id})
|
||||
@@ -190,14 +189,9 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
else:
|
||||
file_url = data["url"]
|
||||
|
||||
file_name = text.extr(page, "<h1", "<").rpartition(">")[2]
|
||||
fallback = text.extr(page, 'property="og:url" content="', '"')
|
||||
|
||||
return {
|
||||
"file" : file_url,
|
||||
"name" : text.unescape(file_name),
|
||||
"id_url" : data_id,
|
||||
"_fallback" : (fallback,) if fallback else (),
|
||||
"_http_headers" : {"Referer": referer},
|
||||
"_http_validate": self._validate,
|
||||
}
|
||||
@@ -222,7 +216,13 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
|
||||
|
||||
def fetch_album(self, album_id):
|
||||
try:
|
||||
file = self._extract_file(self.root + album_id)
|
||||
page = self.request(f"{self.root}{album_id}").text
|
||||
data_id = text.extr(page, 'data-file-id="', '"')
|
||||
file = self._extract_file(data_id)
|
||||
file["name"] = text.unescape(text.extr(
|
||||
page, "<h1", "<").rpartition(">")[2])
|
||||
file["slug"] = album_id.rpartition("/")[2]
|
||||
file["uuid"] = text.extr(page, "/thumbs/", ".")
|
||||
except Exception as exc:
|
||||
self.log.error("%s: %s", exc.__class__.__name__, exc)
|
||||
return (), {}
|
||||
|
||||
@@ -12,8 +12,11 @@ __tests__ = (
|
||||
"#url" : "https://bunkr.sk/a/Lktg9Keq",
|
||||
"#category": ("lolisafe", "bunkr", "album"),
|
||||
"#class" : bunkr.BunkrAlbumExtractor,
|
||||
"#results" : "https://brg-bk.cdn.gigachad-cdn.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png?n=test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E.png",
|
||||
"#sha1_content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
"#results" : """https://brg-bk.cdn.gigachad-cdn.ru/test-テスト-"&>-QjgneIQv.png""",
|
||||
"#sha1_content": (
|
||||
"0c8768055e4e20e7c7259608b67799171b691140",
|
||||
"961b25d85b5f5bd18cbe3e847ac55925f14d0286",
|
||||
),
|
||||
|
||||
"album_id" : "Lktg9Keq",
|
||||
"album_name" : "test テスト \"&>",
|
||||
@@ -25,6 +28,7 @@ __tests__ = (
|
||||
"id" : "QjgneIQv",
|
||||
"id_url" : "1044478",
|
||||
"name" : "test-テスト-\"&>",
|
||||
"slug" : "test-テスト-\"&>-QjgneIQv.png",
|
||||
"num" : 1,
|
||||
},
|
||||
|
||||
@@ -33,7 +37,7 @@ __tests__ = (
|
||||
"#category": ("lolisafe", "bunkr", "album"),
|
||||
"#class" : bunkr.BunkrAlbumExtractor,
|
||||
"#results" : (
|
||||
"https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
|
||||
"https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg",
|
||||
),
|
||||
"#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8",
|
||||
|
||||
@@ -53,8 +57,7 @@ __tests__ = (
|
||||
"#category": ("lolisafe", "bunkr", "album"),
|
||||
"#class" : bunkr.BunkrAlbumExtractor,
|
||||
"#pattern" : r"https://(i-)?meatballs.bunkr.ru/\w+",
|
||||
"#range" : "5-",
|
||||
"#count" : 3,
|
||||
"#count" : 4,
|
||||
},
|
||||
|
||||
{
|
||||
@@ -65,7 +68,9 @@ __tests__ = (
|
||||
"#count" : 2,
|
||||
|
||||
"id" : "",
|
||||
"id_url": {"UPKDHBf0CvrCe", "zQgSePr1f4HZ2"},
|
||||
"id_url": {"43478756", "43478551"},
|
||||
"slug" : {"UPKDHBf0CvrCe", "zQgSePr1f4HZ2"},
|
||||
"uuid" : "iso:uuid",
|
||||
},
|
||||
|
||||
{
|
||||
@@ -195,12 +200,12 @@ __tests__ = (
|
||||
"#url" : "https://bunkr.black/i/image-sZrQUeOx.jpg",
|
||||
"#category": ("lolisafe", "bunkr", "media"),
|
||||
"#class" : bunkr.BunkrMediaExtractor,
|
||||
"#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
|
||||
"#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg",
|
||||
"#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8",
|
||||
|
||||
"count" : 1,
|
||||
"extension": "jpg",
|
||||
"file" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
|
||||
"file" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg",
|
||||
"filename" : "image-sZrQUeOx",
|
||||
"id" : "sZrQUeOx",
|
||||
"name" : "image",
|
||||
@@ -211,19 +216,19 @@ __tests__ = (
|
||||
"#comment" : "/f/ URL",
|
||||
"#category": ("lolisafe", "bunkr", "media"),
|
||||
"#class" : bunkr.BunkrMediaExtractor,
|
||||
"#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg",
|
||||
"#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bunkrrr.org/d/dJuETSzKLrUps",
|
||||
"#category": ("lolisafe", "bunkr", "media"),
|
||||
"#class" : bunkr.BunkrMediaExtractor,
|
||||
"#results" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip?n=file.zip",
|
||||
"#results" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip",
|
||||
"#sha1_content": "102ddd7894fe39b3843098fc51f972a0af938f45",
|
||||
|
||||
"count" : 1,
|
||||
"extension": "zip",
|
||||
"file" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip?n=file.zip",
|
||||
"file" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip",
|
||||
"filename" : "file-r5fmwjdd",
|
||||
"id" : "r5fmwjdd",
|
||||
"id_url" : "38792076",
|
||||
@@ -235,7 +240,7 @@ __tests__ = (
|
||||
"#comment" : "redirect to '/f/rEeTUL8MXR17A' (#6790)",
|
||||
"#category": ("lolisafe", "bunkr", "media"),
|
||||
"#class" : bunkr.BunkrMediaExtractor,
|
||||
"#results" : "https://meatballs.bunkr.ru/27-03-2024-Rp-0FfrropA.mp4",
|
||||
"#results" : "https://c.bunkr-cache.se/hAVFkYK1bLbSaaKq/27-03-2024-Rp-0FfrropA.mp4",
|
||||
},
|
||||
|
||||
{
|
||||
@@ -243,10 +248,12 @@ __tests__ = (
|
||||
"#comment" : "correct 'name' from HTML (#6790)",
|
||||
"#category": ("lolisafe", "bunkr", "media"),
|
||||
"#class" : bunkr.BunkrMediaExtractor,
|
||||
"#results" : "https://kebab.bunkr.ru/80ca5405-8b8d-4f9f-8167-8b046bb9dc67.mp4",
|
||||
"#results" : "https://c.bunkr-cache.se/QlXezBjk2fCVVobM/80ca5405-8b8d-4f9f-8167-8b046bb9dc67.mp4",
|
||||
|
||||
"id" : "",
|
||||
"id_url" : "wYGCKbGhSvuAW",
|
||||
"id_url" : "41913002",
|
||||
"slug" : "wYGCKbGhSvuAW",
|
||||
"uuid" : "80ca5405-8b8d-4f9f-8167-8b046bb9dc67",
|
||||
"name" : "0hwndshtfmj7hcbut1nd4_source",
|
||||
"filename" : "0hwndshtfmj7hcbut1nd4_source",
|
||||
"extension": "mp4",
|
||||
@@ -257,12 +264,9 @@ __tests__ = (
|
||||
"#comment" : "file gone --- 403 error for main 'brg-bk.cdn.gigachad-cdn.ru' URL (#6732 #6972)",
|
||||
"#category": ("lolisafe", "bunkr", "media"),
|
||||
"#class" : bunkr.BunkrMediaExtractor,
|
||||
"#results" : "https://brg-bk.cdn.gigachad-cdn.ru/IMG_47272f2c698d257fd22f4300ae98ec35929b-iEYVkLPQ.jpg?n=IMG_47272f2c698d257fd22f4300ae98ec35929b.jpg",
|
||||
"#results" : "https://brg-bk.cdn.gigachad-cdn.ru/IMG_47272f2c698d257fd22f4300ae98ec35929b-iEYVkLPQ.jpg",
|
||||
"#sha1_content": "f1c839743563828b250e48d485933a735a508527",
|
||||
|
||||
"_fallback": (
|
||||
"https://i-burger.bunkr.ru/IMG_47272f2c698d257fd22f4300ae98ec35929b-iEYVkLPQ.jpg",
|
||||
),
|
||||
"_http_headers": {
|
||||
"Referer": "https://get.bunkrr.su/file/29682239",
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user