From 12a6e0493e8ea3ec263e8438fbf38a9baf7e1dca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 31 Aug 2025 15:48:27 +0200 Subject: [PATCH] [bunkr] fix albums with more than 100 files (#8150) --- gallery_dl/extractor/bunkr.py | 50 +++++++++++++++++------------------ test/results/bunkr.py | 40 +++++++++++++++------------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index eba16782..b2f6431b 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -62,7 +62,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): root = "https://bunkr.si" root_dl = "https://get.bunkrr.su" root_api = "https://apidl.bunkr.ru" - archive_fmt = "{album_id}_{id|id_url}" + archive_fmt = "{album_id}_{id|id_url|slug}" pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://bunkr.si/a/ID" @@ -134,13 +134,13 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): def fetch_album(self, album_id): # album metadata - page = self.request(self.root + "/a/" + album_id).text + page = self.request(f"{self.root}/a/{album_id}?advanced=1").text title = text.unescape(text.unescape(text.extr( page, 'property="og:title" content="', '"'))) # files - items = list(text.extract_iter( - page, '
")) + items = text.extr( + page, "window.albumFiles = [", "").split("\n},\n") return self._extract_files(items), { "album_id" : album_id, @@ -156,17 +156,19 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): for item in items: try: - url = text.unescape(text.extr(item, ' href="', '"')) - if url[0] == "/": - url = self.root + url + data_id = text.extr(item, " id: ", ",").strip() + file = self._extract_file(data_id) - file = self._extract_file(url) - info = text.split_html(item) - if not file["name"]: - file["name"] = info[-3] - file["size"] = info[-2] - file["date"] = text.parse_datetime( - info[-1], "%H:%M:%S %d/%m/%Y") + file["name"] = util.json_loads(text.extr( + item, 'original:', ',\n')) + file["slug"] = util.json_loads(text.extr( + item, 'slug: ', ',\n')) + file["uuid"] = text.extr( + item, 'name: "', ".") + file["size"] = text.parse_int(text.extr( + item, "size: ", " ,\n")) + file["date"] = text.parse_datetime(text.extr( + item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y") yield file except exception.ControlException: @@ -175,11 +177,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): self.log.error("%s: %s", exc.__class__.__name__, exc) self.log.debug("", exc_info=exc) - def _extract_file(self, webpage_url): - page = self.request(webpage_url).text - data_id = text.extr(page, 'data-file-id="', '"') - referer = self.root_dl + "/file/" + data_id - + def _extract_file(self, data_id): + referer = f"{self.root_dl}/file/{data_id}" headers = {"Referer": referer, "Origin": self.root_dl} data = self.request_json(self.endpoint, method="POST", headers=headers, json={"id": data_id}) @@ -190,14 +189,9 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): else: file_url = data["url"] - file_name = text.extr(page, "")[2] - fallback = text.extr(page, 'property="og:url" content="', '"') - return { "file" : file_url, - "name" : text.unescape(file_name), "id_url" : data_id, - "_fallback" : (fallback,) if fallback else (), "_http_headers" : {"Referer": referer}, "_http_validate": self._validate, } @@ -222,7 +216,13 @@ class BunkrMediaExtractor(BunkrAlbumExtractor): def fetch_album(self, album_id): try: - file = self._extract_file(self.root + album_id) + page = self.request(f"{self.root}{album_id}").text + data_id = text.extr(page, 'data-file-id="', '"') + file = self._extract_file(data_id) + file["name"] = text.unescape(text.extr( + page, "")[2]) + file["slug"] = album_id.rpartition("/")[2] + file["uuid"] = text.extr(page, "/thumbs/", ".") except Exception as exc: self.log.error("%s: %s", exc.__class__.__name__, exc) return (), {} diff --git a/test/results/bunkr.py b/test/results/bunkr.py index d5758507..0c7177f7 100644 --- a/test/results/bunkr.py +++ b/test/results/bunkr.py @@ -12,8 +12,11 @@ __tests__ = ( "#url" : "https://bunkr.sk/a/Lktg9Keq", "#category": ("lolisafe", "bunkr", "album"), "#class" : bunkr.BunkrAlbumExtractor, - "#results" : "https://brg-bk.cdn.gigachad-cdn.ru/test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E-QjgneIQv.png?n=test-%E3%83%86%E3%82%B9%E3%83%88-%22%26%3E.png", - "#sha1_content": "0c8768055e4e20e7c7259608b67799171b691140", + "#results" : """https://brg-bk.cdn.gigachad-cdn.ru/test-テスト-"&>-QjgneIQv.png""", + "#sha1_content": ( + "0c8768055e4e20e7c7259608b67799171b691140", + "961b25d85b5f5bd18cbe3e847ac55925f14d0286", + ), "album_id" : "Lktg9Keq", "album_name" : "test テスト \"&>", @@ -25,6 +28,7 @@ __tests__ = ( "id" : "QjgneIQv", "id_url" : "1044478", "name" : "test-テスト-\"&>", + "slug" : "test-テスト-\"&>-QjgneIQv.png", "num" : 1, }, @@ -33,7 +37,7 @@ __tests__ = ( "#category": ("lolisafe", "bunkr", "album"), "#class" : bunkr.BunkrAlbumExtractor, "#results" : ( - "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg", + "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg", ), "#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8", @@ -53,8 +57,7 @@ __tests__ = ( "#category": ("lolisafe", "bunkr", "album"), "#class" : bunkr.BunkrAlbumExtractor, "#pattern" : r"https://(i-)?meatballs.bunkr.ru/\w+", - "#range" : "5-", - "#count" : 3, + "#count" : 4, }, { @@ -65,7 +68,9 @@ __tests__ = ( "#count" : 2, "id" : "", - "id_url": {"UPKDHBf0CvrCe", "zQgSePr1f4HZ2"}, + "id_url": {"43478756", "43478551"}, + "slug" : {"UPKDHBf0CvrCe", "zQgSePr1f4HZ2"}, + "uuid" : "iso:uuid", }, { @@ -195,12 +200,12 @@ __tests__ = ( "#url" : "https://bunkr.black/i/image-sZrQUeOx.jpg", "#category": ("lolisafe", "bunkr", "media"), "#class" : bunkr.BunkrMediaExtractor, - "#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg", + "#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg", "#sha1_content": "55998743751dfe008d0e95605114fcbfa7dc4de8", "count" : 1, "extension": "jpg", - "file" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg", + "file" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg", "filename" : "image-sZrQUeOx", "id" : "sZrQUeOx", "name" : "image", @@ -211,19 +216,19 @@ __tests__ = ( "#comment" : "/f/ URL", "#category": ("lolisafe", "bunkr", "media"), "#class" : bunkr.BunkrMediaExtractor, - "#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg?n=image.jpg", + "#results" : "https://mlk-bk.cdn.gigachad-cdn.ru/image-sZrQUeOx.jpg", }, { "#url" : "https://bunkrrr.org/d/dJuETSzKLrUps", "#category": ("lolisafe", "bunkr", "media"), "#class" : bunkr.BunkrMediaExtractor, - "#results" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip?n=file.zip", + "#results" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip", "#sha1_content": "102ddd7894fe39b3843098fc51f972a0af938f45", "count" : 1, "extension": "zip", - "file" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip?n=file.zip", + "file" : "https://brg-bk.cdn.gigachad-cdn.ru/file-r5fmwjdd.zip", "filename" : "file-r5fmwjdd", "id" : "r5fmwjdd", "id_url" : "38792076", @@ -235,7 +240,7 @@ __tests__ = ( "#comment" : "redirect to '/f/rEeTUL8MXR17A' (#6790)", "#category": ("lolisafe", "bunkr", "media"), "#class" : bunkr.BunkrMediaExtractor, - "#results" : "https://meatballs.bunkr.ru/27-03-2024-Rp-0FfrropA.mp4", + "#results" : "https://c.bunkr-cache.se/hAVFkYK1bLbSaaKq/27-03-2024-Rp-0FfrropA.mp4", }, { @@ -243,10 +248,12 @@ __tests__ = ( "#comment" : "correct 'name' from HTML (#6790)", "#category": ("lolisafe", "bunkr", "media"), "#class" : bunkr.BunkrMediaExtractor, - "#results" : "https://kebab.bunkr.ru/80ca5405-8b8d-4f9f-8167-8b046bb9dc67.mp4", + "#results" : "https://c.bunkr-cache.se/QlXezBjk2fCVVobM/80ca5405-8b8d-4f9f-8167-8b046bb9dc67.mp4", "id" : "", - "id_url" : "wYGCKbGhSvuAW", + "id_url" : "41913002", + "slug" : "wYGCKbGhSvuAW", + "uuid" : "80ca5405-8b8d-4f9f-8167-8b046bb9dc67", "name" : "0hwndshtfmj7hcbut1nd4_source", "filename" : "0hwndshtfmj7hcbut1nd4_source", "extension": "mp4", @@ -257,12 +264,9 @@ __tests__ = ( "#comment" : "file gone --- 403 error for main 'brg-bk.cdn.gigachad-cdn.ru' URL (#6732 #6972)", "#category": ("lolisafe", "bunkr", "media"), "#class" : bunkr.BunkrMediaExtractor, - "#results" : "https://brg-bk.cdn.gigachad-cdn.ru/IMG_47272f2c698d257fd22f4300ae98ec35929b-iEYVkLPQ.jpg?n=IMG_47272f2c698d257fd22f4300ae98ec35929b.jpg", + "#results" : "https://brg-bk.cdn.gigachad-cdn.ru/IMG_47272f2c698d257fd22f4300ae98ec35929b-iEYVkLPQ.jpg", "#sha1_content": "f1c839743563828b250e48d485933a735a508527", - "_fallback": ( - "https://i-burger.bunkr.ru/IMG_47272f2c698d257fd22f4300ae98ec35929b-iEYVkLPQ.jpg", - ), "_http_headers": { "Referer": "https://get.bunkrr.su/file/29682239", },