[bunkr] fix album extraction (#6798)

This commit is contained in:
Mike Fährmann
2025-01-10 11:00:32 +01:00
parent 118b994cf2
commit af9c06f812

View File

@@ -81,7 +81,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
# redirect
url = response.headers["Location"]
if url[0] == "/":
-url = text.root_from_url(response.url) + url
+url = self.root + url
continue
root, path = self._split(url)
if root not in CF_DOMAINS:
@@ -108,7 +108,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"All Bunkr domains require solving a CF challenge")
# select alternative domain
-root = "https://" + random.choice(DOMAINS)
+self.root = root = "https://" + random.choice(DOMAINS)
self.log.debug("Trying '%s' as fallback", root)
url = root + path
@@ -122,7 +122,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
# files
-items = list(text.extract_iter(page, "<!-- item -->", "<!-- -->"))
+items = list(text.extract_iter(
+page, '<div class="grid-images_box', "</a>"))
return self._extract_files(items), {
"album_id" : album_id,
"album_name" : title,
@@ -133,9 +134,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def _extract_files(self, items):
for item in items:
try:
-url = text.extr(item, ' href="', '"')
-file = self._extract_file(text.unescape(url))
+url = text.unescape(text.extr(item, ' href="', '"'))
+if url[0] == "/":
+url = self.root + url
+file = self._extract_file(url)
info = text.split_html(item)
file["name"] = info[0]
file["size"] = info[2]