[vk] prevent '404 Not Found' errors for file downloads

only strip query parameters when the '/imp[fg]/' regex substitution applies; otherwise keep the original URL, including its query string
This commit is contained in:
Mike Fährmann
2025-06-11 22:30:25 +02:00
parent d065452ba3
commit 0df083b208

View File

@@ -36,7 +36,7 @@ class VkExtractor(Extractor):
return num return num
def items(self): def items(self):
sub = util.re(r"/imp[fg]/").sub subn = util.re(r"/imp[fg]/").subn
sizes = "wzyxrqpo" sizes = "wzyxrqpo"
data = self.metadata() data = self.metadata()
@@ -58,9 +58,12 @@ class VkExtractor(Extractor):
self.log.warning("no photo URL found (%s)", photo.get("id")) self.log.warning("no photo URL found (%s)", photo.get("id"))
continue continue
photo["url"] = sub("/", url.partition("?")[0]) url_sub, count = subn("/", url.partition("?")[0])
# photo["url"] = url if count:
photo["_fallback"] = (url,) photo["_fallback"] = (url,)
photo["url"] = url = url_sub
else:
photo["url"] = url
try: try:
_, photo["width"], photo["height"] = photo[size] _, photo["width"], photo["height"] = photo[size]
@@ -71,8 +74,8 @@ class VkExtractor(Extractor):
photo["id"] = photo["id"].rpartition("_")[2] photo["id"] = photo["id"].rpartition("_")[2]
photo.update(data) photo.update(data)
text.nameext_from_url(photo["url"], photo) text.nameext_from_url(url, photo)
yield Message.Url, photo["url"], photo yield Message.Url, url, photo
def _pagination(self, photos_id): def _pagination(self, photos_id):
url = self.root + "/al_photos.php" url = self.root + "/al_photos.php"