From 61d793dc7dc349f2d46eacfaba35826f8a572631 Mon Sep 17 00:00:00 2001
From: Vitaliy Levin <90525259+VitalikLevin@users.noreply.github.com>
Date: Fri, 5 Sep 2025 14:10:08 +0300
Subject: [PATCH] [vk] add 'wall-post' extractor (#474 #6378 #8159)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [vk] Added extractor for VK wall posts

* update

- fix flake8
- rename to 'wall-post'
- remove __init__() / use self.groups
- simplify 'description' extraction

* add test

* add to docs/supportedsites

---------

Co-authored-by: Mike Fährmann
---
Reviewer notes (text in this position is not part of the commit message):

* The copy of this patch under review had lost its line breaks, its
  indentation and every HTML-like token. The layout is restored below.
  The <tr>/<td> markup in the docs/supportedsites.md hunk, the leading
  whitespace of context lines (in particular the nesting depth inside
  VkExtractor._pagination) and the column alignment in test/results/vk.py
  are reconstructed, not recovered. Verify them against the target tree
  before applying.
* The address of the Co-authored-by trailer was lost as well and is not
  guessed here.
* VkWallPostExtractor.metadata(): the end delimiter of the first
  text.extr() call read "". Assuming text.extr() locates its delimiters
  with a plain substring search, an empty delimiter matches immediately,
  so that lookup always produced an empty string and only the
  <meta name="description"> fallback was effective. "</div>" is assumed to
  be the intended delimiter; confirm against the live page markup.
* VkExtractor._pagination(): the index loop is replaced by enumerate()
  with a start value. The 'num' and 'count' values are unchanged.
* Hunk sizes and the diffstat are unchanged by the two edits above, but
  the blob ids on the vk.py "index" line describe the pre-review content.

 docs/supportedsites.md     |  2 +-
 gallery_dl/extractor/vk.py | 38 +++++++++++++++++++++++++++++++++++++-
 scripts/supportedsites.py  |  1 +
 test/results/vk.py         | 27 +++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index ce04321a..4d34c2cb 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1042,7 +1042,7 @@ Consider all listed sites to potentially be NSFW.
 <tr>
     <td>VK</td>
     <td>https://vk.com/</td>
-    <td>Albums, Photos, Tagged Photos</td>
+    <td>Albums, Photos, Tagged Photos, individual Wall Posts</td>
     <td></td>
 </tr>
 <tr>
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 75a0137e..3b3f9893 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -108,6 +108,10 @@ class VkExtractor(Extractor):
             total = payload[1]
             photos = payload[3]
 
+            for num, photo in enumerate(photos, self.offset + 1):
+                photo["num"] = num
+                photo["count"] = total
+
             offset_next = self.offset + len(photos)
             if offset_next >= total:
                 # the last chunk of photos also contains the first few photos
@@ -128,7 +132,7 @@ class VkPhotosExtractor(VkExtractor):
     subcategory = "photos"
     pattern = (BASE_PATTERN + r"/(?:"
                r"(?:albums|photos|id)(-?\d+)"
-               r"|(?!(?:album|tag)-?\d+_?)([^/?#]+))")
+               r"|(?!(?:album|tag|wall)-?\d+_?)([^/?#]+))")
     example = "https://vk.com/id12345"
 
     def __init__(self, match):
@@ -209,3 +213,35 @@ class VkTaggedExtractor(VkExtractor):
 
     def metadata(self):
         return {"user": {"id": self.user_id}}
+
+
+class VkWallPostExtractor(VkExtractor):
+    """Extractor for a vk wall post"""
+    subcategory = "wall-post"
+    directory_fmt = ("{category}", "{user[id]}", "wall")
+    filename_fmt = "{wall[id]}_{num}.{extension}"
+    pattern = BASE_PATTERN + r"/wall(-?\d+)_(\d+)"
+    example = "https://vk.com/wall12345_123"
+
+    def photos(self):
+        user_id, wall_id = self.groups
+        return self._pagination(f"wall{user_id}_{wall_id}")
+
+    def metadata(self):
+        user_id, wall_id = self.groups
+
+        url = f"{self.root}/wall{user_id}_{wall_id}"
+        page = self.request(url).text
+        desc = text.unescape(
+            text.extr(page, 'data-testid="post_description">', "</div>") or
+            text.extr(page, 'name="description" content="', '"'))
+
+        return {
+            "user": {
+                "id": user_id,
+            },
+            "wall": {
+                "id": wall_id,
+                "description": desc,
+            },
+        }
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index dd082c0a..018a7341 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -403,6 +403,7 @@ SUBCATEGORY_MAP = {
     },
     "vk": {
         "tagged": "Tagged Photos",
+        "wall-post": "individual Wall Posts",
     },
     "vsco": {
         "spaces": "",
diff --git a/test/results/vk.py b/test/results/vk.py
index c2ce5849..10aa9d9b 100644
--- a/test/results/vk.py
+++ b/test/results/vk.py
@@ -99,4 +99,31 @@ __tests__ = (
     "#count"   : 44,
 },
 
+{
+    "#url"     : "https://vk.com/wall-213352498_2115",
+    "#class"   : vk.VkWallPostExtractor,
+    "#results" : (
+        "https://sun9-42.userapi.com/s/v1/ig2/53qxcL7M8408L2HNDTHdHz-HXbprXBn1BLbE5HTuj-OsZD4I483jtZb8yMk9Mr4zzfPJhqBIJlAprWVhIqlk4Fn4.jpg?quality=95&as=32x57,48x85,72x128,108x192,160x284,240x427,360x640,480x853,540x960,640x1138,720x1280&from=bu&cs=720x0",
+        "https://sun9-49.userapi.com/s/v1/ig2/FnvT8T3mC2yQWc5yJTOe25Kj864ohqvTgOcTudqrE4sPfCMexS1mzNmgUndgxUbqhht-YmIVKW_edDFtzCLXzf7h.jpg?quality=95&as=32x57,48x85,72x128,108x192,160x284,240x427,360x640,480x853,540x960,640x1138,720x1280&from=bu&cs=720x0",
+        "https://sun9-78.userapi.com/s/v1/ig2/6VB0Cnmdtb9rDNFd5iHv5QJAJ-y-xSVELEoCLlOf_ej2BWVf61G3DSXbnXgmx-QFtQkOOnHIhCLFFLTIFKeVBR5Q.jpg?quality=95&as=32x57,48x85,72x128,108x192,160x284,240x427,360x640,480x853,540x960,640x1138,720x1280&from=bu&cs=720x0",
+        "https://sun9-60.userapi.com/s/v1/ig2/KO5SzdRUHjZRKlHii4oJ4BrTo5nbdyP3CCpf6_RfHhrEIx6jiVPlWH1R--fpoK5-0rigqXuaG68q39m5VQVy6YFo.jpg?quality=95&as=32x57,48x85,72x128,108x192,160x284,240x427,360x640,480x853,540x960,640x1138,720x1280&from=bu&cs=720x0",
+        "https://sun9-33.userapi.com/s/v1/ig2/IAN1ZHmVVtjRj0U7wGAfnMc5Xp83EFFYZAVqNgMKpfthLHOe6wh0bodM_xDwIALvVl4pcZ66Fv3bOROG4sUTwY21.jpg?quality=95&as=32x57,48x85,72x128,108x192,160x284,240x427,360x640,480x853,540x960,640x1138,720x1280&from=bu&cs=720x0",
+        "https://sun9-44.userapi.com/s/v1/ig2/RLzDGnlmu7C0sLh2YI2R4L9RBgZ061QLOsxogjEtC0cBZJ9HvhNwe1V16QX0tNLkTOLELAp8JDHwOo6dMvoWydeh.jpg?quality=95&as=32x57,48x85,72x128,108x192,160x284,240x427,360x640,480x853,540x960,640x1138,720x1280&from=bu&cs=720x0",
+    ),
+
+    "id"       : r"re:^\d+$",
+    "width"    : 720,
+    "height"   : 1280,
+    "count"    : 6,
+    "num"      : range(1, 6),
+    "likes"    : int,
+    "user"     : {
+        "id": "-213352498",
+    },
+    "wall"     : {
+        "description": "🎄 Обновляем не только аватарки, но и обои на телефоне",
+        "id"         : "2115",
+    },
+},
+
 )