[vk:album] extract more metadata (#8029)

… when possible / allowed by VK

- album[name]
- album[count]
- user[name]
- user[nick]
- user[group]
This commit is contained in:
Mike Fährmann
2025-09-05 19:33:57 +02:00
parent 821f5c9a9e
commit 752f82f002
2 changed files with 74 additions and 7 deletions

View File

@@ -187,17 +187,40 @@ class VkAlbumExtractor(VkExtractor):
pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
example = "https://vk.com/album12345_00"
def __init__(self, match):
    """Set up the extractor and remember the IDs captured from the URL.

    The album URL pattern has two capture groups; they are stored, in
    order, as ``self.user_id`` and ``self.album_id``.
    """
    VkExtractor.__init__(self, match)
    captured = match.groups()
    self.user_id, self.album_id = captured
def photos(self):
    """Return the paginated photo listing for this album.

    The album is addressed as ``album<user_id>_<album_id>``, built from
    the two values in ``self.groups``; that identifier is handed to
    ``self._pagination`` and its result is returned unchanged.
    """
    # A single return path: the previous body had a second, unreachable
    # set of statements after an unconditional return.
    user_id, album_id = self.groups
    return self._pagination(f"album{user_id}_{album_id}")
def metadata(self):
    """Return album and owner metadata scraped from the album page.

    Fetches ``<root>/album<user_id>_<album_id>`` and reads the page's
    ``og:description`` value, which is split from the right on " - "
    into album name, owner display name and a photo-count phrase.

    Returns:
        A dict with two sub-dicts:
          "user":  "id", "nick", "name", "group"
          "album": "id", "name", "count"
        When the description cannot be split into three parts, a warning
        is logged and only the two "id" values are returned.
    """
    user_id, album_id = self.groups
    url = f"{self.root}/album{user_id}_{album_id}"
    page = self.request(url).text
    desc = text.extr(page, 'name="og:description" value="', '"')

    try:
        # rsplit from the right so an album name that itself contains
        # " - " stays in one piece.
        album_name, user_name, photos = desc.rsplit(" - ", 2)
    except ValueError:
        # Fewer than three parts: look for an error box on the page so
        # the warning can say why (e.g. access restrictions).
        if msg := text.extr(
                page, '<div class="message_page_title">Error</div>',
                "</div>"):
            # NOTE(review): the last five characters of the stripped
            # message are dropped -- presumably trailing filler; confirm.
            msg = f" ('{text.remove_html(msg)[:-5]}')"
        self.log.warning("%s_%s: Failed to extract metadata%s",
                         user_id, album_id, msg)
        return {"user": {"id": user_id}, "album": {"id": album_id}}

    # Each key appears exactly once; the earlier literal also carried a
    # stale "user"/"album" pair that was silently overridden.
    return {
        "user": {
            "id"   : user_id,
            "nick" : text.unescape(user_name),
            "name" : text.unescape(text.extr(
                page, 'class="ui_crumb" href="/', '"')),
            # A leading minus sign on the owner ID marks a group.
            # startswith() also tolerates an empty ID.
            "group": user_id.startswith("-"),
        },
        "album": {
            "id"   : album_id,
            "name" : text.unescape(album_name),
            # Keep everything before the last space rather than slicing
            # off a fixed 7 characters, so the result does not depend on
            # the length of the trailing word. Identical to the old
            # slice whenever the phrase ends in " photos".
            "count": text.parse_int(photos.rpartition(" ")[0]),
        },
    }

View File

@@ -79,6 +79,20 @@ __tests__ = (
"#category": ("", "vk", "album"),
"#class" : vk.VkAlbumExtractor,
"#count" : 12,
"#log" : "-165740836_281339889: Failed to extract metadata ('Access denied')",
"album": {
"id" : "281339889",
"!name" : str,
"!count": int,
},
"user": {
"id": "-165740836",
"!name" : str,
"!nick" : str,
"!group": bool,
},
},
{
@@ -96,6 +110,36 @@ __tests__ = (
"#exception": exception.AuthorizationError,
},
{
"#url" : "https://vk.com/album-205150448_00",
"#class" : vk.VkAlbumExtractor,
"#range" : "1-25",
"#count" : 25,
"id" : r"re:\d+",
"width" : range(100, 8_000),
"height" : range(100, 8_000),
"filename" : str,
"extension" : {"jpg", "png"},
"date" : "type:datetime",
"count" : 826,
"num" : range(1, 25),
"description" : str,
"hash" : r"re:[0-9a-f]{18}",
"likes" : int,
"album" : {
"id" : "00",
"name" : "Community wall photos",
"count": 826,
},
"user" : {
"id" : "-205150448",
"name" : "otjareniy",
"nick" : "Отжареный Овощ(16+)",
"group": True,
},
},
{
"#url" : "https://vk.com/tag304303884",
"#category": ("", "vk", "tagged"),