[vk:album] extract more metadata (#8029)
… when possible / allowed by VK:
- album[name]
- album[count]
- user[name]
- user[nick]
- user[group]
This commit is contained in:
@@ -187,17 +187,40 @@ class VkAlbumExtractor(VkExtractor):
|
||||
pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
|
||||
example = "https://vk.com/album12345_00"
|
||||
|
||||
def __init__(self, match):
|
||||
VkExtractor.__init__(self, match)
|
||||
self.user_id, self.album_id = match.groups()
|
||||
|
||||
def photos(self):
|
||||
return self._pagination(f"album{self.user_id}_{self.album_id}")
|
||||
user_id, album_id = self.groups
|
||||
return self._pagination(f"album{user_id}_{album_id}")
|
||||
|
||||
def metadata(self):
|
||||
user_id, album_id = self.groups
|
||||
|
||||
url = f"{self.root}/album{user_id}_{album_id}"
|
||||
page = self.request(url).text
|
||||
desc = text.extr(page, 'name="og:description" value="', '"')
|
||||
try:
|
||||
album_name, user_name, photos = desc.rsplit(" - ", 2)
|
||||
except ValueError:
|
||||
if msg := text.extr(
|
||||
page, '<div class="message_page_title">Error</div>',
|
||||
"</div>"):
|
||||
msg = f" ('{text.remove_html(msg)[:-5]}')"
|
||||
self.log.warning("%s_%s: Failed to extract metadata%s",
|
||||
user_id, album_id, msg)
|
||||
return {"user": {"id": user_id}, "album": {"id": album_id}}
|
||||
|
||||
return {
|
||||
"user": {"id": self.user_id},
|
||||
"album": {"id": self.album_id},
|
||||
"user": {
|
||||
"id" : user_id,
|
||||
"nick" : text.unescape(user_name),
|
||||
"name" : text.unescape(text.extr(
|
||||
page, 'class="ui_crumb" href="/', '"')),
|
||||
"group": user_id[0] == "-",
|
||||
},
|
||||
"album": {
|
||||
"id" : album_id,
|
||||
"name" : text.unescape(album_name),
|
||||
"count": text.parse_int(photos[:-7])
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -79,6 +79,20 @@ __tests__ = (
|
||||
"#category": ("", "vk", "album"),
|
||||
"#class" : vk.VkAlbumExtractor,
|
||||
"#count" : 12,
|
||||
"#log" : "-165740836_281339889: Failed to extract metadata ('Access denied')",
|
||||
|
||||
"album": {
|
||||
"id" : "281339889",
|
||||
"!name" : str,
|
||||
"!count": int,
|
||||
},
|
||||
"user": {
|
||||
"id": "-165740836",
|
||||
"!name" : str,
|
||||
"!nick" : str,
|
||||
"!group": bool,
|
||||
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
@@ -96,6 +110,36 @@ __tests__ = (
|
||||
"#exception": exception.AuthorizationError,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://vk.com/album-205150448_00",
|
||||
"#class" : vk.VkAlbumExtractor,
|
||||
"#range" : "1-25",
|
||||
"#count" : 25,
|
||||
|
||||
"id" : r"re:\d+",
|
||||
"width" : range(100, 8_000),
|
||||
"height" : range(100, 8_000),
|
||||
"filename" : str,
|
||||
"extension" : {"jpg", "png"},
|
||||
"date" : "type:datetime",
|
||||
"count" : 826,
|
||||
"num" : range(1, 25),
|
||||
"description" : str,
|
||||
"hash" : r"re:[0-9a-f]{18}",
|
||||
"likes" : int,
|
||||
"album" : {
|
||||
"id" : "00",
|
||||
"name" : "Community wall photos",
|
||||
"count": 826,
|
||||
},
|
||||
"user" : {
|
||||
"id" : "-205150448",
|
||||
"name" : "otjareniy",
|
||||
"nick" : "Отжареный Овощ(16+)",
|
||||
"group": True,
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://vk.com/tag304303884",
|
||||
"#category": ("", "vk", "tagged"),
|
||||
|
||||
Reference in New Issue
Block a user