[yaplog] improve metadata extraction (#443)
- provide a fallback if there is no numerical image ID
- add a 'filename' field
- convert 'date' to an actual datetime object
This commit is contained in:
@@ -36,11 +36,13 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
|
||||
iurl = text.extract(page, '<img src="', '"')[0]
|
||||
if iurl[0] == "/":
|
||||
iurl = text.urljoin(self.root, iurl)
|
||||
iid, _, ext = iurl.rpartition("/")[2].rpartition(".")
|
||||
name, _, ext = iurl.rpartition("/")[2].rpartition(".")
|
||||
iid = name.rpartition("_")[0] or name
|
||||
image = {
|
||||
"url" : iurl,
|
||||
"num" : num,
|
||||
"id" : text.parse_int(iid.partition("_")[0]),
|
||||
"id" : text.parse_int(iid, iid),
|
||||
"filename" : name,
|
||||
"extension": ext,
|
||||
"post" : post,
|
||||
}
|
||||
@@ -75,7 +77,7 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
|
||||
"id" : text.parse_int(pid),
|
||||
"title": text.unescape(title[:-3]),
|
||||
"user" : self.user,
|
||||
"date" : date,
|
||||
"date" : text.parse_datetime(date, "%B %d [%a], %Y, %H:%M"),
|
||||
}
|
||||
|
||||
|
||||
@@ -102,7 +104,7 @@ class YaplogPostExtractor(YaplogExtractor):
|
||||
test = (
|
||||
("https://yaplog.jp/imamiami0726/image/1299", {
|
||||
"url": "896cae20fa718735a57e723c48544e830ff31345",
|
||||
"keyword": "f8d8781e61c4c38238a7622d6df6c905f864e5d3",
|
||||
"keyword": "22df8ad6cb534514c6bb2ff000381d156769a620",
|
||||
}),
|
||||
# complete image URLs (#443)
|
||||
("https://yaplog.jp/msjane/archive/246", {
|
||||
|
||||
Reference in New Issue
Block a user