[ao3] parse 'series' metadata (#6013)

This commit is contained in:
Mike Fährmann
2024-09-17 16:55:39 +02:00
parent 6b8628b23f
commit 064bc890fc
2 changed files with 21 additions and 0 deletions

View File

@@ -116,6 +116,19 @@ class Ao3WorkExtractor(Ao3Extractor):
}
data["language"] = util.code_to_language(data["lang"])
series = data["series"]
if series:
extr = text.extract_from(series)
data["series"] = {
"prev" : extr(' class="previous" href="/works/', '"'),
"index": extr(' class="position">Part ', " "),
"id" : extr(' href="/series/', '"'),
"name" : text.unescape(extr(">", "<")),
"next" : extr(' class="next" href="/works/', '"'),
}
else:
data["series"] = None
yield Message.Directory, data
for fmt in self.formats:
try:

View File

@@ -25,6 +25,13 @@ __tests__ = (
"lang" : "en",
"language" : "English",
"likes" : range(1000, 2000),
"series" : {
"id" : "4237024",
"prev" : "",
"next" : "57205801",
"index": "1",
"name" : "The Wildcard Universe",
},
"title" : "The Wildcard",
"views" : range(34000, 50000),
"words" : 217549,
@@ -172,6 +179,7 @@ __tests__ = (
"#category": ("", "ao3", "user-works"),
"#class" : ao3.Ao3UserWorksExtractor,
"#urls" : (
"https://archiveofourown.org/works/58979287",
"https://archiveofourown.org/works/55035061",
"https://archiveofourown.org/works/52704457",
"https://archiveofourown.org/works/52502743",