[khinsider] fix extraction

This commit is contained in:
Mike Fährmann
2017-11-23 15:33:49 +01:00
parent 12de658937
commit 65c1c53eb8

View File

@@ -23,6 +23,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
 "album/horizon-riders-wii-"), {
 "pattern": ("https?://\d+\.\d+\.\d+\.\d+/ost/horizon-riders-wii-/"
 "[^/]+/horizon-riders-wii-full-soundtrack\.mp3"),
+"count": 1,
 "keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68",
 })]
@@ -58,13 +59,11 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
 pos = page.find("Download all songs at once:")
 if pos == -1:
 raise exception.NotFoundError("soundtrack")
-num = 0
-for url in text.extract_iter(
-page, '<tr>\r\n\r\n\t \t<td><a href="', '"', pos):
+page = text.extract(page, '<table align="center"', '</table>', pos)[0]
+for num, url in enumerate(text.extract_iter(
+page, '<td><a href="', '"'), 1):
 page = self.request(url, encoding="utf-8").text
 name, pos = text.extract(page, "Song name: <b>", "</b>")
 url , pos = text.extract(
-page, '<p><a style="color: #21363f;" href="', '"', pos
-)
-num += 1
+page, '<p><a style="color: #21363f;" href="', '"', pos)
 yield url, text.nameext_from_url(name, {"num": num})