[khinsider] use urljoin() to complete page URLs

This commit is contained in:
Mike Fährmann
2017-12-17 16:21:05 +01:00
parent 263741d243
commit 444008a14a
2 changed files with 5 additions and 4 deletions

View File

@@ -10,6 +10,7 @@
from .common import AsynchronousExtractor, Message from .common import AsynchronousExtractor, Message
from .. import text, exception from .. import text, exception
from urllib.parse import urljoin
class KhinsiderSoundtrackExtractor(AsynchronousExtractor): class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
@@ -26,14 +27,14 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
"count": 1, "count": 1,
"keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68", "keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68",
})] })]
root = "https://downloads.khinsider.com"
def __init__(self, match): def __init__(self, match):
AsynchronousExtractor.__init__(self) AsynchronousExtractor.__init__(self)
self.album = match.group(1) self.album = match.group(1)
def items(self): def items(self):
url = ("https://downloads.khinsider.com/game-soundtracks/album/" + url = (self.root + "/game-soundtracks/album/" + self.album)
self.album)
page = self.request(url, encoding="utf-8").text page = self.request(url, encoding="utf-8").text
data = self.get_job_metadata(page) data = self.get_job_metadata(page)
yield Message.Version, 1 yield Message.Version, 1
@@ -61,7 +62,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
page = text.extract(page, '<table id="songlist">', '</table>')[0] page = text.extract(page, '<table id="songlist">', '</table>')[0]
for num, url in enumerate(text.extract_iter( for num, url in enumerate(text.extract_iter(
page, '<td class="clickable-row"><a href="', '"'), 1): page, '<td class="clickable-row"><a href="', '"'), 1):
page = self.request(url, encoding="utf-8").text page = self.request(urljoin(self.root, url), encoding="utf-8").text
url = text.extract( url = text.extract(
page, '<p><a style="color: #21363f;" href="', '"')[0] page, '<p><a style="color: #21363f;" href="', '"')[0]
yield url, text.nameext_from_url(url, {"num": num}) yield url, text.nameext_from_url(url, {"num": num})

View File

@@ -89,7 +89,7 @@ class MangaparkChapterExtractor(MangaparkExtractor):
}), }),
("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", { ("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
"count": 15, "count": 15,
"keyword": "5760c0a5efd1ffe24468cfaac5b41d048af36360", "keyword": "dc9233cdd83d8659300f0a20ec3c493873f71741",
}), }),
] ]