[khinsider] use urljoin() to complete page URLs
This commit is contained in:
@@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text, exception
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
|
||||||
class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
||||||
@@ -26,14 +27,14 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
|||||||
"count": 1,
|
"count": 1,
|
||||||
"keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68",
|
"keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68",
|
||||||
})]
|
})]
|
||||||
|
root = "https://downloads.khinsider.com"
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
AsynchronousExtractor.__init__(self)
|
AsynchronousExtractor.__init__(self)
|
||||||
self.album = match.group(1)
|
self.album = match.group(1)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = ("https://downloads.khinsider.com/game-soundtracks/album/" +
|
url = (self.root + "/game-soundtracks/album/" + self.album)
|
||||||
self.album)
|
|
||||||
page = self.request(url, encoding="utf-8").text
|
page = self.request(url, encoding="utf-8").text
|
||||||
data = self.get_job_metadata(page)
|
data = self.get_job_metadata(page)
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
@@ -61,7 +62,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
|||||||
page = text.extract(page, '<table id="songlist">', '</table>')[0]
|
page = text.extract(page, '<table id="songlist">', '</table>')[0]
|
||||||
for num, url in enumerate(text.extract_iter(
|
for num, url in enumerate(text.extract_iter(
|
||||||
page, '<td class="clickable-row"><a href="', '"'), 1):
|
page, '<td class="clickable-row"><a href="', '"'), 1):
|
||||||
page = self.request(url, encoding="utf-8").text
|
page = self.request(urljoin(self.root, url), encoding="utf-8").text
|
||||||
url = text.extract(
|
url = text.extract(
|
||||||
page, '<p><a style="color: #21363f;" href="', '"')[0]
|
page, '<p><a style="color: #21363f;" href="', '"')[0]
|
||||||
yield url, text.nameext_from_url(url, {"num": num})
|
yield url, text.nameext_from_url(url, {"num": num})
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ class MangaparkChapterExtractor(MangaparkExtractor):
|
|||||||
}),
|
}),
|
||||||
("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
|
("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
|
||||||
"count": 15,
|
"count": 15,
|
||||||
"keyword": "5760c0a5efd1ffe24468cfaac5b41d048af36360",
|
"keyword": "dc9233cdd83d8659300f0a20ec3c493873f71741",
|
||||||
}),
|
}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user