Use text.urljoin() in place of urllib.parse.urljoin() and drop the now-unneeded urljoin imports
This commit is contained in:
@@ -41,7 +41,7 @@ def solve_challenge(session, response):
|
|||||||
params["jschl_answer"] = solve_jschl(response.url, page)
|
params["jschl_answer"] = solve_jschl(response.url, page)
|
||||||
|
|
||||||
time.sleep(4)
|
time.sleep(4)
|
||||||
url = urllib.parse.urljoin(response.url, "/cdn-cgi/l/chk_jschl")
|
url = text.urljoin(response.url, "/cdn-cgi/l/chk_jschl")
|
||||||
return session.get(url, params=params)
|
return session.get(url, params=params)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import SharedConfigExtractor, Message
|
from .common import SharedConfigExtractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
from urllib.parse import urljoin
|
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
import datetime
|
import datetime
|
||||||
import operator
|
import operator
|
||||||
@@ -52,7 +51,7 @@ class BooruExtractor(SharedConfigExtractor):
|
|||||||
try:
|
try:
|
||||||
url = image["file_url"]
|
url = image["file_url"]
|
||||||
if url.startswith("/"):
|
if url.startswith("/"):
|
||||||
url = urljoin(self.api_url, url)
|
url = text.urljoin(self.api_url, url)
|
||||||
image.update(data)
|
image.update(data)
|
||||||
yield Message.Url, url, text.nameext_from_url(url, image)
|
yield Message.Url, url, text.nameext_from_url(url, image)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text
|
from .. import text
|
||||||
from urllib.parse import urljoin
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
@@ -61,7 +60,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor):
|
|||||||
title, pos = text.extract(page, '>View ', '<', pos)
|
title, pos = text.extract(page, '>View ', '<', pos)
|
||||||
data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
|
data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
|
||||||
data["title"] = title
|
data["title"] = title
|
||||||
results.append((urljoin(self.root, url), data.copy()))
|
results.append((text.urljoin(self.root, url), data.copy()))
|
||||||
|
|
||||||
|
|
||||||
class HbrowseChapterExtractor(HbrowseExtractor, ChapterExtractor):
|
class HbrowseChapterExtractor(HbrowseExtractor, ChapterExtractor):
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from urllib.parse import urljoin
|
|
||||||
|
|
||||||
|
|
||||||
class HentaifoundryExtractor(Extractor):
|
class HentaifoundryExtractor(Extractor):
|
||||||
@@ -47,7 +46,7 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
|
|
||||||
def get_image_metadata(self, url):
|
def get_image_metadata(self, url):
|
||||||
"""Collect metadata for an image"""
|
"""Collect metadata for an image"""
|
||||||
page = self.request(urljoin(self.root, url)).text
|
page = self.request(text.urljoin(self.root, url)).text
|
||||||
index = url.rsplit("/", 2)[1]
|
index = url.rsplit("/", 2)[1]
|
||||||
title, pos = text.extract(
|
title, pos = text.extract(
|
||||||
page, 'Pictures</a> » <span>', '<')
|
page, 'Pictures</a> » <span>', '<')
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ from .common import Extractor, Message
|
|||||||
from .. import text, exception
|
from .. import text, exception
|
||||||
from ..cache import memcache
|
from ..cache import memcache
|
||||||
from os.path import splitext
|
from os.path import splitext
|
||||||
from urllib.parse import urljoin
|
|
||||||
|
|
||||||
|
|
||||||
class ImagehostImageExtractor(Extractor):
|
class ImagehostImageExtractor(Extractor):
|
||||||
@@ -142,8 +141,7 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
|
|||||||
|
|
||||||
def get_info(self, page):
|
def get_info(self, page):
|
||||||
url = text.extract(page, 'SRC="', '"')[0]
|
url = text.extract(page, 'SRC="', '"')[0]
|
||||||
url = urljoin(self.url, url)
|
return text.urljoin(self.url, url), url
|
||||||
return url, url
|
|
||||||
|
|
||||||
|
|
||||||
class ImagetwistImageExtractor(ImagehostImageExtractor):
|
class ImagetwistImageExtractor(ImagehostImageExtractor):
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import AsynchronousExtractor, Message
|
from .common import AsynchronousExtractor, Message
|
||||||
from .. import text, exception
|
from .. import text, exception
|
||||||
from urllib.parse import urljoin
|
|
||||||
|
|
||||||
|
|
||||||
class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
||||||
@@ -63,7 +62,8 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
|||||||
page = text.extract(page, '<table id="songlist">', '</table>')[0]
|
page = text.extract(page, '<table id="songlist">', '</table>')[0]
|
||||||
for num, url in enumerate(text.extract_iter(
|
for num, url in enumerate(text.extract_iter(
|
||||||
page, '<td class="clickable-row"><a href="', '"'), 1):
|
page, '<td class="clickable-row"><a href="', '"'), 1):
|
||||||
page = self.request(urljoin(self.root, url), encoding="utf-8").text
|
url = text.urljoin(self.root, url)
|
||||||
|
page = self.request(url, encoding="utf-8").text
|
||||||
url = text.extract(
|
url = text.extract(
|
||||||
page, '<p><a style="color: #21363f;" href="', '"')[0]
|
page, '<p><a style="color: #21363f;" href="', '"')[0]
|
||||||
yield url, text.nameext_from_url(url, {"num": num})
|
yield url, text.nameext_from_url(url, {"num": num})
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from urllib.parse import urljoin
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -84,7 +83,7 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
|||||||
pagelist, pos = text.extract(page, "var page_array = [", "]", pos)
|
pagelist, pos = text.extract(page, "var page_array = [", "]", pos)
|
||||||
server , pos = text.extract(page, "var server = '", "'", pos)
|
server , pos = text.extract(page, "var server = '", "'", pos)
|
||||||
|
|
||||||
base = urljoin(self.root, server + dataurl + "/")
|
base = text.urljoin(self.root, server + dataurl + "/")
|
||||||
|
|
||||||
return [
|
return [
|
||||||
(base + page, None)
|
(base + page, None)
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text
|
from .. import text
|
||||||
from urllib.parse import urljoin
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
@@ -51,7 +50,7 @@ class MangahereMangaExtractor(MangaExtractor):
|
|||||||
volume, pos = text.extract(page, 'span class="mr6">', '<', pos)
|
volume, pos = text.extract(page, 'span class="mr6">', '<', pos)
|
||||||
title, pos = text.extract(page, '/span>', '<', pos)
|
title, pos = text.extract(page, '/span>', '<', pos)
|
||||||
date, pos = text.extract(page, 'class="right">', '</span>', pos)
|
date, pos = text.extract(page, 'class="right">', '</span>', pos)
|
||||||
results.append((urljoin("http:", url), {
|
results.append((text.urljoin("http:", url), {
|
||||||
"manga": manga, "title": title, "date": date,
|
"manga": manga, "title": title, "date": date,
|
||||||
"volume": text.parse_int(volume.rpartition(" ")[2]),
|
"volume": text.parse_int(volume.rpartition(" ")[2]),
|
||||||
"chapter": text.parse_int(chapter),
|
"chapter": text.parse_int(chapter),
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor, MangaExtractor
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
from .. import text
|
from .. import text
|
||||||
from urllib.parse import urljoin
|
|
||||||
|
|
||||||
|
|
||||||
class MangaparkExtractor():
|
class MangaparkExtractor():
|
||||||
@@ -120,7 +119,7 @@ class MangaparkChapterExtractor(MangaparkExtractor, ChapterExtractor):
|
|||||||
num += 1
|
num += 1
|
||||||
width , pos = text.extract(page, ' width="', '"', pos)
|
width , pos = text.extract(page, ' width="', '"', pos)
|
||||||
height, pos = text.extract(page, ' _heighth="', '"', pos)
|
height, pos = text.extract(page, ' _heighth="', '"', pos)
|
||||||
yield urljoin(self.root, url), {
|
yield text.urljoin(self.root, url), {
|
||||||
"page": num,
|
"page": num,
|
||||||
"width": width,
|
"width": width,
|
||||||
"height": height,
|
"height": height,
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import ChapterExtractor
|
from .common import ChapterExtractor
|
||||||
from .. import text
|
from .. import text
|
||||||
from urllib.parse import urljoin
|
|
||||||
|
|
||||||
|
|
||||||
class MangastreamChapterExtractor(ChapterExtractor):
|
class MangastreamChapterExtractor(ChapterExtractor):
|
||||||
@@ -47,5 +46,5 @@ class MangastreamChapterExtractor(ChapterExtractor):
|
|||||||
pos = page.index(' class="page"')
|
pos = page.index(' class="page"')
|
||||||
next_url = text.extract(page, ' href="', '"', pos)[0]
|
next_url = text.extract(page, ' href="', '"', pos)[0]
|
||||||
image_url = text.extract(page, ' src="', '"', pos)[0]
|
image_url = text.extract(page, ' src="', '"', pos)[0]
|
||||||
yield urljoin(self.base_url, image_url), None
|
yield text.urljoin(self.base_url, image_url), None
|
||||||
page = self.request(urljoin(self.base_url, next_url)).text
|
page = self.request(text.urljoin(self.base_url, next_url)).text
|
||||||
|
|||||||
Reference in New Issue
Block a user