use text.urljoin()

This commit is contained in:
Mike Fährmann
2018-04-26 17:00:26 +02:00
parent 2721417dd8
commit 95392554ee
10 changed files with 12 additions and 21 deletions

View File

@@ -41,7 +41,7 @@ def solve_challenge(session, response):
     params["jschl_answer"] = solve_jschl(response.url, page)
     time.sleep(4)
-    url = urllib.parse.urljoin(response.url, "/cdn-cgi/l/chk_jschl")
+    url = text.urljoin(response.url, "/cdn-cgi/l/chk_jschl")
     return session.get(url, params=params)

View File

@@ -10,7 +10,6 @@
 from .common import SharedConfigExtractor, Message
 from .. import text
-from urllib.parse import urljoin
 from xml.etree import ElementTree
 import datetime
 import operator
@@ -52,7 +51,7 @@ class BooruExtractor(SharedConfigExtractor):
         try:
             url = image["file_url"]
             if url.startswith("/"):
-                url = urljoin(self.api_url, url)
+                url = text.urljoin(self.api_url, url)
             image.update(data)
             yield Message.Url, url, text.nameext_from_url(url, image)
         except KeyError:

View File

@@ -10,7 +10,6 @@
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
-from urllib.parse import urljoin
 import json
@@ -61,7 +60,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor):
             title, pos = text.extract(page, '>View ', '<', pos)
             data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
             data["title"] = title
-            results.append((urljoin(self.root, url), data.copy()))
+            results.append((text.urljoin(self.root, url), data.copy()))
 class HbrowseChapterExtractor(HbrowseExtractor, ChapterExtractor):

View File

@@ -10,7 +10,6 @@
 from .common import Extractor, Message
 from .. import text, util, exception
-from urllib.parse import urljoin
 class HentaifoundryExtractor(Extractor):
@@ -47,7 +46,7 @@ class HentaifoundryExtractor(Extractor):
     def get_image_metadata(self, url):
         """Collect metadata for an image"""
-        page = self.request(urljoin(self.root, url)).text
+        page = self.request(text.urljoin(self.root, url)).text
         index = url.rsplit("/", 2)[1]
         title, pos = text.extract(
             page, 'Pictures</a> &raquo; <span>', '<')

View File

@@ -12,7 +12,6 @@ from .common import Extractor, Message
 from .. import text, exception
 from ..cache import memcache
 from os.path import splitext
-from urllib.parse import urljoin
 class ImagehostImageExtractor(Extractor):
@@ -142,8 +141,7 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
     def get_info(self, page):
         url = text.extract(page, 'SRC="', '"')[0]
-        url = urljoin(self.url, url)
-        return url, url
+        return text.urljoin(self.url, url), url
 class ImagetwistImageExtractor(ImagehostImageExtractor):

View File

@@ -10,7 +10,6 @@
 from .common import AsynchronousExtractor, Message
 from .. import text, exception
-from urllib.parse import urljoin
 class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
@@ -63,7 +62,8 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
         page = text.extract(page, '<table id="songlist">', '</table>')[0]
         for num, url in enumerate(text.extract_iter(
                 page, '<td class="clickable-row"><a href="', '"'), 1):
-            page = self.request(urljoin(self.root, url), encoding="utf-8").text
+            url = text.urljoin(self.root, url)
+            page = self.request(url, encoding="utf-8").text
             url = text.extract(
                 page, '<p><a style="color: #21363f;" href="', '"')[0]
             yield url, text.nameext_from_url(url, {"num": num})

View File

@@ -10,7 +10,6 @@
 from .common import ChapterExtractor, MangaExtractor
 from .. import text, util, exception
-from urllib.parse import urljoin
 import json
 import re
@@ -84,7 +83,7 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
         pagelist, pos = text.extract(page, "var page_array = [", "]", pos)
         server  , pos = text.extract(page, "var server = '", "'", pos)
-        base = urljoin(self.root, server + dataurl + "/")
+        base = text.urljoin(self.root, server + dataurl + "/")
         return [
             (base + page, None)

View File

@@ -10,7 +10,6 @@
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
-from urllib.parse import urljoin
 import re
@@ -51,7 +50,7 @@ class MangahereMangaExtractor(MangaExtractor):
            volume, pos = text.extract(page, 'span class="mr6">', '<', pos)
            title, pos = text.extract(page, '/span>', '<', pos)
            date, pos = text.extract(page, 'class="right">', '</span>', pos)
-           results.append((urljoin("http:", url), {
+           results.append((text.urljoin("http:", url), {
                "manga": manga, "title": title, "date": date,
                "volume": text.parse_int(volume.rpartition(" ")[2]),
                "chapter": text.parse_int(chapter),

View File

@@ -10,7 +10,6 @@
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
-from urllib.parse import urljoin
 class MangaparkExtractor():
@@ -120,7 +119,7 @@ class MangaparkChapterExtractor(MangaparkExtractor, ChapterExtractor):
            num += 1
            width , pos = text.extract(page, ' width="', '"', pos)
            height, pos = text.extract(page, ' _heighth="', '"', pos)
-           yield urljoin(self.root, url), {
+           yield text.urljoin(self.root, url), {
                "page": num,
                "width": width,
                "height": height,

View File

@@ -10,7 +10,6 @@
 from .common import ChapterExtractor
 from .. import text
-from urllib.parse import urljoin
 class MangastreamChapterExtractor(ChapterExtractor):
@@ -47,5 +46,5 @@ class MangastreamChapterExtractor(ChapterExtractor):
         pos = page.index(' class="page"')
         next_url = text.extract(page, ' href="', '"', pos)[0]
         image_url = text.extract(page, ' src="', '"', pos)[0]
-        yield urljoin(self.base_url, image_url), None
-        page = self.request(urljoin(self.base_url, next_url)).text
+        yield text.urljoin(self.base_url, image_url), None
+        page = self.request(text.urljoin(self.base_url, next_url)).text