add 'text.ensure_http_scheme()'
This commit is contained in:
@@ -126,8 +126,9 @@ class DeviantartExtractor(Extractor):
|
||||
if self.extra:
|
||||
for match in DeviantartStashExtractor.pattern.finditer(
|
||||
deviation.get("description", "")):
|
||||
url = text.ensure_http_scheme(match.group(0))
|
||||
deviation["_extractor"] = DeviantartStashExtractor
|
||||
yield Message.Queue, match.group(0), deviation
|
||||
yield Message.Queue, url, deviation
|
||||
|
||||
def deviations(self):
|
||||
"""Return an iterable containing all relevant Deviation-objects"""
|
||||
|
||||
@@ -224,10 +224,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
|
||||
self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format(
|
||||
self.user, match.group(3))
|
||||
else:
|
||||
url = match.group(0)
|
||||
if not url.startswith("http"):
|
||||
url = "https://" + url
|
||||
self.post_url = url
|
||||
self.post_url = text.ensure_http_scheme(match.group(0))
|
||||
|
||||
def posts(self):
    """Return a one-element tuple holding 'self.post_url'"""
    post_url = self.post_url
    return (post_url,)
|
||||
@@ -414,6 +411,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
||||
@staticmethod
|
||||
def _extract_favorites(page):
|
||||
return [
|
||||
"https://" + user.rpartition('"')[2].lstrip("/:")
|
||||
text.ensure_http_scheme(user.rpartition('"')[2])
|
||||
for user in text.extract_iter(page, 'class="item-user', '"><img')
|
||||
]
|
||||
|
||||
@@ -98,8 +98,7 @@ class PatreonExtractor(Extractor):
|
||||
headers = {"Referer": self.root}
|
||||
|
||||
while url:
|
||||
if not url.startswith("http"):
|
||||
url = "https://" + url.lstrip("/:")
|
||||
url = text.ensure_http_scheme(url)
|
||||
posts = self.request(url, headers=headers).json()
|
||||
|
||||
if "included" in posts:
|
||||
|
||||
@@ -60,6 +60,13 @@ def split_html(txt, sep=None):
|
||||
return []
|
||||
|
||||
|
||||
def ensure_http_scheme(url, scheme="https://"):
    """Prepend 'scheme' to 'url' if it doesn't have an HTTP(S) scheme

    Falsy values (None, "", ...) are returned unchanged.  A URL that
    already starts with "http://" or "https://" is returned as-is; the
    comparison ignores case because URI schemes are case-insensitive.
    Otherwise any leading "/" and ":" characters are stripped before
    'scheme' is prepended, so protocol-relative ("//host/path") and
    truncated ("://host/path") URLs get a clean prefix.
    """
    # "https://" is 8 characters long, so only that prefix needs lowercasing
    if url and not url[:8].lower().startswith(("https://", "http://")):
        return scheme + url.lstrip("/:")
    return url
|
||||
|
||||
|
||||
def filename_from_url(url):
|
||||
"""Extract the last part of an URL to use as a filename"""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user