[recursive] prepend 'https://' to URLs that lack an HTTP(S) scheme
This commit is contained in:
@@ -9,6 +9,7 @@
|
|||||||
"""Recursive extractor"""
|
"""Recursive extractor"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
|
from .. import text
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
@@ -25,7 +26,7 @@ class RecursiveExtractor(Extractor):
|
|||||||
with open(url[7:]) as fp:
|
with open(url[7:]) as fp:
|
||||||
page = fp.read()
|
page = fp.read()
|
||||||
else:
|
else:
|
||||||
page = self.request(url).text
|
page = self.request(text.ensure_http_scheme(url)).text
|
||||||
|
|
||||||
for match in re.finditer(r"https?://[^\s\"']+", page):
|
for match in re.finditer(r"https?://[^\s\"']+", page):
|
||||||
yield Message.Queue, match.group(0), {}
|
yield Message.Queue, match.group(0), {}
|
||||||
|
|||||||
Reference in New Issue
Block a user