[recursive] add 'https://' to URLs if not present

This commit is contained in:
Mike Fährmann
2024-12-10 17:16:52 +01:00
parent e8826ed3d4
commit 473ee5ff85

View File

@@ -9,6 +9,7 @@
"""Recursive extractor"""
from .common import Extractor, Message
+from .. import text
import re
@@ -25,7 +26,7 @@ class RecursiveExtractor(Extractor):
with open(url[7:]) as fp:
page = fp.read()
else:
-page = self.request(url).text
+page = self.request(text.ensure_http_scheme(url)).text
for match in re.finditer(r"https?://[^\s\"']+", page):
yield Message.Queue, match.group(0), {}