[recursive] add 'https://' to URLs if not present

This commit is contained in:
Mike Fährmann
2024-12-10 17:16:52 +01:00
parent e8826ed3d4
commit 473ee5ff85

View File

@@ -9,6 +9,7 @@
"""Recursive extractor""" """Recursive extractor"""
 from .common import Extractor, Message
+from .. import text
 import re
@@ -25,7 +26,7 @@ class RecursiveExtractor(Extractor):
         with open(url[7:]) as fp:
             page = fp.read()
     else:
-        page = self.request(url).text
+        page = self.request(text.ensure_http_scheme(url)).text
     for match in re.finditer(r"https?://[^\s\"']+", page):
         yield Message.Queue, match.group(0), {}