implement 'text.root_from_url()'
use domain from input URL for kemono
This commit is contained in:
@@ -621,8 +621,7 @@ class BaseExtractor(Extractor):
|
|||||||
if index:
|
if index:
|
||||||
self.category, self.root = self.instances[index-1]
|
self.category, self.root = self.instances[index-1]
|
||||||
if not self.root:
|
if not self.root:
|
||||||
url = text.ensure_http_scheme(match.group(0))
|
self.root = text.root_from_url(match.group(0))
|
||||||
self.root = url[:url.index("/", 8)]
|
|
||||||
else:
|
else:
|
||||||
self.root = group
|
self.root = group
|
||||||
self.category = group.partition("://")[2]
|
self.category = group.partition("://")[2]
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ class KemonopartyExtractor(Extractor):
|
|||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
if match.group(1) == "coomer":
|
if match.group(1) == "coomer":
|
||||||
self.category = "coomerparty"
|
self.category = "coomerparty"
|
||||||
self.root = "https://coomer.party"
|
|
||||||
self.cookiedomain = ".coomer.party"
|
self.cookiedomain = ".coomer.party"
|
||||||
|
self.root = text.root_from_url(match.group(0))
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
|||||||
@@ -46,6 +46,13 @@ def ensure_http_scheme(url, scheme="https://"):
|
|||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
def root_from_url(url, scheme="https://"):
    """Extract scheme and domain from a URL

    Returns everything up to (but not including) the first "/" that
    follows the domain.  If 'url' does not start with "http://" or
    "https://", 'scheme' is prepended to the result.

    A URL without any path separator after the domain
    (e.g. "example.org" or "https://example.org") is returned as the
    root in its entirety instead of raising ValueError.
    """
    if not url.startswith(("https://", "http://")):
        end = url.find("/")
        return scheme + (url if end < 0 else url[:end])

    # begin searching at offset 8 to skip the "//" of the scheme
    end = url.find("/", 8)
    return url if end < 0 else url[:end]
|
||||||
|
|
||||||
|
|
||||||
def filename_from_url(url):
|
def filename_from_url(url):
|
||||||
"""Extract the last part of an URL to use as a filename"""
|
"""Extract the last part of an URL to use as a filename"""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -102,6 +102,18 @@ class TestText(unittest.TestCase):
|
|||||||
for value in INVALID_ALT:
|
for value in INVALID_ALT:
|
||||||
self.assertEqual(f(value), value)
|
self.assertEqual(f(value), value)
|
||||||
|
|
||||||
|
def test_root_from_url(self, f=text.root_from_url):
    """Verify that root_from_url() returns scheme plus domain"""
    https_root = "https://example.org"
    http_root = "http://example.org"

    # (positional arguments for f, expected root)
    cases = (
        (("https://example.org/",), https_root),
        (("https://example.org/path",), https_root),
        (("example.org/",), https_root),
        (("example.org/path/",), https_root),
        (("http://example.org/",), http_root),
        (("http://example.org/path/",), http_root),
        (("example.org/", "http://"), http_root),
    )
    for args, expected in cases:
        self.assertEqual(f(*args), expected)
|
||||||
|
|
||||||
def test_filename_from_url(self, f=text.filename_from_url):
|
def test_filename_from_url(self, f=text.filename_from_url):
|
||||||
result = "filename.ext"
|
result = "filename.ext"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user