add 'restrict-filenames' option (#348)
This commit is contained in:
@@ -108,6 +108,24 @@ Description Directory path used as the base for all download destinations.
|
|||||||
=========== =====
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
|
extractor.*.restrict-filenames
|
||||||
|
------------------------------
|
||||||
|
=========== =====
|
||||||
|
Type ``string``
|
||||||
|
Default ``"auto"``
|
||||||
|
Example ``"/!? ()[]{}"``
|
||||||
|
Description Characters to replace with underscores (``_``) when generating
|
||||||
|
directory and file names.
|
||||||
|
|
||||||
|
Special values:
|
||||||
|
|
||||||
|
* ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
|
||||||
|
depending on the local operating system
|
||||||
|
* ``"unix"``: ``"/"``
|
||||||
|
* ``"windows"``: ``"<>:\"\\|/?*"``
|
||||||
|
=========== =====
|
||||||
|
|
||||||
|
|
||||||
extractor.*.skip
|
extractor.*.skip
|
||||||
----------------
|
----------------
|
||||||
=========== =====
|
=========== =====
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
"skip": true,
|
"skip": true,
|
||||||
"sleep": 0,
|
"sleep": 0,
|
||||||
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
|
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
|
||||||
|
"restrict-filenames": "auto",
|
||||||
|
|
||||||
"artstation":
|
"artstation":
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -83,22 +83,6 @@ def nameext_from_url(url, data=None):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def clean_path_windows(path):
|
|
||||||
"""Remove illegal characters from a path-segment (Windows)"""
|
|
||||||
try:
|
|
||||||
return re.sub(r'[<>:"\\/|?*]', "_", path)
|
|
||||||
except TypeError:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def clean_path_posix(path):
|
|
||||||
"""Remove illegal characters from a path-segment (Posix)"""
|
|
||||||
try:
|
|
||||||
return path.replace("/", "_")
|
|
||||||
except AttributeError:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def extract(txt, begin, end, pos=0):
|
def extract(txt, begin, end, pos=0):
|
||||||
"""Extract the text between 'begin' and 'end' from 'txt'
|
"""Extract the text between 'begin' and 'end' from 'txt'
|
||||||
|
|
||||||
@@ -266,12 +250,6 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
|
|||||||
return date_string
|
return date_string
|
||||||
|
|
||||||
|
|
||||||
if os.name == "nt":
|
|
||||||
clean_path = clean_path_windows
|
|
||||||
else:
|
|
||||||
clean_path = clean_path_posix
|
|
||||||
|
|
||||||
|
|
||||||
urljoin = urllib.parse.urljoin
|
urljoin = urllib.parse.urljoin
|
||||||
|
|
||||||
quote = urllib.parse.quote
|
quote = urllib.parse.quote
|
||||||
|
|||||||
@@ -535,6 +535,27 @@ class PathFormat():
|
|||||||
if os.altsep and os.altsep in self.basedirectory:
|
if os.altsep and os.altsep in self.basedirectory:
|
||||||
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
|
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
|
||||||
|
|
||||||
|
restrict = extractor.config("restrict-filenames", "auto")
|
||||||
|
if restrict == "auto":
|
||||||
|
restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
|
||||||
|
elif restrict == "unix":
|
||||||
|
restrict = "/"
|
||||||
|
elif restrict == "windows":
|
||||||
|
restrict = "<>:\"\\/|?*"
|
||||||
|
self.clean_path = self._build_cleanfunc(restrict)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_cleanfunc(repl):
|
||||||
|
if not repl:
|
||||||
|
return lambda x: x
|
||||||
|
elif len(repl) == 1:
|
||||||
|
def func(x, r=repl):
|
||||||
|
return x.replace(r, "_")
|
||||||
|
else:
|
||||||
|
def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
|
||||||
|
return sub("_", x)
|
||||||
|
return func
|
||||||
|
|
||||||
def open(self, mode="wb"):
|
def open(self, mode="wb"):
|
||||||
"""Open file and return a corresponding file object"""
|
"""Open file and return a corresponding file object"""
|
||||||
return open(self.temppath, mode)
|
return open(self.temppath, mode)
|
||||||
@@ -551,7 +572,7 @@ class PathFormat():
|
|||||||
"""Build directory path and create it if necessary"""
|
"""Build directory path and create it if necessary"""
|
||||||
try:
|
try:
|
||||||
segments = [
|
segments = [
|
||||||
text.clean_path(
|
self.clean_path(
|
||||||
Formatter(segment, self.kwdefault)
|
Formatter(segment, self.kwdefault)
|
||||||
.format_map(keywords).strip())
|
.format_map(keywords).strip())
|
||||||
for segment in self.directory_fmt
|
for segment in self.directory_fmt
|
||||||
@@ -597,7 +618,7 @@ class PathFormat():
|
|||||||
def build_path(self):
|
def build_path(self):
|
||||||
"""Use filename-keywords and directory to build a full path"""
|
"""Use filename-keywords and directory to build a full path"""
|
||||||
try:
|
try:
|
||||||
self.filename = text.clean_path(
|
self.filename = self.clean_path(
|
||||||
self.formatter.format_map(self.keywords))
|
self.formatter.format_map(self.keywords))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise exception.FormatError(exc, "filename")
|
raise exception.FormatError(exc, "filename")
|
||||||
|
|||||||
@@ -139,26 +139,6 @@ class TestText(unittest.TestCase):
|
|||||||
for value in INVALID:
|
for value in INVALID:
|
||||||
self.assertEqual(f(value), empty)
|
self.assertEqual(f(value), empty)
|
||||||
|
|
||||||
def test_clean_path_windows(self, f=text.clean_path_windows):
|
|
||||||
self.assertEqual(f(""), "")
|
|
||||||
self.assertEqual(f("foo"), "foo")
|
|
||||||
self.assertEqual(f("foo/bar"), "foo_bar")
|
|
||||||
self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo_________bar")
|
|
||||||
|
|
||||||
# invalid arguments
|
|
||||||
for value in INVALID:
|
|
||||||
self.assertEqual(f(value), "")
|
|
||||||
|
|
||||||
def test_clean_path_posix(self, f=text.clean_path_posix):
|
|
||||||
self.assertEqual(f(""), "")
|
|
||||||
self.assertEqual(f("foo"), "foo")
|
|
||||||
self.assertEqual(f("foo/bar"), "foo_bar")
|
|
||||||
self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo<>:\"\\_|?*bar")
|
|
||||||
|
|
||||||
# invalid arguments
|
|
||||||
for value in INVALID:
|
|
||||||
self.assertEqual(f(value), "")
|
|
||||||
|
|
||||||
def test_extract(self, f=text.extract):
|
def test_extract(self, f=text.extract):
|
||||||
txt = "<a><b>"
|
txt = "<a><b>"
|
||||||
self.assertEqual(f(txt, "<", ">"), ("a" , 3))
|
self.assertEqual(f(txt, "<", ">"), ("a" , 3))
|
||||||
|
|||||||
Reference in New Issue
Block a user