Remove control characters from filesystem paths
- add 'path-remove' option to specify the set of characters that should be removed
- rename 'restrict-filenames' to 'path-restrict'
- #348, #380
This commit is contained in:
@@ -108,21 +108,36 @@ Description Directory path used as the base for all download destinations.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.*.restrict-filenames
|
||||
------------------------------
|
||||
extractor.*.path-restrict
|
||||
-------------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"auto"``
|
||||
Example ``"/!? ()[]{}"``
|
||||
Description Characters to replace with underscores (``_``) when generating
|
||||
directory and file names.
|
||||
Example ``"/!? (){}"``
|
||||
Description Set of characters to replace with underscores (``_``)
|
||||
in generated path segment names.
|
||||
|
||||
Special values:
|
||||
|
||||
* ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
|
||||
depending on the local operating system
|
||||
* ``"unix"``: ``"/"``
|
||||
* ``"windows"``: ``"<>:\"\\|/?*"``
|
||||
* ``"windows"``: ``"\\\\|/<>:\"?*"``
|
||||
|
||||
Note: In a set with 2 or more characters, ``[]^-\`` need to be
|
||||
escaped with backslashes, e.g. ``"\\[\\]"``
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.*.path-remove
|
||||
-----------------------
|
||||
=========== =====
|
||||
Type ``string``
|
||||
Default ``"\\u0000-\\u001f\\u007f"`` (ASCII control characters)
|
||||
Description Set of characters to remove from generated path names.
|
||||
|
||||
Note: In a set with 2 or more characters, ``[]^-\`` need to be
|
||||
escaped with backslashes, e.g. ``"\\[\\]"``
|
||||
=========== =====
|
||||
|
||||
|
||||
|
||||
@@ -8,8 +8,9 @@
|
||||
"proxy": null,
|
||||
"skip": true,
|
||||
"sleep": 0,
|
||||
"path-restrict": "auto",
|
||||
"path-remove": "\\u0000-\\u001f\\u007f",
|
||||
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
|
||||
"restrict-filenames": "auto",
|
||||
|
||||
"artstation":
|
||||
{
|
||||
|
||||
@@ -535,25 +535,29 @@ class PathFormat():
|
||||
if os.altsep and os.altsep in self.basedirectory:
|
||||
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
|
||||
|
||||
restrict = extractor.config("restrict-filenames", "auto")
|
||||
restrict = extractor.config("path-restrict", "auto")
|
||||
if restrict == "auto":
|
||||
restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
|
||||
restrict = "\\\\|/<>:\"?*" if os.name == "nt" else "/"
|
||||
elif restrict == "unix":
|
||||
restrict = "/"
|
||||
elif restrict == "windows":
|
||||
restrict = "<>:\"\\/|?*"
|
||||
self.clean_path = self._build_cleanfunc(restrict)
|
||||
restrict = "\\\\|/<>:\"?*"
|
||||
|
||||
remove = extractor.config("path-remove", "\x00-\x1f\x7f")
|
||||
|
||||
self.clean_segment = self._build_cleanfunc(restrict, "_")
|
||||
self.clean_path = self._build_cleanfunc(remove, "")
|
||||
|
||||
@staticmethod
|
||||
def _build_cleanfunc(repl):
|
||||
if not repl:
|
||||
def _build_cleanfunc(chars, repl):
|
||||
if not chars:
|
||||
return lambda x: x
|
||||
elif len(repl) == 1:
|
||||
def func(x, r=repl):
|
||||
return x.replace(r, "_")
|
||||
elif len(chars) == 1:
|
||||
def func(x, c=chars, r=repl):
|
||||
return x.replace(c, r)
|
||||
else:
|
||||
def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
|
||||
return sub("_", x)
|
||||
def func(x, sub=re.compile("[" + chars + "]").sub, r=repl):
|
||||
return sub(r, x)
|
||||
return func
|
||||
|
||||
def open(self, mode="wb"):
|
||||
@@ -586,16 +590,19 @@ class PathFormat():
|
||||
# Build path segments by applying 'kwdict' to directory format strings
|
||||
try:
|
||||
segments = [
|
||||
self.clean_path(
|
||||
self.clean_segment(
|
||||
Formatter(segment, self.kwdefault)
|
||||
.format_map(kwdict).strip())
|
||||
.format_map(kwdict)
|
||||
.strip()
|
||||
)
|
||||
for segment in self.directory_fmt
|
||||
]
|
||||
except Exception as exc:
|
||||
raise exception.FormatError(exc, "directory")
|
||||
|
||||
# Join path segments
|
||||
self.directory = os.path.join(self.basedirectory, *segments)
|
||||
self.directory = self.clean_path(os.path.join(
|
||||
self.basedirectory, *segments))
|
||||
|
||||
# Remove trailing path separator;
|
||||
# occurs if the last argument to os.path.join() is an empty string
|
||||
@@ -641,8 +648,8 @@ class PathFormat():
|
||||
|
||||
# Apply 'kwdict' to filename format string
|
||||
try:
|
||||
self.filename = self.clean_path(
|
||||
self.formatter.format_map(self.kwdict))
|
||||
self.filename = self.clean_path(self.clean_segment(
|
||||
self.formatter.format_map(self.kwdict)))
|
||||
except Exception as exc:
|
||||
raise exception.FormatError(exc, "filename")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user