implement a way to shorten filenames with east-asian characters
(#1377) Setting 'output.shorten' to "eaw" (East-Asian Width) uses a slower algorithm that also considers characters with a width > 1.
This commit is contained in:
@@ -2531,6 +2531,9 @@ Description
|
|||||||
Controls whether the output strings should be shortened to fit
|
Controls whether the output strings should be shortened to fit
|
||||||
on one console line.
|
on one console line.
|
||||||
|
|
||||||
|
Set this option to ``"eaw"`` to also work with east-asian characters
|
||||||
|
with a display width greater than 1.
|
||||||
|
|
||||||
|
|
||||||
output.skip
|
output.skip
|
||||||
-----------
|
-----------
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
import logging
|
import logging
|
||||||
|
import unicodedata
|
||||||
from . import config, util
|
from . import config, util
|
||||||
|
|
||||||
|
|
||||||
@@ -270,9 +271,14 @@ class PipeOutput(NullOutput):
|
|||||||
class TerminalOutput(NullOutput):
|
class TerminalOutput(NullOutput):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.short = config.get(("output",), "shorten", True)
|
shorten = config.get(("output",), "shorten", True)
|
||||||
if self.short:
|
if shorten:
|
||||||
self.width = shutil.get_terminal_size().columns - OFFSET
|
func = shorten_string_eaw if shorten == "eaw" else shorten_string
|
||||||
|
limit = shutil.get_terminal_size().columns - OFFSET
|
||||||
|
sep = CHAR_ELLIPSIES
|
||||||
|
self.shorten = lambda txt: func(txt, limit, sep)
|
||||||
|
else:
|
||||||
|
self.shorten = util.identity
|
||||||
|
|
||||||
def start(self, path):
|
def start(self, path):
|
||||||
print(self.shorten(" " + path), end="", flush=True)
|
print(self.shorten(" " + path), end="", flush=True)
|
||||||
@@ -283,17 +289,6 @@ class TerminalOutput(NullOutput):
|
|||||||
def success(self, path, tries):
|
def success(self, path, tries):
|
||||||
print("\r", self.shorten(CHAR_SUCCESS + path), sep="")
|
print("\r", self.shorten(CHAR_SUCCESS + path), sep="")
|
||||||
|
|
||||||
def shorten(self, txt):
|
|
||||||
"""Reduce the length of 'txt' to the width of the terminal"""
|
|
||||||
if self.short and len(txt) > self.width:
|
|
||||||
hwidth = self.width // 2 - OFFSET
|
|
||||||
return "".join((
|
|
||||||
txt[:hwidth-1],
|
|
||||||
CHAR_ELLIPSIES,
|
|
||||||
txt[-hwidth-(self.width % 2):]
|
|
||||||
))
|
|
||||||
return txt
|
|
||||||
|
|
||||||
|
|
||||||
class ColorOutput(TerminalOutput):
|
class ColorOutput(TerminalOutput):
|
||||||
|
|
||||||
@@ -307,6 +302,56 @@ class ColorOutput(TerminalOutput):
|
|||||||
print("\r\033[1;32m", self.shorten(path), "\033[0m", sep="")
|
print("\r\033[1;32m", self.shorten(path), "\033[0m", sep="")
|
||||||
|
|
||||||
|
|
||||||
|
class EAWCache(dict):
    """Memoizing map from a single character to its display width

    Characters whose East Asian Width property is 'W' (wide) or
    'F' (fullwidth) are given a width of 2, all others a width of 1.
    """

    def __missing__(self, key):
        # invoked by dict.__getitem__ on a miss; the result is stored
        # so that every distinct character gets classified only once
        width = self[key] = \
            2 if unicodedata.east_asian_width(key) in "WF" else 1
        return width


def shorten_string(txt, limit, sep="…"):
    """Limit width of 'txt'; assume all characters have a width of 1

    Returns 'txt' unchanged if it has at most 'limit' characters.
    Otherwise its middle part gets replaced by 'sep', keeping as many
    leading and trailing characters as fit into 'limit' (the trailing
    part receives the extra character for odd budgets).
    If 'sep' alone does not fit into 'limit', only 'sep' is returned.
    """
    if len(txt) <= limit:
        return txt

    # number of characters of 'txt' to keep; clamped so that a separator
    # longer than 'limit' cannot produce negative slice bounds
    limit = max(limit - len(sep), 0)
    head = limit // 2
    tail = limit - head

    # 'txt[-0:]' evaluates to the whole string,
    # therefore an empty tail must be handled explicitly
    return txt[:head] + sep + (txt[-tail:] if tail else "")


def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
    """Limit width of 'txt'; check for east-asian characters with width > 1

    Works like shorten_string(), but measures 'txt' in display columns,
    where wide and fullwidth characters occupy 2 columns.
    The width of 'sep' is taken to be len(sep).
    'cache' is a shared character-width cache that is deliberately
    reused across calls.
    """
    char_widths = [cache[c] for c in txt]
    text_width = sum(char_widths)

    if text_width <= limit:
        # no shortening required
        return txt

    # columns available for characters of 'txt'; never negative
    limit = max(limit - len(sep), 0)

    if text_width == len(txt):
        # all characters have a width of 1
        head = limit // 2
        tail = limit - head
        # 'txt[-0:]' would be the whole string
        return txt[:head] + sep + (txt[-tail:] if tail else "")

    # wide characters
    # Both loops stop before running out of characters, since the
    # columns they may consume add up to at most 'limit' < 'text_width'.
    left = 0
    lwidth = limit // 2
    while True:
        lwidth -= char_widths[left]
        if lwidth < 0:
            break
        left += 1

    # 'right' is the index of the first kept trailing character;
    # counting from len(txt) keeps 'txt[right:]' empty when nothing fits
    # (a negative index of -1 would turn 'txt[right+1:]' into 'txt[0:]')
    right = len(txt)
    # columns left unused by the left half are handed to the right half
    rwidth = (limit + 1) // 2 + (lwidth + char_widths[left])
    while True:
        rwidth -= char_widths[right - 1]
        if rwidth < 0:
            break
        right -= 1

    return txt[:left] + sep + txt[right:]
|
||||||
|
|
||||||
|
|
||||||
if util.WINDOWS:
|
if util.WINDOWS:
|
||||||
ANSI = os.environ.get("TERM") == "ANSI"
|
ANSI = os.environ.get("TERM") == "ANSI"
|
||||||
OFFSET = 1
|
OFFSET = 1
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
|
|
||||||
TESTS_CORE=(cache config cookies downloader extractor job oauth postprocessor text util)
|
TESTS_CORE=(cache config cookies downloader extractor job oauth output postprocessor text util)
|
||||||
TESTS_RESULTS=(results)
|
TESTS_RESULTS=(results)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
156
test/test_output.py
Normal file
156
test/test_output.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2021 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from gallery_dl import output # noqa E402
|
||||||
|
|
||||||
|
|
||||||
|
class TestShorten(unittest.TestCase):
    """Tests for 'output.shorten_string' (all characters have width 1)"""

    def test_shorten_noop(self, f=output.shorten_string):
        # strings that already fit are returned as they are
        for text in ("", "foobar"):
            self.assertEqual(f(text, 10), text)

    def test_shorten(self, f=output.shorten_string):
        s = "01234567890123456789"  # string of length 20

        # limits at or above the string length leave it untouched
        for limit in (30, 25, 20):
            self.assertEqual(f(s, limit), s)

        cases = (
            (19, "012345678…123456789"),
            (18, "01234567…123456789"),
            (17, "01234567…23456789"),
            (16, "0123456…23456789"),
            (15, "0123456…3456789"),
            (14, "012345…3456789"),
            (13, "012345…456789"),
            (12, "01234…456789"),
            (11, "01234…56789"),
            (10, "0123…56789"),
            (9, "0123…6789"),
            (3, "0…9"),
            (2, "…9"),
        )
        for limit, expected in cases:
            self.assertEqual(f(s, limit), expected)

    def test_shorten_separator(self, f=output.shorten_string):
        s = "01234567890123456789"  # string of length 20

        cases = (
            # multi-character separator at different limits
            (20, "|---|", s),
            (19, "|---|", "0123456|---|3456789"),
            (15, "|---|", "01234|---|56789"),
            (10, "|---|", "01|---|789"),
            # separators of decreasing length at a fixed limit
            (19, "...", "01234567...23456789"),
            (19, "..", "01234567..123456789"),
            (19, ".", "012345678.123456789"),
            (19, "", "0123456780123456789"),
        )
        for limit, sep, expected in cases:
            self.assertEqual(f(s, limit, sep), expected)
|
||||||
|
|
||||||
|
|
||||||
|
class TestShortenEAW(unittest.TestCase):
    """Tests for 'output.shorten_string_eaw' (width-aware shortening)"""

    def test_shorten_eaw_noop(self, f=output.shorten_string_eaw):
        # strings that already fit are returned as they are
        for text in ("", "foobar"):
            self.assertEqual(f(text, 10), text)

    def test_shorten_eaw(self, f=output.shorten_string_eaw):
        s = "01234567890123456789"  # 20 ascii characters

        # limits at or above the string width leave it untouched
        for limit in (30, 25, 20):
            self.assertEqual(f(s, limit), s)

        cases = (
            (19, "012345678…123456789"),
            (18, "01234567…123456789"),
            (17, "01234567…23456789"),
            (16, "0123456…23456789"),
            (15, "0123456…3456789"),
            (14, "012345…3456789"),
            (13, "012345…456789"),
            (12, "01234…456789"),
            (11, "01234…56789"),
            (10, "0123…56789"),
            (9, "0123…6789"),
            (3, "0…9"),
            (2, "…9"),
        )
        for limit, expected in cases:
            self.assertEqual(f(s, limit), expected)

    def test_shorten_eaw_wide(self, f=output.shorten_string_eaw):
        s = "幻想郷幻想郷幻想郷幻想郷"  # 12 wide characters

        for limit in (30, 25):
            self.assertEqual(f(s, limit), s)

        cases = (
            (20, "幻想郷幻…想郷幻想郷"),
            (19, "幻想郷幻…想郷幻想郷"),
            (18, "幻想郷幻…郷幻想郷"),
            (17, "幻想郷幻…郷幻想郷"),
            (16, "幻想郷…郷幻想郷"),
            (15, "幻想郷…郷幻想郷"),
            (14, "幻想郷…幻想郷"),
            (13, "幻想郷…幻想郷"),
            (12, "幻想…幻想郷"),
            (11, "幻想…幻想郷"),
            (10, "幻想…想郷"),
            (9, "幻想…想郷"),
            (3, "…郷"),
        )
        for limit, expected in cases:
            self.assertEqual(f(s, limit), expected)

    def test_shorten_eaw_mix(self, f=output.shorten_string_eaw):
        s = "幻-想-郷##幻-想-郷##幻-想-郷"  # mixed characters

        cases = (
            (28, s),
            (25, "幻-想-郷##幻…郷##幻-想-郷"),
            (20, "幻-想-郷#…##幻-想-郷"),
            (19, "幻-想-郷#…#幻-想-郷"),
            (18, "幻-想-郷…#幻-想-郷"),
            (17, "幻-想-郷…幻-想-郷"),
            (16, "幻-想-…#幻-想-郷"),
            (15, "幻-想-…幻-想-郷"),
            (14, "幻-想-…-想-郷"),
            (13, "幻-想-…-想-郷"),
            (12, "幻-想…-想-郷"),
            (11, "幻-想…想-郷"),
            (10, "幻-…-想-郷"),
            (9, "幻-…想-郷"),
            (3, "…郷"),
        )
        for limit, expected in cases:
            self.assertEqual(f(s, limit), expected)

    def test_shorten_eaw_separator(self, f=output.shorten_string_eaw):
        s = "01234567890123456789"  # 20 ascii characters

        cases = (
            # multi-character separator at different limits
            (20, "|---|", s),
            (19, "|---|", "0123456|---|3456789"),
            (15, "|---|", "01234|---|56789"),
            (10, "|---|", "01|---|789"),
            # separators of decreasing length at a fixed limit
            (19, "...", "01234567...23456789"),
            (19, "..", "01234567..123456789"),
            (19, ".", "012345678.123456789"),
            (19, "", "0123456780123456789"),
        )
        for limit, sep, expected in cases:
            self.assertEqual(f(s, limit, sep), expected)

    def test_shorten_eaw_separator_wide(self, f=output.shorten_string_eaw):
        s = "幻想郷幻想郷幻想郷幻想郷"  # 12 wide characters

        cases = (
            (24, "|---|", s),
            (19, "|---|", "幻想郷|---|郷幻想郷"),
            (15, "|---|", "幻想|---|幻想郷"),
            (10, "|---|", "幻|---|郷"),
            (19, "...", "幻想郷幻...郷幻想郷"),
            (19, "..", "幻想郷幻..郷幻想郷"),
            (19, ".", "幻想郷幻.想郷幻想郷"),
            (19, "", "幻想郷幻想郷幻想郷"),
        )
        for limit, sep, expected in cases:
            self.assertEqual(f(s, limit, sep), expected)

    def test_shorten_eaw_separator_mix_(self, f=output.shorten_string_eaw):
        s = "幻-想-郷##幻-想-郷##幻-想-郷"  # mixed characters

        cases = (
            (30, "|---|", s),
            (19, "|---|", "幻-想-|---|幻-想-郷"),
            (15, "|---|", "幻-想|---|想-郷"),
            (10, "|---|", "幻|---|-郷"),
            (19, "...", "幻-想-郷...幻-想-郷"),
            (19, "..", "幻-想-郷..#幻-想-郷"),
            (19, ".", "幻-想-郷#.#幻-想-郷"),
            (19, "", "幻-想-郷###幻-想-郷"),
        )
        for limit, sep, expected in cases:
            self.assertEqual(f(s, limit, sep), expected)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
Reference in New Issue
Block a user