Skip to content

Commit

Permalink
Shorten proposed file name on create if too long
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkf committed Sep 19, 2021
1 parent a803582 commit aaa81b7
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 8 deletions.
4 changes: 4 additions & 0 deletions test/test_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
compat_shlex_split,
compat_str,
compat_struct_unpack,
compat_textwrap_shorten,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode,
Expand Down Expand Up @@ -121,6 +122,9 @@ def test_compat_etree_fromstring_doctype(self):
def test_struct_unpack(self):
    """A single zero byte unpacked as a network-order unsigned char gives (0,)"""
    unpacked = compat_struct_unpack('!B', b'\x00')
    self.assertEqual(unpacked, (0,))

def test_compat_textwrap_shorten(self):
    """Text that is too wide loses its last word to the default placeholder"""
    shortened = compat_textwrap_shorten('Hello world!', width=11)
    self.assertEqual(shortened, 'Hello [...]')


if __name__ == '__main__':
unittest.main()
16 changes: 16 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_open,
sanitize_path,
sanitize_url,
expand_path,
Expand Down Expand Up @@ -231,6 +232,21 @@ def test_sanitize_path(self):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')

def test_sanitize_open(self):
    """An over-long file name is shortened until the file can be created

    The name is far longer than any filesystem accepts for one path
    component, so sanitize_open() has to reduce it before opening works.
    The file really is created (open_mode='w'), so it is removed again
    afterwards rather than being left behind in the working directory.
    """
    import os  # local import: the module's import block is not touched

    long_name = " I'm a lumberjack ".join(
        'I sleep all night and I work all day %d' % n for n in range(50))
    shortened = "I sleep all night and I work all day 0 I'm a lumberjack I sleep all night and I work all day 1 I'm a lumberjack I sleep all night and I work all day 2 I'm a lumberjack[...].test"
    if sys.platform == 'win32':
        # the expected Windows name carries no leading '.\'
        requested = '.\\' + long_name + '.test'
        expected = shortened
    else:
        requested = './' + long_name + '.test'
        expected = './' + shortened

    stream, actual = sanitize_open(requested, open_mode='w')
    stream.close()
    try:
        self.assertEqual(actual, expected)
    finally:
        try:
            os.remove(actual)
        except OSError:
            # clean-up is best effort; never mask the assertion result
            pass

def test_sanitize_url(self):
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
Expand Down
24 changes: 24 additions & 0 deletions youtube_dl/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2997,6 +2997,29 @@ def resf(tpl, *args, **kwargs):
# Plain pass-through: hands every positional and keyword argument to
# ctypes.WINFUNCTYPE and returns whatever that call returns
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)

# Compat version of textwrap.shorten(), not in Py2 textwrap
# Extractors can use this to précis a long metadata field, eg
# to make a title from a description
try:
    from textwrap import shorten as compat_textwrap_shorten
except ImportError:  # Python 2
    def compat_textwrap_shorten(
            text, width, fix_sentence_endings=False, break_long_words=True,
            break_on_hyphens=True, placeholder=' [...]'):
        """Collapse whitespace in text and truncate it to fit in width

        Arguments:
        text -- the text to shorten
        width -- maximum length of the result, including the placeholder
        fix_sentence_endings, break_long_words, break_on_hyphens --
            passed through to textwrap.wrap()
        placeholder -- appended to the result when text was truncated

        Returns text unchanged (apart from whitespace collapsing) if it
        fits in width, otherwise its first wrapped line followed by the
        placeholder, or just the stripped placeholder if nothing else fits.
        Raises ValueError if width is invalid for textwrap.wrap() or if
        not even the stripped placeholder fits in width.
        """
        import textwrap

        # like textwrap.shorten(), collapse all whitespace first
        text = ' '.join(text.strip().split())
        wrap_opts = {
            'fix_sentence_endings': fix_sentence_endings,
            'break_long_words': break_long_words,
            'break_on_hyphens': break_on_hyphens,
        }
        lines = textwrap.wrap(text, width, **wrap_opts)
        if len(lines) <= 1:
            # fits as it is; empty text wraps to no lines at all
            return lines[0] if lines else ''
        room = width - len(placeholder)
        if room < 1:
            # nothing but the placeholder can fit, if even that
            if len(placeholder.lstrip()) > width:
                raise ValueError('placeholder too large for max width')
            return placeholder.lstrip()
        return textwrap.wrap(text, room, **wrap_opts)[0] + placeholder


__all__ = [
'compat_HTMLParseError',
Expand Down Expand Up @@ -3040,6 +3063,7 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
'compat_struct_pack',
'compat_struct_unpack',
'compat_subprocess_get_DEVNULL',
'compat_textwrap_shorten',
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',
Expand Down
67 changes: 59 additions & 8 deletions youtube_dl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
compat_str,
compat_struct_pack,
compat_struct_unpack,
compat_textwrap_shorten,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlencode,
Expand Down Expand Up @@ -2036,6 +2037,28 @@ def clean_html(html):
return html.strip()


def reduce_filename(path, reduction=0.5, min_length=20, ellipsis='[...]'):
    """Try to reduce the filename by a specified reduction factor

    Arguments:
    path -- the path name to reduce
    reduction -- factor by which to reduce its filename component
    min_length -- give up when the filename, without its extension and
                  any trailing ellipsis, is already shorter than this
    ellipsis -- placeholder for removed text

    Returns path name with reduced filename, or None if the filename
    cannot be reduced any further
    """

    head, tail = os.path.split(path)
    stem, ext = os.path.splitext(tail)
    # an earlier reduction may have left the placeholder at the end
    stem = remove_end(stem, ellipsis)
    flen = len(stem)
    if flen < min_length:
        # give up
        return None

    width = int(1 + reduction * flen)
    keep = width - len(ellipsis)
    if keep < 1:
        # no room for anything except the placeholder
        return None

    shortened = compat_textwrap_shorten(stem, width, placeholder=ellipsis)
    if shortened.strip() in ('', ellipsis.strip()):
        # textwrap.shorten() only cuts at whitespace, so a long name
        # without any (eg, a restricted filename) would be reduced to the
        # bare placeholder: cut it at the character limit instead
        shortened = stem[:keep].rstrip() + ellipsis
    if len(shortened) >= flen:
        # no progress (eg, reduction >= 1): returning the same name again
        # would make a caller that retries on failure loop forever
        return None
    return os.path.join(head, shortened + ext)


def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.
Expand All @@ -2046,26 +2069,54 @@ def sanitize_open(filename, open_mode):
It returns the tuple (stream, definitive_file_name).
"""
def openfile(filename, open_mode):
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)

try:
if filename == '-':
if sys.platform == 'win32':
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)
return openfile(filename, open_mode)
except (IOError, OSError) as err:
if err.errno in (errno.EACCES,):
raise

# In case of error, try to remove win32 forbidden chars
alt_filename = sanitize_path(filename)
if alt_filename == filename:
if 'w' not in open_mode or '+' in open_mode:
# only mung filename when creating the file
raise

org_err = err

# In case of error, try to remove win32 forbidden chars
if err.errno in (errno.EINVAL, ):
alt_filename = sanitize_path(filename)
if alt_filename != filename:
try:
return openfile(alt_filename, open_mode)
except (IOError, OSError) as new_err:
err = new_err
else:
# An exception here should be caught in the caller
stream = open(encodeFilename(alt_filename), open_mode)
return (stream, alt_filename)
alt_filename = filename

# Windows: an over-long file name can be detected by the CreateFile()
# API, and then get EINVAL, or by the filesystem, and then perhaps
# ENAMETOOLONG
# POSIX: ENAMETOOLONG in general
while err.errno in (errno.ENAMETOOLONG, errno.EINVAL, ):
alt_filename = reduce_filename(alt_filename)
if not alt_filename:
break
try:
return openfile(alt_filename, open_mode)
except (IOError, OSError) as new_err:
err = new_err

# Reduction didn't help; give up and report what initially went wrong
# This exception should be caught in the caller
raise org_err


def timeconvert(timestr):
Expand Down

0 comments on commit aaa81b7

Please sign in to comment.