Skip to content

Commit ad0dfae

Browse files
authored
Separate FILE_ENCODING from TERMINAL_ENCODING (#424)
Distinguish between terminal encoding (now TERMINAL_ENCODING, renamed from DEFAULT_ENCODING) and file encoding (FILE_ENCODING). Previously, gitlint always decoded files using the terminal encoding (DEFAULT_ENCODING), which can be problematic when the terminal encoding is not UTF-8. File encoding is now hard-coded to UTF-8.
1 parent f1093bc commit ad0dfae

File tree

10 files changed

+45
-34
lines changed

10 files changed

+45
-34
lines changed

gitlint-core/gitlint/cli.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def log_system_info():
6868
LOG.debug("Git version: %s", git_version())
6969
LOG.debug("Gitlint version: %s", gitlint.__version__)
7070
LOG.debug("GITLINT_USE_SH_LIB: %s", os.environ.get("GITLINT_USE_SH_LIB", "[NOT SET]"))
71-
LOG.debug("DEFAULT_ENCODING: %s", gitlint.utils.DEFAULT_ENCODING)
71+
LOG.debug("DEFAULT_ENCODING: %s", gitlint.utils.TERMINAL_ENCODING)
7272

7373

7474
def build_config(
@@ -264,7 +264,7 @@ def __init__(self, config, config_builder, commit_hash, refspec, msg_filename, g
264264
@click.option("--ignore", envvar="GITLINT_IGNORE", default="", help="Ignore rules (comma-separated by id or name).")
265265
@click.option("--contrib", envvar="GITLINT_CONTRIB", default="",
266266
help="Contrib rules to enable (comma-separated by id or name).")
267-
@click.option("--msg-filename", type=click.File(encoding=gitlint.utils.DEFAULT_ENCODING),
267+
@click.option("--msg-filename", type=click.File(encoding=gitlint.utils.FILE_ENCODING),
268268
help="Path to a file containing a commit-msg.")
269269
@click.option("--ignore-stdin", envvar="GITLINT_IGNORE_STDIN", is_flag=True,
270270
help="Ignore any stdin data. Useful for running in CI server.")

gitlint-core/gitlint/config.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
)
1414
from gitlint.contrib import rules as contrib_rules
1515
from gitlint.exception import GitlintError
16-
from gitlint.utils import DEFAULT_ENCODING
16+
from gitlint.utils import FILE_ENCODING
1717

1818

1919
def handle_option_error(func):
@@ -468,7 +468,7 @@ def set_from_config_file(self, filename):
468468
try:
469469
parser = ConfigParser()
470470

471-
with open(filename, encoding=DEFAULT_ENCODING) as config_file:
471+
with open(filename, encoding=FILE_ENCODING) as config_file:
472472
parser.read_file(config_file, filename)
473473

474474
for section_name in parser.sections():

gitlint-core/gitlint/hooks.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from gitlint.exception import GitlintError
66
from gitlint.git import git_hooks_dir
7-
from gitlint.utils import DEFAULT_ENCODING
7+
from gitlint.utils import FILE_ENCODING
88

99
COMMIT_MSG_HOOK_SRC_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files", "commit-msg")
1010
COMMIT_MSG_HOOK_DST_PATH = "commit-msg"
@@ -52,7 +52,7 @@ def uninstall_commit_msg_hook(lint_config):
5252
if not os.path.exists(dest_path):
5353
raise GitHookInstallerError(f"There is no commit-msg hook present in {dest_path}.")
5454

55-
with open(dest_path, encoding=DEFAULT_ENCODING) as fp:
55+
with open(dest_path, encoding=FILE_ENCODING) as fp:
5656
lines = fp.readlines()
5757
if len(lines) < 2 or lines[1] != GITLINT_HOOK_IDENTIFIER:
5858
msg = (

gitlint-core/gitlint/shell.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import subprocess
88

9-
from gitlint.utils import DEFAULT_ENCODING, USE_SH_LIB
9+
from gitlint.utils import TERMINAL_ENCODING, USE_SH_LIB
1010

1111

1212
def shell(cmd):
@@ -64,7 +64,7 @@ def _exec(*args, **kwargs):
6464
raise CommandNotFound from e
6565

6666
exit_code = p.returncode
67-
stdout = result[0].decode(DEFAULT_ENCODING)
67+
stdout = result[0].decode(TERMINAL_ENCODING)
6868
stderr = result[1] # 'sh' does not decode the stderr bytes to unicode
6969
full_cmd = "" if args is None else " ".join(args)
7070

gitlint-core/gitlint/tests/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from gitlint.deprecation import LOG as DEPRECATION_LOG
1414
from gitlint.deprecation import Deprecation
1515
from gitlint.git import GitChangedFileStats, GitContext
16-
from gitlint.utils import DEFAULT_ENCODING, LOG_FORMAT
16+
from gitlint.utils import FILE_ENCODING, LOG_FORMAT
1717

1818
EXPECTED_REGEX_STYLE_SEARCH_DEPRECATION_WARNING = (
1919
"WARNING: gitlint.deprecated.regex_style_search {0} - {1}: gitlint will be switching from using "
@@ -95,7 +95,7 @@ def get_sample_path(filename=""):
9595
def get_sample(filename=""):
9696
"""Read and return the contents of a file in gitlint/tests/samples"""
9797
sample_path = BaseTestCase.get_sample_path(filename)
98-
return Path(sample_path).read_text(encoding=DEFAULT_ENCODING)
98+
return Path(sample_path).read_text(encoding=FILE_ENCODING)
9999

100100
@staticmethod
101101
def patch_input(side_effect):
@@ -109,7 +109,7 @@ def get_expected(filename="", variable_dict=None):
109109
"""Utility method to read an expected file from gitlint/tests/expected and return it as a string.
110110
Optionally replace template variables specified by variable_dict."""
111111
expected_path = os.path.join(BaseTestCase.EXPECTED_DIR, filename)
112-
expected = Path(expected_path).read_text(encoding=DEFAULT_ENCODING)
112+
expected = Path(expected_path).read_text(encoding=FILE_ENCODING)
113113

114114
if variable_dict:
115115
expected = expected.format(**variable_dict)

gitlint-core/gitlint/tests/cli/test_cli.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from gitlint import __version__, cli
1010
from gitlint.shell import CommandNotFound
1111
from gitlint.tests.base import BaseTestCase
12-
from gitlint.utils import DEFAULT_ENCODING
12+
from gitlint.utils import FILE_ENCODING, TERMINAL_ENCODING
1313

1414

1515
class CLITests(BaseTestCase):
@@ -39,7 +39,7 @@ def get_system_info_dict():
3939
"gitlint_version": __version__,
4040
"GITLINT_USE_SH_LIB": BaseTestCase.GITLINT_USE_SH_LIB,
4141
"target": os.path.realpath(os.getcwd()),
42-
"DEFAULT_ENCODING": DEFAULT_ENCODING,
42+
"DEFAULT_ENCODING": TERMINAL_ENCODING,
4343
}
4444

4545
def test_version(self):
@@ -315,7 +315,7 @@ def test_lint_staged_msg_filename(self, sh, _):
315315

316316
with self.tempdir() as tmpdir:
317317
msg_filename = os.path.join(tmpdir, "msg")
318-
with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
318+
with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
319319
f.write("WIP: msg-filename tïtle\n")
320320

321321
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -370,7 +370,7 @@ def test_msg_filename(self, _):
370370

371371
with self.tempdir() as tmpdir:
372372
msg_filename = os.path.join(tmpdir, "msg")
373-
with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
373+
with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
374374
f.write("Commït title\n")
375375

376376
with patch("gitlint.display.stderr", new=StringIO()) as stderr:

gitlint-core/gitlint/tests/cli/test_cli_hooks.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from gitlint import cli, config, hooks
77
from gitlint.shell import ErrorReturnCode
88
from gitlint.tests.base import BaseTestCase
9-
from gitlint.utils import DEFAULT_ENCODING
9+
from gitlint.utils import FILE_ENCODING
1010

1111

1212
class CLIHookTests(BaseTestCase):
@@ -102,7 +102,7 @@ def test_run_hook_no_tty(self):
102102

103103
with self.tempdir() as tmpdir:
104104
msg_filename = os.path.join(tmpdir, "hür")
105-
with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
105+
with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
106106
f.write("WIP: tïtle\n")
107107

108108
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -130,7 +130,7 @@ def test_run_hook_edit(self, shell):
130130

131131
with self.patch_input(["e", "e", "n"]), self.tempdir() as tmpdir:
132132
msg_filename = os.path.realpath(os.path.join(tmpdir, "hür"))
133-
with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
133+
with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
134134
f.write(commit_messages[i] + "\n")
135135

136136
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -158,7 +158,7 @@ def test_run_hook_no(self):
158158

159159
with self.patch_input(["n"]), self.tempdir() as tmpdir:
160160
msg_filename = os.path.join(tmpdir, "hür")
161-
with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
161+
with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
162162
f.write("WIP: höok no\n")
163163

164164
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -175,7 +175,7 @@ def test_run_hook_yes(self):
175175
"""Test for run-hook subcommand, answering 'y(es)' after commit-hook"""
176176
with self.patch_input(["y"]), self.tempdir() as tmpdir:
177177
msg_filename = os.path.join(tmpdir, "hür")
178-
with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
178+
with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
179179
f.write("WIP: höok yes\n")
180180

181181
with patch("gitlint.display.stderr", new=StringIO()) as stderr:

gitlint-core/gitlint/tests/test_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def test_use_sh_library(self, patched_env):
2727
self.assertEqual(utils.use_sh_library(), False)
2828

2929
@patch("gitlint.utils.locale")
30-
def test_default_encoding_non_windows(self, mocked_locale):
30+
def test_terminal_encoding_non_windows(self, mocked_locale):
3131
utils.PLATFORM_IS_WINDOWS = False
3232
mocked_locale.getpreferredencoding.return_value = "foöbar"
3333
self.assertEqual(utils.getpreferredencoding(), "foöbar")
@@ -37,7 +37,7 @@ def test_default_encoding_non_windows(self, mocked_locale):
3737
self.assertEqual(utils.getpreferredencoding(), "UTF-8")
3838

3939
@patch("os.environ")
40-
def test_default_encoding_windows(self, patched_env):
40+
def test_terminal_encoding_windows(self, patched_env):
4141
utils.PLATFORM_IS_WINDOWS = True
4242
# Mock out os.environ
4343
mock_env = {}

gitlint-core/gitlint/utils.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -38,39 +38,50 @@ def use_sh_library():
3838
USE_SH_LIB = use_sh_library()
3939

4040
########################################################################################################################
41-
# DEFAULT_ENCODING
41+
# TERMINAL_ENCODING
42+
# Encoding used for terminal encoding/decoding.
4243

4344

4445
def getpreferredencoding():
4546
"""Modified version of local.getpreferredencoding() that takes into account LC_ALL, LC_CTYPE, LANG env vars
4647
on windows and falls back to UTF-8."""
4748
fallback_encoding = "UTF-8"
48-
default_encoding = locale.getpreferredencoding() or fallback_encoding
49+
preferred_encoding = locale.getpreferredencoding() or fallback_encoding
4950

5051
# On Windows, we mimic git/linux by trying to read the LC_ALL, LC_CTYPE, LANG env vars manually
5152
# (on Linux/MacOS the `getpreferredencoding()` call will take care of this).
5253
# We fall back to UTF-8
5354
if PLATFORM_IS_WINDOWS:
54-
default_encoding = fallback_encoding
55+
preferred_encoding = fallback_encoding
5556
for env_var in ["LC_ALL", "LC_CTYPE", "LANG"]:
5657
encoding = os.environ.get(env_var, False)
5758
if encoding:
5859
# Support dotted (C.UTF-8) and non-dotted (C or UTF-8) charsets:
5960
# If encoding contains a dot: split and use second part, otherwise use everything
6061
dot_index = encoding.find(".")
61-
default_encoding = encoding[dot_index + 1 :] if dot_index != -1 else encoding
62+
preferred_encoding = encoding[dot_index + 1 :] if dot_index != -1 else encoding
6263
break
6364

6465
# We've determined what encoding the user *wants*, let's now check if it's actually a valid encoding on the
6566
# system. If not, fallback to UTF-8.
6667
# This scenario is fairly common on Windows where git sets LC_CTYPE=C when invoking the commit-msg hook, which
6768
# is not a valid encoding in Python on Windows.
6869
try:
69-
codecs.lookup(default_encoding)
70+
codecs.lookup(preferred_encoding)
7071
except LookupError:
71-
default_encoding = fallback_encoding
72+
preferred_encoding = fallback_encoding
7273

73-
return default_encoding
74+
return preferred_encoding
7475

7576

76-
DEFAULT_ENCODING = getpreferredencoding()
77+
TERMINAL_ENCODING = getpreferredencoding()
78+
79+
########################################################################################################################
80+
# FILE_ENCODING
81+
# Gitlint assumes UTF-8 encoding for all file operations:
82+
# - reading/writing its own hook and config files
83+
# - reading/writing git commit messages
84+
# Git does have i18n.commitEncoding and i18n.logOutputEncoding options which we might want to take into account,
85+
# but that's not supported today.
86+
87+
FILE_ENCODING = "UTF-8"

qa/test_gitlint.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from qa.base import BaseTestCase
44
from qa.shell import echo, git, gitlint
5-
from qa.utils import DEFAULT_ENCODING
5+
from qa.utils import FILE_ENCODING
66

77

88
class IntegrationTests(BaseTestCase):
@@ -58,7 +58,7 @@ def test_fixup_commit(self):
5858
self.assertEqualStdout(output, expected)
5959

6060
# Make a small modification to the commit and commit it using fixup commit
61-
with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh:
61+
with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh:
6262
fh.write("Appending söme stuff\n")
6363

6464
git("add", test_filename, _cwd=self.tmp_git_repo)
@@ -87,7 +87,7 @@ def test_fixup_amend_commit(self):
8787
self.assertEqualStdout(output, expected)
8888

8989
# Make a small modification to the commit and commit it using fixup=amend commit
90-
with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh:
90+
with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh:
9191
fh.write("Appending söme stuff\n")
9292

9393
git("add", test_filename, _cwd=self.tmp_git_repo)
@@ -133,7 +133,7 @@ def test_squash_commit(self):
133133
self.assertEqualStdout(output, expected)
134134

135135
# Make a small modification to the commit and commit it using squash commit
136-
with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh:
136+
with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh:
137137
# Wanted to write a unicode string, but that's obnoxious if you want to do it across Python 2 and 3.
138138
# https://stackoverflow.com/questions/22392377/
139139
# error-writing-a-file-with-file-write-in-python-unicodeencodeerror

0 commit comments

Comments
 (0)