Skip to content

Commit e6311d8

Browse files
authored
CBG-4322 use unique output files (#7352)
1 parent 590383e commit e6311d8

File tree

5 files changed

+85
-13
lines changed

5 files changed

+85
-13
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
extend-include = ["tools/sgcollect_info"]
33

44
[tool.ruff.lint]
5-
extend-select = ["I"]
5+
extend-select = ["I","B006"] # isort, mutable default argument

tools-tests/sgcollect_info_test.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,54 @@ def test_make_collect_logs_heap_profile(tmpdir):
6767
assert [tasks[0].log_file] == [pprof_file.basename]
6868
# ensure that this is not redacted task
6969
assert tasks[0].description.startswith("Contents of")
70+
71+
72+
@pytest.mark.parametrize("should_redact", [True, False])
73+
def test_make_collect_logs_tasks_duplicate_files(should_redact, tmp_path):
74+
tmpdir1 = tmp_path / "tmpdir1"
75+
tmpdir2 = tmp_path / "tmpdir2"
76+
config = """
77+
{{"logfilepath": "{tmpdir1}",
78+
"logging": {{ "log_file_path": "{tmpdir2}" }}
79+
}}
80+
"""
81+
for d in [tmpdir1, tmpdir2]:
82+
d.mkdir()
83+
(d / "sg_info.log").write_text("foo")
84+
(d / "sg_info-01.log.gz").write_text("foo")
85+
86+
with unittest.mock.patch(
87+
"sgcollect_info.urlopen_with_basic_auth",
88+
return_value=io.BytesIO(
89+
config.format(
90+
tmpdir1=str(tmpdir1).replace("\\", "\\\\"),
91+
tmpdir2=str(tmpdir2).replace("\\", "\\\\"),
92+
).encode("utf-8")
93+
),
94+
):
95+
tasks = sgcollect_info.make_collect_logs_tasks(
96+
sg_url="fakeurl",
97+
sg_config_file_path="",
98+
sg_username="",
99+
sg_password="",
100+
)
101+
# assert all tasks have unique log_file names
102+
assert len(set(t.log_file for t in tasks)) == len(tasks)
103+
assert set(t.log_file for t in tasks) == {
104+
"sg_info.log",
105+
"sg_info.log.1",
106+
"sg_info-01.log.gz",
107+
"sg_info-01.log.gz.1",
108+
}
109+
110+
111+
@pytest.mark.parametrize(
112+
"basenames, filename, expected",
113+
[
114+
({}, "foo", "foo"),
115+
({"foo"}, "foo", "foo.1"),
116+
({"foo", "foo.1"}, "foo", "foo.2"),
117+
],
118+
)
119+
def test_get_unique_filename(basenames, filename, expected):
120+
assert sgcollect_info.get_unique_filename(basenames, filename) == expected

tools-tests/tasks_test.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,25 @@
4747

4848

4949
@pytest.mark.parametrize("tag_userdata", [True, False])
50-
def test_add_file_task(tmpdir, tag_userdata):
50+
def test_add_file_task(tmp_path, tag_userdata):
5151
if tag_userdata:
5252
expected = REDACTED_OUTPUT
5353
else:
5454
expected = REDACTED_OUTPUT.replace("<ud>foo</ud>", "foo")
5555

5656
filename = "config.json"
57-
config_file = tmpdir.join(filename)
58-
config_file.write(INPUT_CONFIG)
57+
config_file = tmp_path / filename
58+
config_file.write_text(INPUT_CONFIG)
5959
postprocessors = [password_remover.remove_passwords]
6060
if tag_userdata:
6161
postprocessors.append(password_remover.tag_userdata_in_server_config)
6262
task = tasks.add_file_task(
63-
config_file.strpath,
63+
sourcefile_path=config_file,
64+
output_path=config_file.name,
6465
content_postprocessors=postprocessors,
6566
)
66-
output_dir = tmpdir.mkdir("output")
67+
output_dir = tmp_path / "output"
68+
output_dir.mkdir()
6769
runner = tasks.TaskRunner(
6870
verbosity=VERBOSE,
6971
default_name="sg.log",

tools/sgcollect_info

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,18 @@ def urlopen_with_basic_auth(url, username, password):
322322
return urllib.request.urlopen(url)
323323

324324

325+
def get_unique_filename(filenames: set[str], original_filename: str) -> str:
326+
"""
327+
Given a set of filenames, return a unique filename that is not in the set.
328+
"""
329+
i = 1
330+
filename = original_filename
331+
while filename in filenames:
332+
filename = f"{original_filename}.{i}"
333+
i += 1
334+
return filename
335+
336+
325337
def make_collect_logs_tasks(
326338
sg_url: str,
327339
sg_config_file_path: Optional[str],
@@ -391,6 +403,7 @@ def make_collect_logs_tasks(
391403

392404
# Keep a dictionary of log file paths we've added, to avoid adding duplicates
393405
sg_log_file_paths: set[pathlib.Path] = set()
406+
output_basenames: set[str] = set()
394407

395408
sg_tasks: List[PythonTask] = []
396409

@@ -399,8 +412,10 @@ def make_collect_logs_tasks(
399412
canonical_filename.resolve()
400413
if canonical_filename in sg_log_file_paths:
401414
return
415+
output_basename = get_unique_filename(output_basenames, canonical_filename.name)
402416
sg_log_file_paths.add(canonical_filename)
403-
task = add_file_task(sourcefile_path=filename)
417+
output_basenames.add(output_basename)
418+
task = add_file_task(sourcefile_path=filename, output_path=output_basename)
404419
task.no_header = True
405420
sg_tasks.append(task)
406421

@@ -538,6 +553,7 @@ def make_config_tasks(
538553
for sg_config_file in sg_config_files:
539554
task = add_file_task(
540555
sourcefile_path=sg_config_file,
556+
output_path=os.path.basename(sg_config_file),
541557
content_postprocessors=server_config_postprocessors,
542558
)
543559
collect_config_tasks.append(task)

tools/tasks.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ def make_curl_task(
461461
url,
462462
user="",
463463
password="",
464-
content_postprocessors=[],
464+
content_postprocessors: Optional[List[Callable]] = None,
465465
timeout=60,
466466
log_file="python_curl.log",
467467
**kwargs,
@@ -496,8 +496,9 @@ def python_curl_task():
496496
print("WARNING: Error connecting to url {0}: {1}".format(url, e))
497497

498498
response_string = response_file_handle.read()
499-
for content_postprocessor in content_postprocessors:
500-
response_string = content_postprocessor(response_string)
499+
if content_postprocessors:
500+
for content_postprocessor in content_postprocessors:
501+
response_string = content_postprocessor(response_string)
501502
return response_string
502503

503504
return PythonTask(
@@ -506,8 +507,10 @@ def python_curl_task():
506507

507508

508509
def add_file_task(
509-
sourcefile_path, content_postprocessors: Optional[List[Callable]] = None
510-
) -> PythonTask:
510+
sourcefile_path: Union[pathlib.Path, str],
511+
output_path: Union[pathlib.Path, str],
512+
content_postprocessors: Optional[List[Callable]] = None,
513+
):
511514
"""
512515
Adds the contents of a file to the output zip
513516
@@ -525,7 +528,7 @@ def python_add_file_task():
525528
task = PythonTask(
526529
description="Contents of {0}".format(sourcefile_path),
527530
callable=python_add_file_task,
528-
log_file=os.path.basename(sourcefile_path),
531+
log_file=output_path,
529532
log_exception=False,
530533
)
531534

0 commit comments

Comments
 (0)