Skip to content

Commit aceb8df

Browse files
committed
Implement format_number utility and update various summaries to use it to separate 1000s with a thin space
Update changelog; fix failing tests; update the format_number utility to accept a customizable thousands separator and use a comma for CLI output; change line formatting because the CircleCI test fails; another fix for the failing CircleCI tests; update changelog.
1 parent fc3255f commit aceb8df

File tree

6 files changed

+87
-51
lines changed

6 files changed

+87
-51
lines changed

python/CHANGELOG.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212
associated with each individual as a numpy array.
1313
(:user:`benjeffery`, :pr:`3153`)
1414

15+
- Use a thin space as the thousands separator in HTML output,
16+
and a comma in CLI output.
17+
(:user:`hossam26644`, :pr:`3167`, :issue:`2951`)
1518

1619
**Fixes**
1720

1821
- Correct assertion message when tables are compared with metadata ignored.
1922
(:user:`benjeffery`, :pr:`3162`, :issue:`3161`)
20-
23+
2124
--------------------
2225
[0.6.3] - 2025-04-28
2326
--------------------

python/tests/test_highlevel.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3764,19 +3764,19 @@ def test_str(self, ts_fixture):
37643764
╔═+╗\s*
37653765
║Tree.*?║\s*
37663766
╠═+╤═+╣\s*
3767-
║Index.*?│\s*\d+║\s*
3767+
║Index.*?│\s*[\d\u2009,]+║\s*
37683768
╟─+┼─+╢\s*
3769-
║Interval.*?│\s*\d+-\d+\s*\(\d+\)║\s*
3769+
║Interval.*?│\s*[\d\u2009,]+-[\d\u2009,]+\s*\([\d\u2009,]+\)║\s*
37703770
╟─+┼─+╢\s*
3771-
║Roots.*?│\s*\d+║\s*
3771+
║Roots.*?│\s*[\d\u2009,]+║\s*
37723772
╟─+┼─+╢\s*
3773-
║Nodes.*?│\s*\d+║\s*
3773+
║Nodes.*?│\s*[\d\u2009,]+║\s*
37743774
╟─+┼─+╢\s*
3775-
║Sites.*?│\s*\d+║\s*
3775+
║Sites.*?│\s*[\d\u2009,]+║\s*
37763776
╟─+┼─+╢\s*
3777-
║Mutations.*?│\s*\d+║\s*
3777+
║Mutations.*?│\s*[\d\u2009,]+║\s*
37783778
╟─+┼─+╢\s*
3779-
║Total\s*Branch\s*Length.*?│\s*[\d,]+\.\d+║\s*
3779+
║Total\s*Branch\s*Length.*?│\s*[\d\u2009,]+\.\d+║\s*
37803780
╚═+╧═+╝\s*
37813781
""",
37823782
re.VERBOSE | re.DOTALL,

python/tests/test_util.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,19 @@ def test_naturalsize(value, expected):
371371
assert util.naturalsize(-value) == expected
372372

373373

374+
def test_format_number():
# Checks util.format_number output for ints, floats, numeric strings,
# a custom significant-digit count, a custom separator, and bad input.
375+
assert util.format_number(0) == "0"
376+
# A string that parses as a number is accepted.
assert util.format_number("1.23") == "1.23"
377+
# With the default number of significant digits the second decimal is dropped.
assert util.format_number(3216546.34) == "3 216 546.3"
378+
# Passing 9 as the second argument keeps both decimals.
assert util.format_number(3216546.34, 9) == "3 216 546.34"
379+
assert util.format_number(-3456.23) == "-3 456.23"
380+
# An explicit separator replaces the default one.
assert util.format_number(-3456.23, sep=",") == "-3,456.23"
381+
382+
# A string that cannot be parsed as a number raises TypeError with a fixed message.
with pytest.raises(TypeError) as e_info:
383+
util.format_number("bad")
384+
assert str(e_info.value) == "The string cannot be converted to a number"
385+
386+
374387
@pytest.mark.parametrize(
375388
"obj, expected",
376389
[

python/tskit/genotypes.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -334,22 +334,25 @@ def __str__(self) -> str:
334334
Return a plain text summary of the contents of a variant.
335335
"""
336336
try:
337-
site_id = self.site.id
338-
site_position = self.site.position
337+
site_id = util.format_number(self.site.id, sep=",")
338+
site_position = util.format_number(self.site.position, sep=",")
339339
counts = self.counts()
340340
freqs = self.frequencies()
341+
samples = util.format_number(len(self.samples), sep=",")
342+
num_alleles = util.format_number(self.num_alleles, sep=",")
341343
rows = (
342344
[
343-
["Site id", f"{site_id:,}"],
344-
["Site position", f"{site_position:,}"],
345-
["Number of samples", f"{len(self.samples):,}"],
346-
["Number of alleles", f"{self.num_alleles:,}"],
345+
["Site id", f"{site_id}"],
346+
["Site position", f"{site_position}"],
347+
["Number of samples", f"{samples}"],
348+
["Number of alleles", f"{num_alleles}"],
347349
]
348350
+ [
349351
[
350352
f"Samples with allele "
351353
f"""{'missing' if k is None else "'" + k + "'"}""",
352-
f"{counts[k]:,} ({freqs[k] * 100:.2g}%)",
354+
f"{util.format_number(counts[k], sep=',')} "
355+
f"({util.format_number(freqs[k] * 100, 2, sep=',')}%)",
353356
]
354357
for k in self.alleles
355358
]

python/tskit/trees.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2814,17 +2814,21 @@ def __str__(self):
28142814
Return a plain text summary of a tree in a tree sequence
28152815
"""
28162816
tree_rows = [
2817-
["Index", f"{self.index:,}"],
2817+
["Index", f"{util.format_number(self.index, sep=',')}"],
28182818
[
28192819
"Interval",
2820-
f"{self.interval.left:,.8g}-{self.interval.right:,.8g}"
2821-
f"({self.span:,.8g})",
2820+
f"{util.format_number(self.interval.left, sep=',')}-"
2821+
f"{util.format_number(self.interval.right, sep=',')}"
2822+
f"({util.format_number(self.span, sep=',')})",
2823+
],
2824+
["Roots", f"{util.format_number(self.num_roots, sep=',')}"],
2825+
["Nodes", f"{util.format_number(len(self.preorder()), sep=',')}"],
2826+
["Sites", f"{util.format_number(self.num_sites, sep=',')}"],
2827+
["Mutations", f"{util.format_number(self.num_mutations, sep=',')}"],
2828+
[
2829+
"Total Branch Length",
2830+
f"{util.format_number(self.total_branch_length, sep=',')}",
28222831
],
2823-
["Roots", f"{self.num_roots:,}"],
2824-
["Nodes", f"{len(self.preorder()):,}"],
2825-
["Sites", f"{self.num_sites:,}"],
2826-
["Mutations", f"{self.num_mutations:,}"],
2827-
["Total Branch Length", f"{self.total_branch_length:,.8g}"],
28282832
]
28292833
return util.unicode_table(tree_rows, title="Tree")
28302834

@@ -4391,17 +4395,10 @@ def __str__(self):
43914395
Return a plain text summary of the contents of a tree sequence
43924396
"""
43934397
ts_rows = [
4394-
["Trees", str(self.num_trees)],
4395-
[
4396-
"Sequence Length",
4397-
str(
4398-
int(self.sequence_length)
4399-
if self.discrete_genome
4400-
else self.sequence_length
4401-
),
4402-
],
4398+
["Trees", util.format_number(self.num_trees, sep=",")],
4399+
["Sequence Length", util.format_number(self.sequence_length, sep=",")],
44034400
["Time Units", self.time_units],
4404-
["Sample Nodes", str(self.num_samples)],
4401+
["Sample Nodes", util.format_number(self.num_samples, sep=",")],
44054402
["Total Size", util.naturalsize(self.nbytes)],
44064403
]
44074404
header = ["Table", "Rows", "Size", "Has Metadata"]
@@ -4410,7 +4407,7 @@ def __str__(self):
44104407
table_rows.append(
44114408
[
44124409
name.capitalize(),
4413-
f"{table.num_rows:,}",
4410+
f"{util.format_number(table.num_rows, sep=',')}",
44144411
util.naturalsize(table.nbytes),
44154412
(
44164413
"Yes"

python/tskit/util.py

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,26 @@ def unicode_table(
480480
return "".join(out)
481481

482482

483+
def format_number(number, sig_digits=8, sep="\u2009"):
    """
    Format a number with a separator to indicate thousands.

    Integers are rendered exactly, with every digit kept, so that large
    counts are never rounded or switched to scientific notation. All other
    values are rendered with up to `sig_digits` significant digits using
    the 'g' format.

    number: int, float, or a numeric string. Strings are converted with
        ``float`` before formatting.
    sig_digits: int, number of significant digits to display for
        non-integer values.
    sep: str, the separator to use for thousands, default is a thin space.
    Returns a string.
    Raises TypeError if `number` is a string that cannot be converted
    to a number.
    """
    # Imported locally so this function does not rely on a module-level import.
    import numbers

    if isinstance(number, str):
        try:
            number = float(number)
        except ValueError as err:
            raise TypeError("The string cannot be converted to a number") from err

    if isinstance(number, numbers.Integral):
        # The 'g' format would round an integer with more than `sig_digits`
        # digits and print it in exponent form; format it exactly instead.
        formatted = format(int(number), ",")
    else:
        formatted = format(number, f",.{sig_digits}g")
    # The format mini-language only emits ',' as the grouping character;
    # swap it for the requested separator afterwards.
    return formatted.replace(",", sep)
501+
502+
483503
def html_table(rows, *, header):
484504
headers = "".join(f"<th>{h}</th>" for h in header)
485505
rows = (
@@ -519,7 +539,7 @@ def tree_sequence_html(ts):
519539
f"""
520540
<tr>
521541
<td>{name.capitalize()}</td>
522-
<td>{table.num_rows:,}</td>
542+
<td>{format_number(table.num_rows)}</td>
523543
<td>{naturalsize(table.nbytes)}</td>
524544
<td style="text-align: center;">
525545
{'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
@@ -599,10 +619,10 @@ def tree_sequence_html(ts):
599619
</tr>
600620
</thead>
601621
<tbody>
602-
<tr><td>Trees</td><td>{ts.num_trees:,}</td></tr>
603-
<tr><td>Sequence Length</td><td>{ts.sequence_length:,}</td></tr>
622+
<tr><td>Trees</td><td>{format_number(ts.num_trees)}</td></tr>
623+
<tr><td>Sequence Length</td><td>{format_number(ts.sequence_length)}</td></tr>
604624
<tr><td>Time Units</td><td>{ts.time_units}</td></tr>
605-
<tr><td>Sample Nodes</td><td>{ts.num_samples:,}</td></tr>
625+
<tr><td>Sample Nodes</td><td>{format_number(ts.num_samples)}</td></tr>
606626
<tr><td>Total Size</td><td>{naturalsize(ts.nbytes)}</td></tr>
607627
<tr>
608628
<td>Metadata</td><td style="text-align: left;">{md}</td>
@@ -671,13 +691,13 @@ def tree_html(tree):
671691
</tr>
672692
</thead>
673693
<tbody>
674-
<tr><td>Index</td><td>{tree.index:,}</td></tr>
675-
<tr><td>Interval</td><td>{tree.interval.left:,.8g}-{tree.interval.right:,.8g} ({tree.span:,.8g})</td></tr>
676-
<tr><td>Roots</td><td>{tree.num_roots:,}</td></tr>
677-
<tr><td>Nodes</td><td>{len(tree.preorder()):,}</td></tr>
678-
<tr><td>Sites</td><td>{tree.num_sites:,}</td></tr>
679-
<tr><td>Mutations</td><td>{tree.num_mutations:,}</td></tr>
680-
<tr><td>Total Branch Length</td><td>{tree.total_branch_length:,.8g}</td></tr>
694+
<tr><td>Index</td><td>{format_number(tree.index)}</td></tr>
695+
<tr><td>Interval</td><td>{format_number(tree.interval.left)}-{format_number(tree.interval.right)} ({format_number(tree.span)})</td></tr>
696+
<tr><td>Roots</td><td>{format_number(tree.num_roots)}</td></tr>
697+
<tr><td>Nodes</td><td>{format_number(len(tree.preorder()))}</td></tr>
698+
<tr><td>Sites</td><td>{format_number(tree.num_sites)}</td></tr>
699+
<tr><td>Mutations</td><td>{format_number(tree.num_mutations)}</td></tr>
700+
<tr><td>Total Branch Length</td><td>{format_number(tree.total_branch_length)}</td></tr>
681701
</tbody>
682702
</table>
683703
</div>
@@ -746,18 +766,18 @@ def variant_html(variant):
746766
return (
747767
html_body_head
748768
+ f"""
749-
<tr><td>Site Id</td><td>{site_id:,}</td></tr>
750-
<tr><td>Site Position</td><td>{site_position:,.8g}</td></tr>
751-
<tr><td>Number of Samples</td><td>{num_samples:,}</td></tr>
752-
<tr><td>Number of Alleles</td><td>{num_alleles:,}</td></tr>
769+
<tr><td>Site Id</td><td>{format_number(site_id)}</td></tr>
770+
<tr><td>Site Position</td><td>{format_number(site_position)}</td></tr>
771+
<tr><td>Number of Samples</td><td>{format_number(num_samples)}</td></tr>
772+
<tr><td>Number of Alleles</td><td>{format_number(num_alleles)}</td></tr>
753773
"""
754774
+ "\n".join(
755775
[
756776
f"""<tr><td>Samples with Allele {'missing' if k is None
757777
else "'" + k + "'"}</td><td>"""
758-
+ f"{counts[k]:,}"
778+
+ f"{format_number(counts[k])}"
759779
+ " "
760-
+ f"({freqs[k] * 100:,.2g}%)"
780+
+ f"({format_number(freqs[k] * 100, 2)}%)"
761781
+ "</td></tr>"
762782
for k in variant.alleles
763783
]

0 commit comments

Comments
 (0)