Skip to content

Commit 9b3b89c

Browse files
committed
Implement format_number utility and update various summaries to use it to separate 1000s with a thin space
Update changelog; fix failing tests
1 parent 4187575 commit 9b3b89c

File tree

6 files changed

+79
-51
lines changed

6 files changed

+79
-51
lines changed

python/CHANGELOG.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212
associated with each individual as a numpy array.
1313
(:user:`benjeffery`, :pr:`3153`)
1414

15+
- Use a thin space as the thousands separator for numbers shown in the HTML
16+
and plain-text summaries.
17+
(:user:`hossam26644`, :pr:`3167`, :issue:`2951`)
1518

1619
**Fixes**
1720

1821
- Correct assertion message when tables are compared with metadata ignored.
1922
(:user:`benjeffery`, :pr:`3162`, :issue:`3161`)
20-
23+
2124
--------------------
2225
[0.6.3] - 2025-04-28
2326
--------------------

python/tests/test_highlevel.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3764,19 +3764,19 @@ def test_str(self, ts_fixture):
37643764
╔═+╗\s*
37653765
║Tree.*?║\s*
37663766
╠═+╤═+╣\s*
3767-
║Index.*?│\s*\d+║\s*
3767+
║Index.*?│\s*[\d\u2009,]+║\s*
37683768
╟─+┼─+╢\s*
3769-
║Interval.*?│\s*\d+-\d+\s*\(\d+\)║\s*
3769+
║Interval.*?│\s*[\d\u2009,]+-[\d\u2009,]+\s*\([\d\u2009,]+\)║\s*
37703770
╟─+┼─+╢\s*
3771-
║Roots.*?│\s*\d+║\s*
3771+
║Roots.*?│\s*[\d\u2009,]+║\s*
37723772
╟─+┼─+╢\s*
3773-
║Nodes.*?│\s*\d+║\s*
3773+
║Nodes.*?│\s*[\d\u2009,]+║\s*
37743774
╟─+┼─+╢\s*
3775-
║Sites.*?│\s*\d+║\s*
3775+
║Sites.*?│\s*[\d\u2009,]+║\s*
37763776
╟─+┼─+╢\s*
3777-
║Mutations.*?│\s*\d+║\s*
3777+
║Mutations.*?│\s*[\d\u2009,]+║\s*
37783778
╟─+┼─+╢\s*
3779-
║Total\s*Branch\s*Length.*?│\s*[\d,]+\.\d+║\s*
3779+
║Total\s*Branch\s*Length.*?│\s*[\d\u2009,]+\.\d+║\s*
37803780
╚═+╧═+╝\s*
37813781
""",
37823782
re.VERBOSE | re.DOTALL,

python/tests/test_util.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,18 @@ def test_naturalsize(value, expected):
371371
assert util.naturalsize(-value) == expected
372372

373373

374+
def test_format_number():
375+
assert util.format_number(0) == "0"
376+
assert util.format_number("1.23") == "1.23"
377+
assert util.format_number(3216546.34) == "3 216 546.3"
378+
assert util.format_number(3216546.34, 9) == "3 216 546.34"
379+
assert util.format_number(-3456.23) == "-3 456.23"
380+
381+
with pytest.raises(TypeError) as e_info:
382+
util.format_number("bad")
383+
assert str(e_info.value) == "The string cannot be converted to a number"
384+
385+
374386
@pytest.mark.parametrize(
375387
"obj, expected",
376388
[

python/tskit/genotypes.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -334,22 +334,23 @@ def __str__(self) -> str:
334334
Return a plain text summary of the contents of a variant.
335335
"""
336336
try:
337-
site_id = self.site.id
338-
site_position = self.site.position
337+
site_id = util.format_number(self.site.id)
338+
site_position = util.format_number(self.site.position)
339339
counts = self.counts()
340340
freqs = self.frequencies()
341341
rows = (
342342
[
343-
["Site id", f"{site_id:,}"],
344-
["Site position", f"{site_position:,}"],
345-
["Number of samples", f"{len(self.samples):,}"],
346-
["Number of alleles", f"{self.num_alleles:,}"],
343+
["Site id", f"{site_id}"],
344+
["Site position", f"{site_position}"],
345+
["Number of samples", f"{util.format_number(len(self.samples))}"],
346+
["Number of alleles", f"{util.format_number(self.num_alleles)}"],
347347
]
348348
+ [
349349
[
350350
f"Samples with allele "
351351
f"""{'missing' if k is None else "'" + k + "'"}""",
352-
f"{counts[k]:,} ({freqs[k] * 100:.2g}%)",
352+
f"{util.format_number(counts[k])} "
353+
f"({util.format_number(freqs[k] * 100, 2)}%)",
353354
]
354355
for k in self.alleles
355356
]

python/tskit/trees.py

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2814,17 +2814,17 @@ def __str__(self):
28142814
Return a plain text summary of a tree in a tree sequence
28152815
"""
28162816
tree_rows = [
2817-
["Index", f"{self.index:,}"],
2817+
["Index", f"{util.format_number(self.index)}"],
28182818
[
28192819
"Interval",
2820-
f"{self.interval.left:,.8g}-{self.interval.right:,.8g}"
2821-
f"({self.span:,.8g})",
2820+
f"{util.format_number(self.interval.left)}-{util.format_number(self.interval.right)}"
2821+
f"({util.format_number(self.span)})",
28222822
],
2823-
["Roots", f"{self.num_roots:,}"],
2824-
["Nodes", f"{len(self.preorder()):,}"],
2825-
["Sites", f"{self.num_sites:,}"],
2826-
["Mutations", f"{self.num_mutations:,}"],
2827-
["Total Branch Length", f"{self.total_branch_length:,.8g}"],
2823+
["Roots", f"{util.format_number(self.num_roots)}"],
2824+
["Nodes", f"{util.format_number(len(self.preorder()))}"],
2825+
["Sites", f"{util.format_number(self.num_sites)}"],
2826+
["Mutations", f"{util.format_number(self.num_mutations)}"],
2827+
["Total Branch Length", f"{util.format_number(self.total_branch_length)}"],
28282828
]
28292829
return util.unicode_table(tree_rows, title="Tree")
28302830

@@ -4391,17 +4391,10 @@ def __str__(self):
43914391
Return a plain text summary of the contents of a tree sequence
43924392
"""
43934393
ts_rows = [
4394-
["Trees", str(self.num_trees)],
4395-
[
4396-
"Sequence Length",
4397-
str(
4398-
int(self.sequence_length)
4399-
if self.discrete_genome
4400-
else self.sequence_length
4401-
),
4402-
],
4394+
["Trees", util.format_number(self.num_trees)],
4395+
["Sequence Length", util.format_number(self.sequence_length)],
44034396
["Time Units", self.time_units],
4404-
["Sample Nodes", str(self.num_samples)],
4397+
["Sample Nodes", util.format_number(self.num_samples)],
44054398
["Total Size", util.naturalsize(self.nbytes)],
44064399
]
44074400
header = ["Table", "Rows", "Size", "Has Metadata"]
@@ -4410,7 +4403,7 @@ def __str__(self):
44104403
table_rows.append(
44114404
[
44124405
name.capitalize(),
4413-
f"{table.num_rows:,}",
4406+
f"{util.format_number(table.num_rows)}",
44144407
util.naturalsize(table.nbytes),
44154408
(
44164409
"Yes"

python/tskit/util.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,25 @@ def unicode_table(
480480
return "".join(out)
481481

482482

483+
def format_number(number, sig_digits=8):
    """
    Format a number with thin spaces (U+2009) as thousands separators.

    Integral values (e.g. row counts, ids) are always rendered exactly,
    with every digit shown. All other values are rendered with up to
    `sig_digits` significant digits using the 'g' presentation type, so
    they may be rounded or switch to exponent notation.

    number: int, float, or a numeric string. Strings are converted with
        float() before formatting.
    sig_digits: int, number of significant digits to display for
        non-integral values.
    Returns a string.
    Raises TypeError if `number` is a string that cannot be converted
    to a number.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import numbers

    if isinstance(number, str):
        try:
            number = float(number)
        except ValueError as err:
            # Callers expect TypeError; chain the ValueError so the
            # offending input is still visible in the traceback.
            raise TypeError("The string cannot be converted to a number") from err

    if isinstance(number, numbers.Integral):
        # The 'g' type would round integers with more than `sig_digits`
        # digits and print them in scientific notation (123456789 ->
        # 1.2345679e+08). Counts and ids must be shown exactly.
        formatted = format(int(number), ",")
    else:
        formatted = format(number, f",.{sig_digits}g")
    return formatted.replace(",", "\u2009")
500+
501+
483502
def html_table(rows, *, header):
484503
headers = "".join(f"<th>{h}</th>" for h in header)
485504
rows = (
@@ -519,7 +538,7 @@ def tree_sequence_html(ts):
519538
f"""
520539
<tr>
521540
<td>{name.capitalize()}</td>
522-
<td>{table.num_rows:,}</td>
541+
<td>{format_number(table.num_rows)}</td>
523542
<td>{naturalsize(table.nbytes)}</td>
524543
<td style="text-align: center;">
525544
{'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
@@ -599,10 +618,10 @@ def tree_sequence_html(ts):
599618
</tr>
600619
</thead>
601620
<tbody>
602-
<tr><td>Trees</td><td>{ts.num_trees:,}</td></tr>
603-
<tr><td>Sequence Length</td><td>{ts.sequence_length:,}</td></tr>
621+
<tr><td>Trees</td><td>{format_number(ts.num_trees)}</td></tr>
622+
<tr><td>Sequence Length</td><td>{format_number(ts.sequence_length)}</td></tr>
604623
<tr><td>Time Units</td><td>{ts.time_units}</td></tr>
605-
<tr><td>Sample Nodes</td><td>{ts.num_samples:,}</td></tr>
624+
<tr><td>Sample Nodes</td><td>{format_number(ts.num_samples)}</td></tr>
606625
<tr><td>Total Size</td><td>{naturalsize(ts.nbytes)}</td></tr>
607626
<tr>
608627
<td>Metadata</td><td style="text-align: left;">{md}</td>
@@ -671,13 +690,13 @@ def tree_html(tree):
671690
</tr>
672691
</thead>
673692
<tbody>
674-
<tr><td>Index</td><td>{tree.index:,}</td></tr>
675-
<tr><td>Interval</td><td>{tree.interval.left:,.8g}-{tree.interval.right:,.8g} ({tree.span:,.8g})</td></tr>
676-
<tr><td>Roots</td><td>{tree.num_roots:,}</td></tr>
677-
<tr><td>Nodes</td><td>{len(tree.preorder()):,}</td></tr>
678-
<tr><td>Sites</td><td>{tree.num_sites:,}</td></tr>
679-
<tr><td>Mutations</td><td>{tree.num_mutations:,}</td></tr>
680-
<tr><td>Total Branch Length</td><td>{tree.total_branch_length:,.8g}</td></tr>
693+
<tr><td>Index</td><td>{format_number(tree.index)}</td></tr>
694+
<tr><td>Interval</td><td>{format_number(tree.interval.left)}-{format_number(tree.interval.right)} ({format_number(tree.span)})</td></tr>
695+
<tr><td>Roots</td><td>{format_number(tree.num_roots)}</td></tr>
696+
<tr><td>Nodes</td><td>{format_number(len(tree.preorder()))}</td></tr>
697+
<tr><td>Sites</td><td>{format_number(tree.num_sites)}</td></tr>
698+
<tr><td>Mutations</td><td>{format_number(tree.num_mutations)}</td></tr>
699+
<tr><td>Total Branch Length</td><td>{format_number(tree.total_branch_length)}</td></tr>
681700
</tbody>
682701
</table>
683702
</div>
@@ -746,18 +765,18 @@ def variant_html(variant):
746765
return (
747766
html_body_head
748767
+ f"""
749-
<tr><td>Site Id</td><td>{site_id:,}</td></tr>
750-
<tr><td>Site Position</td><td>{site_position:,.8g}</td></tr>
751-
<tr><td>Number of Samples</td><td>{num_samples:,}</td></tr>
752-
<tr><td>Number of Alleles</td><td>{num_alleles:,}</td></tr>
768+
<tr><td>Site Id</td><td>{format_number(site_id)}</td></tr>
769+
<tr><td>Site Position</td><td>{format_number(site_position)}</td></tr>
770+
<tr><td>Number of Samples</td><td>{format_number(num_samples)}</td></tr>
771+
<tr><td>Number of Alleles</td><td>{format_number(num_alleles)}</td></tr>
753772
"""
754773
+ "\n".join(
755774
[
756775
f"""<tr><td>Samples with Allele {'missing' if k is None
757776
else "'" + k + "'"}</td><td>"""
758-
+ f"{counts[k]:,}"
777+
+ f"{format_number(counts[k])}"
759778
+ " "
760-
+ f"({freqs[k] * 100:,.2g}%)"
779+
+ f"({format_number(freqs[k] * 100, 2)}%)"
761780
+ "</td></tr>"
762781
for k in variant.alleles
763782
]

0 commit comments

Comments
 (0)