Skip to content

Commit 3eff2ad

Browse files
committed
Add full lookup table for single rune width.
Provides nearly an order of magnitude speedup, depending on how quickly the checks are done. Data is packed at 4 bits/rune (two runes per byte), since the max output value is 2.
```
cpu: AMD Ryzen 9 3950X 16-Core Processor
BenchmarkRuneWidthAll/regular-32 51 25539433 ns/op 0 B/op 0 allocs/op
BenchmarkRuneWidthAll/lut-32 442 2711694 ns/op 0 B/op 0 allocs/op
BenchmarkRuneWidth768/regular-32 617528 2109 ns/op 0 B/op 0 allocs/op
BenchmarkRuneWidth768/lut-32 605570 2038 ns/op 0 B/op 0 allocs/op
BenchmarkRuneWidthAllEastAsian/regular-32 31 36469868 ns/op 0 B/op 0 allocs/op
BenchmarkRuneWidthAllEastAsian/lut-32 442 2710229 ns/op 0 B/op 0 allocs/op
BenchmarkRuneWidth768EastAsian/regular-32 73273 16028 ns/op 0 B/op 0 allocs/op
BenchmarkRuneWidth768EastAsian/lut-32 634987 1871 ns/op 0 B/op 0 allocs/op
PASS
```
1 parent 1ccc74d commit 3eff2ad

File tree

2 files changed

+111
-27
lines changed

2 files changed

+111
-27
lines changed

benchmark_test.go

Lines changed: 72 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,42 @@ var benchSink int
1414
func benchRuneWidth(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int {
1515
b.Helper()
1616
n := 0
17-
got := -1
18-
c := NewCondition()
19-
c.EastAsianWidth = eastAsianWidth
20-
for i := 0; i < b.N; i++ {
21-
got = n
22-
for r := start; r < stop; r++ {
23-
n += c.RuneWidth(r)
17+
b.Run("regular", func(b *testing.B) {
18+
got := -1
19+
c := NewCondition()
20+
c.EastAsianWidth = eastAsianWidth
21+
b.ReportAllocs()
22+
b.ResetTimer()
23+
for i := 0; i < b.N; i++ {
24+
got = n
25+
for r := start; r < stop; r++ {
26+
n += c.RuneWidth(r)
27+
}
28+
got = n - got
2429
}
25-
got = n - got
26-
}
27-
if want != 0 && got != want { // some extra checks
28-
b.Errorf("got %d, want %d\n", got, want)
29-
}
30+
if want != 0 && got != want { // some extra checks
31+
b.Errorf("got %d, want %d\n", got, want)
32+
}
33+
})
34+
b.Run("lut", func(b *testing.B) {
35+
got := -1
36+
n = 0
37+
c := NewCondition()
38+
c.EastAsianWidth = eastAsianWidth
39+
c.CreateLUT()
40+
b.ReportAllocs()
41+
b.ResetTimer()
42+
for i := 0; i < b.N; i++ {
43+
got = n
44+
for r := start; r < stop; r++ {
45+
n += c.RuneWidth(r)
46+
}
47+
got = n - got
48+
}
49+
if want != 0 && got != want { // some extra checks
50+
b.Errorf("got %d, want %d\n", got, want)
51+
}
52+
})
3053
return n
3154
}
3255
func BenchmarkRuneWidthAll(b *testing.B) {
@@ -49,20 +72,44 @@ func BenchmarkRuneWidth768EastAsian(b *testing.B) {
4972
func benchString1Width(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int {
5073
b.Helper()
5174
n := 0
52-
got := -1
53-
c := NewCondition()
54-
c.EastAsianWidth = eastAsianWidth
55-
for i := 0; i < b.N; i++ {
56-
got = n
57-
for r := start; r < stop; r++ {
58-
s := string(r)
59-
n += c.StringWidth(s)
75+
b.Run("regular", func(b *testing.B) {
76+
got := -1
77+
c := NewCondition()
78+
c.EastAsianWidth = eastAsianWidth
79+
b.ResetTimer()
80+
b.ReportAllocs()
81+
for i := 0; i < b.N; i++ {
82+
got = n
83+
for r := start; r < stop; r++ {
84+
s := string(r)
85+
n += c.StringWidth(s)
86+
}
87+
got = n - got
6088
}
61-
got = n - got
62-
}
63-
if want != 0 && got != want { // some extra checks
64-
b.Errorf("got %d, want %d\n", got, want)
65-
}
89+
if want != 0 && got != want { // some extra checks
90+
b.Errorf("got %d, want %d\n", got, want)
91+
}
92+
})
93+
b.Run("lut", func(b *testing.B) {
94+
got := -1
95+
n = 0
96+
c := NewCondition()
97+
c.EastAsianWidth = eastAsianWidth
98+
c.CreateLUT()
99+
b.ResetTimer()
100+
b.ReportAllocs()
101+
for i := 0; i < b.N; i++ {
102+
got = n
103+
for r := start; r < stop; r++ {
104+
s := string(r)
105+
n += c.StringWidth(s)
106+
}
107+
got = n - got
108+
}
109+
if want != 0 && got != want { // some extra checks
110+
b.Errorf("got %d, want %d\n", got, want)
111+
}
112+
})
66113
return n
67114
}
68115
func BenchmarkString1WidthAll(b *testing.B) {

runewidth.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ var nonprint = table{
8989

9090
// Condition has the flag EastAsianWidth, which indicates whether the current locale is CJK or not.
9191
type Condition struct {
92+
combinedLut []byte
9293
EastAsianWidth bool
9394
StrictEmojiNeutral bool
9495
}
@@ -104,10 +105,16 @@ func NewCondition() *Condition {
104105
// RuneWidth returns the number of cells in r.
105106
// See http://www.unicode.org/reports/tr11/
106107
func (c *Condition) RuneWidth(r rune) int {
108+
if r < 0 || r > 0x10FFFF {
109+
return 0
110+
}
111+
if len(c.combinedLut) > 0 {
112+
return int(c.combinedLut[r>>1]>>(uint(r&1)*4)) & 3
113+
}
107114
// optimized version, verified by TestRuneWidthChecksums()
108115
if !c.EastAsianWidth {
109116
switch {
110-
case r < 0x20 || r > 0x10FFFF:
117+
case r < 0x20:
111118
return 0
112119
case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint
113120
return 0
@@ -124,7 +131,7 @@ func (c *Condition) RuneWidth(r rune) int {
124131
}
125132
} else {
126133
switch {
127-
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining):
134+
case inTables(r, nonprint, combining):
128135
return 0
129136
case inTable(r, narrow):
130137
return 1
@@ -138,6 +145,27 @@ func (c *Condition) RuneWidth(r rune) int {
138145
}
139146
}
140147

148+
// CreateLUT will create an in-memory lookup table of 557056 bytes for faster operation.
149+
// This should not be called concurrently with other operations on c.
150+
// If options in c are changed, CreateLUT should be called again.
151+
func (c *Condition) CreateLUT() {
152+
const max = 0x110000
153+
lut := c.combinedLut
154+
if len(c.combinedLut) != 0 {
155+
// Remove so we don't use it.
156+
c.combinedLut = nil
157+
} else {
158+
lut = make([]byte, max/2)
159+
}
160+
for i := range lut {
161+
i32 := int32(i * 2)
162+
x0 := c.RuneWidth(i32)
163+
x1 := c.RuneWidth(i32 + 1)
164+
lut[i] = uint8(x0) | uint8(x1)<<4
165+
}
166+
c.combinedLut = lut
167+
}
168+
141169
// StringWidth returns the width of s as it is displayed.
142170
func (c *Condition) StringWidth(s string) (width int) {
143171
g := uniseg.NewGraphemes(s)
@@ -271,3 +299,12 @@ func FillLeft(s string, w int) string {
271299
func FillRight(s string, w int) string {
272300
return DefaultCondition.FillRight(s, w)
273301
}
302+
303+
// CreateLUT will create an in-memory lookup table of 557056 bytes for faster operation.
304+
// This should not be called concurrently with other operations.
305+
func CreateLUT() {
306+
if len(DefaultCondition.combinedLut) > 0 {
307+
return
308+
}
309+
DefaultCondition.CreateLUT()
310+
}

0 commit comments

Comments
 (0)