Skip to content

Commit 2903373

Browse files
committed
use sync.Pool for runner pooling
Currently, as noted in the comment on `putRunner`, no attempt is made to limit the size of the runner pool. This can result in the pool containing a lot of runners that were created in a burst but will likely never be used again. Instead of trying to do garbage collection within this code, move the pooling to `sync.Pool`, which releases idle objects and therefore keeps the pool as small as possible. Proper usage of `sync.Pool` requires a hard limit on the size of `runtrack`, `runstack`, and `runcrawl`: if any of them is too big, it is not cached. The `initMatch` function has been adjusted so that if any of the three slices was not cached, it is allocated again. The pool is global rather than per-regexp because `sync.Pool` contains a noCopy struct, which conflicts with the `UnmarshalText` function of `Regexp`. This should have the added benefit that programs executing many different regexps will benefit from each other's allocated runners. The motivation for this change is that I'm seeing a lot of memory (~300MiB) being held by these runners until the Go program is restarted, which feels like a suboptimal use of memory. With this change, after a burst of runners has been created in a short amount of time, they are gracefully deallocated over time and no longer hold memory indefinitely.
1 parent 5f3687a commit 2903373

File tree

2 files changed

+48
-57
lines changed

2 files changed

+48
-57
lines changed

regexp.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
"errors"
1313
"math"
1414
"strconv"
15-
"sync"
1615
"time"
1716

1817
"github.com/dlclark/regexp2/syntax"
@@ -45,10 +44,6 @@ type Regexp struct {
4544
capsize int // size of the capture array
4645

4746
code *syntax.Code // compiled program
48-
49-
// cache of machines for running regexp
50-
muRun *sync.Mutex
51-
runner []*runner
5247
}
5348

5449
// Compile parses a regular expression and returns, if successful,
@@ -76,7 +71,6 @@ func Compile(expr string, opt RegexOptions) (*Regexp, error) {
7671
capsize: code.Capsize,
7772
code: code,
7873
MatchTimeout: DefaultMatchTimeout,
79-
muRun: &sync.Mutex{},
8074
}, nil
8175
}
8276

runner.go

Lines changed: 48 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"math"
88
"strconv"
99
"strings"
10+
"sync"
1011
"time"
1112
"unicode"
1213

@@ -1356,38 +1357,30 @@ func (r *runner) initMatch() {
13561357
r.runmatch.reset(r.runtext, r.runtextstart)
13571358
}
13581359

1359-
// note we test runcrawl, because it is the last one to be allocated
1360-
// If there is an alloc failure in the middle of the three allocations,
1361-
// we may still return to reuse this instance, and we want to behave
1362-
// as if the allocations didn't occur. (we used to test _trackcount != 0)
1363-
1364-
if r.runcrawl != nil {
1365-
r.runtrackpos = len(r.runtrack)
1366-
r.runstackpos = len(r.runstack)
1367-
r.runcrawlpos = len(r.runcrawl)
1368-
return
1369-
}
1370-
13711360
r.initTrackCount()
13721361

1373-
tracksize := r.runtrackcount * 8
1374-
stacksize := r.runtrackcount * 8
1375-
1376-
if tracksize < 32 {
1377-
tracksize = 32
1378-
}
1379-
if stacksize < 16 {
1380-
stacksize = 16
1362+
if r.runtrack == nil {
1363+
tracksize := r.runtrackcount * 8
1364+
if tracksize < 32 {
1365+
tracksize = 32
1366+
}
1367+
r.runtrack = make([]int, tracksize)
1368+
r.runtrackpos = tracksize
13811369
}
13821370

1383-
r.runtrack = make([]int, tracksize)
1384-
r.runtrackpos = tracksize
1385-
1386-
r.runstack = make([]int, stacksize)
1387-
r.runstackpos = stacksize
1371+
if r.runstack == nil {
1372+
stacksize := r.runtrackcount * 8
1373+
if stacksize < 16 {
1374+
stacksize = 16
1375+
}
1376+
r.runstack = make([]int, stacksize)
1377+
r.runstackpos = stacksize
1378+
}
13881379

1389-
r.runcrawl = make([]int, 32)
1390-
r.runcrawlpos = 32
1380+
if r.runcrawl == nil {
1381+
r.runcrawl = make([]int, 32)
1382+
r.runcrawlpos = 32
1383+
}
13911384
}
13921385

13931386
func (r *runner) tidyMatch(quick bool) *Match {
@@ -1579,35 +1572,39 @@ func (r *runner) initTrackCount() {
15791572
r.runtrackcount = r.code.TrackCount
15801573
}
15811574

1575+
// Pool of runners, use getRunner and putRunner to retrieve and put back
1576+
// runners, respectively.
1577+
var runnerPool = sync.Pool{
1578+
New: func() interface{} {
1579+
return new(runner)
1580+
},
1581+
}
1582+
15821583
// getRunner returns a run to use for matching re.
1583-
// It uses the re's runner cache if possible, to avoid
1584-
// unnecessary allocation.
1584+
// It uses the runner cache if possible, to avoid unnecessary allocation.
15851585
func (re *Regexp) getRunner() *runner {
1586-
re.muRun.Lock()
1587-
if n := len(re.runner); n > 0 {
1588-
z := re.runner[n-1]
1589-
re.runner = re.runner[:n-1]
1590-
re.muRun.Unlock()
1591-
return z
1592-
}
1593-
re.muRun.Unlock()
1594-
z := &runner{
1595-
re: re,
1596-
code: re.code,
1597-
}
1586+
z := runnerPool.Get().(*runner)
1587+
z.re = re
1588+
z.code = re.code
15981589
return z
15991590
}
16001591

1601-
// putRunner returns a runner to the re's cache.
1602-
// There is no attempt to limit the size of the cache, so it will
1603-
// grow to the maximum number of simultaneous matches
1604-
// run using re. (The cache empties when re gets garbage collected.)
1592+
// putRunner returns a runner to the runner pool.
16051593
func (re *Regexp) putRunner(r *runner) {
1606-
re.muRun.Lock()
1607-
r.runtext = nil
1608-
if r.runmatch != nil {
1609-
r.runmatch.text = nil
1594+
// Proper usage of a sync.Pool requires each entry to have approximately
1595+
// the same memory cost.
1596+
// See https://golang.org/issue/23199.
1597+
if cap(r.runtrack) > 32*1024 {
1598+
r.runtrack = nil
1599+
}
1600+
if cap(r.runstack) > 16*1024 {
1601+
r.runstack = nil
1602+
}
1603+
if cap(r.runcrawl) > 1024 {
1604+
r.runcrawl = nil
16101605
}
1611-
re.runner = append(re.runner, r)
1612-
re.muRun.Unlock()
1606+
1607+
r.runtext = nil
1608+
r.runmatch = nil
1609+
runnerPool.Put(r)
16131610
}

0 commit comments

Comments
 (0)