@@ -2,12 +2,16 @@ package boltindex
2
2
3
3
import (
4
4
"bytes"
5
+ "log"
6
+ "sync"
5
7
6
8
"github.com/boltdb/bolt"
7
9
8
10
diff "github.com/mcluseau/go-diff"
9
11
)
10
12
13
+ const seenBatchSize = 1000
14
+
11
15
var (
12
16
resumeKeyKey = []byte ("resumeKey" )
13
17
metaPrefix = []byte ("meta:" )
@@ -26,6 +30,8 @@ type Index struct {
26
30
metaBucketName []byte
27
31
recordSeen bool
28
32
seenBucketName []byte
33
+ seenStream chan hash
34
+ seenWG sync.WaitGroup
29
35
}
30
36
31
37
func New (db * bolt.DB , bucket []byte , recordSeen bool ) (idx * Index , err error ) {
@@ -56,72 +62,60 @@ func New(db *bolt.DB, bucket []byte, recordSeen bool) (idx *Index, err error) {
56
62
metaBucketName : append (metaPrefix , bucket ... ),
57
63
recordSeen : recordSeen ,
58
64
seenBucketName : seenBucketName ,
65
+ seenWG : sync.WaitGroup {},
59
66
}
60
67
return
61
68
}
62
69
63
70
var _ diff.Index = & Index {}
64
71
65
- func (i * Index ) bucket (writable bool ) (tx * bolt.Tx , bucket * bolt.Bucket , err error ) {
66
- tx , err = i .db .Begin (writable )
67
- if err != nil {
68
- return
69
- }
70
-
71
- bucket = tx .Bucket (i .bucketName )
72
- return
73
- }
74
-
75
72
// Cleanup removes temp data produced by this index
76
73
func (i * Index ) Cleanup () (err error ) {
77
- if i .seenBucketName == nil {
78
- return
74
+ if i .seenStream != nil {
75
+ close (i .seenStream )
76
+ i .seenWG .Wait ()
79
77
}
80
78
81
- err = i .db .Update (func (tx * bolt.Tx ) (err error ) {
82
- tx .DeleteBucket (i .seenBucketName )
83
- tx .OnCommit (func () {
84
- i .seenBucketName = nil
79
+ if i .seenBucketName != nil {
80
+ err = i .db .Update (func (tx * bolt.Tx ) (err error ) {
81
+ tx .DeleteBucket (i .seenBucketName )
82
+ tx .OnCommit (func () {
83
+ i .seenBucketName = nil
84
+ })
85
+ return
85
86
})
86
- return
87
- })
88
- if err != nil {
89
- return
87
+ if err != nil {
88
+ return
89
+ }
90
90
}
91
91
92
92
return
93
93
}
94
94
95
- func commitOrRollback (tx * bolt.Tx , err error ) {
96
- if err == nil {
97
- tx .Commit ()
98
- } else {
99
- tx .Rollback ()
100
- }
101
- }
102
-
103
- func (i * Index ) Index (kv KeyValue , resumeKey []byte ) (err error ) {
104
- tx , bucket , err := i .bucket (true )
105
- if err != nil {
106
- return
107
- }
108
-
109
- defer commitOrRollback (tx , err )
95
+ func (i * Index ) Index (kvs <- chan KeyValue , resumeKey <- chan []byte ) (err error ) {
96
+ return i .db .Update (func (tx * bolt.Tx ) (err error ) {
97
+ bucket := tx .Bucket (i .bucketName )
98
+
99
+ for kv := range kvs {
100
+ if len (kv .Value ) == 0 {
101
+ // deletion
102
+ err = bucket .Delete (kv .Key )
103
+ } else {
104
+ // create/update
105
+ err = bucket .Put (kv .Key , hashOf (kv .Value ).Sum (nil ))
106
+ }
110
107
111
- if resumeKey != nil {
112
- // record resumeKey
113
- err = i . storeResumeKey ( tx , resumeKey )
114
- }
108
+ if err != nil {
109
+ return
110
+ }
111
+ }
115
112
116
- if len (kv .Value ) == 0 {
117
- // deletion
118
- err = bucket .Delete (kv .Key )
113
+ if resumeKey != nil {
114
+ // record resumeKey
115
+ err = i .storeResumeKey (tx , <- resumeKey )
116
+ }
119
117
return
120
- }
121
-
122
- // create/update
123
- err = bucket .Put (kv .Key , hashOf (kv .Value ).Sum (nil ))
124
- return
118
+ })
125
119
}
126
120
127
121
func (i * Index ) storeResumeKey (tx * bolt.Tx , resumeKey []byte ) (err error ) {
@@ -150,19 +144,26 @@ func (i *Index) Compare(kv KeyValue) (result diff.CompareResult, err error) {
150
144
panic ("nil values are not allowed here" )
151
145
}
152
146
153
- tx , bucket , err := i .bucket (i .recordSeen )
147
+ var currentValueHash []byte
148
+
149
+ err = i .db .View (func (tx * bolt.Tx ) error {
150
+ currentValueHash = tx .Bucket (i .bucketName ).Get (kv .Key )
151
+ return nil
152
+ })
153
+
154
154
if err != nil {
155
155
return
156
156
}
157
157
158
- defer commitOrRollback (tx , err )
159
-
160
158
if i .recordSeen {
161
- seenBucket := tx .Bucket (i .seenBucketName )
162
- err = seenBucket .Put (hashOf (kv .Key ).Sum (nil ), nil )
163
- }
159
+ if i .seenStream == nil {
160
+ i .seenStream = make (chan hash , seenBatchSize )
161
+ i .seenWG .Add (1 )
162
+ go i .writeSeen ()
163
+ }
164
164
165
- currentValueHash := bucket .Get (kv .Key )
165
+ i .seenStream <- hashOf (kv .Key )
166
+ }
166
167
167
168
if currentValueHash == nil {
168
169
return diff .MissingKey , nil
@@ -177,40 +178,85 @@ func (i *Index) Compare(kv KeyValue) (result diff.CompareResult, err error) {
177
178
}
178
179
}
179
180
181
+ func (i * Index ) writeSeen () {
182
+ defer i .seenWG .Done ()
183
+
184
+ batchCount := 0
185
+ batch := make ([]byte , 0 , seenBatchSize * hashLen )
186
+
187
+ saveBatch := func () (err error ) {
188
+ log .Printf ("save batch: %d entries" , batchCount )
189
+ err = i .db .Update (func (tx * bolt.Tx ) (err error ) {
190
+ bucket := tx .Bucket (i .seenBucketName )
191
+
192
+ for i := 0 ; i < batchCount ; i ++ {
193
+ bucket .Put (batch [i * hashLen :i + 1 * hashLen ], []byte {})
194
+ }
195
+
196
+ return
197
+ })
198
+ if err == nil {
199
+ batch = batch [0 :0 ]
200
+ batchCount = 0
201
+ }
202
+ return
203
+ }
204
+
205
+ for h := range i .seenStream {
206
+ h .Sum (batch )
207
+ batchCount ++
208
+
209
+ if batchCount == seenBatchSize {
210
+ saveBatch ()
211
+ }
212
+ }
213
+
214
+ if batchCount != 0 {
215
+ saveBatch ()
216
+ }
217
+ }
218
+
180
219
func (i * Index ) KeysNotSeen () <- chan []byte {
181
220
if ! i .recordSeen {
182
221
return nil
183
222
}
184
223
185
224
ch := make (chan []byte , 10 )
186
225
187
- go func () {
188
- defer close (ch )
189
-
190
- if err := i .db .View (func (tx * bolt.Tx ) (err error ) {
191
- keysBucket := tx .Bucket (i .bucketName )
192
- seenBucket := tx .Bucket (i .seenBucketName )
193
-
194
- err = keysBucket .ForEach (func (k , v []byte ) (err error ) {
195
- if seenBucket == nil {
196
- // no seenBucket => nothing was seen
197
- ch <- k
198
- }
199
- if seenBucket .Get (hashOf (k ).Sum (nil )) == nil {
200
- ch <- k
201
- }
202
- return
203
- })
204
- return
205
-
206
- }); err != nil {
207
- panic (err )
208
- }
209
- }()
226
+ go i .sendKeysNotSeen (ch )
210
227
211
228
return ch
212
229
}
213
230
231
+ func (i * Index ) sendKeysNotSeen (ch chan []byte ) {
232
+ defer close (ch )
233
+
234
+ if i .seenStream != nil {
235
+ close (i .seenStream )
236
+ i .seenWG .Wait ()
237
+ }
238
+
239
+ if err := i .db .View (func (tx * bolt.Tx ) (err error ) {
240
+ keysBucket := tx .Bucket (i .bucketName )
241
+ seenBucket := tx .Bucket (i .seenBucketName )
242
+
243
+ err = keysBucket .ForEach (func (k , v []byte ) (err error ) {
244
+ if seenBucket == nil {
245
+ // no seenBucket => nothing was seen
246
+ ch <- k
247
+ }
248
+ if seenBucket .Get (hashOf (k ).Sum (nil )) == nil {
249
+ ch <- k
250
+ }
251
+ return
252
+ })
253
+ return
254
+
255
+ }); err != nil {
256
+ panic (err )
257
+ }
258
+ }
259
+
214
260
// Value is not supported by this index: it unconditionally panics.
// NOTE(review): presumably present only to satisfy diff.Index — confirm
// against that interface's definition.
func (i *Index) Value(key []byte) []byte {
	panic("should not be called")
}
0 commit comments