-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsql2csv.go
108 lines (90 loc) · 2.12 KB
/
sql2csv.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package wikidump
import (
"bufio"
"bytes"
"io"
"github.com/pkg/errors"
)
// SQL2CSV wraps r, a SQL data dump from dumps.wikimedia.org, and returns a
// reader that yields the same data converted on the fly into clean CSV.
func SQL2CSV(r io.Reader) io.Reader {
	converter := new(_SQL2CSV)
	converter.file = bufio.NewReader(r)
	return converter
}
// _SQL2CSV is the io.Reader implementation returned by SQL2CSV.
type _SQL2CSV struct {
// file is the buffered reader over the SQL input handed to SQL2CSV.
file *bufio.Reader
// buffer holds converted bytes that Read has not handed out yet.
buffer []byte
// err is the result of the last refill; once non-nil, refill returns it
// again without reading further input.
err error
}
// Read implements io.Reader. It copies pending converted bytes into p,
// refilling the internal buffer first when it is empty.
//
// It returns (0, nil) for an empty p, and (0, r.err) when the refill fails.
// A successful call always returns at least one byte.
func (r *_SQL2CSV) Read(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}

	// Only touch the input when nothing is left over from a previous call.
	if len(r.buffer) == 0 {
		if fillErr := r.refill(); fillErr != nil {
			return 0, r.err
		}
	}

	// copy moves min(len(p), len(r.buffer)) bytes and reports how many.
	n = copy(p, r.buffer)
	r.buffer = r.buffer[n:]
	return n, nil
}
// refill converts the next chunk delivered by nextRawBuffer into CSV and
// stores the result, terminated by a newline, in r.buffer.
//
// The outcome of every call is recorded in r.err; once an error has been
// recorded, later calls return it immediately without reading more input.
func (r *_SQL2CSV) refill() (err error) {
	if r.err != nil {
		return r.err
	}
	// Remember whatever this call ends up returning, success included.
	defer func() { r.err = err }()

	out, raw, err := r.nextRawBuffer()
	if err != nil {
		return err
	}

	rowSeparator := []byte("),(")
	quoted := false // true while the scan is inside a SQL string literal
	for pos := 0; pos < len(raw); pos++ {
		ch := raw[pos]

		// "),(" outside a string separates two rows: drop the ")," that was
		// already copied and start a new CSV line instead.
		if !quoted && bytes.HasSuffix(raw[:pos+1], rowSeparator) {
			out = append(out[:len(out)-2], '\n')
			continue
		}

		// Everything that is not a quote character is copied verbatim.
		if ch != '\'' && ch != '"' {
			out = append(out, ch)
			continue
		}

		// A quote is escaped when an odd number of backslashes precedes it.
		escaped := !isEnabled(raw[:pos])
		switch {
		case ch == '\'' && escaped:
			// \' becomes a plain ' (the copied backslash is replaced).
			out = append(out[:len(out)-1], '\'')
		case ch == '\'':
			// An unescaped ' opens or closes a string; CSV uses " for that.
			out = append(out, '"')
			quoted = !quoted
		case escaped:
			// \" becomes "" (the copied backslash is replaced).
			out = append(out[:len(out)-1], '"', '"')
		default:
			// An unescaped " is rejected.
			return errors.Errorf("SQL2CSV: error invalid \" in input.")
		}
	}

	r.buffer = append(out, '\n')
	return nil
}
// isEnabled reports whether b ends with an even number (zero included) of
// consecutive backslashes, i.e. whether a character placed right after b
// would not be escaped by them.
func isEnabled(b []byte) bool {
	enabled := true
	// Walk backwards over the trailing backslashes, flipping the answer for
	// each one found.
	for end := len(b); end > 0 && b[end-1] == '\\'; end-- {
		enabled = !enabled
	}
	return enabled
}
// nextRawBuffer reads lines from r.file until it finds one that starts with
// "INSERT INTO" and returns the part of that line between its first "(" and
// its last ")".
//
// Returns:
//   - buffer: a zero-length slice over the same line, which the caller
//     appends its output to.
//   - rawBuffer: the bytes strictly between the first "(" and the last ")".
//   - err: the read error (io.EOF at end of input) when no further INSERT
//     line is available, or an "invalid input" error when the INSERT line
//     has no usable pair of parentheses. Both slices are nil when err is
//     non-nil.
func (r *_SQL2CSV) nextRawBuffer() (buffer, rawBuffer []byte, err error) {
	insertPrefix := []byte("INSERT INTO")

	var line []byte
	for {
		var readErr error
		line, readErr = r.file.ReadBytes('\n')
		isInsert := bytes.HasPrefix(line, insertPrefix)

		// ReadBytes returns the data read so far together with the error.
		// A final INSERT line without a trailing newline therefore arrives
		// with io.EOF and must still be processed; the following call sees
		// io.EOF again on an empty read. Any other error, or io.EOF on a line
		// that is not an INSERT statement, ends the stream here.
		if readErr != nil && !(isInsert && readErr == io.EOF) {
			return nil, nil, readErr
		}
		if isInsert {
			break
		}
	}

	begin := bytes.Index(line, []byte("("))
	end := bytes.LastIndex(line, []byte(")"))
	if begin == -1 || end == -1 || begin > end {
		return nil, nil, errors.Errorf("SQL2CSV: invalid input error.")
	}
	return line[:0], line[begin+1 : end], nil
}