-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
split.go
80 lines (68 loc) · 1.55 KB
/
split.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
// Copyright 2021 The Mellium Contributors.
// Use of this source code is governed by the BSD 2-clause
// license that can be found in the LICENSE file.
package xml
import (
"bytes"
"io"
)
// cdataStart and cdataEnd are the byte sequences that open and close a CDATA
// section. Split checks for cdataStart as a prefix to recognize a CDATA token
// and splitCData searches for cdataEnd to find where that token stops.
var (
cdataStart = []byte("<![CDATA[")
cdataEnd = []byte("]]>")
)
// Split is a bufio.SplitFunc that splits on XML tokens.
//
// The kind of token is decided from the leading bytes of data:
//
//   - data beginning with the CDATA opener is split by splitCData,
//   - data that does not begin with '<' is split as character data by
//     splitCharData,
//   - anything else beginning with '<' is split by splitOther.
//
// When data is empty and atEOF is true, Split returns io.EOF. When data is
// empty and atEOF is false it returns (0, nil, nil) to request more input.
func Split(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 {
		if atEOF {
			return 0, nil, io.EOF
		}
		// Nothing to classify yet: ask for more data rather than indexing
		// into an empty slice below.
		return 0, nil, nil
	}

	switch {
	case bytes.HasPrefix(data, cdataStart):
		return splitCData(data, atEOF)
	case data[0] != '<':
		return splitCharData(data, atEOF)
	}
	return splitOther(data, atEOF)
}
// splitCData splits off a CDATA section from the front of data.
//
// The token runs through the first occurrence of cdataEnd, terminator
// included. If no terminator is present, more input is requested unless atEOF
// is set, in which case everything that remains is returned as the token.
func splitCData(data []byte, atEOF bool) (int, []byte, error) {
	if end := bytes.Index(data, cdataEnd); end >= 0 {
		// Include the closing delimiter in the token.
		end += len(cdataEnd)
		return end, data[:end], nil
	}
	if !atEOF {
		return 0, nil, nil
	}
	return len(data), data, nil
}
// splitCharData splits off a run of character data from the front of data.
//
// The token is everything before the first '<'. If data contains no '<', more
// input is requested unless atEOF is set, in which case all of data is
// returned as the token.
func splitCharData(data []byte, atEOF bool) (int, []byte, error) {
	switch next := bytes.IndexByte(data, '<'); {
	case next >= 0:
		return next, data[:next], nil
	case atEOF:
		return len(data), data, nil
	default:
		return 0, nil, nil
	}
}
// splitOther splits off a token that ends at the first '>' found outside of a
// quoted run.
//
// A single or double quote opens a quoted run that lasts until the same quote
// byte appears again; any '>' inside such a run is skipped. If no closing '>'
// is found, more input is requested unless atEOF is set, in which case all of
// data is returned as the token.
func splitOther(data []byte, atEOF bool) (int, []byte, error) {
	var quote byte // 0 while outside a quoted run, else the opening quote byte
	for pos := 0; pos < len(data); pos++ {
		c := data[pos]
		switch {
		case quote != 0:
			// Inside quotes only the matching quote byte is significant.
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '>':
			end := pos + 1
			return end, data[:end], nil
		}
	}

	if !atEOF {
		return 0, nil, nil
	}
	// TODO: the remaining data never reached a closing '>'. Is this an invalid
	// token if it starts with an unescaped '<', and should an error be
	// returned instead?
	return len(data), data, nil
}