-
Notifications
You must be signed in to change notification settings - Fork 2
/
parse.go
119 lines (106 loc) · 2.86 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package robots
import "strings"
// parser carries the state shared by the parse functions while they work
// through the items produced by lex.
type parser struct {
// agents holds the agents gathered for the group currently being read;
// they are handed to robotsdata.addAgents once that group is finished.
agents []*agent
// withinGroup is set once an allow or disallow rule has been seen and is
// cleared again when a user-agent rule starts the next group.
withinGroup bool
// items is the not-yet-consumed tail of the lexer output; items[0] is the
// item being handled by the current state.
items []*item
// robotsdata is the result under construction; parse returns it.
robotsdata *robotsdata
}
// parsefn is a single parser state: it acts on p and returns the state to
// run next. A nil return value means there is no further state to run.
type parsefn func(p *parser) parsefn
// parse lexes s and drives the resulting items through the parser state
// machine, returning the robotsdata that the states populated.
//
// Each state inspects p and returns the next state; a nil state ends the
// run. The first state, parseStart, reads p.items[0] without checking the
// length, so an input for which lex yields no items at all is answered
// here with the empty result instead of being handed to the state machine
// (which would panic with an index-out-of-range error).
func parse(s string) *robotsdata {
	p := &parser{
		items:      lex(s),
		robotsdata: &robotsdata{},
	}
	// No items means no rules. Nothing has been collected in p.agents at
	// this point, so there is nothing to flush into the result either.
	if len(p.items) == 0 {
		return p.robotsdata
	}
	for state := parseStart; state != nil; state = state(p) {
	}
	return p.robotsdata
}
// parseStart is the dispatching state: it looks at the type of the item at
// the head of p.items and selects the state that handles that kind of
// rule. An item of any other type is skipped by going straight to
// parseNext.
//
// The head item is read without a length check, so this state must only be
// entered while at least one item remains.
func parseStart(p *parser) parsefn {
	var next parsefn
	switch head := p.items[0]; head.typ {
	case itemSitemap:
		next = parseSitemap
	case itemAllow:
		next = parseAllow
	case itemDisallow:
		next = parseDisallow
	case itemUserAgent:
		next = parseUserAgent
	default:
		next = parseNext
	}
	return next
}
// parseUserAgent handles a user-agent rule, which can mean one of two
// things depending on what came before it.
//
// If allow/disallow rules have already been seen (p.withinGroup is set),
// the previous group is complete: its agents are handed to
// p.robotsdata.addAgents, the agent list is started afresh, and the parser
// is marked as being before the start of a group again.
//
// Otherwise the previous rule was itself a user-agent rule, and this agent
// simply joins the agents that the forthcoming group will apply to.
//
// In both cases the named agent ends up as the last entry of p.agents and
// parsing continues with parseNext.
func parseUserAgent(p *parser) parsefn {
	if p.withinGroup {
		p.robotsdata.addAgents(p.agents)
		// Drop the reference rather than truncating, so the slice just
		// handed to addAgents is never written to again.
		p.agents = nil
		p.withinGroup = false
	}
	p.agents = append(p.agents, &agent{name: p.items[0].val})
	return parseNext
}
// makeParseMember builds the state for an allow or a disallow rule. The two
// states differ only in the value stored in the allow field of the members
// they create, so both are produced by this one factory.
//
// The returned state always marks the parser as being within a group, even
// when the rule precedes every user-agent rule; that is intentional. A rule
// whose path is blank after trimming whitespace adds nothing. Otherwise a
// separate member carrying the path is added to the group of every agent in
// p.agents; with no agents collected the rule has no effect. Parsing then
// continues with parseNext.
func makeParseMember(allow bool) func(*parser) parsefn {
	return func(p *parser) parsefn {
		p.withinGroup = true

		path := p.items[0].val
		if strings.TrimSpace(path) != "" {
			for i := range p.agents {
				p.agents[i].group.addMember(&member{
					allow: allow,
					path:  path,
				})
			}
		}
		return parseNext
	}
}
// parseDisallow and parseAllow are the states for disallow and allow rules
// respectively. They are declared without initializers and filled in by
// init below.
var (
	parseDisallow parsefn
	parseAllow    parsefn
)

// init builds the two member states when the package is initialized.
//
// They cannot be initialized in their declarations: the function returned
// by makeParseMember refers to parseNext, which refers to parseStart, which
// refers back to these variables, and Go rejects such a dependency between
// package-level initializers as an initialization cycle.
func init() {
	parseAllow = makeParseMember(true)
	parseDisallow = makeParseMember(false)
}
// parseSitemap records the value of the current item as a sitemap on the
// result and continues with parseNext.
//
// Sitemap rules are global: p.withinGroup and p.agents are deliberately
// left alone, so a sitemap line neither opens nor closes a group.
func parseSitemap(p *parser) parsefn {
	data := p.robotsdata
	data.sitemaps = append(data.sitemaps, p.items[0].val)
	return parseNext
}
// parseNext discards the item at the head of p.items. If any items remain
// it returns to parseStart to dispatch on the new head; otherwise it moves
// on to parseEnd.
func parseNext(p *parser) parsefn {
	if p.items = p.items[1:]; len(p.items) > 0 {
		return parseStart
	}
	return parseEnd
}
// parseEnd is the final state. It hands the agents still held in p.agents
// to p.robotsdata.addAgents and returns nil, so that no further state is
// run.
func parseEnd(p *parser) parsefn {
	data, pending := p.robotsdata, p.agents
	data.addAgents(pending)
	return nil
}