-
Notifications
You must be signed in to change notification settings - Fork 39
/
punctuation.go
43 lines (36 loc) · 1.06 KB
/
punctuation.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
package sentences
// PunctStrings is the set of punctuation helpers the sentence tokenizer uses
// to detect punctuation. Any type providing these three methods can be
// supplied in place of the default implementation.
type PunctStrings interface {
	// HasSentencePunct reports whether text contains a known sentence
	// punctuation character.
	HasSentencePunct(text string) bool
	// NonPunct returns a regex string used to detect non-punctuation.
	NonPunct() string
	// Punctuation returns the punctuation characters as a single string.
	Punctuation() string
}
// DefaultPunctStrings is the default punctuation helper used to detect
// punctuation in the sentence tokenizer. It has no fields; all of its
// behavior lives in its methods.
type DefaultPunctStrings struct{}
// NewPunctStrings returns a pointer to a freshly allocated
// DefaultPunctStrings, the default set of punctuation properties.
func NewPunctStrings() *DefaultPunctStrings {
	return new(DefaultPunctStrings)
}
// NonPunct returns the regex string used to detect non-punctuation.
//
// The returned fragment is the character class `[^\W\d]`: a single character
// that is neither a non-word character (\W) nor a digit (\d).
func (p *DefaultPunctStrings) NonPunct() string {
	const nonPunctPattern = `[^\W\d]`
	return nonPunctPattern
}
// Punctuation returns the punctuation characters as one string; each rune in
// the result is a punctuation character.
func (p *DefaultPunctStrings) Punctuation() string {
	// NOTE(review): the literal lists ';', ':', ',', '!' and '?' twice, next
	// to the ideographic full stop; the second group may have been meant as
	// full-width forms — confirm against the intended character set.
	const punctuationChars = ";:,.!?;:,。!?"
	return punctuationChars
}
// HasSentencePunct reports whether text contains at least one known
// sentence punctuation character: '.', '!', '?' or the ideographic full
// stop '。'. It returns false for an empty text.
func (p *DefaultPunctStrings) HasSentencePunct(text string) bool {
	// NOTE(review): '!' and '?' appear twice in this literal; the second
	// pair may have been meant as full-width forms — confirm.
	const terminators = `.!?。!?`

	// Walk the input rune by rune and stop at the first terminator found.
	for _, r := range text {
		for _, term := range terminators {
			if r == term {
				return true
			}
		}
	}
	return false
}