package search

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)
27
// tokenType identifies the kind of a lexed token.
type tokenType int

// Token kinds produced by the lexer.
const (
	tokenAnd           tokenType = iota // boolean "and" operator
	tokenArg                            // unquoted argument following a ':'
	tokenClose                          // closing parenthesis ")"
	tokenColon                          // ":" separating a predicate from its argument
	tokenEOF                            // end of input; never emitted here — the lexer signals EOF by closing its channel, so presumably a consumer synthesizes this (confirm against the parser)
	tokenError                          // lexing error; val holds the message
	tokenLiteral                        // bare search word
	tokenNot                            // negation marker "-"
	tokenOpen                           // opening parenthesis "("
	tokenOr                             // boolean "or" operator
	tokenPredicate                      // predicate name directly preceding ':'
	tokenQuotedArg                      // double-quoted argument
	tokenQuotedLiteral                  // double-quoted search word
)
45
const (
	// eof is the sentinel returned by lexer.next at end of input.
	// It is negative, so it can never collide with a valid rune.
	eof = -1
	// whitespace lists the ASCII space characters skipped between tokens.
	whitespace = "\t\n\f\v\r "
	// opBound holds the runes that may legally follow an "and"/"or"
	// keyword for it to count as an operator: whitespace or "(".
	opBound = whitespace + "("
)
51
52
53
54
55 func isSearchWordRune(r rune) bool {
56 switch r {
57 case ':', ')', '(', eof:
58 return false
59 }
60 return !unicode.IsSpace(r)
61 }
62
// token is a single lexed item: its kind, its raw text, and the byte
// offset in the input where the text begins.
type token struct {
	typ   tokenType
	val   string
	start int
}
68
69 func (t token) String() string {
70 switch t.typ {
71 case tokenEOF:
72 return "EOF"
73 case tokenError:
74 return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start)
75 }
76 return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start)
77 }
78
// lexer holds the scanning state for one input string. Tokens are
// delivered to the consumer over the tokens channel by a goroutine
// started in lex.
type lexer struct {
	input  string     // the string being scanned
	start  int        // byte offset where the current token begins
	pos    int        // byte offset of the next rune to read
	width  int        // byte width of the rune most recently returned by next
	tokens chan token // unbuffered channel of lexed tokens
	state  stateFn    // current state; nil means lexing is finished
}
87
// emit sends the text accumulated since start as a token of type typ
// and advances start past it.
func (l *lexer) emit(typ tokenType) {
	l.tokens <- token{typ, l.input[l.start:l.pos], l.start}
	l.start = l.pos
}
92
93 func (l *lexer) next() (r rune) {
94 if l.pos >= len(l.input) {
95 l.width = 0
96 return eof
97 }
98 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
99 l.pos += l.width
100 return
101 }
102
// ignore discards the input scanned so far: start catches up to pos.
func (l *lexer) ignore() {
	l.start = l.pos
}
106
// backup steps back over the rune most recently returned by next.
// It is valid only once per call to next: width is not a stack, so a
// second consecutive backup would rewind by the same width again.
func (l *lexer) backup() {
	l.pos -= l.width
}
110
// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
116
117 func (l *lexer) accept(valid string) bool {
118 if strings.ContainsRune(valid, l.next()) {
119 return true
120 }
121 l.backup()
122 return false
123 }
124
125 func (l *lexer) acceptString(s string) bool {
126 for _, r := range s {
127 if l.next() != r {
128 l.backup()
129 return false
130 }
131 }
132 return true
133 }
134
// acceptRun consumes a maximal run of runes from valid, then backs up
// over the first non-matching rune. It always terminates: at end of
// input next returns eof, which never appears in valid.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}
140
// acceptRunFn consumes runes while valid reports true, then backs up
// over the first rejected rune. valid must return false for eof or
// this would loop forever (both predicates used in this file do).
func (l *lexer) acceptRunFn(valid func(rune) bool) {
	for valid(l.next()) {
	}
	l.backup()
}
146
147 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
148 l.tokens <- token{
149 typ: tokenError,
150 val: fmt.Sprintf(format, args...),
151 start: l.start,
152 }
153 return nil
154 }
155
// lex starts lexing input in a new goroutine and returns the lexer
// together with its token channel. The channel is unbuffered, so the
// goroutine blocks until the consumer reads each token; the channel is
// closed when lexing finishes.
func lex(input string) (*lexer, chan token) {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
		state:  readExp,
	}
	go l.run()
	return l, l.tokens
}
165
166 func (l *lexer) run() {
167 for {
168 if l.state == nil {
169 close(l.tokens)
170 return
171 }
172 l.state = l.state(l)
173 }
174 }
175
176
// stateFn is one lexer state: it consumes some input and returns the
// next state, or nil to stop.
type stateFn func(*lexer) stateFn
178
// readNeg consumes the "-" negation marker (already peeked by readExp)
// and emits tokenNot; the negated expression follows.
func readNeg(l *lexer) stateFn {
	l.accept("-")
	l.emit(tokenNot)
	return readExp
}
184
// readClose consumes ")" and emits tokenClose. A close paren ends a
// term, so a boolean operator may legally follow.
func readClose(l *lexer) stateFn {
	l.accept(")")
	l.emit(tokenClose)
	return readOperator
}
190
// readOpen consumes "(" and emits tokenOpen; a fresh expression
// follows inside the parentheses.
func readOpen(l *lexer) stateFn {
	l.accept("(")
	l.emit(tokenOpen)
	return readExp
}
196
// readColon consumes the ":" between a predicate (or a preceding
// argument) and its argument, emitting tokenColon.
func readColon(l *lexer) stateFn {
	l.accept(":")
	l.emit(tokenColon)
	return readArg
}
202
203 func readPredicate(l *lexer) stateFn {
204 l.acceptRunFn(unicode.IsLetter)
205 switch l.peek() {
206 case ':':
207 l.emit(tokenPredicate)
208 return readColon
209 }
210 return readLiteral
211 }
212
// readLiteral extends the current word over all search-word runes and
// emits it as a bare literal.
func readLiteral(l *lexer) stateFn {
	l.acceptRunFn(isSearchWordRune)
	l.emit(tokenLiteral)
	return readOperator
}
218
// readArg lexes the argument after a predicate colon: either a quoted
// string or an unquoted word. A ':' directly after the argument leads
// back to readColon, allowing chained arguments (pred:a:b).
func readArg(l *lexer) stateFn {
	if l.peek() == '"' {
		return readQuotedArg
	}
	l.acceptRunFn(isSearchWordRune)
	l.emit(tokenArg)
	if l.peek() == ':' {
		return readColon
	}
	return readOperator
}
230
231 func readAND(l *lexer) stateFn {
232 if l.acceptString("and") && l.accept(opBound) {
233 l.backup()
234 l.emit(tokenAnd)
235 return readExp
236 }
237 return readPredicate
238 }
239
240 func readOR(l *lexer) stateFn {
241 if l.acceptString("or") && l.accept(opBound) {
242 l.backup()
243 l.emit(tokenOr)
244 return readExp
245 }
246 return readPredicate
247 }
248
249 func runQuoted(l *lexer) bool {
250 l.accept("\"")
251 for {
252 r := l.next()
253 switch r {
254 case eof:
255 return false
256 case '\\':
257 l.next()
258 case '"':
259 return true
260 }
261 }
262 }
263
// readQuotedLiteral lexes a quoted search term, emitting an error
// token (and halting the lexer) if the quote is never closed.
func readQuotedLiteral(l *lexer) stateFn {
	if !runQuoted(l) {
		return l.errorf("Unclosed quote")
	}
	l.emit(tokenQuotedLiteral)
	return readOperator
}
271
// readQuotedArg lexes a quoted predicate argument, emitting an error
// token (and halting the lexer) if the quote is never closed. A ':'
// directly after the closing quote continues a chained argument.
func readQuotedArg(l *lexer) stateFn {
	if !runQuoted(l) {
		return l.errorf("Unclosed quote")
	}
	l.emit(tokenQuotedArg)
	if l.peek() == ':' {
		return readColon
	}
	return readOperator
}
282
// readExp is the start state for an expression: skip and discard
// leading whitespace, then dispatch on the first significant rune.
// Returning nil at eof ends the state machine (run then closes the
// token channel); the lexer itself never emits tokenEOF.
func readExp(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.ignore()
	switch l.peek() {
	case eof:
		return nil
	case '(':
		return readOpen
	case ')':
		return readClose
	case '-':
		return readNeg
	case '"':
		return readQuotedLiteral
	}
	return readPredicate
}
300
// readOperator runs after a complete term, where a boolean operator
// may appear. Only lowercase "and"/"or" are recognized (dispatch is on
// the first rune; readAND/readOR verify the rest); anything else is
// handed back to readExp as the start of a new term.
func readOperator(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.ignore()
	switch l.peek() {
	case 'a':
		return readAND
	case 'o':
		return readOR
	}
	return readExp
}