Home Download Docs Code Community
     1	/*
     2	Copyright 2014 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	// This is the lexer for search expressions (see expr.go).
    18	
    19	package search
    20	
    21	import (
    22		"fmt"
    23		"strings"
    24		"unicode"
    25		"unicode/utf8"
    26	)
    27	
// tokenType identifies the kind of lexical token produced by the lexer.
type tokenType int

const (
	tokenAnd           tokenType = iota // the "and" operator
	tokenArg                            // unquoted predicate argument
	tokenClose                          // ')'
	tokenColon                          // ':' separating predicate and argument
	tokenEOF                            // end of input
	tokenError                          // lexing error; val holds the message
	tokenLiteral                        // unquoted bare word
	tokenNot                            // '-' negation marker
	tokenOpen                           // '('
	tokenOr                             // the "or" operator
	tokenPredicate                      // predicate name preceding a ':'
	tokenQuotedArg                      // double-quoted predicate argument
	tokenQuotedLiteral                  // double-quoted bare word
)
    45	
const (
	eof        = -1 // -1 is unused in utf8
	whitespace = "\t\n\f\v\r " // runes skipped between tokens
	opBound    = whitespace + "(" // runes that may legally follow an "and"/"or" operator
)
    51	
    52	// IsSearchWordRune defines the runes that can be used in unquoted predicate arguments
    53	// or unquoted literals. These are all non-space unicode characters except ':' which is
    54	// used for predicate marking,  and '(', ')', which are used for predicate grouping.
    55	func isSearchWordRune(r rune) bool {
    56		switch r {
    57		case ':', ')', '(', eof:
    58			return false
    59		}
    60		return !unicode.IsSpace(r)
    61	}
    62	
// token is a single lexeme: its type, its text, and where it started.
type token struct {
	typ   tokenType // kind of token
	val   string    // text of the token
	start int       // byte offset of the token in the input
}
    68	
    69	func (t token) String() string {
    70		switch t.typ {
    71		case tokenEOF:
    72			return "EOF"
    73		case tokenError:
    74			return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start)
    75		}
    76		return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start)
    77	}
    78	
// lexer holds the scanning state for one search expression.
type lexer struct {
	input  string     // the string being scanned
	start  int        // start offset of the token in progress
	pos    int        // current read offset in input
	width  int        // byte width of the last rune read; lets backup undo one next
	tokens chan token // channel on which scanned tokens are delivered
	state  stateFn    // current state function; nil when lexing is done
}
    87	
// emit sends the text accumulated since l.start as a token of type typ
// and advances l.start past it.
func (l *lexer) emit(typ tokenType) {
	l.tokens <- token{typ, l.input[l.start:l.pos], l.start}
	l.start = l.pos
}
    92	
    93	func (l *lexer) next() (r rune) {
    94		if l.pos >= len(l.input) {
    95			l.width = 0
    96			return eof
    97		}
    98		r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
    99		l.pos += l.width
   100		return
   101	}
   102	
// ignore discards the pending input by moving start up to pos.
func (l *lexer) ignore() {
	l.start = l.pos
}
   106	
// backup steps back one rune. Valid only once per call to next, since
// only the width of the most recent rune is remembered.
func (l *lexer) backup() {
	l.pos -= l.width
}
   110	
// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
   116	
   117	func (l *lexer) accept(valid string) bool {
   118		if strings.ContainsRune(valid, l.next()) {
   119			return true
   120		}
   121		l.backup()
   122		return false
   123	}
   124	
// acceptString consumes the runes of s in order and reports whether the
// whole string matched. On a mismatch only the offending rune is unread;
// earlier matching runes stay consumed. Callers tolerate this because
// emit always slices from l.start, not from where matching began.
func (l *lexer) acceptString(s string) bool {
	for _, r := range s {
		if l.next() != r {
			l.backup()
			return false
		}
	}
	return true
}
   134	
   135	func (l *lexer) acceptRun(valid string) {
   136		for strings.ContainsRune(valid, l.next()) {
   137		}
   138		l.backup()
   139	}
   140	
   141	func (l *lexer) acceptRunFn(valid func(rune) bool) {
   142		for valid(l.next()) {
   143		}
   144		l.backup()
   145	}
   146	
   147	func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   148		l.tokens <- token{
   149			typ:   tokenError,
   150			val:   fmt.Sprintf(format, args...),
   151			start: l.start,
   152		}
   153		return nil
   154	}
   155	
// lex starts lexing input in a new goroutine and returns the lexer along
// with the channel on which tokens will be delivered. The channel is
// closed when lexing finishes (see run).
func lex(input string) (*lexer, chan token) {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
		state:  readExp,
	}
	go l.run()
	return l, l.tokens
}
   165	
   166	func (l *lexer) run() {
   167		for {
   168			if l.state == nil {
   169				close(l.tokens)
   170				return
   171			}
   172			l.state = l.state(l)
   173		}
   174	}
   175	
// State functions

// stateFn lexes one construct and returns the state that should run
// next; nil means lexing is complete.
type stateFn func(*lexer) stateFn
   178	
// readNeg consumes a '-' negation marker, emits tokenNot, and resumes
// reading the negated expression.
func readNeg(l *lexer) stateFn {
	l.accept("-")
	l.emit(tokenNot)
	return readExp
}
   184	
// readClose consumes a ')' and emits tokenClose; a closing paren ends an
// expression, so an operator may follow.
func readClose(l *lexer) stateFn {
	l.accept(")")
	l.emit(tokenClose)
	return readOperator
}
   190	
// readOpen consumes a '(' and emits tokenOpen; a new sub-expression
// starts inside the parens.
func readOpen(l *lexer) stateFn {
	l.accept("(")
	l.emit(tokenOpen)
	return readExp
}
   196	
// readColon consumes the ':' separating a predicate (or a previous
// argument) from the argument that follows it.
func readColon(l *lexer) stateFn {
	l.accept(":")
	l.emit(tokenColon)
	return readArg
}
   202	
   203	func readPredicate(l *lexer) stateFn {
   204		l.acceptRunFn(unicode.IsLetter)
   205		switch l.peek() {
   206		case ':':
   207			l.emit(tokenPredicate)
   208			return readColon
   209		}
   210		return readLiteral
   211	}
   212	
// readLiteral scans the remainder of an unquoted bare word and emits it
// as tokenLiteral.
func readLiteral(l *lexer) stateFn {
	l.acceptRunFn(isSearchWordRune)
	l.emit(tokenLiteral)
	return readOperator
}
   218	
   219	func readArg(l *lexer) stateFn {
   220		if l.peek() == '"' {
   221			return readQuotedArg
   222		}
   223		l.acceptRunFn(isSearchWordRune)
   224		l.emit(tokenArg)
   225		if l.peek() == ':' {
   226			return readColon
   227		}
   228		return readOperator
   229	}
   230	
   231	func readAND(l *lexer) stateFn {
   232		if l.acceptString("and") && l.accept(opBound) {
   233			l.backup()
   234			l.emit(tokenAnd)
   235			return readExp
   236		}
   237		return readPredicate
   238	}
   239	
   240	func readOR(l *lexer) stateFn {
   241		if l.acceptString("or") && l.accept(opBound) {
   242			l.backup()
   243			l.emit(tokenOr)
   244			return readExp
   245		}
   246		return readPredicate
   247	}
   248	
   249	func runQuoted(l *lexer) bool {
   250		l.accept("\"")
   251		for {
   252			r := l.next()
   253			switch r {
   254			case eof:
   255				return false
   256			case '\\':
   257				l.next()
   258			case '"':
   259				return true
   260			}
   261		}
   262	}
   263	
// readQuotedLiteral reads a double-quoted bare word and emits it as
// tokenQuotedLiteral, or errors out on an unterminated quote.
func readQuotedLiteral(l *lexer) stateFn {
	if !runQuoted(l) {
		return l.errorf("Unclosed quote")
	}
	l.emit(tokenQuotedLiteral)
	return readOperator
}
   271	
   272	func readQuotedArg(l *lexer) stateFn {
   273		if !runQuoted(l) {
   274			return l.errorf("Unclosed quote")
   275		}
   276		l.emit(tokenQuotedArg)
   277		if l.peek() == ':' {
   278			return readColon
   279		}
   280		return readOperator
   281	}
   282	
   283	func readExp(l *lexer) stateFn {
   284		l.acceptRun(whitespace)
   285		l.ignore()
   286		switch l.peek() {
   287		case eof:
   288			return nil
   289		case '(':
   290			return readOpen
   291		case ')':
   292			return readClose
   293		case '-':
   294			return readNeg
   295		case '"':
   296			return readQuotedLiteral
   297		}
   298		return readPredicate
   299	}
   300	
   301	func readOperator(l *lexer) stateFn {
   302		l.acceptRun(whitespace)
   303		l.ignore()
   304		switch l.peek() {
   305		case 'a':
   306			return readAND
   307		case 'o':
   308			return readOR
   309		}
   310		return readExp
   311	}
Website layout inspired by memcached.
Content by the authors.