Home Download Docs Code Community
     1	/*
     2	Copyright 2014 The Perkeep Authors
     3	
     4	Licensed under the Apache License, Version 2.0 (the "License");
     5	you may not use this file except in compliance with the License.
     6	You may obtain a copy of the License at
     7	
     8	     http://www.apache.org/licenses/LICENSE-2.0
     9	
    10	Unless required by applicable law or agreed to in writing, software
    11	distributed under the License is distributed on an "AS IS" BASIS,
    12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13	See the License for the specific language governing permissions and
    14	limitations under the License.
    15	*/
    16	
    17	// This is the lexer for search expressions (see expr.go).
    18	
    19	package search
    20	
    21	import (
    22		"fmt"
    23		"strings"
    24		"unicode"
    25		"unicode/utf8"
    26	)
    27	
// tokenType identifies the kind of a token produced by the lexer.
type tokenType int

// The token kinds emitted on the lexer's token channel.
const (
	tokenAnd           tokenType = iota // operator word "and"
	tokenArg                            // unquoted predicate argument
	tokenClose                          // ')'
	tokenColon                          // ':' separating predicate from argument
	tokenEOF                            // end of input
	tokenError                          // lexing failed; token val holds the message
	tokenLiteral                        // unquoted literal search word
	tokenNot                            // '-' negation prefix
	tokenOpen                           // '('
	tokenOr                             // operator word "or"
	tokenPredicate                      // predicate name (letters immediately before ':')
	tokenQuotedArg                      // double-quoted predicate argument
	tokenQuotedLiteral                  // double-quoted literal
)
    45	
    46	const (
    47		eof        = -1 // -1 is unused in utf8
    48		whitespace = "\t\n\f\v\r "
    49		opBound    = whitespace + "("
    50	)
    51	
    52	// IsSearchWordRune defines the runes that can be used in unquoted predicate arguments
    53	// or unquoted literals. These are all non-space unicode characters except ':' which is
    54	// used for predicate marking,  and '(', ')', which are used for predicate grouping.
    55	func isSearchWordRune(r rune) bool {
    56		switch r {
    57		case ':', ')', '(', eof:
    58			return false
    59		}
    60		return !unicode.IsSpace(r)
    61	}
    62	
// token is a single lexed element of a search expression.
type token struct {
	typ   tokenType // kind of token
	val   string    // text of the token as it appears in the input
	start int       // byte offset of the token within the input
}

// String renders the token for debugging: EOF and error tokens get
// dedicated forms, all others show type, value, and starting offset.
func (t token) String() string {
	switch t.typ {
	case tokenEOF:
		return "EOF"
	case tokenError:
		return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start)
	}
	return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start)
}
    78	
// lexer holds the state of the scanner as it walks the input.
type lexer struct {
	input  string     // the string being scanned
	start  int        // byte offset where the current token begins
	pos    int        // current byte offset in the input
	width  int        // byte width of the last rune read, so backup can undo it
	tokens chan token // channel on which scanned tokens are delivered
	state  stateFn    // next state function to run
}
    87	
// emit sends the input between start and pos as a token of type typ,
// then advances start past the emitted text.
func (l *lexer) emit(typ tokenType) {
	l.tokens <- token{typ, l.input[l.start:l.pos], l.start}
	l.start = l.pos
}
    92	
    93	func (l *lexer) next() (r rune) {
    94		if l.pos >= len(l.input) {
    95			l.width = 0
    96			return eof
    97		}
    98		r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
    99		l.pos += l.width
   100		return
   101	}
   102	
// ignore discards the pending input between start and pos without
// emitting a token.
func (l *lexer) ignore() {
	l.start = l.pos
}

// backup steps back one rune. It is only valid once per call of next,
// since it relies on the width recorded by the most recent read.
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
   116	
   117	func (l *lexer) accept(valid string) bool {
   118		if strings.ContainsRune(valid, l.next()) {
   119			return true
   120		}
   121		l.backup()
   122		return false
   123	}
   124	
// acceptString consumes the runes of s in order and reports whether the
// whole string matched. On a mismatch only the offending rune is unread;
// any matched prefix stays consumed (callers like readAND/readOR recover
// by falling through to a state that emits from the unmoved start offset).
func (l *lexer) acceptString(s string) bool {
	for _, r := range s {
		if l.next() != r {
			l.backup()
			return false
		}
	}
	return true
}
   134	
// acceptRun consumes a run of runes from the valid set; the first rune
// not in the set (possibly eof) is left unread.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// acceptRunFn consumes runes for as long as valid returns true; the
// first rejected rune is left unread. valid must reject eof for the
// loop to terminate at end of input.
func (l *lexer) acceptRunFn(valid func(rune) bool) {
	for valid(l.next()) {
	}
	l.backup()
}
   146	
// errorf emits an error token whose val is the formatted message and
// returns a nil state, which terminates the lexing loop in run.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.tokens <- token{
		typ:   tokenError,
		val:   fmt.Sprintf(format, args...),
		start: l.start,
	}
	return nil
}
   155	
// lex creates a lexer for input and starts it running in its own
// goroutine. It returns the lexer and the channel on which tokens will
// arrive; the channel is closed when lexing finishes.
func lex(input string) (*lexer, chan token) {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
		state:  readExp,
	}
	go l.run()
	return l, l.tokens
}
   165	
   166	func (l *lexer) run() {
   167		for {
   168			if l.state == nil {
   169				close(l.tokens)
   170				return
   171			}
   172			l.state = l.state(l)
   173		}
   174	}
   175	
//
// State functions
//

// stateFn is one lexing state: it consumes input, possibly emits
// tokens, and returns the next state to run (nil to stop).
type stateFn func(*lexer) stateFn
   180	
// readNeg consumes a '-' negation prefix, emits tokenNot, and resumes
// lexing the negated expression.
func readNeg(l *lexer) stateFn {
	l.accept("-")
	l.emit(tokenNot)
	return readExp
}

// readClose consumes ')' and emits tokenClose; an operator may follow
// a closed group.
func readClose(l *lexer) stateFn {
	l.accept(")")
	l.emit(tokenClose)
	return readOperator
}

// readOpen consumes '(' and emits tokenOpen; a fresh expression follows.
func readOpen(l *lexer) stateFn {
	l.accept("(")
	l.emit(tokenOpen)
	return readExp
}

// readColon consumes the ':' separating a predicate (or a previous
// argument) from the argument that follows.
func readColon(l *lexer) stateFn {
	l.accept(":")
	l.emit(tokenColon)
	return readArg
}
   204	
   205	func readPredicate(l *lexer) stateFn {
   206		l.acceptRunFn(unicode.IsLetter)
   207		switch l.peek() {
   208		case ':':
   209			l.emit(tokenPredicate)
   210			return readColon
   211		}
   212		return readLiteral
   213	}
   214	
// readLiteral scans the remainder of an unquoted literal word (the
// token starts at l.start, which may already cover letters consumed by
// readPredicate) and emits tokenLiteral.
func readLiteral(l *lexer) stateFn {
	l.acceptRunFn(isSearchWordRune)
	l.emit(tokenLiteral)
	return readOperator
}
   220	
// readArg scans one predicate argument: quoted if it begins with '"',
// otherwise a run of search-word runes emitted as tokenArg. A trailing
// ':' chains a further argument onto the same predicate.
func readArg(l *lexer) stateFn {
	if l.peek() == '"' {
		return readQuotedArg
	}
	l.acceptRunFn(isSearchWordRune)
	l.emit(tokenArg)
	if l.peek() == ':' {
		return readColon
	}
	return readOperator
}
   232	
// readAND tries to consume the operator word "and". It only counts as
// an operator when followed by a rune in opBound (whitespace or '(');
// that boundary rune is unread again before emitting. Otherwise the
// word is not an operator and lexing falls through to readPredicate,
// which keeps scanning from the unchanged token start.
func readAND(l *lexer) stateFn {
	if l.acceptString("and") && l.accept(opBound) {
		l.backup()
		l.emit(tokenAnd)
		return readExp
	}
	return readPredicate
}

// readOR tries to consume the operator word "or", with the same
// boundary rule and fallback as readAND.
func readOR(l *lexer) stateFn {
	if l.acceptString("or") && l.accept(opBound) {
		l.backup()
		l.emit(tokenOr)
		return readExp
	}
	return readPredicate
}
   250	
   251	func runQuoted(l *lexer) bool {
   252		l.accept("\"")
   253		for {
   254			r := l.next()
   255			switch r {
   256			case eof:
   257				return false
   258			case '\\':
   259				l.next()
   260			case '"':
   261				return true
   262			}
   263		}
   264	}
   265	
// readQuotedLiteral scans a double-quoted literal and emits
// tokenQuotedLiteral, or an error token if the quote is never closed.
func readQuotedLiteral(l *lexer) stateFn {
	if !runQuoted(l) {
		return l.errorf("Unclosed quote")
	}
	l.emit(tokenQuotedLiteral)
	return readOperator
}

// readQuotedArg scans a double-quoted predicate argument and emits
// tokenQuotedArg. As in readArg, a trailing ':' chains another argument.
func readQuotedArg(l *lexer) stateFn {
	if !runQuoted(l) {
		return l.errorf("Unclosed quote")
	}
	l.emit(tokenQuotedArg)
	if l.peek() == ':' {
		return readColon
	}
	return readOperator
}
   284	
// readExp is the state at the start of an expression: it skips leading
// whitespace, then dispatches on the first significant rune. eof (nil
// return) ends lexing; otherwise the rune starts a group, negation,
// quoted literal, or a predicate/literal word.
func readExp(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.ignore()
	switch l.peek() {
	case eof:
		return nil
	case '(':
		return readOpen
	case ')':
		return readClose
	case '-':
		return readNeg
	case '"':
		return readQuotedLiteral
	}
	return readPredicate
}
   302	
// readOperator runs between expressions: it skips whitespace, then
// checks only the first rune for a possible lowercase "and"/"or"
// operator (readAND/readOR verify the rest of the word and fall back
// if it is not an operator). Anything else begins a new expression.
func readOperator(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.ignore()
	switch l.peek() {
	case 'a':
		return readAND
	case 'o':
		return readOR
	}
	return readExp
}
Website layout inspired by memcached.
Content by the authors.