Use values parsed in scanner instead of reparsing

This commit is contained in:
Tobie Morgan Hitchcock 2016-09-07 16:53:04 +01:00
parent af4281c920
commit 1219b3825f
3 changed files with 140 additions and 120 deletions

View file

@ -28,9 +28,10 @@ type Parser struct {
c *fibre.Context c *fibre.Context
v map[string]interface{} v map[string]interface{}
buf struct { buf struct {
tok Token // last read token n int // buffer size
lit string // last read literal tok Token // last read token
n int // buffer size (max=1) lit string // last read literal
val interface{} // Last read value
} }
} }
@ -85,7 +86,7 @@ func (p *Parser) ParseMulti() (*Query, error) {
var text bool var text bool
for { for {
if tok, _ := p.scanIgnoreWhitespace(); tok == EOF { if tok, _, _ := p.scan(); tok == EOF {
if !text { if !text {
return nil, &EmptyError{} return nil, &EmptyError{}
} }
@ -160,7 +161,7 @@ func (p *Parser) ParseSingle() (Statement, error) {
func (p *Parser) mightBe(expected ...Token) (tok Token, lit string, found bool) { func (p *Parser) mightBe(expected ...Token) (tok Token, lit string, found bool) {
tok, lit = p.scanIgnoreWhitespace() tok, lit, _ = p.scan()
if found = p.in(tok, expected); !found { if found = p.in(tok, expected); !found {
p.unscan() p.unscan()
@ -172,7 +173,7 @@ func (p *Parser) mightBe(expected ...Token) (tok Token, lit string, found bool)
func (p *Parser) shouldBe(expected ...Token) (tok Token, lit string, err error) { func (p *Parser) shouldBe(expected ...Token) (tok Token, lit string, err error) {
tok, lit = p.scanIgnoreWhitespace() tok, lit, _ = p.scan()
if found := p.in(tok, expected); !found { if found := p.in(tok, expected); !found {
p.unscan() p.unscan()
@ -183,36 +184,51 @@ func (p *Parser) shouldBe(expected ...Token) (tok Token, lit string, err error)
} }
// scan returns the next token from the underlying scanner. // scan scans the next non-whitespace token.
// If a token has been unscanned then read that instead. func (p *Parser) scan() (tok Token, lit string, val interface{}) {
func (p *Parser) scan() (tok Token, lit string) {
// If we have a token on the buffer, then return it.
if p.buf.n != 0 {
p.buf.n = 0
return p.buf.tok, p.buf.lit
}
// Otherwise read the next token from the scanner. tok, lit, val = p.seek()
tok, lit = p.s.Scan()
// Save it to the buffer in case we unscan later.
p.buf.tok, p.buf.lit = tok, lit
return
}
// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() { p.buf.n = 1 }
// scanIgnoreWhitespace scans the next non-whitespace token.
func (p *Parser) scanIgnoreWhitespace() (tok Token, lit string) {
tok, lit = p.scan()
for { for {
if tok == WS { if tok == WS {
tok, lit = p.scan() tok, lit, val = p.seek()
} else { } else {
break break
} }
} }
return return
}
func (p *Parser) hold(tok Token) (val interface{}) {
if tok == p.buf.tok {
return p.buf.val
}
return nil
}
// seek returns the next token from the underlying scanner.
// If a token has been unscanned then read that instead.
func (p *Parser) seek() (tok Token, lit string, val interface{}) {
// If we have a token on the buffer, then return it.
if p.buf.n != 0 {
p.buf.n = 0
return p.buf.tok, p.buf.lit, p.buf.val
}
// Otherwise read the next token from the scanner.
tok, lit, val = p.s.Scan()
// Save it to the buffer in case we unscan later.
p.buf.tok, p.buf.lit, p.buf.val = tok, lit, val
return
}
// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() {
p.buf.n = 1
} }

View file

@ -37,7 +37,7 @@ func NewScanner(p *Parser, r io.Reader) *Scanner {
} }
// Scan returns the next token and literal value. // Scan returns the next token and literal value.
func (s *Scanner) Scan() (tok Token, lit string) { func (s *Scanner) Scan() (tok Token, lit string, val interface{}) {
// Read the next rune. // Read the next rune.
ch := s.next() ch := s.next()
@ -61,21 +61,21 @@ func (s *Scanner) Scan() (tok Token, lit string) {
switch ch { switch ch {
case eof: case eof:
return EOF, "" return EOF, "", val
case '*': case '*':
return ALL, string(ch) return ALL, string(ch), val
case '×': case '×':
return MUL, string(ch) return MUL, string(ch), val
case '∙': case '∙':
return MUL, string(ch) return MUL, string(ch), val
case '÷': case '÷':
return DIV, string(ch) return DIV, string(ch), val
case '@': case '@':
return EAT, string(ch) return EAT, string(ch), val
case ',': case ',':
return COMMA, string(ch) return COMMA, string(ch), val
case '.': case '.':
return DOT, string(ch) return DOT, string(ch), val
case '"': case '"':
return s.scanString(ch) return s.scanString(ch)
case '\'': case '\'':
@ -91,41 +91,41 @@ func (s *Scanner) Scan() (tok Token, lit string) {
case '$': case '$':
return s.scanParams(ch) return s.scanParams(ch)
case ':': case ':':
return COLON, string(ch) return COLON, string(ch), val
case ';': case ';':
return SEMICOLON, string(ch) return SEMICOLON, string(ch), val
case '(': case '(':
return LPAREN, string(ch) return LPAREN, string(ch), val
case ')': case ')':
return RPAREN, string(ch) return RPAREN, string(ch), val
case '¬': case '¬':
return NEQ, string(ch) return NEQ, string(ch), val
case '≤': case '≤':
return LTE, string(ch) return LTE, string(ch), val
case '≥': case '≥':
return GTE, string(ch) return GTE, string(ch), val
case '~': case '~':
return SIN, string(ch) return SIN, string(ch), val
case '∋': case '∋':
return SIN, string(ch) return SIN, string(ch), val
case '∌': case '∌':
return SNI, string(ch) return SNI, string(ch), val
case '⊇': case '⊇':
return CONTAINSALL, string(ch) return CONTAINSALL, string(ch), val
case '⊃': case '⊃':
return CONTAINSSOME, string(ch) return CONTAINSSOME, string(ch), val
case '⊅': case '⊅':
return CONTAINSNONE, string(ch) return CONTAINSNONE, string(ch), val
case '∈': case '∈':
return INS, string(ch) return INS, string(ch), val
case '∉': case '∉':
return NIS, string(ch) return NIS, string(ch), val
case '⊆': case '⊆':
return ALLCONTAINEDIN, string(ch) return ALLCONTAINEDIN, string(ch), val
case '⊂': case '⊂':
return SOMECONTAINEDIN, string(ch) return SOMECONTAINEDIN, string(ch), val
case '⊄': case '⊄':
return NONECONTAINEDIN, string(ch) return NONECONTAINEDIN, string(ch), val
case '#': case '#':
return s.scanCommentSingle(ch) return s.scanCommentSingle(ch)
case '/': case '/':
@ -135,7 +135,7 @@ func (s *Scanner) Scan() (tok Token, lit string) {
return s.scanCommentMultiple(ch) return s.scanCommentMultiple(ch)
case chn == ' ': case chn == ' ':
s.undo() s.undo()
return DIV, string(ch) return DIV, string(ch), val
default: default:
s.undo() s.undo()
return s.scanRegexp(ch) return s.scanRegexp(ch)
@ -144,99 +144,99 @@ func (s *Scanner) Scan() (tok Token, lit string) {
chn := s.next() chn := s.next()
switch { switch {
case chn == '~': case chn == '~':
return SIN, "=~" return SIN, "=~", val
case chn == '=': case chn == '=':
return EEQ, "==" return EEQ, "==", val
default: default:
s.undo() s.undo()
return EQ, string(ch) return EQ, string(ch), val
} }
case '?': case '?':
chn := s.next() chn := s.next()
switch { switch {
case chn == '=': case chn == '=':
return ANY, "?=" return ANY, "?=", val
default: default:
s.undo() s.undo()
return QMARK, string(ch) return QMARK, string(ch), val
} }
case '!': case '!':
chn := s.next() chn := s.next()
switch { switch {
case chn == '=': case chn == '=':
if s.next() == '=' { if s.next() == '=' {
return NEE, "!==" return NEE, "!==", val
} else { } else {
s.undo() s.undo()
return NEQ, "!=" return NEQ, "!=", val
} }
case chn == '~': case chn == '~':
return SNI, "!~" return SNI, "!~", val
default: default:
s.undo() s.undo()
return EXC, string(ch) return EXC, string(ch), val
} }
case '+': case '+':
chn := s.next() chn := s.next()
switch { switch {
case chn == '=': case chn == '=':
return INC, "+=" return INC, "+=", val
case isNumber(chn): case isNumber(chn):
return s.scanNumber(ch, chn) return s.scanNumber(ch, chn)
default: default:
s.undo() s.undo()
return ADD, string(ch) return ADD, string(ch), val
} }
case '-': case '-':
chn := s.next() chn := s.next()
switch { switch {
case chn == '=': case chn == '=':
return DEC, "-=" return DEC, "-=", val
case chn == '>': case chn == '>':
return OEDGE, "->" return OEDGE, "->", val
case chn == '-': case chn == '-':
return s.scanCommentSingle(ch) return s.scanCommentSingle(ch)
case isNumber(chn): case isNumber(chn):
return s.scanNumber(ch, chn) return s.scanNumber(ch, chn)
default: default:
s.undo() s.undo()
return SUB, string(ch) return SUB, string(ch), val
} }
case '>': case '>':
chn := s.next() chn := s.next()
switch { switch {
case chn == '=': case chn == '=':
return GTE, ">=" return GTE, ">=", val
default: default:
s.undo() s.undo()
return GT, string(ch) return GT, string(ch), val
} }
case '<': case '<':
chn := s.next() chn := s.next()
switch { switch {
case chn == '>': case chn == '>':
return NEQ, "<>" return NEQ, "<>", val
case chn == '=': case chn == '=':
return LTE, "<=" return LTE, "<=", val
case chn == '-': case chn == '-':
if s.next() == '>' { if s.next() == '>' {
return BEDGE, "<->" return BEDGE, "<->", val
} else { } else {
s.undo() s.undo()
return IEDGE, "<-" return IEDGE, "<-", val
} }
default: default:
s.undo() s.undo()
return LT, string(ch) return LT, string(ch), val
} }
} }
return ILLEGAL, string(ch) return ILLEGAL, string(ch), val
} }
// scanBlank consumes the current rune and all contiguous whitespace. // scanBlank consumes the current rune and all contiguous whitespace.
func (s *Scanner) scanBlank(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanBlank(chp ...rune) (tok Token, lit string, val interface{}) {
tok = WS tok = WS
@ -260,12 +260,12 @@ func (s *Scanner) scanBlank(chp ...rune) (tok Token, lit string) {
} }
} }
return tok, buf.String() return tok, buf.String(), val
} }
// scanCommentSingle consumes the current rune and all contiguous whitespace. // scanCommentSingle consumes the current rune and all contiguous whitespace.
func (s *Scanner) scanCommentSingle(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanCommentSingle(chp ...rune) (tok Token, lit string, val interface{}) {
tok = WS tok = WS
@ -287,12 +287,12 @@ func (s *Scanner) scanCommentSingle(chp ...rune) (tok Token, lit string) {
} }
} }
return tok, buf.String() return tok, buf.String(), val
} }
// scanCommentMultiple consumes the current rune and all contiguous whitespace. // scanCommentMultiple consumes the current rune and all contiguous whitespace.
func (s *Scanner) scanCommentMultiple(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanCommentMultiple(chp ...rune) (tok Token, lit string, val interface{}) {
tok = WS tok = WS
@ -319,24 +319,24 @@ func (s *Scanner) scanCommentMultiple(chp ...rune) (tok Token, lit string) {
} }
} }
return tok, buf.String() return tok, buf.String(), val
} }
func (s *Scanner) scanParams(chp ...rune) (Token, string) { func (s *Scanner) scanParams(chp ...rune) (tok Token, lit string, val interface{}) {
tok, lit := s.scanIdent(chp...) tok, lit, val = s.scanIdent(chp...)
return BOUNDPARAM, lit
if s.p.is(tok, IDENT) { if s.p.is(tok, IDENT) {
return PARAM, lit, nil
} }
return tok, lit return
} }
// scanIdent consumes the current rune and all contiguous ident runes. // scanIdent consumes the current rune and all contiguous ident runes.
func (s *Scanner) scanIdent(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanIdent(chp ...rune) (tok Token, lit string, val interface{}) {
tok = IDENT tok = IDENT
@ -362,19 +362,19 @@ func (s *Scanner) scanIdent(chp ...rune) (tok Token, lit string) {
// If the string matches a keyword then return that keyword. // If the string matches a keyword then return that keyword.
if tok := keywords[strings.ToUpper(buf.String())]; tok > 0 { if tok := keywords[strings.ToUpper(buf.String())]; tok > 0 {
return tok, buf.String() return tok, buf.String(), val
} }
if _, err := time.ParseDuration(buf.String()); err == nil { if val, err := time.ParseDuration(buf.String()); err == nil {
return DURATION, buf.String() return DURATION, buf.String(), val
} }
// Otherwise return as a regular identifier. // Otherwise return as a regular identifier.
return tok, buf.String() return tok, buf.String(), val
} }
func (s *Scanner) scanNumber(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanNumber(chp ...rune) (tok Token, lit string, val interface{}) {
tok = NUMBER tok = NUMBER
@ -409,23 +409,23 @@ func (s *Scanner) scanNumber(chp ...rune) (tok Token, lit string) {
} }
} }
return tok, buf.String() return tok, buf.String(), nil
} }
func (s *Scanner) scanQuoted(chp ...rune) (Token, string) { func (s *Scanner) scanQuoted(chp ...rune) (tok Token, lit string, val interface{}) {
tok, lit := s.scanString(chp...) tok, lit, val = s.scanString(chp...)
return IDENT, lit
if s.p.is(tok, STRING) { if s.p.is(tok, STRING) {
return IDENT, lit, nil
} }
return tok, lit return
} }
func (s *Scanner) scanString(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanString(chp ...rune) (tok Token, lit string, val interface{}) {
beg := chp[0] beg := chp[0]
end := beg end := beg
@ -454,7 +454,7 @@ func (s *Scanner) scanString(chp ...rune) (tok Token, lit string) {
if ch := s.next(); ch == end { if ch := s.next(); ch == end {
break break
} else if ch == eof { } else if ch == eof {
return ILLEGAL, buf.String() return ILLEGAL, buf.String(), val
} else if ch == '\n' { } else if ch == '\n' {
tok = REGION tok = REGION
buf.WriteRune(ch) buf.WriteRune(ch)
@ -482,23 +482,23 @@ func (s *Scanner) scanString(chp ...rune) (tok Token, lit string) {
} }
} }
if _, err := time.ParseDuration(buf.String()); err == nil { if val, err := time.ParseDuration(buf.String()); err == nil {
return DURATION, buf.String() return DURATION, buf.String(), val
} }
if _, err := time.Parse("2006-01-02", buf.String()); err == nil { if val, err := time.Parse("2006-01-02", buf.String()); err == nil {
return DATE, buf.String() return DATE, buf.String(), val
} }
if _, err := time.Parse(time.RFC3339, buf.String()); err == nil { if val, err := time.Parse(time.RFC3339, buf.String()); err == nil {
return TIME, buf.String() return TIME, buf.String(), val
} }
return tok, buf.String() return tok, buf.String(), val
} }
func (s *Scanner) scanRegexp(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanRegexp(chp ...rune) (tok Token, lit string, val interface{}) {
tok = IDENT tok = IDENT
@ -512,7 +512,7 @@ func (s *Scanner) scanRegexp(chp ...rune) (tok Token, lit string) {
if ch := s.next(); ch == chp[0] { if ch := s.next(); ch == chp[0] {
break break
} else if ch == eof { } else if ch == eof {
return ILLEGAL, buf.String() return ILLEGAL, buf.String(), val
} else if ch == '\\' { } else if ch == '\\' {
chn := s.next() chn := s.next()
buf.WriteRune(ch) buf.WriteRune(ch)
@ -522,15 +522,15 @@ func (s *Scanner) scanRegexp(chp ...rune) (tok Token, lit string) {
} }
} }
if _, err := regexp.Compile(buf.String()); err == nil { if val, err := regexp.Compile(buf.String()); err == nil {
return REGEX, buf.String() return REGEX, buf.String(), val
} }
return tok, buf.String() return tok, buf.String(), val
} }
func (s *Scanner) scanObject(chp ...rune) (tok Token, lit string) { func (s *Scanner) scanObject(chp ...rune) (tok Token, lit string, val interface{}) {
beg := chp[0] beg := chp[0]
end := beg end := beg
@ -566,11 +566,11 @@ func (s *Scanner) scanObject(chp ...rune) (tok Token, lit string) {
sub-- sub--
buf.WriteRune(ch) buf.WriteRune(ch)
} else if ch == eof { } else if ch == eof {
return ILLEGAL, buf.String() return ILLEGAL, buf.String(), val
} else if ch == '\\' { } else if ch == '\\' {
switch chn := s.next(); chn { switch chn := s.next(); chn {
default: default:
return ILLEGAL, buf.String() return ILLEGAL, buf.String(), val
case 'b', 't', 'r', 'n', 'f', '"', '\\': case 'b', 't', 'r', 'n', 'f', '"', '\\':
buf.WriteRune(ch) buf.WriteRune(ch)
buf.WriteRune(chn) buf.WriteRune(chn)
@ -581,13 +581,13 @@ func (s *Scanner) scanObject(chp ...rune) (tok Token, lit string) {
} }
if beg == '{' { if beg == '{' {
return JSON, buf.String() return JSON, buf.String(), val
} }
if beg == '[' { if beg == '[' {
return ARRAY, buf.String() return ARRAY, buf.String(), val
} }
return ILLEGAL, buf.String() return ILLEGAL, buf.String(), val
} }

View file

@ -60,6 +60,10 @@ func (p *Parser) contains(search string, strings []string) bool {
func (p *Parser) declare(tok Token, lit string) (interface{}, error) { func (p *Parser) declare(tok Token, lit string) (interface{}, error) {
if val := p.hold(tok); val != nil {
return val, nil
}
switch tok { switch tok {
case TRUE: case TRUE: