surrealpatch/sql/scanner.go

1058 lines
19 KiB
Go
Raw Normal View History

2016-02-26 17:27:07 +00:00
// Copyright © 2016 Abcum Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sql
import (
"bufio"
"bytes"
"io"
2016-09-06 13:30:59 +00:00
"regexp"
"strconv"
2016-02-26 17:27:07 +00:00
"strings"
"time"
)
// scanner is a rune-level lexical scanner layered over a buffered reader.
// It keeps every rune it has handed out so that any number of them can be
// pushed back and read again.
type scanner struct {
	// b is the lookbehind buffer: each rune returned by next is appended
	// here, and undo removes runes from its end.
	// a is the pushback buffer: undo appends to it and next drains it from
	// the end before touching the reader.
	b, a []rune
	// r is the buffered source the runes are read from.
	r *bufio.Reader
}
// newScanner returns a new instance of Scanner.
func newScanner(r io.Reader) *scanner {
return &scanner{r: bufio.NewReader(r)}
2016-02-26 17:27:07 +00:00
}
// scan returns the next token and literal value.
func (s *scanner) scan() (tok Token, lit string, val interface{}) {
2016-02-26 17:27:07 +00:00
// Read the next rune.
2016-09-06 13:30:59 +00:00
ch := s.next()
2016-02-26 17:27:07 +00:00
// If we see whitespace then consume all contiguous whitespace.
2016-09-07 15:58:50 +00:00
if isBlank(ch) {
2016-09-06 13:30:59 +00:00
return s.scanBlank(ch)
2016-02-26 17:27:07 +00:00
}
2016-09-07 15:58:50 +00:00
// If we see a letter then consume as a string.
2016-02-26 17:27:07 +00:00
if isLetter(ch) {
2017-11-16 20:53:13 +00:00
return s.scanIdiom(ch)
2016-02-26 17:27:07 +00:00
}
// If we see a number then consume as a number.
if isNumber(ch) {
2016-09-06 13:30:59 +00:00
return s.scanNumber(ch)
2016-02-26 17:27:07 +00:00
}
// Otherwise read the individual character.
switch ch {
case eof:
return EOF, "", val
2016-02-26 17:27:07 +00:00
case '*':
return MUL, string(ch), val
2016-09-06 13:30:59 +00:00
case '×':
return MUL, string(ch), val
2016-09-06 13:30:59 +00:00
case '∙':
return MUL, string(ch), val
2016-09-06 13:30:59 +00:00
case '÷':
return DIV, string(ch), val
2016-02-26 17:27:07 +00:00
case ',':
return COMMA, string(ch), val
2016-02-26 17:27:07 +00:00
case '.':
return DOT, string(ch), val
case '@':
return s.scanThing(ch)
2016-02-26 17:27:07 +00:00
case '"':
2016-09-06 13:30:59 +00:00
return s.scanString(ch)
2016-02-26 17:27:07 +00:00
case '\'':
2016-09-06 13:30:59 +00:00
return s.scanString(ch)
2016-02-26 17:27:07 +00:00
case '`':
2016-09-06 13:30:59 +00:00
return s.scanQuoted(ch)
2016-05-23 12:32:02 +00:00
case '⟨':
2016-09-06 13:30:59 +00:00
return s.scanQuoted(ch)
2016-02-26 17:27:07 +00:00
case '{':
2016-09-06 13:30:59 +00:00
return s.scanObject(ch)
2016-02-26 17:27:07 +00:00
case '[':
2016-09-06 13:30:59 +00:00
return s.scanObject(ch)
case '$':
return s.scanParams(ch)
2016-02-26 17:27:07 +00:00
case ':':
return COLON, string(ch), val
2016-02-26 17:27:07 +00:00
case ';':
return SEMICOLON, string(ch), val
2016-02-26 17:27:07 +00:00
case '(':
return LPAREN, string(ch), val
2016-02-26 17:27:07 +00:00
case ')':
return RPAREN, string(ch), val
2016-09-06 13:30:59 +00:00
case '¬':
return NEQ, string(ch), val
2016-09-06 13:30:59 +00:00
case '≤':
return LTE, string(ch), val
2016-09-06 13:30:59 +00:00
case '≥':
return GTE, string(ch), val
2016-09-06 13:30:59 +00:00
case '~':
return MAT, string(ch), val
2016-09-06 13:30:59 +00:00
case '∋':
return SIN, string(ch), val
2016-09-06 13:30:59 +00:00
case '∌':
return SNI, string(ch), val
2016-09-06 13:30:59 +00:00
case '⊇':
return CONTAINSALL, string(ch), val
2016-09-06 13:30:59 +00:00
case '⊃':
return CONTAINSSOME, string(ch), val
2016-09-06 13:30:59 +00:00
case '⊅':
return CONTAINSNONE, string(ch), val
2016-09-06 13:30:59 +00:00
case '∈':
return INS, string(ch), val
2016-09-06 13:30:59 +00:00
case '∉':
return NIS, string(ch), val
2016-09-06 13:30:59 +00:00
case '⊆':
return ALLCONTAINEDIN, string(ch), val
2016-09-06 13:30:59 +00:00
case '⊂':
return SOMECONTAINEDIN, string(ch), val
2016-09-06 13:30:59 +00:00
case '⊄':
return NONECONTAINEDIN, string(ch), val
2016-09-06 13:30:59 +00:00
case '#':
return s.scanCommentSingle(ch)
case '|':
chn := s.next()
switch {
case chn == '|':
return OR, "OR", val
default:
s.undo()
return s.scanModel(ch)
}
case '&':
chn := s.next()
switch {
case chn == '&':
return AND, "AND", val
default:
s.undo()
}
2016-07-21 21:45:35 +00:00
case '/':
chn := s.next()
switch {
case chn == '/':
return s.scanCommentSingle(ch)
2016-07-21 21:45:35 +00:00
case chn == '*':
return s.scanCommentMultiple(ch)
case isNumber(chn):
s.undo()
return DIV, string(ch), val
2016-09-06 13:30:59 +00:00
case chn == ' ':
s.undo()
return DIV, string(ch), val
2016-07-21 21:45:35 +00:00
default:
2016-09-06 13:30:59 +00:00
s.undo()
2016-07-21 21:45:35 +00:00
return s.scanRegexp(ch)
2016-07-04 10:37:37 +00:00
}
2016-09-06 13:30:59 +00:00
case '=':
chn := s.next()
switch {
case chn == '~':
return MAT, "=~", val
2016-09-06 13:30:59 +00:00
case chn == '=':
return EEQ, "==", val
2016-09-06 13:30:59 +00:00
default:
s.undo()
return EQ, string(ch), val
2016-02-26 17:27:07 +00:00
}
2016-09-06 13:30:59 +00:00
case '?':
chn := s.next()
switch {
case chn == '=':
return ANY, "?=", val
case chn == '~':
return MAY, "?~", val
2016-09-06 13:30:59 +00:00
default:
s.undo()
return QMARK, string(ch), val
2016-02-26 17:27:07 +00:00
}
2016-09-06 13:30:59 +00:00
case '!':
chn := s.next()
switch {
case chn == '=':
if s.next() == '=' {
return NEE, "!==", val
2016-09-06 13:30:59 +00:00
} else {
s.undo()
return NEQ, "!=", val
2016-07-04 10:37:37 +00:00
}
2016-09-06 13:30:59 +00:00
case chn == '~':
return NAT, "!~", val
2016-09-06 13:30:59 +00:00
default:
s.undo()
return EXC, string(ch), val
2016-07-04 10:37:37 +00:00
}
2016-09-06 13:30:59 +00:00
case '+':
chn := s.next()
switch {
case chn == '=':
return INC, "+=", val
2016-09-06 13:30:59 +00:00
case isNumber(chn):
return s.scanNumber(ch, chn)
default:
s.undo()
return ADD, string(ch), val
2016-09-06 13:30:59 +00:00
}
case '-':
chn := s.next()
switch {
case chn == '=':
return DEC, "-=", val
2016-09-06 13:30:59 +00:00
case chn == '>':
return OEDGE, "->", val
2016-09-06 13:30:59 +00:00
case chn == '-':
return s.scanCommentSingle(ch)
case isNumber(chn):
return s.scanNumber(ch, chn)
default:
s.undo()
return SUB, string(ch), val
2016-02-26 17:27:07 +00:00
}
case '>':
2016-09-06 13:30:59 +00:00
chn := s.next()
switch {
case chn == '=':
return GTE, ">=", val
2016-09-06 13:30:59 +00:00
default:
s.undo()
return GT, string(ch), val
2016-09-06 13:30:59 +00:00
}
case '<':
chn := s.next()
switch {
case chn == '>':
return NEQ, "<>", val
2016-09-06 13:30:59 +00:00
case chn == '=':
return LTE, "<=", val
2016-09-06 13:30:59 +00:00
case chn == '-':
if s.next() == '>' {
return BEDGE, "<->", val
2016-09-06 13:30:59 +00:00
} else {
s.undo()
return IEDGE, "<-", val
2016-09-06 13:30:59 +00:00
}
default:
s.undo()
return LT, string(ch), val
2016-02-26 17:27:07 +00:00
}
}
return ILLEGAL, string(ch), val
2016-02-26 17:27:07 +00:00
}
2016-09-06 13:30:59 +00:00
// scanBlank consumes the current rune and all contiguous whitespace.
func (s *scanner) scanBlank(chp ...rune) (tok Token, lit string, val interface{}) {
2016-09-06 13:30:59 +00:00
tok = WS
2016-02-26 17:27:07 +00:00
2016-09-06 13:30:59 +00:00
// Create a buffer
2016-02-26 17:27:07 +00:00
var buf bytes.Buffer
2016-09-06 13:30:59 +00:00
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
// Read subsequent characters
2016-02-26 17:27:07 +00:00
for {
2016-09-06 13:30:59 +00:00
if ch := s.next(); ch == eof {
2016-02-26 17:27:07 +00:00
break
2016-09-07 15:58:50 +00:00
} else if !isBlank(ch) {
2016-09-06 13:30:59 +00:00
s.undo()
2016-02-26 17:27:07 +00:00
break
} else {
buf.WriteRune(ch)
}
}
return tok, buf.String(), val
2016-09-06 13:30:59 +00:00
}
// scanCommentSingle consumes the current rune and all contiguous whitespace.
func (s *scanner) scanCommentSingle(chp ...rune) (tok Token, lit string, val interface{}) {
2016-09-06 13:30:59 +00:00
tok = WS
// Create a buffer
var buf bytes.Buffer
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
// Read subsequent characters
for {
if ch := s.next(); ch == eof {
break
} else if ch == '\n' || ch == '\r' {
2016-09-06 13:30:59 +00:00
buf.WriteRune(ch)
break
} else {
buf.WriteRune(ch)
}
}
return tok, buf.String(), val
2016-09-06 13:30:59 +00:00
}
// scanCommentMultiple consumes the current rune and all contiguous whitespace.
func (s *scanner) scanCommentMultiple(chp ...rune) (tok Token, lit string, val interface{}) {
2016-09-06 13:30:59 +00:00
tok = WS
// Create a buffer
var buf bytes.Buffer
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
// Read subsequent characters
for {
if ch := s.next(); ch == eof {
break
} else if ch == '*' {
if chn := s.next(); chn == '/' {
buf.WriteRune(chn)
break
}
buf.WriteRune(ch)
} else {
buf.WriteRune(ch)
}
}
return tok, buf.String(), val
2016-09-06 13:30:59 +00:00
}
func (s *scanner) scanParams(chp ...rune) (tok Token, lit string, val interface{}) {
2016-09-06 13:30:59 +00:00
2017-11-16 20:53:13 +00:00
tok, lit, _ = s.scanIdiom()
if is(tok, THING) {
2017-11-16 20:53:13 +00:00
return ILLEGAL, lit, val
}
2016-09-06 13:30:59 +00:00
if is(tok, REGION) {
2016-09-14 21:22:18 +00:00
return ILLEGAL, lit, val
2016-09-06 13:30:59 +00:00
}
if is(tok, ILLEGAL) {
2016-09-14 21:22:18 +00:00
return ILLEGAL, lit, val
}
return PARAM, lit, val
}
func (s *scanner) scanQuoted(chp ...rune) (tok Token, lit string, val interface{}) {
2017-11-16 20:53:13 +00:00
var tbv string
var idv interface{}
// Create a buffer
var buf bytes.Buffer
tok, lit, _ = s.scanString(chp...)
if is(tok, REGION) {
2017-11-16 20:53:13 +00:00
return ILLEGAL, lit, val
}
if is(tok, ILLEGAL) {
2017-11-16 20:53:13 +00:00
return ILLEGAL, lit, val
}
if ch := s.next(); ch == ':' {
tbv = lit
buf.WriteString(lit)
buf.WriteRune(ch)
if tok, lit, idv = s.part(); tok == ILLEGAL {
buf.WriteString(lit)
return ILLEGAL, buf.String(), val
} else {
buf.WriteString(lit)
}
return THING, buf.String(), NewThing(tbv, idv)
} else if ch != eof {
s.undo()
}
return IDENT, lit, val
}
func (s *scanner) scanSection(chp ...rune) (tok Token, lit string, val interface{}) {
2016-09-14 21:22:18 +00:00
tok, lit, _ = s.scanString(chp...)
if is(tok, REGION) {
2016-09-14 21:22:18 +00:00
return ILLEGAL, lit, val
}
if is(tok, ILLEGAL) {
2016-09-14 21:22:18 +00:00
return ILLEGAL, lit, val
}
return IDENT, lit, val
2016-02-26 17:27:07 +00:00
}
// scanIdent consumes the current rune and all contiguous ident runes.
func (s *scanner) scanIdent(chp ...rune) (tok Token, lit string, val interface{}) {
2016-09-06 13:30:59 +00:00
tok = IDENT
2016-02-26 17:27:07 +00:00
2016-09-06 13:30:59 +00:00
// Create a buffer
2016-02-26 17:27:07 +00:00
var buf bytes.Buffer
2016-09-06 13:30:59 +00:00
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
2017-11-16 20:53:13 +00:00
// Read subsequent characters
for {
if ch := s.next(); ch == eof {
break
} else if isIdentChar(ch) {
buf.WriteRune(ch)
} else {
s.undo()
break
}
}
// If the string matches a keyword then return that keyword.
if tok := keywords[strings.ToUpper(buf.String())]; tok > 0 {
return tok, buf.String(), val
}
if val, err := time.ParseDuration(buf.String()); err == nil {
return DURATION, buf.String(), val
}
// Otherwise return as a regular identifier.
return tok, buf.String(), val
}
// scanIdiom consumes the current rune and all contiguous ident runes.
func (s *scanner) scanIdiom(chp ...rune) (tok Token, lit string, val interface{}) {
tok = IDENT
var tbv string
var idv interface{}
// Create a buffer
var buf bytes.Buffer
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
2016-09-06 13:30:59 +00:00
// Read subsequent characters
2016-02-26 17:27:07 +00:00
for {
2016-09-06 13:30:59 +00:00
if ch := s.next(); ch == eof {
2016-02-26 17:27:07 +00:00
break
} else if isIdentChar(ch) {
buf.WriteRune(ch)
} else if isExprsChar(ch) {
tok = EXPR
buf.WriteRune(ch)
2017-11-16 20:53:13 +00:00
} else if ch == ':' {
if tok == EXPR {
s.undo()
break
}
tbv = buf.String()
buf.WriteRune(ch)
if tok, lit, idv = s.part(); tok == ILLEGAL {
buf.WriteString(lit)
return ILLEGAL, buf.String(), val
} else {
buf.WriteString(lit)
}
return THING, buf.String(), NewThing(tbv, idv)
} else {
2016-09-06 13:30:59 +00:00
s.undo()
2016-02-26 17:27:07 +00:00
break
}
}
// If the string matches a keyword then return that keyword.
if tok := keywords[strings.ToUpper(buf.String())]; tok > 0 {
return tok, buf.String(), val
2016-02-26 17:27:07 +00:00
}
if val, err := time.ParseDuration(buf.String()); err == nil {
return DURATION, buf.String(), val
2016-05-23 12:32:02 +00:00
}
2016-02-26 17:27:07 +00:00
// Otherwise return as a regular identifier.
return tok, buf.String(), val
2016-02-26 17:27:07 +00:00
}
// scanThing consumes the current rune and all contiguous ident runes.
func (s *scanner) scanThing(chp ...rune) (tok Token, lit string, val interface{}) {
tok = THING
2017-03-02 10:47:10 +00:00
var tbv string
var idv interface{}
// Create a buffer
var buf bytes.Buffer
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
if tok, tbv, _ = s.part(); tok == ILLEGAL {
buf.WriteString(tbv)
return ILLEGAL, buf.String(), val
} else {
buf.WriteString(tbv)
}
if ch := s.next(); ch == ':' {
buf.WriteRune(ch)
} else {
return ILLEGAL, buf.String(), val
}
if tok, lit, idv = s.part(); tok == ILLEGAL {
buf.WriteString(lit)
return ILLEGAL, buf.String(), val
} else {
buf.WriteString(lit)
}
return THING, buf.String(), NewThing(tbv, idv)
}
// scanModel scans a model expression. After the runes passed in (the
// opening '|') it accepts one of these forms:
//
//	table:max|
//	table:min..max|
//	table:min,inc..max|
//
// where min, inc and max are signed decimal numbers read by scanSignal. On
// success it returns a MODEL token whose value is built with NewModel. It
// returns ILLEGAL, with the text gathered so far as the literal, when
//
//   - the table part is illegal or a separator is missing,
//   - a number is missing or is not a valid float (for instance "1.2.3"
//     or a lone sign),
//   - min and max are equal,
//   - an increment is given but is not greater than zero, or
//   - an increment is given without a ".." range.
//
// When a range is given without an increment, the increment defaults to 1.
func (s *scanner) scanModel(chp ...rune) (tok Token, lit string, val interface{}) {

	var (
		com bool // an increment was given after a comma
		dot bool // a range was given with ".."
		tbv string
		lo  float64
		inc float64
		hi  float64
		ok  bool
		buf bytes.Buffer
	)

	// Start with the runes the caller has already consumed.
	for _, ch := range chp {
		buf.WriteRune(ch)
	}

	// signal reads one signed number from the input, appends its text to
	// the literal and parses it. It reports false when the next rune can
	// not start a number, or when the text is not a valid float.
	signal := func() (float64, bool) {
		ch := s.next()
		if !isSignal(ch) {
			return 0, false
		}
		_, txt, _ := s.scanSignal(ch)
		buf.WriteString(txt)
		num, err := strconv.ParseFloat(txt, 64)
		return num, err == nil
	}

	tok, tbv, _ = s.part()
	buf.WriteString(tbv)

	if tok == ILLEGAL {
		return ILLEGAL, buf.String(), nil
	}

	if ch := s.next(); ch != ':' {
		return ILLEGAL, buf.String(), nil
	}

	buf.WriteRune(':')

	// The first number is the maximum, unless a range follows below.
	if hi, ok = signal(); !ok {
		return ILLEGAL, buf.String(), nil
	}

	// An optional increment.
	if ch := s.next(); ch == ',' {
		com = true
		buf.WriteRune(ch)
		if inc, ok = signal(); !ok {
			return ILLEGAL, buf.String(), nil
		}
	} else {
		s.undo()
	}

	// An optional range, which turns the first number into the minimum.
	if ch := s.next(); ch == '.' {
		dot = true
		buf.WriteRune(ch)
		if chn := s.next(); chn != '.' {
			return ILLEGAL, buf.String(), nil
		}
		buf.WriteRune('.')
		lo = hi
		if hi, ok = signal(); !ok {
			return ILLEGAL, buf.String(), nil
		}
	} else {
		s.undo()
	}

	if ch := s.next(); ch != '|' {
		return ILLEGAL, buf.String(), nil
	}

	buf.WriteRune('|')

	switch {

	// With identical bounds there is nothing to step through.
	case lo == hi:
		return ILLEGAL, buf.String(), nil

	// An explicit increment must be positive; the original author notes
	// that anything else would cause an infinite loop in the database.
	case com && inc <= 0:
		return ILLEGAL, buf.String(), nil

	// An increment only makes sense together with a range.
	case com && !dot:
		return ILLEGAL, buf.String(), nil

	}

	// A range without an explicit increment steps by one.
	if dot && inc <= 0 {
		inc = 1
	}

	return MODEL, buf.String(), NewModel(tbv, lo, inc, hi)

}
// scanSignal scans the text of a number used inside a model expression and
// returns it, unparsed, as a NUMBER token with a nil value.
//
// The literal starts with the runes passed in (the caller checks the first
// one with isSignal) and is extended with every following digit and single
// '.'. Two consecutive dots are the range operator, not part of the number:
// both are pushed back and scanning stops before them. Any other rune is
// pushed back and ends the number.
func (s *scanner) scanSignal(chp ...rune) (tok Token, lit string, val interface{}) {

	var buf bytes.Buffer

	// Start with the runes the caller has already consumed.
	for _, ch := range chp {
		buf.WriteRune(ch)
	}

loop:
	for {

		ch := s.next()

		switch {

		case ch == eof:
			break loop

		case isNumber(ch):
			buf.WriteRune(ch)

		case ch == '.':

			chn := s.next()

			// ".." belongs to the caller: give both dots back.
			if chn == '.' {
				s.undo()
				s.undo()
				break loop
			}

			// Only give the look-ahead rune back when one was actually
			// read. Undoing after the end of the input would push the
			// dot itself back, and it would then be read and appended
			// again on every iteration without end.
			if chn != eof {
				s.undo()
			}

			buf.WriteRune(ch)

		default:
			s.undo()
			break loop

		}

	}

	return NUMBER, buf.String(), nil

}
func (s *scanner) scanNumber(chp ...rune) (tok Token, lit string, val interface{}) {
2016-05-23 12:32:02 +00:00
tok = NUMBER
2016-09-06 13:30:59 +00:00
// Create a buffer
2016-05-23 12:32:02 +00:00
var buf bytes.Buffer
2016-09-06 13:30:59 +00:00
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
// Read subsequent characters
2016-05-23 12:32:02 +00:00
for {
2016-09-06 13:30:59 +00:00
if ch := s.next(); ch == eof {
2016-05-23 12:32:02 +00:00
break
} else if isNumber(ch) {
buf.WriteRune(ch)
} else if isLetter(ch) {
if tok == NUMBER || tok == DOUBLE {
tok = IDENT
buf.WriteRune(ch)
switch ch {
2017-11-16 20:53:13 +00:00
case 'e', 'E':
if chn := s.next(); chn == '+' {
tok = DOUBLE
buf.WriteRune(chn)
} else if ch == '-' {
tok = DOUBLE
buf.WriteRune(chn)
} else {
s.undo()
}
case 's', 'h', 'd', 'w':
tok = DURATION
case 'n', 'u', 'µ', 'm':
if chn := s.next(); chn == 's' {
tok = DURATION
buf.WriteRune(chn)
} else if ch == 'm' {
tok = DURATION
s.undo()
} else {
s.undo()
}
}
} else {
tok = IDENT
buf.WriteRune(ch)
}
2016-05-23 12:32:02 +00:00
} else if ch == '.' {
if tok == DOUBLE {
tok = IDENT
}
if tok == NUMBER {
tok = DOUBLE
}
buf.WriteRune(ch)
} else {
2016-09-06 13:30:59 +00:00
s.undo()
2016-05-23 12:32:02 +00:00
break
}
}
return tok, buf.String(), nil
2016-05-23 12:32:02 +00:00
}
func (s *scanner) scanString(chp ...rune) (tok Token, lit string, val interface{}) {
2016-02-26 17:27:07 +00:00
2016-09-06 13:30:59 +00:00
beg := chp[0]
2016-05-23 12:32:02 +00:00
end := beg
if beg == '"' {
end = '"'
}
if beg == '`' {
end = '`'
}
if beg == '⟨' {
end = '⟩'
}
2016-02-26 17:27:07 +00:00
2016-09-06 13:30:59 +00:00
tok = STRING
// Create a buffer
var buf bytes.Buffer
// Ignore passed in runes
// Read subsequent characters
2016-02-26 17:27:07 +00:00
for {
2016-09-06 13:30:59 +00:00
if ch := s.next(); ch == end {
2016-02-26 17:27:07 +00:00
break
} else if ch == eof {
return ILLEGAL, buf.String(), val
2016-02-26 17:27:07 +00:00
} else if ch == '\n' {
tok = REGION
buf.WriteRune(ch)
2016-05-23 12:32:02 +00:00
} else if ch == '\r' {
tok = REGION
buf.WriteRune(ch)
2016-02-26 17:27:07 +00:00
} else if ch == '\\' {
2016-09-06 13:30:59 +00:00
switch chn := s.next(); chn {
2016-05-23 12:32:02 +00:00
default:
buf.WriteRune(chn)
case 'b':
continue
case 't':
2016-09-06 13:30:59 +00:00
tok = REGION
2016-05-23 12:32:02 +00:00
buf.WriteRune('\t')
case 'r':
tok = REGION
buf.WriteRune('\r')
case 'n':
tok = REGION
2016-02-26 17:27:07 +00:00
buf.WriteRune('\n')
}
} else {
buf.WriteRune(ch)
}
}
2016-09-14 20:57:42 +00:00
if val, err := time.Parse(RFCDate, buf.String()); err == nil {
return DATE, buf.String(), val.UTC()
2016-02-26 17:27:07 +00:00
}
2016-09-14 20:57:42 +00:00
if val, err := time.Parse(RFCTime, buf.String()); err == nil {
return TIME, buf.String(), val.UTC()
}
return tok, buf.String(), val
2016-02-26 17:27:07 +00:00
}
func (s *scanner) scanRegexp(chp ...rune) (tok Token, lit string, val interface{}) {
2016-02-26 17:27:07 +00:00
tok = IDENT
2016-09-06 13:30:59 +00:00
// Create a buffer
2016-02-26 17:27:07 +00:00
var buf bytes.Buffer
2016-09-06 13:30:59 +00:00
// Ignore passed in runes
// Read subsequent characters
for {
if ch := s.next(); ch == chp[0] {
break
} else if ch == eof {
return ILLEGAL, buf.String(), val
2016-09-06 13:30:59 +00:00
} else if ch == '\\' {
chn := s.next()
buf.WriteRune(ch)
buf.WriteRune(chn)
} else {
buf.WriteRune(ch)
}
}
if _, err := regexp.Compile(buf.String()); err == nil {
return REGEX, buf.String(), nil
2016-09-06 13:30:59 +00:00
}
return tok, buf.String(), val
2016-09-06 13:30:59 +00:00
}
func (s *scanner) scanObject(chp ...rune) (tok Token, lit string, val interface{}) {
2016-09-06 13:30:59 +00:00
beg := chp[0]
2016-02-26 17:27:07 +00:00
end := beg
2016-05-23 12:32:02 +00:00
sub := 0
qut := 0
2016-02-26 17:27:07 +00:00
if beg == '{' {
end = '}'
tok = JSON
2016-02-26 17:27:07 +00:00
}
if beg == '[' {
end = ']'
tok = ARRAY
2016-02-26 17:27:07 +00:00
}
2016-09-06 13:30:59 +00:00
// Create a buffer
var buf bytes.Buffer
// Read passed in runes
for _, ch := range chp {
buf.WriteRune(ch)
}
// Read subsequent characters
2016-02-26 17:27:07 +00:00
for {
if ch := s.next(); ch == end && sub == 0 && qut == 0 {
2016-09-06 13:30:59 +00:00
buf.WriteRune(ch)
2016-02-26 17:27:07 +00:00
break
2016-05-23 12:32:02 +00:00
} else if ch == beg {
sub++
buf.WriteRune(ch)
} else if ch == end {
sub--
buf.WriteRune(ch)
2016-02-26 17:27:07 +00:00
} else if ch == eof {
return ILLEGAL, buf.String(), val
} else if ch == '"' {
if qut == 0 {
qut++
} else {
qut--
}
buf.WriteRune(ch)
2016-02-26 17:27:07 +00:00
} else if ch == '\\' {
2016-09-06 13:30:59 +00:00
switch chn := s.next(); chn {
2016-05-23 12:32:02 +00:00
default:
return ILLEGAL, buf.String(), val
2016-05-23 12:32:02 +00:00
case 'b', 't', 'r', 'n', 'f', '"', '\\':
buf.WriteRune(ch)
buf.WriteRune(chn)
2016-02-26 17:27:07 +00:00
}
} else {
buf.WriteRune(ch)
}
}
return tok, buf.String(), val
2016-02-26 17:27:07 +00:00
}
func (s *scanner) part() (tok Token, lit string, val interface{}) {
if ch := s.next(); isLetter(ch) {
tok, lit, _ = s.scanIdent(ch)
} else if isNumber(ch) {
tok, lit, _ = s.scanNumber(ch)
} else if ch == '`' {
2017-11-16 20:53:13 +00:00
tok, lit, _ = s.scanSection(ch)
} else if ch == '⟨' {
2017-11-16 20:53:13 +00:00
tok, lit, _ = s.scanSection(ch)
} else {
s.undo()
tok = ILLEGAL
}
2017-11-16 20:53:13 +00:00
if tok != IDENT && tok != NUMBER && tok != DOUBLE {
tok = ILLEGAL
}
if val == nil {
val = lit
}
return
}
2016-09-06 13:30:59 +00:00
// next reads the next rune from the bufferred reader.
2016-02-26 17:27:07 +00:00
// Returns the rune(0) if an error occurs (or io.EOF is returned).
func (s *scanner) next() rune {
2016-09-06 13:30:59 +00:00
if len(s.a) > 0 {
2016-09-06 13:30:59 +00:00
var r rune
r, s.a = s.a[len(s.a)-1], s.a[:len(s.a)-1]
s.b = append(s.b, r)
2016-09-06 13:30:59 +00:00
return r
}
r, _, err := s.r.ReadRune()
2016-02-26 17:27:07 +00:00
if err != nil {
return eof
}
s.b = append(s.b, r)
2016-09-06 13:30:59 +00:00
return r
2016-02-26 17:27:07 +00:00
}
2016-09-06 13:30:59 +00:00
// undo places the previously read rune back on the reader.
func (s *scanner) undo() {
2016-09-06 13:30:59 +00:00
if len(s.b) > 0 {
2016-09-06 13:30:59 +00:00
var r rune
r, s.b = s.b[len(s.b)-1], s.b[:len(s.b)-1]
s.a = append(s.a, r)
2016-09-06 13:30:59 +00:00
return
}
2016-02-26 17:27:07 +00:00
_ = s.r.UnreadRune()
2016-09-06 13:30:59 +00:00
2016-02-26 17:27:07 +00:00
}
2016-09-07 15:58:50 +00:00
// isBlank reports whether ch is a space, a tab, a line feed or a carriage
// return.
func isBlank(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
// isNumber reports whether ch is one of the ASCII digits '0' to '9'.
func isNumber(ch rune) bool {
	return '0' <= ch && ch <= '9'
}
// isSignal reports whether ch can start a signed number: an ASCII digit, a
// plus sign or a minus sign.
func isSignal(ch rune) bool {
	switch ch {
	case '+', '-':
		return true
	}
	return '0' <= ch && ch <= '9'
}
2016-09-06 13:30:59 +00:00
// isLetter reports whether ch is an ASCII letter, in either case, or the
// micro sign 'µ'.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return ch == 'µ'
}
2016-09-06 13:30:59 +00:00
// isIdentChar returns true if the rune is allowed in a IDENT.
2016-02-26 17:27:07 +00:00
func isIdentChar(ch rune) bool {
return isLetter(ch) || isNumber(ch) || ch == '_'
2016-02-26 17:27:07 +00:00
}
2016-09-14 21:22:18 +00:00
// isThingChar reports whether ch may appear inside a THING: a letter as
// defined by isLetter, a digit as defined by isNumber, or an underscore.
func isThingChar(ch rune) bool {
	switch {
	case isLetter(ch), isNumber(ch):
		return true
	}
	return ch == '_'
}
// isExprsChar returns true if the rune is allowed in a IDENT.
func isExprsChar(ch rune) bool {
2018-04-20 22:51:42 +00:00
return isLetter(ch) || isNumber(ch) || ch == '.' || ch == '_' || ch == '*' || ch == '[' || ch == '$' || ch == ']'
}
2016-02-26 17:27:07 +00:00
// eof is the marker rune that next returns once the reader has no more
// input. Its value is the NUL rune, so a NUL rune in the input itself is
// treated the same way by every check against eof.
var eof = '\x00'