surrealpatch/sql/scanner.go

406 lines
7.9 KiB
Go
Raw Normal View History

2016-02-26 17:27:07 +00:00
// Copyright © 2016 Abcum Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sql
import (
"bufio"
"bytes"
"encoding/json"
"io"
"strconv"
"strings"
"time"
)
// Scanner represents a lexical scanner.
// It pulls runes from a buffered reader and groups them into tokens through
// its Scan method.
type Scanner struct {
// r is the buffered reader that runes are read from and unread to.
r *bufio.Reader
}
// NewScanner returns a new instance of Scanner.
// The supplied reader is wrapped in a bufio.Reader, which the scanner uses
// for all of its rune reads and unreads.
func NewScanner(r io.Reader) *Scanner {
	s := new(Scanner)
	s.r = bufio.NewReader(r)
	return s
}
// Scan returns the next token and literal value.
//
// One rune is read and used to pick a scanning strategy: runs of whitespace,
// identifiers/keywords and numbers are handed to their dedicated scan
// methods, quoted and bracketed sections to scanString, scanQuoted and
// scanSpecial, and operators or punctuation are matched directly. A rune
// that starts no known token is returned as ILLEGAL together with the rune
// itself. When read reports the end-of-input marker, EOF is returned with an
// empty literal.
func (s *Scanner) Scan() (tok Token, lit string) {
	// Read the next rune.
	ch := s.read()

	// Multi-rune tokens: push the rune back so that the dedicated scan
	// method sees the token from its very first rune.
	switch {
	case isWhitespace(ch):
		// If we see whitespace then consume all contiguous whitespace.
		s.unread()
		return s.scanWhitespace()
	case isLetter(ch):
		// If we see a letter then consume as a string.
		s.unread()
		return s.scanIdent()
	case isNumber(ch):
		// If we see a number then consume as a number.
		s.unread()
		return s.scanNumber()
	}

	// Otherwise read the individual character.
	switch ch {
	case eof:
		return EOF, ""
	case '*':
		return ALL, string(ch)
	case '@':
		return EAT, string(ch)
	case ',':
		return COMMA, string(ch)
	case '.':
		// Only a single rune can be pushed back onto the reader, so reading
		// the following rune in order to test it would make the '.' itself
		// unrecoverable and the number literal would silently lose its
		// leading dot (".5" scanned as "5"). Push the '.' back first and
		// peek past it instead.
		s.unread()
		if nxt, err := s.r.Peek(2); err == nil && isNumber(rune(nxt[1])) {
			tok, lit = s.scanNumber()
			// scanNumber stores its first rune without classifying it, so
			// the leading '.' has to be accounted for here: one dot makes
			// a DOUBLE, a second dot makes the literal an IDENT.
			switch tok {
			case NUMBER:
				tok = DOUBLE
			case DOUBLE:
				tok = IDENT
			}
			return tok, lit
		}
		// Not a fractional number: consume the '.' again.
		s.read()
		return DOT, string(ch)
	case '"', '\'':
		s.unread()
		return s.scanString()
	case '`':
		s.unread()
		return s.scanQuoted()
	case '{', '[':
		s.unread()
		return s.scanSpecial()
	case ':':
		return COLON, string(ch)
	case ';':
		return SEMICOLON, string(ch)
	case '(':
		return LPAREN, string(ch)
	case ')':
		return RPAREN, string(ch)
	case '=':
		return EQ, string(ch)
	case '+':
		if chn := s.read(); chn == '=' {
			return INC, "+="
		}
		s.unread()
		return ADD, string(ch)
	case '-':
		if chn := s.read(); chn == '=' {
			return DEC, "-="
		}
		s.unread()
		return SUB, string(ch)
	case '!':
		if chn := s.read(); chn == '=' {
			return NEQ, "!="
		}
		// A lone '!' is not a token of its own.
		s.unread()
		return ILLEGAL, string(ch)
	case '<':
		if chn := s.read(); chn == '=' {
			return LTE, "<="
		}
		s.unread()
		return LT, string(ch)
	case '>':
		if chn := s.read(); chn == '=' {
			return GTE, ">="
		}
		s.unread()
		return GT, string(ch)
	}

	return ILLEGAL, string(ch)
}
// scanWhitespace consumes the current rune and all contiguous whitespace.
// It returns a WS token whose literal is the exact whitespace consumed.
func (s *Scanner) scanWhitespace() (tok Token, lit string) {
	var buf bytes.Buffer

	// The current rune is taken unconditionally; the caller has already
	// established that it is whitespace.
	buf.WriteRune(s.read())

	// Keep collecting until the input ends or a non-whitespace rune shows
	// up; the latter is handed back to the reader for the next token.
	for ch := s.read(); ch != eof; ch = s.read() {
		if !isWhitespace(ch) {
			s.unread()
			break
		}
		buf.WriteRune(ch)
	}

	return WS, buf.String()
}
// scanIdent consumes the current rune and all contiguous ident runes.
// The collected text is looked up, upper-cased, in the keywords table; on a
// match the keyword token is returned, otherwise IDENT. In both cases the
// literal keeps the casing found in the input.
func (s *Scanner) scanIdent() (tok Token, lit string) {
	var buf bytes.Buffer

	// The current rune is taken unconditionally.
	buf.WriteRune(s.read())

	// Keep collecting until the input ends or a rune that cannot be part
	// of an identifier shows up; the latter is handed back to the reader.
	for ch := s.read(); ch != eof; ch = s.read() {
		if !isIdentChar(ch) {
			s.unread()
			break
		}
		buf.WriteRune(ch)
	}

	lit = buf.String()

	// If the string matches a keyword then return that keyword.
	if kw := keywords[strings.ToUpper(lit)]; kw > 0 {
		return kw, lit
	}

	// Otherwise return as a regular identifier.
	return IDENT, lit
}
// scanQuoted scans a quoted section with scanString and relabels the result:
// when is(tok, STRING) holds the literal is returned as an IDENT, any other
// outcome of scanString is passed through unchanged.
func (s *Scanner) scanQuoted() (Token, string) {
	tok, lit := s.scanString()
	if !is(tok, STRING) {
		return tok, lit
	}
	return IDENT, lit
}
// scanString consumes a quoted section. The first rune read is taken as the
// quote character and the section runs until the next unescaped occurrence
// of that same rune.
//
// The escapes \n, \\, \", \' and \` are decoded into the runes they stand
// for. Any other escape, or reaching the end of the input before the closing
// quote, yields ILLEGAL together with the text collected so far. A raw
// newline inside the section turns the resulting token from STRING into
// REGION.
func (s *Scanner) scanString() (tok Token, lit string) {
	tok = STRING

	var buf bytes.Buffer

	// Whatever rune opens the section must also close it.
	quote := s.read()

	for {
		ch := s.read()
		switch ch {
		case quote:
			// Closing quote: the section is complete.
			return tok, buf.String()
		case eof:
			// Input ended before the closing quote.
			return ILLEGAL, buf.String()
		case '\n':
			tok = REGION
			buf.WriteRune(ch)
		case '\\':
			switch esc := s.read(); esc {
			case 'n':
				buf.WriteRune('\n')
			case '\\', '"', '\'', '`':
				buf.WriteRune(esc)
			default:
				// Unknown escape sequence.
				return ILLEGAL, buf.String()
			}
		default:
			buf.WriteRune(ch)
		}
	}
}
// scanNumber consumes the current rune and every following rune that can be
// part of a number, date or time literal: digits, letters and the runes
// '.', '+', '-' and ':'.
//
// The returned token is decided as follows:
//   - DATE if the literal parses with the layout "2006-01-02";
//   - TIME if it parses as an RFC 3339 timestamp without fractional seconds;
//   - NANO if it parses as an RFC 3339 timestamp with fractional seconds;
//   - otherwise NUMBER, DOUBLE once a single '.' has been seen, or IDENT
//     when a second '.' or any letter other than 'T' has been seen.
//
// The first rune is stored as-is and does not influence the token type.
func (s *Scanner) scanNumber() (tok Token, lit string) {
	tok = NUMBER

	// Create a buffer and read the current character into it.
	var buf bytes.Buffer
	buf.WriteRune(s.read())

	// Read every subsequent number character into the buffer.
	// Any other character and EOF will cause the loop to exit.
scan:
	for {
		ch := s.read()
		switch {
		case ch == eof:
			break scan
		case ch == '.':
			// The first dot makes the literal a DOUBLE, any further dot
			// means that it can no longer be a plain number.
			switch tok {
			case NUMBER:
				tok = DOUBLE
			case DOUBLE:
				tok = IDENT
			}
			buf.WriteRune(ch)
		case ch == '+', ch == '-', ch == ':', ch == 'T':
			// Runes that appear in date and time literals are accepted
			// without changing the token type.
			buf.WriteRune(ch)
		case isLetter(ch):
			tok = IDENT
			buf.WriteRune(ch)
		case isNumber(ch):
			buf.WriteRune(ch)
		default:
			// Not part of the literal: hand the rune back to the reader.
			s.unread()
			break scan
		}
	}

	lit = buf.String()

	if _, err := time.Parse("2006-01-02", lit); err == nil {
		return DATE, lit
	}

	// time.Parse accepts a fractional second after the seconds field even
	// when the layout has none, so a separate attempt with RFC3339Nano made
	// after an RFC3339 attempt can never be reached. Parse once and let the
	// presence of a fractional part in the literal decide between the two
	// precisions.
	if _, err := time.Parse(time.RFC3339, lit); err == nil {
		if strings.ContainsRune(lit, '.') {
			return NANO, lit
		}
		return TIME, lit
	}

	return tok, lit
}
// scanSpecial consumes a section opened by '{' or '[' and running up to the
// first matching '}' or ']' respectively. Nesting is not tracked: the first
// closing rune ends the section.
//
// The escapes \n, \\, \", \' and \` are decoded while collecting; any other
// escape stops the collection early. Reaching the end of the input before
// the closing rune yields ILLEGAL with the text collected so far.
//
// If the collected text, wrapped in its delimiters again, is valid JSON, the
// token is JSON and the literal includes the delimiters. Otherwise the
// literal is the text without delimiters and the token is IDENT, or REGION
// when a raw or escaped newline was encountered.
func (s *Scanner) scanSpecial() (tok Token, lit string) {
	tok = IDENT

	var buf bytes.Buffer

	// The opening rune decides which closing rune ends the section.
	opener := s.read()
	closer := opener
	switch opener {
	case '{':
		closer = '}'
	case '[':
		closer = ']'
	}

collect:
	for {
		ch := s.read()
		switch ch {
		case closer:
			break collect
		case eof:
			return ILLEGAL, buf.String()
		case '\n':
			tok = REGION
			buf.WriteRune(ch)
		case '\\':
			switch esc := s.read(); esc {
			case 'n':
				tok = REGION
				buf.WriteRune('\n')
			case '\\', '"', '\'', '`':
				buf.WriteRune(esc)
			default:
				// Unknown escape: stop collecting and evaluate what has
				// been gathered so far.
				break collect
			}
		default:
			buf.WriteRune(ch)
		}
	}

	// Put the delimiters back and test whether the result is valid JSON.
	wrapped := string(opener) + buf.String() + string(closer)

	var decoded interface{}
	if err := json.Unmarshal([]byte(wrapped), &decoded); err == nil {
		return JSON, wrapped
	}

	return tok, buf.String()
}
// read reads the next rune from the buffered reader.
// Any error from the reader, io.EOF included, is reported as the eof marker
// rune.
func (s *Scanner) read() rune {
	if ch, _, err := s.r.ReadRune(); err == nil {
		return ch
	}
	return eof
}
// unread places the previously read rune back on the reader.
// The error returned by UnreadRune is discarded, so an unread that fails
// goes unreported.
func (s *Scanner) unread() {
_ = s.r.UnreadRune()
}
// number converts a base-10 literal into an int64 on a best-effort basis.
// The parse error is discarded and whatever value strconv.ParseInt produced
// alongside it is returned.
func number(lit string) (i int64) {
	parsed, _ := strconv.ParseInt(lit, 10, 64)
	return parsed
}
// isWhitespace returns true if the rune is a space, tab, newline or
// carriage return. Carriage return is included so that input with CRLF line
// endings is scanned as whitespace rather than as an illegal character.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
// isLetter returns true if the rune is an ASCII letter, 'a'-'z' or 'A'-'Z'.
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	default:
		return false
	}
}
// isNumber returns true if the rune is an ASCII digit, '0'-'9'.
func isNumber(ch rune) bool {
	if ch < '0' || ch > '9' {
		return false
	}
	return true
}
// isSeparator returns true if the rune is the separator '.'.
func isSeparator(ch rune) bool {
	const separator = '.'
	return ch == separator
}
// isIdentChar returns true if the rune can be used in an unquoted identifier.
func isIdentChar(ch rune) bool {
return isLetter(ch) || isNumber(ch) || isSeparator(ch) || ch == '_'
}
// eof represents a marker rune for the end of the reader.
// read returns it whenever reading a rune fails. Because the marker is
// rune(0), a NUL rune in the input compares equal to it.
var eof = rune(0)