406 lines
7.9 KiB
Go
406 lines
7.9 KiB
Go
|
// Copyright © 2016 Abcum Ltd
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
package sql
|
||
|
|
||
|
import (
|
||
|
"bufio"
|
||
|
"bytes"
|
||
|
"encoding/json"
|
||
|
"io"
|
||
|
"strconv"
|
||
|
"strings"
|
||
|
"time"
|
||
|
)
|
||
|
|
||
|
// Scanner represents a lexical scanner.
|
||
|
type Scanner struct {
|
||
|
r *bufio.Reader
|
||
|
}
|
||
|
|
||
|
// NewScanner returns a new instance of Scanner.
|
||
|
func NewScanner(r io.Reader) *Scanner {
|
||
|
return &Scanner{r: bufio.NewReader(r)}
|
||
|
}
|
||
|
|
||
|
// Scan returns the next token and literal value.
|
||
|
func (s *Scanner) Scan() (tok Token, lit string) {
|
||
|
|
||
|
// Read the next rune.
|
||
|
ch := s.read()
|
||
|
|
||
|
// If we see whitespace then consume all contiguous whitespace.
|
||
|
if isWhitespace(ch) {
|
||
|
s.unread()
|
||
|
return s.scanWhitespace()
|
||
|
}
|
||
|
|
||
|
// If we see a letter then consume as an string.
|
||
|
if isLetter(ch) {
|
||
|
s.unread()
|
||
|
return s.scanIdent()
|
||
|
}
|
||
|
|
||
|
// If we see a number then consume as a number.
|
||
|
if isNumber(ch) {
|
||
|
s.unread()
|
||
|
return s.scanNumber()
|
||
|
}
|
||
|
|
||
|
// Otherwise read the individual character.
|
||
|
switch ch {
|
||
|
|
||
|
case eof:
|
||
|
return EOF, ""
|
||
|
case '*':
|
||
|
return ALL, string(ch)
|
||
|
case '@':
|
||
|
return EAT, string(ch)
|
||
|
case ',':
|
||
|
return COMMA, string(ch)
|
||
|
case '.':
|
||
|
chn := s.read()
|
||
|
s.unread()
|
||
|
if isNumber(chn) {
|
||
|
return s.scanNumber()
|
||
|
}
|
||
|
return DOT, string(ch)
|
||
|
case '"':
|
||
|
s.unread()
|
||
|
return s.scanString()
|
||
|
case '\'':
|
||
|
s.unread()
|
||
|
return s.scanString()
|
||
|
case '`':
|
||
|
s.unread()
|
||
|
return s.scanQuoted()
|
||
|
case '{':
|
||
|
s.unread()
|
||
|
return s.scanSpecial()
|
||
|
case '[':
|
||
|
s.unread()
|
||
|
return s.scanSpecial()
|
||
|
case ':':
|
||
|
return COLON, string(ch)
|
||
|
case ';':
|
||
|
return SEMICOLON, string(ch)
|
||
|
case '(':
|
||
|
return LPAREN, string(ch)
|
||
|
case ')':
|
||
|
return RPAREN, string(ch)
|
||
|
case '=':
|
||
|
return EQ, string(ch)
|
||
|
case '+':
|
||
|
if chn := s.read(); chn == '=' {
|
||
|
return INC, "+="
|
||
|
}
|
||
|
s.unread()
|
||
|
return ADD, string(ch)
|
||
|
case '-':
|
||
|
if chn := s.read(); chn == '=' {
|
||
|
return DEC, "-="
|
||
|
}
|
||
|
s.unread()
|
||
|
return SUB, string(ch)
|
||
|
case '!':
|
||
|
if chn := s.read(); chn == '=' {
|
||
|
return NEQ, "!="
|
||
|
}
|
||
|
s.unread()
|
||
|
case '<':
|
||
|
if chn := s.read(); chn == '=' {
|
||
|
return LTE, "<="
|
||
|
}
|
||
|
s.unread()
|
||
|
return LT, string(ch)
|
||
|
case '>':
|
||
|
if chn := s.read(); chn == '=' {
|
||
|
return GTE, ">="
|
||
|
}
|
||
|
s.unread()
|
||
|
return GT, string(ch)
|
||
|
|
||
|
}
|
||
|
|
||
|
return ILLEGAL, string(ch)
|
||
|
|
||
|
}
|
||
|
|
||
|
// scanWhitespace consumes the current rune and all contiguous whitespace.
|
||
|
func (s *Scanner) scanWhitespace() (tok Token, lit string) {
|
||
|
|
||
|
// Create a buffer and read the current character into it.
|
||
|
var buf bytes.Buffer
|
||
|
buf.WriteRune(s.read())
|
||
|
|
||
|
// Read every subsequent whitespace character into the buffer.
|
||
|
// Non-whitespace characters and EOF will cause the loop to exit.
|
||
|
for {
|
||
|
if ch := s.read(); ch == eof {
|
||
|
break
|
||
|
} else if !isWhitespace(ch) {
|
||
|
s.unread()
|
||
|
break
|
||
|
} else {
|
||
|
buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return WS, buf.String()
|
||
|
|
||
|
}
|
||
|
|
||
|
// scanIdent consumes the current rune and all contiguous ident runes.
|
||
|
func (s *Scanner) scanIdent() (tok Token, lit string) {
|
||
|
|
||
|
// Create a buffer and read the current character into it.
|
||
|
var buf bytes.Buffer
|
||
|
buf.WriteRune(s.read())
|
||
|
|
||
|
// Read every subsequent ident character into the buffer.
|
||
|
// Non-ident characters and EOF will cause the loop to exit.
|
||
|
for {
|
||
|
if ch := s.read(); ch == eof {
|
||
|
break
|
||
|
} else if !isIdentChar(ch) {
|
||
|
s.unread()
|
||
|
break
|
||
|
} else {
|
||
|
buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// If the string matches a keyword then return that keyword.
|
||
|
if tok := keywords[strings.ToUpper(buf.String())]; tok > 0 {
|
||
|
return tok, buf.String()
|
||
|
}
|
||
|
|
||
|
// Otherwise return as a regular identifier.
|
||
|
return IDENT, buf.String()
|
||
|
|
||
|
}
|
||
|
|
||
|
func (s *Scanner) scanQuoted() (Token, string) {
|
||
|
|
||
|
tok, lit := s.scanString()
|
||
|
|
||
|
if is(tok, STRING) {
|
||
|
return IDENT, lit
|
||
|
}
|
||
|
|
||
|
return tok, lit
|
||
|
|
||
|
}
|
||
|
|
||
|
func (s *Scanner) scanString() (tok Token, lit string) {
|
||
|
|
||
|
tok = STRING
|
||
|
|
||
|
var buf bytes.Buffer
|
||
|
|
||
|
char := s.read()
|
||
|
|
||
|
for {
|
||
|
if ch := s.read(); ch == char {
|
||
|
break
|
||
|
} else if ch == eof {
|
||
|
return ILLEGAL, buf.String()
|
||
|
} else if ch == '\n' {
|
||
|
tok = REGION
|
||
|
buf.WriteRune(ch)
|
||
|
} else if ch == '\\' {
|
||
|
chn := s.read()
|
||
|
if chn == 'n' {
|
||
|
buf.WriteRune('\n')
|
||
|
} else if chn == '\\' {
|
||
|
buf.WriteRune('\\')
|
||
|
} else if chn == '"' {
|
||
|
buf.WriteRune('"')
|
||
|
} else if chn == '\'' {
|
||
|
buf.WriteRune('\'')
|
||
|
} else if chn == '`' {
|
||
|
buf.WriteRune('`')
|
||
|
} else {
|
||
|
return ILLEGAL, buf.String()
|
||
|
}
|
||
|
} else {
|
||
|
buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return tok, buf.String()
|
||
|
|
||
|
}
|
||
|
|
||
|
func (s *Scanner) scanNumber() (tok Token, lit string) {
|
||
|
|
||
|
tok = NUMBER
|
||
|
|
||
|
// Create a buffer and read the current character into it.
|
||
|
var buf bytes.Buffer
|
||
|
buf.WriteRune(s.read())
|
||
|
|
||
|
// Read every subsequent ident character into the buffer.
|
||
|
// Non-ident characters and EOF will cause the loop to exit.
|
||
|
for {
|
||
|
if ch := s.read(); ch == eof {
|
||
|
break
|
||
|
} else if ch == '.' {
|
||
|
if tok == DOUBLE {
|
||
|
tok = IDENT
|
||
|
}
|
||
|
if tok == NUMBER {
|
||
|
tok = DOUBLE
|
||
|
}
|
||
|
buf.WriteRune(ch)
|
||
|
} else if ch == '+' {
|
||
|
buf.WriteRune(ch)
|
||
|
} else if ch == '-' {
|
||
|
buf.WriteRune(ch)
|
||
|
} else if ch == ':' {
|
||
|
buf.WriteRune(ch)
|
||
|
} else if ch == 'T' {
|
||
|
buf.WriteRune(ch)
|
||
|
} else if isLetter(ch) {
|
||
|
tok = IDENT
|
||
|
buf.WriteRune(ch)
|
||
|
} else if !isNumber(ch) {
|
||
|
s.unread()
|
||
|
break
|
||
|
} else {
|
||
|
buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if _, err := time.Parse("2006-01-02", buf.String()); err == nil {
|
||
|
return DATE, buf.String()
|
||
|
}
|
||
|
|
||
|
if _, err := time.Parse(time.RFC3339, buf.String()); err == nil {
|
||
|
return TIME, buf.String()
|
||
|
}
|
||
|
|
||
|
if _, err := time.Parse(time.RFC3339Nano, buf.String()); err == nil {
|
||
|
return NANO, buf.String()
|
||
|
}
|
||
|
|
||
|
return tok, buf.String()
|
||
|
|
||
|
}
|
||
|
|
||
|
func (s *Scanner) scanSpecial() (tok Token, lit string) {
|
||
|
|
||
|
tok = IDENT
|
||
|
|
||
|
var buf bytes.Buffer
|
||
|
|
||
|
beg := s.read()
|
||
|
end := beg
|
||
|
|
||
|
if beg == '{' {
|
||
|
end = '}'
|
||
|
}
|
||
|
|
||
|
if beg == '[' {
|
||
|
end = ']'
|
||
|
}
|
||
|
|
||
|
for {
|
||
|
if ch := s.read(); ch == end {
|
||
|
break
|
||
|
} else if ch == eof {
|
||
|
return ILLEGAL, buf.String()
|
||
|
} else if ch == '\n' {
|
||
|
tok = REGION
|
||
|
buf.WriteRune(ch)
|
||
|
} else if ch == '\\' {
|
||
|
chn := s.read()
|
||
|
if chn == 'n' {
|
||
|
tok = REGION
|
||
|
buf.WriteRune('\n')
|
||
|
} else if chn == '\\' {
|
||
|
buf.WriteRune('\\')
|
||
|
} else if chn == '"' {
|
||
|
buf.WriteRune('"')
|
||
|
} else if chn == '\'' {
|
||
|
buf.WriteRune('\'')
|
||
|
} else if chn == '`' {
|
||
|
buf.WriteRune('`')
|
||
|
} else {
|
||
|
break
|
||
|
}
|
||
|
} else {
|
||
|
buf.WriteRune(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
var f interface{}
|
||
|
j := []byte(string(beg) + buf.String() + string(end))
|
||
|
err := json.Unmarshal(j, &f)
|
||
|
if err == nil {
|
||
|
return JSON, string(beg) + buf.String() + string(end)
|
||
|
}
|
||
|
|
||
|
return tok, buf.String()
|
||
|
|
||
|
}
|
||
|
|
||
|
// read reads the next rune from the bufferred reader.
|
||
|
// Returns the rune(0) if an error occurs (or io.EOF is returned).
|
||
|
func (s *Scanner) read() rune {
|
||
|
ch, _, err := s.r.ReadRune()
|
||
|
if err != nil {
|
||
|
return eof
|
||
|
}
|
||
|
return ch
|
||
|
}
|
||
|
|
||
|
// unread places the previously read rune back on the reader.
|
||
|
func (s *Scanner) unread() {
|
||
|
_ = s.r.UnreadRune()
|
||
|
}
|
||
|
|
||
|
func number(lit string) (i int64) {
|
||
|
i, _ = strconv.ParseInt(lit, 10, 64)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// isWhitespace returns true if the rune is a space, tab, or newline.
|
||
|
func isWhitespace(ch rune) bool {
|
||
|
return ch == ' ' || ch == '\t' || ch == '\n'
|
||
|
}
|
||
|
|
||
|
// isLetter returns true if the rune is a letter.
|
||
|
func isLetter(ch rune) bool {
|
||
|
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
|
||
|
}
|
||
|
|
||
|
// isNumber returns true if the rune is a number.
|
||
|
func isNumber(ch rune) bool {
|
||
|
return (ch >= '0' && ch <= '9')
|
||
|
}
|
||
|
|
||
|
// isSeparator returns true if the rune is a separator expression.
|
||
|
func isSeparator(ch rune) bool {
|
||
|
return (ch == '.')
|
||
|
}
|
||
|
|
||
|
// isIdentChar returns true if the rune can be used in an unquoted identifier.
|
||
|
func isIdentChar(ch rune) bool {
|
||
|
return isLetter(ch) || isNumber(ch) || isSeparator(ch) || ch == '_'
|
||
|
}
|
||
|
|
||
|
// eof represents a marker rune for the end of the reader.
|
||
|
var eof = rune(0)
|