surrealpatch/sql/scanner.go

// Copyright © 2016 Abcum Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sql

import (
	"bufio"
	"bytes"
	"encoding/json"
	"io"
	"strconv"
	"strings"
	"time"
)

// Scanner represents a lexical scanner.
//
// It holds a buffered reader over the input, from which runes are read one
// at a time and onto which the last rune read can be pushed back.
type Scanner struct {
	r *bufio.Reader // buffered rune source; populated by NewScanner
}

// NewScanner creates a Scanner that tokenises the data supplied by r.
// The reader is wrapped in a bufio.Reader before being stored.
func NewScanner(r io.Reader) *Scanner {
	buffered := bufio.NewReader(r)
	return &Scanner{r: buffered}
}

// Scan returns the next token and its literal value.
//
// Whitespace runs, identifiers and keywords, numbers, quoted strings and
// bracketed regions are handed to the dedicated scan helpers. Single and
// double character operators are recognised inline. The eof marker rune
// yields EOF with an empty literal, and any rune that starts no known
// token is returned as ILLEGAL together with that rune.
func (s *Scanner) Scan() (tok Token, lit string) {

	// Read the next rune.
	ch := s.read()

	// If we see whitespace then consume all contiguous whitespace.
	if isWhitespace(ch) {
		s.unread()
		return s.scanWhitespace()
	}

	// If we see a letter then consume as an identifier or keyword.
	if isLetter(ch) {
		s.unread()
		return s.scanIdent()
	}

	// If we see a digit then consume as a number.
	if isNumber(ch) {
		s.unread()
		return s.scanNumber()
	}

	// Otherwise read the individual character.
	switch ch {

	case eof:
		return EOF, ""
	case '*':
		return ALL, string(ch)
	case '@':
		return EAT, string(ch)
	case ',':
		return COMMA, string(ch)
	case '.':
		// Peek at the rune after the dot, then push it back again.
		chn := s.read()
		s.unread()
		if !isNumber(chn) {
			return DOT, string(ch)
		}
		// A digit follows, so this is a literal such as ".5". Only the
		// last rune read can be pushed back, which means the dot itself
		// is already gone from the reader: scanNumber starts at the digit
		// and the dot has to be restored in the literal here, otherwise
		// ".5" would be reported with the literal "5".
		tok, lit = s.scanNumber()
		return tok, string(ch) + lit
	case '"', '\'':
		s.unread()
		return s.scanString()
	case '`':
		s.unread()
		return s.scanQuoted()
	case '{', '[':
		s.unread()
		return s.scanSpecial()
	case ':':
		return COLON, string(ch)
	case ';':
		return SEMICOLON, string(ch)
	case '(':
		return LPAREN, string(ch)
	case ')':
		return RPAREN, string(ch)
	case '=':
		return EQ, string(ch)
	case '+':
		if chn := s.read(); chn == '=' {
			return INC, "+="
		}
		s.unread()
		return ADD, string(ch)
	case '-':
		if chn := s.read(); chn == '=' {
			return DEC, "-="
		}
		s.unread()
		return SUB, string(ch)
	case '!':
		if chn := s.read(); chn == '=' {
			return NEQ, "!="
		}
		// A lone '!' is not a token of its own.
		s.unread()
		return ILLEGAL, string(ch)
	case '<':
		if chn := s.read(); chn == '=' {
			return LTE, "<="
		}
		s.unread()
		return LT, string(ch)
	case '>':
		if chn := s.read(); chn == '=' {
			return GTE, ">="
		}
		s.unread()
		return GT, string(ch)

	}

	return ILLEGAL, string(ch)

}

// scanWhitespace reads the current rune plus every whitespace rune that
// directly follows it, and returns the whole run as a single WS token.
func (s *Scanner) scanWhitespace() (tok Token, lit string) {

	// The caller has already established that the current rune belongs
	// to the run, so it is stored without being checked again.
	var run bytes.Buffer
	run.WriteRune(s.read())

	// Keep reading until the eof marker or a non-whitespace rune shows up.
	// A non-whitespace rune is pushed back for the next Scan call.
	for next := s.read(); next != eof; next = s.read() {
		if !isWhitespace(next) {
			s.unread()
			break
		}
		run.WriteRune(next)
	}

	return WS, run.String()

}

// scanIdent reads the current rune plus every identifier rune that directly
// follows it. If the upper-cased text is found in the keywords table the
// matching keyword token is returned, otherwise IDENT. The literal always
// keeps the casing of the input.
func (s *Scanner) scanIdent() (tok Token, lit string) {

	// The first rune was vetted by the caller and is stored unchecked.
	var name bytes.Buffer
	name.WriteRune(s.read())

	// Collect runes until the eof marker or a rune that cannot be part of
	// an identifier; the latter is pushed back for the next Scan call.
	for next := s.read(); next != eof; next = s.read() {
		if !isIdentChar(next) {
			s.unread()
			break
		}
		name.WriteRune(next)
	}

	lit = name.String()

	// Keyword lookup ignores the casing of the input.
	if kw := keywords[strings.ToUpper(lit)]; kw > 0 {
		return kw, lit
	}

	return IDENT, lit

}

// scanQuoted reads a quoted literal through scanString and reports a
// STRING result as IDENT instead. Any other token that scanString
// produces is handed back unchanged.
func (s *Scanner) scanQuoted() (Token, string) {

	tok, lit := s.scanString()

	if !is(tok, STRING) {
		return tok, lit
	}

	return IDENT, lit

}

// scanString reads a quoted literal. The first rune read is taken as the
// quote character and the literal runs up to the next unescaped occurrence
// of that same rune; the quotes themselves are not part of the result.
//
// The token is STRING, or REGION when the literal contains a raw newline
// (an escaped \n is decoded but does not change the token). The escapes
// \n, \\, \", \' and \` are decoded. Reaching the eof marker before the
// closing quote, or meeting any other escape, returns ILLEGAL with the
// text collected so far.
func (s *Scanner) scanString() (tok Token, lit string) {

	tok = STRING

	var text bytes.Buffer

	// Whatever rune opens the literal must also close it.
	quote := s.read()

	for {
		ch := s.read()
		switch ch {
		case quote:
			return tok, text.String()
		case eof:
			// Unterminated literal.
			return ILLEGAL, text.String()
		case '\n':
			tok = REGION
			text.WriteRune(ch)
		case '\\':
			switch esc := s.read(); esc {
			case 'n':
				text.WriteRune('\n')
			case '\\', '"', '\'', '`':
				// These escapes stand for the escaped rune itself.
				text.WriteRune(esc)
			default:
				return ILLEGAL, text.String()
			}
		default:
			text.WriteRune(ch)
		}
	}

}

// scanNumber consumes a numeric or date/time literal that starts at the
// current rune and classifies it.
//
// Besides digits the runes '.', '+', '-', ':' and 'T' are accepted, so that
// dates and RFC 3339 timestamps are read in a single pass. While reading, the
// token starts as NUMBER, becomes DOUBLE on the first '.', and becomes IDENT
// on a further '.' or on any letter other than 'T'. Once the run ends, the
// literal is matched against date and time layouts, which take precedence
// over that running classification:
//
//   - DATE for a "2006-01-02" style date,
//   - TIME for an RFC 3339 timestamp without fractional seconds,
//   - NANO for an RFC 3339 timestamp with fractional seconds.
func (s *Scanner) scanNumber() (tok Token, lit string) {

	tok = NUMBER

	// Create a buffer and read the current character into it.
	var buf bytes.Buffer
	buf.WriteRune(s.read())

	// dotted records whether a '.' was consumed. It is what separates NANO
	// from TIME once the literal turns out to be a timestamp.
	dotted := false

	// Read every subsequent number, date or time character into the buffer.
	// Any other character is pushed back and ends the loop, as does EOF.
loop:
	for {
		ch := s.read()
		switch {
		case ch == eof:
			break loop
		case ch == '.':
			dotted = true
			switch tok {
			case DOUBLE:
				// A second dot cannot belong to a number.
				tok = IDENT
			case NUMBER:
				tok = DOUBLE
			}
			buf.WriteRune(ch)
		case ch == '+', ch == '-', ch == ':', ch == 'T':
			// Separators used by dates and timestamps; they leave the
			// running classification untouched.
			buf.WriteRune(ch)
		case isLetter(ch):
			tok = IDENT
			buf.WriteRune(ch)
		case isNumber(ch):
			buf.WriteRune(ch)
		default:
			s.unread()
			break loop
		}
	}

	lit = buf.String()

	if _, err := time.Parse("2006-01-02", lit); err == nil {
		return DATE, lit
	}

	// time.Parse accepts a fractional second after the seconds field even
	// when the layout has none, so time.RFC3339 matches timestamps with and
	// without a fraction and a separate time.RFC3339Nano check could never
	// be reached. In a literal that parses as RFC 3339 a '.' can only be the
	// fraction separator, so dotted tells the two cases apart.
	if _, err := time.Parse(time.RFC3339, lit); err == nil {
		if dotted {
			return NANO, lit
		}
		return TIME, lit
	}

	return tok, lit

}

// scanSpecial reads a bracketed region. The first rune read is the opening
// delimiter; '{' is closed by '}' and '[' by ']', any other rune closes
// itself.
//
// Runes are collected up to the first closing delimiter, decoding the
// escapes \n, \\, \", \' and \` on the way. An unrecognised escape ends
// the collection at that point, and reaching the eof marker first returns
// ILLEGAL with the text collected so far. If the collected text, wrapped
// in its delimiters again, is valid JSON the result is a JSON token whose
// literal includes the delimiters. Otherwise the text is returned without
// delimiters as IDENT, or as REGION when it contains a raw or escaped
// newline.
//
// NOTE(review): collection stops at the first closing delimiter seen, so a
// nested object or array, or a closing delimiter inside a JSON string, cuts
// the region short. Escapes are also decoded before the JSON check.
func (s *Scanner) scanSpecial() (tok Token, lit string) {

	tok = IDENT

	var body bytes.Buffer

	// Work out which rune terminates the region.
	opener := s.read()
	closer := opener
	switch opener {
	case '{':
		closer = '}'
	case '[':
		closer = ']'
	}

collect:
	for {
		ch := s.read()
		switch ch {
		case closer:
			break collect
		case eof:
			// Unterminated region.
			return ILLEGAL, body.String()
		case '\n':
			tok = REGION
			body.WriteRune(ch)
		case '\\':
			switch esc := s.read(); esc {
			case 'n':
				tok = REGION
				body.WriteRune('\n')
			case '\\', '"', '\'', '`':
				// These escapes stand for the escaped rune itself.
				body.WriteRune(esc)
			default:
				// Unknown escape: stop collecting and classify what
				// has been gathered up to here.
				break collect
			}
		default:
			body.WriteRune(ch)
		}
	}

	// Put the delimiters back and see whether the result decodes as JSON.
	wrapped := string(opener) + body.String() + string(closer)

	var decoded interface{}
	if err := json.Unmarshal([]byte(wrapped), &decoded); err == nil {
		return JSON, wrapped
	}

	return tok, body.String()

}

// read returns the next rune from the buffered reader.
// Any read error, io.EOF included, is reported as the eof marker rune.
func (s *Scanner) read() rune {
	if ch, _, err := s.r.ReadRune(); err == nil {
		return ch
	}
	return eof
}

// unread pushes the most recently read rune back onto the reader.
func (s *Scanner) unread() {
	// Best effort: a failure to push the rune back is deliberately ignored.
	err := s.r.UnreadRune()
	_ = err
}

// number converts a base-10 literal into an int64. The parse error is
// discarded and whatever value strconv.ParseInt produced is returned as is.
func number(lit string) (i int64) {
	parsed, _ := strconv.ParseInt(lit, 10, 64)
	return parsed
}

// isWhitespace returns true if the rune is a space, tab, newline, or
// carriage return. Carriage return is included so that input with CRLF line
// endings is scanned as whitespace rather than producing ILLEGAL tokens.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}

// isLetter reports whether the rune is an ASCII letter, a-z or A-Z.
func isLetter(ch rune) bool {
	if 'a' <= ch && ch <= 'z' {
		return true
	}
	return 'A' <= ch && ch <= 'Z'
}

// isNumber reports whether the rune is an ASCII digit, 0-9.
func isNumber(ch rune) bool {
	switch {
	case ch < '0', ch > '9':
		return false
	}
	return true
}

// isSeparator reports whether the rune is the separator character '.'.
func isSeparator(ch rune) bool {
	const dot = '.'
	return ch == dot
}

// isIdentChar reports whether the rune may appear in an unquoted
// identifier: a letter, a digit, the separator, or an underscore.
func isIdentChar(ch rune) bool {
	switch {
	case ch == '_', isLetter(ch), isNumber(ch), isSeparator(ch):
		return true
	}
	return false
}

// eof is the marker rune that stands for the end of the reader. Its value
// is rune 0, so a NUL rune in the input compares equal to it.
var eof rune = 0
Add initial sql parser code 2016-02-26 17:27:07 +00:00			`// Copyright © 2016 Abcum Ltd`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`package sql`

			`import (`
			`"bufio"`
			`"bytes"`
			`"encoding/json"`
			`"io"`
			`"strconv"`
			`"strings"`
			`"time"`
			`)`

			`// Scanner represents a lexical scanner.`
			`type Scanner struct {`
			`r *bufio.Reader`
			`}`

			`// NewScanner returns a new instance of Scanner.`
			`func NewScanner(r io.Reader) *Scanner {`
			`return &Scanner{r: bufio.NewReader(r)}`
			`}`

			`// Scan returns the next token and literal value.`
			`func (s *Scanner) Scan() (tok Token, lit string) {`

			`// Read the next rune.`
			`ch := s.read()`

			`// If we see whitespace then consume all contiguous whitespace.`
			`if isWhitespace(ch) {`
			`s.unread()`
			`return s.scanWhitespace()`
			`}`

			`// If we see a letter then consume as an string.`
			`if isLetter(ch) {`
			`s.unread()`
			`return s.scanIdent()`
			`}`

			`// If we see a number then consume as a number.`
			`if isNumber(ch) {`
			`s.unread()`
			`return s.scanNumber()`
			`}`

			`// Otherwise read the individual character.`
			`switch ch {`

			`case eof:`
			`return EOF, ""`
			`case '*':`
			`return ALL, string(ch)`
			`case '@':`
			`return EAT, string(ch)`
			`case ',':`
			`return COMMA, string(ch)`
			`case '.':`
			`chn := s.read()`
			`s.unread()`
			`if isNumber(chn) {`
			`return s.scanNumber()`
			`}`
			`return DOT, string(ch)`
			`case '"':`
			`s.unread()`
			`return s.scanString()`
			`case '\'':`
			`s.unread()`
			`return s.scanString()`
			case '`':
			`s.unread()`
			`return s.scanQuoted()`
			`case '{':`
			`s.unread()`
			`return s.scanSpecial()`
			`case '[':`
			`s.unread()`
			`return s.scanSpecial()`
			`case ':':`
			`return COLON, string(ch)`
			`case ';':`
			`return SEMICOLON, string(ch)`
			`case '(':`
			`return LPAREN, string(ch)`
			`case ')':`
			`return RPAREN, string(ch)`
			`case '=':`
			`return EQ, string(ch)`
			`case '+':`
			`if chn := s.read(); chn == '=' {`
			`return INC, "+="`
			`}`
			`s.unread()`
			`return ADD, string(ch)`
			`case '-':`
			`if chn := s.read(); chn == '=' {`
			`return DEC, "-="`
			`}`
			`s.unread()`
			`return SUB, string(ch)`
			`case '!':`
			`if chn := s.read(); chn == '=' {`
			`return NEQ, "!="`
			`}`
			`s.unread()`
			`case '<':`
			`if chn := s.read(); chn == '=' {`
			`return LTE, "<="`
			`}`
			`s.unread()`
			`return LT, string(ch)`
			`case '>':`
			`if chn := s.read(); chn == '=' {`
			`return GTE, ">="`
			`}`
			`s.unread()`
			`return GT, string(ch)`

			`}`

			`return ILLEGAL, string(ch)`

			`}`

			`// scanWhitespace consumes the current rune and all contiguous whitespace.`
			`func (s *Scanner) scanWhitespace() (tok Token, lit string) {`

			`// Create a buffer and read the current character into it.`
			`var buf bytes.Buffer`
			`buf.WriteRune(s.read())`

			`// Read every subsequent whitespace character into the buffer.`
			`// Non-whitespace characters and EOF will cause the loop to exit.`
			`for {`
			`if ch := s.read(); ch == eof {`
			`break`
			`} else if !isWhitespace(ch) {`
			`s.unread()`
			`break`
			`} else {`
			`buf.WriteRune(ch)`
			`}`
			`}`

			`return WS, buf.String()`

			`}`

			`// scanIdent consumes the current rune and all contiguous ident runes.`
			`func (s *Scanner) scanIdent() (tok Token, lit string) {`

			`// Create a buffer and read the current character into it.`
			`var buf bytes.Buffer`
			`buf.WriteRune(s.read())`

			`// Read every subsequent ident character into the buffer.`
			`// Non-ident characters and EOF will cause the loop to exit.`
			`for {`
			`if ch := s.read(); ch == eof {`
			`break`
			`} else if !isIdentChar(ch) {`
			`s.unread()`
			`break`
			`} else {`
			`buf.WriteRune(ch)`
			`}`
			`}`

			`// If the string matches a keyword then return that keyword.`
			`if tok := keywords[strings.ToUpper(buf.String())]; tok > 0 {`
			`return tok, buf.String()`
			`}`

			`// Otherwise return as a regular identifier.`
			`return IDENT, buf.String()`

			`}`

			`func (s *Scanner) scanQuoted() (Token, string) {`

			`tok, lit := s.scanString()`

			`if is(tok, STRING) {`
			`return IDENT, lit`
			`}`

			`return tok, lit`

			`}`

			`func (s *Scanner) scanString() (tok Token, lit string) {`

			`tok = STRING`

			`var buf bytes.Buffer`

			`char := s.read()`

			`for {`
			`if ch := s.read(); ch == char {`
			`break`
			`} else if ch == eof {`
			`return ILLEGAL, buf.String()`
			`} else if ch == '\n' {`
			`tok = REGION`
			`buf.WriteRune(ch)`
			`} else if ch == '\\' {`
			`chn := s.read()`
			`if chn == 'n' {`
			`buf.WriteRune('\n')`
			`} else if chn == '\\' {`
			`buf.WriteRune('\\')`
			`} else if chn == '"' {`
			`buf.WriteRune('"')`
			`} else if chn == '\'' {`
			`buf.WriteRune('\'')`
			} else if chn == '`' {
			buf.WriteRune('`')
			`} else {`
			`return ILLEGAL, buf.String()`
			`}`
			`} else {`
			`buf.WriteRune(ch)`
			`}`
			`}`

			`return tok, buf.String()`

			`}`

			`func (s *Scanner) scanNumber() (tok Token, lit string) {`

			`tok = NUMBER`

			`// Create a buffer and read the current character into it.`
			`var buf bytes.Buffer`
			`buf.WriteRune(s.read())`

			`// Read every subsequent ident character into the buffer.`
			`// Non-ident characters and EOF will cause the loop to exit.`
			`for {`
			`if ch := s.read(); ch == eof {`
			`break`
			`} else if ch == '.' {`
			`if tok == DOUBLE {`
			`tok = IDENT`
			`}`
			`if tok == NUMBER {`
			`tok = DOUBLE`
			`}`
			`buf.WriteRune(ch)`
			`} else if ch == '+' {`
			`buf.WriteRune(ch)`
			`} else if ch == '-' {`
			`buf.WriteRune(ch)`
			`} else if ch == ':' {`
			`buf.WriteRune(ch)`
			`} else if ch == 'T' {`
			`buf.WriteRune(ch)`
			`} else if isLetter(ch) {`
			`tok = IDENT`
			`buf.WriteRune(ch)`
			`} else if !isNumber(ch) {`
			`s.unread()`
			`break`
			`} else {`
			`buf.WriteRune(ch)`
			`}`
			`}`

			`if _, err := time.Parse("2006-01-02", buf.String()); err == nil {`
			`return DATE, buf.String()`
			`}`

			`if _, err := time.Parse(time.RFC3339, buf.String()); err == nil {`
			`return TIME, buf.String()`
			`}`

			`if _, err := time.Parse(time.RFC3339Nano, buf.String()); err == nil {`
			`return NANO, buf.String()`
			`}`

			`return tok, buf.String()`

			`}`

			`func (s *Scanner) scanSpecial() (tok Token, lit string) {`

			`tok = IDENT`

			`var buf bytes.Buffer`

			`beg := s.read()`
			`end := beg`

			`if beg == '{' {`
			`end = '}'`
			`}`

			`if beg == '[' {`
			`end = ']'`
			`}`

			`for {`
			`if ch := s.read(); ch == end {`
			`break`
			`} else if ch == eof {`
			`return ILLEGAL, buf.String()`
			`} else if ch == '\n' {`
			`tok = REGION`
			`buf.WriteRune(ch)`
			`} else if ch == '\\' {`
			`chn := s.read()`
			`if chn == 'n' {`
			`tok = REGION`
			`buf.WriteRune('\n')`
			`} else if chn == '\\' {`
			`buf.WriteRune('\\')`
			`} else if chn == '"' {`
			`buf.WriteRune('"')`
			`} else if chn == '\'' {`
			`buf.WriteRune('\'')`
			} else if chn == '`' {
			buf.WriteRune('`')
			`} else {`
			`break`
			`}`
			`} else {`
			`buf.WriteRune(ch)`
			`}`
			`}`

			`var f interface{}`
			`j := []byte(string(beg) + buf.String() + string(end))`
			`err := json.Unmarshal(j, &f)`
			`if err == nil {`
			`return JSON, string(beg) + buf.String() + string(end)`
			`}`

			`return tok, buf.String()`

			`}`

			`// read reads the next rune from the bufferred reader.`
			`// Returns the rune(0) if an error occurs (or io.EOF is returned).`
			`func (s *Scanner) read() rune {`
			`ch, _, err := s.r.ReadRune()`
			`if err != nil {`
			`return eof`
			`}`
			`return ch`
			`}`

			`// unread places the previously read rune back on the reader.`
			`func (s *Scanner) unread() {`
			`_ = s.r.UnreadRune()`
			`}`

			`func number(lit string) (i int64) {`
			`i, _ = strconv.ParseInt(lit, 10, 64)`
			`return`
			`}`

			`// isWhitespace returns true if the rune is a space, tab, or newline.`
			`func isWhitespace(ch rune) bool {`
			`return ch == ' ' \|\| ch == '\t' \|\| ch == '\n'`
			`}`

			`// isLetter returns true if the rune is a letter.`
			`func isLetter(ch rune) bool {`
			`return (ch >= 'a' && ch <= 'z') \|\| (ch >= 'A' && ch <= 'Z')`
			`}`

			`// isNumber returns true if the rune is a number.`
			`func isNumber(ch rune) bool {`
			`return (ch >= '0' && ch <= '9')`
			`}`

			`// isSeparator returns true if the rune is a separator expression.`
			`func isSeparator(ch rune) bool {`
			`return (ch == '.')`
			`}`

			`// isIdentChar returns true if the rune can be used in an unquoted identifier.`
			`func isIdentChar(ch rune) bool {`
			`return isLetter(ch) \|\| isNumber(ch) \|\| isSeparator(ch) \|\| ch == '_'`
			`}`

			`// eof represents a marker rune for the end of the reader.`
			`var eof = rune(0)`