// Copyright © 2016 Abcum Ltd // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package sql import ( "bufio" "bytes" "io" "regexp" "strings" "time" ) // Scanner represents a lexical scanner. type Scanner struct { b []rune // any runes before a []rune // any runes after p *Parser r *bufio.Reader } // NewScanner returns a new instance of Scanner. func NewScanner(p *Parser, r io.Reader) *Scanner { return &Scanner{p: p, r: bufio.NewReader(r)} } // Scan returns the next token and literal value. func (s *Scanner) Scan() (tok Token, lit string, val interface{}) { // Read the next rune. ch := s.next() // If we see whitespace then consume all contiguous whitespace. if isBlank(ch) { return s.scanBlank(ch) } // If we see a letter then consume as a string. if isLetter(ch) { return s.scanIdent(ch) } // If we see a number then consume as a number. if isNumber(ch) { return s.scanNumber(ch) } // Otherwise read the individual character. switch ch { case eof: return EOF, "", val case '*': return ALL, string(ch), val case '×': return MUL, string(ch), val case '∙': return MUL, string(ch), val case '÷': return DIV, string(ch), val case ',': return COMMA, string(ch), val case '.': return DOT, string(ch), val case '@': return s.scanThing(ch) case '"': return s.scanString(ch) case '\'': return s.scanString(ch) case '`': return s.scanQuoted(ch) case '⟨': return s.scanQuoted(ch) case '{': return s.scanObject(ch) case '[': return s.scanObject(ch) case '$': return s.scanParams(ch) case ':': return COLON, string(ch), val case ';': return SEMICOLON, string(ch), val case '(': return LPAREN, string(ch), val case ')': return RPAREN, string(ch), val case '¬': return NEQ, string(ch), val case '≤': return LTE, string(ch), val case '≥': return GTE, string(ch), val case '~': return SIN, string(ch), val case '∋': return SIN, string(ch), val case '∌': return SNI, string(ch), val case '⊇': return CONTAINSALL, string(ch), val case '⊃': return CONTAINSSOME, string(ch), val case '⊅': return CONTAINSNONE, string(ch), val case '∈': return INS, string(ch), val case '∉': return NIS, string(ch), val case '⊆': return ALLCONTAINEDIN, string(ch), val case '⊂': return SOMECONTAINEDIN, string(ch), val case '⊄': return NONECONTAINEDIN, string(ch), val case '#': return s.scanCommentSingle(ch) case '/': chn := s.next() switch { case chn == '*': return s.scanCommentMultiple(ch) case chn == ' ': s.undo() return DIV, string(ch), val default: s.undo() return s.scanRegexp(ch) } case '=': chn := s.next() switch { case chn == '~': return SIN, "=~", val case chn == '=': return EEQ, "==", val default: s.undo() return EQ, string(ch), val } case '?': chn := s.next() switch { case chn == '=': return ANY, "?=", val default: s.undo() return QMARK, string(ch), val } case '!': chn := s.next() switch { case chn == '=': if s.next() == '=' { return NEE, "!==", val } else { s.undo() return NEQ, "!=", val } case chn == '~': return SNI, "!~", val default: s.undo() return EXC, string(ch), val } case '+': chn := s.next() switch { case chn == '=': return INC, "+=", val case isNumber(chn): return s.scanNumber(ch, chn) default: s.undo() return ADD, string(ch), val } case '-': chn := s.next() switch { case chn == '=': return DEC, "-=", val case chn == '>': return OEDGE, "->", val case chn == '-': return s.scanCommentSingle(ch) case isNumber(chn): return s.scanNumber(ch, chn) default: s.undo() return SUB, string(ch), val } case '>': chn := s.next() switch { case chn == '=': return GTE, ">=", val default: s.undo() return GT, string(ch), val } case '<': chn := s.next() switch { case chn == '>': return NEQ, "<>", val case chn == '=': return LTE, "<=", val case chn == '-': if s.next() == '>' { return BEDGE, "<->", val } else { s.undo() return IEDGE, "<-", val } default: s.undo() return LT, string(ch), val } } return ILLEGAL, string(ch), val } // scanBlank consumes the current rune and all contiguous whitespace. func (s *Scanner) scanBlank(chp ...rune) (tok Token, lit string, val interface{}) { tok = WS // Create a buffer var buf bytes.Buffer // Read passed in runes for _, ch := range chp { buf.WriteRune(ch) } // Read subsequent characters for { if ch := s.next(); ch == eof { break } else if !isBlank(ch) { s.undo() break } else { buf.WriteRune(ch) } } return tok, buf.String(), val } // scanCommentSingle consumes the current rune and all contiguous whitespace. func (s *Scanner) scanCommentSingle(chp ...rune) (tok Token, lit string, val interface{}) { tok = WS // Create a buffer var buf bytes.Buffer // Read passed in runes for _, ch := range chp { buf.WriteRune(ch) } // Read subsequent characters for { if ch := s.next(); ch == '\n' || ch == '\r' { buf.WriteRune(ch) break } else { buf.WriteRune(ch) } } return tok, buf.String(), val } // scanCommentMultiple consumes the current rune and all contiguous whitespace. func (s *Scanner) scanCommentMultiple(chp ...rune) (tok Token, lit string, val interface{}) { tok = WS // Create a buffer var buf bytes.Buffer // Read passed in runes for _, ch := range chp { buf.WriteRune(ch) } // Read subsequent characters for { if ch := s.next(); ch == eof { break } else if ch == '*' { if chn := s.next(); chn == '/' { buf.WriteRune(chn) break } buf.WriteRune(ch) } else { buf.WriteRune(ch) } } return tok, buf.String(), val } func (s *Scanner) scanParams(chp ...rune) (tok Token, lit string, val interface{}) { tok, lit, val = s.scanIdent(chp...) if s.p.is(tok, IDENT) { return PARAM, lit, nil } return } // scanIdent consumes the current rune and all contiguous ident runes. func (s *Scanner) scanIdent(chp ...rune) (tok Token, lit string, val interface{}) { tok = IDENT // Create a buffer var buf bytes.Buffer // Read passed in runes for _, ch := range chp { buf.WriteRune(ch) } // Read subsequent characters for { if ch := s.next(); ch == eof { break } else if !isIdentChar(ch) { s.undo() break } else { buf.WriteRune(ch) } } // If the string matches a keyword then return that keyword. if tok := keywords[strings.ToUpper(buf.String())]; tok > 0 { return tok, buf.String(), val } if val, err := time.ParseDuration(buf.String()); err == nil { return DURATION, buf.String(), val } // Otherwise return as a regular identifier. return tok, buf.String(), val } // scanThing consumes the current rune and all contiguous ident runes. func (s *Scanner) scanThing(chp ...rune) (tok Token, lit string, val interface{}) { tok = THING // Create a buffer var buf bytes.Buffer var beg bytes.Buffer var mid bytes.Buffer var end bytes.Buffer // Read passed in runes for _, ch := range chp { buf.WriteRune(ch) } for { if ch := s.next(); ch == eof { break } else if isLetter(ch) { tok, lit, _ = s.scanIdent(ch) beg.WriteString(lit) } else if isNumber(ch) { tok, lit, _ = s.scanNumber(ch) beg.WriteString(lit) } else if ch == '`' { tok, lit, _ = s.scanQuoted(ch) beg.WriteString(lit) } else if ch == '{' { tok, lit, _ = s.scanQuoted(ch) beg.WriteString(lit) } else if ch == '⟨' { tok, lit, _ = s.scanQuoted(ch) beg.WriteString(lit) } else { s.undo() break } } if beg.Len() < 1 { return ILLEGAL, buf.String() + beg.String() + mid.String() + end.String(), val } for { if ch := s.next(); ch != ':' { s.undo() break } else { mid.WriteRune(ch) break } } if mid.Len() < 1 { return ILLEGAL, buf.String() + beg.String() + mid.String() + end.String(), val } for { if ch := s.next(); ch == eof { break } else if isLetter(ch) { tok, lit, _ = s.scanIdent(ch) end.WriteString(lit) } else if isNumber(ch) { tok, lit, _ = s.scanNumber(ch) end.WriteString(lit) } else if ch == '`' { tok, lit, _ = s.scanQuoted(ch) beg.WriteString(lit) } else if ch == '{' { tok, lit, _ = s.scanQuoted(ch) end.WriteString(lit) } else if ch == '⟨' { tok, lit, _ = s.scanQuoted(ch) end.WriteString(lit) } else { s.undo() break } } if end.Len() < 1 { return ILLEGAL, buf.String() + beg.String() + mid.String() + end.String(), val } val = &Thing{ TB: beg.String(), ID: end.String(), } switch tok { case DATE, TIME, NUMBER, DOUBLE: val.(*Thing).ID, _ = s.p.declare(tok, end.String()) case REGION: return ILLEGAL, buf.String() + beg.String() + mid.String() + end.String(), val } // Otherwise return as a regular thing. return THING, buf.String() + beg.String() + mid.String() + end.String(), val } func (s *Scanner) scanNumber(chp ...rune) (tok Token, lit string, val interface{}) { tok = NUMBER // Create a buffer var buf bytes.Buffer // Read passed in runes for _, ch := range chp { buf.WriteRune(ch) } // Read subsequent characters for { if ch := s.next(); ch == eof { break } else if isNumber(ch) { buf.WriteRune(ch) } else if isLetter(ch) { tok = IDENT buf.WriteRune(ch) } else if ch == '.' { if tok == DOUBLE { tok = IDENT } if tok == NUMBER { tok = DOUBLE } buf.WriteRune(ch) } else { s.undo() break } } return tok, buf.String(), nil } func (s *Scanner) scanQuoted(chp ...rune) (tok Token, lit string, val interface{}) { tok, lit, val = s.scanString(chp...) if s.p.is(tok, STRING) { return IDENT, lit, nil } return } func (s *Scanner) scanString(chp ...rune) (tok Token, lit string, val interface{}) { beg := chp[0] end := beg if beg == '"' { end = '"' } if beg == '`' { end = '`' } if beg == '⟨' { end = '⟩' } if beg == '{' { end = '}' } tok = STRING // Create a buffer var buf bytes.Buffer // Ignore passed in runes // Read subsequent characters for { if ch := s.next(); ch == end { break } else if ch == eof { return ILLEGAL, buf.String(), val } else if ch == '\n' { tok = REGION buf.WriteRune(ch) } else if ch == '\r' { tok = REGION buf.WriteRune(ch) } else if ch == '\\' { switch chn := s.next(); chn { default: buf.WriteRune(chn) case 'b': continue case 't': tok = REGION buf.WriteRune('\t') case 'r': tok = REGION buf.WriteRune('\r') case 'n': tok = REGION buf.WriteRune('\n') } } else { buf.WriteRune(ch) } } if val, err := time.ParseDuration(buf.String()); err == nil { return DURATION, buf.String(), val } if val, err := time.Parse("2006-01-02", buf.String()); err == nil { return DATE, buf.String(), val } if val, err := time.Parse(time.RFC3339, buf.String()); err == nil { return TIME, buf.String(), val } return tok, buf.String(), val } func (s *Scanner) scanRegexp(chp ...rune) (tok Token, lit string, val interface{}) { tok = IDENT // Create a buffer var buf bytes.Buffer // Ignore passed in runes // Read subsequent characters for { if ch := s.next(); ch == chp[0] { break } else if ch == eof { return ILLEGAL, buf.String(), val } else if ch == '\\' { chn := s.next() buf.WriteRune(ch) buf.WriteRune(chn) } else { buf.WriteRune(ch) } } if val, err := regexp.Compile(buf.String()); err == nil { return REGEX, buf.String(), val } return tok, buf.String(), val } func (s *Scanner) scanObject(chp ...rune) (tok Token, lit string, val interface{}) { beg := chp[0] end := beg sub := 0 if beg == '{' { end = '}' } if beg == '[' { end = ']' } tok = IDENT // Create a buffer var buf bytes.Buffer // Read passed in runes for _, ch := range chp { buf.WriteRune(ch) } // Read subsequent characters for { if ch := s.next(); ch == end && sub == 0 { buf.WriteRune(ch) break } else if ch == beg { sub++ buf.WriteRune(ch) } else if ch == end { sub-- buf.WriteRune(ch) } else if ch == eof { return ILLEGAL, buf.String(), val } else if ch == '\\' { switch chn := s.next(); chn { default: return ILLEGAL, buf.String(), val case 'b', 't', 'r', 'n', 'f', '"', '\\': buf.WriteRune(ch) buf.WriteRune(chn) } } else { buf.WriteRune(ch) } } if beg == '{' { return JSON, buf.String(), val } if beg == '[' { return ARRAY, buf.String(), val } return ILLEGAL, buf.String(), val } // next reads the next rune from the bufferred reader. // Returns the rune(0) if an error occurs (or io.EOF is returned). func (s *Scanner) next() rune { if len(s.a) > 0 { var r rune r, s.a = s.a[len(s.a)-1], s.a[:len(s.a)-1] s.b = append(s.b, r) return r } r, _, err := s.r.ReadRune() if err != nil { return eof } s.b = append(s.b, r) return r } // undo places the previously read rune back on the reader. func (s *Scanner) undo() { if len(s.b) > 0 { var r rune r, s.b = s.b[len(s.b)-1], s.b[:len(s.b)-1] s.a = append(s.a, r) return } _ = s.r.UnreadRune() } // isBlank returns true if the rune is a space, tab, or newline. func isBlank(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } // isNumber returns true if the rune is a number. func isNumber(ch rune) bool { return (ch >= '0' && ch <= '9') } // isLetter returns true if the rune is a letter. func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') } // isIdentChar returns true if the rune is allowed in a IDENT. func isIdentChar(ch rune) bool { return isLetter(ch) || isNumber(ch) || ch == '.' || ch == '_' || ch == '*' } // eof represents a marker rune for the end of the reader. var eof = rune(0)