Add fuzzy string search to conditional SQL clauses

This commit is contained in:
Tobie Morgan Hitchcock 2018-04-13 20:33:40 +01:00
parent 9b4ff941b6
commit 3f7d7fc863
4 changed files with 208 additions and 24 deletions

View file

@ -23,6 +23,9 @@ import (
"strings" "strings"
"time" "time"
"golang.org/x/text/language"
"golang.org/x/text/search"
"github.com/abcum/surreal/cnf" "github.com/abcum/surreal/cnf"
"github.com/abcum/surreal/sql" "github.com/abcum/surreal/sql"
"github.com/abcum/surreal/util/data" "github.com/abcum/surreal/util/data"
@ -236,7 +239,7 @@ func (e *executor) fetch(ctx context.Context, val interface{}, doc *data.Doc) (o
return binaryMath(val.Op, l, r), nil return binaryMath(val.Op, l, r), nil
case sql.EQ, sql.NEQ, sql.ANY, sql.LT, sql.LTE, sql.GT, sql.GTE: case sql.EQ, sql.NEQ, sql.ANY, sql.LT, sql.LTE, sql.GT, sql.GTE:
return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil
case sql.SIN, sql.SNI, sql.INS, sql.NIS: case sql.SIN, sql.SNI, sql.INS, sql.NIS, sql.MAT, sql.NAT, sql.MAY:
return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil
case sql.CONTAINSALL, sql.CONTAINSSOME, sql.CONTAINSNONE: case sql.CONTAINSALL, sql.CONTAINSSOME, sql.CONTAINSNONE:
return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil
@ -783,9 +786,9 @@ func negOp(op sql.Token) bool {
func chkOp(op sql.Token) int8 { func chkOp(op sql.Token) int8 {
switch op { switch op {
case sql.EQ, sql.SIN, sql.INS: case sql.EQ, sql.SIN, sql.INS, sql.MAT, sql.ANY:
return +1 return +1
case sql.NEQ, sql.SNI, sql.NIS: case sql.NEQ, sql.SNI, sql.NIS, sql.NAT, sql.MAY:
return -1 return -1
case sql.CONTAINSALL: case sql.CONTAINSALL:
return +1 return +1
@ -846,6 +849,15 @@ func chkString(op sql.Token, a, b string) (val bool) {
return strings.Contains(a, b) == true return strings.Contains(a, b) == true
case sql.SNI: case sql.SNI:
return strings.Contains(a, b) == false return strings.Contains(a, b) == false
case sql.MAT:
b, e := search.New(language.Und, search.Loose).IndexString(a, b)
return b != -1 && e != -1
case sql.NAT:
b, e := search.New(language.Und, search.Loose).IndexString(a, b)
return b == -1 && e == -1
case sql.MAY:
b, e := search.New(language.Und, search.Loose).IndexString(a, b)
return b != -1 && e != -1
} }
return negOp(op) return negOp(op)
} }
@ -958,6 +970,21 @@ func chkArrayL(op sql.Token, a []interface{}, i interface{}) (val bool) {
default: default:
return data.Consume(a).Contains(i) == false return data.Consume(a).Contains(i) == false
} }
case sql.MAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.NAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.MAY:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
} }
return negOp(op) return negOp(op)
} }
@ -992,6 +1019,21 @@ func chkArrayR(op sql.Token, i interface{}, a []interface{}) (val bool) {
default: default:
return data.Consume(a).Contains(i) == false return data.Consume(a).Contains(i) == false
} }
case sql.MAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.NAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.MAY:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
} }
return negOp(op) return negOp(op)
} }
@ -1062,6 +1104,10 @@ func chkArray(op sql.Token, a []interface{}, b []interface{}) (val bool) {
func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) { func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
if len(a) == 0 {
return op == sql.NEQ
}
for _, v := range a { for _, v := range a {
var s string var s string
@ -1088,8 +1134,8 @@ func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
} }
if op == sql.NEQ { if op == sql.NEQ {
if chkRegex(sql.EQ, s, r) == true { if chkRegex(sql.EQ, s, r) == false {
return false return true
} }
} }
@ -1105,7 +1151,7 @@ func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
case sql.EQ: case sql.EQ:
return true return true
case sql.NEQ: case sql.NEQ:
return true return false
case sql.ANY: case sql.ANY:
return false return false
} }
@ -1113,3 +1159,64 @@ func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
return return
} }
// chkSearch reports whether the elements of a satisfy the fuzzy search
// operator op against the search term r.
//
// Each element is first converted to a string: strings are used as-is,
// bool, int64 and float64 values are formatted with strconv, and time.Time
// values with their String method. The resulting text is then searched for
// r with a language-neutral matcher built with the search.Loose option
// (documented upstream as ignoring case, diacritics and width).
//
// The operators behave as follows:
//
//   - sql.MAT is true only when every element contains r.
//   - sql.NAT is true as soon as one element does not contain r.
//   - sql.MAY is true as soon as one element contains r.
//
// An empty array satisfies only sql.NAT. An element of any other type makes
// the result false regardless of the operator, and any operator other than
// the three above also yields false.
func chkSearch(op sql.Token, a []interface{}, r string) (val bool) {

	if len(a) == 0 {
		return op == sql.NAT
	}

	// The matcher depends only on the language and options, never on the
	// element being inspected, so it is built once and reused for every
	// element instead of being reconstructed on each iteration.
	m := search.New(language.Und, search.Loose)

	for _, v := range a {

		var s string

		switch c := v.(type) {
		default:
			// Values which have no textual form can not be searched.
			return false
		case string:
			s = c
		case bool:
			s = strconv.FormatBool(c)
		case int64:
			s = strconv.FormatInt(c, 10)
		case float64:
			s = strconv.FormatFloat(c, 'g', -1, 64)
		case time.Time:
			s = c.String()
		}

		switch op {
		case sql.MAT:
			// Every element must match, so a single miss decides it.
			if b, e := m.IndexString(s, r); b == -1 && e == -1 {
				return false
			}
		case sql.NAT:
			// A single miss is enough for the negated operator.
			if b, e := m.IndexString(s, r); b == -1 && e == -1 {
				return true
			}
		case sql.MAY:
			// A single hit is enough for the optional operator.
			if b, e := m.IndexString(s, r); b != -1 && e != -1 {
				return true
			}
		}

	}

	// Reaching this point means no element decided the outcome early:
	// for sql.MAT every element matched, for sql.NAT every element
	// matched as well, and for sql.MAY no element matched.
	return op == sql.MAT

}

View file

@ -680,15 +680,28 @@ func TestFetch(t *testing.T) {
SELECT * FROM "test" WHERE "test" "a true test string"; SELECT * FROM "test" WHERE "test" "a true test string";
SELECT * FROM "test" WHERE "test" "a true test string"; SELECT * FROM "test" WHERE "test" "a true test string";
SELECT * FROM "test" WHERE "a true test string" ~ "test";
SELECT * FROM "test" WHERE "a true test string" ~ "Test";
SELECT * FROM "test" WHERE "a true test string" !~ "test";
SELECT * FROM "test" WHERE "a true test string" !~ "Test";
SELECT * FROM "test" WHERE "a true test string" ?~ "test";
SELECT * FROM "test" WHERE "a true test string" ?~ "Test";
SELECT * FROM "test" WHERE "a true test string" = /test/; SELECT * FROM "test" WHERE "a true test string" = /test/;
SELECT * FROM "test" WHERE "a true test string" = /Test/;
SELECT * FROM "test" WHERE "a true test string" = /(?i)Test/;
SELECT * FROM "test" WHERE "a true test string" != /test/; SELECT * FROM "test" WHERE "a true test string" != /test/;
SELECT * FROM "test" WHERE "a true test string" != /Test/;
SELECT * FROM "test" WHERE "a true test string" != /(?i)Test/;
SELECT * FROM "test" WHERE "a true test string" ?= /test/; SELECT * FROM "test" WHERE "a true test string" ?= /test/;
SELECT * FROM "test" WHERE "a true test string" ?= /Test/;
SELECT * FROM "test" WHERE "a true test string" ?= /(?i)Test/;
` `
res, err := Execute(setupKV(), txt, nil) res, err := Execute(setupKV(), txt, nil)
So(err, ShouldBeNil) So(err, ShouldBeNil)
So(res, ShouldHaveLength, 21) So(res, ShouldHaveLength, 33)
So(res[2].Result, ShouldHaveLength, 1) So(res[2].Result, ShouldHaveLength, 1)
So(res[3].Result, ShouldHaveLength, 1) So(res[3].Result, ShouldHaveLength, 1)
@ -711,8 +724,21 @@ func TestFetch(t *testing.T) {
So(res[17].Result, ShouldHaveLength, 0) So(res[17].Result, ShouldHaveLength, 0)
So(res[18].Result, ShouldHaveLength, 1) So(res[18].Result, ShouldHaveLength, 1)
So(res[19].Result, ShouldHaveLength, 0) So(res[19].Result, ShouldHaveLength, 1)
So(res[20].Result, ShouldHaveLength, 1) So(res[20].Result, ShouldHaveLength, 0)
So(res[21].Result, ShouldHaveLength, 0)
So(res[22].Result, ShouldHaveLength, 1)
So(res[23].Result, ShouldHaveLength, 1)
So(res[24].Result, ShouldHaveLength, 1)
So(res[25].Result, ShouldHaveLength, 0)
So(res[26].Result, ShouldHaveLength, 1)
So(res[27].Result, ShouldHaveLength, 0)
So(res[28].Result, ShouldHaveLength, 1)
So(res[29].Result, ShouldHaveLength, 0)
So(res[30].Result, ShouldHaveLength, 1)
So(res[31].Result, ShouldHaveLength, 0)
So(res[32].Result, ShouldHaveLength, 1)
}) })
@ -784,37 +810,58 @@ func TestFetch(t *testing.T) {
SELECT * FROM "test" WHERE [ [1,2,3] ] [1,2,3]; SELECT * FROM "test" WHERE [ [1,2,3] ] [1,2,3];
SELECT * FROM "test" WHERE [ [1,2,3] ] [1,2,3]; SELECT * FROM "test" WHERE [ [1,2,3] ] [1,2,3];
SELECT * FROM "test" WHERE [1,2,3,4,5] [1,3,5]; SELECT * FROM "test" WHERE [1,2,3,4,5] [1,2,3];
SELECT * FROM "test" WHERE [1,2,3,4,5] [2,4,6]; SELECT * FROM "test" WHERE [1,2,3,4,5] [2,4,6];
SELECT * FROM "test" WHERE [1,3,5,7,9] [1,3,5]; SELECT * FROM "test" WHERE [1,3,5,7,9] [1,2,3];
SELECT * FROM "test" WHERE [1,3,5,7,9] [2,4,6]; SELECT * FROM "test" WHERE [1,3,5,7,9] [2,4,6];
SELECT * FROM "test" WHERE [1,3,5,7,9] [1,3,5]; SELECT * FROM "test" WHERE [1,3,5,7,9] [1,2,3];
SELECT * FROM "test" WHERE [1,3,5,7,9] [2,4,6]; SELECT * FROM "test" WHERE [1,3,5,7,9] [2,4,6];
SELECT * FROM "test" WHERE [1,3,5] [1,2,3,4,5]; SELECT * FROM "test" WHERE [1,2,3] [1,2,3,4,5];
SELECT * FROM "test" WHERE [2,4,6] [1,2,3,4,5]; SELECT * FROM "test" WHERE [2,4,6] [1,2,3,4,5];
SELECT * FROM "test" WHERE [1,3,5] [1,3,5,7,9]; SELECT * FROM "test" WHERE [1,2,3] [1,3,5,7,9];
SELECT * FROM "test" WHERE [2,4,6] [1,3,5,7,9]; SELECT * FROM "test" WHERE [2,4,6] [1,3,5,7,9];
SELECT * FROM "test" WHERE [1,3,5] [1,3,5,7,9]; SELECT * FROM "test" WHERE [1,2,3] [1,3,5,7,9];
SELECT * FROM "test" WHERE [2,4,6] [1,3,5,7,9]; SELECT * FROM "test" WHERE [2,4,6] [1,3,5,7,9];
SELECT * FROM "test" WHERE [] = /[0-9]/;
SELECT * FROM "test" WHERE [1,2,3] = /[0-9]/; SELECT * FROM "test" WHERE [1,2,3] = /[0-9]/;
SELECT * FROM "test" WHERE [1,"2",true] = /[0-9]/; SELECT * FROM "test" WHERE [1,"2",true] = /[0-9]/;
SELECT * FROM "test" WHERE ["a","b","c"] = /[0-9]/; SELECT * FROM "test" WHERE ["a","b","c"] = /[0-9]/;
SELECT * FROM "test" WHERE [] != /[0-9]/;
SELECT * FROM "test" WHERE [1,2,3] != /[0-9]/; SELECT * FROM "test" WHERE [1,2,3] != /[0-9]/;
SELECT * FROM "test" WHERE [1,"2",true] != /[0-9]/; SELECT * FROM "test" WHERE [1,"2",true] != /[0-9]/;
SELECT * FROM "test" WHERE ["a","b","c"] != /[0-9]/; SELECT * FROM "test" WHERE ["a","b","c"] != /[0-9]/;
SELECT * FROM "test" WHERE [] ?= /[0-9]/;
SELECT * FROM "test" WHERE [1,2,3] ?= /[0-9]/; SELECT * FROM "test" WHERE [1,2,3] ?= /[0-9]/;
SELECT * FROM "test" WHERE [1,"2",true] ?= /[0-9]/; SELECT * FROM "test" WHERE [1,"2",true] ?= /[0-9]/;
SELECT * FROM "test" WHERE ["a","b","c"] ?= /[0-9]/; SELECT * FROM "test" WHERE ["a","b","c"] ?= /[0-9]/;
SELECT * FROM "test" WHERE [] ~ "pro";
SELECT * FROM "test" WHERE [1,2,3] ~ "pro";
SELECT * FROM "test" WHERE [1,"2","pro"] ~ "pro";
SELECT * FROM "test" WHERE ["a","b","c","gopros"] ~ "Pro";
SELECT * FROM "test" WHERE ["gopros","gopros","gopros"] ~ "Pro";
SELECT * FROM "test" WHERE [] !~ "pro";
SELECT * FROM "test" WHERE [1,2,3] !~ "pro";
SELECT * FROM "test" WHERE [1,"2","pro"] !~ "pro";
SELECT * FROM "test" WHERE ["a","b","c","gopros"] !~ "Pro";
SELECT * FROM "test" WHERE ["gopros","gopros","gopros"] !~ "Pro";
SELECT * FROM "test" WHERE [] ?~ "pro";
SELECT * FROM "test" WHERE [1,2,3] ?~ "pro";
SELECT * FROM "test" WHERE [1,"2","pro"] ?~ "pro";
SELECT * FROM "test" WHERE ["a","b","c","gopros"] ?~ "Pro";
SELECT * FROM "test" WHERE ["gopros","gopros","gopros"] ?~ "Pro";
` `
res, err := Execute(setupKV(), txt, nil) res, err := Execute(setupKV(), txt, nil)
So(err, ShouldBeNil) So(err, ShouldBeNil)
So(res, ShouldHaveLength, 39) So(res, ShouldHaveLength, 57)
So(res[2].Result, ShouldHaveLength, 1) So(res[2].Result, ShouldHaveLength, 1)
So(res[3].Result, ShouldHaveLength, 1) So(res[3].Result, ShouldHaveLength, 1)
@ -850,16 +897,38 @@ func TestFetch(t *testing.T) {
So(res[28].Result, ShouldHaveLength, 0) So(res[28].Result, ShouldHaveLength, 0)
So(res[29].Result, ShouldHaveLength, 1) So(res[29].Result, ShouldHaveLength, 1)
So(res[30].Result, ShouldHaveLength, 1) So(res[30].Result, ShouldHaveLength, 0)
So(res[31].Result, ShouldHaveLength, 0) So(res[31].Result, ShouldHaveLength, 1)
So(res[32].Result, ShouldHaveLength, 0) So(res[32].Result, ShouldHaveLength, 0)
So(res[33].Result, ShouldHaveLength, 0) So(res[33].Result, ShouldHaveLength, 0)
So(res[34].Result, ShouldHaveLength, 0)
So(res[35].Result, ShouldHaveLength, 1)
So(res[34].Result, ShouldHaveLength, 1)
So(res[35].Result, ShouldHaveLength, 0)
So(res[36].Result, ShouldHaveLength, 1) So(res[36].Result, ShouldHaveLength, 1)
So(res[37].Result, ShouldHaveLength, 1) So(res[37].Result, ShouldHaveLength, 1)
So(res[38].Result, ShouldHaveLength, 0) So(res[38].Result, ShouldHaveLength, 0)
So(res[39].Result, ShouldHaveLength, 1)
So(res[40].Result, ShouldHaveLength, 1)
So(res[41].Result, ShouldHaveLength, 0)
So(res[42].Result, ShouldHaveLength, 0)
So(res[43].Result, ShouldHaveLength, 0)
So(res[44].Result, ShouldHaveLength, 0)
So(res[45].Result, ShouldHaveLength, 0)
So(res[46].Result, ShouldHaveLength, 1)
So(res[47].Result, ShouldHaveLength, 1)
So(res[48].Result, ShouldHaveLength, 1)
So(res[49].Result, ShouldHaveLength, 1)
So(res[50].Result, ShouldHaveLength, 1)
So(res[51].Result, ShouldHaveLength, 0)
So(res[52].Result, ShouldHaveLength, 0)
So(res[53].Result, ShouldHaveLength, 0)
So(res[54].Result, ShouldHaveLength, 1)
So(res[55].Result, ShouldHaveLength, 1)
So(res[56].Result, ShouldHaveLength, 1)
}) })

View file

@ -105,7 +105,7 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
case '≥': case '≥':
return GTE, string(ch), val return GTE, string(ch), val
case '~': case '~':
return SIN, string(ch), val return MAT, string(ch), val
case '∋': case '∋':
return SIN, string(ch), val return SIN, string(ch), val
case '∌': case '∌':
@ -166,7 +166,7 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
chn := s.next() chn := s.next()
switch { switch {
case chn == '~': case chn == '~':
return SIN, "=~", val return MAT, "=~", val
case chn == '=': case chn == '=':
return EEQ, "==", val return EEQ, "==", val
default: default:
@ -178,6 +178,8 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
switch { switch {
case chn == '=': case chn == '=':
return ANY, "?=", val return ANY, "?=", val
case chn == '~':
return MAY, "?~", val
default: default:
s.undo() s.undo()
return QMARK, string(ch), val return QMARK, string(ch), val
@ -193,7 +195,7 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
return NEQ, "!=", val return NEQ, "!=", val
} }
case chn == '~': case chn == '~':
return SNI, "!~", val return NAT, "!~", val
default: default:
s.undo() s.undo()
return EXC, string(ch), val return EXC, string(ch), val

View file

@ -88,6 +88,9 @@ const (
SNI // ∌ SNI // ∌
INS // ∈ INS // ∈
NIS // ∉ NIS // ∉
MAT // ~
NAT // !~
MAY // ?~
operatorEnd operatorEnd
@ -259,6 +262,9 @@ var tokens = [...]string{
SNI: "∌", SNI: "∌",
INS: "∈", INS: "∈",
NIS: "∉", NIS: "∉",
MAT: "~",
NAT: "!~",
MAY: "?~",
// keywords // keywords
@ -400,7 +406,7 @@ func (tok Token) precedence() int {
return 1 return 1
case EQ, NEQ, EEQ, NEE, case EQ, NEQ, EEQ, NEE,
LT, LTE, GT, GTE, LT, LTE, GT, GTE,
ANY, SIN, SNI, INS, NIS, ANY, SIN, SNI, INS, NIS, MAT, NAT, MAY,
CONTAINSALL, CONTAINSNONE, CONTAINSSOME, CONTAINSALL, CONTAINSNONE, CONTAINSSOME,
ALLCONTAINEDIN, NONECONTAINEDIN, SOMECONTAINEDIN: ALLCONTAINEDIN, NONECONTAINEDIN, SOMECONTAINEDIN:
return 2 return 2