Add fuzzy string search to conditional SQL clauses

This commit is contained in:
Tobie Morgan Hitchcock 2018-04-13 20:33:40 +01:00
parent 9b4ff941b6
commit 3f7d7fc863
4 changed files with 208 additions and 24 deletions

View file

@ -23,6 +23,9 @@ import (
"strings"
"time"
"golang.org/x/text/language"
"golang.org/x/text/search"
"github.com/abcum/surreal/cnf"
"github.com/abcum/surreal/sql"
"github.com/abcum/surreal/util/data"
@ -236,7 +239,7 @@ func (e *executor) fetch(ctx context.Context, val interface{}, doc *data.Doc) (o
return binaryMath(val.Op, l, r), nil
case sql.EQ, sql.NEQ, sql.ANY, sql.LT, sql.LTE, sql.GT, sql.GTE:
return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil
case sql.SIN, sql.SNI, sql.INS, sql.NIS:
case sql.SIN, sql.SNI, sql.INS, sql.NIS, sql.MAT, sql.NAT, sql.MAY:
return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil
case sql.CONTAINSALL, sql.CONTAINSSOME, sql.CONTAINSNONE:
return binaryCheck(val.Op, l, r, val.LHS, val.RHS, doc), nil
@ -783,9 +786,9 @@ func negOp(op sql.Token) bool {
func chkOp(op sql.Token) int8 {
switch op {
case sql.EQ, sql.SIN, sql.INS:
case sql.EQ, sql.SIN, sql.INS, sql.MAT, sql.ANY:
return +1
case sql.NEQ, sql.SNI, sql.NIS:
case sql.NEQ, sql.SNI, sql.NIS, sql.NAT, sql.MAY:
return -1
case sql.CONTAINSALL:
return +1
@ -846,6 +849,15 @@ func chkString(op sql.Token, a, b string) (val bool) {
return strings.Contains(a, b) == true
case sql.SNI:
return strings.Contains(a, b) == false
case sql.MAT:
b, e := search.New(language.Und, search.Loose).IndexString(a, b)
return b != -1 && e != -1
case sql.NAT:
b, e := search.New(language.Und, search.Loose).IndexString(a, b)
return b == -1 && e == -1
case sql.MAY:
b, e := search.New(language.Und, search.Loose).IndexString(a, b)
return b != -1 && e != -1
}
return negOp(op)
}
@ -958,6 +970,21 @@ func chkArrayL(op sql.Token, a []interface{}, i interface{}) (val bool) {
default:
return data.Consume(a).Contains(i) == false
}
case sql.MAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.NAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.MAY:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
}
return negOp(op)
}
@ -992,6 +1019,21 @@ func chkArrayR(op sql.Token, i interface{}, a []interface{}) (val bool) {
default:
return data.Consume(a).Contains(i) == false
}
case sql.MAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.NAT:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
case sql.MAY:
switch s := i.(type) {
case string:
return chkSearch(op, a, s)
}
}
return negOp(op)
}
@ -1062,6 +1104,10 @@ func chkArray(op sql.Token, a []interface{}, b []interface{}) (val bool) {
func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
if len(a) == 0 {
return op == sql.NEQ
}
for _, v := range a {
var s string
@ -1088,8 +1134,8 @@ func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
}
if op == sql.NEQ {
if chkRegex(sql.EQ, s, r) == true {
return false
if chkRegex(sql.EQ, s, r) == false {
return true
}
}
@ -1105,7 +1151,7 @@ func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
case sql.EQ:
return true
case sql.NEQ:
return true
return false
case sql.ANY:
return false
}
@ -1113,3 +1159,64 @@ func chkMatch(op sql.Token, a []interface{}, r *regexp.Regexp) (val bool) {
return
}
// chkSearch reports whether the elements of the array a satisfy the fuzzy
// search operator op when compared against the search term r.
//
// Every element is first rendered as a string (string, bool, int64, float64
// and time.Time values are supported) and then searched for r using a
// search.Loose matcher built for language.Und. The operators behave as:
//
//   - sql.MAT (~)  true only when every element contains a match for r.
//   - sql.NAT (!~) true as soon as one element contains no match for r.
//   - sql.MAY (?~) true as soon as one element contains a match for r.
//
// An empty array satisfies sql.NAT only. An element of any other type makes
// the check return false immediately, regardless of the operator. Any other
// operator yields false.
func chkSearch(op sql.Token, a []interface{}, r string) (val bool) {

	if len(a) == 0 {
		return op == sql.NAT
	}

	// The matcher and the compiled search term are the same for every
	// element, so build them once here rather than once per element.
	pat := search.New(language.Und, search.Loose).CompileString(r)

	for _, v := range a {

		var s string

		switch c := v.(type) {
		default:
			// Values which have no string form can not be searched.
			return false
		case string:
			s = c
		case bool:
			s = strconv.FormatBool(c)
		case int64:
			s = strconv.FormatInt(c, 10)
		case float64:
			s = strconv.FormatFloat(c, 'g', -1, 64)
		case time.Time:
			s = c.String()
		}

		// IndexString reports -1 for both offsets when nothing matched.
		beg, end := pat.IndexString(s)
		missing := beg == -1 && end == -1
		found := beg != -1 && end != -1

		switch op {
		case sql.MAT:
			// Every element must match, so one miss decides it.
			if missing {
				return false
			}
		case sql.NAT:
			// A single element which does not match is enough.
			if missing {
				return true
			}
		case sql.MAY:
			// A single element which matches is enough.
			if found {
				return true
			}
		}

	}

	// The loop finished without an early decision: every element matched
	// for MAT and NAT, and no element matched for MAY.
	switch op {
	case sql.MAT:
		return true
	case sql.NAT:
		return false
	case sql.MAY:
		return false
	}

	return

}

View file

@ -680,15 +680,28 @@ func TestFetch(t *testing.T) {
SELECT * FROM "test" WHERE "test" "a true test string";
SELECT * FROM "test" WHERE "test" "a true test string";
SELECT * FROM "test" WHERE "a true test string" ~ "test";
SELECT * FROM "test" WHERE "a true test string" ~ "Test";
SELECT * FROM "test" WHERE "a true test string" !~ "test";
SELECT * FROM "test" WHERE "a true test string" !~ "Test";
SELECT * FROM "test" WHERE "a true test string" ?~ "test";
SELECT * FROM "test" WHERE "a true test string" ?~ "Test";
SELECT * FROM "test" WHERE "a true test string" = /test/;
SELECT * FROM "test" WHERE "a true test string" = /Test/;
SELECT * FROM "test" WHERE "a true test string" = /(?i)Test/;
SELECT * FROM "test" WHERE "a true test string" != /test/;
SELECT * FROM "test" WHERE "a true test string" != /Test/;
SELECT * FROM "test" WHERE "a true test string" != /(?i)Test/;
SELECT * FROM "test" WHERE "a true test string" ?= /test/;
SELECT * FROM "test" WHERE "a true test string" ?= /Test/;
SELECT * FROM "test" WHERE "a true test string" ?= /(?i)Test/;
`
res, err := Execute(setupKV(), txt, nil)
So(err, ShouldBeNil)
So(res, ShouldHaveLength, 21)
So(res, ShouldHaveLength, 33)
So(res[2].Result, ShouldHaveLength, 1)
So(res[3].Result, ShouldHaveLength, 1)
@ -711,8 +724,21 @@ func TestFetch(t *testing.T) {
So(res[17].Result, ShouldHaveLength, 0)
So(res[18].Result, ShouldHaveLength, 1)
So(res[19].Result, ShouldHaveLength, 0)
So(res[20].Result, ShouldHaveLength, 1)
So(res[19].Result, ShouldHaveLength, 1)
So(res[20].Result, ShouldHaveLength, 0)
So(res[21].Result, ShouldHaveLength, 0)
So(res[22].Result, ShouldHaveLength, 1)
So(res[23].Result, ShouldHaveLength, 1)
So(res[24].Result, ShouldHaveLength, 1)
So(res[25].Result, ShouldHaveLength, 0)
So(res[26].Result, ShouldHaveLength, 1)
So(res[27].Result, ShouldHaveLength, 0)
So(res[28].Result, ShouldHaveLength, 1)
So(res[29].Result, ShouldHaveLength, 0)
So(res[30].Result, ShouldHaveLength, 1)
So(res[31].Result, ShouldHaveLength, 0)
So(res[32].Result, ShouldHaveLength, 1)
})
@ -784,37 +810,58 @@ func TestFetch(t *testing.T) {
SELECT * FROM "test" WHERE [ [1,2,3] ] [1,2,3];
SELECT * FROM "test" WHERE [ [1,2,3] ] [1,2,3];
SELECT * FROM "test" WHERE [1,2,3,4,5] [1,3,5];
SELECT * FROM "test" WHERE [1,2,3,4,5] [1,2,3];
SELECT * FROM "test" WHERE [1,2,3,4,5] [2,4,6];
SELECT * FROM "test" WHERE [1,3,5,7,9] [1,3,5];
SELECT * FROM "test" WHERE [1,3,5,7,9] [1,2,3];
SELECT * FROM "test" WHERE [1,3,5,7,9] [2,4,6];
SELECT * FROM "test" WHERE [1,3,5,7,9] [1,3,5];
SELECT * FROM "test" WHERE [1,3,5,7,9] [1,2,3];
SELECT * FROM "test" WHERE [1,3,5,7,9] [2,4,6];
SELECT * FROM "test" WHERE [1,3,5] [1,2,3,4,5];
SELECT * FROM "test" WHERE [1,2,3] [1,2,3,4,5];
SELECT * FROM "test" WHERE [2,4,6] [1,2,3,4,5];
SELECT * FROM "test" WHERE [1,3,5] [1,3,5,7,9];
SELECT * FROM "test" WHERE [1,2,3] [1,3,5,7,9];
SELECT * FROM "test" WHERE [2,4,6] [1,3,5,7,9];
SELECT * FROM "test" WHERE [1,3,5] [1,3,5,7,9];
SELECT * FROM "test" WHERE [1,2,3] [1,3,5,7,9];
SELECT * FROM "test" WHERE [2,4,6] [1,3,5,7,9];
SELECT * FROM "test" WHERE [] = /[0-9]/;
SELECT * FROM "test" WHERE [1,2,3] = /[0-9]/;
SELECT * FROM "test" WHERE [1,"2",true] = /[0-9]/;
SELECT * FROM "test" WHERE ["a","b","c"] = /[0-9]/;
SELECT * FROM "test" WHERE [] != /[0-9]/;
SELECT * FROM "test" WHERE [1,2,3] != /[0-9]/;
SELECT * FROM "test" WHERE [1,"2",true] != /[0-9]/;
SELECT * FROM "test" WHERE ["a","b","c"] != /[0-9]/;
SELECT * FROM "test" WHERE [] ?= /[0-9]/;
SELECT * FROM "test" WHERE [1,2,3] ?= /[0-9]/;
SELECT * FROM "test" WHERE [1,"2",true] ?= /[0-9]/;
SELECT * FROM "test" WHERE ["a","b","c"] ?= /[0-9]/;
SELECT * FROM "test" WHERE [] ~ "pro";
SELECT * FROM "test" WHERE [1,2,3] ~ "pro";
SELECT * FROM "test" WHERE [1,"2","pro"] ~ "pro";
SELECT * FROM "test" WHERE ["a","b","c","gopros"] ~ "Pro";
SELECT * FROM "test" WHERE ["gopros","gopros","gopros"] ~ "Pro";
SELECT * FROM "test" WHERE [] !~ "pro";
SELECT * FROM "test" WHERE [1,2,3] !~ "pro";
SELECT * FROM "test" WHERE [1,"2","pro"] !~ "pro";
SELECT * FROM "test" WHERE ["a","b","c","gopros"] !~ "Pro";
SELECT * FROM "test" WHERE ["gopros","gopros","gopros"] !~ "Pro";
SELECT * FROM "test" WHERE [] ?~ "pro";
SELECT * FROM "test" WHERE [1,2,3] ?~ "pro";
SELECT * FROM "test" WHERE [1,"2","pro"] ?~ "pro";
SELECT * FROM "test" WHERE ["a","b","c","gopros"] ?~ "Pro";
SELECT * FROM "test" WHERE ["gopros","gopros","gopros"] ?~ "Pro";
`
res, err := Execute(setupKV(), txt, nil)
So(err, ShouldBeNil)
So(res, ShouldHaveLength, 39)
So(res, ShouldHaveLength, 57)
So(res[2].Result, ShouldHaveLength, 1)
So(res[3].Result, ShouldHaveLength, 1)
@ -850,16 +897,38 @@ func TestFetch(t *testing.T) {
So(res[28].Result, ShouldHaveLength, 0)
So(res[29].Result, ShouldHaveLength, 1)
So(res[30].Result, ShouldHaveLength, 1)
So(res[31].Result, ShouldHaveLength, 0)
So(res[30].Result, ShouldHaveLength, 0)
So(res[31].Result, ShouldHaveLength, 1)
So(res[32].Result, ShouldHaveLength, 0)
So(res[33].Result, ShouldHaveLength, 0)
So(res[34].Result, ShouldHaveLength, 0)
So(res[35].Result, ShouldHaveLength, 1)
So(res[34].Result, ShouldHaveLength, 1)
So(res[35].Result, ShouldHaveLength, 0)
So(res[36].Result, ShouldHaveLength, 1)
So(res[37].Result, ShouldHaveLength, 1)
So(res[38].Result, ShouldHaveLength, 0)
So(res[39].Result, ShouldHaveLength, 1)
So(res[40].Result, ShouldHaveLength, 1)
So(res[41].Result, ShouldHaveLength, 0)
So(res[42].Result, ShouldHaveLength, 0)
So(res[43].Result, ShouldHaveLength, 0)
So(res[44].Result, ShouldHaveLength, 0)
So(res[45].Result, ShouldHaveLength, 0)
So(res[46].Result, ShouldHaveLength, 1)
So(res[47].Result, ShouldHaveLength, 1)
So(res[48].Result, ShouldHaveLength, 1)
So(res[49].Result, ShouldHaveLength, 1)
So(res[50].Result, ShouldHaveLength, 1)
So(res[51].Result, ShouldHaveLength, 0)
So(res[52].Result, ShouldHaveLength, 0)
So(res[53].Result, ShouldHaveLength, 0)
So(res[54].Result, ShouldHaveLength, 1)
So(res[55].Result, ShouldHaveLength, 1)
So(res[56].Result, ShouldHaveLength, 1)
})

View file

@ -105,7 +105,7 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
case '≥':
return GTE, string(ch), val
case '~':
return SIN, string(ch), val
return MAT, string(ch), val
case '∋':
return SIN, string(ch), val
case '∌':
@ -166,7 +166,7 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
chn := s.next()
switch {
case chn == '~':
return SIN, "=~", val
return MAT, "=~", val
case chn == '=':
return EEQ, "==", val
default:
@ -178,6 +178,8 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
switch {
case chn == '=':
return ANY, "?=", val
case chn == '~':
return MAY, "?~", val
default:
s.undo()
return QMARK, string(ch), val
@ -193,7 +195,7 @@ func (s *scanner) scan() (tok Token, lit string, val interface{}) {
return NEQ, "!=", val
}
case chn == '~':
return SNI, "!~", val
return NAT, "!~", val
default:
s.undo()
return EXC, string(ch), val

View file

@ -88,6 +88,9 @@ const (
SNI // ∌
INS // ∈
NIS // ∉
MAT // ~
NAT // !~
MAY // ?~
operatorEnd
@ -259,6 +262,9 @@ var tokens = [...]string{
SNI: "∌",
INS: "∈",
NIS: "∉",
MAT: "~",
NAT: "!~",
MAY: "?~",
// keywords
@ -400,7 +406,7 @@ func (tok Token) precedence() int {
return 1
case EQ, NEQ, EEQ, NEE,
LT, LTE, GT, GTE,
ANY, SIN, SNI, INS, NIS,
ANY, SIN, SNI, INS, NIS, MAT, NAT, MAY,
CONTAINSALL, CONTAINSNONE, CONTAINSSOME,
ALLCONTAINEDIN, NONECONTAINEDIN, SOMECONTAINEDIN:
return 2