// Copyright © 2016 Abcum Ltd // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // github.com/sergi/go-diff/diffmatchpatch // github.com/yudai/gojsondiff // github.com/yudai/golcs package diff /*type Diff struct{} func (d *Diff) ToPACK() []byte { return []byte("DIFF") }*/ import ( "container/list" "encoding/json" "reflect" "sort" dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/yudai/golcs" ) // A Diff holds deltas generated by a Differ type Diff interface { // Deltas returns Deltas that describe differences between two JSON objects Deltas() []Delta // Modified returnes true if Diff has at least one Delta. Modified() bool } type diff struct { deltas []Delta } func (diff *diff) Deltas() []Delta { return diff.deltas } func (diff *diff) Modified() bool { return len(diff.deltas) > 0 } // A Differ conmapres JSON objects and apply patches type Differ struct { textDiffMinimumLength int } // New returns new Differ with default configuration func New() *Differ { return &Differ{ textDiffMinimumLength: 10, } } // Compare compares two JSON strings as []bytes and return a Diff object. func (differ *Differ) Compare(left []byte, right []byte) (Diff, error) { var leftMap, rightMap map[string]interface{} err := json.Unmarshal(left, &leftMap) if err != nil { return nil, err } err = json.Unmarshal(right, &rightMap) if err != nil { return nil, err } return differ.CompareObjects(leftMap, rightMap), nil } // CompareObjects compares two JSON object as map[string]interface{} // and return a Diff object. func (differ *Differ) CompareObjects(left map[string]interface{}, right map[string]interface{}) Diff { deltas := differ.compareMaps(left, right) return &diff{deltas: deltas} } func (differ *Differ) compareMaps(left map[string]interface{}, right map[string]interface{}) (deltas []Delta) { deltas = make([]Delta, 0) names := sortedKeys(left) // stabilize delta order for _, name := range names { if rightValue, ok := right[name]; ok { same, delta := differ.compareValues(Name(name), left[name], rightValue) if !same { deltas = append(deltas, delta) } } else { deltas = append(deltas, NewDeleted(Name(name), left[name])) } } names = sortedKeys(right) // stabilize delta order for _, name := range names { if _, ok := left[name]; !ok { deltas = append(deltas, NewAdded(Name(name), right[name])) } } return deltas } // ApplyPatch applies a Diff to an JSON object. This method is destructive. func (differ *Differ) ApplyPatch(json map[string]interface{}, patch Diff) { applyDeltas(patch.Deltas(), json) } type maybe struct { index int lcsIndex int item interface{} } func (differ *Differ) compareArrays( left []interface{}, right []interface{}, ) (deltas []Delta) { deltas = make([]Delta, 0) // LCS index pairs lcsPairs := lcs.New(left, right).IndexPairs() // list up items not in LCS, they are maybe deleted maybeDeleted := list.New() // but maybe moved or modified lcsI := 0 for i, leftValue := range left { if lcsI < len(lcsPairs) && lcsPairs[lcsI].Left == i { lcsI++ } else { maybeDeleted.PushBack(maybe{index: i, lcsIndex: lcsI, item: leftValue}) } } // list up items not in LCS, they are maybe Added maybeAdded := list.New() // but maybe moved or modified lcsI = 0 for i, rightValue := range right { if lcsI < len(lcsPairs) && lcsPairs[lcsI].Right == i { lcsI++ } else { maybeAdded.PushBack(maybe{index: i, lcsIndex: lcsI, item: rightValue}) } } // find moved items var delNext *list.Element // for prefetch to remove item in iteration for delCandidate := maybeDeleted.Front(); delCandidate != nil; delCandidate = delNext { delCan := delCandidate.Value.(maybe) delNext = delCandidate.Next() for addCandidate := maybeAdded.Front(); addCandidate != nil; addCandidate = addCandidate.Next() { addCan := addCandidate.Value.(maybe) if reflect.DeepEqual(delCan.item, addCan.item) { deltas = append(deltas, NewMoved(Index(delCan.index), Index(addCan.index), delCan.item, nil)) maybeAdded.Remove(addCandidate) maybeDeleted.Remove(delCandidate) break } } } // find modified or add+del prevIndexDel := 0 prevIndexAdd := 0 delElement := maybeDeleted.Front() addElement := maybeAdded.Front() for i := 0; i <= len(lcsPairs); i++ { // not "< len(lcsPairs)" var lcsPair lcs.IndexPair var delSize, addSize int if i < len(lcsPairs) { lcsPair = lcsPairs[i] delSize = lcsPair.Left - prevIndexDel - 1 addSize = lcsPair.Right - prevIndexAdd - 1 prevIndexDel = lcsPair.Left prevIndexAdd = lcsPair.Right } var delSlice []maybe if delSize > 0 { delSlice = make([]maybe, 0, delSize) } else { delSlice = make([]maybe, 0, maybeDeleted.Len()) } for ; delElement != nil; delElement = delElement.Next() { d := delElement.Value.(maybe) if d.lcsIndex != i { break } delSlice = append(delSlice, d) } var addSlice []maybe if addSize > 0 { addSlice = make([]maybe, 0, addSize) } else { addSlice = make([]maybe, 0, maybeAdded.Len()) } for ; addElement != nil; addElement = addElement.Next() { a := addElement.Value.(maybe) if a.lcsIndex != i { break } addSlice = append(addSlice, a) } if len(delSlice) > 0 && len(addSlice) > 0 { var bestDeltas []Delta bestDeltas, delSlice, addSlice = differ.maximizeSimilarities(delSlice, addSlice) for _, delta := range bestDeltas { deltas = append(deltas, delta) } } for _, del := range delSlice { deltas = append(deltas, NewDeleted(Index(del.index), del.item)) } for _, add := range addSlice { deltas = append(deltas, NewAdded(Index(add.index), add.item)) } } return deltas } func (differ *Differ) compareValues( position Position, left interface{}, right interface{}, ) (same bool, delta Delta) { if reflect.TypeOf(left) != reflect.TypeOf(right) { return false, NewModified(position, left, right) } switch left.(type) { case map[string]interface{}: l := left.(map[string]interface{}) childDeltas := differ.compareMaps(l, right.(map[string]interface{})) if len(childDeltas) > 0 { return false, NewObject(position, childDeltas) } case []interface{}: l := left.([]interface{}) childDeltas := differ.compareArrays(l, right.([]interface{})) if len(childDeltas) > 0 { return false, NewArray(position, childDeltas) } default: if !reflect.DeepEqual(left, right) { if reflect.ValueOf(left).Kind() == reflect.String && reflect.ValueOf(right).Kind() == reflect.String && differ.textDiffMinimumLength <= len(left.(string)) { textDiff := dmp.New() patchs := textDiff.PatchMake(left.(string), right.(string)) return false, NewTextDiff(position, patchs, left, right) } else { return false, NewModified(position, left, right) } } } return true, nil } func applyDeltas(deltas []Delta, object interface{}) interface{} { preDeltas := make(preDeltas, 0) for _, delta := range deltas { switch delta.(type) { case PreDelta: preDeltas = append(preDeltas, delta.(PreDelta)) } } sort.Sort(preDeltas) for _, delta := range preDeltas { object = delta.PreApply(object) } postDeltas := make(postDeltas, 0, len(deltas)-len(preDeltas)) for _, delta := range deltas { switch delta.(type) { case PostDelta: postDeltas = append(postDeltas, delta.(PostDelta)) } } sort.Sort(postDeltas) for _, delta := range postDeltas { object = delta.PostApply(object) } return object } func (differ *Differ) maximizeSimilarities(left []maybe, right []maybe) (resultDeltas []Delta, freeLeft, freeRight []maybe) { deltaTable := make([][]Delta, len(left)) for i := 0; i < len(left); i++ { deltaTable[i] = make([]Delta, len(right)) } for i, leftValue := range left { for j, rightValue := range right { _, delta := differ.compareValues(Index(rightValue.index), leftValue.item, rightValue.item) deltaTable[i][j] = delta } } sizeX := len(left) + 1 // margins for both sides sizeY := len(right) + 1 // fill out with similarities dpTable := make([][]float64, sizeX) for i := 0; i < sizeX; i++ { dpTable[i] = make([]float64, sizeY) } for x := sizeX - 2; x >= 0; x-- { for y := sizeY - 2; y >= 0; y-- { prevX := dpTable[x+1][y] prevY := dpTable[x][y+1] score := deltaTable[x][y].Similarity() + dpTable[x+1][y+1] dpTable[x][y] = max(prevX, prevY, score) } } minLength := len(left) if minLength > len(right) { minLength = len(right) } maxInvalidLength := minLength - 1 freeLeft = make([]maybe, 0, len(left)-minLength) freeRight = make([]maybe, 0, len(right)-minLength) resultDeltas = make([]Delta, 0, minLength) var x, y int for x, y = 0, 0; x <= sizeX-2 && y <= sizeY-2; { current := dpTable[x][y] nextX := dpTable[x+1][y] nextY := dpTable[x][y+1] xValidLength := len(left) - maxInvalidLength + y yValidLength := len(right) - maxInvalidLength + x if x+1 < xValidLength && current == nextX { freeLeft = append(freeLeft, left[x]) x++ } else if y+1 < yValidLength && current == nextY { freeRight = append(freeRight, right[y]) y++ } else { resultDeltas = append(resultDeltas, deltaTable[x][y]) x++ y++ } } for ; x < sizeX-1; x++ { freeLeft = append(freeLeft, left[x-1]) } for ; x < sizeY-1; y++ { freeLeft = append(freeRight, left[y-1]) } return resultDeltas, freeLeft, freeRight } func deltasSimilarity(deltas []Delta) (similarity float64) { for _, delta := range deltas { similarity += delta.Similarity() } similarity = similarity / float64(len(deltas)) return } func stringSimilarity(left, right string) (similarity float64) { matchingLength := float64( lcs.New( stringToInterfaceSlice(left), stringToInterfaceSlice(right), ).Length(), ) similarity = (matchingLength / float64(len(left))) * (matchingLength / float64(len(right))) return } func stringToInterfaceSlice(str string) []interface{} { s := make([]interface{}, len(str)) for i, v := range str { s[i] = v } return s } func sortedKeys(m map[string]interface{}) (keys []string) { keys = make([]string, 0, len(m)) for key, _ := range m { keys = append(keys, key) } sort.Strings(keys) return } func max(first float64, rest ...float64) (max float64) { max = first for _, value := range rest { if max < value { max = value } } return max }