From f93a5afb6e55d6941f749eae4d8cbcb45c30ff40 Mon Sep 17 00:00:00 2001 From: Tobie Morgan Hitchcock Date: Thu, 21 Jul 2016 22:50:53 +0100 Subject: [PATCH] Add first working DIFF implementation --- util/diff/delta.go | 461 +++++++++++++++++++++++++++++++++++++++++ util/diff/diff.go | 428 ++++++++++++++++++++++++++++++++++++++ util/diff/unmarshal.go | 131 ++++++++++++ util/form/ascii.go | 297 ++++++++++++++++++++++++++ util/form/delta.go | 124 +++++++++++ util/item/item.go | 21 +- 6 files changed, 1456 insertions(+), 6 deletions(-) create mode 100644 util/diff/delta.go create mode 100644 util/diff/diff.go create mode 100644 util/diff/unmarshal.go create mode 100755 util/form/ascii.go create mode 100755 util/form/delta.go diff --git a/util/diff/delta.go b/util/diff/delta.go new file mode 100644 index 00000000..2c840bc3 --- /dev/null +++ b/util/diff/delta.go @@ -0,0 +1,461 @@ +package diff + +import ( + "errors" + dmp "github.com/sergi/go-diff/diffmatchpatch" + "reflect" + "strconv" +) + +// A Delta represents an atomic difference between two JSON objects. +type Delta interface { + // Similarity calculates the similarity of the Delta values. + // The return value is normalized from 0 to 1, + // 0 is completely different and 1 is they are same + Similarity() (similarity float64) +} + +// To cache the calculated similarity, +// concrete Deltas can use similariter and similarityCache +type similariter interface { + similarity() (similarity float64) +} + +type similarityCache struct { + similariter + value float64 +} + +func newSimilarityCache(sim similariter) similarityCache { + cache := similarityCache{similariter: sim, value: -1} + return cache +} + +func (cache similarityCache) Similarity() (similarity float64) { + if cache.value < 0 { + cache.value = cache.similariter.similarity() + } + return cache.value +} + +// A Position represents the position of a Delta in an object or an array. +type Position interface { + // String returns the position as a string + String() (name string) + + // CompareTo returns a true if the Position is smaller than another Position. + // This function is used to sort Positions by the sort package. + CompareTo(another Position) bool +} + +// A Name is a Postition with a string, which means the delta is in an object. +type Name string + +func (n Name) String() (name string) { + return string(n) +} + +func (n Name) CompareTo(another Position) bool { + return n < another.(Name) +} + +// A Index is a Position with an int value, which means the Delta is in an Array. +type Index int + +func (i Index) String() (name string) { + return strconv.Itoa(int(i)) +} + +func (i Index) CompareTo(another Position) bool { + return i < another.(Index) +} + +// A PreDelta is a Delta that has a position of the left side JSON object. +// Deltas implements this interface should be applies before PostDeltas. +type PreDelta interface { + // PrePosition returns the Position. + PrePosition() Position + + // PreApply applies the delta to object. + PreApply(object interface{}) interface{} +} + +type preDelta struct{ Position } + +func (i preDelta) PrePosition() Position { + return Position(i.Position) +} + +type preDeltas []PreDelta + +// for sorting +func (s preDeltas) Len() int { + return len(s) +} + +// for sorting +func (s preDeltas) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +// for sorting +func (s preDeltas) Less(i, j int) bool { + return !s[i].PrePosition().CompareTo(s[j].PrePosition()) +} + +// A PreDelta is a Delta that has a position of the right side JSON object. +// Deltas implements this interface should be applies after PreDeltas. +type PostDelta interface { + // PostPosition returns the Position. + PostPosition() Position + + // PostApply applies the delta to object. + PostApply(object interface{}) interface{} +} + +type postDelta struct{ Position } + +func (i postDelta) PostPosition() Position { + return Position(i.Position) +} + +type postDeltas []PostDelta + +// for sorting +func (s postDeltas) Len() int { + return len(s) +} + +// for sorting +func (s postDeltas) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +// for sorting +func (s postDeltas) Less(i, j int) bool { + return s[i].PostPosition().CompareTo(s[j].PostPosition()) +} + +// An Object is a Delta that represents an object of JSON +type Object struct { + postDelta + similarityCache + + // Deltas holds internal Deltas + Deltas []Delta +} + +// NewObject returns an Object +func NewObject(position Position, deltas []Delta) *Object { + d := Object{postDelta: postDelta{position}, Deltas: deltas} + d.similarityCache = newSimilarityCache(&d) + return &d +} + +func (d *Object) PostApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + o := object.(map[string]interface{}) + n := string(d.PostPosition().(Name)) + o[n] = applyDeltas(d.Deltas, o[n]) + case []interface{}: + o := object.([]interface{}) + n := int(d.PostPosition().(Index)) + o[n] = applyDeltas(d.Deltas, o[n]) + } + return object +} + +func (d *Object) similarity() (similarity float64) { + similarity = deltasSimilarity(d.Deltas) + return +} + +// An Array is a Delta that represents an array of JSON +type Array struct { + postDelta + similarityCache + + // Deltas holds internal Deltas + Deltas []Delta +} + +// NewArray returns an Array +func NewArray(position Position, deltas []Delta) *Array { + d := Array{postDelta: postDelta{position}, Deltas: deltas} + d.similarityCache = newSimilarityCache(&d) + return &d +} + +func (d *Array) PostApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + o := object.(map[string]interface{}) + n := string(d.PostPosition().(Name)) + o[n] = applyDeltas(d.Deltas, o[n]) + case []interface{}: + o := object.([]interface{}) + n := int(d.PostPosition().(Index)) + o[n] = applyDeltas(d.Deltas, o[n]) + } + return object +} + +func (d *Array) similarity() (similarity float64) { + similarity = deltasSimilarity(d.Deltas) + return +} + +// An Added represents a new added field of an object or an array +type Added struct { + postDelta + similarityCache + + // Values holds the added value + Value interface{} +} + +// NewAdded returns a new Added +func NewAdded(position Position, value interface{}) *Added { + d := Added{postDelta: postDelta{position}, Value: value} + return &d +} + +func (d *Added) PostApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + object.(map[string]interface{})[string(d.PostPosition().(Name))] = d.Value + case []interface{}: + i := int(d.PostPosition().(Index)) + o := object.([]interface{}) + if i < len(o) { + o = append(o, 0) //dummy + copy(o[i+1:], o[i:]) + o[i] = d.Value + object = o + } else { + object = append(o, d.Value) + } + } + + return object +} + +func (d *Added) similarity() (similarity float64) { + return 0 +} + +// A Modified represents a field whose value is changed. +type Modified struct { + postDelta + similarityCache + + // The value before modification + OldValue interface{} + + // The value after modification + NewValue interface{} +} + +// NewModified returns a Modified +func NewModified(position Position, oldValue, newValue interface{}) *Modified { + d := Modified{ + postDelta: postDelta{position}, + OldValue: oldValue, + NewValue: newValue, + } + d.similarityCache = newSimilarityCache(&d) + return &d + +} + +func (d *Modified) PostApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + // TODO check old value + object.(map[string]interface{})[string(d.PostPosition().(Name))] = d.NewValue + case []interface{}: + object.([]interface{})[int(d.PostPosition().(Index))] = d.NewValue + } + return object +} + +func (d *Modified) similarity() (similarity float64) { + similarity += 0.3 // at least, they are at the same position + if reflect.TypeOf(d.OldValue) == reflect.TypeOf(d.NewValue) { + similarity += 0.3 // types are same + + switch d.OldValue.(type) { + case string: + similarity += 0.4 * stringSimilarity(d.OldValue.(string), d.NewValue.(string)) + case float64: + ratio := d.OldValue.(float64) / d.NewValue.(float64) + if ratio > 1 { + ratio = 1 / ratio + } + similarity += 0.4 * ratio + } + } + return +} + +// A TextDiff represents a Modified with TextDiff between the old and the new values. +type TextDiff struct { + Modified + + // Diff string + Diff []dmp.Patch +} + +// NewTextDiff returns +func NewTextDiff(position Position, diff []dmp.Patch, oldValue, newValue interface{}) *TextDiff { + d := TextDiff{ + Modified: *NewModified(position, oldValue, newValue), + Diff: diff, + } + return &d +} + +func (d *TextDiff) PostApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + o := object.(map[string]interface{}) + i := string(d.PostPosition().(Name)) + // TODO error + d.OldValue = o[i] + // TODO error + d.patch() + o[i] = d.NewValue + case []interface{}: + o := object.([]interface{}) + i := d.PostPosition().(Index) + d.OldValue = o[i] + // TODO error + d.patch() + o[i] = d.NewValue + } + return object +} + +func (d *TextDiff) patch() error { + if d.OldValue == nil { + return errors.New("Old Value is not set") + } + patcher := dmp.New() + patched, successes := patcher.PatchApply(d.Diff, d.OldValue.(string)) + for _, success := range successes { + if !success { + return errors.New("Failed to apply a patch") + } + } + d.NewValue = patched + return nil +} + +func (d *TextDiff) DiffString() string { + dmp := dmp.New() + return dmp.PatchToText(d.Diff) +} + +// A Delted represents deleted field or index of an Object or an Array. +type Deleted struct { + preDelta + + // The value deleted + Value interface{} +} + +// NewDeleted returns a Deleted +func NewDeleted(position Position, value interface{}) *Deleted { + d := Deleted{ + preDelta: preDelta{position}, + Value: value, + } + return &d + +} + +func (d *Deleted) PreApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + // TODO check old value + delete(object.(map[string]interface{}), string(d.PrePosition().(Name))) + case []interface{}: + i := int(d.PrePosition().(Index)) + o := object.([]interface{}) + object = append(o[:i], o[i+1:]...) + } + return object +} + +func (d Deleted) Similarity() (similarity float64) { + return 0 +} + +// A Moved represents field that is moved, which means the index or name is +// changed. Note that, in this library, assigning a Moved and a Modified to +// a single position is not allowed. For the compatibility with jsondiffpatch, +// the Moved in this library can hold the old and new value in it. +type Moved struct { + preDelta + postDelta + similarityCache + // The value before moving + Value interface{} + // The delta applied after moving (for compatibility) + Delta interface{} +} + +func NewMoved(oldPosition Position, newPosition Position, value interface{}, delta Delta) *Moved { + d := Moved{ + preDelta: preDelta{oldPosition}, + postDelta: postDelta{newPosition}, + Value: value, + Delta: delta, + } + d.similarityCache = newSimilarityCache(&d) + return &d +} + +func (d *Moved) PreApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + //not supported + case []interface{}: + i := int(d.PrePosition().(Index)) + o := object.([]interface{}) + d.Value = o[i] + object = append(o[:i], o[i+1:]...) + } + return object +} + +func (d *Moved) PostApply(object interface{}) interface{} { + switch object.(type) { + case map[string]interface{}: + //not supported + case []interface{}: + i := int(d.PostPosition().(Index)) + o := object.([]interface{}) + o = append(o, 0) //dummy + copy(o[i+1:], o[i:]) + o[i] = d.Value + object = o + } + + if d.Delta != nil { + d.Delta.(PostDelta).PostApply(object) + } + + return object +} + +func (d *Moved) similarity() (similarity float64) { + similarity = 0.6 // as type and contens are same + ratio := float64(d.PrePosition().(Index)) / float64(d.PostPosition().(Index)) + if ratio > 1 { + ratio = 1 / ratio + } + similarity += 0.4 * ratio + return +} diff --git a/util/diff/diff.go b/util/diff/diff.go new file mode 100644 index 00000000..bcac2302 --- /dev/null +++ b/util/diff/diff.go @@ -0,0 +1,428 @@ +// Copyright © 2016 Abcum Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// github.com/sergi/go-diff/diffmatchpatch +// github.com/yudai/gojsondiff +// github.com/yudai/golcs + +package diff + +/*type Diff struct{} + +func (d *Diff) ToPACK() []byte { + return []byte("DIFF") +}*/ + +import ( + "container/list" + "encoding/json" + "reflect" + "sort" + + dmp "github.com/sergi/go-diff/diffmatchpatch" + "github.com/yudai/golcs" +) + +// A Diff holds deltas generated by a Differ +type Diff interface { + // Deltas returns Deltas that describe differences between two JSON objects + Deltas() []Delta + // Modified returnes true if Diff has at least one Delta. + Modified() bool +} + +type diff struct { + deltas []Delta +} + +func (diff *diff) Deltas() []Delta { + return diff.deltas +} + +func (diff *diff) Modified() bool { + return len(diff.deltas) > 0 +} + +// A Differ conmapres JSON objects and apply patches +type Differ struct { + textDiffMinimumLength int +} + +// New returns new Differ with default configuration +func New() *Differ { + return &Differ{ + textDiffMinimumLength: 10, + } +} + +// Compare compares two JSON strings as []bytes and return a Diff object. +func (differ *Differ) Compare(left []byte, right []byte) (Diff, error) { + var leftMap, rightMap map[string]interface{} + err := json.Unmarshal(left, &leftMap) + if err != nil { + return nil, err + } + + err = json.Unmarshal(right, &rightMap) + if err != nil { + return nil, err + } + return differ.CompareObjects(leftMap, rightMap), nil +} + +// CompareObjects compares two JSON object as map[string]interface{} +// and return a Diff object. +func (differ *Differ) CompareObjects(left map[string]interface{}, right map[string]interface{}) Diff { + deltas := differ.compareMaps(left, right) + return &diff{deltas: deltas} +} + +func (differ *Differ) compareMaps(left map[string]interface{}, right map[string]interface{}) (deltas []Delta) { + deltas = make([]Delta, 0) + + names := sortedKeys(left) // stabilize delta order + for _, name := range names { + if rightValue, ok := right[name]; ok { + same, delta := differ.compareValues(Name(name), left[name], rightValue) + if !same { + deltas = append(deltas, delta) + } + } else { + deltas = append(deltas, NewDeleted(Name(name), left[name])) + } + } + + names = sortedKeys(right) // stabilize delta order + for _, name := range names { + if _, ok := left[name]; !ok { + deltas = append(deltas, NewAdded(Name(name), right[name])) + } + } + + return deltas +} + +// ApplyPatch applies a Diff to an JSON object. This method is destructive. +func (differ *Differ) ApplyPatch(json map[string]interface{}, patch Diff) { + applyDeltas(patch.Deltas(), json) +} + +type maybe struct { + index int + lcsIndex int + item interface{} +} + +func (differ *Differ) compareArrays( + left []interface{}, + right []interface{}, +) (deltas []Delta) { + deltas = make([]Delta, 0) + // LCS index pairs + lcsPairs := lcs.New(left, right).IndexPairs() + + // list up items not in LCS, they are maybe deleted + maybeDeleted := list.New() // but maybe moved or modified + lcsI := 0 + for i, leftValue := range left { + if lcsI < len(lcsPairs) && lcsPairs[lcsI].Left == i { + lcsI++ + } else { + maybeDeleted.PushBack(maybe{index: i, lcsIndex: lcsI, item: leftValue}) + } + } + + // list up items not in LCS, they are maybe Added + maybeAdded := list.New() // but maybe moved or modified + lcsI = 0 + for i, rightValue := range right { + if lcsI < len(lcsPairs) && lcsPairs[lcsI].Right == i { + lcsI++ + } else { + maybeAdded.PushBack(maybe{index: i, lcsIndex: lcsI, item: rightValue}) + } + } + + // find moved items + var delNext *list.Element // for prefetch to remove item in iteration + for delCandidate := maybeDeleted.Front(); delCandidate != nil; delCandidate = delNext { + delCan := delCandidate.Value.(maybe) + delNext = delCandidate.Next() + + for addCandidate := maybeAdded.Front(); addCandidate != nil; addCandidate = addCandidate.Next() { + addCan := addCandidate.Value.(maybe) + if reflect.DeepEqual(delCan.item, addCan.item) { + deltas = append(deltas, NewMoved(Index(delCan.index), Index(addCan.index), delCan.item, nil)) + maybeAdded.Remove(addCandidate) + maybeDeleted.Remove(delCandidate) + break + } + } + } + + // find modified or add+del + prevIndexDel := 0 + prevIndexAdd := 0 + delElement := maybeDeleted.Front() + addElement := maybeAdded.Front() + for i := 0; i <= len(lcsPairs); i++ { // not "< len(lcsPairs)" + var lcsPair lcs.IndexPair + var delSize, addSize int + if i < len(lcsPairs) { + lcsPair = lcsPairs[i] + delSize = lcsPair.Left - prevIndexDel - 1 + addSize = lcsPair.Right - prevIndexAdd - 1 + prevIndexDel = lcsPair.Left + prevIndexAdd = lcsPair.Right + } + + var delSlice []maybe + if delSize > 0 { + delSlice = make([]maybe, 0, delSize) + } else { + delSlice = make([]maybe, 0, maybeDeleted.Len()) + } + for ; delElement != nil; delElement = delElement.Next() { + d := delElement.Value.(maybe) + if d.lcsIndex != i { + break + } + delSlice = append(delSlice, d) + } + + var addSlice []maybe + if addSize > 0 { + addSlice = make([]maybe, 0, addSize) + } else { + addSlice = make([]maybe, 0, maybeAdded.Len()) + } + for ; addElement != nil; addElement = addElement.Next() { + a := addElement.Value.(maybe) + if a.lcsIndex != i { + break + } + addSlice = append(addSlice, a) + } + + if len(delSlice) > 0 && len(addSlice) > 0 { + var bestDeltas []Delta + bestDeltas, delSlice, addSlice = differ.maximizeSimilarities(delSlice, addSlice) + for _, delta := range bestDeltas { + deltas = append(deltas, delta) + } + } + + for _, del := range delSlice { + deltas = append(deltas, NewDeleted(Index(del.index), del.item)) + } + for _, add := range addSlice { + deltas = append(deltas, NewAdded(Index(add.index), add.item)) + } + } + + return deltas +} + +func (differ *Differ) compareValues( + position Position, + left interface{}, + right interface{}, +) (same bool, delta Delta) { + if reflect.TypeOf(left) != reflect.TypeOf(right) { + return false, NewModified(position, left, right) + } + + switch left.(type) { + + case map[string]interface{}: + l := left.(map[string]interface{}) + childDeltas := differ.compareMaps(l, right.(map[string]interface{})) + if len(childDeltas) > 0 { + return false, NewObject(position, childDeltas) + } + + case []interface{}: + l := left.([]interface{}) + childDeltas := differ.compareArrays(l, right.([]interface{})) + + if len(childDeltas) > 0 { + return false, NewArray(position, childDeltas) + } + + default: + if !reflect.DeepEqual(left, right) { + + if reflect.ValueOf(left).Kind() == reflect.String && + reflect.ValueOf(right).Kind() == reflect.String && + differ.textDiffMinimumLength <= len(left.(string)) { + + textDiff := dmp.New() + patchs := textDiff.PatchMake(left.(string), right.(string)) + return false, NewTextDiff(position, patchs, left, right) + + } else { + return false, NewModified(position, left, right) + } + } + } + + return true, nil +} + +func applyDeltas(deltas []Delta, object interface{}) interface{} { + preDeltas := make(preDeltas, 0) + for _, delta := range deltas { + switch delta.(type) { + case PreDelta: + preDeltas = append(preDeltas, delta.(PreDelta)) + } + } + sort.Sort(preDeltas) + for _, delta := range preDeltas { + object = delta.PreApply(object) + } + + postDeltas := make(postDeltas, 0, len(deltas)-len(preDeltas)) + for _, delta := range deltas { + switch delta.(type) { + case PostDelta: + postDeltas = append(postDeltas, delta.(PostDelta)) + } + } + sort.Sort(postDeltas) + + for _, delta := range postDeltas { + object = delta.PostApply(object) + } + + return object +} + +func (differ *Differ) maximizeSimilarities(left []maybe, right []maybe) (resultDeltas []Delta, freeLeft, freeRight []maybe) { + deltaTable := make([][]Delta, len(left)) + for i := 0; i < len(left); i++ { + deltaTable[i] = make([]Delta, len(right)) + } + for i, leftValue := range left { + for j, rightValue := range right { + _, delta := differ.compareValues(Index(rightValue.index), leftValue.item, rightValue.item) + deltaTable[i][j] = delta + } + } + + sizeX := len(left) + 1 // margins for both sides + sizeY := len(right) + 1 + + // fill out with similarities + dpTable := make([][]float64, sizeX) + for i := 0; i < sizeX; i++ { + dpTable[i] = make([]float64, sizeY) + } + for x := sizeX - 2; x >= 0; x-- { + for y := sizeY - 2; y >= 0; y-- { + prevX := dpTable[x+1][y] + prevY := dpTable[x][y+1] + score := deltaTable[x][y].Similarity() + dpTable[x+1][y+1] + + dpTable[x][y] = max(prevX, prevY, score) + } + } + + minLength := len(left) + if minLength > len(right) { + minLength = len(right) + } + maxInvalidLength := minLength - 1 + + freeLeft = make([]maybe, 0, len(left)-minLength) + freeRight = make([]maybe, 0, len(right)-minLength) + + resultDeltas = make([]Delta, 0, minLength) + var x, y int + for x, y = 0, 0; x <= sizeX-2 && y <= sizeY-2; { + current := dpTable[x][y] + nextX := dpTable[x+1][y] + nextY := dpTable[x][y+1] + + xValidLength := len(left) - maxInvalidLength + y + yValidLength := len(right) - maxInvalidLength + x + + if x+1 < xValidLength && current == nextX { + freeLeft = append(freeLeft, left[x]) + x++ + } else if y+1 < yValidLength && current == nextY { + freeRight = append(freeRight, right[y]) + y++ + } else { + resultDeltas = append(resultDeltas, deltaTable[x][y]) + x++ + y++ + } + } + for ; x < sizeX-1; x++ { + freeLeft = append(freeLeft, left[x-1]) + } + for ; x < sizeY-1; y++ { + freeLeft = append(freeRight, left[y-1]) + } + + return resultDeltas, freeLeft, freeRight +} + +func deltasSimilarity(deltas []Delta) (similarity float64) { + for _, delta := range deltas { + similarity += delta.Similarity() + } + similarity = similarity / float64(len(deltas)) + return +} + +func stringSimilarity(left, right string) (similarity float64) { + matchingLength := float64( + lcs.New( + stringToInterfaceSlice(left), + stringToInterfaceSlice(right), + ).Length(), + ) + similarity = + (matchingLength / float64(len(left))) * (matchingLength / float64(len(right))) + return +} + +func stringToInterfaceSlice(str string) []interface{} { + s := make([]interface{}, len(str)) + for i, v := range str { + s[i] = v + } + return s +} + +func sortedKeys(m map[string]interface{}) (keys []string) { + keys = make([]string, 0, len(m)) + for key, _ := range m { + keys = append(keys, key) + } + sort.Strings(keys) + return +} + +func max(first float64, rest ...float64) (max float64) { + max = first + for _, value := range rest { + if max < value { + max = value + } + } + return max +} diff --git a/util/diff/unmarshal.go b/util/diff/unmarshal.go new file mode 100644 index 00000000..4df826f7 --- /dev/null +++ b/util/diff/unmarshal.go @@ -0,0 +1,131 @@ +package diff + +import ( + "encoding/json" + "errors" + dmp "github.com/sergi/go-diff/diffmatchpatch" + "io" + "strconv" +) + +type Unmarshaller struct { +} + +func NewUnmarshaller() *Unmarshaller { + return &Unmarshaller{} +} + +func (um *Unmarshaller) UnmarshalBytes(diffBytes []byte) (Diff, error) { + var diffObj map[string]interface{} + json.Unmarshal(diffBytes, &diffObj) + return um.UnmarshalObject(diffObj) +} + +func (um *Unmarshaller) UnmarshalString(diffString string) (Diff, error) { + return um.UnmarshalBytes([]byte(diffString)) +} + +func (um *Unmarshaller) UnmarshalReader(diffReader io.Reader) (Diff, error) { + var diffBytes []byte + io.ReadFull(diffReader, diffBytes) + return um.UnmarshalBytes(diffBytes) +} + +func (um *Unmarshaller) UnmarshalObject(diffObj map[string]interface{}) (Diff, error) { + result, err := process(Name(""), diffObj) + if err != nil { + return nil, err + } + return &diff{deltas: result.(*Object).Deltas}, nil +} + +func process(position Position, object interface{}) (Delta, error) { + var delta Delta + switch object.(type) { + case map[string]interface{}: + o := object.(map[string]interface{}) + if isArray, typed := o["_t"]; typed && isArray == "a" { + deltas := make([]Delta, 0, len(o)) + for name, value := range o { + if name == "_t" { + continue + } + + normalizedName := name + if normalizedName[0] == '_' { + normalizedName = name[1:] + } + index, err := strconv.Atoi(normalizedName) + if err != nil { + return nil, err + } + + childDelta, err := process(Index(index), value) + if err != nil { + return nil, err + } + + deltas = append(deltas, childDelta) + } + + for _, d := range deltas { + switch d.(type) { + case *Moved: + moved := d.(*Moved) + + var dd interface{} + var i int + for i, dd = range deltas { + switch dd.(type) { + case *Moved: + case PostDelta: + pd := dd.(PostDelta) + if moved.PostPosition() == pd.PostPosition() { + moved.Delta = pd + deltas = append(deltas[:i], deltas[i+1:]...) + } + } + } + } + } + + delta = NewArray(position, deltas) + } else { + deltas := make([]Delta, 0, len(o)) + for name, value := range o { + childDelta, err := process(Name(name), value) + if err != nil { + return nil, err + } + deltas = append(deltas, childDelta) + } + delta = NewObject(position, deltas) + } + case []interface{}: + o := object.([]interface{}) + switch len(o) { + case 1: + delta = NewAdded(position, o[0]) + case 2: + delta = NewModified(position, o[0], o[1]) + case 3: + switch o[2] { + case float64(0): + delta = NewDeleted(position, o[0]) + case float64(2): + dmp := dmp.New() + patches, err := dmp.PatchFromText(o[0].(string)) + if err != nil { + return nil, err + } + delta = NewTextDiff(position, patches, nil, nil) + case float64(3): + delta = NewMoved(position, Index(int(o[1].(float64))), nil, nil) + default: + return nil, errors.New("Unknown delta type") + } + } + } + + return delta, nil +} diff --git a/util/form/ascii.go b/util/form/ascii.go new file mode 100755 index 00000000..0247df85 --- /dev/null +++ b/util/form/ascii.go @@ -0,0 +1,297 @@ +package form + +import ( + "errors" + "fmt" + "sort" + + diff "github.com/abcum/surreal/util/diff" +) + +func NewAsciiFormatter(left map[string]interface{}) *AsciiFormatter { + return &AsciiFormatter{ + left: left, + ShowArrayIndex: false, + } +} + +type AsciiFormatter struct { + left map[string]interface{} + ShowArrayIndex bool + buffer string + path []string + size []int + inArray []bool +} + +func (f *AsciiFormatter) Format(diff diff.Diff) (result string, err error) { + f.buffer = "" + f.path = []string{} + f.size = []int{} + f.inArray = []bool{} + + f.printIndent(AsciiSame) + f.println("{") + f.push("ROOT", len(f.left), false) + f.processObject(f.left, diff.Deltas()) + f.pop() + f.printIndent(AsciiSame) + f.println("}") + + return f.buffer, nil +} + +func (f *AsciiFormatter) processArray(array []interface{}, deltas []diff.Delta) error { + patchedIndex := 0 + for index, value := range array { + f.processItem(value, deltas, diff.Index(index)) + patchedIndex++ + } + + // additional Added + for _, delta := range deltas { + switch delta.(type) { + case *diff.Added: + d := delta.(*diff.Added) + // skip items already processed + if int(d.Position.(diff.Index)) < len(array) { + continue + } + f.printRecursive(d.Position.String(), d.Value, AsciiAdded) + } + } + + return nil +} + +func (f *AsciiFormatter) processObject(object map[string]interface{}, deltas []diff.Delta) error { + names := sortedKeys(object) + for _, name := range names { + value := object[name] + f.processItem(value, deltas, diff.Name(name)) + } + + // Added + for _, delta := range deltas { + switch delta.(type) { + case *diff.Added: + d := delta.(*diff.Added) + f.printRecursive(d.Position.String(), d.Value, AsciiAdded) + } + } + + return nil +} + +func (f *AsciiFormatter) processItem(value interface{}, deltas []diff.Delta, position diff.Position) error { + matchedDeltas := f.searchDeltas(deltas, position) + positionStr := position.String() + if len(matchedDeltas) > 0 { + for _, matchedDelta := range matchedDeltas { + + switch matchedDelta.(type) { + case *diff.Object: + d := matchedDelta.(*diff.Object) + switch value.(type) { + case map[string]interface{}: + //ok + default: + return errors.New("Type mismatch") + } + o := value.(map[string]interface{}) + + f.printKeyWithIndent(positionStr, AsciiSame) + f.println("{") + f.push(positionStr, len(o), false) + f.processObject(o, d.Deltas) + f.pop() + f.printIndent(AsciiSame) + f.print("}") + f.printComma() + + case *diff.Array: + d := matchedDelta.(*diff.Array) + switch value.(type) { + case []interface{}: + //ok + default: + return errors.New("Type mismatch") + } + a := value.([]interface{}) + + f.printKeyWithIndent(positionStr, AsciiSame) + f.println("[") + f.push(positionStr, len(a), true) + f.processArray(a, d.Deltas) + f.pop() + f.printIndent(AsciiSame) + f.print("]") + f.printComma() + + case *diff.Added: + d := matchedDelta.(*diff.Added) + f.printRecursive(positionStr, d.Value, AsciiAdded) + f.size[len(f.size)-1]++ + + case *diff.Modified: + d := matchedDelta.(*diff.Modified) + savedSize := f.size[len(f.size)-1] + f.printRecursive(positionStr, d.OldValue, AsciiDeleted) + f.size[len(f.size)-1] = savedSize + f.printRecursive(positionStr, d.NewValue, AsciiAdded) + + case *diff.TextDiff: + savedSize := f.size[len(f.size)-1] + d := matchedDelta.(*diff.TextDiff) + f.printRecursive(positionStr, d.OldValue, AsciiDeleted) + f.size[len(f.size)-1] = savedSize + f.printRecursive(positionStr, d.NewValue, AsciiAdded) + + case *diff.Deleted: + d := matchedDelta.(*diff.Deleted) + f.printRecursive(positionStr, d.Value, AsciiDeleted) + + default: + return errors.New("Unknown Delta type detected") + } + + } + } else { + f.printRecursive(positionStr, value, AsciiSame) + } + + return nil +} + +func (f *AsciiFormatter) searchDeltas(deltas []diff.Delta, postion diff.Position) (results []diff.Delta) { + results = make([]diff.Delta, 0) + for _, delta := range deltas { + switch delta.(type) { + case diff.PostDelta: + if delta.(diff.PostDelta).PostPosition() == postion { + results = append(results, delta) + } + case diff.PreDelta: + if delta.(diff.PreDelta).PrePosition() == postion { + results = append(results, delta) + } + default: + panic("heh") + } + } + return +} + +const ( + AsciiSame = " " + AsciiAdded = "+" + AsciiDeleted = "-" +) + +func (f *AsciiFormatter) push(name string, size int, array bool) { + f.path = append(f.path, name) + f.size = append(f.size, size) + f.inArray = append(f.inArray, array) +} + +func (f *AsciiFormatter) pop() { + f.path = f.path[0 : len(f.path)-1] + f.size = f.size[0 : len(f.size)-1] + f.inArray = f.inArray[0 : len(f.inArray)-1] +} + +func (f *AsciiFormatter) printIndent(marker string) { + f.print(marker) + for n := 0; n < len(f.path); n++ { + f.print(" ") + } +} + +func (f *AsciiFormatter) printKeyWithIndent(name string, marker string) { + f.printIndent(marker) + if !f.inArray[len(f.inArray)-1] { + f.printf(`"%s": `, name) + } else if f.ShowArrayIndex { + f.printf(`%s: `, name) + } +} + +func (f *AsciiFormatter) printComma() { + f.size[len(f.size)-1]-- + if f.size[len(f.size)-1] > 0 { + f.println(",") + } else { + f.println() + } +} + +func (f *AsciiFormatter) printValue(value interface{}) { + switch value.(type) { + case string: + f.buffer += fmt.Sprintf(`"%s"`, value) + default: + f.buffer += fmt.Sprintf(`%#v`, value) + } +} + +func (f *AsciiFormatter) print(a ...interface{}) { + f.buffer += fmt.Sprint(a...) +} + +func (f *AsciiFormatter) printf(format string, a ...interface{}) { + f.buffer += fmt.Sprintf(format, a...) +} + +func (f *AsciiFormatter) println(a ...interface{}) { + f.buffer += fmt.Sprintln(a...) +} + +func (f *AsciiFormatter) printRecursive(name string, value interface{}, marker string) { + switch value.(type) { + case map[string]interface{}: + f.printKeyWithIndent(name, marker) + f.println("{") + + m := value.(map[string]interface{}) + size := len(m) + f.push(name, size, false) + + keys := sortedKeys(m) + for _, key := range keys { + f.printRecursive(key, m[key], marker) + } + f.pop() + + f.printIndent(marker) + f.print("}") + f.printComma() + case []interface{}: + f.printKeyWithIndent(name, marker) + f.println("[") + + s := value.([]interface{}) + size := len(s) + f.push("", size, true) + for _, item := range s { + f.printRecursive("", item, marker) + } + f.pop() + + f.printIndent(marker) + f.print("]") + f.printComma() + default: + f.printKeyWithIndent(name, marker) + f.printValue(value) + f.printComma() + } +} + +func sortedKeys(m map[string]interface{}) (keys []string) { + keys = make([]string, 0, len(m)) + for key, _ := range m { + keys = append(keys, key) + } + sort.Strings(keys) + return +} diff --git a/util/form/delta.go b/util/form/delta.go new file mode 100755 index 00000000..53fc65fa --- /dev/null +++ b/util/form/delta.go @@ -0,0 +1,124 @@ +package form + +import ( + "encoding/json" + "errors" + "fmt" + + diff "github.com/abcum/surreal/util/diff" +) + +const ( + DeltaDelete = 0 + DeltaTextDiff = 2 + DeltaMove = 3 +) + +func NewDeltaFormatter() *DeltaFormatter { + return &DeltaFormatter{ + PrintIndent: true, + } +} + +type DeltaFormatter struct { + PrintIndent bool +} + +func (f *DeltaFormatter) Format(diff diff.Diff) (result string, err error) { + jsonObject, err := f.formatObject(diff.Deltas()) + if err != nil { + return "", err + } + var resultBytes []byte + if f.PrintIndent { + resultBytes, err = json.MarshalIndent(jsonObject, "", " ") + } else { + resultBytes, err = json.Marshal(jsonObject) + } + if err != nil { + return "", err + } + + return string(resultBytes), nil +} + +func (f *DeltaFormatter) FormatAsJson(diff diff.Diff) (json map[string]interface{}, err error) { + return f.formatObject(diff.Deltas()) +} + +func (f *DeltaFormatter) formatObject(deltas []diff.Delta) (deltaJson map[string]interface{}, err error) { + deltaJson = map[string]interface{}{} + for _, delta := range deltas { + switch delta.(type) { + case *diff.Object: + d := delta.(*diff.Object) + deltaJson[d.Position.String()], err = f.formatObject(d.Deltas) + if err != nil { + return nil, err + } + case *diff.Array: + d := delta.(*diff.Array) + deltaJson[d.Position.String()], err = f.formatArray(d.Deltas) + if err != nil { + return nil, err + } + case *diff.Added: + d := delta.(*diff.Added) + deltaJson[d.PostPosition().String()] = []interface{}{d.Value} + case *diff.Modified: + d := delta.(*diff.Modified) + deltaJson[d.PostPosition().String()] = []interface{}{d.OldValue, d.NewValue} + case *diff.TextDiff: + d := delta.(*diff.TextDiff) + deltaJson[d.PostPosition().String()] = []interface{}{d.DiffString(), 0, DeltaTextDiff} + case *diff.Deleted: + d := delta.(*diff.Deleted) + deltaJson[d.PrePosition().String()] = []interface{}{d.Value, 0, DeltaDelete} + case *diff.Moved: + return nil, errors.New("Delta type 'Move' is not supported in objects") + default: + return nil, errors.New(fmt.Sprintf("Unknown Delta type detected: %#v", delta)) + } + } + return +} + +func (f *DeltaFormatter) formatArray(deltas []diff.Delta) (deltaJson map[string]interface{}, err error) { + deltaJson = map[string]interface{}{ + "_t": "a", + } + for _, delta := range deltas { + switch delta.(type) { + case *diff.Object: + d := delta.(*diff.Object) + deltaJson[d.Position.String()], err = f.formatObject(d.Deltas) + if err != nil { + return nil, err + } + case *diff.Array: + d := delta.(*diff.Array) + deltaJson[d.Position.String()], err = f.formatArray(d.Deltas) + if err != nil { + return nil, err + } + case *diff.Added: + d := delta.(*diff.Added) + deltaJson[d.PostPosition().String()] = []interface{}{d.Value} + case *diff.Modified: + d := delta.(*diff.Modified) + deltaJson[d.PostPosition().String()] = []interface{}{d.OldValue, d.NewValue} + case *diff.TextDiff: + d := delta.(*diff.TextDiff) + deltaJson[d.PostPosition().String()] = []interface{}{d.DiffString(), 0, DeltaTextDiff} + case *diff.Deleted: + d := delta.(*diff.Deleted) + deltaJson["_"+d.PrePosition().String()] = []interface{}{d.Value, 0, DeltaDelete} + case *diff.Moved: + d := delta.(*diff.Moved) + deltaJson["_"+d.PrePosition().String()] = []interface{}{"", d.PostPosition(), DeltaMove} + default: + return nil, errors.New(fmt.Sprintf("Unknown Delta type detected: %#v", delta)) + } + } + return +} diff --git a/util/item/item.go b/util/item/item.go index 7a855abd..2f419967 100644 --- a/util/item/item.go +++ b/util/item/item.go @@ -26,7 +26,8 @@ import ( "github.com/abcum/surreal/sql" "github.com/abcum/surreal/util/conv" "github.com/abcum/surreal/util/data" - // "github.com/abcum/surreal/util/diff" + "github.com/abcum/surreal/util/diff" + "github.com/abcum/surreal/util/form" "github.com/abcum/surreal/util/keys" ) @@ -114,6 +115,7 @@ func (this *Doc) Allow(txn kvs.TX, cond string) (val bool) { } return true + } func (this *Doc) Check(txn kvs.TX, cond []sql.Expr) (val bool) { @@ -224,7 +226,7 @@ func (this *Doc) PurgePatch(txn kvs.TX) (err error) { func (this *Doc) StorePatch(txn kvs.TX) (err error) { key := &keys.Patch{KV: this.key.KV, NS: this.key.NS, DB: this.key.DB, TB: this.key.TB, ID: this.key.ID} - return txn.CPut(key.Encode(), this.diff(), nil) + return txn.CPut(key.Encode(), this.diff().ToPACK(), nil) } @@ -302,7 +304,7 @@ func (this *Doc) Yield(output sql.Token, fallback sql.Token) (res interface{}) { case sql.ID: res = fmt.Sprintf("@%v:%v", this.key.TB, this.key.ID) case sql.DIFF: - res = this.diff() + res = this.diff().Data() case sql.FULL: res = this.current.Data() case sql.AFTER: @@ -326,9 +328,16 @@ func (this *Doc) Yield(output sql.Token, fallback sql.Token) (res interface{}) { // -------------------------------------------------- // -------------------------------------------------- -func (this *Doc) diff() []byte { - // *diff.Diff - return []byte("DIFF") +func (this *Doc) diff() *data.Doc { + + differ := diff.New() + diff, _ := differ.Compare(this.current.Get("data").ToJSON(), this.initial.Get("data").ToJSON()) + + format := form.NewDeltaFormatter() + diffed, _ := format.Format(diff) + + return data.NewFromJSON([]byte(diffed)) + } func (this *Doc) getFlds(txn kvs.TX) (out []*field) {