Last active
September 7, 2018 14:10
-
-
Save jasorod/7bf8c5abebd9e153d3e6e584201d0980 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// ListDiff.swift | |
// cbnnews | |
// | |
// Created by Rodriguez, Jason on 11/18/17. | |
// Copyright © 2017 cbn. All rights reserved. | |
// | |
// From "A Technique for Isolating Differences Between Files" by Paul Heckel, Communications of the ACM, Volume 21(4), April 1978, pp. 264-268 | |
// | |
import Foundation | |
private struct SymbolEntry { | |
var oldCount: Int | |
let oldPosition: Int? | |
let newPosition: Int? | |
var newCount: Int | |
} | |
private struct DiffArrayEntry { | |
var position: Int = 0 | |
var symbolEntryKey: Int = 0 | |
var isCommonUnique: Bool = false | |
var moved: Bool = false | |
} | |
class ListDiff<S: Sequence> where S.Element: Hashable { | |
private var processed = false | |
private var symbolTable: [Int: SymbolEntry] = [:] | |
private var oldSequenceArray: [DiffArrayEntry] = [] | |
private var newSequenceArray: [DiffArrayEntry] = [] | |
var deletedIndexes: [Int] { | |
if !processed { | |
process() | |
} | |
var _deletedIndexes: [Int] = [] | |
for oldObj in oldSequenceArray where !oldObj.moved { | |
_deletedIndexes.append(oldObj.position) | |
} | |
return _deletedIndexes | |
} | |
var insertedIndexes: [Int] { | |
if !processed { | |
process() | |
} | |
var _insertedIndexes: [Int] = [] | |
for newObj in newSequenceArray where !newObj.moved { | |
_insertedIndexes.append(newObj.position) | |
} | |
return _insertedIndexes | |
} | |
var movedIndexes: [(from: Int, to: Int)] { | |
if !processed { | |
process() | |
} | |
var _movedIndexes: [(from: Int, to: Int)] = [] | |
for (num, newObj) in newSequenceArray.enumerated() where newObj.moved { | |
_movedIndexes.append((from: newObj.position, to: num)) | |
} | |
return _movedIndexes | |
} | |
required init(oldSeq: S, newSeq: S) { | |
//pass #1 & #2 | |
for (num, obj) in oldSeq.enumerated() { | |
oldSequenceArray.append(DiffArrayEntry(position: num, symbolEntryKey: obj.hashValue, isCommonUnique: false, moved: false)) | |
if let current_symbol = symbolTable[obj.hashValue] { | |
symbolTable[obj.hashValue] = SymbolEntry( | |
oldCount: current_symbol.oldCount + 1, | |
oldPosition: num, | |
newPosition: current_symbol.newPosition, | |
newCount: current_symbol.newCount | |
) | |
} else { | |
symbolTable[obj.hashValue] = SymbolEntry(oldCount: 1, oldPosition: num, newPosition: nil, newCount: 0) | |
} | |
} | |
for (num, obj) in newSeq.enumerated() { | |
newSequenceArray.append(DiffArrayEntry(position: num, symbolEntryKey: obj.hashValue, isCommonUnique: false, moved: false)) | |
if let current_symbol = symbolTable[obj.hashValue] { | |
symbolTable[obj.hashValue] = SymbolEntry( | |
oldCount: current_symbol.oldCount, | |
oldPosition: current_symbol.oldPosition, | |
newPosition: num, | |
newCount: current_symbol.newCount + 1 | |
) | |
} else { | |
symbolTable[obj.hashValue] = SymbolEntry(oldCount: 0, oldPosition: nil, newPosition: num, newCount: 1) | |
} | |
} | |
} | |
private func process() { | |
//pass #3 | |
symbolTable.forEach { (_, value) in | |
if value.newCount == 1 && value.oldCount == 1 { | |
newSequenceArray[value.newPosition!].position = value.oldPosition! | |
newSequenceArray[value.newPosition!].isCommonUnique = true | |
newSequenceArray[value.newPosition!].moved = true | |
oldSequenceArray[value.oldPosition!].position = value.newPosition! | |
oldSequenceArray[value.oldPosition!].isCommonUnique = true | |
oldSequenceArray[value.oldPosition!].moved = true | |
} | |
} | |
//pass #4 | |
var found_common_line = true | |
//assume sentinel value for beginning of seq is in the array by indexing past the end of the array | |
var current_found_line = -1 | |
for (num, newObj) in newSequenceArray.enumerated() { | |
if found_common_line { | |
guard let st_entry = symbolTable[newObj.symbolEntryKey] else { continue } | |
if st_entry.newCount > 0 && st_entry.oldCount > 0 { | |
if !newObj.isCommonUnique && newObj.symbolEntryKey == oldSequenceArray[safe: current_found_line + 1]?.symbolEntryKey { | |
oldSequenceArray[current_found_line + 1].position = num | |
oldSequenceArray[current_found_line + 1].moved = true | |
newSequenceArray[num].position = current_found_line + 1 | |
newSequenceArray[num].moved = true | |
current_found_line += 1 | |
} | |
} else { | |
found_common_line = false | |
} | |
} | |
if newObj.isCommonUnique { | |
found_common_line = true | |
current_found_line = newObj.position | |
} | |
} | |
//pass #5 | |
found_common_line = true | |
//assume sentinel value for end of seq is in the array by indexing past the end of the array | |
current_found_line = oldSequenceArray.count | |
for (num, newObj) in newSequenceArray.enumerated().reversed() { | |
if found_common_line { | |
guard let st_entry = symbolTable[newObj.symbolEntryKey] else { continue } | |
if st_entry.newCount > 0 && st_entry.oldCount > 0 { | |
if !newObj.isCommonUnique && newObj.symbolEntryKey == oldSequenceArray[safe: current_found_line - 1]?.symbolEntryKey { | |
oldSequenceArray[current_found_line - 1].position = num | |
oldSequenceArray[current_found_line - 1].moved = true | |
newSequenceArray[num].position = current_found_line - 1 | |
newSequenceArray[num].moved = true | |
current_found_line -= 1 | |
} | |
} else { | |
found_common_line = false | |
} | |
} | |
if newObj.isCommonUnique { | |
found_common_line = true | |
current_found_line = newObj.position | |
} | |
} | |
processed = true | |
} | |
} | |
extension Collection { | |
/// Returns the element at the specified index iff it is within bounds, otherwise nil. | |
fileprivate subscript (safe index: Index) -> Element? { | |
return indices.contains(index) ? self[index] : nil | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment