Created
November 1, 2023 14:46
-
-
Save activcoding/144caf996c7553b425f60f3aefe735a7 to your computer and use it in GitHub Desktop.
Fuzzy Search in Swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
import CollectionConcurrencyKit | |
enum FuzzySearch { | |
/// Searches an array of view models for occurrences of a fuzzy search query. | |
/// | |
/// This function takes a fuzzy search `query` and an array of `URL`s, and returns a new array that contains only | |
/// those url's that match the query. | |
/// The function uses the `score` function to calculate a score for each url and | |
/// includes only those url's whose scores are greater than 0.0. | |
/// The resulting array is then sorted by a score, in descending order. | |
/// | |
/// - Parameters: | |
/// - query: A `String` value representing the fuzzy search query. | |
/// - urls: An array of `URL`s, each representing a file, to search within. | |
/// - Returns: An array of `URL`s that match the fuzzy search query, sorted by score. | |
static func search(query: String, in urls: [URL]) async -> [URL] { | |
let queryTokens = query.lowercased().components(separatedBy: .whitespacesAndNewlines) | |
return await urls | |
.concurrentMap { (url: $0, score: score(tokens: queryTokens, url: $0)) } | |
.filter { $0.score > .zero } | |
.sorted { $0.score > $1.score } | |
.map(\.url) | |
} | |
/// Calculates the score of the fuzzy search query against a text string. | |
/// | |
/// This function takes a fuzzy search `query` and a `text` string, | |
/// and calculates a score based on how well the `query` matches the `text`. | |
/// The function is case-insensitive and calculates the score by iterating through each token in the `query`, | |
/// finding all occurrences of the token in the `text`, and calculating a proximity score for each occurrence. | |
/// The final score is the average of all token scores weighted by their proximity scores. | |
/// | |
/// - Parameters: | |
/// - query: A `String` value representing the fuzzy search query. | |
/// - url: A `URL` value representing the filePath to search within. | |
/// - Returns: A `Double` value representing the calculated score. | |
private static func score(tokens: [String], url: URL) -> Double { | |
let text = url.lastPathComponent.lowercased() | |
var score: Double = 0.0 | |
for token in tokens { | |
let ranges = text.ranges(of: token) | |
if !ranges.isEmpty { | |
let tokenScore = Double(token.count) / Double(text.count) | |
let proximityScore = proximityScoreForRanges(ranges, text: text) | |
let levenshteinScore = Double(levenshteinDistance(from: String(token), to: text)) / Double(text.count) | |
score += (tokenScore * proximityScore) * (1 - levenshteinScore) | |
} | |
} | |
if let date = getLastModifiedDate(for: url.path) { | |
return (score / Double(tokens.count)) * Double(calculateDateScore(for: date)) | |
} else { | |
return (score / Double(tokens.count)) | |
} | |
} | |
/// Calculates the proximity score based on an array of ranges. | |
/// | |
/// This function takes an array of `Range<String.Index>` objects and calculates a proximity score. | |
/// The higher the score, the closer the ranges are to each other in the original string. | |
/// | |
/// - Parameter ranges: An array of `Range<String.Index>` objects representing the positions of matched substrings. | |
/// - Returns: A `Double` value representing the proximity score. | |
private static func proximityScoreForRanges(_ ranges: [Range<String.Index>], text: String) -> Double { | |
let sortedRanges = ranges.sorted(by: { $0.lowerBound < $1.lowerBound }) | |
var score: Double = 1.0 | |
for index in 1..<sortedRanges.count { | |
let previousRange = sortedRanges[index - 1] | |
let currentRange = sortedRanges[index] | |
let distance = currentRange.lowerBound.utf16Offset(in: text) | |
- previousRange.upperBound.utf16Offset(in: text) | |
let proximity = 1.0 / Double(distance) | |
score += proximity | |
} | |
return score / Double(sortedRanges.count) | |
} | |
/// Retrieve the last modification date for a given file path. | |
/// | |
/// This function attempts to retrieve the last modification date of a file located at the specified file path. | |
/// If the file path is valid and the modification date can be retrieved, | |
/// the function returns a `Date` object representing the modification date. | |
/// If an error occurs or the file path is invalid, the function returns `nil`. | |
/// | |
/// - Parameter filePath: The file path for which to retrieve the last modification date. | |
/// - Returns: The last modification date as a `Date?` (optional) value, | |
/// or `nil` if an error occurs or the file path is invalid. | |
private static func getLastModifiedDate(for filePath: String) -> Date? { | |
let fileManger = FileManager.default | |
do { | |
let attributes = try fileManger.attributesOfItem(atPath: filePath) | |
let modificationDate = attributes[.modificationDate] as? Date | |
return modificationDate | |
} catch { | |
return nil | |
} | |
} | |
/// Calculate the date score for a given file's modification date. | |
/// | |
/// This function calculates the date score based on the time difference | |
/// between the current date and the file's modification date, | |
/// using an exponential decay function with a half-life of 3600 seconds (1 hour). | |
/// The score will be higher for more recently modified files. | |
/// | |
/// - Parameter modificationDate: The file's modification date. | |
/// - Returns: The date score as a `Double` value. | |
private static func calculateDateScore(for modificationDate: Date) -> Double { | |
let now = Date() | |
let timeDiff = now.timeIntervalSince(modificationDate) | |
let halfLife: Double = 3600 // decay half-life in seconds | |
let decayFactor = log(2) / halfLife | |
let score = exp(-decayFactor * timeDiff) | |
return score + 0.01 | |
} | |
/// Calculates the Levenshtein distance between two input strings. | |
/// | |
/// - Parameters: | |
/// - sourceString: The source string to compare against the target string; | |
/// - targetString: The target string to compare against the source string. | |
/// - Returns: The Levenshtein distance between `sourceString` and `targetString`. | |
/// An integer representing the minimum number of | |
/// insertions, deletions, or substitutions required to transform the source string into the target string. | |
private static func levenshteinDistance(from sourceString: String, to targetString: String) -> Int { | |
let source = Array(sourceString) | |
let target = Array(targetString) | |
let sourceCount = source.count | |
let targetCount = target.count | |
guard sourceCount > 0 else { | |
return targetCount | |
} | |
guard targetCount > 0 else { | |
return sourceCount | |
} | |
var distanceMatrix = Array(repeating: Array(repeating: 0, count: targetCount + 1), count: sourceCount + 1) | |
for rowIndex in 0...sourceCount { | |
distanceMatrix[rowIndex][0] = rowIndex | |
} | |
for columnIndex in 0...targetCount { | |
distanceMatrix[0][columnIndex] = columnIndex | |
} | |
for rowIndex in 1...sourceCount { | |
for columnIndex in 1...targetCount { | |
let cost = source[rowIndex - 1] == target[columnIndex - 1] ? 0 : 1 | |
let deletionCost = distanceMatrix[rowIndex - 1][columnIndex] + 1 | |
let insertionCost = distanceMatrix[rowIndex][columnIndex - 1] + 1 | |
let substitutionCost = distanceMatrix[rowIndex - 1][columnIndex - 1] + cost | |
distanceMatrix[rowIndex][columnIndex] = min(deletionCost, insertionCost, substitutionCost) | |
} | |
} | |
return distanceMatrix[sourceCount][targetCount] | |
} | |
} | |
extension String { | |
/// This function is case-insensitive and returns an array of `Range<String.Index>` objects representing | |
/// the positions of all occurrences of the `searchString` within the original string. | |
/// | |
/// - Parameter searchString: A `String` value to search for within the original string. | |
/// - Returns: An array of `Range<String.Index>` objects representing the | |
/// positions of all occurrences of `searchString`. | |
func ranges(of searchString: String) -> [Range<String.Index>] { | |
var result: [Range<String.Index>] = [] | |
var searchStartIndex = startIndex | |
while let range = self[searchStartIndex..<endIndex].range(of: searchString, options: .caseInsensitive) { | |
result.append(range) | |
searchStartIndex = range.upperBound | |
} | |
return result | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment