Created
June 3, 2024 00:15
-
-
Save Kampfkarren/3cf13d45272792544259f3b40595809b to your computer and use it in GitHub Desktop.
Damerau-Levenshtein distance in Luau
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local function graphemeCount(text: string): number | |
local count = 0 | |
for _ in utf8.graphemes(text) do | |
count += 1 | |
end | |
return count | |
end | |
-- https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Distance_with_adjacent_transpositions | |
local function editDistance(textA: string, textB: string): number | |
local da = {} | |
local d = {} | |
local graphemeCountA = graphemeCount(textA) | |
local graphemeCountB = graphemeCount(textB) | |
local maxDistance = graphemeCountA + graphemeCountB | |
d[Vector3.new(-1, -1)] = maxDistance | |
for indexA = 0, graphemeCountA do | |
d[Vector3.new(indexA, -1)] = maxDistance | |
d[Vector3.new(indexA, 0)] = indexA | |
end | |
for indexB = 0, graphemeCountB do | |
d[Vector3.new(-1, indexB)] = maxDistance | |
d[Vector3.new(0, indexB)] = indexB | |
end | |
local graphemeIndexA = 0 | |
for graphemeStartA, graphemeEndA in utf8.graphemes(textA) do | |
local graphemeA = textA:sub(graphemeStartA, graphemeEndA) | |
graphemeIndexA += 1 | |
local db = 0 | |
local graphemeIndexB = 0 | |
for graphemeStartB, graphemeEndB in utf8.graphemes(textB) do | |
local graphemeB = textB:sub(graphemeStartB, graphemeEndB) | |
graphemeIndexB += 1 | |
local k = da[graphemeB] or 0 | |
local l = db | |
local cost = 0 | |
if graphemeA == graphemeB then | |
db = graphemeIndexB | |
else | |
cost = 1 | |
end | |
d[Vector3.new(graphemeIndexA, graphemeIndexB)] = math.min( | |
(d[Vector3.new(graphemeIndexA - 1, graphemeIndexB - 1)] or 0) + cost, | |
(d[Vector3.new(graphemeIndexA, graphemeIndexB - 1)] or 0) + 1, | |
(d[Vector3.new(graphemeIndexA - 1, graphemeIndexB)] or 0) + 1, | |
(d[Vector3.new(k - 1, l - 1)] or 0) + (graphemeIndexA - k - 1) + 1 + (graphemeIndexB - l - 1) | |
) | |
end | |
da[graphemeA] = 1 | |
end | |
return d[Vector3.new(graphemeCountA, graphemeCountB)] or 0 | |
end | |
return editDistance |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local ServerScriptService = game:GetService("ServerScriptService") | |
local JestGlobals = require(ServerScriptService.Packages.JestGlobals) | |
local editDistance = require(script.Parent.editDistance) | |
local expect = JestGlobals.expect | |
local it = JestGlobals.it | |
it("should do simple substitution", function() | |
expect(editDistance("kitten", "sitting")).toEqual(3) | |
end) | |
it("should handle transpositions", function() | |
expect(editDistance("ca", "ac")).toEqual(1) | |
end) | |
it("should handle insertions", function() | |
expect(editDistance("book", "back")).toEqual(2) | |
end) | |
it("should handle deletions", function() | |
expect(editDistance("gumbo", "gambol")).toEqual(2) | |
end) | |
it("should handle mixed operations", function() | |
expect(editDistance("abcdef", "azced")).toEqual(3) | |
end) | |
it("should return zero for identical strings", function() | |
expect(editDistance("example", "example")).toEqual(0) | |
end) | |
it("should handle empty strings", function() | |
expect(editDistance("", "")).toEqual(0) | |
end) | |
it("should handle one empty string", function() | |
expect(editDistance("abc", "")).toEqual(3) | |
end) | |
it("should handle strings with different lengths", function() | |
expect(editDistance("short", "longer")).toEqual(6) | |
end) | |
it("should handle long strings with no operations", function() | |
expect(editDistance("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz")).toEqual(0) | |
end) | |
it("should handle unicode", function() | |
expect(editDistance("my family: 👩👦 here they are", "my family: 🥳 here they are")).toEqual(1) | |
end) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment