Last active
August 29, 2015 14:01
-
-
Save sgillis/0c79cf86949f5ac7a153 to your computer and use it in GitHub Desktop.
Process a confusion matrix in Haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char (isSpace) | |
import Data.List.Split (splitOn) | |
-- | Strip trailing whitespace (including a stray @\r@ left over from CRLF
-- line endings) from a string. Leading and interior whitespace is kept.
--
-- Written as a single right fold: a whitespace character is dropped only
-- when everything after it has already been dropped, so the string is
-- walked once instead of being re-scanned with 'all' at every position.
trim :: String -> String
trim = foldr keep []
  where
    keep c rest
      | isSpace c && null rest = []
      | otherwise              = c : rest
-- | Expand one matrix row into output lines.
--
-- Each header is paired with the count in the same column and contributes
-- that many copies of @prefix ++ "\t" ++ header@. Pairing stops at the end
-- of whichever list is shorter; a count of zero or less contributes nothing.
rowToString :: String -> [String] -> [Int] -> [String]
rowToString prefix headers counts = concat (zipWith expand headers counts)
  where
    expand header n = replicate n (prefix ++ "\t" ++ header)
-- | Expand every row of the matrix into output lines, in row order.
--
-- The first component of the argument is the list of column headers; each
-- row in the second component is a row label followed by one count per
-- column. Counts are parsed with 'read', so a cell that is not an integer
-- still aborts the program.
processMatrix :: ([String], [[String]]) -> [String]
processMatrix (headers, rows) = concatMap expandRow rows
  where
    -- A row with no cells has neither a label nor counts, so it yields no
    -- output instead of crashing in 'head' / 'tail'.
    expandRow []              = []
    expandRow (label : cells) = rowToString label headers (map read cells)
-- | Parse the tab-separated matrix text into @(column headers, rows)@.
--
-- Every line is right-trimmed with 'trim' and split on tabs. The first line
-- is the header row, whose first cell (the corner above the row labels) is
-- discarded; all remaining lines are returned as rows with their label
-- still in first position. Empty input yields @([], [])@ rather than
-- crashing on 'head' of an empty list.
readMatrix :: String -> ([String], [[String]])
readMatrix contents =
  case map (splitOn "\t" . trim) (lines contents) of
    []                 -> ([], [])
    (headerRow : body) -> (drop 1 headerRow, body)
-- | Read the matrix from @data.txt@ in the current directory and print one
-- expanded line per counted observation.
main :: IO ()
main = do
  contents <- readFile "data.txt"
  mapM_ putStrLn (processMatrix (readMatrix contents))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generateString(header, row):
    """Expand one matrix row into newline-separated "label<TAB>header" lines.

    header -- list of column names.
    row    -- list whose first item is the row label and whose remaining
              items are the per-column counts (anything int() accepts).

    Each column contributes int(count) copies of its line. Returns the
    joined text with leading/trailing newlines stripped, or None when the
    row produces no text at all.
    """
    label = row[0]
    pieces = [
        (label + "\t" + header[idx] + "\n") * int(cell)
        for idx, cell in enumerate(row[1:])
    ]
    joined = "".join(pieces)
    return joined.strip('\n') if joined != "" else None
if __name__ == "__main__":
    # Read the tab-separated matrix: the first line is the header, every
    # other line is a row label followed by its counts.
    with open("data.txt") as f:
        content = [line.strip('\r\n').split('\t') for line in f]
    # An empty file has no header row; indexing content[0] would raise
    # IndexError, so in that case there is simply nothing to print.
    if content:
        header, body = content[0][1:], content[1:]
        for row in body:
            result = generateString(header, row)
            if result is not None:
                # The parenthesised single-argument form is valid both as
                # the Python 2 print statement and the Python 3 function.
                print(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
. b mb w | |
. 0 0 0 0 | |
b 2 11 3 0 | |
mb 0 0 0 0 | |
w 0 0 0 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
b . | |
b . | |
b b | |
b b | |
b b | |
b b | |
b b | |
b b | |
b b | |
b b | |
b b | |
b b | |
b b | |
b mb | |
b mb | |
b mb | |
w w | |
w w |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment