Created
July 25, 2016 00:31
-
-
Save mistidoi/6a5bc7951cc2c75049c99c44f957e299 to your computer and use it in GitHub Desktop.
bcp2csv in Haskell (with Lazy ByteStrings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import System.Environment | |
import Data.List.Split | |
import Data.List.Utils | |
import Data.List | |
import qualified Data.ByteString.Lazy.Search as S | |
import qualified Data.ByteString.Lazy.Char8 as C | |
-- | Entry point: read a BCP export and write it back out as CSV.
--
-- usage: $ ./bcp2csv input.bcp output.csv
--
-- The first argument is the input path and the second the output path; any
-- further arguments are ignored. When fewer than two arguments are given the
-- program fails with a usage message rather than the opaque errors that
-- 'head' and '!!' raise on a too-short list.
main :: IO ()
main = do
  args <- getArgs
  case args of
    (inputFilename : outputFilename : _) -> do
      input <- C.readFile inputFilename
      -- One CSV row per parsed record, each terminated by a newline.
      C.writeFile outputFilename . C.unlines . map joinWithCommas $ parseAndEscapeBCP input
    _ -> do
      progName <- getProgName
      ioError . userError $ "usage: " ++ progName ++ " input.bcp output.csv"
-- | A single field (column value) of a record, held as a lazy ByteString.
type Field = C.ByteString

-- | One record: the ordered list of its fields.
type Line = [Field]
-- | Parse the raw BCP contents with 'parseBCP', then apply 'escapeField' to
-- every field of every record in the resulting nested list.
parseAndEscapeBCP :: C.ByteString -> [Line]
parseAndEscapeBCP = map escapeRecord . parseBCP
  where
    escapeRecord = map escapeField
-- | Parse BCP contents into nested lists: the outer list holds the records
-- (via 'splitLines'), each inner list the fields of one record (via
-- 'splitFields').
parseBCP :: C.ByteString -> [Line]
parseBCP = map splitFields . splitLines
-- | Make one field safe for CSV output: embedded quotes are escaped first,
-- and the result is then wrapped in quotes if it needs it.
escapeField :: Field -> Field
escapeField field = quoteIfNecessary (escapeQuotes field)
-- | Break the whole input into records at each occurrence of the record
-- delimiter @&$&@.
splitLines :: C.ByteString -> [C.ByteString]
splitLines = S.split recordDelimiter
  where
    recordDelimiter = convertToStrictByteString "&$&"
-- | Break one record into its fields at each occurrence of the field
-- delimiter @#&#@.
splitFields :: C.ByteString -> Line
splitFields = S.split fieldDelimiter
  where
    fieldDelimiter = convertToStrictByteString "#&#"
-- | Pack a 'String' into a lazy ByteString and collapse it into a strict one.
-- The callers in this file pass the result as the search pattern to the
-- "Data.ByteString.Lazy.Search" functions.
convertToStrictByteString = S.strictify . C.pack
-- | Escape embedded double quotes for CSV by doubling each one, so that
-- @a"b@ becomes @a""b@.
--
-- The replacement is exactly two quote characters. Emitting three, as the
-- previous version did, leaves an unbalanced quote once the field is wrapped
-- and produces a malformed row.
--
-- Fields without any quote are returned untouched, skipping the search.
escapeQuotes :: Field -> Field
escapeQuotes input
  | '"' `C.elem` input = S.replace (convertToStrictByteString "\"") (C.pack "\"\"") input
  | otherwise = input
-- | Wrap the field in double quotes when it contains a character that would
-- otherwise break the CSV structure: a line feed, a carriage return, a comma
-- or a double quote. Any other field is returned unchanged.
--
-- Carriage return is included because CSV readers treat it as (part of) a
-- line break, so an unquoted field containing one can split the row.
quoteIfNecessary :: Field -> Field
quoteIfNecessary input
  | C.any needsQuoting input = wrapInQuotes input
  | otherwise = input
  where
    needsQuoting c = c == '\n' || c == '\r' || c == ',' || c == '"'
-- | Surround the field with one double quote on each side.
wrapInQuotes :: Field -> Field
wrapInQuotes input = quote `C.append` input `C.append` quote
  where
    quote = C.singleton '"'
-- | Render one record as a CSV row by placing a comma between its fields.
joinWithCommas :: Line -> C.ByteString
joinWithCommas = C.intercalate (C.singleton ',')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment