Created
June 17, 2018 19:20
-
-
Save berewt/6c2d3bbf98a5df8fde546594d903581c to your computer and use it in GitHub Desktop.
Data pre-processing in Idris with version 0.0.1 of Leon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Audit
import Control.Monad.Syntax
import Data.Fin
import Data.String
import Data.Vect
import Data.Vect.Sub
import Leon.DataFrame.Columns
import Leon.DataFrame.Row
%default total
-- Initial type
-- Names of the 12 columns of the audit data, in column order.
public export
auditFields : Vect 12 String
auditFields =
  [ "ID"
  , "Age"
  , "Employment"
  , "Education"
  , "Marital"
  , "Occupation"
  , "Gender"
  , "Deductions"
  , "Hours"
  , "IGNORE_Accounts"
  , "RISK_Adjustment"
  , "TARGET_Adjusted"
  ]
-- Type of a header over the audit columns: a `Header` indexed by the
-- column names listed in `auditFields`.
public export
AuditHeader : Type
AuditHeader = Header Audit.auditFields
-- Header of a record before any processing step: each of the 12 columns
-- is typed as `String`.
public export
InitialHeader : AuditHeader
InitialHeader =
  [ String, String, String, String
  , String, String, String, String
  , String, String, String, String
  ]
-- Step 1: ID should be a natural number.
-- The intermediate headers are spelled out only to decompose the steps; they
-- could be left out if the transformations were chained directly.
public export
Step1Header : AuditHeader
Step1Header = set "ID" (Maybe Nat) InitialHeader
-- Runs step 1 on a single record: applies `parsePositive` to the "ID" field,
-- taking the record from `InitialHeader` to `Step1Header`.
-- `update` is written qualified as `Row.update`, the same way `step2` and
-- `step4` call it, so all steps name the function identically.
step1 : Record InitialHeader -> Record Step1Header
step1 row = Row.update "ID" parsePositive row
-- Step 2: Age should be a natural number strictly below 100.
public export
Step2Header : AuditHeader
Step2Header = set "Age" (Maybe (Fin 100)) Step1Header
-- Converts a natural number to a `Fin 100` by calling `natToFin` with the
-- bound 100. Numbers that do not fit below the bound give `Nothing`.
lessThan100 : Nat -> Maybe (Fin 100)
lessThan100 candidate = natToFin candidate 100
-- Runs step 2 on a single record: the "Age" field goes through
-- `parsePositive` and then `lessThan100`, composed with `>=>`, taking the
-- record from `Step1Header` to `Step2Header`.
step2 : Record Step1Header -> Record Step2Header
step2 row = Row.update "Age" (parsePositive >=> lessThan100) row
-- Step 3: keep only the ID, Gender and Age columns (in that order).
public export
Step3Header : Header ["ID", "Gender", "Age"]
Step3Header = keepOnly ["ID", "Gender", "Age"] Step2Header
-- Runs step 3 on a single record: passes it through `Row.keepOnly` with the
-- "ID", "Gender" and "Age" field names, giving a `Step3Header` record.
step3 : Record Step2Header -> Record Step3Header
step3 row = Row.keepOnly ["ID", "Gender", "Age"] row
-- Step 4: keep only male and female in Gender.
-- The two values a parsed "Gender" field can take.
public export
data Gender : Type where
  Male   : Gender
  Female : Gender
-- Header after step 4: built from `Step3Header` by setting the "Gender"
-- column to `Maybe Gender`.
public export
Step4Header : Header ["ID", "Gender", "Age"]
Step4Header = set "Gender" (Maybe Gender) Step3Header
-- Reads a gender from its textual form. Only the exact strings "Male" and
-- "Female" are recognised; any other input gives `Nothing`.
parseGender : String -> Maybe Gender
parseGender raw =
  case raw of
    "Male"   => Just Male
    "Female" => Just Female
    _        => Nothing
-- Runs step 4 on a single record: applies `parseGender` to the "Gender"
-- field, taking the record from `Step3Header` to `Step4Header`.
step4 : Record Step3Header -> Record Step4Header
step4 row = Row.update "Gender" parseGender row
-- Final: put everything together.
-- Header of a fully processed record, written out explicitly.
-- `pipeline` ends with `step4`, which returns a `Step4Header` record, so this
-- literal has to agree with `Step4Header` for `pipeline` to typecheck.
FinalHeader : Header ["ID", "Gender", "Age"]
FinalHeader =
  [ Maybe Nat
  , Maybe Gender
  , Maybe (Fin 100)
  ]
-- Full pre-processing of one record: `step1` first, then `step2`, `step3`
-- and finally `step4`.
pipeline : Record InitialHeader -> Record FinalHeader
pipeline raw = step4 (step3 (step2 (step1 raw)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment