Created
May 20, 2018 21:33
-
-
Save jszwec/2bb94f8f3612e0162eb16003701f727e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package mapping | |
import "reflect" | |
// nanMap is the default set of field values that are treated as NaN.
// Only the keys matter; the empty-struct values carry no data.
//
// The entries are based on the default 'na_values' documented for
// pandas.read_csv:
// https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
var nanMap = map[string]struct{}{
	// Empty field.
	"": {},

	// "Not available" spellings.
	"#N/A":     {},
	"#N/A N/A": {},
	"#NA":      {},
	"N/A":      {},
	"NA":       {},
	"n/a":      {},

	// Null spellings.
	"NULL": {},
	"null": {},

	// Signed NaN spellings.
	"-NaN": {},
	"-nan": {},

	// IND / QNAN spellings.
	"-1.#IND":  {},
	"-1.#QNAN": {},
	"1.#IND":   {},
	"1.#QNAN":  {},
}
// MapNaN returns a Map function that can be used with csvutil.Decoder to recognize | |
// an extended list of NaN values. In addition to the default NaNs list it is | |
// possible to supply a list of custom ones. | |
// | |
// NaN Mapping works when v is a float64, float32, or any floatX alias. | |
// It is not tied up with any column. | |
// | |
// The default NaN formats provided with this function are as follows: | |
// "" | |
// "#N/A" | |
// "#N/A N/A" | |
// "#NA" | |
// "-1.#IND" | |
// "-1.#QNAN" | |
// "-NaN" | |
// "-nan" | |
// "1.#IND" | |
// "1.#QNAN" | |
// "N/A" | |
// "NA" | |
// "NULL" | |
// "n/a" | |
// "null" | |
// | |
// The above list was based on default pandas na_values: | |
// | |
// https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html | |
func MapNaN(nans ...string) func(string, string, interface{}) string { | |
m := nanMap | |
if len(nans) > 0 { | |
m = make(map[string]struct{}, len(nanMap)+len(nans)) | |
for k, v := range nanMap { | |
m[k] = v | |
} | |
for _, v := range nans { | |
m[v] = struct{}{} | |
} | |
} | |
return func(s, _ string, v interface{}) string { | |
return nan(m, s, v) | |
} | |
} | |
// nan returns the literal "NaN" when v is of a floating-point kind (float32,
// float64, or a named type based on one of them) and field is a key of m.
// In every other case field is returned unchanged.
//
// A nil v is treated as "not a float" and leaves field untouched.
func nan(m map[string]struct{}, field string, v interface{}) string {
	// reflect.TypeOf yields a nil Type for a nil interface value; calling
	// Kind on it would panic, so bail out before touching it.
	typ := reflect.TypeOf(v)
	if typ == nil {
		return field
	}

	switch typ.Kind() {
	case reflect.Float32, reflect.Float64:
		// Float kinds are the only ones eligible for NaN mapping.
	default:
		return field
	}

	if _, ok := m[field]; ok {
		return "NaN"
	}
	return field
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment