normalize column names #Spark
import org.apache.spark.sql.DataFrame

// Lower-case every column name, replace whitespace with underscores and strip double quotes.
def normalize_column_names(df: DataFrame): DataFrame = {
  df.toDF(df.columns.map(_
    .toLowerCase
    .replaceAll("\\s", "_")
    .replaceAll("\"+", "")
  ): _*)
}
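// Quick check of normalize_column_names on hypothetical sample data (not from this gist);
// assumes a spark-shell / notebook session where spark.implicits._ is already in scope:
val demo = Seq((1, "x")).toDF("Fund Manager", "\"Ticker\"")
normalize_column_names(demo).columns // Array(fund_manager, ticker)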
// For each (view name, delimiter, path) triple: read the CSV, normalize its column
// names and register the result as a temp view under the given name.
def load_path_as_df(dict: List[List[String]]): Unit = {
  dict.foreach { l =>
    val name :: delimiter :: path :: extra = l
    val df = spark.read
      .option("header", "true")
      .option("delimiter", delimiter)
      .csv(path.replaceAll("%3D", "=")) // decode URL-encoded '=' in partition paths
    normalize_column_names(df).createOrReplaceTempView(name)
  }
}
val lt = List(
  List("standard_thesaurus" , ",", "s3a://adaptive-data-lake/staging/data_provider%3DAdaptive/standard_tickers.csv")
, List("all_thesaurus_names", ",", "s3://adaptive-data-lake/prod/data_provider=Adaptive/prod_thesauri_with_names.csv")
, List("all_sections"       , ",", "s3://adaptive-data-lake/prod/data_provider%3DAdaptive/a_section__201911221610.csv")
, List("mscience"           , ",", "s3://adaptive-data-lake/prod/data_provider%3Dmscience/namespace%3Dapi.mscience.swipev2.txs_per_shopper/year%3D2019/month%3D12/day%3D15/")
, List("req_daily"          , ",", "s3://adaptive-data-lake/dev/test_stanis/miscellaneous/fundmanager_inf_manually_changed.csv")
, List("req_full"           , ",", "s3://adaptive-data-lake/dev/test_stanis/miscellaneous/fundmanager_inf.csv")
)
load_path_as_df(lt)
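// Usage sketch (assumption: the registered temp views are queried afterwards by name), e.g.:
spark.sql("SELECT * FROM req_daily LIMIT 10").show()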