franklinokech · June 4, 2020 09:06
diff --git a/add_column.py b/add_column.py
 # Insert a new column 'A' at position `idx` (0 = left-most column) of df.
 idx, new_col = 0, [7, 8, 9]  # the value can be a list, a Series, an array or a scalar
 df.insert(idx, 'A', new_col)
diff --git a/concatenate_columns_with_date.py b/concatenate_columns_with_date.py
 # Join the text column and the stringified date column with a '-' separator.
 prefix = df['string_col'] + '-'
 df['composite_column'] = prefix + df['date_column'].astype(str)
diff --git a/convert_col_to_date.py b/convert_col_to_date.py
 # Convert a pandas column to Date data type.
 # The existing values are parsed as day-month-year (format '%d-%m-%Y').
 # Bracket indexing is used instead of attribute access so the snippet keeps
 # working when the column name is not a valid Python identifier or clashes
 # with an existing DataFrame attribute/method.
 df['date_column'] = pd.to_datetime(df['date_column'], format='%d-%m-%Y')
diff --git a/convert_column_to_lower.py b/convert_column_to_lower.py
 # Convert a given column to lower case.
 # Bracket indexing is used instead of attribute access so the snippet keeps
 # working when the column name is not a valid Python identifier or clashes
 # with an existing DataFrame attribute/method.
 df['column_name'] = df['column_name'].str.lower()
diff --git a/export_to_drive.py b/export_to_drive.py
 # Imports needed to push a DataFrame into a Google Sheet from Colab
 import gspread
 from google.colab import auth
 from gspread_dataframe import get_as_dataframe, set_with_dataframe
 from oauth2client.client import GoogleCredentials

 # Authenticate the Colab user, then authorize gspread with the default credentials
 auth.authenticate_user()
 gc = gspread.authorize(GoogleCredentials.get_application_default())

 # Open the spreadsheet file by its title
 sh = gc.open('Google Sheet File Name')

 # Select the worksheet (tab) inside that file by its title
 worksheet = sh.worksheet('Tab within File')

 # Write the DataFrame into the worksheet
 # NOTE(review): set_with_dataframe is called without row/col arguments; per the
 # gspread-dataframe docs it then starts at the top-left cell rather than
 # appending below existing rows -- confirm this is the intended behaviour.
 set_with_dataframe(worksheet, df)
diff --git a/get_frequency_count.py b/get_frequency_count.py
 # Relative frequency of each distinct value in column 'City' (normalize=True)
 city_values = empDfObj['City']
 frequency = city_values.value_counts(normalize=True)

 # Report the relative frequencies scaled to percentages
 print("Frequency of values as percentage in column 'City' :")
 print(100 * frequency)
diff --git a/merge_left.py b/merge_left.py
 # Left join of df_left with df_right, matching on 'primary_key' in both frames.
 df_merged = df_left.merge(
     right=df_right,
     how='left',
     left_on='primary_key',
     right_on='primary_key',
 )
diff --git a/preappend_string.py b/preappend_string.py
 # Prepend the literal 'str' to every value of column 'col'
 # (the values are cast to text first).
 col_as_text = df['col'].astype(str)
 df['col'] = 'str' + col_as_text
diff --git a/python_data_cleaning.py b/python_data_cleaning.py
 # Strip leading and trailing whitespace from every column name
 # (str.strip trims both ends, not only trailing spaces).
 df.columns = [name.strip() for name in df.columns]
	# Insert a new column 'A' at position `idx` (0 = left-most column) of df.
	idx, new_col = 0, [7, 8, 9]  # the value can be a list, a Series, an array or a scalar
	df.insert(idx, 'A', new_col)
	# Convert a pandas column to Date data type.
	# The existing values are parsed as day-month-year (format '%d-%m-%Y').
	# Bracket indexing is used instead of attribute access so the snippet keeps
	# working when the column name is not a valid Python identifier or clashes
	# with an existing DataFrame attribute/method.
	df['date_column'] = pd.to_datetime(df['date_column'], format='%d-%m-%Y')
	# Convert a given column to lower case.
	# Bracket indexing is used instead of attribute access so the snippet keeps
	# working when the column name is not a valid Python identifier or clashes
	# with an existing DataFrame attribute/method.
	df['column_name'] = df['column_name'].str.lower()
	# Imports needed to push a DataFrame into a Google Sheet from Colab
	import gspread
	from google.colab import auth
	from gspread_dataframe import get_as_dataframe, set_with_dataframe
	from oauth2client.client import GoogleCredentials

	# Authenticate the Colab user, then authorize gspread with the default credentials
	auth.authenticate_user()
	gc = gspread.authorize(GoogleCredentials.get_application_default())

	# Open the spreadsheet file by its title
	sh = gc.open('Google Sheet File Name')

	# Select the worksheet (tab) inside that file by its title
	worksheet = sh.worksheet('Tab within File')

	# Write the DataFrame into the worksheet
	# NOTE(review): set_with_dataframe is called without row/col arguments; per the
	# gspread-dataframe docs it then starts at the top-left cell rather than
	# appending below existing rows -- confirm this is the intended behaviour.
	set_with_dataframe(worksheet, df)
	# Relative frequency of each distinct value in column 'City' (normalize=True)
	city_values = empDfObj['City']
	frequency = city_values.value_counts(normalize=True)

	# Report the relative frequencies scaled to percentages
	print("Frequency of values as percentage in column 'City' :")
	print(100 * frequency)
	# Prepend the literal 'str' to every value of column 'col'
	# (the values are cast to text first).
	col_as_text = df['col'].astype(str)
	df['col'] = 'str' + col_as_text
	# Strip leading and trailing whitespace from every column name
	# (str.strip trims both ends, not only trailing spaces).
	df.columns = [name.strip() for name in df.columns]