Tech-49 · January 20, 2023 10:10
diff --git a/Python_helper.txt b/Python_helper.txt
 -------------------
 Pandas
 -------------------

 # Create dataframe from dictionary
 df = pd.DataFrame.from_dict(data["trainData"])

 # Convert every column to a numeric type where possible (with errors='ignore', columns that cannot be parsed are left unchanged).
 df = df.apply(pd.to_numeric, errors='ignore')

 # Remove rows that contain missing values.
 df = df.dropna()

 # Create a new column named "new_col" with value "2" if cluster_id column has value 1, else 0.
 df.insert(loc=0, column=new_col, value=np.where(cluster_id == 1, 2, 0))

 # Create a new column named "new_col" with value of df["log2_livable"] if cluster_id is 1, else 0.
 df.insert(loc=0, column=new_col, value=np.where(cluster_id == 1, df["log2_livable"], 0))

 # Parse an entire column into datetime values (the format argument describes the input strings, here YYYY-MM-DD).
 effective_date = pd.to_datetime(df["effective_date"], format='%Y-%m-%d')

 # Re-order dataframe by columns (A-Z)
 df = df[sorted(df.columns)]

 # Clone dataframe
 df.copy(deep=True)

 # Fetch the row at the given integer position (double brackets return a one-row dataframe).
 df.iloc[[index]]

 # Fetch multiple columns.
 df.loc[:, ['col_1', 'col_2']]

 # Filter rows in dataframe where cluster_2 is 0
 X[X['cluster_2'].isin([0])].reset_index()

 # Row-wise sum: add up the values across columns for each row (the result is a Series).
 df = df.sum(axis = 1)

 # Create a single-row dataframe from a dictionary (keys become columns)
 a={'b':100,'c':300}
 pd.DataFrame(a, index=[0,])
	-------------------
	Pandas
	-------------------

	# Create dataframe from dictionary
	df = pd.DataFrame.from_dict(data["trainData"])

	# Convert every column to a numeric type where possible (with errors='ignore', columns that cannot be parsed are left unchanged).
	df = df.apply(pd.to_numeric, errors='ignore')

	# Remove rows that contain missing values.
	df = df.dropna()

	# Create a new column named "new_col" with value "2" if cluster_id column has value 1, else 0.
	df.insert(loc=0, column=new_col, value=np.where(cluster_id == 1, 2, 0))

	# Create a new column named "new_col" with value of df["log2_livable"] if cluster_id is 1, else 0.
	df.insert(loc=0, column=new_col, value=np.where(cluster_id == 1, df["log2_livable"], 0))

	# Parse an entire column into datetime values (the format argument describes the input strings, here YYYY-MM-DD).
	effective_date = pd.to_datetime(df["effective_date"], format='%Y-%m-%d')

	# Re-order dataframe by columns (A-Z)
	df = df[sorted(df.columns)]

	# Clone dataframe
	df.copy(deep=True)

	# Fetch the row at the given integer position (double brackets return a one-row dataframe).
	df.iloc[[index]]

	# Fetch multiple columns.
	df.loc[:, ['col_1', 'col_2']]

	# Filter rows in dataframe where cluster_2 is 0
	X[X['cluster_2'].isin([0])].reset_index()

	# Row-wise sum: add up the values across columns for each row (the result is a Series).
	df = df.sum(axis = 1)

	# Create a single-row dataframe from a dictionary (keys become columns)
	a={'b':100,'c':300}
	pd.DataFrame(a, index=[0,])