toebR · October 24, 2022 10:48
diff --git a/pd_gdb.py b/pd_gdb.py
 import geopandas as gpd
 import pandas as pd
 import numpy as np
 from matplotlib import pyplot as plt

 # Lift pandas' display limits so printed frames show every column and every row.
 for _display_option in ('display.max_columns', 'display.max_rows'):
     pd.set_option(_display_option, None)

 ###### load a gdb table with arcpy and convert it to a pd dataframe #########
 # arcpy (the ArcGIS Python package) is not among the file's imports, so it is
 # imported here; without it the TableToNumPyArray call raises a NameError.
 import arcpy

 # Path of the table inside the file geodatabase. Deliberately not called
 # `input`, which would shadow the builtin of the same name.
 table_path = r'my_project.gdb\my_table'
 # '*' asks for all fields of the table.
 arr = arcpy.da.TableToNumPyArray(table_path, '*')

 # convert the NumPy array to a Pandas DataFrame
 df = pd.DataFrame(arr)

 #####################

 # Read the file geodatabase with geopandas.
 gdb_path = r"D:\learn_py\basics\data\_STILLBERG_GIS\_STILLBERG_GIS.gdb"
 gdb = gpd.read_file(gdb_path)

 # Draw the loaded data.
 gdb.plot()

 # Preview the first few rows.
 gdb.head()

 # Wrap the loaded data in a plain pandas DataFrame.
 df = pd.DataFrame(data=gdb)
 df
 # List the column names.
 df.columns

 # Select a subset of columns.
 sel = df[["Code_group", 'Code_description', "Quality_pos____hgt_", "Quality_pos_"]]
 sel

 # Filter rows where Code_group is "pkt" and Code_description is "aluprofil" and
 # return only the columns Code_group and Code_description; if the column list
 # is left out, all columns are returned.
 sel.loc[(sel.Code_group == "pkt") & (sel.Code_description == "aluprofil"), ["Code_group", "Code_description"]]

 # Unique values in a column (returns an array). `sel` holds only the four
 # columns selected above, so the former lookup of 'Code' raised a KeyError;
 # Code_group is used instead.
 a = np.unique(sel["Code_group"])
 a


 # Add a new column computed from the values of two existing columns.
 sel = sel.assign(test=sel["Quality_pos____hgt_"] - sel["Quality_pos_"])

 # Conditional column: True where both conditions hold, otherwise False
 # (True/False can be swapped for strings of one's own choice).
 sel['ifesle'] = np.where((sel['Code_description'] == 'aluprofil') & (sel["Quality_pos____hgt_"] < 0.25), True, False)
 sel

 # Plot a histogram of one DataFrame column.
 sel.hist(column="Quality_pos____hgt_")

 #look into python list comprehension

 # Grouped summary statistics over multiple columns.
 summary_sel = sel.groupby(['ifesle', "Code_group"]).describe()

 # Count the Code_group entries per value of 'ifesle'.
 summary_count = sel.groupby('ifesle')["Code_group"].count()
 summary_count = pd.DataFrame(summary_count)

 ########################## calculate the grand total in a new column
 # The sum of all Code_group counts is written to every row of a new column 'total'.
 summary_count_coltot = summary_count.assign(total=np.sum(summary_count["Code_group"].values))
 summary_count_coltot  # with this column we can e.g. calculate row-wise percentages
 summary_count_coltot = summary_count_coltot.reset_index()  # move the group labels from the index into a column
 summary_count_coltot["ifesle"] = summary_count_coltot["ifesle"].astype(str)  # turn booleans into strings for plotting
 # plt.bar draws into the current axes; open a fresh figure so the bars do not
 # end up on top of a previously created plot.
 plt.figure()
 plt.bar(x=summary_count_coltot["ifesle"], height=summary_count_coltot["Code_group"])
 ############################

 # Calculate the column total in a new row labelled 'Column_Total'.
 summary_count.loc['Column_Total'] = summary_count.sum(numeric_only=True, axis=0)
 summary_count

 summary_count = summary_count.reset_index()  # move the group labels from the index into a column
 summary_count["ifesle"] = summary_count["ifesle"].astype(str)  # the labels are now booleans plus one string; make them all strings

 # Separate figure for the second bar chart (counts per group plus the total).
 plt.figure()
 plt.bar(x=summary_count["ifesle"], height=summary_count["Code_group"])

 # List comprehension and for-loop notes.
 bsp = range(100)
 rangetolist = [value for value in bsp]  # every number in the range object is emitted (conditions can be added) --> returns a list (no longer a range)
 bsp = [value for value in bsp if value < 50]  # [expression for item in iterable if condition]

 sel.shape  # shape of the DataFrame
 sel.shape[0]  # number of rows in the pandas dataframe

 bsp = bsp[::-2]

 # Loop over positions only, e.g. to walk over all rows of a pd dataframe
 # (here based on the length of bsp).
 for position in range(len(bsp)):
     print(position)

 # enumerate() numbers the entries of the iterable: it yields (index, value) tuples.
 for pair in enumerate(bsp):
     print(pair)

 # Adding ", num" unpacks each tuple into its index and its value
 # (see the print(num) output).
 for x, num in enumerate(bsp):
     print(x)
     print(num)


 #function
 def functioname(dfvar):
     """Return the ``columns`` attribute of *dfvar* (e.g. a DataFrame's column labels)."""
     return dfvar.columns

 # Call the function on the selection; the returned column labels are not stored.
 functioname(sel)
	import geopandas as gpd
	import pandas as pd
	import numpy as np
	from matplotlib import pyplot as plt

	# Lift pandas' display limits so printed frames show every column and every row.
	for _display_option in ('display.max_columns', 'display.max_rows'):
	    pd.set_option(_display_option, None)

	###### load a gdb table with arcpy and convert it to a pd dataframe #########
	# arcpy (the ArcGIS Python package) is not among the file's imports, so it is
	# imported here; without it the TableToNumPyArray call raises a NameError.
	import arcpy

	# Path of the table inside the file geodatabase. Deliberately not called
	# `input`, which would shadow the builtin of the same name.
	table_path = r'my_project.gdb\my_table'
	# '*' asks for all fields of the table.
	arr = arcpy.da.TableToNumPyArray(table_path, '*')

	# convert the NumPy array to a Pandas DataFrame
	df = pd.DataFrame(arr)

	#####################

	# Read the file geodatabase with geopandas.
	gdb_path = r"D:\learn_py\basics\data\_STILLBERG_GIS\_STILLBERG_GIS.gdb"
	gdb = gpd.read_file(gdb_path)

	# Draw the loaded data.
	gdb.plot()

	# Preview the first few rows.
	gdb.head()

	# Wrap the loaded data in a plain pandas DataFrame.
	df = pd.DataFrame(data=gdb)
	df
	# List the column names.
	df.columns

	# Select a subset of columns.
	sel = df[["Code_group", 'Code_description', "Quality_pos____hgt_", "Quality_pos_"]]
	sel

	# Filter rows where Code_group is "pkt" and Code_description is "aluprofil" and
	# return only the columns Code_group and Code_description; if the column list
	# is left out, all columns are returned.
	sel.loc[(sel.Code_group == "pkt") & (sel.Code_description == "aluprofil"), ["Code_group", "Code_description"]]

	# Unique values in a column (returns an array). `sel` holds only the four
	# columns selected above, so the former lookup of 'Code' raised a KeyError;
	# Code_group is used instead.
	a = np.unique(sel["Code_group"])
	a


	# Add a new column computed from the values of two existing columns.
	sel = sel.assign(test=sel["Quality_pos____hgt_"] - sel["Quality_pos_"])

	# Conditional column: True where both conditions hold, otherwise False
	# (True/False can be swapped for strings of one's own choice).
	sel['ifesle'] = np.where((sel['Code_description'] == 'aluprofil') & (sel["Quality_pos____hgt_"] < 0.25), True, False)
	sel

	# Plot a histogram of one DataFrame column.
	sel.hist(column="Quality_pos____hgt_")

	#look into python list comprehension

	# Grouped summary statistics over multiple columns.
	summary_sel = sel.groupby(['ifesle', "Code_group"]).describe()

	# Count the Code_group entries per value of 'ifesle'.
	summary_count = sel.groupby('ifesle')["Code_group"].count()
	summary_count = pd.DataFrame(summary_count)

	########################## calculate the grand total in a new column
	# The sum of all Code_group counts is written to every row of a new column 'total'.
	summary_count_coltot = summary_count.assign(total=np.sum(summary_count["Code_group"].values))
	summary_count_coltot  # with this column we can e.g. calculate row-wise percentages
	summary_count_coltot = summary_count_coltot.reset_index()  # move the group labels from the index into a column
	summary_count_coltot["ifesle"] = summary_count_coltot["ifesle"].astype(str)  # turn booleans into strings for plotting
	# plt.bar draws into the current axes; open a fresh figure so the bars do not
	# end up on top of a previously created plot.
	plt.figure()
	plt.bar(x=summary_count_coltot["ifesle"], height=summary_count_coltot["Code_group"])
	############################

	# Calculate the column total in a new row labelled 'Column_Total'.
	summary_count.loc['Column_Total'] = summary_count.sum(numeric_only=True, axis=0)
	summary_count

	summary_count = summary_count.reset_index()  # move the group labels from the index into a column
	summary_count["ifesle"] = summary_count["ifesle"].astype(str)  # the labels are now booleans plus one string; make them all strings

	# Separate figure for the second bar chart (counts per group plus the total).
	plt.figure()
	plt.bar(x=summary_count["ifesle"], height=summary_count["Code_group"])

	# List comprehension and for-loop notes.
	bsp = range(100)
	rangetolist = [num for num in bsp]  # every number in the range object is emitted (conditions can be added) --> returns a list (no longer a range)
	bsp = [num for num in bsp if num < 50]  # [expression for item in iterable if condition]

	sel.shape  # shape of the DataFrame
	sel.shape[0]  # number of rows in the pandas dataframe

	bsp = bsp[::-2]

	# Loop over positions only, e.g. to walk over all rows of a pd dataframe
	# (here based on the length of bsp). The loop bodies below are indented;
	# without the indentation Python raises an IndentationError.
	for x in range(len(bsp)):
	    print(x)

	# enumerate() numbers the entries of the iterable: it yields (index, value) tuples.
	for x in enumerate(bsp):
	    print(x)

	# Adding ", num" unpacks each tuple into its index and its value
	# (see the print(num) output).
	for x, num in enumerate(bsp):
	    print(x)
	    print(num)


	#function
	def functioname(dfvar):
	    """Return the ``columns`` attribute of *dfvar* (e.g. a DataFrame's column labels)."""
	    # The body must be indented under the def; the flat layout was a syntax error.
	    result = dfvar.columns
	    return result

	# Call the function on the selection; the returned column labels are not stored.
	functioname(sel)