dottyz’s gists

dottyz / story_bike_share_analyze_17.py

Created May 2, 2019 18:45

	# Look for the second knee only to the right of the first one: walk the x
	# values in order and stop at the first one that exceeds knee_start.
	# NOTE(review): presumably x is sorted ascending, so x[idx:] is the whole
	# right-hand part of the curve -- confirm against where x is built.
	# NOTE: if no value in x exceeds knee_start, knee_end is never assigned.
	for idx, val in enumerate(x):
	    if val > knee_start:
	        # Run the knee detection on the tail of the fitted curve only.
	        kneedle = KneeLocator(x=x[idx:], y=y_fit[idx:], curve='concave', direction='increasing')
	        knee_end = kneedle.knee

	        print(knee_end)
	        break

dottyz / story_bike_share_analyze_16.py

Created May 2, 2019 18:45

	def logistic_growth(x, k, x0):
	    """Evaluate the logistic curve 1 / (1 + exp(-k * (x - x0))).

	    x  -- scalar or NumPy array of points to evaluate at
	    k  -- steepness of the curve
	    x0 -- midpoint, where the curve equals 0.5

	    Returns a value (or array of values) between 0 and 1.
	    """
	    return 1 / (1 + np.exp(-k*(x - x0)))

	# Running total of casual trips in the current row order, then that total
	# expressed as a fraction of all casual trips.
	data['Cummulative Trips'] = data['Casual Trips'].cumsum()
	data['Percentage Trips'] = data['Cummulative Trips'].div(data['Casual Trips'].sum())

	# Pull the predictor and the response out as plain arrays for the fit.
	x, y = (data[col].values for col in ('Mean Temp', 'Percentage Trips'))

	# Fit the logistic curve, allowing up to 2000 function evaluations.
	popt, pcov = curve_fit(f=logistic_growth, xdata=x, ydata=y, maxfev=2000)

dottyz / story_bike_share_analyze_15.py

Created May 2, 2019 18:44

	# Fit the unshifted exponential to (x, y), allowing up to 2000 function evaluations.
	popt, pcov = curve_fit(f=exp_growth_no_shift, xdata=x, ydata=y, maxfev=2000)

	# Estimate the first knee point on the fitted curve rather than on the raw observations
	kneedle = KneeLocator(
	    x=x,
	    y=exp_growth_no_shift(x, *popt),
	    curve='convex',
	    direction='increasing',
	)

	knee_start = kneedle.knee
	print(knee_start)

dottyz / story_bike_share_analyze_14.py

Created May 2, 2019 18:44

	from kneed import KneeLocator
	from scipy.optimize import curve_fit

	# Define the curve fitting equations
	def linear(x, m, b):
	    """Evaluate the straight line m*x + b (slope m, intercept b) at x."""
	    return m*x + b
	def exp_growth_no_shift(x, a, b):
	    """Evaluate the exponential a * exp(-b * x) at x.

	    a is the value at x = 0. Because of the minus sign in the exponent,
	    the curve grows with x when b is negative and decays when b is positive.
	    """
	    return a * np.exp(-b * x)
	def exp_growth(x, a, b, c):
	    """Evaluate the shifted exponential a * exp(-b * x) + c at x.

	    Same curve as a * exp(-b * x) with a constant vertical offset c added,
	    so the value at x = 0 is a + c.
	    """
	    return a * np.exp(-b * x) + c

dottyz / story_bike_share_analyze_13.py

Created May 2, 2019 18:43

	# Order the rows by mean temperature, drop rows with any missing value,
	# and renumber the index from zero.
	data = (
	    data
	    .sort_values('Mean Temp')
	    .dropna()
	    .reset_index(drop=True)
	)

	# Casual trips against mean temperature on a fresh, wide figure.
	fig, ax = plt.subplots(figsize=(20, 10))
	sns.scatterplot(data=data, x='Mean Temp', y='Casual Trips')

dottyz / story_bike_share_analyze_12.py

Created May 2, 2019 18:43

	fig, ax = plt.subplots(figsize=(16, 9))

	ax2 = ax.twinx() # Create the twin axis to enable display of ridership and temperature on the same graph
	palette = sns.color_palette() # Get the default color palette

	# One ridership line per user type, each in its own palette color
	for i, user_type in enumerate(['Casual Trips', 'Member Trips']):
	    sns.lineplot(x='Date', y=user_type, data=data, ax=ax, color=palette[i], markers='')

	# Temperature is drawn once, on the twin axis, after both ridership lines.
	# NOTE(review): pointplot treats x as categorical while lineplot uses the
	# Date values themselves -- confirm the two x axes actually line up.
	sns.pointplot(x='Date', y='Mean Temp', data=data, ax=ax2, color=palette[2], markers='x')

dottyz / story_bike_share_analyze_11.py

Last active May 3, 2019 14:22

	# Import the weather data, skipping the 22 leading rows (descriptions of the
	# weather station) so that the following row supplies the column names
	weather = pd.read_csv('./data/weather.csv', header=22)

	# Remove the parenthesised units from the column names (e.g. Celsius, mm)
	# and rename 'Date/Time' to 'Date'
	weather.columns = [
	    'Date' if col == 'Date/Time' else re.sub(r'\([^()]*\)', '', col).strip()
	    for col in weather.columns
	]

	# Count the distinct 'Id' values per date and user type, then spread the
	# user types into one column each
	data = (
	    df.groupby(['Date', 'User Type'])['Id']
	    .nunique()
	    .to_frame()
	    .pivot_table(index='Date', columns='User Type')
	    .reset_index()
	)
	data.columns = ['Date', 'Casual Trips', 'Member Trips']

	# Inner join: keep only the dates present in both the trip counts and the weather data
	data = data.merge(weather[['Date', 'Mean Temp', 'Total Precip']], on='Date', how='inner')

dottyz / story_bike_share_analyze_10.py

Created May 2, 2019 18:41

	# Distinct 'Id' count for every (Date, Hour, User Type), then averaged
	# across dates for each (Hour, User Type) pair
	data = (
	    df.groupby(['Date', 'Hour', 'User Type'])['Id']
	    .nunique()
	    .groupby(['Hour', 'User Type'])
	    .mean()
	    .reset_index()
	)
	fig, ax = plt.subplots(figsize=(16, 9))

	# Side-by-side bars per hour, one per user type
	sns.barplot(data=data, x='Hour', y='Id', hue='User Type', ax=ax)
	ax.set_ylabel('Average Hourly Trips')

dottyz / story_bike_share_analyze_9.py

Created May 2, 2019 18:41

	fig, axes = plt.subplots(1, 3, figsize=(18, 6))
	axes = np.array(axes).flatten()
	for m, ax in zip(ridership[ridership['Quarter']==3]['Month'].unique(), axes):
	ax.set_title(m)
	ax.set_ylim(0, 7000)
	ax.set_ylabel('Average Daily Trips')

	sns.barplot(
	x='Day of Week',
	y='Id',

dottyz / story_bike_share_analyze_8.py

Created May 2, 2019 18:40

	# 2 x 2 grid: one subplot per quarter
	fig, axes = plt.subplots(2, 2, figsize=(15, 15))

	# Flatten the 2D axes array for ease of looping
	axes = np.array(axes).flatten()

	# Prepare the month description titles for each quarter
	quarter_names = ['Jan. - Mar.', 'Apr. - Jun.', 'Jul. - Sept.', 'Oct. - Dec.']

	# Pair each quarter value, in ascending order, with its subplot
	for q, ax in zip(sorted(ridership['Quarter'].unique()), axes):
	    # Quarter values are used as 1-based positions into the 0-based title list
	    ax.set_title(quarter_names[q - 1])

Yizhao Tan dottyz