GrovesD2 · April 10, 2023 13:25
diff --git a/dist_fit_spy.py b/dist_fit_spy.py
 import numpy as np
 import yfinance as yf
 import scipy.stats as stats
 from scipy.integrate import quad
 from scipy.optimize import minimize

 # Download the monthly price history for the SPY ticker.
 df = yf.download(
     'SPY',
     interval='1mo',
 )

 # Convert the closing prices to month-over-month percentage changes; the
 # first row has no predecessor, so its NaN is dropped. np.ravel flattens the
 # result in case df['Close'] comes back as a one-column DataFrame instead of
 # a Series (NOTE(review): some yfinance releases appear to return MultiIndex
 # columns even for a single ticker -- confirm against the installed
 # version). On 1-D input np.ravel changes nothing.
 data = np.ravel(100*df['Close'].pct_change().dropna().values)

 # Candidate distributions to fit.
 dist_list = [
     stats.norm,
     stats.lognorm,
     stats.genlogistic,
 ]

 # Estimate the parameters of each candidate distribution; by default
 # scipy's fit() performs maximum likelihood estimation.
 param_estimates = [dist.fit(data) for dist in dist_list]

 # Total log-likelihood of the data under each fitted candidate. This is the
 # score used to decide which distribution is the "best fit".
 loglikes = [
     dist.logpdf(data, *fitted).sum()
     for dist, fitted in zip(dist_list, param_estimates)
 ]

 # Select the candidate with the highest log-likelihood (equivalently, the
 # lowest negative log-likelihood). The winning index is computed once and
 # reused for both lookups.
 # NOTE(review): raw log-likelihoods are not penalised for parameter count
 # (norm is fitted with loc/scale, lognorm and genlogistic with an extra
 # shape parameter), so this ranking leans towards the more flexible
 # candidates.
 best_idx = int(np.argmax(loglikes))
 best_dist = dist_list[best_idx]
 params = param_estimates[best_idx]

 # Print the name of the best-fit distribution
 print(f'The best-fit distribution is {best_dist.name}')

 # Locate the peak (mode) of the fitted density by minimising the negated
 # pdf. The search starts at the sample median rather than a hard-coded 0 so
 # the optimiser begins where the density is appreciable; far out in a tail
 # the gradient is close to zero and the search can stall where it started.
 x_pos_peak = minimize(
     lambda x: -best_dist.pdf(x, *params),
     x0=np.median(data),
 ).x[0]

 # Probability mass to the right of the peak. The survival function
 # sf(x) = 1 - cdf(x) is exactly that tail integral, so no numerical
 # quadrature of the pdf over [peak, inf) is needed. float() keeps the
 # printed value a plain Python float, as quad()[0] was.
 vol_right = float(best_dist.sf(x_pos_peak, *params))

 # Print out the statistics. The mean, standard deviation, median and
 # interquartile range below are sample statistics of `data`; only the last
 # two lines come from the fitted distribution.
 print('\nStatistics:')
 print('- Mean of the distribution: ', np.mean(data))
 print('- Standard deviation of the distribution: ', np.std(data))
 print('- Median of the distribution: ', np.median(data))
 print('- Interquartile range: ', stats.iqr(data))
 print('- Peak of the fitted distribution: ', x_pos_peak)
 print('- Volume to the right of the peak: ', 100*vol_right)
	import numpy as np
	import yfinance as yf
	import scipy.stats as stats
	from scipy.integrate import quad
	from scipy.optimize import minimize

	# Download the monthly price history for the SPY ticker.
	df = yf.download(
	    'SPY',
	    interval='1mo',
	)

	# Convert the closing prices to month-over-month percentage changes; the
	# first row has no predecessor, so its NaN is dropped. np.ravel flattens the
	# result in case df['Close'] comes back as a one-column DataFrame instead of
	# a Series (NOTE(review): some yfinance releases appear to return MultiIndex
	# columns even for a single ticker -- confirm against the installed
	# version). On 1-D input np.ravel changes nothing.
	data = np.ravel(100*df['Close'].pct_change().dropna().values)

	# Candidate distributions to fit.
	dist_list = [
	    stats.norm,
	    stats.lognorm,
	    stats.genlogistic,
	]

	# Estimate the parameters of each candidate distribution; by default
	# scipy's fit() performs maximum likelihood estimation.
	param_estimates = [dist.fit(data) for dist in dist_list]

	# Total log-likelihood of the data under each fitted candidate. This is the
	# score used to decide which distribution is the "best fit".
	loglikes = [
	    dist.logpdf(data, *fitted).sum()
	    for dist, fitted in zip(dist_list, param_estimates)
	]

	# Select the candidate with the highest log-likelihood (equivalently, the
	# lowest negative log-likelihood). The winning index is computed once and
	# reused for both lookups.
	# NOTE(review): raw log-likelihoods are not penalised for parameter count
	# (norm is fitted with loc/scale, lognorm and genlogistic with an extra
	# shape parameter), so this ranking leans towards the more flexible
	# candidates.
	best_idx = int(np.argmax(loglikes))
	best_dist = dist_list[best_idx]
	params = param_estimates[best_idx]

	# Print the name of the best-fit distribution
	print(f'The best-fit distribution is {best_dist.name}')

	# Locate the peak (mode) of the fitted density by minimising the negated
	# pdf. The search starts at the sample median rather than a hard-coded 0 so
	# the optimiser begins where the density is appreciable; far out in a tail
	# the gradient is close to zero and the search can stall where it started.
	x_pos_peak = minimize(
	    lambda x: -best_dist.pdf(x, *params),
	    x0=np.median(data),
	).x[0]

	# Probability mass to the right of the peak. The survival function
	# sf(x) = 1 - cdf(x) is exactly that tail integral, so no numerical
	# quadrature of the pdf over [peak, inf) is needed. float() keeps the
	# printed value a plain Python float, as quad()[0] was.
	vol_right = float(best_dist.sf(x_pos_peak, *params))

	# Print out the statistics. The mean, standard deviation, median and
	# interquartile range below are sample statistics of `data`; only the last
	# two lines come from the fitted distribution.
	print('\nStatistics:')
	print('- Mean of the distribution: ', np.mean(data))
	print('- Standard deviation of the distribution: ', np.std(data))
	print('- Median of the distribution: ', np.median(data))
	print('- Interquartile range: ', stats.iqr(data))
	print('- Peak of the fitted distribution: ', x_pos_peak)
	print('- Volume to the right of the peak: ', 100*vol_right)