Created
April 10, 2023 13:25
-
-
Save GrovesD2/c0a1c3976b03e0b82a9e9bfadd2019ea to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import yfinance as yf | |
import scipy.stats as stats | |
from scipy.integrate import quad | |
from scipy.optimize import minimize | |
# Fit several candidate distributions to SPY monthly percentage returns,
# report which one has the highest log-likelihood, and print summary
# statistics of the sample together with the mode of the fitted distribution.

# Get the monthly price data for the SPY ticker. The full history is requested
# explicitly so the sample size does not depend on the library's default
# period.
df = yf.download(
    'SPY',
    period='max',
    interval='1mo',
)

# Convert the closing prices to percentage changes. np.ravel keeps the sample
# one-dimensional even if df['Close'] comes back as a single-column DataFrame
# rather than a Series.
data = np.ravel(100*df['Close'].pct_change().dropna().values)

# Define a list of candidate distributions to fit
dist_list = [
    stats.norm,
    stats.lognorm,
    stats.genlogistic,
]

# Estimate the parameters of each candidate distribution, by default, the
# fitting is performed with maximum likelihood estimation
param_estimates = [dist.fit(data) for dist in dist_list]

# Compute the log-likelihood of the data for each distribution, this
# serves as a method of finding the "best fit" distribution, i.e., which
# distribution maximises the likelihood function
loglikes = [
    dist.logpdf(data, *params).sum()
    for dist, params in zip(dist_list, param_estimates)
]

# Select the best-fit distribution: the one with the largest log-likelihood.
# The index is computed once and reused for both lookups.
best_idx = int(np.argmax(loglikes))
best_dist = dist_list[best_idx]
params = param_estimates[best_idx]

# Print the name of the best-fit distribution
print(f'The best-fit distribution is {best_dist.name}')

# Let's get the x position of the peak of the distribution, and work out the
# volume to the right of the peak.
# minimize passes a length-1 array, so index it to return a scalar objective.
x_pos_peak = minimize(lambda x: -best_dist.pdf(x[0], *params), x0=0).x[0]

# The survival function sf(x) = 1 - cdf(x) is the probability mass to the
# right of x, so no numerical integration of the pdf is needed.
vol_right = best_dist.sf(x_pos_peak, *params)

# Print out the statistics (mean, std, median and IQR are computed from the
# sample itself; only the peak and right-hand volume use the fitted model)
print('\nStatistics:')
print('- Mean of the distribution: ', np.mean(data))
print('- Standard deviation of the distribution: ', np.std(data))
print('- Median of the distribution: ', np.median(data))
print('- Interquartile range: ', stats.iqr(data))
print('- Peak of the fitted distribution: ', x_pos_peak)
print('- Volume to the right of the peak: ', 100*vol_right)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment