Created
December 26, 2022 12:48
-
-
Save GrovesD2/897b77899035ef0ea975b037b589ac6c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import yfinance as yf | |
GROWTH_SINCE = '2021-12-01'  # The lower date to calculate the stock performance
GROUPBY_COL = 'GICS Sector'  # Use 'GICS Sector' or 'GICS Sub-Industry'
S_AND_P_URL = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
NUM_PER_GROUP = 3  # The top n winning stocks per group


def normalize_symbols(ticker_info: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``ticker_info`` whose 'Symbol' column uses dashes.

    Dots in ticker names (A and B share classes, e.g. ``BRK.B``) are replaced
    with dashes to prevent errors when downloading. Normalising the table
    itself, rather than only the download list, keeps the symbols identical
    on both sides of the later merge.
    """
    normalized = ticker_info.copy()
    # regex=False is required: as a regex, '.' would match every character.
    normalized['Symbol'] = normalized['Symbol'].str.replace(
        '.', '-', regex=False,
    )
    return normalized


def compute_growth(close_prices: pd.DataFrame) -> pd.DataFrame:
    """Return the percentage growth of every ticker over the price window.

    Parameters
    ----------
    close_prices
        One column per ticker, one row per date, holding closing prices.

    Returns
    -------
    pd.DataFrame
        Columns 'Symbol' and 'Growth' (growth in percent between the first
        and the last row in which every remaining ticker has a price).

    Raises
    ------
    ValueError
        If no ticker has any data, or no row is complete for all tickers.
    """
    # A ticker with no data at all (e.g. a failed download) is an all-NaN
    # column; left in place it would make the row-wise dropna below discard
    # every row, so remove such columns first.
    usable = close_prices.dropna(axis='columns', how='all')

    # NOTE: rows are kept only where *all* tickers have a price, so a ticker
    # whose data starts late shortens the comparison window for everyone.
    usable = usable.dropna(axis='index', how='any')

    if usable.empty:
        raise ValueError(
            'No complete closing-price rows available to compute growth'
        )

    # Find the stock growth in percent
    growth = 100 * (usable.iloc[-1] / usable.iloc[0] - 1)

    # Change the series back to a dataframe so that it can be merged with
    # the sector/industry information
    return growth.rename_axis('Symbol').rename('Growth').reset_index()


def top_per_group(
    growth: pd.DataFrame,
    ticker_info: pd.DataFrame,
    group_col: str,
    num_per_group: int,
) -> pd.DataFrame:
    """Return the ``num_per_group`` best-growing stocks of every group.

    Parameters
    ----------
    growth
        Frame with 'Symbol' and 'Growth' columns.
    ticker_info
        Frame with a 'Symbol' column (in the same format as ``growth``) and
        the ``group_col`` column.
    group_col
        Column of ``ticker_info`` to group by.
    num_per_group
        Number of winners to keep per group.

    Returns
    -------
    pd.DataFrame
        The winning rows with an added 'sector_rank' column, sorted by
        ``group_col`` and 'Growth', both descending.
    """
    # Merge the growth with the ticker information dataframe to obtain the
    # column used for the groupby; de-duplicate so one symbol cannot be
    # repeated by the merge.
    groups = ticker_info[['Symbol', group_col]].drop_duplicates('Symbol')
    ranked = growth.merge(groups, on='Symbol', how='left')

    # Find the ranking of each stock per group
    ranked['sector_rank'] = (
        ranked
        .groupby(group_col)['Growth']
        .rank(ascending=False)
    )

    # Filter to only the winning stocks, and sort the values
    winners = ranked[ranked['sector_rank'] <= num_per_group]
    return winners.sort_values([group_col, 'Growth'], ascending=False)


def main() -> None:
    """Download S&P 500 prices and print the top stocks per group."""
    # This reads in the current list of S&P500 stocks from wikipedia, which
    # also includes information on the stock's sector, and sub industry
    ticker_info = normalize_symbols(pd.read_html(S_AND_P_URL)[0])
    tickers = ticker_info['Symbol'].unique().tolist()

    # Download the price data for all the stocks in the S&P list
    ticker_prices = yf.download(
        tickers,
        start=GROWTH_SINCE,
        threads=True,
    )

    # Filter to only the closing prices (this is used for the comparison).
    # Selecting the top column level directly yields one column per ticker
    # and avoids relying on the 'level_0'/'level_1' names that reset_index
    # only generates when the column levels are unnamed.
    close_prices = ticker_prices['Close']

    growth = compute_growth(close_prices)
    print(top_per_group(growth, ticker_info, GROUPBY_COL, NUM_PER_GROUP))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment