Last active
November 6, 2023 09:06
-
-
Save rep-movsd/26b5308c71a75705097bba94beeb0699 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
''' | |
This script parses the HTML file from https://www.moneycontrol.com/stocks/marketinfo/dividends_declared/index.php and | |
creates a CSV file with relevant data | |
Sheet will contain all upcoming dividends after the current date, and those whose percentage is declared | |
It calculates the net percentage earning per rupee invested if you buy the stock at current price and hold it till the record date | |
To run this script, you need to first create: | |
1. A file called scrip.json with empty dictionary "{}" | |
2. A folder called cache | |
3. Save the HTML output from the above URL into a file called input.html (use save as from browser or use curl) | |
The CSV it creates has formulae in it that need Google finance api to do the required calculations. | |
You must import this CSV into Google Sheets for it to work | |
Each HTML page the script fetches for individual stocks is saved in the cache folder to avoid repeated fetches | |
''' | |
import os.path | |
import sys | |
import json | |
import requests | |
import re | |
from datetime import datetime | |
from bs4 import BeautifulSoup | |
# Scrip file: persistent per-company cache (BSE/NSE codes, face value),
# keyed by company name.  Per the module docstring it must already exist
# and contain at least an empty JSON object "{}".
with open('scrip.json') as f:
    dctScrips = json.load(f)
# Creates the basic data array with rows containing name, percent and record date | |
def parseInputHTML(sFile):
    '''
    Parse the saved moneycontrol dividends page and return a list of dicts,
    one per upcoming dividend, each with keys:
        name       - company name
        percent    - declared dividend percent (string, as shown on the page)
        recordDate - record date reformatted as YYYY-MM-DD
        url        - absolute URL of the company's moneycontrol page
    Rows with a past record date, an unparseable date, a missing company
    link, or an undeclared percentage ('0.00') are skipped.
    '''
    # Open the input file saved from the browser / curl
    with open(sFile) as f:
        sHTML = f.read()
    # The dividend listing is the table element with class "dvdtbl"
    soup = BeautifulSoup(sHTML, 'html.parser')
    table = soup.find('table', class_='dvdtbl')
    arrData = []
    # Compare against "now" once, hoisted out of the loop
    dtNow = datetime.today()
    for row in table.find_all('tr')[2:]:  # Skip the first 2 rows (header)
        columns = row.find_all('td')
        if not columns:
            continue
        # There are 6 columns in the table:
        # Company name, Dividend type, Percent, Announcement, Record date, Ex dividend date
        sName = columns[0].text.strip()
        sPercent = columns[2].text.strip()
        sRecordDate = columns[4].text.strip()
        # Extract the scrip's URL from the link in the first column;
        # skip rows without a link (nothing to fetch for them later)
        elemLink = columns[0].find('a')
        if elemLink is None:
            continue
        sURL = 'https://www.moneycontrol.com/india' + elemLink.get('href')
        # Skip rows whose record date cannot be parsed (e.g. "--")
        try:
            dtRecord = datetime.strptime(sRecordDate, "%d-%m-%Y")
        except ValueError:
            continue
        # Keep only future record dates with a declared percentage.
        # NOTE: dtRecord has no time component (midnight), so a record date
        # of today compares as past and is skipped.
        if dtRecord >= dtNow and sPercent != '0.00':
            arrData.append({
                'name': sName,
                'percent': sPercent,
                'recordDate': dtRecord.strftime("%Y-%m-%d"),
                'url': sURL,
            })
    return arrData
# Function to get the HTML for a scrip, from cached file if possible | |
def getScripHTML(sName, sURL):
    '''
    Return the HTML for a scrip's moneycontrol page.

    Serves cache/<sName>.html when it exists; otherwise fetches sURL,
    writes the body into the cache folder and returns it.
    Returns None when the HTTP fetch fails (non-200 status).
    '''
    print(f'Loading {sName}', file=sys.stderr)
    sFile = f'cache/{sName}.html'
    # Serve from the cache folder to avoid repeated fetches
    if os.path.exists(sFile):
        with open(sFile) as f:
            return f.read()
    print(f'Fetching {sName}', file=sys.stderr)
    response = requests.get(sURL)
    if response.status_code != 200:
        # Report the failure explicitly (the original fell through and
        # returned None silently); callers must handle a None result
        print(f'Failed to fetch {sName}: HTTP {response.status_code}', file=sys.stderr)
        return None
    # Cache the page for next time
    with open(sFile, 'w') as f:
        f.write(response.text)
    return response.text
def getCodes(row):
    '''
    Ensure the dctScrips entry for this row has its 'BSE' and 'NSE' codes,
    scraping them from the scrip's moneycontrol page when either is missing.
    Returns True when the entry was updated, False otherwise.
    '''
    dctScrip = dctScrips.get(row['name'])
    if 'BSE' in dctScrip and 'NSE' in dctScrip:
        # Both codes already cached - nothing to do
        return False
    # Fetch the HTML for the scrip (may come from the cache folder)
    sHTML = getScripHTML(row['name'], row['url'])
    if not sHTML:
        # Fetch failed - leave the entry untouched
        return False
    soup = BeautifulSoup(sHTML, 'html.parser')
    # The exchange codes live inside the <ul class="company_slider"> element
    div = soup.find('ul', class_='company_slider')
    if not div:
        return False
    # The code text sits two siblings past the parent of the "BSE:"/"NSE:"
    # label; an exchange the scrip does not trade on yields ''
    elBSE = div.find(string='BSE:')
    sBSE = elBSE.parent.nextSibling.nextSibling.text if elBSE else ''
    elNSE = div.find(string='NSE:')
    sNSE = elNSE.parent.nextSibling.nextSibling.text if elNSE else ''
    dctScrip['BSE'] = sBSE
    dctScrip['NSE'] = sNSE
    return True
def getFaceValue(row):
    '''
    Ensure the dctScrips entry for this row has a face value under 'FV'.
    Extracts the scrip id ("scid") from the scrip's page, queries the
    moneycontrol price API for the face value and stores it.
    Returns True when the entry was updated, False otherwise.
    '''
    dctScrip = dctScrips.get(row['name'])
    if 'FV' in dctScrip:
        # Face value already cached
        return False
    sHTML = getScripHTML(row['name'], row['url'])
    if not sHTML:
        # Fetch failed - cannot determine the scrip id
        return False
    # The scrip id is embedded in the page as: var scid = "XXXX";
    match = re.search(r'var scid = "([A-Z0-9]+)";', sHTML)
    if not match:
        return False
    sSCID = match.group(1)
    # Prefer the NSE price feed when we know the scrip trades on the NSE
    sExchange = 'nse' if dctScrip.get('NSE') else 'bse'
    sURL = f'https://priceapi.moneycontrol.com/pricefeed/{sExchange}/equitycash/{sSCID}'
    print(f'Fetch SCID {sSCID} for {row["name"]} - {sURL}', file=sys.stderr)
    response = requests.get(sURL, headers={'User-Agent': 'Mozilla/5.0'})
    print(response.status_code, file=sys.stderr)
    if response.status_code != 200:
        return False
    dctResponse = response.json()
    # Default to a face value of '1.00' when the API omits the field
    sFaceValue = dctResponse.get('data', {}).get('FV', '1.00')
    if sFaceValue:
        dctScrip['FV'] = sFaceValue
        return True
    # Original returned True even when FV stayed empty; report no update
    return False
arrData = parseInputHTML('input.html')
# Iterate over the rows and make sure every scrip in dctScrips has its
# BSE/NSE codes and face value, fetching from moneycontrol when missing
bChanged = False
for row in arrData:
    sName = row['name']
    if sName not in dctScrips:
        dctScrips[sName] = {}
    # Get the BSE and NSE code if needed
    # (the fetch call is placed before 'or' so short-circuiting cannot skip it)
    bChanged = getCodes(row) or bChanged
    # Get the face value if needed
    bChanged = getFaceValue(row) or bChanged
    # print(f'{sName} - {dctScrips[sName]}')
# Write the scrip dictionary back to file if changed
if bChanged:
    with open('scrip.json', 'w') as f:
        json.dump(dctScrips, f, indent=4)
# Iterate over the data and create the output csv
# Column layout:
#              A       B        C              D        E              F             G
arrOut = [[ 'Name', 'Codes', 'Record date', 'Div. %', 'Stock Price', 'Face value', 'Net Earning%']]
# Data starts from row 2 (row 1 is the header above)
nRow = 2
def getCell(col, row):
    """Return a spreadsheet cell reference such as 'D2' for a column letter and row number."""
    return '%s%s' % (col, row)
# Build one output row per dividend; nRow tracks the spreadsheet row the
# formulae will land on (data starts at row 2)
for nRow, row in enumerate(arrData, start=2):
    sName = row['name']
    # Missing entries yield {} so unresolved scrips produce blank cells
    # instead of a KeyError crash
    dctScrip = dctScrips.get(sName, {})
    # Cell references (columns D/E/F) used by the computed formula column
    cellPct = getCell('D', nRow)
    cellPrice = getCell('E', nRow)
    cellFV = getCell('F', nRow)
    # Actual money earned per share is the percent times the face value
    # e.g. face value of 10 and 50% dividend means 5 rupees per share
    cellEarningPerShare = f'{cellPct} * {cellFV} / 100'
    # Money earned per rupee invested is earning-per-share divided by the
    # share price.  (The original multiplied by faceValue/price on top of
    # the per-share earning, double-counting the face value.)
    cellEarningPerRupee = f'{cellEarningPerShare} / {cellPrice}'
    # Prefer the NSE symbol for Google Finance, fall back to BSE ("BOM:")
    sGoogleCode = f'NSE:{dctScrip["NSE"]}' if dctScrip.get('NSE') else f'BOM:{dctScrip.get("BSE", "")}'
    cellPriceLookup = f'=GOOGLEFINANCE("{sGoogleCode}", "PRICE")'
    arrOut.append([
        sName,
        dctScrip.get('BSE', '') + '/' + dctScrip.get('NSE', ''),
        row['recordDate'],
        row['percent'],
        cellPriceLookup,
        dctScrip.get('FV', ''),
        f'=ROUND({cellEarningPerRupee}, 2)'
    ])
# Emit tab-separated rows on stdout (import into Google Sheets)
for arrRow in arrOut:
    print('\t'.join(map(str, arrRow)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment