- To extract stock information
- 1. Generate list of S&P 500 companies
- Get total number of Shares
- Final plot for returns
To extract stock information
To extract stock information, we will use the `yfinance` module, which is a convenient way to download data from Yahoo Finance. The official Yahoo Finance API was decommissioned some time ago. More details about this module can be found in the yfinance project documentation.
from requests import get
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import time as time
from tqdm import tqdm
import yfinance as yf
from IPython.core.display import clear_output
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import seaborn as sns
# Apply seaborn's "whitegrid" theme to all subsequent matplotlib figures.
sns.set(style="whitegrid")
# `sns.color_palette(...)` only *returns* a palette; `sns.set_palette` is the
# call that installs "husl" as the default colour cycle for later plots.
sns.set_palette("husl")
%config InlineBackend.figure_format = 'retina'
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
# Global matplotlib defaults applied to every figure created below.
plot_params = {
    # Text sizes and weights.
    'font.size': 30,
    'axes.titlesize': 24,
    'axes.labelsize': 20,
    'axes.labelweight': 'bold',
    # Line and marker geometry.
    'lines.linewidth': 3,
    'lines.markersize': 10,
    # Tick label sizes on both axes.
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
}
plt.rcParams.update(plot_params)
# Scrape the S&P 500 constituents table from Wikipedia into `SP_500_df`.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# Identify the client and bound the wait so a stalled connection cannot hang
# the notebook; fail loudly on a non-2xx response instead of parsing an error page.
response = get(
    wiki_url,
    headers={'User-Agent': 'sp500-notebook/1.0 (python-requests)'},
    timeout=30,
)
response.raise_for_status()
html_soup = BeautifulSoup(response.text, 'html.parser')

# A CSS selector matches the first table carrying both classes even when the
# page adds further classes; an exact "wikitable sortable" string match would not.
tab = html_soup.select_one('table.wikitable.sortable')
table_rows = tab.find_all('tr') if tab is not None else []
if not table_rows:
    raise ValueError('No "wikitable sortable" table found at {}'.format(wiki_url))

# Column names come from the header row only.
column_headings = [entry.text.strip() for entry in table_rows[0].find_all('th')]
print(column_headings)

# One list per column, filled row by row.
SP_500_dict = {heading: [] for heading in column_headings}
for i, name in enumerate(SP_500_dict):
    print(i, name)

for row_entry in table_rows[1:]:
    row_elements = row_entry.find_all('td')
    # zip pairs cells with headings positionally; rows without <td> cells add nothing.
    for key, _elements in zip(SP_500_dict, row_elements):
        SP_500_dict[key].append(_elements.text.strip())

SP_500_df = pd.DataFrame(SP_500_dict)
SP_500_df
# Number of constituents per GICS sector (rendered as cell output in a notebook).
SP_500_df['GICS Sector'].value_counts()

# Pie chart of the same sector breakdown, with percentage labels on each wedge.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
sector_counts = SP_500_df['GICS Sector'].value_counts()
sector_counts.plot.pie(
    y='GICS Sector',
    autopct='%1.1f%%',
    fontsize=20,
    ax=ax,
    colormap='tab20',
)
plt.axis('off')

# Rows belonging to the Energy sector.
is_energy = SP_500_df['GICS Sector'] == 'Energy'
SP_500_df[is_energy]
We can parse this table and search for companies by sector:
# Rows belonging to the Information Technology sector.
SP_500_df[SP_500_df['GICS Sector'] == 'Information Technology']
import yfinance as yf

# Date window (ISO strings) used for the price-history downloads.
START_DATE = "2020-01-01"
END_DATE = "2020-07-26"

# Single-ticker walkthrough using Tesla.
yf_tickr = yf.Ticker('TSLA')
_tickr_info = yf_tickr.info  # read the info mapping once and reuse it
_shares_outstanding = _tickr_info['sharesOutstanding']
_previous_close = _tickr_info['previousClose']
print('Outstanding shares: {}'.format(_shares_outstanding))
print('Market Cap: {} Million USD'.format((_shares_outstanding * _previous_close)/10**6))

# Weekly price history over the window, without dividend/split columns.
df_tckr = yf_tickr.history(start=START_DATE, end=END_DATE, interval="1wk", actions=False)

# The single share count from `info` is applied to every historical row, so
# this column is an approximation of the historical market cap.
df_tckr['Market_Cap'] = df_tckr['Open'] * _shares_outstanding

# Percent change of each weekly open relative to the first open in the window.
# `.iloc[0]` is explicit positional access; `Series[0]` on a date-indexed
# Series is deprecated in pandas.
_first_open = df_tckr['Open'].iloc[0]
df_tckr['YTD'] = (df_tckr['Open'] - _first_open) * 100 / _first_open

fig, ax = plt.subplots(1, 1, figsize=(10, 8))
df_tckr.plot(use_index=True, y="YTD", ax=ax, linewidth=4, grid=False, label='TSLA')
ax.set_xlabel('Date')
ax.set_ylabel('% YTD change (Weekly basis)')
import time as time
def plot_market_cap(tickr_list, START_DATE, END_DATE):
    """Download price history for each ticker and add a year-to-date return column.

    Despite the name, this function neither plots nor computes a market cap;
    it only collects the data that later cells plot.

    Args:
        tickr_list: Iterable of ticker symbols passed to ``yf.Ticker``.
        START_DATE: Start of the window, forwarded as ``history(start=...)``.
        END_DATE: End of the window, forwarded as ``history(end=...)``.

    Returns:
        dict mapping each ticker to ``{'hist': DataFrame}``. The frame is the
        downloaded history plus a ``'YTD'`` column holding the percent change
        of ``Open`` relative to the first row's ``Open``. If no rows were
        returned for a ticker, its frame is stored with an empty ``'YTD'``
        column instead of aborting the whole batch.
    """
    total_data = {}
    for tickr in tickr_list:
        print('Looking at: {}'.format(tickr))
        df_tckr = yf.Ticker(tickr).history(start=START_DATE, end=END_DATE, actions=False)

        if df_tckr.empty:
            # No baseline price exists, so the return cannot be computed.
            print('No price history returned for: {}'.format(tickr))
            df_tckr['YTD'] = pd.Series(index=df_tckr.index, dtype='float64')
        else:
            # `.iloc[0]` is explicit positional access; `Series[0]` on a
            # date-indexed Series is deprecated in pandas.
            first_open = df_tckr['Open'].iloc[0]
            df_tckr['YTD'] = (df_tckr['Open'] - first_open) * 100 / first_open

        total_data[tickr] = {'hist': df_tckr}

        # Random 0-9 second pause after each download (presumably to avoid
        # hammering the data source with back-to-back requests).
        time.sleep(np.random.randint(10))
    return total_data
# Tickers to compare, and their downloaded histories.
tickr_list = ['AAPL', 'TSLA', 'FB', 'DAL', 'XOM']
data = plot_market_cap(tickr_list, START_DATE, END_DATE)

# Resolve each ticker to its company name for the legend. Exact equality is
# used because `str.contains` does a regex substring match and can select a
# different company whose symbol merely contains the ticker. A ticker missing
# from the table falls back to the symbol itself rather than raising.
company_name = []
for tickr in tickr_list:
    matches = SP_500_df.loc[SP_500_df['Symbol'] == tickr, 'Security']
    company_name.append(matches.iloc[0] if not matches.empty else tickr)
company_name

print(len(data['AAPL']['hist']['YTD']))

# One column of YTD returns per ticker, aligned on the date index so that
# differing trading calendars or history lengths give NaN gaps instead of a
# length-mismatch error or silently shifted dates.
ytd_stat = pd.DataFrame({tickr: data[tickr]['hist']['YTD'] for tickr in tickr_list})
ytd_stat['Date'] = ytd_stat.index
ytd_stat = ytd_stat.reset_index(drop=True)
ytd_stat

# Overlay every ticker's YTD return on a single axis.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
for tickr, label in zip(tickr_list, company_name):
    ax.plot(ytd_stat['Date'], ytd_stat[tickr], linewidth=5.0, label=label)
ax.set_ylabel('YTD %Return 2020')
ax.set_xlabel('Date')
ax.legend()