from requests import get
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import time as time
from tqdm import tqdm
import yfinance as yf
from IPython.core.display import clear_output
To extract stock information
To extract stock information we will use yfinance
module which is a convenient way to download data from Yahoo Finance. The official API for Yahoo Finance was decommissioned some time back. More details about this module can be found here.
#----- PLOTTING PARAMS ----#
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import seaborn as sns
set(style="whitegrid")
sns."husl")
sns.color_palette(%config InlineBackend.figure_format = 'retina'
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
= {
plot_params 'font.size' : 30,
'axes.titlesize' : 24,
'axes.labelsize' : 20,
'axes.labelweight' : 'bold',
'lines.linewidth' : 3,
'lines.markersize' : 10,
'xtick.labelsize' : 16,
'ytick.labelsize' : 16,
}
plt.rcParams.update(plot_params)
1. Generate list of S&P 500 companies
Parse wikipedia to generate a list
= 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
wiki_url = get(wiki_url)
response = BeautifulSoup(response.text, 'html.parser')
html_soup = html_soup.find("table",{"class":"wikitable sortable"}) tab
= [entry.text.strip() for entry in tab.findAll('th')]
column_headings print(column_headings)
['Symbol', 'Security', 'SEC filings', 'GICS Sector', 'GICS Sub-Industry', 'Headquarters Location', 'Date first added', 'CIK', 'Founded']
= {keys:[] for keys in column_headings} SP_500_dict
for i, name in enumerate(SP_500_dict.keys()):
print(i, name)
0 Symbol
1 Security
2 SEC filings
3 GICS Sector
4 GICS Sub-Industry
5 Headquarters Location
6 Date first added
7 CIK
8 Founded
Populate each row entry as per company data
for row_entry in tab.findAll('tr')[1:]:
= row_entry.findAll('td')
row_elements for key, _elements in zip(SP_500_dict.keys(), row_elements):
SP_500_dict[key].append(_elements.text.strip())
= pd.DataFrame(SP_500_dict, columns=SP_500_dict.keys()) SP_500_df
SP_500_df
Symbol | Security | SEC filings | GICS Sector | GICS Sub-Industry | Headquarters Location | Date first added | CIK | Founded | |
---|---|---|---|---|---|---|---|---|---|
0 | MMM | 3M Company | reports | Industrials | Industrial Conglomerates | St. Paul, Minnesota | 1976-08-09 | 0000066740 | 1902 |
1 | ABT | Abbott Laboratories | reports | Health Care | Health Care Equipment | North Chicago, Illinois | 1964-03-31 | 0000001800 | 1888 |
2 | ABBV | AbbVie Inc. | reports | Health Care | Pharmaceuticals | North Chicago, Illinois | 2012-12-31 | 0001551152 | 2013 (1888) |
3 | ABMD | Abiomed | reports | Health Care | Health Care Equipment | Danvers, Massachusetts | 2018-05-31 | 0000815094 | 1981 |
4 | ACN | Accenture | reports | Information Technology | IT Consulting & Other Services | Dublin, Ireland | 2011-07-06 | 0001467373 | 1989 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
500 | YUM | Yum! Brands Inc | reports | Consumer Discretionary | Restaurants | Louisville, Kentucky | 1997-10-06 | 0001041061 | 1997 |
501 | ZBRA | Zebra Technologies | reports | Information Technology | Electronic Equipment & Instruments | Lincolnshire, Illinois | 2019-12-23 | 0000877212 | 1969 |
502 | ZBH | Zimmer Biomet | reports | Health Care | Health Care Equipment | Warsaw, Indiana | 2001-08-07 | 0001136869 | 1927 |
503 | ZION | Zions Bancorp | reports | Financials | Regional Banks | Salt Lake City, Utah | 2001-06-22 | 0000109380 | 1873 |
504 | ZTS | Zoetis | reports | Health Care | Pharmaceuticals | Parsippany, New Jersey | 2013-06-21 | 0001555280 | 1952 |
505 rows × 9 columns
'GICS Sector'].value_counts() SP_500_df[
Information Technology 75
Industrials 74
Financials 65
Consumer Discretionary 63
Health Care 62
Consumer Staples 32
Real Estate 29
Materials 28
Utilities 28
Communication Services 26
Energy 23
Name: GICS Sector, dtype: int64
Visualize distribution of the companies as per sectors
= plt.subplots(1,1, figsize=(10,10))
fig, ax 'GICS Sector'].value_counts().plot.pie(y='GICS Sector', autopct='%1.1f%%', fontsize=20, ax = ax, colormap='tab20')
SP_500_df['off') plt.axis(
(-1.25, 1.25, -1.25, 1.25)
'GICS Sector'] == 'Energy'] SP_500_df.loc[ SP_500_df[
Symbol | Security | SEC filings | GICS Sector | GICS Sub-Industry | Headquarters Location | Date first added | CIK | Founded | |
---|---|---|---|---|---|---|---|---|---|
44 | APA | APA Corporation | reports | Energy | Oil & Gas Exploration & Production | Houston, Texas | 1997-07-28 | 0000006769 | 1954 |
59 | BKR | Baker Hughes Co | reports | Energy | Oil & Gas Equipment & Services | Houston, Texas | 2017-07-07 | 0001701605 | 2017 |
80 | COG | Cabot Oil & Gas | reports | Energy | Oil & Gas Exploration & Production | Houston, Texas | 2008-06-23 | 0000858470 | 1989 |
101 | CVX | Chevron Corp. | reports | Energy | Integrated Oil & Gas | San Ramon, California | 1957-03-04 | 0000093410 | 1879 |
121 | COP | ConocoPhillips | reports | Energy | Oil & Gas Exploration & Production | Houston, Texas | 1957-03-04 | 0001163165 | 2002 |
140 | DVN | Devon Energy | reports | Energy | Oil & Gas Exploration & Production | Oklahoma City, Oklahoma | 2000-08-30 | 0001090012 | 1971 |
142 | FANG | Diamondback Energy | reports | Energy | Oil & Gas Exploration & Production | Midland, Texas | 2018-12-03 | 0001539838 | 2007 |
169 | EOG | EOG Resources | reports | Energy | Oil & Gas Exploration & Production | Houston, Texas | 2000-11-02 | 0000821189 | 1999 |
183 | XOM | Exxon Mobil Corp. | reports | Energy | Integrated Oil & Gas | Irving, Texas | 1957-03-04 | 0000034088 | 1999 |
219 | HAL | Halliburton Co. | reports | Energy | Oil & Gas Equipment & Services | Houston, Texas | 1957-03-04 | 0000045012 | 1919 |
227 | HES | Hess Corporation | reports | Energy | Integrated Oil & Gas | New York, New York | 1984-05-31 | 0000004447 | 1919 |
230 | HFC | HollyFrontier Corp | reports | Energy | Oil & Gas Refining & Marketing | Dallas, Texas | 2018-06-18 | 0000048039 | 1947 |
274 | KMI | Kinder Morgan | reports | Energy | Oil & Gas Storage & Transportation | Houston, Texas | 2012-05-25 | 0001506307 | 1997 |
298 | MRO | Marathon Oil Corp. | reports | Energy | Oil & Gas Exploration & Production | Houston, Texas | 1991-05-01 | 0000101778 | 1887 |
299 | MPC | Marathon Petroleum | reports | Energy | Oil & Gas Refining & Marketing | Findlay, Ohio | 2011-07-01 | 0001510295 | 2009 (1887) |
345 | NOV | NOV Inc. | reports | Energy | Oil & Gas Equipment & Services | Houston, Texas | 2005-03-14 | 0001021860 | 1841 |
352 | OXY | Occidental Petroleum | reports | Energy | Oil & Gas Exploration & Production | Houston, Texas | 1982-12-31 | 0000797468 | 1920 |
355 | OKE | Oneok | reports | Energy | Oil & Gas Storage & Transportation | Tulsa, Oklahoma | 2010-03-15 | 0001039684 | 1906 |
372 | PSX | Phillips 66 | reports | Energy | Oil & Gas Refining & Marketing | Houston, Texas | 2012-05-01 | 0001534701 | 2012 (1917) |
374 | PXD | Pioneer Natural Resources | reports | Energy | Oil & Gas Exploration & Production | Irving, Texas | 2008-09-24 | 0001038357 | 1997 |
411 | SLB | Schlumberger Ltd. | reports | Energy | Oil & Gas Equipment & Services | Curaçao, Kingdom of the Netherlands | 1965-03-31 | 0000087347 | 1926 |
466 | VLO | Valero Energy | reports | Energy | Oil & Gas Refining & Marketing | San Antonio, Texas | 0001035002 | 1980 | |
494 | WMB | Williams Companies | reports | Energy | Oil & Gas Storage & Transportation | Tulsa, Oklahoma | 1975-03-31 | 0000107263 | 1908 |
We can parse these tables and search companies based on the sector
'GICS Sector'] == 'Information Technology'] SP_500_df.loc[ SP_500_df[
Symbol | Security | SEC filings | GICS Sector | GICS Sub-Industry | Headquarters Location | Date first added | CIK | Founded | |
---|---|---|---|---|---|---|---|---|---|
4 | ACN | Accenture | reports | Information Technology | IT Consulting & Other Services | Dublin, Ireland | 2011-07-06 | 0001467373 | 1989 |
6 | ADBE | Adobe Inc. | reports | Information Technology | Application Software | San Jose, California | 1997-05-05 | 0000796343 | 1982 |
7 | AMD | Advanced Micro Devices | reports | Information Technology | Semiconductors | Santa Clara, California | 2017-03-20 | 0000002488 | 1969 |
13 | AKAM | Akamai Technologies | reports | Information Technology | Internet Services & Infrastructure | Cambridge, Massachusetts | 2007-07-12 | 0001086222 | 1998 |
38 | APH | Amphenol Corp | reports | Information Technology | Electronic Components | Wallingford, Connecticut | 2008-09-30 | 0000820313 | 1932 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
475 | V | Visa Inc. | reports | Information Technology | Data Processing & Outsourced Services | San Francisco, California | 2009-12-21 | 0001403161 | 1958 |
489 | WDC | Western Digital | reports | Information Technology | Technology Hardware, Storage & Peripherals | San Jose, California | 2009-07-01 | 0000106040 | 1970 |
490 | WU | Western Union Co | reports | Information Technology | Data Processing & Outsourced Services | Englewood, Colorado | 2006-09-29 | 0001365135 | 1851 |
498 | XLNX | Xilinx | reports | Information Technology | Semiconductors | San Jose, California | 1999-11-08 | 0000743988 | 1984 |
501 | ZBRA | Zebra Technologies | reports | Information Technology | Electronic Equipment & Instruments | Lincolnshire, Illinois | 2019-12-23 | 0000877212 | 1969 |
75 rows × 9 columns
Final plot for returns
= plt.subplots(1,1,figsize=(15,10))
fig, ax for i, tickr in enumerate(tickr_list):
'Date'], ytd_stat[tickr], linewidth=5.0, label=company_name[i])
ax.plot(ytd_stat['YTD %Return 2020')
ax.set_ylabel('Date')
ax.set_xlabel( ax.legend()
<matplotlib.legend.Legend at 0x7f9c0a4365e0>