import warnings
warnings.filterwarnings("ignore", module="matplotlib")
import os
import pandas as pd
import requests
from bs4 import BeautifulSoup
from IPython.display import display
# Optional proxy settings in the mapping form accepted by requests' `proxies`
# argument. LOGIN, PASSWORD, PROXYURL and PROXYPORT are placeholders: fill
# them in locally and keep real credentials out of version control.
proxies = dict(
    http='http://LOGIN:PASSWORD@PROXYURL:PROXYPORT',
    https='https://LOGIN:PASSWORD@PROXYURL:PROXYPORT',
)
# Download the DTCC "top 1000 single names" page covering 09-20-2015 through
# 12-19-2015.
url = 'http://www.dtcc.com/repository-otc-data/top-1000-single-names-09-20-2015-through-12-19-2015'
# Behind a proxy, pass the proxy mapping explicitly instead:
# r = requests.get(url, proxies=proxies, timeout=30)
# Without a timeout the call can block forever if the server never answers.
r = requests.get(url, timeout=30)
# Stop on an HTTP error status instead of parsing an error page further down.
r.raise_for_status()
r
<Response [200]>
# Parse the downloaded page and collect every <tr> of the table whose id is
# 'dataTable'.
html = r.text
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table', id='dataTable')
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError("no table with id 'dataTable' in the downloaded page")
rows = table.find_all('tr')
len(rows)
952
# Extract the stripped text of every <td>, row by row.
# Empty cells are kept on purpose: discarding them moves every later value in
# that row one position to the left, so a row with a blank cell ends up with
# its values under the wrong column names and a missing last value.
data = []
for row in rows:
    texts = []
    for cell in row.find_all('td'):
        text = cell.text.strip()
        # A cell spanning several columns is repeated once per column so that
        # all rows stay aligned position by position.
        span_attr = str(cell.get('colspan', '1')).strip()
        span = max(int(span_attr), 1) if span_attr.isdigit() else 1
        texts.extend([text] * span)
    data.append(texts)

# Build the column names by joining the two header rows position by position.
# NOTE(review): assumes rows 0 and 1 form a two-line header in which the first
# line is blank (or a blank colspan cell) above single-line labels - confirm
# against the page markup.
top_line, bottom_line = data[0], data[1]
width = max(len(top_line), len(bottom_line))
top_line = top_line + [''] * (width - len(top_line))
bottom_line = bottom_line + [''] * (width - len(bottom_line))
header = [
    ' '.join(part for part in pair if part)
    for pair in zip(top_line, bottom_line)
]
header
['TOTAL NUMBER OF REFERENCE ENTITY', 'AVERAGE MONTHLY REGION', 'AVERAGE DAILY INDEX CONSTITUENT', 'AVERAGE NUMBER CLEARING DEALERS', 'CLEARING DEALERS', 'NOTIONAL (USD EQ)', 'TRADES/DAY', 'DOC CLAUSE %']
# Everything after the two header rows is a data record.
data2 = data[2:]
df = pd.DataFrame(data=data2, columns=header)

# Save a CSV copy under ./dump. exist_ok=True replaces the separate
# exists()-then-create check, which could race with another process.
os.makedirs('dump', exist_ok=True)
df.to_csv(os.path.join('dump', 'df_data.csv'))

# Summarise the frame and show its first and last rows.
df.info()
display(df.head())
display(df.tail())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 950 entries, 0 to 949 Data columns (total 8 columns): TOTAL NUMBER OF REFERENCE ENTITY 950 non-null object AVERAGE MONTHLY REGION 950 non-null object AVERAGE DAILY INDEX CONSTITUENT 950 non-null object AVERAGE NUMBER CLEARING DEALERS 950 non-null object CLEARING DEALERS 950 non-null object NOTIONAL (USD EQ) 950 non-null object TRADES/DAY 950 non-null object DOC CLAUSE % 744 non-null object dtypes: object(8) memory usage: 59.5+ KB
TOTAL NUMBER OF REFERENCE ENTITY | AVERAGE MONTHLY REGION | AVERAGE DAILY INDEX CONSTITUENT | AVERAGE NUMBER CLEARING DEALERS | CLEARING DEALERS | NOTIONAL (USD EQ) | TRADES/DAY | DOC CLAUSE % | |
---|---|---|---|---|---|---|---|---|
0 | ABBOTT LABORATORIES | AMERICAS | 6 | 2.7 | 2,500,000 | 0 | < 5% | None |
1 | ABENGOA, S.A. | EUROPE | Y | 11 | 9.7 | 22,500,000 | 19 | > 95% |
2 | ABU DHABI | SOVEREIGN | Y | 8 | 5.0 | 10,000,000 | 1 | > 95% |
3 | ABU DHABI NATIONAL ENERGY\n COMPANY | EUROPE | Y | 1 | 0.3 | 2,500,000 | 0 | > 95% |
4 | ACCOR | EUROPE | Y | 7 | 6.7 | 17,500,000 | 5 | > 95% |
TOTAL NUMBER OF REFERENCE ENTITY | AVERAGE MONTHLY REGION | AVERAGE DAILY INDEX CONSTITUENT | AVERAGE NUMBER CLEARING DEALERS | CLEARING DEALERS | NOTIONAL (USD EQ) | TRADES/DAY | DOC CLAUSE % | |
---|---|---|---|---|---|---|---|---|
945 | YUM! BRANDS, INC. | AMERICAS | Y | 8 | 7.0 | 50,000,000 | 10 | < 5% |
946 | ZURICH INSURANCE COMPANY\n LTD | EUROPE | Y | 11 | 9.0 | 17,500,000 | 3 | > 95% |
947 | 21ST CENTURY FOX AMERICA,\n INC. | AMERICAS | Y | 7 | 6.0 | 20,000,000 | 3 | < 5% |
948 | 3I GROUP PLC | EUROPE | Y | 2 | 1.0 | 2,500,000 | 0 | > 95% |
949 | 3M COMPANY | AMERICAS | 4 | 1.3 | 2,500,000 | 0 | 5-25% | None |