#!/usr/bin/env python
# coding: utf-8
"""Scrape a batting-statistics table from stats.espncricinfo.com into pandas.

Converted from a Jupyter notebook.  The page is downloaded, every ``<th>`` on
it is taken as a column heading, and every ``<tr>`` holding exactly
``EXPECTED_COLUMNS`` ``<td>`` cells is taken as a data row.  The resulting
``pandas.DataFrame`` is printed when the file is run as a script.
"""

# The first notebook cell was R, not Python.  It is kept for reference only:
#
#     library(XML)
#     url = "http://stats.espncricinfo.com/ci/engine/stats/index.html?class=1;team=8;template=results;type=batting"
#     # Note: the url string could also be split up and rebuilt with paste()
#     # to vary its parameters.
#     tables = readHTMLTable(url)
#     tables$"Overall figures"

import contextlib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:  # Python 2, which the notebook was written for
    from urllib2 import urlopen

import numpy as np  # noqa: F401  (imported by the original notebook; unused below)
import pandas as pd

STATS_URL = (
    "http://stats.espncricinfo.com/ci/engine/stats/index.html"
    "?class=1;team=8;template=results;type=batting"
)

# A <tr> is treated as a data row only if it has exactly this many <td> cells.
EXPECTED_COLUMNS = 12


def fetch_html(url=STATS_URL, timeout=30):
    """Download *url* and return the raw response body.

    The response is closed as soon as it has been read.  *timeout* is the
    socket timeout in seconds, so an unresponsive server cannot hang the
    script forever.
    """
    with contextlib.closing(urlopen(url, timeout=timeout)) as response:
        return response.read()


def _cell_text(tag):
    """Return the text of a BeautifulSoup tag as a native ``str``.

    On Python 3 the text already is ``str`` and is returned untouched
    (encoding it there would produce ``bytes`` cells and column names).  On
    Python 2 the text is ``unicode`` and is encoded to a UTF-8 byte string,
    which is what the original notebook did.
    """
    text = tag.get_text()
    if not isinstance(text, str):
        text = text.encode("utf-8")
    return text


def extract_table(html, n_columns=EXPECTED_COLUMNS):
    """Parse *html* and return ``(headings, rows)``.

    ``headings`` is the text of every ``<th>`` in the document, in document
    order.  ``rows`` is a list of lists: one entry per ``<tr>`` that contains
    exactly *n_columns* ``<td>`` cells; all other rows are skipped.
    """
    # Third-party import kept local so the helpers that do not parse HTML
    # remain importable when bs4 is not installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "lxml")

    # NOTE(review): headings are collected from the whole page, not from one
    # table.  If the page carries a number of <th> cells different from
    # n_columns, build_frame() will be rejected by pandas -- confirm against
    # the live page.
    headings = [_cell_text(th) for th in soup.find_all("th")]

    rows = []
    for tr in soup.find_all("tr"):
        cells = [_cell_text(td) for td in tr.find_all("td")]
        if len(cells) == n_columns:
            rows.append(cells)
    return headings, rows


def build_frame(headings, rows):
    """Return a ``DataFrame`` with *headings* as columns and *rows* as data.

    The row list is handed to pandas directly instead of going through
    ``np.array`` first, so an empty *rows* yields an empty frame that still
    has the requested columns.
    """
    return pd.DataFrame(rows, columns=headings)


def main():
    """Fetch the statistics page, build the table and print it."""
    headings, rows = extract_table(fetch_html())
    table = build_frame(headings, rows)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(table)


if __name__ == "__main__":
    main()