# Reading and writing data in text format
import pandas as pd

!cat /Users/sergulaydore/pydata-book/ch06/ex1.csv
df = pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex1.csv')
df
pd.read_table('/Users/sergulaydore/pydata-book/ch06/ex1.csv', sep=',')

!cat /Users/sergulaydore/pydata-book/ch06/ex2.csv
pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex2.csv', header=None)
pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex2.csv',
            names=['a', 'b', 'c', 'd', 'message'])
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex2.csv',
            names=names, index_col='message')

# Hierarchical index from multiple columns
!cat /Users/sergulaydore/pydata-book/ch06/csv_mindex.csv
parsed = pd.read_csv('/Users/sergulaydore/pydata-book/ch06/csv_mindex.csv',
                     index_col=['key1', 'key2'])
parsed

# Whitespace-delimited file
list(open('/Users/sergulaydore/pydata-book/ch06/ex3.txt'))
result = pd.read_table('/Users/sergulaydore/pydata-book/ch06/ex3.txt', sep=r'\s+')
result

# Skipping rows
!cat /Users/sergulaydore/pydata-book/ch06/ex4.csv
pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex4.csv', skiprows=[0, 2, 3])

# Handling missing values and sentinel values
!cat /Users/sergulaydore/pydata-book/ch06/ex5.csv
result = pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex5.csv')
result
pd.isnull(result)
result = pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex5.csv', na_values=['NULL'])
result
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex5.csv', na_values=sentinels)

# Reading text files in pieces
result = pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex6.csv')
result
pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex6.csv', nrows=5)
chunker = pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex6.csv', chunksize=1000)
chunker
next(iter(chunker))
tot = pd.Series([])
for piece in chunker:
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
tot = tot.order(ascending=False)  # Series.order was later renamed sort_values
tot[:10]

# Writing data out to text format
data = pd.read_csv('/Users/sergulaydore/pydata-book/ch06/ex5.csv')
data
data.to_csv('/Users/sergulaydore/pydata-book/ch06/out.csv')
!cat /Users/sergulaydore/pydata-book/ch06/out.csv
import sys
data.to_csv(sys.stdout, sep='|')
data.to_csv(sys.stdout, na_rep='NULL')
data.to_csv(sys.stdout, index=False, header=False)
data.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])

dates = pd.date_range('1/1/2000', periods=7)
import numpy as np
ts = pd.Series(np.arange(7), index=dates)
ts.to_csv('/Users/sergulaydore/pydata-book/ch06/tseries.csv')
!cat /Users/sergulaydore/pydata-book/ch06/tseries.csv
# pd.Series.from_csv has since been removed from pandas;
# pd.read_csv(..., index_col=0, parse_dates=True) is the current equivalent.
pd.Series.from_csv('/Users/sergulaydore/pydata-book/ch06/tseries.csv', parse_dates=True)
help(pd.Series.from_csv)

# Working with delimited formats via the csv module
!cat /Users/sergulaydore/pydata-book/ch06/ex7.csv
import csv
f = open('/Users/sergulaydore/pydata-book/ch06/ex7.csv')
reader = csv.reader(f)
for line in reader:
    print(line)
lines = list(csv.reader(open('/Users/sergulaydore/pydata-book/ch06/ex7.csv')))
lines
header, values = lines[0], lines[1:]
header
values
# (a csv.DictReader alternative is sketched at the end of this section)
data_dict = {h: v for h, v in zip(header, zip(*values))}
data_dict

class my_dialect(csv.Dialect):
    lineterminator = '\n'
    delimiter = ';'
    quotechar = '"'
    quoting = 1  # 1 == csv.QUOTE_ALL

reader = csv.reader(f, dialect=my_dialect)
reader = csv.reader(f, delimiter='|')

with open('mydata.csv', 'w') as f:
    writer = csv.writer(f, dialect=my_dialect)
    writer.writerow(('one', 'two', 'three'))
    writer.writerow(('1', '2', '3'))
    writer.writerow(('4', '5', '6'))
    writer.writerow(('7', '8', '9'))
!cat mydata.csv
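# Aside (not from the original session): csv.DictReader does the header/values
# zip above in one step; a minimal sketch reusing the same ex7.csv file.
with open('/Users/sergulaydore/pydata-book/ch06/ex7.csv') as f7:
    dict_rows = list(csv.DictReader(f7))
dict_rows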
# JSON data
obj = """
{"name": "Wes",
 "places_lived": ["United States", "Spain", "Germany"],
 "pet": null,
 "siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"},
              {"name": "Katie", "age": 33, "pet": "Cisco"}]
}
"""
import json
result = json.loads(obj)
result
asjson = json.dumps(result)
siblings = pd.DataFrame(result['siblings'], columns=['name', 'age'])
siblings

# XML and HTML: scraping Yahoo! Finance option tables with lxml
from lxml.html import parse
from urllib2 import urlopen  # Python 2; use urllib.request.urlopen on Python 3
help(parse)
parsed = parse(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options'))
doc = parsed.getroot()
doc
links = doc.findall('.//a')
links[15:20]
lnk = links[28]
lnk.get('href')
lnk.text_content()
urls = [lnk.get('href') for lnk in doc.findall('.//a')]
urls[10:]
tables = doc.findall('.//table')
calls = tables[9]
puts = tables[13]
t0 = tables[0]
t1 = tables[1]
t2 = tables[2]
rows0 = t0.findall('.//tr')
rows1 = t1.findall('.//tr')
rows2 = t2.findall('.//tr')

def _unpack(row, kind='td'):
    # pull the text out of every <td> (or <th>) cell in a table row
    elts = row.findall('.//%s' % kind)
    return [val.text_content() for val in elts]

_unpack(rows2[2], kind='td')

from pandas.io.parsers import TextParser

def parse_options_data(table):
    rows = table.findall('.//tr')
    header = _unpack(rows[0], kind='th')
    data = [_unpack(r) for r in rows[1:]]
    return TextParser(data, names=header).get_chunk()

call_data = parse_options_data(t2)
call_data.head()

# Binary data formats: pickle and HDF5
!pwd
%cd /Users/sergulaydore/pydata-book/ch06
frame = pd.read_csv('./ex1.csv')
frame
frame.to_pickle('./frame_pickle')
pd.read_pickle('./frame_pickle')
store = pd.HDFStore('./mydata.h5')
store['obj1'] = frame
store['obj1_col'] = frame['a']
store
store['obj1']

# Reading Microsoft Excel files
xls_file = pd.ExcelFile('./data.xls')
table = xls_file.parse('Sheet1')

# Interacting with web APIs
import requests
url = 'https://api.twitter.com/1.1/search/tweets.json?q=%23freebandnames&since_id=24012619984051000&max_id=250126199840518145&result_type=mixed&count=4'
resp = requests.get(url)
resp
import json
data = json.loads(resp.text)
data.keys()
import requests
from requests_oauthlib import OAuth1

# Interacting with databases
import sqlite3
query = """
CREATE TABLE test
(a VARCHAR(20), b VARCHAR(20),
 c REAL, d INTEGER
);
"""
con = sqlite3.connect(':memory:')
con.execute(query)
con.commit()
data = [('Atlanta', 'Georgia', 1.25, 6),
        ('Tallahassee', 'Florida', 2.6, 3),
        ('Sacramento', 'California', 1.7, 5)]
stmt = "INSERT INTO test VALUES(?,?,?,?)"
con.executemany(stmt, data)
con.commit()
cursor = con.execute('select * from test')
rows = cursor.fetchall()
rows
cursor.description
pd.DataFrame(rows, columns=zip(*cursor.description)[0])  # list(zip(...))[0] on Python 3
import pandas.io.sql as sql
sql.read_sql('select * from test', con)
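# Aside (not from the original session): DataFrame.to_sql is the documented
# inverse of read_sql; a minimal sketch that round-trips the query result into
# a new table ('test_copy' is a made-up name for illustration).
test_frame = sql.read_sql('select * from test', con)
test_frame.to_sql('test_copy', con, index=False)
sql.read_sql('select * from test_copy', con)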