# Exploratory pandas walkthrough (flattened notebook export): labelled Series,
# DataFrames, resampling a CSV time series, and date-string slicing.
#
# Bare expressions such as `temps` are notebook display cells; they are no-ops
# when this file runs as a plain script and are kept only to mirror the cells.
# Each plot is drawn on a fresh figure so successive plots do not overlay one
# another on the same axes.

from datetime import datetime

# The star import is kept for any code that relies on the notebook namespace;
# the explicit imports below come after it so that they win on a name clash.
from pandas import *  # noqa: F401,F403
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# `pd.set_printoptions` no longer exists; `pd.set_option` is its replacement.
pd.set_option("display.notebook_repr_html", False)

# Both CSV reads below load the same file.
LATENCY_CSV = '/Users/antigen/Downloads/latency.csv'

# --- NumPy warm-up ----------------------------------------------------------
plt.figure()
plt.plot(np.random.randn(1000).cumsum())
np.arange(50)[10:40]

# --- Series indexed by IP address -------------------------------------------
index = [
    "1.1.1.1",
    "2.2.2.2",
    "3.3.3.3",
    "4.4.4.4",
    "5.5.5.5",
    "6.6.6.6",
    "7.7.7.7",
    "8.8.8.8",
    "9.9.9.9",
]
login_failed = [10, 5, 8, 9, 3, 2, 1, 8, 5]
login_success = [47, 15, 40, 20, 30, 12, 18, 8, 5]

s = Series(login_failed, index=index)
s.index
ss = Series(login_success, index=index)
s[["2.2.2.2", "5.5.5.5"]]  # select several labels at once

plt.figure()
s.plot(kind="barh")
plt.figure()
ss.plot(kind="barh")

# --- DataFrame built from two date-indexed Series ----------------------------
dates = pd.date_range('2012-07-16', '2012-07-21')
atemps = Series([21, 23, 24, 19, 18, 26], index=dates)
plt.figure()
atemps.plot(kind="barh")
btemps = Series([10, 3, 2, 1, 8, 6], index=dates)

temps = pd.DataFrame({'Vancouver': atemps, 'Toronto': btemps})
temps
plt.figure()
plt.plot(temps)

temps['Vancouver'] - temps['Toronto']
temp_diff = temps['Vancouver'] - temps['Toronto']
plt.figure()
temp_diff.plot(kind="bar")

# Adding a derived column is a plain assignment.
temps['Difference'] = temps['Vancouver'] - temps['Toronto']
temps  # note: the notebook output showed the daily frequency (D) at the bottom

# Getting rid of columns is easy too.
del temps['Difference']
temps
temps.describe()

# --- Resampling a latency time series loaded from CSV ------------------------
# NOTE(review): assumes the CSV has a `date` column and a `value` column; the
# file itself is not part of this source.
ts = pd.read_csv(LATENCY_CSV, parse_dates=['date'], index_col='date')  # index is Timestamp

# `resample(rule, how=...)` was removed from pandas; the aggregation is now a
# method on the resampler. Available aggregations include mean, median, count,
# max and min.
a = ts.resample('30s').count()
# NOTE: a plot of an undefined name `b` used to sit here and raised NameError;
# it was dropped because nothing in this file defines `b`.
plt.figure()
plt.plot(a)

# The old `resample` defaulted to the mean when no `how` was given.
# NOTE(review): newer pandas releases spell the month-end alias 'ME' - confirm
# against the installed version.
d = ts.resample('M').mean()  # one sample per whole month
e = ts.resample('s').count()
f = ts.resample('s').apply(lambda x: x.mean())  # custom aggregation function
g = ts.resample('min').agg(['median', 'mean', 'count'])  # 'min' == old 't' alias

plt.figure()
plt.plot(e)
plt.figure()
plt.plot(g.dropna())

df2 = pd.read_csv(LATENCY_CSV, parse_dates=['date'])  # index is zero based
df2
plt.figure()
plt.plot(df2['value'])  # was `df['value']`, but only `df2` is defined

# --- Series indexed by parsed date strings -----------------------------------
dates2 = [
    '2013-01-01',
    '2013-01-02',
    '2013-01-03',
    '2013-01-04',
    '2013-01-05',
    '2013-01-06',
]
dates2
data = [1, 3, 5, 3, 2, 7]
ts = Series(data, index=pd.to_datetime(dates2))
plt.figure()
ts.plot()

# --- 100 days of random data: date lookups and slices ------------------------
rng = pd.date_range("2013-02-06", periods=100)
# `random` was only available through the pylab namespace; use NumPy directly.
data = np.abs(np.random.randn(100))
ts = pd.Series(data, index=rng)
ts
plt.figure()
ts.plot()

ts[datetime(2013, 3, 1)]  # gets specific value on specific date
ts['2013-03-01']  # same lookup using a date string
ts['2013-03-01':'2013-04-01']  # slice between two date strings
plt.figure()
plt.plot(ts['2013-03-01':'2013-04-01'])
plt.figure()
plt.plot(ts['2013-03'])  # a truncated date string selects the whole month

# --- DataFrame with one column per IP ----------------------------------------
ip_df = pd.DataFrame(
    np.random.randn(400).reshape((100, 4)),
    index=rng,
    columns=('1.1.1.1', '2.2.2.2', '3.3.3.3', '4.4.4.4'),
)
# `.ix` was removed from pandas; `.loc` handles the same date-string row lookups.
ip_df.loc['2013-03']

resampled = ip_df['1.1.1.1'].resample('D').ohlc()
resampled
plt.figure()
plt.plot(resampled)
plt.figure()
plt.plot(ip_df.loc['2013-03'])
plt.figure()
ip_df.loc['2013-03-31'].plot(kind="bar")  # query bar chart for just one day

# Outside a notebook nothing is rendered until the figures are shown.
plt.show()