from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Render DataFrames as plain text instead of HTML tables in notebooks.
# `pd.set_printoptions` no longer exists in pandas; display settings are
# changed through the options API instead.
pd.set_option('display.notebook_repr_html', False)

from pandas import *

import matplotlib as mpl

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Random walk: cumulative sum of 1000 standard-normal steps.
# A bare `plot` name only exists inside a pylab session, so call pyplot
# explicitly to keep the script runnable on its own.
plt.plot(np.random.randn(1000).cumsum())

# Slice elements 10..39 out of 0..49 (bare expression: only echoed when run
# interactively).
np.arange(50)[10:40]

# Sample data: login attempts per source IP address.
index = [
    "1.1.1.1", "2.2.2.2", "3.3.3.3",
    "4.4.4.4", "5.5.5.5", "6.6.6.6",
    "7.7.7.7", "8.8.8.8", "9.9.9.9",
]
login_failed = [10, 5, 8, 9, 3, 2, 1, 8, 5]
login_success = [47, 15, 40, 20, 30, 12, 18, 8, 5]

# Two Series labelled by the same IP addresses.
s = pd.Series(data=login_failed, index=index)
ss = pd.Series(data=login_success, index=index)

# Bare expressions below are only echoed in an interactive session.
s.index  # the label index of the failed-login Series

s[["2.2.2.2", "5.5.5.5"]]  # select several labels at once

# Draw each login-count Series as a horizontal bar chart: failed logins
# first, then successful logins.
for login_counts in (s, ss):
    login_counts.plot(kind="barh")

# Six consecutive days of temperature readings for two cities.
dates = pd.date_range('2012-07-16', '2012-07-21')
atemps = Series([21, 23, 24, 19, 18, 26], index=dates)

atemps.plot(kind="barh")

btemps = Series([10, 3, 2, 1, 8, 6], index=dates)

# Combine both Series into one DataFrame; they share the same date index.
temps = pd.DataFrame({'Vancouver': atemps, 'Toronto': btemps})

temps

# A bare `plot` name only exists inside a pylab session; call pyplot
# explicitly so the script also runs on its own.
plt.plot(temps)

# Column arithmetic is element-wise and aligned on the date index.
# Compute the difference once and reuse it for the chart and the new column.
temp_diff = temps['Vancouver'] - temps['Toronto']
temp_diff

temp_diff.plot(kind="bar")

# Adding a column is a plain assignment.
temps['Difference'] = temp_diff
temps

# Note: the index was built by pd.date_range with its default daily
# frequency ("D"); the original note refers to that frequency being shown at
# the bottom of the DataFrame output.
# Getting rid of a column is just as easy.

del temps['Difference']
temps

# Summary statistics for each remaining column.
temps.describe()

# Path of the latency log. The script reads a 'date' column (parsed as
# timestamps) and a 'value' column from it.
LATENCY_CSV = '/Users/antigen/Downloads/latency.csv'

# Use the parsed 'date' column as the index so the frame can be resampled.
ts = pd.read_csv(LATENCY_CSV, parse_dates=['date'], index_col='date')

# resample() returns a Resampler; the aggregation is selected with a method
# call (count, mean, median, max, min, ...) rather than the removed `how=`
# keyword.
a = ts.resample('30s').count()
# NOTE(review): the original also called plot(b), but no `b` is defined
# anywhere in this script, so that call could only raise NameError and has
# been dropped. Restore it once the intended data is known.
plt.plot(a)

# The `how=` keyword used to default to the mean, so the mean is spelled out
# here to keep `d` a DataFrame of monthly averages.
# NOTE(review): recent pandas releases spell the month-end alias 'ME' —
# confirm against the installed version.
d = ts.resample('M').mean()
e = ts.resample('s').count()                              # rows per second
f = ts.resample('s').agg(lambda x: x.mean())              # custom aggregation function
g = ts.resample('min').agg(['median', 'mean', 'count'])   # several aggregations per minute
plt.plot(e)

# Drop the minutes for which an aggregate is missing before plotting.
plt.plot(g.dropna())

# Same file without index_col: rows get the default zero-based integer index.
df2 = pd.read_csv(LATENCY_CSV, parse_dates=['date'])
df2

# The original referenced an undefined `df`; the frame loaded just above is
# `df2`.
plt.plot(df2['value'])

# Six consecutive days given as ISO date strings.
dates2 = [
    '2013-01-01',
    '2013-01-02',
    '2013-01-03',
    '2013-01-04',
    '2013-01-05',
    '2013-01-06',
]
dates2  # bare expression: only echoed in an interactive session

# One observation per date.
data = [1, 3, 5, 3, 2, 7]

# pd.to_datetime converts the strings so the Series is indexed by timestamps.
ts = pd.Series(index=pd.to_datetime(dates2), data=data)

# Line plot of the series currently bound to `ts`.
ts.plot()


# 100 consecutive days starting 2013-02-06, each with a non-negative random
# value.
rng = pd.date_range("2013-02-06", periods=100)
# The bare `random` name was numpy.random leaking in from a pylab session;
# the stdlib `random` module has no randn, so reference NumPy explicitly.
data = np.abs(np.random.randn(100))
ts = pd.Series(data, index=rng)
ts

ts.plot()

ts[datetime(2013, 3, 1)]  # value on a specific date, looked up with a datetime

ts['2013-03-01']  # the same lookup using a date string

ts['2013-03-01':'2013-04-01']  # slice between two date strings

# A bare `plot` name only exists inside a pylab session; call pyplot
# explicitly.
plt.plot(ts['2013-03-01':'2013-04-01'])

plt.plot(ts['2013-03'])  # a year-month string selects that whole month

# 100 rows x 4 columns of standard-normal values, one column per IP address,
# indexed by `rng` (defined earlier in the script).
ip_df = pd.DataFrame(
    np.random.randn(100, 4),
    index=rng,
    columns=('1.1.1.1', '2.2.2.2', '3.3.3.3', '4.4.4.4'),
)

# `.ix` has been removed from pandas; `.loc` is the label-based indexer and
# accepts the same partial date strings.
ip_df.loc['2013-03']

# Open/high/low/close per day for one column. The aggregation is a method on
# the Resampler; the `how=` keyword no longer exists.
resampled = ip_df['1.1.1.1'].resample('D').ohlc()
resampled

# A bare `plot` name only exists inside a pylab session; call pyplot
# explicitly.
plt.plot(resampled)

plt.plot(ip_df.loc['2013-03'])

ip_df.loc['2013-03-31'].plot(kind="bar")  # bar chart for just one day