In [2]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
from datetime import datetime
pd.set_printoptions(notebook_repr_html=False)
/Users/antigen/.virtualenvs/pandas/lib/python2.7/site-packages/pandas/core/format.py:1286: FutureWarning: set_printoptions is deprecated, use set_option instead
  FutureWarning)
In [3]:
from pandas import *
In [4]:
import matplotlib as mpl
In [5]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
In [6]:
plot(np.random.randn(1000).cumsum())
Out[6]:
[<matplotlib.lines.Line2D at 0x10c4941d0>]
In [7]:
np.arange(50)[10:40]
Out[7]:
array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39])
In [19]:
index = ["1.1.1.1","2.2.2.2","3.3.3.3","4.4.4.4","5.5.5.5","6.6.6.6","7.7.7.7","8.8.8.8","9.9.9.9"]
login_failed = [10,5,8,9,3,2,1,8,5]
login_success = [47,15,40,20,30,12,18,8,5]
s = Series(login_failed, index=index)
s.index
ss = Series(login_success, index=index)
In [20]:
s[["2.2.2.2","5.5.5.5"]]
Out[20]:
2.2.2.2    5
5.5.5.5    3

User Logins - Failed

In [21]:
s.plot(kind="barh")
Out[21]:
<matplotlib.axes.AxesSubplot at 0x10c690ad0>

User Logins Success

In [22]:
ss.plot(kind="barh")
Out[22]:
<matplotlib.axes.AxesSubplot at 0x10c755b50>
In [39]:
dates = pd.date_range('2012-07-16', '2012-07-21')
atemps = Series([21,23,24,19,18,26], index = dates)
In [44]:
atemps.plot(kind="barh")
Out[44]:
<matplotlib.axes.AxesSubplot at 0x10f6bc990>
In [45]:
btemps = Series([10,3,2,1,8,6], index = dates)
In [48]:
temps = pd.DataFrame({'Vancouver': atemps, 'Toronto': btemps})
In [49]:
temps
Out[49]:
            Toronto  Vancouver
2012-07-16       10         21
2012-07-17        3         23
2012-07-18        2         24
2012-07-19        1         19
2012-07-20        8         18
2012-07-21        6         26
In [59]:
plot(temps)
Out[59]:
[<matplotlib.lines.Line2D at 0x10ff89710>,
 <matplotlib.lines.Line2D at 0x10ff89990>]
In [61]:
temps['Vancouver'] - temps['Toronto']
Out[61]:
2012-07-16    11
2012-07-17    20
2012-07-18    22
2012-07-19    18
2012-07-20    10
2012-07-21    20
Freq: D
In [66]:
temp_diff = temps['Vancouver'] - temps['Toronto']
temp_diff.plot(kind="bar")
Out[66]:
<matplotlib.axes.AxesSubplot at 0x11031bb10>
In [68]:
temps['Difference'] = temps['Vancouver'] - temps['Toronto']
temps
Out[68]:
            Toronto  Vancouver  Difference
2012-07-16       10         21          11
2012-07-17        3         23          20
2012-07-18        2         24          22
2012-07-19        1         19          18
2012-07-20        8         18          10
2012-07-21        6         26          20
In [69]:
#note add freq D in the bottom of the DataFrame
# getting rid of columns is easy too

del temps['Difference']
temps
Out[69]:
            Toronto  Vancouver
2012-07-16       10         21
2012-07-17        3         23
2012-07-18        2         24
2012-07-19        1         19
2012-07-20        8         18
2012-07-21        6         26
In [71]:
temps.describe()
Out[71]:
         Toronto  Vancouver
count   6.000000   6.000000
mean    5.000000  21.833333
std     3.577709   3.060501
min     1.000000  18.000000
25%     2.250000  19.500000
50%     4.500000  22.000000
75%     7.500000  23.750000
max    10.000000  26.000000
In [183]:
ts = pd.read_csv('/Users/antigen/Downloads/latency.csv', parse_dates=['date'], index_col='date') # index is Timestamp
a = ts.resample('30s', how='count') # there is a bunch of how like mean, median, count, max, min
plot(b)
plot(a)
Out[183]:
[<matplotlib.lines.Line2D at 0x10cd4d210>]
In [207]:
d = ts.resample('M') # takes sample for whole month
e = ts.resample('s', how='count')
f = ts.resample('s', how=lambda x: x.mean())
g = ts.resample('t', how=['median','mean','count'])
plot(e)
Out[207]:
[<matplotlib.lines.Line2D at 0x10ef0ea90>]
In [210]:
plot(g.dropna())
Out[210]:
[<matplotlib.lines.Line2D at 0x10ec3ad10>,
 <matplotlib.lines.Line2D at 0x10ec3af90>,
 <matplotlib.lines.Line2D at 0x10ec6c1d0>]
In [83]:
df2 = pd.read_csv('/Users/antigen/Downloads/latency.csv', parse_dates=['date']) # index is zero based
df2
Out[83]:
                  date  value
0  2013-01-01 00:01:00    156
1  2013-01-01 00:01:00    109
2  2013-01-01 00:01:00     93
3  2013-01-01 00:01:00     93
4  2013-01-01 00:02:00    109
5  2013-01-01 00:02:00     78
6  2013-01-01 00:05:00    109
7  2013-01-01 00:05:00    124
8  2013-01-01 00:06:00    156
9  2013-01-01 00:06:00    124
10 2013-01-01 00:06:00     93
11 2013-01-01 00:06:00    109
12 2013-01-01 00:09:00    109
13 2013-01-01 00:09:00     93
14 2013-01-01 00:10:00    109
15 2013-01-01 00:10:00    109
16 2013-01-01 00:11:00    124
17 2013-01-01 00:11:00     93
18 2013-01-01 00:13:00    109
19 2013-01-01 00:13:00    140
20 2013-01-01 00:14:00    109
21 2013-01-01 00:14:00    124
22 2013-01-01 00:16:00    140
23 2013-01-01 00:16:00     93
24 2013-01-01 00:17:00     93
25 2013-01-01 00:17:00     78
26 2013-01-01 00:18:00     93
27 2013-01-01 00:18:00     93
28 2013-01-01 00:21:00    171
In [94]:
plot(df['value'])
Out[94]:
[<matplotlib.lines.Line2D at 0x110a13fd0>]
In [110]:
dates2 = ['2013-01-01','2013-01-02','2013-01-03','2013-01-04','2013-01-05','2013-01-06']
dates2
Out[110]:
['2013-01-01',
 '2013-01-02',
 '2013-01-03',
 '2013-01-04',
 '2013-01-05',
 '2013-01-06']
In [115]:
data = [1,3,5,3,2,7]
In [116]:
ts = Series(data, index=pd.to_datetime(dates2))
In [117]:
ts.plot()
Out[117]:
<matplotlib.axes.AxesSubplot at 0x110b3e690>
In [167]:
 
In [140]:
rng = pd.date_range("2013-02-06", periods=100)
data = abs(random.randn(100))
ts = pd.Series(data, index=rng)
ts
Out[140]:
2013-02-06    2.177601
2013-02-07    0.131016
2013-02-08    0.186129
2013-02-09    2.214074
2013-02-10    1.313707
2013-02-11    0.288971
2013-02-12    0.429556
2013-02-13    1.361498
2013-02-14    0.597708
2013-02-15    0.717795
2013-02-16    1.609128
2013-02-17    0.973900
2013-02-18    0.429234
2013-02-19    1.155196
2013-02-20    0.575780
2013-02-21    0.447055
2013-02-22    1.556562
2013-02-23    1.426248
2013-02-24    0.759532
2013-02-25    0.328270
2013-02-26    1.445686
2013-02-27    0.592981
2013-02-28    1.388797
2013-03-01    0.487079
2013-03-02    0.128901
2013-03-03    0.976608
2013-03-04    0.290113
2013-03-05    0.186305
2013-03-06    0.921998
2013-03-07    1.033010
2013-03-08    1.302089
2013-03-09    0.831252
2013-03-10    0.830676
2013-03-11    2.037284
2013-03-12    0.587511
2013-03-13    1.006569
2013-03-14    1.155288
2013-03-15    0.348637
2013-03-16    0.394078
2013-03-17    1.210451
2013-03-18    1.737161
2013-03-19    0.730112
2013-03-20    0.153140
2013-03-21    0.324377
2013-03-22    0.605676
2013-03-23    1.396863
2013-03-24    1.196339
2013-03-25    0.110275
2013-03-26    0.088492
2013-03-27    0.547314
2013-03-28    1.507934
2013-03-29    1.560234
2013-03-30    0.057482
2013-03-31    0.385469
2013-04-01    0.760922
2013-04-02    0.501677
2013-04-03    0.085251
2013-04-04    0.169331
2013-04-05    0.331173
2013-04-06    0.163402
2013-04-07    1.851305
2013-04-08    1.138525
2013-04-09    1.409413
2013-04-10    0.251428
2013-04-11    1.239500
2013-04-12    0.125696
2013-04-13    0.358392
2013-04-14    0.316140
2013-04-15    0.417716
2013-04-16    1.681600
2013-04-17    0.474041
2013-04-18    0.708479
2013-04-19    0.148397
2013-04-20    1.145004
2013-04-21    0.467433
2013-04-22    0.728944
2013-04-23    0.344365
2013-04-24    2.348498
2013-04-25    0.524482
2013-04-26    0.443507
2013-04-27    0.319204
2013-04-28    0.955623
2013-04-29    0.912225
2013-04-30    0.175102
2013-05-01    0.934319
2013-05-02    0.839550
2013-05-03    0.559449
2013-05-04    1.105563
2013-05-05    0.650831
2013-05-06    0.319773
2013-05-07    2.117838
2013-05-08    0.636953
2013-05-09    1.123323
2013-05-10    0.616250
2013-05-11    2.424736
2013-05-12    0.473804
2013-05-13    0.481757
2013-05-14    1.046076
2013-05-15    1.612891
2013-05-16    0.532724
Freq: D, Length: 100
In [141]:
ts.plot()
Out[141]:
<matplotlib.axes.AxesSubplot at 0x110dac190>
In [145]:
ts[datetime(2013,3,1)] # gets specific value on specific date
Out[145]:
0.48707884254205852
In [146]:
ts['2013-03-01'] # slice in date string
Out[146]:
0.48707884254205852
In [147]:
ts['2013-03-01':'2013-04-01']
Out[147]:
2013-03-01    0.487079
2013-03-02    0.128901
2013-03-03    0.976608
2013-03-04    0.290113
2013-03-05    0.186305
2013-03-06    0.921998
2013-03-07    1.033010
2013-03-08    1.302089
2013-03-09    0.831252
2013-03-10    0.830676
2013-03-11    2.037284
2013-03-12    0.587511
2013-03-13    1.006569
2013-03-14    1.155288
2013-03-15    0.348637
2013-03-16    0.394078
2013-03-17    1.210451
2013-03-18    1.737161
2013-03-19    0.730112
2013-03-20    0.153140
2013-03-21    0.324377
2013-03-22    0.605676
2013-03-23    1.396863
2013-03-24    1.196339
2013-03-25    0.110275
2013-03-26    0.088492
2013-03-27    0.547314
2013-03-28    1.507934
2013-03-29    1.560234
2013-03-30    0.057482
2013-03-31    0.385469
2013-04-01    0.760922
Freq: D
In [148]:
plot(ts['2013-03-01':'2013-04-01'])
Out[148]:
[<matplotlib.lines.Line2D at 0x111700790>]
In [150]:
plot(ts['2013-03']) # same as above truncate date for month 
Out[150]:
[<matplotlib.lines.Line2D at 0x1088a4a50>]
In [159]:
ip_df = pd.DataFrame(np.random.randn(400).reshape((100,4)), index=rng, columns=('1.1.1.1','2.2.2.2','3.3.3.3','4.4.4.4'))
In [156]:
ip_df.ix['2013-03']
Out[156]:
            1.1.1.1  2.2.2.2  3.3.3.3  4.4.4.4
2013-03-01       92       93       94       95
2013-03-02       96       97       98       99
2013-03-03      100      101      102      103
2013-03-04      104      105      106      107
2013-03-05      108      109      110      111
2013-03-06      112      113      114      115
2013-03-07      116      117      118      119
2013-03-08      120      121      122      123
2013-03-09      124      125      126      127
2013-03-10      128      129      130      131
2013-03-11      132      133      134      135
2013-03-12      136      137      138      139
2013-03-13      140      141      142      143
2013-03-14      144      145      146      147
2013-03-15      148      149      150      151
2013-03-16      152      153      154      155
2013-03-17      156      157      158      159
2013-03-18      160      161      162      163
2013-03-19      164      165      166      167
2013-03-20      168      169      170      171
2013-03-21      172      173      174      175
2013-03-22      176      177      178      179
2013-03-23      180      181      182      183
2013-03-24      184      185      186      187
2013-03-25      188      189      190      191
2013-03-26      192      193      194      195
2013-03-27      196      197      198      199
2013-03-28      200      201      202      203
2013-03-29      204      205      206      207
2013-03-30      208      209      210      211
2013-03-31      212      213      214      215
In [218]:
resampled = ip_df['1.1.1.1'].resample('D', how="ohlc")
resampled
Out[218]:
                open      high       low     close
2013-02-06  0.667463  0.667463  0.667463  0.667463
2013-02-07 -1.287710 -1.287710 -1.287710 -1.287710
2013-02-08 -0.582973 -0.582973 -0.582973 -0.582973
2013-02-09  0.331141  0.331141  0.331141  0.331141
2013-02-10  0.311155  0.311155  0.311155  0.311155
2013-02-11 -0.330716 -0.330716 -0.330716 -0.330716
2013-02-12  0.057521  0.057521  0.057521  0.057521
2013-02-13  1.712147  1.712147  1.712147  1.712147
2013-02-14  0.820147  0.820147  0.820147  0.820147
2013-02-15  1.096700  1.096700  1.096700  1.096700
2013-02-16 -0.138053 -0.138053 -0.138053 -0.138053
2013-02-17 -1.601882 -1.601882 -1.601882 -1.601882
2013-02-18  0.249855  0.249855  0.249855  0.249855
2013-02-19 -0.097528 -0.097528 -0.097528 -0.097528
2013-02-20 -0.033422 -0.033422 -0.033422 -0.033422
2013-02-21  1.019544  1.019544  1.019544  1.019544
2013-02-22 -1.643013 -1.643013 -1.643013 -1.643013
2013-02-23 -0.582085 -0.582085 -0.582085 -0.582085
2013-02-24  1.436340  1.436340  1.436340  1.436340
2013-02-25 -1.191150 -1.191150 -1.191150 -1.191150
2013-02-26  0.567902  0.567902  0.567902  0.567902
2013-02-27  0.333226  0.333226  0.333226  0.333226
2013-02-28  1.579566  1.579566  1.579566  1.579566
2013-03-01  0.966033  0.966033  0.966033  0.966033
2013-03-02 -0.687408 -0.687408 -0.687408 -0.687408
2013-03-03 -0.973395 -0.973395 -0.973395 -0.973395
2013-03-04 -0.293088 -0.293088 -0.293088 -0.293088
2013-03-05 -1.024993 -1.024993 -1.024993 -1.024993
2013-03-06  0.920623  0.920623  0.920623  0.920623
2013-03-07 -0.234771 -0.234771 -0.234771 -0.234771
2013-03-08 -0.482392 -0.482392 -0.482392 -0.482392
2013-03-09  0.593006  0.593006  0.593006  0.593006
2013-03-10  0.079329  0.079329  0.079329  0.079329
2013-03-11  0.479830  0.479830  0.479830  0.479830
2013-03-12 -0.154861 -0.154861 -0.154861 -0.154861
2013-03-13 -0.679170 -0.679170 -0.679170 -0.679170
2013-03-14  1.008153  1.008153  1.008153  1.008153
2013-03-15 -1.753305 -1.753305 -1.753305 -1.753305
2013-03-16 -0.726288 -0.726288 -0.726288 -0.726288
2013-03-17  1.534673  1.534673  1.534673  1.534673
2013-03-18 -0.930242 -0.930242 -0.930242 -0.930242
2013-03-19  2.126452  2.126452  2.126452  2.126452
2013-03-20 -0.033397 -0.033397 -0.033397 -0.033397
2013-03-21  0.947184  0.947184  0.947184  0.947184
2013-03-22  0.031262  0.031262  0.031262  0.031262
2013-03-23 -0.318363 -0.318363 -0.318363 -0.318363
2013-03-24 -0.869514 -0.869514 -0.869514 -0.869514
2013-03-25 -0.628911 -0.628911 -0.628911 -0.628911
2013-03-26 -0.609140 -0.609140 -0.609140 -0.609140
2013-03-27  0.749727  0.749727  0.749727  0.749727
2013-03-28 -0.206117 -0.206117 -0.206117 -0.206117
2013-03-29  1.183828  1.183828  1.183828  1.183828
2013-03-30 -2.006837 -2.006837 -2.006837 -2.006837
2013-03-31  0.094606  0.094606  0.094606  0.094606
2013-04-01 -0.631297 -0.631297 -0.631297 -0.631297
2013-04-02  1.498079  1.498079  1.498079  1.498079
2013-04-03 -0.732888 -0.732888 -0.732888 -0.732888
2013-04-04  0.307917  0.307917  0.307917  0.307917
2013-04-05  0.789150  0.789150  0.789150  0.789150
2013-04-06  1.739575  1.739575  1.739575  1.739575
2013-04-07  0.181541  0.181541  0.181541  0.181541
2013-04-08  0.944273  0.944273  0.944273  0.944273
2013-04-09 -1.581479 -1.581479 -1.581479 -1.581479
2013-04-10 -0.494982 -0.494982 -0.494982 -0.494982
2013-04-11  0.007488  0.007488  0.007488  0.007488
2013-04-12  0.176036  0.176036  0.176036  0.176036
2013-04-13 -0.231764 -0.231764 -0.231764 -0.231764
2013-04-14 -0.595858 -0.595858 -0.595858 -0.595858
2013-04-15 -1.327879 -1.327879 -1.327879 -1.327879
2013-04-16  1.318089  1.318089  1.318089  1.318089
2013-04-17  1.403062  1.403062  1.403062  1.403062
2013-04-18  0.305167  0.305167  0.305167  0.305167
2013-04-19 -0.690680 -0.690680 -0.690680 -0.690680
2013-04-20  0.121424  0.121424  0.121424  0.121424
2013-04-21  0.770222  0.770222  0.770222  0.770222
2013-04-22  0.850089  0.850089  0.850089  0.850089
2013-04-23  0.575472  0.575472  0.575472  0.575472
2013-04-24 -0.202110 -0.202110 -0.202110 -0.202110
2013-04-25 -1.743006 -1.743006 -1.743006 -1.743006
2013-04-26  0.253060  0.253060  0.253060  0.253060
2013-04-27  1.422381  1.422381  1.422381  1.422381
2013-04-28 -0.082828 -0.082828 -0.082828 -0.082828
2013-04-29 -0.385594 -0.385594 -0.385594 -0.385594
2013-04-30 -0.246531 -0.246531 -0.246531 -0.246531
2013-05-01  0.927497  0.927497  0.927497  0.927497
2013-05-02 -0.361332 -0.361332 -0.361332 -0.361332
2013-05-03 -0.133065 -0.133065 -0.133065 -0.133065
2013-05-04 -2.733855 -2.733855 -2.733855 -2.733855
2013-05-05  0.520610  0.520610  0.520610  0.520610
2013-05-06 -0.818820 -0.818820 -0.818820 -0.818820
2013-05-07 -0.764690 -0.764690 -0.764690 -0.764690
2013-05-08 -0.059040 -0.059040 -0.059040 -0.059040
2013-05-09  1.397286  1.397286  1.397286  1.397286
2013-05-10 -0.649536 -0.649536 -0.649536 -0.649536
2013-05-11 -0.161345 -0.161345 -0.161345 -0.161345
2013-05-12  0.911908  0.911908  0.911908  0.911908
2013-05-13 -0.454775 -0.454775 -0.454775 -0.454775
2013-05-14  0.346036  0.346036  0.346036  0.346036
2013-05-15  1.010684  1.010684  1.010684  1.010684
2013-05-16 -1.548136 -1.548136 -1.548136 -1.548136
In [219]:
plot(resampled)
Out[219]:
[<matplotlib.lines.Line2D at 0x1087d78d0>,
 <matplotlib.lines.Line2D at 0x1087d7b50>,
 <matplotlib.lines.Line2D at 0x1087d7d50>,
 <matplotlib.lines.Line2D at 0x1087d7ed0>]
In [212]:
plot(ip_df.ix['2013-03'])
Out[212]:
[<matplotlib.lines.Line2D at 0x10f042a50>,
 <matplotlib.lines.Line2D at 0x10f042cd0>,
 <matplotlib.lines.Line2D at 0x10f042ed0>,
 <matplotlib.lines.Line2D at 0x10f046090>]
In [213]:
ip_df.ix['2013-03-31'].plot(kind="bar") # query bar chart for just one day
Out[213]:
<matplotlib.axes.AxesSubplot at 0x10d540e10>
In [213]:
 
In [213]:
 
In [213]:
 
In [ ]: