import pandas as pd
# Load the loans CSV from the remote URL into a DataFrame.
loansData = pd.read_csv('http://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv')
# Peek at the first five entries of the 'Interest.Rate' column. The values are
# percentage strings such as '8.90%' (dtype object), not numbers.
loansData['Interest.Rate'][0:5]
81174 8.90% 99592 12.12% 80059 21.98% 15825 9.99% 33182 11.71% Name: Interest.Rate, dtype: object
/Users/nitin/dl/mangodata.csv
# Load the medication log from a local CSV (absolute path on the author's machine).
mangoData = pd.read_csv('/Users/nitin/dl/mangodata.csv')
# Display the frame. Columns are date, med, scheduled and actual; 'actual' is
# presumably the time the dose was taken, NaN where none was recorded -- TODO confirm.
mangoData
date | med | scheduled | actual | |
---|---|---|---|---|
0 | 4/14/13 | lisinopril | 8:00 | 8:01 |
1 | 4/15/13 | lisinopril | 8:00 | 8:10 |
2 | 4/16/13 | lisinopril | 8:00 | 8:02 |
3 | 4/17/13 | lisinopril | 8:00 | 8:01 |
4 | 4/18/13 | lisinopril | 8:00 | 7:54 |
5 | 4/19/13 | lisinopril | 8:00 | 8:03 |
6 | 4/20/13 | lisinopril | 8:00 | 7:51 |
7 | 4/21/13 | lisinopril | 8:00 | 8:01 |
8 | 4/22/13 | lisinopril | 8:00 | 8:13 |
9 | 4/23/13 | lisinopril | 8:00 | 8:14 |
10 | 4/23/13 | atorvastatin | 17:00 | 17:14 |
11 | 4/24/13 | lisinopril | 8:00 | 7:51 |
12 | 4/24/13 | atorvastatin | 17:00 | 18:03 |
13 | 4/25/13 | lisinopril | 8:00 | 8:10 |
14 | 4/25/13 | atorvastatin | 17:00 | NaN |
15 | 4/26/13 | lisinopril | 8:00 | 8:19 |
16 | 4/26/13 | atorvastatin | 17:00 | 17:28 |
17 | 4/27/13 | lisinopril | 8:00 | 8:05 |
18 | 4/27/13 | atorvastatin | 17:00 | 18:01 |
19 | 4/28/13 | lisinopril | 8:00 | 8:17 |
20 | 4/28/13 | atorvastatin | 17:00 | 18:03 |
21 | 4/29/13 | lisinopril | 8:00 | 8:30 |
22 | 4/29/13 | atorvastatin | 17:00 | NaN |
23 | 4/30/13 | lisinopril | 8:00 | NaN |
24 | 4/30/13 | atorvastatin | 17:00 | 17:24 |
25 | 5/1/13 | lisinopril | 8:00 | 8:29 |
26 | 5/1/13 | atorvastatin | 17:00 | 17:46 |
27 | 5/2/13 | lisinopril | 8:00 | 8:13 |
28 | 5/2/13 | atorvastatin | 17:00 | 17:28 |
29 | 5/3/13 | lisinopril | 8:00 | 8:08 |
... | ... | ... | ... | ... |
585 | 5/22/14 | atorvastatin | 8:00 | 9:58 |
586 | 5/23/14 | atorvastatin | 8:00 | NaN |
587 | 5/24/14 | atorvastatin | 8:00 | 10:14 |
588 | 5/25/14 | atorvastatin | 8:00 | 8:05 |
589 | 5/26/14 | atorvastatin | 8:00 | NaN |
590 | 5/27/14 | atorvastatin | 8:00 | NaN |
591 | 5/28/14 | atorvastatin | 8:00 | NaN |
592 | 5/29/14 | atorvastatin | 8:00 | NaN |
593 | 5/30/14 | atorvastatin | 8:00 | NaN |
594 | 5/31/14 | atorvastatin | 8:00 | NaN |
595 | 6/1/14 | atorvastatin | 8:00 | NaN |
596 | 6/2/14 | atorvastatin | 8:00 | NaN |
597 | 6/3/14 | atorvastatin | 8:00 | 7:57 |
598 | 6/4/14 | atorvastatin | 8:00 | 9:24 |
599 | 6/5/14 | atorvastatin | 8:00 | NaN |
600 | 6/6/14 | atorvastatin | 8:00 | NaN |
601 | 6/7/14 | atorvastatin | 8:00 | 9:21 |
602 | 6/8/14 | atorvastatin | 8:00 | NaN |
603 | 6/9/14 | atorvastatin | 8:00 | NaN |
604 | 6/10/14 | atorvastatin | 8:00 | 9:06 |
605 | 6/11/14 | atorvastatin | 8:00 | 8:40 |
606 | 6/12/14 | atorvastatin | 8:00 | 10:07 |
607 | 6/13/14 | atorvastatin | 8:00 | 9:59 |
608 | 6/14/14 | atorvastatin | 8:00 | 8:02 |
609 | 6/15/14 | atorvastatin | 8:00 | 10:55 |
610 | 6/16/14 | atorvastatin | 8:00 | 9:00 |
611 | 6/17/14 | atorvastatin | 8:00 | NaN |
612 | 6/18/14 | atorvastatin | 8:00 | NaN |
613 | 6/19/14 | atorvastatin | 8:00 | NaN |
614 | 6/20/14 | atorvastatin | 8:00 | NaN |
615 rows × 4 columns
# Load a second version of the medication log from a local CSV (absolute path
# on the author's machine).
mangoData2 = pd.read_csv('/Users/nitin/dl/mangodata2.csv')
# Display the frame. Compared with mangoData it has an extra 'dow' column
# holding the weekday name and uses four-digit years in 'date'.
mangoData2
date | dow | med | scheduled | actual | |
---|---|---|---|---|---|
0 | 4/14/2013 | sunday | lisinopril | 8:00 | 8:01 |
1 | 4/15/2013 | monday | lisinopril | 8:00 | 8:10 |
2 | 4/16/2013 | tuesday | lisinopril | 8:00 | 8:02 |
3 | 4/17/2013 | wednesday | lisinopril | 8:00 | 8:01 |
4 | 4/18/2013 | thursday | lisinopril | 8:00 | 7:54 |
5 | 4/19/2013 | friday | lisinopril | 8:00 | 8:03 |
6 | 4/20/2013 | saturday | lisinopril | 8:00 | 7:51 |
7 | 4/21/2013 | sunday | lisinopril | 8:00 | 8:01 |
8 | 4/22/2013 | monday | lisinopril | 8:00 | 8:13 |
9 | 4/23/2013 | tuesday | lisinopril | 8:00 | 8:14 |
10 | 4/24/2013 | wednesday | lisinopril | 8:00 | 7:51 |
11 | 4/25/2013 | thursday | lisinopril | 8:00 | 8:10 |
12 | 4/26/2013 | friday | lisinopril | 8:00 | 8:19 |
13 | 4/27/2013 | saturday | lisinopril | 8:00 | 8:05 |
14 | 4/28/2013 | sunday | lisinopril | 8:00 | 8:17 |
15 | 4/29/2013 | monday | lisinopril | 8:00 | 8:30 |
16 | 4/30/2013 | tuesday | lisinopril | 8:00 | NaN |
17 | 5/1/2013 | wednesday | lisinopril | 8:00 | 8:29 |
18 | 5/2/2013 | thursday | lisinopril | 8:00 | 8:13 |
19 | 5/3/2013 | friday | lisinopril | 8:00 | 8:08 |
20 | 5/4/2013 | saturday | lisinopril | 8:00 | 8:22 |
21 | 5/5/2013 | sunday | lisinopril | 8:00 | 8:17 |
22 | 5/6/2013 | monday | lisinopril | 8:00 | 8:12 |
23 | 5/7/2013 | tuesday | lisinopril | 8:00 | 8:15 |
24 | 5/8/2013 | wednesday | lisinopril | 8:00 | 8:13 |
25 | 5/9/2013 | thursday | lisinopril | 8:00 | 8:02 |
26 | 5/10/2013 | friday | lisinopril | 8:00 | 8:02 |
27 | 5/11/2013 | saturday | lisinopril | 8:00 | 8:11 |
28 | 5/12/2013 | sunday | lisinopril | 8:00 | 8:28 |
29 | 5/13/2013 | monday | lisinopril | 8:00 | NaN |
... | ... | ... | ... | ... | ... |
585 | 5/22/2014 | friday | atorvastatin | 8:00 | 9:58 |
586 | 5/23/2014 | saturday | atorvastatin | 8:00 | NaN |
587 | 5/24/2014 | sunday | atorvastatin | 8:00 | 10:14 |
588 | 5/25/2014 | monday | atorvastatin | 8:00 | 8:05 |
589 | 5/26/2014 | tuesday | atorvastatin | 8:00 | NaN |
590 | 5/27/2014 | wednesday | atorvastatin | 8:00 | NaN |
591 | 5/28/2014 | thursday | atorvastatin | 8:00 | NaN |
592 | 5/29/2014 | friday | atorvastatin | 8:00 | NaN |
593 | 5/30/2014 | saturday | atorvastatin | 8:00 | NaN |
594 | 5/31/2014 | sunday | atorvastatin | 8:00 | NaN |
595 | 6/1/2014 | monday | atorvastatin | 8:00 | NaN |
596 | 6/2/2014 | tuesday | atorvastatin | 8:00 | NaN |
597 | 6/3/2014 | wednesday | atorvastatin | 8:00 | 7:57 |
598 | 6/4/2014 | thursday | atorvastatin | 8:00 | 9:24 |
599 | 6/5/2014 | friday | atorvastatin | 8:00 | NaN |
600 | 6/6/2014 | saturday | atorvastatin | 8:00 | NaN |
601 | 6/7/2014 | sunday | atorvastatin | 8:00 | 9:21 |
602 | 6/8/2014 | monday | atorvastatin | 8:00 | NaN |
603 | 6/9/2014 | tuesday | atorvastatin | 8:00 | NaN |
604 | 6/10/2014 | wednesday | atorvastatin | 8:00 | 9:06 |
605 | 6/11/2014 | thursday | atorvastatin | 8:00 | 8:40 |
606 | 6/12/2014 | friday | atorvastatin | 8:00 | 10:07 |
607 | 6/13/2014 | saturday | atorvastatin | 8:00 | 9:59 |
608 | 6/14/2014 | sunday | atorvastatin | 8:00 | 8:02 |
609 | 6/15/2014 | monday | atorvastatin | 8:00 | 10:55 |
610 | 6/16/2014 | tuesday | atorvastatin | 8:00 | 9:00 |
611 | 6/17/2014 | wednesday | atorvastatin | 8:00 | NaN |
612 | 6/18/2014 | thursday | atorvastatin | 8:00 | NaN |
613 | 6/19/2014 | friday | atorvastatin | 8:00 | NaN |
614 | 6/20/2014 | saturday | atorvastatin | 8:00 | NaN |
615 rows × 5 columns
# Planned plots ("dow" is the day-of-week column of mangoData2):
# plot missed by dow
# plot delay by dow
# plot delay by num of days into program
# plot on time %
# NOTE: 'med' == 'lisinopril' compares two string literals and evaluates to
# False before the indexing happens, so this looks up a column named False and
# raises KeyError: False. Filtering rows needs a boolean mask built from the
# column itself, i.e. mangoData2[mangoData2['med'] == 'lisinopril'].
lisin = mangoData2['med'== 'lisinopril']
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-8-f6d2a6f92a52> in <module>() 4 # plot on time % 5 ----> 6 lisin = mangoData2['med'== 'lisinopril'] //anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key) 1676 return self._getitem_multilevel(key) 1677 else: -> 1678 return self._getitem_column(key) 1679 1680 def _getitem_column(self, key): //anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key) 1683 # get column 1684 if self.columns.is_unique: -> 1685 return self._get_item_cache(key) 1686 1687 # duplicate columns & possible reduce dimensionaility //anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item) 1050 res = cache.get(item) 1051 if res is None: -> 1052 values = self._data.get(item) 1053 res = self._box_item_values(item, values) 1054 cache[item] = res //anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath) 2563 2564 if not isnull(item): -> 2565 loc = self.items.get_loc(item) 2566 else: 2567 indexer = np.arange(len(self.items))[isnull(self.items)] //anaconda/lib/python2.7/site-packages/pandas/core/index.pyc in get_loc(self, key) 1179 loc : int if unique index, possibly slice or mask if not 1180 """ -> 1181 return self._engine.get_loc(_values_from_object(key)) 1182 1183 def get_value(self, series, key): //anaconda/lib/python2.7/site-packages/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3656)() //anaconda/lib/python2.7/site-packages/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3534)() //anaconda/lib/python2.7/site-packages/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:11911)() //anaconda/lib/python2.7/site-packages/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:11864)() KeyError: False
# Take the first five rows by positional slice (this is not a filter on 'med')
# and display them.
lisin = mangoData2[0:5]
lisin
date | dow | med | scheduled | actual | |
---|---|---|---|---|---|
0 | 4/14/2013 | sunday | lisinopril | 8:00 | 8:01 |
1 | 4/15/2013 | monday | lisinopril | 8:00 | 8:10 |
2 | 4/16/2013 | tuesday | lisinopril | 8:00 | 8:02 |
3 | 4/17/2013 | wednesday | lisinopril | 8:00 | 8:01 |
4 | 4/18/2013 | thursday | lisinopril | 8:00 | 7:54 |
# Split the log into one frame per medication by selecting rows with a boolean
# mask on the 'med' column.
lisin = mangoData2.loc[mangoData2['med'] == 'lisinopril']
ator = mangoData2.loc[mangoData2['med'] == 'atorvastatin']
import datetime as dt
# `dt` is only a local alias, not an importable package, so `import dt.time`
# cannot work; import the class from the real module instead.
from datetime import time as tm


def mk_hr_min(stime):
    """Parse a clock string such as '8:05' or '17:00' into (hour, minute).

    Args:
        stime: String with exactly two ':'-separated integer fields.

    Returns:
        A tuple ``(hour, minute)`` of ints.

    Raises:
        ValueError: If a field is not an integer or there are not exactly
            two fields.
        AttributeError: If ``stime`` is not a string (e.g. a NaN float from
            a missing value); callers must filter those out first.
    """
    hr, minute = [int(part) for part in stime.split(':')]
    return hr, minute


def tdiff(s1, s2):
    """Return the signed number of minutes from clock time s1 to s2.

    Both times are taken to be on the same day, so the result is negative
    when ``s2`` is earlier than ``s1``.

    Args:
        s1: Start time as an 'H:MM' / 'HH:MM' string.
        s2: End time as an 'H:MM' / 'HH:MM' string.

    Returns:
        Integer minutes, ``s2 - s1``.

    Raises:
        ValueError: If either string is malformed or names an hour/minute
            outside the valid clock range.
    """
    h1, m1 = mk_hr_min(s1)
    h2, m2 = mk_hr_min(s2)
    # Building time objects validates the hour/minute ranges.
    t1 = tm(h1, m1, 0)
    t2 = tm(h2, m2, 0)
    # datetime.time does not support subtraction, so compute the difference
    # from the hour and minute fields directly.
    return 60 * (t2.hour - t1.hour) + (t2.minute - t1.minute)