#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import sys
# Make the directory that holds the `pythonimports` module importable.
sys.path.append('/Users/Brandon1')
# NOTE(review): the names used below without an explicit import (op, OrderedDict, pd, np,
# plt, pl, mdates, pickle) presumably all come from this star import -- confirm.
from pythonimports import *
# Notebook magic for inline matplotlib output; get_ipython() only exists inside an
# IPython/Jupyter session, so this line fails under a plain `python` interpreter.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[ ]:


# Set this to the directory of the Apple Health export. It is used both as the input
# location (export.xml is read from here) and as the output location (the per-source
# .txt tables and the heart-rate pickle are written here).
DIR = '/Users/Brandon1/Desktop/apple_health_export/'


# In[ ]:


# Read the whole export into memory as a list of raw lines.
# The file is opened in binary mode, so on Python 3 every element of `d` is a bytes object.
# The `with` block guarantees the handle is closed once the lines have been read
# (previously the file object was left open for the life of the session).
with open(op.join(DIR,'export.xml'),'rb') as healthdata:
    d = healthdata.readlines()


# In[ ]:


# Number of lines read (only displayed when run as a notebook cell).
len(d)


# In[ ]:


#each health source starts with 'HKQuantityTypeIdentifier', e.g., 'HKQuantityTypeIdentifierHeartRate'


# In[ ]:


# Column names recorded for every sample; a field that a record does not supply stays "NA".
# Each of the three date fields is followed by a companion "<field>_time" column.
srckeys = ['sourceName', 'sourceVersion', 'device', 'unit']
for stamp in ('creationDate', 'startDate', 'endDate'):
    srckeys.extend([stamp, stamp + '_time'])
srckeys.append('value')


# In[ ]:


# Parse the quantity records of the export into a nested mapping:
#   data[source name][line counter] -> OrderedDict with one string per srckeys field
# Source names are the record type with the 'HKQuantityTypeIdentifier' prefix removed and
# the remaining CamelCase words joined by '_' (e.g. 'HeartRate' -> 'Heart_Rate').
#
# NOTE(review): the record test on the original `if` line was garbled (it was not valid
# Python and `splits` was never assigned). The prefix test and attribute split below are a
# reconstruction from the surrounding code -- confirm against a real export.xml.
# NOTE(review): attribute values are kept exactly as they appear in the file (XML entities
# are not unescaped), and a value containing '" ' would be split incorrectly.
sp = " "
data = OrderedDict()
lcount = 0
srcrecs = OrderedDict()  # not referenced by any code visible in this file
type_prefix = "HKQuantityTypeIdentifier"
record_prefix = ' <Record type="' + type_prefix
for line in d[84:]:  # the first 84 lines are skipped (presumably the XML/DTD header -- confirm)
    lcount += 1
    # export.xml is read in binary mode; on Python 3 the lines are bytes and must be decoded
    # before they can be compared with str literals (on Python 2 they already are str).
    if not isinstance(line, str):
        line = line.decode('utf-8')
    if line.startswith(record_prefix):
        # Keep only the attribute list: drop the tag name, the closing '/>' or '>' and the
        # closing quote of the last attribute, so every piece has the shape  name="value
        attrs = line.split("<Record ", 1)[1].rstrip()
        for closer in ("/>", ">"):
            if attrs.endswith(closer):
                attrs = attrs[:-len(closer)].rstrip()
                break
        if attrs.endswith("\""):
            attrs = attrs[:-1]
        splits = attrs.split("\" ")
        rtype,src = splits[0].split("=\"", 1)
        # Insert a space before every capital letter, then re-join the words with '_'.
        spaced = "".join([sp+x if x.isupper() else x for x in src[len(type_prefix):]])
        src = "_".join(spaced.split())
        if src not in data:
            data[src] = OrderedDict()
        record = OrderedDict((k, "NA") for k in srckeys)
        data[src][lcount] = record
        for field in splits[1:]:
            # maxsplit=1 so a value that itself contains '="' does not break the unpacking
            rec,val = field.split("=\"", 1)
            if rec not in record:
                # unexpected attribute name: report it, it is still stored below
                print("crap")
            if rec in ('startDate','endDate','creationDate'):
                # values are split on spaces into exactly three parts: date, time, and a
                # third part that is discarded
                day,clock,_ignored = val.split(" ")
                record[rec] = day
                record["%s_time" % rec] = clock
            elif rec == 'device':
                # keep only the text after 'name:' from the comma-separated device
                # description; fall back to the raw text when no such part exists
                named = [v for v in val.split(", ") if 'name' in v and ":" in v]
                record[rec] = named[0].split(":")[1] if named else val
            else:
                record[rec] = val
    if lcount % 10000 == 0:
        # progress indicator
        print(lcount)


# # write out the files

# In[ ]:


# Write one tab-separated table per source to DIR/<source>.txt: a header row taken from the
# field names of the source's first record, followed by one row per record.
# Text mode ('w') is used because the rows are str objects; the previous 'wb' mode, the
# `print` statement and indexing `.keys()[0]` only worked on Python 2.
for src in sorted(data):
    filE = op.join(DIR,'%s.txt' % src)
    print(filE)
    records = data[src]
    with open(filE,'w') as o:
        first = next(iter(records.values()))
        o.write('\t'.join(first.keys()) + '\n')
        for record in records.values():
            o.write('\t'.join(record.values()) + '\n')


# # check out the heart rate data

# In[ ]:


# Load the tab-separated Heart_Rate table from DIR and preview its first rows
# (the preview is only displayed when run as a notebook cell).
heart_rate_table = op.join(DIR,'Heart_Rate.txt')
hr = pd.read_csv(heart_rate_table, sep='\t')
hr.head()


# In[ ]:


#group heart rate values by the 10-minute slot of their creation time, then pickle the dict
rate = OrderedDict()
for idx in hr.index:
    clock = hr.loc[idx,'creationDate_time']
    # drop the last ':'-separated field, then the final character of what is left;
    # for an 'HH:MM:SS' string this gives 'HH:M', i.e. the slot the sample falls into
    without_last_field = ":".join(clock.split(":")[:-1])
    slot = without_last_field[:-1]
    rate.setdefault(slot, []).append(hr.loc[idx,'value'])
pickle_path = op.join(DIR,'heart_rate_10mins.pkl')
with open(pickle_path,'wb') as out:
    pickle.dump(rate,out,pickle.HIGHEST_PROTOCOL)


# In[ ]:


# Reload the binned heart-rate dict from DIR/heart_rate_10mins.pkl.
# The `with` block closes the file after loading (the handle used to be left open).
# NOTE: pickle.load can execute arbitrary code -- only load a pickle this notebook wrote.
filE = op.join(DIR,'heart_rate_10mins.pkl')
with open(filE,'rb') as pkl:
    rate = pickle.load(pkl)


# In[ ]:


#make a fig with average heart rate per bin, and cloud of std (2 periods for continuity)
# The per-bin mean and standard deviation are computed once and then repeated, so the same
# daily profile is drawn over two consecutive days.
bins = sorted(rate)
means = [np.mean(rate[b]) for b in bins]
stds = [np.std(rate[b]) for b in bins]
# The x axis is built from the bin keys themselves, so it always has one point per bin.
# The previous `date_range(...) + date_range(...)` relied on '+' acting as a set union of
# two indexes, which current pandas rejects, and it hard-coded 288 points.
# assumes the keys are 'HH:M' strings as produced by the binning cell; appending '0'
# turns a key into the start of its 10-minute slot -- TODO confirm for pickles made elsewhere
day_one = pd.to_datetime(['2017-05-10 %s0' % b for b in bins], format='%Y-%m-%d %H:%M')
x = day_one.append(day_one + pd.Timedelta(days=1))
y = means + means
error = stds + stds
# lower / upper edge of the one-standard-deviation band around the mean
ymin = [m - s for m, s in zip(y, error)]
ymax = [m + s for m, s in zip(y, error)]
fig, ax = plt.subplots(1)
fig.autofmt_xdate()
fig.set_size_inches(18.5, 10.5)
ax.plot(x,y,'k-')
ax.fill_between(x,ymin,ymax)
# label ticks with the time of day only; the calendar date is an arbitrary anchor
xfmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(xfmt)
plt.show()