In [ ]:
import sys
sys.path.append('/Users/Brandon1')
from pythonimports import *
%matplotlib inline
In [ ]:
# Directory that holds the Apple Health export (export.xml). The per-source .txt files and the
# heart-rate pickle produced further down are written here as well.
# Set the APPLE_HEALTH_DIR environment variable to point the notebook at a different
# export without editing this cell; the original hard-coded location remains the default.
import os
DIR = os.environ.get('APPLE_HEALTH_DIR', '/Users/Brandon1/Desktop/apple_health_export/')
In [ ]:
# Read the whole export into memory as a list of raw lines (newline characters kept).
# The `with` block guarantees the file handle is closed once the lines are loaded;
# `healthdata` stays bound afterwards, but as a closed file object.
with open(op.join(DIR,'export.xml'),'rb') as healthdata:
    d = healthdata.readlines()
In [ ]:
# sanity check: number of lines read from export.xml
len(d)
In [ ]:
#each health source starts with 'HKQuantityTypeIdentifier', e.g., 'HKQuantityTypeIdentifierHeartRate'
In [ ]:
# Fields stored for every record, in output-column order. A field that a
# record does not provide keeps the placeholder "NA".
# Each of the three date attributes is paired with a companion '<name>_time' field.
srckeys = ['sourceName', 'sourceVersion', 'device', 'unit']
for _stamp in ('creationDate', 'startDate', 'endDate'):
    srckeys += [_stamp, '%s_time' % _stamp]
srckeys.append('value')
In [ ]:
# Parse every " <R..." (Record) line of the export into a nested mapping
#   data[source][lcount][field] -> string
# * source : the record type with its first 24 characters removed and the remaining
#            CamelCase words joined by "_" (e.g. 'Heart_Rate').
# * lcount : 1-based position of the line within d[84:] (not the file line number).
# * field  : one of `srckeys`; fields a record does not carry stay "NA".
sp = " "
data = OrderedDict()
lcount = 0
srcrecs = OrderedDict()
# The first 84 lines are skipped -- presumably the XML/DTD header; confirm for your export.
for line in d[84:]:
    lcount += 1
    if line.startswith(" <R"):
        # Isolate the attribute text of the opening tag. The tag may end in '/>' or, when the
        # record has child elements, in a bare '>'. Strip either terminator AND the closing
        # quote of the final attribute here, so no branch below has to guess which attribute
        # came last (previously only `value` had its trailing quote removed).
        tag = line.split("<")[1].split("/>")[0].rstrip().rstrip(">")
        if tag.endswith("\""):
            tag = tag[:-1]
        splits = tag.split("\" ")

        rtype,src = splits[0].split("=\"")
        # CamelCase -> Camel_Case after dropping the 24-character type prefix
        src = "_".join("".join([sp+x if x.isupper() else x for x in src[24:]]).split())
        if src not in data:
            data[src] = OrderedDict()
        record = OrderedDict((k, "NA") for k in srckeys)
        data[src][lcount] = record

        for attr in splits[1:]:
            # maxsplit=1: a value that itself contains '="' must not break the unpacking
            rec,val = attr.split("=\"", 1)
            if rec not in record:
                # Storing an unknown attribute would give this record more columns than the
                # others and misalign its row in the tab-separated output, so report and skip.
                print("unexpected attribute %r in record at line %d; skipped" % (rec, lcount))
                continue
            if rec in ['startDate','endDate','creationDate']:
                # value is three space-separated parts; the third part is not stored
                day,clock,_rest = val.split(" ")
                record[rec] = day
                record["%s_time" % rec] = clock
            elif rec == 'device':
                # keep only the text after the colon of the first 'name' field;
                # fall back to the raw device string when there is no such field
                names = [v for v in val.split(", ") if 'name' in v]
                record[rec] = names[0].split(":")[1] if names else val
            else:
                record[rec] = val
    if lcount % 10000 == 0:
        # progress indicator
        print(lcount)

Write each health source's records out to its own tab-separated `.txt` file in `DIR`.

In [ ]:
# Dump every source to DIR/<source>.txt as tab-separated text:
# one header row (the field names of the source's first record),
# then one row per record in insertion order.
for src in sorted(data):
    records = data[src]
    filE = op.join(DIR, '%s.txt' % src)
    print(filE)
    with open(filE, 'wb') as o:
        first_record = records[list(records.keys())[0]]
        o.write('\t'.join(first_record.keys()) + '\n')
        for lcount in records:
            o.write('\t'.join(records[lcount].values()) + '\n')

Check out the heart rate data: load `Heart_Rate.txt` and look at the first rows.

In [ ]:
# Load the tab-separated heart-rate file from DIR into a DataFrame and preview it.
hr_file = op.join(DIR, 'Heart_Rate.txt')
hr = pd.read_csv(hr_file, sep='\t')
hr.head()
In [ ]:
# Group heart-rate values by the 10-minute slot of their creation time, then pickle the result.
# The slot key is the creation time with its last colon-separated piece removed and then its
# final character dropped (assuming HH:MM:SS input, '13:45:10' -> '13:4').
rate = OrderedDict()
for row in hr.index:
    pieces = hr.loc[row, 'creationDate_time'].split(":")
    ctime = ":".join(pieces[:-1])[:-1]
    rate.setdefault(ctime, []).append(hr.loc[row, 'value'])
filE = op.join(DIR, 'heart_rate_10mins.pkl')
with open(filE, 'wb') as o:
    pickle.dump(rate, o, pickle.HIGHEST_PROTOCOL)
In [ ]:
# Reload the binned heart-rate dict from disk, so the plotting cell can run without redoing the binning.
filE = op.join(DIR,'heart_rate_10mins.pkl')
# NOTE: pickle.load can execute arbitrary code -- only load a file this notebook wrote itself.
# The `with` block closes the file handle, which the bare open() call never did.
with open(filE,'rb') as f:
    rate = pickle.load(f)
In [ ]:
# Plot the mean heart rate per 10-minute bin with a +/- 1 standard-deviation band.
# The daily profile is drawn twice, back to back (2 periods), so the curve is continuous.
bins = sorted(rate)
means = [np.mean(rate[b]) for b in bins]
stds = [np.std(rate[b]) for b in bins]
y = means + means
error = stds + stds

# One timestamp per plotted point, 10 minutes apart; the calendar date is arbitrary because
# only HH:MM is shown on the axis. The length follows len(y) instead of a hard-coded 288.
# (Adding two DatetimeIndex objects, as was done before, relied on the removed "+ means set
# union" behaviour and raises TypeError on current pandas.)
# NOTE: points are placed consecutively, so a 10-minute bin with no data shifts later points.
x = pd.date_range('2017-05-10', periods=len(y), freq='10min')

ymin = [m - e for m, e in zip(y, error)]
ymax = [m + e for m, e in zip(y, error)]

fig, ax = plt.subplots(1)
fig.autofmt_xdate()
fig.set_size_inches(18.5, 10.5)
# draw on the explicit axes rather than through the pyplot/pylab current-axes state
ax.plot(x, y, 'k-')
ax.fill_between(x, ymin, ymax)
xfmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(xfmt)
ax.set_xlabel('time of day')
ax.set_ylabel('heart rate (mean +/- 1 s.d.)')

plt.show()
In [ ]:
# scratch cell: evaluates to 3 and is not used by anything else shown in this notebook
1+1+1