import sys
sys.path.append('/Users/Brandon1')
from pythonimports import *
%matplotlib inline
# Directory that holds the Apple Health export; the output files produced
# further down are written into the same directory.
DIR = '/Users/Brandon1/Desktop/apple_health_export/'
# Read the whole export into memory as a list of lines. The `with` block
# closes the file as soon as the lines are read instead of leaving the handle
# open for the rest of the session; `healthdata` stays bound (to the closed
# file object) and `d` holds the lines.
with open(op.join(DIR, 'export.xml'), 'rb') as healthdata:
    d = healthdata.readlines()
len(d)  # number of lines read (shown as cell output when run interactively)
# Each health source is named by a record type that starts with a prefix such
# as 'HKQuantityTypeIdentifier', e.g. 'HKQuantityTypeIdentifierHeartRate'.
# The fields below are the columns kept for every record of every source, in
# output order; a field that a record does not provide stays "NA".
srckeys = [
    # where the record came from
    'sourceName',
    'sourceVersion',
    'device',
    'unit',
    # each timestamp is stored as a date field plus a matching *_time field
    'creationDate',
    'creationDate_time',
    'startDate',
    'startDate_time',
    'endDate',
    'endDate_time',
    # the measurement itself
    'value',
]
# Parse every record line of the export into
#     data[source][record_key][field] = value
# `source` is the record type with its first 24 characters removed (the length
# of prefixes such as 'HKQuantityTypeIdentifier') and its CamelCase words
# joined by underscores, e.g. 'HeartRate' -> 'Heart_Rate'. `record_key` is the
# running line counter `lcount`, so keys are unique across sources.
sp = " "
data = OrderedDict()
lcount = 0
srcrecs = OrderedDict()  # not filled in this section; kept so the name stays defined
for line in d[84:]:  # skips the first 84 lines (presumably the XML header/DTD -- verify)
    lcount += 1
    if line.startswith(" <R"):
        # Take the text between the first "<" and the closing "/>" and split
        # it into `name="value` pieces; splitting on `" ` consumes the closing
        # quote of every piece except the last one.
        splits = line.split("<")[1].split("/>")[0].split("\" ")
        rtype, src = splits[0].split("=\"")  # rtype is the `Record type` label (unused)
        spaced = "".join(sp + ch if ch.isupper() else ch for ch in src[24:])
        src = "_".join(spaced.split())
        if src not in data:
            data[src] = OrderedDict()
        # Start from "NA" in every known column so all rows share one layout.
        record = data[src][lcount] = OrderedDict((k, "NA") for k in srckeys)
        for split in splits[1:]:
            rec, val = split.split("=\"", 1)
            # Cut at the closing quote when it is still attached. Dropping one
            # trailing character unconditionally loses a digit when `value` is
            # not the last attribute, and leaves `">` behind when the tag is
            # not self-closing (`value="72">`).
            val = val.split("\"", 1)[0]
            if rec not in record:
                # Explicit check instead of `assert` (asserts vanish under -O).
                # The attribute is still stored below, as before; the message
                # now says what was unexpected and where.
                print("unexpected attribute %r in %s record %d" % (rec, src, lcount))
            if rec in ('startDate', 'endDate', 'creationDate'):
                # 'YYYY-MM-DD HH:MM:SS +ZZZZ' -> date and time columns; the
                # UTC offset is dropped. `day`/`clock` are used as names so
                # modules called `date`/`time` are never overwritten.
                day, clock = val.split(" ")[:2]
                record[rec] = day
                record["%s_time" % rec] = clock
            elif rec == 'device':
                # Keep only the text after "name:" from the comma-separated
                # device description.
                dev = [v for v in val.split(", ") if 'name' in v][0].split(":")[1]
                record[rec] = dev
            else:
                record[rec] = val
    if lcount % 10000 == 0:
        # Progress indicator. The single-argument call form prints the same
        # text under Python 2 and Python 3.
        print(lcount)
# Write one tab-separated table per source to DIR/<source>.txt: a header row
# with the field names of the source's first record, followed by one row of
# field values per record, in insertion order.
for src in sorted(data):
    filE = op.join(DIR, '%s.txt' % src)
    print(filE)
    records = data[src]
    k0 = next(iter(records))  # key of the first record; it supplies the header
    with open(filE, 'wb') as o:
        o.write('\t'.join(records[k0].keys()) + '\n')
        for lcount in records:
            o.write('\t'.join(records[lcount].values()) + '\n')
# Load the tab-separated heart-rate table from DIR.
hr = pd.read_csv(op.join(DIR, 'Heart_Rate.txt'), sep='\t')
hr.head()
# Collect the heart-rate values into 10-minute time-of-day bins and save the
# result. The bin key is the creation time without its seconds and without the
# last minute digit, e.g. '13:47:09' -> '13:4'.
rate = OrderedDict()
# Walk the two columns once instead of doing two label-based `.loc` lookups per
# row; `.values` yields the same scalar types that `.loc` returned.
for stamp, value in zip(hr['creationDate_time'].values, hr['value'].values):
    ctime = ":".join(stamp.split(":")[:-1])[:-1]
    rate.setdefault(ctime, []).append(value)
filE = op.join(DIR, 'heart_rate_10mins.pkl')
with open(filE, 'wb') as o:
    pickle.dump(rate, o, pickle.HIGHEST_PROTOCOL)
# Reload the binned heart rates from the pickle in DIR. The `with` block
# closes the file once loading finishes instead of leaking the handle.
filE = op.join(DIR, 'heart_rate_10mins.pkl')
with open(filE, 'rb') as pkl:
    rate = pickle.load(pkl)
# Plot the mean heart rate per 10-minute bin with a band of +/- one standard
# deviation around it. The day is drawn twice back to back (2 periods for
# continuity).
bins = sorted(rate)
# Build the x axis from the bin keys themselves: a key such as '13:4' names the
# bin starting at 13:40, so appending '0' gives its start time. Adding two
# DatetimeIndex objects with `+` only worked while pandas treated it as a set
# union and raises TypeError on current versions; deriving x from the keys also
# keeps x and y the same length when a bin has no data.
day_one = pd.to_datetime(['2017-05-10 %s0' % b for b in bins],
                         format='%Y-%m-%d %H:%M')
x = day_one.append(day_one + pd.Timedelta(days=1))
# Per-bin statistics, computed once and repeated for the second day.
y = [np.mean(rate[b]) for b in bins] * 2
error = [np.std(rate[b]) for b in bins] * 2
ymin = [m - s for m, s in zip(y, error)]
ymax = [m + s for m, s in zip(y, error)]
fig, ax = plt.subplots(1)
fig.autofmt_xdate()
fig.set_size_inches(18.5, 10.5)
# Draw on `ax` explicitly rather than through the implicit current axes.
ax.plot(x, y, 'k-')
ax.fill_between(x, ymin, ymax)
xfmt = mdates.DateFormatter('%H:%M')  # tick labels show time of day only
ax.xaxis.set_major_formatter(xfmt)
plt.show()
# Leftover scratch expression: evaluates to 3 and the result is not assigned.
1+1+1