#!/usr/bin/env python
# coding: utf-8

# Notebook export: splits an Apple Health ``export.xml`` into one tab-separated
# file per quantity type, then bins the heart-rate records into 10-minute
# time-of-day buckets and plots the mean +/- one standard deviation.
#
# Statements are written so that they parse on both Python 2 and Python 3
# (single-argument ``print(...)``, no ``dict.keys()[0]``).

# In[ ]:


import sys
sys.path.append('/Users/Brandon1')
# NOTE(review): every helper name used below (op, OrderedDict, pd, np, pickle,
# plt, mdates) is expected to be provided by this star import -- confirm.
from pythonimports import *
get_ipython().run_line_magic('matplotlib', 'inline')


# In[ ]:


#set this directory for health data and outfiles
DIR = '/Users/Brandon1/Desktop/apple_health_export/'


# In[ ]:


# Read the whole export once; the context manager closes the handle.
with open(op.join(DIR, 'export.xml'), 'rb') as healthdata:
    d = healthdata.readlines()


# In[ ]:


len(d)


# In[ ]:


#each health source starts with 'HKQuantityTypeIdentifier' e.g., 'HKQuantityTypeIdentifierHeartRate'


# In[ ]:


#info available for each source - some info == "NA"
srckeys = ['sourceName',
           'sourceVersion',
           'device',
           'unit',
           'creationDate',
           'creationDate_time',
           'startDate',
           'startDate_time',
           'endDate',
           'endDate_time',
           'value']


# In[ ]:


# The loop below starts at line 85 of the export, so lcount == 1 is that line.
# Presumably the skipped lines are the XML/DTD preamble -- confirm.
HEADER_LINES = 84
TYPE_PREFIX = 'HKQuantityTypeIdentifier'
# NOTE(review): the original record test and the first half of the ``splits``
# expression were lost when this file was extracted (everything between
# '<' and '/>' was stripped). They are reconstructed here from how ``splits``
# is consumed below -- confirm against the notebook.
RECORD_PREFIX = ' <Record type="' + TYPE_PREFIX
DATE_FIELDS = ('startDate', 'endDate', 'creationDate')

data = OrderedDict()   # data[source][line number] -> OrderedDict of srckeys
lcount = 0
for line in d[HEADER_LINES:]:
    lcount += 1
    # The file is opened in binary mode; decode once so the str comparisons
    # below behave the same on Python 2 and Python 3.
    line = line.decode('utf-8')
    if line.startswith(RECORD_PREFIX):
        # Keep only the attribute list (between '<Record ' and '/>'), then cut
        # it into name="value chunks; every chunk but the last loses its
        # closing quote to the '" ' separator.
        splits = line.split('<Record ', 1)[1].split('/>')[0].split('" ')

        # First attribute is the record type, e.g.
        # HKQuantityTypeIdentifierHeartRate -> Heart_Rate
        rtype, src = splits[0].split('="', 1)
        camel = src[len(TYPE_PREFIX):]
        spaced = "".join(" " + ch if ch.isupper() else ch for ch in camel)
        src = "_".join(spaced.split())

        record = OrderedDict((k, "NA") for k in srckeys)
        data.setdefault(src, OrderedDict())[lcount] = record

        for split in splits[1:]:
            rec, val = split.split('="', 1)
            if rec not in record:
                # Attribute not listed in srckeys. It is still stored below,
                # which gives this row more columns than the header.
                print("crap")
            if rec in DATE_FIELDS:
                # Three space-separated parts; the third is not kept.
                # Presumably a UTC offset -- confirm.
                date, clock, _unused = val.split(" ")
                record[rec] = date
                record["%s_time" % rec] = clock
            elif rec == 'device':
                # Keep only the text after 'name:' in the device description.
                dev = [v for v in val.split(", ") if 'name' in v][0].split(":")[1]
                record[rec] = dev
            elif rec == 'value':
                # Last attribute on the line: drop its closing quote.
                record[rec] = val[:-1]
            else:
                record[rec] = val
    # NOTE(review): original indentation was lost; progress is reported per
    # input line here.
    if lcount % 10000 == 0:
        print(lcount)


# # write out the files

# In[ ]:


for src in sorted(data):
    filE = op.join(DIR, '%s.txt' % src)
    print(filE)
    with open(filE, 'wb') as o:
        # Header row comes from the first record of this source.
        k0 = next(iter(data[src]))
        text = '\t'.join(data[src][k0].keys()) + '\n'
        o.write(text.encode('utf-8'))
        for rec_id in data[src]:
            text = '\t'.join(data[src][rec_id].values()) + '\n'
            o.write(text.encode('utf-8'))


# # check out the heart rate data

# In[ ]:


hr = pd.read_csv(op.join(DIR, 'Heart_Rate.txt'), sep='\t')
hr.head()


# In[ ]:


#get heart rate values into 10min bins, save dict
rate = OrderedDict()
for stamp, value in zip(hr['creationDate_time'], hr['value']):
    # 'HH:MM:SS' -> 'HH:MM' -> 'HH:M' (drop the minutes' units digit)
    ctime = stamp.rsplit(":", 1)[0][:-1]
    rate.setdefault(ctime, []).append(value)

filE = op.join(DIR, 'heart_rate_10mins.pkl')
with open(filE, 'wb') as o:
    pickle.dump(rate, o, pickle.HIGHEST_PROTOCOL)


# In[ ]:


# Only load pickles this notebook wrote itself; pickle is unsafe on
# untrusted files.
filE = op.join(DIR, 'heart_rate_10mins.pkl')
with open(filE, 'rb') as pkl:
    rate = pickle.load(pkl)


# In[ ]:


#make a fig with average heart rate per bin, and cloud of std (2 periods for continuity)
bins = sorted(rate)
means = [np.mean(rate[b]) for b in bins]
stds = [np.std(rate[b]) for b in bins]

# The original added two DatetimeIndex objects with '+'. Older pandas
# reportedly treated that as a set union; current pandas raises TypeError.
# Building x from the bin keys gives the same 288 points when every 10-minute
# bin is present and keeps x aligned with y when some bins are missing.
x = pd.to_datetime(['%s %s0' % (day, b)
                    for day in ('2017-05-10', '2017-05-11')
                    for b in bins])
y = means + means
error = stds + stds
ymin = [m - s for m, s in zip(y, error)]
ymax = [m + s for m, s in zip(y, error)]

fig, ax = plt.subplots(1)
fig.autofmt_xdate()
fig.set_size_inches(18.5, 10.5)
ax.plot(x, y, 'k-')
ax.fill_between(x, ymin, ymax)
xfmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(xfmt)
plt.show()


# In[ ]:


1+1+1


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]: