from lxml import etree from lxml import objectify cr = etree.parse("CD007470StatsDataOnly.rm5") #pulls out all the studies with low risk of bias root = cr.getroot() ad = root.getchildren()[7] a = ad.getchildren()[0].getchildren()[1].getchildren() studies_xml = a[5].getchildren()[1:]#+ a[6].getchildren()[1:] def toNumber(v): try: return float(v) except Exception: return v def asNumbers(d): return { k : toNumber(v) for k,v in d.items()} stds = [asNumbers(dict(s.attrib)) for s in studies_xml] import pandas as pd studies = pd.DataFrame(stds) from pymc import * def inv_logit(p): return 1 /(1+ exp(-p)) with Model() as model: nstudies = len(studies) hm = Normal('hazard_mean', -2.5, 3) #mean hazard rate sdh = Tpos('sd_hazard', 15, 1, .5**-2) #hyperparameters for effect size distribution em = T('effect_mean', 8, 0, .05**-2) #mean effect size sd = Tpos('sd_effect', 15, .05, .05**-2) h = Normal('hazard', hm, sd = sdh, shape = nstudies) e = Normal('effect', em, sd = sd, shape = nstudies) controls = Binomial('controls', p = inv_logit(h), n = studies.TOTAL_2.values, observed = studies.EVENTS_2.values) treatment = Binomial('treatment', p = inv_logit(h+e), n = studies.TOTAL_1.values, observed = studies.EVENTS_1.values) start = find_MAP(vars =[hm, em, h, e]) step = NUTS(scaling=start) trace = sample(3000, step, progressbar=False) traceplot(trace); esamples = pd.DataFrame(trace[em]) print "probability mean effect is negative", (esamples < 0).mean() print "mean effect estimate", esamples.mean()