#!/usr/bin/env python # coding: utf-8 # In[1]: import arviz as az import pystan import numpy as np import ujson as json # In[2]: with open("radon.json", "rb") as f: radon_data = json.load(f) key_renaming = {"x": "floor_idx", "county": "county_idx", "u": "uranium"} radon_data = { key_renaming.get(key, key): np.array(value) if isinstance(value, list) else value for key, value in radon_data.items() } radon_data["county_idx"] = radon_data["county_idx"] + 1 # In[3]: prior_code = """ data { int J; int N; int floor_idx[N]; int county_idx[N]; real uranium[J]; } generated quantities { real g[2]; real sigma_a = exponential_rng(1); real sigma = exponential_rng(1); real b = normal_rng(0, 1); real za_county[J]; real y_hat[N]; real a[J]; real a_county[J]; g[1] = normal_rng(0, 10); g[2] = normal_rng(0, 10); for (i in 1:J) { za_county[i] = normal_rng(0, 1); a[i] = g[1] + g[2] * uranium[i]; a_county[i] = a[i] + za_county[i] * sigma_a; } for (j in 1:N) { y_hat[j] = normal_rng(a_county[county_idx[j]] + b * floor_idx[j], sigma); } } """ # In[4]: prior_model = pystan.StanModel(model_code=prior_code, extra_compile_args=['-flto']) # In[5]: prior_data = {key: value for key, value in radon_data.items() if key not in ("county_name", "y")} prior = prior_model.sampling(data=prior_data, iter=500, warmup=0, algorithm="Fixed_param") # In[6]: radon_code = """ data { int J; int N; int floor_idx[N]; int county_idx[N]; real uranium[J]; real y[N]; } parameters { real g[2]; real sigma_a; real sigma; real za_county[J]; real b; } transformed parameters { real theta[N]; real a[J]; real a_county[J]; for (i in 1:J) { a[i] = g[1] + g[2] * uranium[i]; a_county[i] = a[i] + za_county[i] * sigma_a; } for (j in 1:N) theta[j] = a_county[county_idx[j]] + b * floor_idx[j]; } model { g ~ normal(0, 10); sigma_a ~ exponential(1); za_county ~ normal(0, 1); b ~ normal(0, 1); sigma ~ exponential(1); for (j in 1:N) y[j] ~ normal(theta[j], sigma); } generated quantities { real log_lik[N]; real y_hat[N]; for (j in 1:N) { log_lik[j] = normal_lpdf(y[j] | theta[j], sigma); y_hat[j] = normal_rng(theta[j], sigma); } } """ # In[7]: stan_model = pystan.StanModel(model_code=radon_code, extra_compile_args=['-flto']) # In[8]: model_data = {key: value for key, value in radon_data.items() if key not in ("county_name",)} fit = stan_model.sampling(data=model_data, control={"adapt_delta": 0.99}, iter=1500, warmup=1000) # In[9]: coords = { "level": ["basement", "floor"], "obs_id": np.arange(radon_data["y"].size), "county": radon_data["county_name"], "g_coef": ["intercept", "slope"], } dims = { "g" : ["g_coef"], "za_county" : ["county"], "y" : ["obs_id"], "y_hat" : ["obs_id"], "floor_idx" : ["obs_id"], "county_idx" : ["obs_id"], "theta" : ["obs_id"], "uranium" : ["county"], "a" : ["county"], "a_county" : ["county"], } idata = az.from_pystan( posterior=fit, posterior_predictive="y_hat", prior=prior, prior_predictive="y_hat", observed_data=["y"], constant_data=["floor_idx", "county_idx", "uranium"], log_likelihood={"y": "log_lik"}, coords=coords, dims=dims, ).rename({"y_hat": "y"}) # renames both prior and posterior predictive # In[11]: idata # In[12]: idata.to_netcdf("pystan.nc") # In[ ]: