libraries = c("dplyr","magrittr","tidyr","ggplot2","rstan","readxl")
for(x in libraries) { library(x,character.only=TRUE,warn.conflicts=FALSE,quietly=TRUE) }
require(zoo)
require(lubridate)
base_sz = 12 # base_size parameter
theme_set(theme_bw())
'%&%' = function(x,y) paste0(x,y)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
packageVersion("rstan")
packageVersion("StanHeaders")
rstan::stan_version()
Loading required package: zoo Warning message in library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE, : “there is no package called ‘zoo’”
[1] ‘2.19.3’
[1] ‘2.21.0.1’
read.csv("../../data/supplemetary table.csv") %>%
filter(SIClassification=='Certain') %>%
select(-X, -Source, -SIClassification, -DiagnosisCountry) %>%
mutate(InfectorOnset = as.Date(InfectorOnset, format="%m/%d/%Y"),
InfecteeOnset = as.Date(InfecteeOnset, format="%m/%d/%Y")) -> df
df
InfectorOnset | InfecteeOnset | ER | EL | SL | SR |
---|---|---|---|---|---|
<date> | <date> | <int> | <int> | <int> | <int> |
2020-01-17 | 2020-01-20 | 48 | 47 | 50 | 51 |
2020-01-22 | 2020-01-26 | 53 | 52 | 56 | 57 |
2020-01-24 | 2020-01-26 | 55 | 54 | 56 | 57 |
2020-01-24 | 2020-01-26 | 55 | 54 | 56 | 57 |
2020-01-26 | 2020-01-30 | 57 | 56 | 60 | 61 |
2020-01-26 | 2020-01-29 | 57 | 56 | 59 | 60 |
2020-01-26 | 2020-01-30 | 57 | 56 | 60 | 61 |
2020-01-26 | 2020-01-30 | 57 | 56 | 60 | 61 |
2020-01-21 | 2020-01-24 | 52 | 51 | 54 | 55 |
2020-01-21 | 2020-01-24 | 52 | 51 | 54 | 55 |
2020-01-20 | 2020-01-29 | 51 | 50 | 59 | 60 |
2020-02-01 | 2020-02-05 | 63 | 62 | 66 | 67 |
2019-12-20 | 2019-12-25 | 20 | 19 | 24 | 25 |
2019-12-20 | 2019-12-29 | 20 | 19 | 28 | 29 |
2019-12-27 | 2020-01-03 | 27 | 26 | 33 | 34 |
2019-12-12 | 2019-12-19 | 12 | 11 | 18 | 19 |
2019-12-21 | 2019-12-24 | 21 | 20 | 23 | 24 |
2020-01-04 | 2020-01-11 | 35 | 34 | 41 | 42 |
CUTOFF_TIME = as.Date('2020-02-12')
t0 = as.Date('2019-12-01')
df['tstar'] = CUTOFF_TIME
df %<>% mutate(dist = (SR+SL)/2-(ER+EL)/2,
SL = if_else(SL < EL, EL, SL),
ER = if_else(ER > SR, SR, ER),
tstar = as.numeric(tstar - t0))
df
InfectorOnset | InfecteeOnset | ER | EL | SL | SR | tstar | dist |
---|---|---|---|---|---|---|---|
<date> | <date> | <int> | <int> | <int> | <int> | <dbl> | <dbl> |
2020-01-17 | 2020-01-20 | 48 | 47 | 50 | 51 | 73 | 3 |
2020-01-22 | 2020-01-26 | 53 | 52 | 56 | 57 | 73 | 4 |
2020-01-24 | 2020-01-26 | 55 | 54 | 56 | 57 | 73 | 2 |
2020-01-24 | 2020-01-26 | 55 | 54 | 56 | 57 | 73 | 2 |
2020-01-26 | 2020-01-30 | 57 | 56 | 60 | 61 | 73 | 4 |
2020-01-26 | 2020-01-29 | 57 | 56 | 59 | 60 | 73 | 3 |
2020-01-26 | 2020-01-30 | 57 | 56 | 60 | 61 | 73 | 4 |
2020-01-26 | 2020-01-30 | 57 | 56 | 60 | 61 | 73 | 4 |
2020-01-21 | 2020-01-24 | 52 | 51 | 54 | 55 | 73 | 3 |
2020-01-21 | 2020-01-24 | 52 | 51 | 54 | 55 | 73 | 3 |
2020-01-20 | 2020-01-29 | 51 | 50 | 59 | 60 | 73 | 9 |
2020-02-01 | 2020-02-05 | 63 | 62 | 66 | 67 | 73 | 4 |
2019-12-20 | 2019-12-25 | 20 | 19 | 24 | 25 | 73 | 5 |
2019-12-20 | 2019-12-29 | 20 | 19 | 28 | 29 | 73 | 9 |
2019-12-27 | 2020-01-03 | 27 | 26 | 33 | 34 | 73 | 7 |
2019-12-12 | 2019-12-19 | 12 | 11 | 18 | 19 | 73 | 7 |
2019-12-21 | 2019-12-24 | 21 | 20 | 23 | 24 | 73 | 3 |
2020-01-04 | 2020-01-11 | 35 | 34 | 41 | 42 | 73 | 7 |
mean(df$dist)
sd(df$dist)
data_drname = "../../data"
flname = 'data.csv'
write.table(df, paste0(data_drname,flname), row.names=FALSE, sep=",", quote = FALSE)
stanmaindir = '../../../../Hokkaido_Backup/Wuhan_Serial_interval_2020/certain'
unlink(stanmaindir, recursive=T)
dir.create(stanmaindir)
## main dir for Stan simulations
standirname = stanmaindir%&%"/lognormal-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.5, N)
s_raw = rep(.5, N)
logmean_SI = log(mean(df$dist))
logsd_SI = log(sd(df$dist))
stan_rdump(c('e_raw', 's_raw', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
parameters {
real logmean_SI;
real logsd_SI;
vector<lower = 0, upper = 1>[N] s_raw;
vector<lower = 0, upper = 1>[N] e_raw;
}
transformed parameters {
real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
real param1 = logmean_SI - param2^2/2.0;
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N)
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
model {
logmean_SI ~ std_normal();
logsd_SI ~ std_normal();
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
target += lognormal_lpdf(s - e | param1, param2);
}
generated quantities {
real<lower = 0> mean_SI = exp(param1 + square(param2) / 2);
real<lower = 0> sd_SI = sqrt((exp(square(param2)) - 1) * exp(2*param1 + square(param2)));
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = lognormal_lpdf(s[k] - e[k] | param1, param2);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../../../CmdStan-2.22.1"
stanscriptdir = "../Dropbox/" %&% substring(standirname,13)
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..10}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=20000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
## main dir for Stan simulations
standirname = stanmaindir%&%"/gamma-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.2, N)
s_raw = rep(.8, N)
param1 = (mean(df$dist)/sd(df$dist))^2
param2 = mean(df$dist)/(sd(df$dist)^2)
stan_rdump(c('e_raw', 's_raw', 'param1', 'param2'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
parameters {
real<lower=0> mean_SI;
real<lower=0> sd_SI;
vector<lower = 0, upper = 1>[N] e_raw;
vector<lower = 0, upper = 1>[N] s_raw;
}
transformed parameters {
real<lower = 0> param1 = square(mean_SI/sd_SI);
real<lower = 0> param2 = mean_SI/square(sd_SI);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N)
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
model {
mean_SI ~ normal(5.0, 10.0);
sd_SI ~ cauchy(0, 5.0);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
target += gamma_lpdf(s - e | param1, param2);
}
generated quantities {
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = gamma_lpdf(s[k] - e[k] | param1, param2);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../../../CmdStan-2.22.1"
stanscriptdir = "../Dropbox/" %&% substring(standirname,13)
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..10}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=20000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
refresh=1000 \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
## main dir for Stan simulations
standirname = stanmaindir%&%"/weibull-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.2, N)
s_raw = rep(.8, N)
logmean_SI = log(mean(df$dist))
param1 = 1.75
stan_rdump(c('e_raw', 's_raw', 'logmean_SI', 'param1'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
parameters {
real<lower = 0> mean_SI;
real<lower = 0> param1;
vector<lower = 0, upper = 1>[N] e_raw;
vector<lower = 0, upper = 1>[N] s_raw;
}
transformed parameters {
real<lower = 0> param2 = mean_SI/tgamma(1.0+1.0/param1);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N)
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
model {
mean_SI ~ normal(5.0, 10.0);
param1 ~ exponential(0.0001);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
target += weibull_lpdf(s - e | param1, param2);
}
generated quantities {
real sd_SI = param2*sqrt(tgamma(1.0+2.0/param1) - square(tgamma(1.0+1.0/param1)));
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = weibull_lpdf(s[k] - e[k] | param1, param2);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../../../CmdStan-2.22.1"
stanscriptdir = "../Dropbox/" %&% substring(standirname,13)
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..10}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=10000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
## main dir for Stan simulations
standirname = stanmaindir%&%"/lognormal-truncated"
unlink(standirname, recursive=T)
dir.create(standirname)
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
r = 0.14
upper_bound = df$tstar[1]
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'r', 'upper_bound'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.2, N)
s_raw = rep(.8, N)
logmean_SI = log(mean(df$dist))
logsd_SI = log(sd(df$dist))
stan_rdump(c('e_raw', 's_raw', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")
# Stan program
"functions {
real[] fstar_ode(real t, real[] z, real[] theta, data real[] x_r, int[] x_i) {
int N = x_i[1]; // number of records
real e[N] = theta[1:N];
real param1 = theta[N+1];
real param2 = theta[N+2];
real upper_bound = x_r[1];
real r = x_r[2];
real dzdt[N];
real tstar[N];
for (k in 1:N) {
tstar[k] = upper_bound - e[k];
dzdt[k] = exp(lognormal_lcdf(tstar[k]*(1.0-t) | param1, param2)) * r * tstar[k] * exp(-r*tstar[k]*t) / (1.0 - exp(-r*tstar[k]*t));
}
return dzdt;
}
}
data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
real<lower = 0> upper_bound;
real<lower = 0> r;
}
transformed data {
int X_i[1] = {N};
real X_r[2] = {upper_bound, r};
}
parameters {
real logmean_SI;
real logsd_SI;
vector<lower = 0, upper = 1>[N] e_raw;
vector<lower = 0, upper = 1>[N] s_raw;
}
transformed parameters {
real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
real param1 = logmean_SI - square(param2)/2.0;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector[N] Z;
{
real theta[N+2];
real Z0[N];
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N)
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
for (k in 1:N) {
Z0[k] = 0.0;
theta[k] = e[k];
}
theta[N+1] = param1;
theta[N+2] = param2;
Z = to_vector(to_array_1d(integrate_ode_rk45(fstar_ode, Z0, 0.001, {1.0}, theta, X_r, X_i, 1e-5, 1e-3, 5e2)));
}
}
model {
logmean_SI ~ std_normal();
logsd_SI ~ std_normal();
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
target += lognormal_lpdf(s - e | param1, param2) - log(Z);
}
generated quantities {
real<lower = 0> mean_SI = exp(param1 + square(param2)/2);
real<lower = 0> sd_SI = sqrt((exp(square(param2))-1)*exp(2*param1+square(param2)));
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = lognormal_lpdf(s[k] - e[k] | param1, param2) - log(Z[k]);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../../../CmdStan-2.22.1"
stanscriptdir = "../Dropbox/" %&% substring(standirname,13)
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..10}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=10000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
## main dir for Stan simulations
standirname = stanmaindir%&%"/gamma-truncated"
unlink(standirname, recursive=T)
dir.create(standirname)
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
r = 0.14
upper_bound = df$tstar[1]
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'r', 'upper_bound'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.4, N)
s_raw = rep(.6, N)
mean_SI = mean(df$dist)
sd_SI = sd(df$dist)
stan_rdump(c('e_raw', 's_raw', 'mean_SI', 'sd_SI'), file=standirname%&%"/Init.R")
# Stan program
"functions {
real[] fstar_ode(real t, real[] z, real[] theta, data real[] x_r, int[] x_i) {
int N = x_i[1]; // number of records
real e[N] = theta[1:N];
real param1 = theta[N+1];
real param2 = theta[N+2];
real upper_bound = x_r[1];
real r = x_r[2];
real dzdt[N];
real tstar[N];
for (k in 1:N) {
tstar[k] = upper_bound - e[k];
dzdt[k] = exp(gamma_lcdf(tstar[k]*(1.0-t) | param1, param2)) * r * tstar[k] * exp(-r*tstar[k]*t) / (1.0 - exp(-r*tstar[k]*t));
}
return dzdt;
}
}
data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
real<lower = 0> r;
real<lower = 0> upper_bound;
}
transformed data {
int X_i[1] = {N};
real X_r[2] = {upper_bound, r};
}
parameters {
real<lower = 0> mean_SI;
real<lower = 0> sd_SI;
vector<lower = 0, upper = 1>[N] s_raw;
vector<lower = 0, upper = 1>[N] e_raw;
}
transformed parameters {
real<lower = 0> param1 = square(mean_SI/sd_SI);
real<lower = 0> param2 = mean_SI/square(sd_SI);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
vector[N] Z;
{
real theta[N+2];
real Z0[N];
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N)
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
for (k in 1:N) {
Z0[k] = 0.0;
theta[k] = e[k];
}
theta[N+1] = param1;
theta[N+2] = param2;
Z = to_vector(to_array_1d(integrate_ode_rk45(fstar_ode, Z0, 0.001, {1.0}, theta, X_r, X_i)));
}
}
model {
mean_SI ~ normal(5.0, 10.0);
sd_SI ~ cauchy(0, 5.0);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
target += gamma_lpdf(s - e | param1, param2) - log(Z);
}
generated quantities {
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = gamma_lpdf(s[k] - e[k] | param1, param2) - log(Z[k]);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../../../CmdStan"
stanscriptdir = "../Dropbox/" %&% substring(standirname,13)
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..10}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=10000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
## main dir for Stan simulations
standirname = stanmaindir%&%"/weibull-truncated-2.23.0"
unlink(standirname, recursive=T)
dir.create(standirname)
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
r = 0.14
upper_bound = df$tstar[1]
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'r', 'upper_bound'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.2, N)
s_raw = rep(.8, N)
logmean_SI = log(mean(df$dist))
param1 = 1.75
stan_rdump(c('e_raw', 's_raw', 'logmean_SI', 'param1'), file=standirname%&%"/Init.R")
# Stan program
"functions {
real[] fstar_ode(real t, real[] z, real[] theta, data real[] x_r, int[] x_i) {
int N = x_i[1]; // number of records
real e[N] = theta[1:N];
real param1 = theta[N+1];
real param2 = theta[N+2];
real upper_bound = x_r[1];
real r = x_r[2];
real dzdt[N];
real tstar[N];
for (k in 1:N) {
tstar[k] = upper_bound - e[k];
dzdt[k] = - expm1(-((1.0-t)*tstar[k]/param2)^param1) * r * tstar[k] * exp(-r*tstar[k]*t) / (1.0 - exp(-r*tstar[k]*t));
}
return dzdt;
}
}
data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
real<lower = 0> r;
real<lower = 0> upper_bound;
}
transformed data {
int X_i[1] = {N};
real X_r[2] = {upper_bound, r};
}
parameters {
real<lower = 0> mean_SI;
real<lower = 0> param1;
vector<lower = 0, upper = 1>[N] e_raw;
vector<lower = 0, upper = 1>[N] s_raw;
}
transformed parameters {
real<lower = 0> param2 = mean_SI/tgamma(1.0+1.0/param1);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
real Z[N];
{
vector[N] s_;
real theta[N+2];
real Z0[N];
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N)
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
for (k in 1:N) {
Z0[k] = 0.0;
theta[k] = e[k];
}
theta[N+1] = param1;
theta[N+2] = param2;
Z = to_array_1d(integrate_ode_rk45(fstar_ode, Z0, 0.01, {1.0}, theta, X_r, X_i, 1e-5, 1e-3, 5e2));
}
}
model {
mean_SI ~ normal(5.0, 10.0);
param1 ~ exponential(0.0001);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
for (k in 1:N)
target += weibull_lpdf(s[k] - e[k] | param1, param2) - log(Z[k]);
}
generated quantities {
real sd_SI = param2*sqrt(tgamma(1.0+2.0/param1)-(tgamma(1.0+1.0/param1))^2);
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = weibull_lpdf(s[k] - e[k] | param1, param2) - log(Z[k]);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../../../CmdStan" #-2.22.1
stanscriptdir = "../Dropbox/" %&% substring(standirname,13)
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..10}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=10000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)