libraries = c("dplyr","magrittr","tidyr","ggplot2","rstan","readxl")
for(x in libraries) { library(x,character.only=TRUE,warn.conflicts=FALSE,quietly=TRUE) }
require(zoo)
require(lubridate)
base_sz = 12 # base_size parameter
theme_set(theme_bw())
'%&%' = function(x,y) paste0(x,y)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
packageVersion("rstan")
packageVersion("StanHeaders")
rstan::stan_version()
rstan (Version 2.19.3, GitRev: 2e1f913d3ca3) For execution on a local, multicore CPU with excess RAM we recommend calling options(mc.cores = parallel::detectCores()). To avoid recompilation of unchanged Stan programs, we recommend calling rstan_options(auto_write = TRUE) Loading required package: zoo Attaching package: ‘zoo’ The following objects are masked from ‘package:base’: as.Date, as.Date.numeric Loading required package: lubridate Attaching package: ‘lubridate’ The following object is masked from ‘package:base’: date
[1] ‘2.19.3’
[1] ‘2.21.0.1’
filenames = c("data_incper", "data_ons_hosp", "dthdata_hosp_dth", "dthdata_ons_dth", "dthdata_ons_hosp", "data_incper_inclwuhan")
for (idx in 1:(length(filenames)-1))
{
## main dir for Stan simulations
standirname = 'stan-sims/'%&%filenames[idx]%&%"-lognormal-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=T) %>% select(EL,ER,SL,SR,tstar) %>% mutate(dist = (SR+SL)/2-(ER+EL)/2) -> df
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.5, N)
s_raw = rep(.5, N)
logmean_SI = log(mean(df$dist))
logsd_SI = log(sd(df$dist))
stan_rdump(c('e_raw', 's_raw', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
parameters {
real logmean_SI;
real logsd_SI;
vector<lower = 0, upper = 1>[N] s_raw;
vector<lower = 0, upper = 1>[N] e_raw;
}
transformed parameters {
real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
real param1 = logmean_SI - param2^2/2.0;
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N) {
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
}
model {
logmean_SI ~ std_normal();
logsd_SI ~ std_normal();
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
for (k in 1:N)
target += lognormal_lpdf(s[k] - e[k] | param1, param2);
}
generated quantities {
real<lower = 0> mean_SI = exp(param1 + param2^2/2);
real<lower = 0> sd_SI = sqrt((exp(param2^2)-1)*exp(2*param1+param2^2));
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = lognormal_lpdf(s[k] - e[k] | param1, param2);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei-more_accurate/"%&%standirname
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=25000 num_warmup=100000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}
for (idx in 1:(length(filenames)-1))
{
## main dir for Stan simulations
standirname = 'stan-sims/'%&%filenames[idx]%&%"-gamma-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=T) %>% select(EL,ER,SL,SR,tstar) %>% mutate(dist = (SR+SL)/2-(ER+EL)/2) -> df
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.2, N)
s_raw = rep(.8, N)
mean_SI = mean(df$dist)
sd_SI = sd(df$dist)
stan_rdump(c('e_raw', 's_raw', 'mean_SI', 'sd_SI'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
parameters {
real<lower=0> mean_SI;
real<lower=0> sd_SI;
vector<lower = 0, upper = 1>[N] e_raw;
vector<lower = 0, upper = 1>[N] s_raw;
}
transformed parameters {
real<lower = 0> param1 = (mean_SI/sd_SI)^2;
real<lower = 0> param2 = mean_SI/(sd_SI^2);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N) {
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
}
model {
mean_SI ~ normal(5.0, 10.0);
sd_SI ~ cauchy(0, 5.0);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
for (k in 1:N)
target += gamma_lpdf(s[k] - e[k] | param1, param2);
}
generated quantities {
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = gamma_lpdf(s[k] - e[k] | param1, param2);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei-more_accurate/"%&%standirname
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=25000 num_warmup=100000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}
for (idx in 1:(length(filenames)-1))
{
## main dir for Stan simulations
standirname = 'stan-sims/'%&%filenames[idx]%&%"-weibull-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=T) %>% select(EL,ER,SL,SR,tstar) %>% mutate(dist = (SR+SL)/2-(ER+EL)/2) -> df
# Dumping data
N = nrow(df)
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N'), file=standirname%&%"/Data.R")
# Dumping initial conditions
e_raw = rep(.2, N)
s_raw = rep(.8, N)
mean_SI = mean(df$dist)
param1 = 1.75
stan_rdump(c('e_raw', 's_raw', 'mean_SI', 'param1'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
parameters {
real<lower = 0> mean_SI;
real<lower = 0> param1;
vector<lower = 0, upper = 1>[N] e_raw;
vector<lower = 0, upper = 1>[N] s_raw;
}
transformed parameters {
real<lower = 0> param2 = mean_SI/tgamma(1.0+1.0/param1);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(E_R)>[N] e;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N) {
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
}
model {
mean_SI ~ normal(5.0, 10.0);
param1 ~ exponential(0.0001);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
for (k in 1:N)
target += weibull_lpdf(s[k] - e[k] | param1, param2);
}
generated quantities {
real sd_SI = param2*sqrt(tgamma(1.0+2.0/param1)-(tgamma(1.0+1.0/param1))^2);
vector[N] log_likelihood;
for (k in 1:N)
log_likelihood[k] = weibull_lpdf(s[k] - t[k] | param1, param2);
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei-more_accurate/"%&%standirname
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=25000 num_warmup=100000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}
idx = length(filenames)
{
## main dir for Stan simulations
standirname = 'stan-sims/'%&%filenames[idx]%&%"-lognormal-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=T) %>% select(EL,ER,SL,SR,tstar) %>% mutate(dist = (SR+SL)/2-(ER+EL)/2) -> df -> df
df %>% filter(EL>0) -> df1 # E_L is defined
df %>% filter(EL==0) -> df2 # E_L is missing
df = rbind(df1,df2) # we move all incomplete records to the end of the dataframe
# Dumping data
N = nrow(df)
N2 = nrow(df2)
N1 = N - N2
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'N2'), file=standirname%&%"/Data.R")
# Dumping initial conditions
logmean_SI = log(mean(df$dist))
logsd_SI = log(sd(df$dist))
s_raw = rep(.6, N)
e_raw = rep(.4, N1)
t = rep(5.0, N2)
E_L_est_raw = rep(.5, N2)
stan_rdump(c('t', 'E_L_est_raw', 's_raw', 'e_raw', 'logmean_SI', 'logsd_SI'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
int<lower = 0> N2; // number of incomplete records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
transformed data {
int<lower = 0> N1 = N - N2;
}
parameters {
real logmean_SI;
real logsd_SI;
vector<lower = 0, upper = 1>[N] s_raw;
vector<lower = 0, upper = 1>[N1] e_raw;
vector<lower = 0>[N2] t;
vector<lower = 0, upper = 1>[N2] E_L_est_raw;
}
transformed parameters {
real<lower = 0> param2 = sqrt(log((exp(2*(logsd_SI-logmean_SI))+1.0)));
real param1 = logmean_SI - param2^2/2.0;
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(S_R)>[N1] e;
vector<lower = 0, upper = max(E_R)>[N2] E_L_est;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N1) {
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
for (m in 1:N2)
E_L_est[m] = E_R[N1 + m] * E_L_est_raw[m];
}
model {
logmean_SI ~ std_normal();
logsd_SI ~ std_normal();
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
t ~ normal(5.0, 10.0);
E_L_est_raw ~ normal(0.5, 1.0);
for (k in 1:N1)
target += lognormal_lpdf(s[k] - e[k] | param1, param2);
for (m in 1:N2) {
target += lognormal_lpdf(t[m] | param1, param2) + lognormal_lcdf(s[N1 + m] - E_L_est[m] | param1, param2);
if (s[N1 + m] > E_R[N1 + m])
target += lognormal_lccdf(s[N1 + m] - E_R[N1 + m] | param1, param2);
}
}
generated quantities {
real<lower = 0> mean_SI = exp(param1 + param2^2/2);
real<lower = 0> sd_SI = sqrt((exp(param2^2)-1)*exp(2*param1+param2^2));
vector[N] log_likelihood;
for (k in 1:N1)
log_likelihood[k] = lognormal_lpdf(s[k] - e[k] | param1, param2);
for (m in 1:N2) {
log_likelihood[N1 + m] = lognormal_lpdf(t[m] | param1, param2) + lognormal_lcdf(s[N1 + m] - E_L_est[m] | param1, param2);
if (s[N1 + m] > E_R[N1 + m])
log_likelihood[N1 + m] += lognormal_lccdf(s[N1 + m] - E_R[N1 + m] | param1, param2);
}
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei-more_accurate/"%&%standirname
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=10000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}
idx = length(filenames)
{
## main dir for Stan simulations
standirname = 'stan-sims/'%&%filenames[idx]%&%"-gamma-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=T) %>% select(EL,ER,SL,SR,tstar) %>% mutate(dist = (SR+SL)/2-(ER+EL)/2) -> df -> df
df %>% filter(EL>0) -> df1 # E_L is defined
df %>% filter(EL==0) -> df2 # E_L is missing
df = rbind(df1,df2) # we move all incomplete records to the end of the dataframe
# Dumping data
N = nrow(df)
N2 = nrow(df2)
N1 = N - N2
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'N2'), file=standirname%&%"/Data.R")
# Dumping initial conditions
mean_SI = mean(df$dist)
sd_SI = sd(df$dist)
s_raw = rep(.6, N)
e_raw = rep(.4, N1)
t = rep(5.0, N2)
E_L_est_raw = rep(.5, N2)
stan_rdump(c('t', 'E_L_est_raw', 's_raw', 'e_raw', 'mean_SI', 'sd_SI'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
int<lower = 0> N2; // number of incomplete records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
transformed data {
int<lower = 0> N1 = N - N2;
}
parameters {
real<lower=0> mean_SI;
real<lower=0> sd_SI;
vector<lower = 0, upper = 1>[N] s_raw;
vector<lower = 0, upper = 1>[N1] e_raw;
vector<lower = 0>[N2] t;
vector<lower = 0, upper = 1>[N2] E_L_est_raw;
}
transformed parameters {
real<lower = 0> param1 = (mean_SI/sd_SI)^2;
real<lower = 0> param2 = mean_SI/(sd_SI^2);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(S_R)>[N1] e;
vector<lower = 0, upper = max(E_R)>[N2] E_L_est;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N1) {
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
for (m in 1:N2)
E_L_est[m] = E_R[N1 + m] * E_L_est_raw[m];
}
model {
mean_SI ~ normal(5.0, 10.0);
sd_SI ~ cauchy(0, 5.0);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
t ~ normal(5.0, 10.0);
E_L_est_raw ~ normal(0.5, 1.0);
for (k in 1:N1)
target += gamma_lpdf(s[k] - e[k] | param1, param2);
for (m in 1:N2) {
target += gamma_lpdf(t[m] | param1, param2) + gamma_lcdf(s[N1 + m] - E_L_est[m] | param1, param2);
if (s[N1 + m] > E_R[N1 + m])
target += gamma_lccdf(s[N1 + m] - E_R[N1 + m] | param1, param2);
}
}
generated quantities {
vector[N] log_likelihood;
for (k in 1:N1)
log_likelihood[k] = gamma_lpdf(s[k] - e[k] | param1, param2);
for (m in 1:N2) {
log_likelihood[N1 + m] = gamma_lpdf(t[m] | param1, param2) + gamma_lcdf(s[N1 + m] - E_L_est[m] | param1, param2);
if (s[N1 + m] > E_R[N1 + m])
log_likelihood[N1 + m] += gamma_lccdf(s[N1 + m] - E_R[N1 + m] | param1, param2);
}
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../CmdStan"
stanscriptdir = "../Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei-more_accurate/"%&%standirname
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=10000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}
idx = length(filenames)
{
## main dir for Stan simulations
standirname = 'stan-sims/'%&%filenames[idx]%&%"-weibull-no_truncation"
unlink(standirname, recursive=T)
dir.create(standirname)
datafilename = "../../data/"%&%filenames[idx]%&%".csv"
read.table(datafilename, sep=",", header=T) %>% select(EL,ER,SL,SR,tstar) %>% mutate(dist = (SR+SL)/2-(ER+EL)/2) -> df -> df
df %>% filter(EL>0) -> df1 # E_L is defined
df %>% filter(EL==0) -> df2 # E_L is missing
df = rbind(df1,df2) # we move all incomplete records to the end of the dataframe
# Dumping data
N = nrow(df)
N2 = nrow(df2)
N1 = N - N2
E_L = df$EL
E_R = df$ER
S_L = df$SL
S_R = df$SR
stan_rdump(c('E_L', 'E_R', 'S_L', 'S_R', 'N', 'N2'), file=standirname%&%"/Data.R")
# Dumping initial conditions
logmean_SI = log(mean(df$dist))
param1 = 1.75
s_raw = rep(.6, N)
e_raw = rep(.4, N1)
t = rep(5.0, N2)
E_L_est_raw = rep(.5, N2)
stan_rdump(c('t', 'E_L_est_raw', 's_raw', 'e_raw', 'logmean_SI', 'param1'), file=standirname%&%"/Init.R")
# Stan program
"data {
int<lower = 0> N; // number of records
int<lower = 0> N2; // number of incomplete records
vector<lower = 0>[N] E_L;
vector<lower = 0>[N] E_R;
vector<lower = 0>[N] S_L;
vector<lower = 0>[N] S_R;
}
transformed data {
int<lower = 0> N1 = N - N2;
}
parameters {
real<lower = 0> mean_SI;
real<lower = 0> param1;
vector<lower = 0, upper = 1>[N] s_raw;
vector<lower = 0, upper = 1>[N1] e_raw;
vector<lower = 0>[N2] t;
vector<lower = 0, upper = 1>[N2] E_L_est_raw;
}
transformed parameters {
real<lower = 0> param2 = mean_SI/tgamma(1.0+1.0/param1);
vector<lower = min(S_L), upper = max(S_R)>[N] s;
vector<lower = min(E_L), upper = max(S_R)>[N1] e;
vector<lower = 0, upper = max(E_R)>[N2] E_L_est;
s = S_L + (S_R - S_L) .* s_raw;
for (k in 1:N1) {
if (E_R[k] > s[k])
e[k] = E_L[k] + (s[k] - E_L[k]) * e_raw[k];
else
e[k] = E_L[k] + (E_R[k] - E_L[k]) * e_raw[k];
}
for (m in 1:N2)
E_L_est[m] = E_R[N1 + m] * E_L_est_raw[m];
}
model {
mean_SI ~ normal(5.0, 10.0);
param1 ~ exponential(0.0001);
e_raw ~ normal(0.5, 1.0);
s_raw ~ normal(0.5, 1.0);
t ~ normal(5.0, 10.0);
E_L_est_raw ~ normal(0.5, 1.0);
for (k in 1:N1)
target += weibull_lpdf(s[k] - e[k] | param1, param2);
for (m in 1:N2) {
target += weibull_lpdf(t[m] | param1, param2) + weibull_lcdf(s[N1 + m] - E_L_est[m] | param1, param2);
if (s[N1 + m] > E_R[N1 + m])
target += weibull_lccdf(s[N1 + m] - E_R[N1 + m] | param1, param2);
}
}
generated quantities {
real sd_SI = param2*sqrt(tgamma(1.0+2.0/param1)-(tgamma(1.0+1.0/param1))^2);
vector[N] log_likelihood;
for (k in 1:N1)
log_likelihood[k] = weibull_lpdf(s[k] - e[k] | param1, param2);
for (m in 1:N2) {
log_likelihood[N1 + m] = weibull_lpdf(t[m] | param1, param2) + weibull_lcdf(s[N1 + m] - E_L_est[m] | param1, param2);
if (s[N1 + m] > E_R[N1 + m])
log_likelihood[N1 + m] += weibull_lccdf(s[N1 + m] - E_R[N1 + m] | param1, param2);
}
}" %>% cat(file=standirname %&% "/fit.stan", sep="", fill=TRUE)
standistribdir = "../../../../CmdStan"
stanscriptdir = "../Dropbox/Hokkaido_Wuhan_IncubationPeriod_2020/scripts/Andrei-more_accurate/"%&%standirname
## bash file
"#!/bin/bash
cwd=$(pwd)
cd "%&%standistribdir%&%"
make -j6 "%&%stanscriptdir%&%"/fit
cd "%&%stanscriptdir%&%"
mkdir -p diagnostics
for i in {1..4}
do
echo Running ${i}
SEEDNUMBER=$((1+$i))
./fit \\
method=sample num_samples=10000 num_warmup=10000 save_warmup=0 \\
adapt delta=0.98 \\
algorithm=hmc \\
engine=nuts \\
random seed=${SEEDNUMBER} \\
id=$i \\
data file=Data.R \\
init=Init.R \\
output file=trace-$i.csv \\
diagnostic_file=diagnostics/diagnostics-$i.csv > diagnostics/output-$i.txt &
done
echo Finished sampling haha!
" %>% cat(file=standirname%&%"/fit.sh", sep="", fill=TRUE)
## running the bash script
system("bash "%&%standirname%&%"/fit.sh", intern = TRUE)
}