Resources

About generalized linear models, including the poisson and negative binomial:

https://jrnold.github.io/bayesian_notes/generalized-linear-models.html

http://mc-stan.org/rstanarm/articles/count.html

About model comparison:

http://mc-stan.org/loo/articles/loo2-example.html

About the dispersion parameter for the negative binomial distribution:

https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations

http://andrewgelman.com/2018/04/03/justify-my-love/

Setup

Data

data(roaches)
roaches <- as_tibble(roaches)
covariates <- 
  roaches %>%
  mutate(roach1 = scale(roach1)) %>%
  dplyr::select(roach1, treatment, senior) 

d <- list(
  y = roaches$y,
  x = as.matrix(covariates),
  offset = roaches$exposure2
)

d$N <- length(d$y)
d$K <- ncol(d$x)

Models

Poisson

poisson_fit <- stan("stan/poisson.stan", data = d)
print(poisson_fit, pars = c("a", "b", "lp__"))
## Inference for Stan model: poisson.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##          mean se_mean   sd     2.5%      25%      50%      75%    97.5%
## a        2.41    0.00 0.02     2.37     2.40     2.41     2.42     2.45
## b[1]     0.51    0.00 0.01     0.50     0.51     0.51     0.52     0.53
## b[2]    -0.63    0.00 0.02    -0.68    -0.64    -0.63    -0.61    -0.58
## b[3]    -0.70    0.00 0.03    -0.76    -0.72    -0.70    -0.68    -0.64
## lp__ 17093.26    0.03 1.40 17089.62 17092.59 17093.57 17094.28 17094.96
##      n_eff Rhat
## a     2723    1
## b[1]  3338    1
## b[2]  3060    1
## b[3]  3072    1
## lp__  1705    1
## 
## Samples were drawn using NUTS(diag_e) at Sat May  5 14:38:10 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Negative binomial

neg_binomial_fit <- stan("stan/neg_binomial.stan", data = d)
print(neg_binomial_fit, pars = c("a", "b", "phi"))
## Inference for Stan model: neg_binomial.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##       mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
## a     2.39       0 0.21  2.00  2.26  2.39  2.53  2.81  2512    1
## b[1]  0.99       0 0.19  0.65  0.86  0.99  1.11  1.40  3268    1
## b[2] -0.78       0 0.24 -1.25 -0.95 -0.78 -0.62 -0.30  2710    1
## b[3] -0.32       0 0.26 -0.82 -0.49 -0.32 -0.15  0.21  3396    1
## phi   0.27       0 0.03  0.22  0.25  0.27  0.28  0.32  4000    1
## 
## Samples were drawn using NUTS(diag_e) at Sat May  5 14:38:19 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

LOO-CV

loo_poisson <- loo(extract_log_lik(poisson_fit))
## Warning: Relative effective sample sizes ('r_eff' argument) not specified.
## For models fit with MCMC, the reported PSIS effective sample sizes and 
## MCSE estimates will be over-optimistic.
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
## Warning in log(z): NaNs produced

## Warning in log(z): NaNs produced

## Warning in log(z): NaNs produced

## Warning in log(z): NaNs produced

## Warning in log(z): NaNs produced
loo_poisson
## 
## Computed from 4000 by 262 log-likelihood matrix
## 
##          Estimate     SE
## elpd_loo  -6797.0  886.6
## p_loo       367.3  104.7
## looic     13594.0 1773.2
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     238   90.8%   254       
##  (0.5, 0.7]   (ok)         8    3.1%   102       
##    (0.7, 1]   (bad)        9    3.4%   12        
##    (1, Inf)   (very bad)   7    2.7%   1         
## See help('pareto-k-diagnostic') for details.
loo_neg_binomial <- loo(extract_log_lik(neg_binomial_fit))
## Warning: Relative effective sample sizes ('r_eff' argument) not specified.
## For models fit with MCMC, the reported PSIS effective sample sizes and 
## MCSE estimates will be over-optimistic.
## Warning: Some Pareto k diagnostic values are slightly high. See help('pareto-k-diagnostic') for details.
loo_neg_binomial
## 
## Computed from 4000 by 262 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo   -896.8 37.7
## p_loo         6.4  2.4
## looic      1793.7 75.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     261   99.6%   601       
##  (0.5, 0.7]   (ok)         1    0.4%   164       
##    (0.7, 1]   (bad)        0    0.0%   <NA>      
##    (1, Inf)   (very bad)   0    0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
compare(loo_poisson, loo_neg_binomial)
## elpd_diff        se 
##    5900.2     868.8

WAIC

compare(
  waic(extract_log_lik(poisson_fit)),
  waic(extract_log_lik(neg_binomial_fit))
)
## Warning: 49 (18.7%) p_waic estimates greater than 0.4. We recommend trying
## loo instead.
## Warning: 2 (0.8%) p_waic estimates greater than 0.4. We recommend trying
## loo instead.
## elpd_diff        se 
##    6020.2     915.8

PPC

See this vignette for extracting information from a stanfit object:

http://mc-stan.org/rstan/articles/stanfit_objects.html

yrep_pois <- rstan::extract(poisson_fit, pars = "y_rep")$y_rep
yrep_pois_alt <- as.matrix(poisson_fit, pars = "y_rep")
# extract() permutes the order of the draws, 
# so these two matrices aren't in the same order

ppc_dens_overlay(y = d$y, yrep = yrep_pois[1:50, ]) + xlim(0, 100)
## Warning: Removed 345 rows containing non-finite values (stat_density).
## Warning: Removed 23 rows containing non-finite values (stat_density).

# changing xlim to ignore the long tail
yrep_nb <- rstan::extract(neg_binomial_fit, pars = "y_rep")$y_rep
yrep_pois_alt <- as.matrix(neg_binomial_fit, pars = "y_rep")
# extract() permutes the order of the draws, 
# so these two matrices aren't in the same order

ppc_dens_overlay(y = d$y, yrep = yrep_nb[1:50, ]) + xlim(0, 100)
## Warning: Removed 903 rows containing non-finite values (stat_density).
## Warning: Removed 23 rows containing non-finite values (stat_density).

# changing xlim to ignore the VERY long tail