-
Notifications
You must be signed in to change notification settings - Fork 0
/
mcmcreg.R
260 lines (188 loc) · 10.1 KB
/
mcmcreg.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#######################################
## author: Rob Williams ##
## contact: [email protected] ##
## project: misc R functions ##
## created: November 4, 2017 ##
## updated: November 7, 2017 ##
#######################################
## this function is a wrapper to texreg() which produces publication quality
## regression table with interval measures of uncertainty for MCMC output. the
## user can specify the confidence level of the interval, as well as credible or
## highest posterior density intervals. many of the arguments are standard texreg
## arguments that are passed to texreg() at the end of the function to create
## the table.
## inspiration drawn from Johannes Karreth's mcmctab function
## depends on texreg and whichever MCMC package the model object(s) require
## arguments:
## mod: a single MCMC model object, or a list of model objects of the same class.
## pars: a scalar or vector of the parameters you wish to include in the table.
## stanfit objects can use either the individual parameter names, or the
## names of the indexed parameter to retrieve the entire parameter e.g.
## pars = 'beta' will return beta[1], beta[2], and beta[3] for a stanfit
## model with a three element beta parameter. parameter arguments for all
## other model object types must contain the entire set of parameters you
## wish to include in the table. when combining models with different
## parameters in one table, this argument also accepts a list the length
## of the number of models. this is necessary when combining JAGS models
## with different parameters because coda and JAGS related packages do not
## support parameter family extraction like Stan.
## point_est: a character indicating whether to use the mean or median for point
## estimates in the table.
## ci: a scalar indicating the confidence level of the uncertainty intervals.
## hpdi: a logical indicating whether to use highest posterior density intervals.
## ci_test: value to compare for the interval test. set to NULL for no stars.
## model_names: an optional vector of models names.
## custom_coef: an optional vector or list of vectors containing parameter names
## for each model. if there are multiple models, the list must have
## the same number of elements as there are models, and the vector
## of names in each list element must match the number of parameters.
## if not supplied, the function will use the parameter names in the
## model object(s).
## gof: a named list of goodness of fit statistics, or a list of such lists
## caption: an optional caption for the table.
## label: an optional label for the table.
## reorder_coef: an optional vector reordering the coefficients in the final
## texreg table
## sideways: a logical indicating whether to rotate the table; this function sets
## use.packages = F in texreg(), so you will have to add the rotating
## package to your preamble.
## filename: an optional character giving the name of a file to save the table to
mcmcreg <- function(mod, pars, point_est = 'mean', ci = .95, hpdi = F, ci_test = 0,
model_names = NULL, custom_coef = NULL, gof = numeric(0),
gof_names = character(0), caption, label = NULL, groups = NULL,
reorder_coef = NULL, coef_map = NULL, sideways = F, float_pos = '',
filename, format = 'latex', scalebox = NULL) {
## if only one model object, coerce to a list
if (class(mod) != 'list') mod <- list(mod)
## if only one parameter vector, coerce to a list
if (class(pars) != 'list') pars <- list(pars)
## if only one gof statistic scalar or vector, coerce to a list
if (class(gof) != 'list') gof <- list(rep(gof, times = length(mod)))
## if only one gof statistic name scalar or vector, coerce to a list
if (class(gof_names) != 'list') gof_names <- list(gof_names)
## if no caption, assign default
if (missing(caption) & length(mod) > 1) caption <- 'Statistical Models'
if (missing(caption) & length(mod) == 1) caption <- 'Statistical Model'
## extract samples and variable names from stanfit object
if (lapply(mod, class)[[1]] == 'stanfit') {
## extract coefficient names from list of model ojects
coef_names <- mapply(function(x, y) rownames(rstan::summary(x, pars = y)$summary),
mod, pars, SIMPLIFY = F)
## extract posterior samples from list of model objects
samps <- mapply(function(x, y) as.data.frame(rstan::extract(x, pars = y)),
mod, pars, SIMPLIFY = F)
}
## extract samples and variable names from runjags object
if (lapply(mod, class)[[1]] == 'runjags') {
## extract posterior samples from list of model objects
samps <- mapply(function(x, y) runjags:::as.mcmc.list.runjags(x, vars = y),
mod, pars, SIMPLIFY = F)
## average over chains and convert to dataframe
samps <- lapply(samps, function(x) as.data.frame(Reduce("+", x) / length(x)))
## extract coefficient names from dataframe
coef_names <- lapply(samps, colnames)
}
## extract samples and variable names from mcmc.list object
if (lapply(mod, class)[[1]] == 'mcmc.list') {
## extract posterior samples from list of model objects
samps <- lapply(mod, function(x) as.data.frame(Reduce("+", x) / length(x)))
## drop columns not in pars
samps <- mapply(function(x, y) x[, colnames(x) %in% y], samps, pars,
SIMPLIFY = F)
## extract coefficient names from dataframe
coef_names <- lapply(samps, colnames)
}
## extract samples and variable names from mcmc object
if (lapply(mod, class)[[1]] == 'mcmc') {
## extract posterior samples from list of model objects
samps <- mapply(function(x) coda:::as.data.frame.mcmc(x, vars = y),
mod, pars, SIMPLIFY = F)
## extract coefficient names from dataframe
coef_names <- lapply(samps, colnames)
}
## calculate point estimate of posterior density
if (point_est == 'mean') {
samps_pe <- lapply(samps, function(x) apply(x, 2, mean))
} else {
samps_pe <- lapply(samps, function(x) apply(x, 2, median))
}
## calculate uncertainty interval for ci argument
if (hpdi == F) {
samps_ci <- lapply(samps, function(x) apply(x, 2, quantile,
probs = c(.5 - ci/2, .5 + ci/2)))
} else {
samps_ci <- lapply(samps, function(x) t(coda::HPDinterval(coda::as.mcmc(x),
prob = ci)))
}
## if coefficent names supplied, replace names from model object(s)
if (!is.null(custom_coef) & !is.list(custom_coef)) coef_names <- list(custom_coef)
if (!is.null(custom_coef)) coef_names <- custom_coef
##
if (length(mod) != length(coef_names)) {
stop('number of models does not match number of custom coefficient vectors')
}
## create list of texreg object(s) with point estimates and interval
tr_list <- mapply(function(v, w, x, y, z) texreg::createTexreg(coef.names = v,
coef = w,
ci.low = x[1, ],
ci.up = x[2, ],
gof = y,
gof.names = z),
coef_names, samps_pe, samps_ci, gof, gof_names)
## create LaTeX output
if (format == 'latex') {
## create LaTeX code
tr <- texreg::texreg(l = tr_list, custom.model.names = model_names,
caption = caption, label = label, ci.test = ci_test,
sideways = sideways, reorder.coef = reorder_coef,
custom.coef.map = coef_map, groups = groups,
scalebox = scalebox, float.pos = float_pos,
use.packages = F)
## replace confidence w/ credible or highest posterior density in texreg output
if (hpdi == F) {
tr <- sub('outside the confidence interval',
paste('outside ', ci * 100 ,'\\\\% credible interval', sep = ''),
tr)
} else {
tr <- sub('outside the confidence interval',
paste('outside ', ci * 100 ,'\\\\% highest posterior density interval',
sep = ''), tr)
}
## return LaTeX code to console or write to file
if (missing(filename)) {
return(tr)
} else {
## remove newline at start of LaTeX code
tr <- sub('^\\n', '', tr)
tex_file <- file(paste(filename, 'tex', sep = '.'))
writeLines(tr, tex_file, sep = '')
close(tex_file)
}
}
## create HTML output
if (format == 'html') {
hr <- texreg::htmlreg(l = tr_list, custom.model.names = model_names,
caption = caption, ci.test = ci_test,
reorder.coef = reorder_coef, custom.coef.map = coef_map,
groups = groups)
## replace confidence w/ credible or highest posterior density in texreg output
if (hpdi == F) {
hr <- sub('outside the confidence interval',
paste('outside ', ci * 100, '% credible interval', sep = ''),
hr)
} else {
tr <- sub('outside the confidence interval',
paste('outside ', ci * 100, '% highest posterior density interval',
sep = ''), hr)
}
## return html code to console or write to file
if (missing(filename)) {
return(hr)
} else {
hmtl_file <- file(paste(filename, 'html', sep = '.'))
writeLines(hr, html_file, sep = '')
close(html_file)
}
}
}
## need to figure out how to get log likelihood matrix from various objects for WAIC