From b457d9ac6a33de6739cd65a25b30fc1104e80236 Mon Sep 17 00:00:00 2001
From: Alex Knudson
Date: Mon, 9 Nov 2020 23:53:45 -0800
Subject: [PATCH] bad enough to turn into something good

First draft done
---
 050-predictive-inference.Rmd                  | 252 ++++++++++++++----
 docs/050-predictive-inference.md              | 171 ++++++++----
 .../figure-html/ch050-Cold-Fish-1.png         | Bin 0 -> 63309 bytes
 .../figure-html/ch050-Moving-Moose-1.png      | Bin 0 -> 13979 bytes
 .../figure-html/ch050-Olive-Screwdriver-1.png | Bin 0 -> 91902 bytes
 .../ch050-Strawberry-Swallow-1.png            | Bin 0 -> 22594 bytes
 docs/conclusion.html                          |   6 +-
 docs/discussion.html                          |  74 ++---
 docs/index.html                               |   6 +-
 docs/model-checking.html                      |   8 +-
 docs/motivating-data.html                     |   6 +-
 docs/predictive-inferences.html               | 154 +++++++----
 docs/reference-keys.txt                       |   7 +
 docs/references.html                          |   9 +-
 docs/results.html                             |   6 +-
 docs/search_index.json                        |   2 +-
 docs/supplementary-code.html                  |   6 +-
 docs/workflow.html                            |   6 +-
 index.Rmd                                     |   3 +-
 packages.bib                                  |  30 +++
 20 files changed, 550 insertions(+), 196 deletions(-)
 create mode 100644 docs/050-predictive-inference_files/figure-html/ch050-Cold-Fish-1.png
 create mode 100644 docs/050-predictive-inference_files/figure-html/ch050-Moving-Moose-1.png
 create mode 100644 docs/050-predictive-inference_files/figure-html/ch050-Olive-Screwdriver-1.png
 create mode 100644 docs/050-predictive-inference_files/figure-html/ch050-Strawberry-Swallow-1.png

diff --git a/050-predictive-inference.Rmd b/050-predictive-inference.Rmd
index 4880246..416f1a4 100644
--- a/050-predictive-inference.Rmd
+++ b/050-predictive-inference.Rmd
@@ -1,4 +1,6 @@
 ```{r ch050-setup, include=FALSE}
+library(tidyverse)
+library(rethinking)
 library(loo)
 
 l031_av <- loo(readRDS("models/m031.rds"))
@@ -10,74 +12,230 @@ l034_vis <- loo(readRDS("models/m034vis.rds"))
 l034s_vis <- loo(readRDS("models/m034s_vis.rds"))
 ```
-
 # Predictive Inference {#predictive-inferences}
 
-_All models are wrong but some are useful_
+_All models are wrong, but some are useful_
 
-The above quote is from George Box, and it is a popular quote that statisticians like to throw around^[I am one of them]. All models are wrong because it is nearly impossible to account for the minutiae of every process that contributes to an observed phenomenon, and often trying to results in poorer performing models.
+The above quote is from George Box, and it is a popular quote that statisticians like to throw around^[I am one of them]. All models are wrong because it is nearly impossible to account for the minutiae of every process that contributes to an observed phenomenon, and often trying to do so results in poorer-performing models. Also, is it ever truly possible to _prove_ that a model is correct? At best our scientific method can falsify certain hypotheses, but it can never tell us if a model is universally correct. In the end that doesn't matter. What does matter is whether the model is useful and can make accurate predictions.
 
-*why is predictive performance the right model selection/comparison criteria*
+Why is predictive performance so important? Consider five data points (figure \@ref(fig:ch050-Moving-Moose)). I have simulated them from a polynomial equation of degree less than five, but with no more information than that, how can the best polynomial model be selected?
-- idea of "geocentric" models: wrong models that still predict well -- notions overfitting/underfitting: -- more parameters leads to better in-sample fit -- a prefect fit to data is always possible -- but predicts poorly (overfit) -- underfitting fails to capture the *regular* features of the data (why regularizing priors are important) +```{r ch050-Moving-Moose, fig.cap="Five points from a polynomial model."} +set.seed(11) +x0 <- 0:5 +y0 <- -0.5*(x0 - 2)^2 + 2 + rnorm(length(x0), 0, 1) +x <- head(x0, 5) +y <- head(y0, 5) +p <- ggplot(data.frame(x=x, y=y), aes(x, y)) + + geom_point(size = 4) +p +``` +One thing to try is fit a handful of linear models, check the parameter's p-values, the $R^2$ statistic, and perform other goodness of fit tests, but there is a problem. As you increase the degree of the polynomial fit, the $R^2$ statistic will always increase. In fact with five data points, a fourth degree polynomial will fit the data perfectly (figure \@ref(fig:ch050-Olive-Screwdriver)). + + +```{r ch050-Olive-Screwdriver, fig.cap="Data points with various polynomial regression lines."} +p2 <- p + + geom_smooth(formula = y ~ 1 + x, + method = "lm", se = FALSE, + aes(color = "Linear")) + + geom_smooth(formula = y ~ 1 + x + I(x^2), + method = "lm", se = FALSE, + aes(color = "Quadratic")) + + geom_smooth(formula = y ~ 1 + x + I(x^2) + I(x^3), + method = "lm", se = FALSE, + aes(color = "Cubic")) + + geom_smooth(formula = y ~ 1 + x + I(x^2) + I(x^3) + I(x^4), + method = "lm", se = FALSE, + aes(color = "Quartic")) + + geom_point(size = 4) + + scale_color_manual(values = c("#20639B", + "#3CAEA3", + "#F6D55C", + "#ED553B"), + limits = c("Linear", + "Quadratic", + "Cubic", + "Quartic"), + labels = c("Linear", + "Quadratic", + "Cubic", + "Quartic"), + name = "Degree") +p2 +``` -I think you covered this already in Ch. 1 and 2 but here is more thoughts: -The PI's predictive philosophy has evolved to prefer this reference model approach. -Early on statisticians are usually taught to prefer *parsimony* or simple models. -The idea is that this guards against *overfitting* and also boosts power to detect *statistically significant* effects. +If I were to add a $6^{th}$ point - a new observation - which of the models would you expect to do best? Can it be estimated which model will predict best before testing with new data? One guess is that the quadratic or cubic model will do well because because the linear model is potentially _underfit_ to the data and the quartic is _overfit_ to the data. Figure \@ref(fig:ch050-Cold-Fish) shows the new data point from the polynomial model. Now the linear and cubic models are trending in the wrong direction. The quadratic and quartic models are both trending down, so may be the correct form for the model. -Also computation limitations made small models preferable. -But in modern statistical learning, we tend to include all relevant data with elaborate probabilitistc structures. -The idea is to include all the data with the aim of squeezing all predictive ability from the data points. +```{r ch050-Cold-Fish, fig.cap="The fitted polynomial models with a new observation."} +p2 + + geom_point(data = data.frame(x = tail(x0, 1), y = tail(y0, 1)), + shape = 17, size = 4) +``` -- not sure where this goes, but make sure you say that 1 model is not sufficient, we need a collection (or series/sequence) of models. 
+Figure \@ref(fig:ch050-Strawberry-Swallow) shows the 80% and 95% prediction intervals for a new observation given $x = 5$, as well as the true outcome as a dashed line at $y = `r round(tail(y0, 1), 3)`$. The linear model has the smallest prediction interval (PI), but completely misses the target. The remaining three models all include the observed value in their 95% PIs, but the quadratic has the smallest PI of the three. The actual data generating polynomial is
+
+$$
+y \sim \mathcal{N}(\mu, 1^2) \\
+\mu = -0.5(x - 2)^2 + 2
+$$
+
+```{r ch050-Maximum Panther, include=FALSE}
+d <- data.frame(y = y,
+                x = x,
+                x2 = x^2,
+                x3 = x^3,
+                x4 = x^4)
+
+m1 <- xfun::cache_rds({
+  ulam(alist(
+    y ~ dnorm(mu, sigma),
+    mu <- a + b1*x,
+    c(a, b1) ~ dnorm(0, 2),
+    sigma ~ dexp(1)
+  ), data = d, log_lik=TRUE)
+})
+m2 <- xfun::cache_rds({
+  ulam(alist(
+    y ~ dnorm(mu, sigma),
+    mu <- a + b1*x + b2*x2,
+    c(a, b1, b2) ~ dnorm(0, 2),
+    sigma ~ dexp(1)
+  ), data = d, log_lik=TRUE)
+})
+m3 <- xfun::cache_rds({
+  ulam(alist(
+    y ~ dnorm(mu, sigma),
+    mu <- a + b1*x + b2*x2 + b3*x3,
+    c(a, b1, b2, b3) ~ dnorm(0, 2),
+    sigma ~ dexp(1)
+  ), data = d, log_lik=TRUE)
+})
+m4 <- xfun::cache_rds({
+  ulam(alist(
+    y ~ dnorm(mu, sigma),
+    mu <- a + b1*x + b2*x2 + b3*x3 + b4*x4,
+    c(a, b1, b2, b3, b4) ~ dnorm(0, 2),
+    sigma ~ dexp(1)
+  ), data = d, log_lik=TRUE)
+})
+```
-transitional sentence: given that we want to compare models (and possibly select), how to quantifying
+
+```{r ch050-Strawberry-Swallow, fig.cap="80% and 95% prediction intervals for the four polynomial models, as well as the true value (dashed line)."}
+new_y <- tail(y0, 1)
+new_x <- tail(x0, 1)
+new_dat <- data.frame(x = new_x,
+                      x2 = new_x^2,
+                      x3 = new_x^3,
+                      x4 = new_x^4)
+bind_rows(
+  round(PI(link(m1, data = new_dat), prob = c(0.8, 0.95)), 3),
+  round(PI(link(m2, data = new_dat), prob = c(0.8, 0.95)), 3),
+  round(PI(link(m3, data = new_dat), prob = c(0.8, 0.95)), 3),
+  round(PI(link(m4, data = new_dat), prob = c(0.8, 0.95)), 3)
+) %>%
+  add_column(model = c("Linear", "Quadratic", "Cubic", "Quartic"),
+             .before = 1) %>%
+  mutate(model = factor(model,
+                        levels = c("Linear", "Quadratic", "Cubic", "Quartic"))) %>%
+  ggplot(aes(x = model, xend = model, y = `3%`, yend = `98%`)) +
+  geom_segment(size = 1, color = "gray30") +
+  geom_segment(aes(x = model, xend = model, y = `10%`, yend = `90%`),
+               size = 2, inherit.aes = FALSE) +
+  geom_hline(yintercept = new_y, linetype = "dashed") +
+  labs(y = "Prediction Interval", x = "Model",
+       title = "Predicting a new Observation",
+       subtitle = "x = 5, 80% and 95% Prediction Intervals")
+```
-*Quantifying predictive performance*
+
+This is just a toy example, and real-world models are often more complex, but they present the same headaches when it comes to model and feature selection and goodness-of-fit checks. Clearly the quartic model has the best fit to the observed data, but it is too variable and doesn't capture the regular features of the data, so it does poorly for out-of-sample prediction. The linear model suffers in the opposite way: it is too biased and inflexible to capture the structure of the data. The quadratic and cubic are in the middle of the road, but the quadratic predicts well while making the fewest assumptions about the data - it is just complex enough.
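+
+Prediction quality can also be put on a number. As a rough preview of the pointwise quantity used later in this chapter, the sketch below computes the log predictive density of the held-out point under the quadratic and quartic models. This is only an illustration: it reuses the parameter names from the `ulam` calls above, and averaging the predictive density over posterior draws before taking the log is the convention adopted later in the chapter. A higher value means the new observation was less surprising to that model.
+
+```{r ch050-new-point-lpd, echo=TRUE}
+# posterior draws for the quadratic and quartic models
+post2 <- extract.samples(m2)
+post4 <- extract.samples(m4)
+
+# mean function of each model evaluated at the held-out point x = 5
+mu2 <- post2$a + post2$b1*new_x + post2$b2*new_x^2
+mu4 <- post4$a + post4$b1*new_x + post4$b2*new_x^2 + post4$b3*new_x^3 + post4$b4*new_x^4
+
+# log of the average predictive density of the observed y at x = 5
+c(quadratic = log(mean(dnorm(new_y, mu2, post2$sigma))),
+  quartic   = log(mean(dnorm(new_y, mu4, post4$sigma))))
+```
+
+If the quartic is as overfit as figure \@ref(fig:ch050-Strawberry-Swallow) suggests, its value here should be noticeably lower than the quadratic's.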
+_Information criteria_ are a way of weighing the prediction quality of a model against its complexity, and are arguably a better basis for model selection and comparison than goodness-of-fit statistics such as $R^2$ or p-values.
-- log posterior predictive (more below) and information theory (if you want to talk about that at all)
-- cross-validation, loo, WAIC
-- and estimates of loo. loo psis
-- @vehtari2017practical
+
+## Model Comparison via Predictive Performance
-*some notes from my grant posterior*. rewrite this for your glm based model.
-Given a model $M$ with posterior predictive distribution $p( \tilde{T} | \tilde{x}, D$ for a new survival time $\tilde{T}$ with observed data $D$ with feature vector $\tilde{x}$.
-We evaluate predictive performance using the **logarithm of the predictive density (LPD)** evaluated pointwise at the actual observation $( \tilde{t}, \tilde{x}, M)$ [@Peltola2014; @Piironen2017b].
-LPD is a proper scoring rule and measures both the **calibration** and **sharpness** of the predictive distribution [@Gneiting2007].
-With omit technical definitions of these concepts, but loosely calibration means the statistical consistency between the predictive distribution and the observations (errors on the order).
-Sharpness, on the other hand, refers to how concentrated the predictive posterior (how precisely forecasted).
-Typically we don't have the analytic form of the predictive posterior, so instead we use $J$ MCMC draws to approximate the LPD [@Peltola2014]:
+
+We don't always have new observations to compare predictions against (nor the data generating model). One technique that compensates for this limitation is cross validation, where the data are split into _training_ data and _testing_ data. The model is fit to the training data, and then predictions are made for the testing data and compared to the observed values. This often gives a good estimate of the out-of-sample prediction error. Cross validation can be extended to $k$-fold cross validation: the data are _folded_ into $k$ disjoint partitions, and partition $i$ is predicted using a model trained on the rest of the data. The prediction errors from the $k$ folds are then averaged to estimate the out-of-sample prediction error.
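+
+To make the procedure concrete, here is a minimal sketch of $k$-fold cross validation on the toy data. The random fold assignment, the ordinary-least-squares quadratic fit, and squared error as the loss are my own illustrative choices here, not part of the models above.
+
+```{r ch050-kfold-sketch, echo=TRUE}
+# assign each of the five observations to one of k folds
+k <- 5
+folds <- sample(rep_len(1:k, length(x)))
+
+# for each fold: train on the other folds, then predict the held-out fold
+cv_err <- sapply(1:k, function(i) {
+  train <- data.frame(x = x[folds != i], y = y[folds != i])
+  test  <- data.frame(x = x[folds == i], y = y[folds == i])
+  fit   <- lm(y ~ x + I(x^2), data = train)
+  mean((test$y - predict(fit, newdata = test))^2)
+})
+
+# average the fold errors to estimate out-of-sample prediction error
+mean(cv_err)
+```
+
+With only five observations, setting $k = 5$ makes every fold a single point, which is exactly the leave-one-out case discussed next.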
-\begin{equation}
-  LPD(M) \approx \frac{1}{J} \Sigma_{j=1}^{J} log p( \tilde{t} | \tilde{x}, D, \theta^{(j)} ),
-\end{equation}
+
+Taking $k$-fold CV to the limit by letting $k = n$, the number of observations, results in something called _leave one out cross validation_ (LOOCV), where for each observation in the data, the model is fit to the remaining data and a prediction is made for the left-out observation. The downside of $k$-fold cross validation is that it requires fitting the model $k$ times - $n$ times for LOOCV - which can be computationally expensive for complex Bayesian models. Thankfully there is a way to approximate LOOCV without having to refit the model many times.
-where $\theta^{(j)}$ is the posterior parameter vector from the $j$th posterior sample.
+
+### LOOCV and Importance Sampling
-Further we'll like a metric of general predictive performance and so compute the average over $n$ data points:
+
+LOOCV and many other evaluation tools such as WAIC rest on the _log-pointwise-predictive-density_ (lppd), which is a loose measure of deviance from some "true" probability distribution. Typically we don't have the analytic form of the predictive posterior, so instead we use $S$ MCMC draws to approximate the lppd [@vehtari2017practical]:
+
+$$
+\mathrm{lppd} = \sum_i \log \frac{1}{S} \sum_s p(y_{i} | \Theta_{s})
+$$
+
+where $\Theta_{s}$ is the $s^{th}$ set of parameter values sampled from the posterior distribution.
-Further, we'd like to compare the MLPD value of a model $M$ and another model $M^*$ (possibly a reference model or competing model):
+
+To estimate LOOCV, the relative "importance" of each observation must be computed. Certain observations have more influence on the posterior distribution, and so the posterior changes more when they are left out. The intuition behind measuring importance is that a highly influential observation is relatively unlikely under the model, while a less influential one is relatively expected. Omitting one observation at a time and evaluating the log predictive density of the held-out point gives the out-of-sample lppd. For each omitted $y_i$,
+
+$$
+\mathrm{lppd}_{CV} = \sum_i \frac{1}{S} \sum_s \log p(y_{i} | \Theta_{-i,s})
+$$
+
+where $\Theta_{-i,s}$ is the $s^{th}$ set of parameter values sampled from the posterior fit to the data with observation $i$ left out.
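+
+As a sketch of where these quantities come from computationally, the chunk below builds the pointwise log-likelihood matrix for the toy quadratic model by hand - in the models later in this chapter the same matrix comes from the `log_lik` variable computed in Stan - and uses it to get the in-sample lppd and, via the `loo` package introduced below, an approximation to the out-of-sample version.
+
+```{r ch050-lppd-sketch, echo=TRUE}
+# pointwise log-likelihood matrix: S posterior draws (rows) by N observations (columns)
+post <- extract.samples(m2)
+log_lik <- sapply(seq_along(x), function(i) {
+  mu_i <- post$a + post$b1*x[i] + post$b2*x[i]^2
+  dnorm(y[i], mean = mu_i, sd = post$sigma, log = TRUE)
+})
+
+# in-sample lppd: average the likelihood over draws first, then take the log and sum
+sum(log(colMeans(exp(log_lik))))
+
+# the importance of draw s for observation i is proportional to 1 / exp(log_lik[s, i]);
+# loo smooths these ratios and returns the PSIS-LOO estimate of the out-of-sample version
+# (in practice the r_eff argument should also be supplied to account for MCMC autocorrelation)
+loo::loo(log_lik)
+```
+
+Computing $\mathrm{lppd}_{CV}$ exactly would require refitting the model once per observation; the `loo` call instead approximates it by reweighting the existing posterior draws.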
+
+The raw importance weights can be noisy, so @vehtari2017practical stabilize them with Pareto smoothed importance sampling (PSIS); this PSIS-LOO approximation is what is used for the rest of this chapter.
+
+There is a package called `loo` that can compute the expected log-pointwise-predictive-density (ELPD) using PSIS-LOO, as well as the estimated number of effective parameters and the LOO information criterion [@R-loo]. On the researcher's part, the log-likelihood of each observation must be computed in the model. For my models, I added this in the _generated quantities_ block of my Stan program. It is standard practice to name the log-likelihood `log_lik` in the model.
+
+```
+generated quantities {
+  vector[N] log_lik;
+
+  for (i in 1:N) {
+    real alpha = b + bGT[G[i], trt[i]];
+    real beta = a + aGT[G[i], trt[i]];
+    real lambda = lG[G[i]];
+    real p = lambda + (1 - 2*lambda) * inv_logit(exp(beta) * (x[i] - alpha));
+    log_lik[i] = binomial_lpmf(k[i] | n[i], p);
+  }
+}
+```
+
+Models can be compared simply using `loo::loo_compare`. It estimates the ELPD and its standard error for each model, then calculates the relative differences between all the models. The model with the highest ELPD is predicted to have the best out-of-sample predictions. The comparison of the first three iterations of the model from [chapter 3](#workflow) for the audiovisual data is shown below.
+
+```{r ch050-Artificial Bleeding, echo=TRUE}
+comp_av <- loo_compare(l031_av, l032_av, l032nc_av, l033_av)
+print(comp_av, simplify = FALSE)
+```
+
+The centered and non-centered parameterizations (models 2 and 3 respectively) have essentially the same ELPD. This is expected since they are the same model expressed in two different ways - the reparameterization only helps with sampling efficiency, though that can mean more reliable posteriors. The model with age-block interactions (model 4) has the highest ELPD, but it is not decisively the best given the standard error of the difference. The only thing that can be determined is that including age and block improves predictive performance significantly over the base model (model 1).
+
+But how about the visual data? The fourth iteration of the model introduced a lapse rate. Did the change significantly improve the ELPD?
+
+```{r ch050-Full Shower, echo=TRUE}
+comp_vis <- loo_compare(l033_vis, l034_vis)
+print(comp_vis, simplify = FALSE)
+```
+
+Absolutely! Something else interesting also happened with the introduction of the lapse rate - the effective number of parameters (`p_loo`) decreased.
+
+Earlier I argued that model selection is out and model comparison is in. At the end of [chapter 3](#workflow) I finished with a model that has age-block interactions and a lapse rate for each age group. There was one more model that I could have specified - one that also estimates at the subject level. There is no domain-specific reason to include subject-level information, especially since the goal is to make inferences at the age group level, but there may still be a statistical reason to add in the subjects. For one, adding the subject as another level in a multilevel model can induce regularization among the subjects, which can make for better predictions on new data overall.
+
+I've gone ahead and fit the model with subject-level information, and the comparison between this new model and the one from iteration 4 is shown below.
+
+```{r ch050-Skilled Weeknight, echo=TRUE}
+comp_vis2 <- loo_compare(l034_vis, l034s_vis)
+print(comp_vis2, simplify = FALSE)
+```
+
+Including the subject-level information significantly improves the ELPD, and even though there are over 100 parameters in the model (a slope and an intercept for each of the 45 subjects), the effective number of parameters is much smaller. Since this new model is capable of making inferences at both the age group level and the subject level, I use it for the results chapter ([chapter 6](#results)).
+
+One concern does come up with LOOCV and multilevel models: what does it mean to leave _one_ out? Should one subject be left out? One age group? Just one observation? With more levels in a model, more care must be taken in deciding how to estimate predictive performance.
diff --git a/docs/050-predictive-inference.md b/docs/050-predictive-inference.md
index 2cce0d6..25aab64 100644
--- a/docs/050-predictive-inference.md
+++ b/docs/050-predictive-inference.md
@@ -1,70 +1,149 @@
+
+
 # Predictive Inference {#predictive-inferences}
 
-_All models are wrong but some are useful_
+_All models are wrong, but some are useful_
 
-The above quote is from George Box, and it is a popular quote that statisticians like to throw around^[I am one of them]. All models are wrong because it is nearly impossible to account for the minutiae of every process that contributes to an observed phenomenon, and often trying to results in poorer performing models.
+The above quote is from George Box, and it is a popular quote that statisticians like to throw around^[I am one of them]. All models are wrong because it is nearly impossible to account for the minutiae of every process that contributes to an observed phenomenon, and often trying to do so results in poorer-performing models. Also, is it ever truly possible to _prove_ that a model is correct? At best our scientific method can falsify certain hypotheses, but it can never tell us if a model is universally correct. In the end that doesn't matter. What does matter is whether the model is useful and can make accurate predictions.
 
-*why is predictive performance the right model selection/comparison criteria*
+Why is predictive performance so important? Consider five data points (figure \@ref(fig:ch050-Moving-Moose)). I have simulated them from a polynomial equation of degree less than five, but with no more information than that, how can the best polynomial model be selected?
-- idea of "geocentric" models: wrong models that still predict well -- notions overfitting/underfitting: -- more parameters leads to better in-sample fit -- a prefect fit to data is always possible -- but predicts poorly (overfit) -- underfitting fails to capture the *regular* features of the data (why regularizing priors are important) +
+(\#fig:ch050-Moving-Moose) Five points from a polynomial model.
+One thing to try is to fit a handful of linear models, check the parameters' p-values and the $R^2$ statistic, and perform other goodness-of-fit tests, but there is a problem. As you increase the degree of the polynomial fit, the $R^2$ statistic will always increase. In fact, with five data points, a fourth-degree polynomial will fit the data perfectly (figure \@ref(fig:ch050-Olive-Screwdriver)).
-I think you covered this already in Ch. 1 and 2 but here is more thoughts:
-The PI's predictive philosophy has evolved to prefer this reference model approach.
-Early on statisticians are usually taught to prefer *parsimony* or simple models.
-The idea is that this guards against *overfitting* and also boosts power to detect *statistically significant* effects.
-Also computation limitations made small models preferable.
-But in modern statistical learning, we tend to include all relevant data with elaborate probabilitistc structures.
+
+(\#fig:ch050-Olive-Screwdriver) Data points with various polynomial regression lines.
-The idea is to include all the data with the aim of squeezing all predictive ability from the data points.
+If I were to add a $6^{th}$ point - a new observation - which of the models would you expect to do best? Can we estimate which model will predict best before testing against new data? One guess is that the quadratic or cubic model will do well, because the linear model is potentially _underfit_ to the data and the quartic is _overfit_ to the data. Figure \@ref(fig:ch050-Cold-Fish) shows the new data point from the polynomial model. Now the linear and cubic models are trending in the wrong direction. The quadratic and quartic models are both trending down, so one of them may be the correct form for the model.
-- not sure where this goes, but make sure you say that 1 model is not sufficient, we need a collection (or series/sequence) of models. that is why we need to fit models fast in `stan`/HMC
-transitional sentence: given that we want to compare models (and possibly select), how to quantifying
+(\#fig:ch050-Cold-Fish) The fitted polynomial models with a new observation.
+Figure \@ref(fig:ch050-Strawberry-Swallow) shows the 80% and 95% prediction intervals for a new observation given $x = 5$ as well as the true outcome as a dashed line at $y = -3.434$. The linear model has the smallest prediction interval (PI), but completely misses the target. The remaining three models all include the observed value in their 95% PIs, but the quadratic has the smallest PI of the three. The actual data generating polynomial is -*Quantifying predictive performance* +$$ +y \sim \mathcal{N}(\mu, 1^2) \\ +\mu = -0.5(x - 2)^2 + 2 +$$ -- log posterior predictive (more below) and information theory (if you want to talk about that at all) -- cross-validation, loo, WAIC -- and estimates of loo. loo psis -- @vehtari2017practical -*some notes from my grant posterior*. rewrite this for your glm based model. -Given a model $M$ with posterior predictive distribution $p( \tilde{T} | \tilde{x}, D$ for a new survival time $\tilde{T}$ with observed data $D$ with feature vector $\tilde{x}$. -We evaluate predictive performance using the **logarithm of the predictive density (LPD)** evaluated pointwise at the actual observation $( \tilde{t}, \tilde{x}, M)$ [@Peltola2014; @Piironen2017b]. -LPD is a proper scoring rule and measures both the **calibration** and **sharpness** of the predictive distribution [@Gneiting2007]. -With omit technical definitions of these concepts, but loosely calibration means the statistical consistency between the predictive distribution and the observations (errors on the order). -Sharpness, on the other hand, refers to how concentrated the predictive posterior (how precisely forecasted). -Typically we don't have the analytic form of the predictive posterior, so instead we use $J$ MCMC draws to approximate the LPD [@Peltola2014]: -\begin{equation} - LPD(M) \approx \frac{1}{J} \Sigma_{j=1}^{J} log p( \tilde{t} | \tilde{x}, D, \theta^{(j)} ), -\end{equation} -where $\theta^{(j)}$ is the posterior parameter vector from the $j$th posterior sample. -Further we'll like a metric of general predictive performance and so compute the average over $n$ data points: +
+(\#fig:ch050-Strawberry-Swallow) 80% and 95% prediction intervals for the four polynomial models, as well as the true value (dashed line).
-
+This is just a toy example, and real-world models are often more complex, but they present the same headaches when it comes to model and feature selection and goodness-of-fit checks. Clearly the quartic model has the best fit to the observed data, but it is too variable and doesn't capture the regular features of the data, so it does poorly for out-of-sample prediction. The linear model suffers in the opposite way: it is too biased and inflexible to capture the structure of the data. The quadratic and cubic are in the middle of the road, but the quadratic predicts well while making the fewest assumptions about the data - it is just complex enough. _Information criteria_ are a way of weighing the prediction quality of a model against its complexity, and are arguably a better basis for model selection and comparison than goodness-of-fit statistics such as $R^2$ or p-values.
+
## Model Comparison via Predictive Performance
-
+LOOCV and many other evaluation tools such as WAIC rest on the _log-pointwise-predictive-density_ (lppd), approximated with $S$ MCMC draws from the posterior:
+
+$$
+\mathrm{lppd} = \sum_i \log \frac{1}{S} \sum_s p(y_{i} | \Theta_{s})
+$$
+
+To estimate LOOCV, the relative "importance" of each observation must be computed. Certain observations have more influence on the posterior distribution, and so the posterior changes more when they are left out. The intuition behind measuring importance is that a highly influential observation is relatively unlikely under the model, while a less influential one is relatively expected. Omitting one observation at a time and evaluating the log predictive density of the held-out point gives the out-of-sample lppd. For each omitted $y_i$,
+
+$$
+\mathrm{lppd}_{CV} = \sum_i \frac{1}{S} \sum_s \log p(y_{i} | \Theta_{-i,s})
+$$
+
+There is a package called `loo` that can compute the expected log-pointwise-predictive-density (ELPD) using PSIS-LOO, as well as the estimated number of effective parameters and the LOO information criterion [@R-loo]. On the researcher's part, the log-likelihood of each observation must be computed in the model. For my models, I added this in the _generated quantities_ block of my Stan program. It is standard practice to name the log-likelihood `log_lik` in the model.
+
+```
+generated quantities {
+  vector[N] log_lik;
+
+  for (i in 1:N) {
+    real alpha = b + bGT[G[i], trt[i]];
+    real beta = a + aGT[G[i], trt[i]];
+    real lambda = lG[G[i]];
+    real p = lambda + (1 - 2*lambda) * inv_logit(exp(beta) * (x[i] - alpha));
+    log_lik[i] = binomial_lpmf(k[i] | n[i], p);
+  }
+}
+```
+
+Models can be compared simply using `loo::loo_compare`. It estimates the ELPD and its standard error for each model, then calculates the relative differences between all the models. The model with the highest ELPD is predicted to have the best out-of-sample predictions. The comparison of the first three iterations of the model from [chapter 3](#workflow) for the audiovisual data is shown below.
+
+```r
+comp_av <- loo_compare(l031_av, l032_av, l032nc_av, l033_av)
+print(comp_av, simplify = FALSE)
+#>        elpd_diff se_diff elpd_loo se_elpd_loo p_loo se_p_loo looic  se_looic
+#> model4    0.0      0.0   -1615.7     42.8      16.2    0.9   3231.4   85.6
+#> model2   -1.0      3.8   -1616.7     42.6      11.3    0.6   3233.3   85.2
+#> model3   -1.3      3.8   -1617.0     42.7      11.8    0.6   3234.0   85.3
+#> model1  -32.8     10.4   -1648.5     43.0       3.0    0.2   3296.9   86.1
+```
+
+The centered and non-centered parameterizations (models 2 and 3 respectively) have essentially the same ELPD.
This is expected since they are essentially the same model. The reparameterization only helps with model fitting efficiency, though that can mean more reliable posteriors. The model with age-block interactions (model 4) has the highest ELPD, but is not decisively the best as determined by the standard error of the ELPD. The only thing that can be determined is that including age and block improves performance significantly over the base model (model 1). + +But how about for the visual data? The fourth iteration of the model introduced a lapse rate. Did the change significantly improve the ELPD? + + +```r +comp_vis <- loo_compare(l033_vis, l034_vis) +print(comp_vis, simplify = FALSE) +#> elpd_diff se_diff elpd_loo se_elpd_loo p_loo se_p_loo looic se_looic +#> model2 0.0 0.0 -1001.1 44.0 19.2 1.9 2002.2 88.0 +#> model1 -259.4 31.9 -1260.5 56.1 23.1 2.3 2520.9 112.2 +``` + +Absolutely! Something else interesting also happened with the introduction of the lapse rate - the effective number of parameters decreased (`p_loo`). + +Earlier I argued that model selection is out, model comparison is in. At the end of [chapter 3](#workflow) I finished with a model that has age-block interactions and a lapse rate for each age group. There was one more model that I could have specified - one that estimates at the subject level. There is no domain-specific reason to include the subject level information, especially since the goal is to make inferences at the age group level, but there may still be statistical reason to add in the subjects. For one, adding in the subject as another level in a multilevel model can induce regularization among the subjects, which can overall make for better predictions on new data. + +I've gone ahead and fit the model with subject-level information, and the comparison between this new model and the one from iteration 4 is shown below. + + +```r +comp_vis2 <- loo_compare(l034_vis, l034s_vis) +print(comp_vis2, simplify = FALSE) +#> elpd_diff se_diff elpd_loo se_elpd_loo p_loo se_p_loo looic se_looic +#> model2 0.0 0.0 -925.1 38.1 75.6 5.4 1850.3 76.2 +#> model1 -76.0 19.1 -1001.1 44.0 19.2 1.9 2002.2 88.0 +``` + +Including the subject-level information significantly improves the ELPD, and even though there are over 100 parameters in the model (slope and intercept for each of the 45 subjects), the effective number of parameters is much less. Since this new model is capable of making inferences at both the age group level and the subject level, I use it for the result section ([chapter 6](#results)). -A negative difference in $\Delta MLPD$ for Model $M$ compared to a reference Model ($M^*$) means worse performance for the model while a positive difference indicates better prediction. -We assess the uncertainty in the difference using Bayesian bootstrap [@Rubin1981] samples of $\Delta MLPD$ between model $M$ and $M^*$: +One concern comes up when it comes to LOOCV and multilevel models. What does it mean to leave _one_ out? Should one subject be left out? One age group? Just one observation? With more levels in a model, more careful considerations must be taken when it comes to estimating prediction performance. 
diff --git a/docs/050-predictive-inference_files/figure-html/ch050-Cold-Fish-1.png b/docs/050-predictive-inference_files/figure-html/ch050-Cold-Fish-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..2980741817ea862306e302c6544428279ac5683f
Binary files /dev/null and b/docs/050-predictive-inference_files/figure-html/ch050-Cold-Fish-1.png differ
zvM}|aterw~cER9aduicoNyrKv!rt@?jhFbOQ@ItKhmF1j6s(YWF4u4xx0YwDV9e~ZfXb}bmb4+^zl!O7Dk%ogb5cCVQ zULLpAM=O@5j&tlbq{A12P$Z64|9tO$pCyPmhsAc^pQC`e&u*Qx~Av8a8OWCD_lc}LfNgwaGAE)t9BgK=`oC?(rZo=tAP~UkYnH{|~ymNN{5W0PZO}dq*P~#b%&O%FZbPA@d#o zxR-`nB(B9I(2%f@kenPhOB^5WCE{JqkyMKWENrzk-`neZnnEU$ zrOu9b+Q3mkps+wRD{f|5GpKMH1TF~Y==5-aD+E5yk{wm}JSrU?2aa4>Sy?*fvCyp| z4iQ|+!Xj&UNn5M;30K3MP?!slL$akLK zlsJ2eIVG;#Je}C6|54G%H0Ual$DVI&(iPlg7{ZAPJ2+I2)0K} zh_7EcR#431&Cm!N9&%}V1_oKRNcbWl#9ZY)!BtSJA%!|YYB4c3&Vx`2Cb9kwqU!DBjHHrqY25cfO{4Gnav*U?sgODTdZ z0E-9zf>a0y8`cX{6MjQ0;B6po7VYO(q|ZMhL__UW8A>KFJ3*`h^+FS=pU{ho`$CTE zH}dt?*B_w^6DfqB!WBnkKvXR|SOQcMgl^G-0-IfFrmwH>=2jL+De6(YzqIlnxK{ne zbNnffa-v8%xX>U{{Oaf+B~avxQo(>DJ*mk^{ zS4iGjF^*5EyBNYm7zKa(BaT2hfYzdPBw`S1=WFI;vs<)gVTgo4Yy_~2oPAf&c*CkI z*$>fLk)#H^U=C8RV~40KXgWe7$&JvKqYUvQCQ;FUa5-4Y5lZTOk)c)avs52rCJtR<(0=JQ+eIzrL zxX;L>-H(Qbz0_i0fiOz@OFj3h=vQfz7xrP+Lu3hoa7udhs=GLtNs)S&3V2MXVf~r6 z$Ip@3stUuJLaKrAYuN-0k;i-b^fQ)*2kJ-Xwja9+b`Iem>$;n95Ez$6*|nwuS7pC8uZ7f5;Fb~h!p*b-ns+WB)TO-r}2tTb*0FHqHIy{;LbG#A0MBiHcsUEQGvezs~`{Vy+4-$v8o2B zX(hET0qc`prM8g5on(Ofj4PI z;3x<*ZjDr~JeEDPQF=CeV3l8L8e4KXpmH5q8;8RTfVL-jtvA8W@rO}Wo|`MfZXf1y{Z^`Z(OoY-y7c8PhfCyvflnm@=ls_}t9m1voGVF~mJ z0OHb5pim6a+)03vwh+p{G?M0t|39 z9OA7O!~3DES2UZD%z%ATa~l5u4x*SopZEQf#SmmpHd=@W<*V1Toz|zDE|vbF;I-70 zlao7lUbVciob1IF#ZR1ZZC7pP-I}<_eTARpYUd1EZ`*R5X?VnunXTL8T(6)y$ox58 zR!s3Wq70g*P~u^oTY1CZ;Z8zCROeN-P9i^LlE(6IBbzkbPW}D-Uhj*gpeQ#Bh zL>_TYiBt%H4dMiFGlW*7a)%jQ$oO@*?3vk?CAuv{qfk#785zMLzJ152x)KsNEtk`5 zCq%IS zQVQw@I2xdq1cjb1c*Ox^geuqtaIjKy@;TrJI823{=OeSTp`qTdDVg`OuQ3Q@9xwU7#yryVB7#83y9=CAh$#z=jJ6B zB-+Cy*a6TC95c~lJ}cVL!=m>2nE-q$xST;d#~CVO9byNI5S;;oOu3++@ePlZJB9=Y zR}$|+6Y>qv+kT{ma>~l>EiDURfkxO6XhhIQJ}@7l+_L?~L5E}pN^OUuD?U3AjcrFO zO1^%DVEO`LvJ)_*?1);O4My7a4&P%($N=D55FGc*06e<|ApqzaD1!CmM2xI!fwMhCV1I4Wj_F0RmY6_GWmrC3WyrRwVHdk&?;RM;35 z&=Ji^B=6q+E^T)|4d2ht??Y6S$L-SEddm`z3U%m?5u4JQdYqeps6gsolywN=j!{gk zc?)wM#OJQ3K#wj9gr$Kreb>+%~$m_^|{vYY+Sr0R1?te0O{a-Wb=dlj~ zRDR?%0h9z<5jn4=6A&-uj~tUQF-xqTL4XcUMpV{yOTZS|B&Gl|ON5)HYiCzZTaLP$ z0t*2sA!Lo!5#AWZQ<}KFh+q|9mGrl6i9)(R4qc^{AvopxpL9WCWi1^kG;X+r9u)&< z1^MhMDJf=V<{3ada0Ec59gwYFxr3=Fl{ZXg5CA3pFK1oUARng%HM32OA^M;(`1A;kJ+RqwU)Fxj%6*&$i+w@aOjz%5@ zbcb^47Lk?s>^D;T>Z_`Dg369qsp~Fj=OFo1%<-?HrluzAkS~bafU9#BJfKjj47Cdf zI5vqzM{Ml}F>*BWULvxq3)JdglXe43+XDjwfMFq`xd4Jhxb&MhU!*UedHauK0LR*r zA=+q`?y=wbgI*n@pkL4$y{6S&$h4pXWGh6ol|1zYq6HyS*kM`@LY9lmw~UMzfAiZ*qI+LY49(kV^r?$lm4R;#{dx*--~COCr1hR2O+pLs(y`)`RL=gR zYn^mbh%TzNM>CnV_(3-WOSFq@*)K4|IXEYZ*CUQ zFyqN4RBQGlQ`6EC#Jq>hiaxAZKuX}aei@i5BO~Lg`z%sfhs{V}W6Aco`#s%kL`J?k!p=6nnks&K93tNYJz`#X$3|_ghUU1x1!#J<27=3+o7fepB z_tn+k2adS?37ru>gi1e{{1TKgFw18Q5akHa3g{@TLP8bk>>R7%|0sspGceVbyHXlq zj1IOIq9Y@VO&akty0aEMpmm7F2V8iPQe$2lZJ<1<9Q|MMS=FxIJ8-hF64?DXjKp_&5+yOU^7A9Zx%D=K@HcZIS5JusNqu zy_S<#F%KynxVkhfNjh5EMDOi~ufB7DLI?C;^Lne6jsdU9&o|)L0dNJQ?K0c>84%!b zsqG5%1mLdu1=dxTYsii*6iQRa+IkV>^9`t2d4UHIG%9$Z5nx&PAKH0=4~D6Q4c&?^ z?6ouiCWdf5v$j}Py>DfsHgRoyfR~_@0-x>{A0m(j?%zOQ6%;RFarKwlj)M`~4#Mt- z^W4x&;0=cw(FxQc@2jXFPXne&U?gE+oT>1gIU`Zd=vtaz4K*kUw~MrNbfGdP6F-Or zj7>~Hh4eP7)1QJ$b+6^s@R1Sye^R!bl+^2V3-D$gQlJ)7b`~DYR98*d35u;soo?A5o z&-H-%@YvX)LQ{o~(uX}~|Be10=6rEbEx@cWzNB#cA1+a!`W}h~7gw%GNg-B*uMh52 zt$kDGdz`NU`Mg>(>wNME$o73yKPe4Xbv~j10Tg%n>YP zE-rkc0b|K2DOX|SLpOfKWoJXF4dxJjK>c{Z`FE?Ub{&CVQG#Lc7~ru`^^Ep7o4?>8 zajW&kHI?TRqFG=qpiXpswev#{cG_j6iHs3bkF8*>D7t$+Y7Zz)6A@#@)C-@}7r5Pd zTD||@DO;#=gVqdpFc?FUrlX^S+B?vJw*_X=E2J(E3%^ zRIW$0&mp%!+TRs^S0VKuXaY@ zyesO;%F3WV?ydhg{lLQh^k2_31dk1;*CvJh0y}Y4(GC{`Gy_n%oSd9wWPXBqVTuWf zURR|-8I4kz;WYWl7`)yvXa5CVFpkE-4G+YSO|{P#Kevu5JO9bjUu+K 
z0{%rq1pQOx`~JLmu9ZLS(=p@OTScU7`i!)o&QpM%zc&wU0y)G4^mwpa)WBWL7qoKk zCatC3kNDe>#7+xL#-=yB7ZTM!^87p^9V3PTUMl=Xc*ruKf0>z?(5HYg0s;bRpmb?g zdPwF4kwmMnQlK(gBT}%uVquS^Bt6{jny$ybT2;*FIbS7X_0IeLpcQrgUl|HE% z^c=tpBVb|B*6sssd%gA?LFHDT9xcHBLc7U2L>KTnfV>qWV4oa#N4J=ffdTU?oDL*Q zAd!fl6qKzT>I~oiq-hYHJm!0t2u5DVLe<$A7=x`r~PGP;Z}Jm8SZN@o$uND8D*gjFZjux3)vF#yB?s~K>oMj@m__#_j9>@YHLb3(8&<+hsVk+Ec_RROA$0Zh z4hW8SKnm-T-EZ03gyuDfa7r;;Gu}SB=H{4MTi@M!>h>gYUs%cjX$9s|BhnRP z)OWA%0p!BR+YNKFKUnqsKHk~68h)&G2?uJL&tGfy`l~FL6L1`S?B{^W;09t81w`u$ zPFaQor{A@4K2Rt#f=kh%Hr)^F@DizWlCbM?t?LKO#@=1=C1i(2tu3-E;z}JtGuE3J z2=vE>hY!}$;kaE%0#Tm0Hjuds;FiqnY|9BHSQdj2A;AgD2d?d~6Y3?Kyn8_8YcC!k z2BI`}Z>HCFaAv{LvTF^6n+DVuAee>;{{2V{3JaKe>S+Qfxd6u1Df!i}+X5r4J%CEx zKFGpHwYOl^;L%*J;2DLmTLG~kOxzv7WS1=UXm_{9ZgIH}qW>Xq3gEP?+R9y|&sS-V zhHH+70Zt<&)(-Dw$CFK!lTD~?e*5;#|BAA}jkwX#Q7}JUrWvtsr1DdFk4vNY zk%Q*79T>fKNDm;kIspA85V~!&>r_#Pskr*(LBZ5?gbLfplB8j0H8j4e$~%DnXyT$c zT$-z2whC2fw!>C{$ba%r_VXpN%@dsaWFKC&c)=U02rm_njg7J7l}!Nv z%!3>SX(uNK4bJzv6N4{4pu<6av7AiuS_?5cgbXO!BX;_3Szk7@QVfj?K9&?bH(rDH3ksEUe;c9D(Np`jrtk3-GO28wE+1Q--Q?aF&7j81wEUWAEya>!h8 zSm7?T|9BKO2te27WM^~5w8HeM-tWy9gC_$u91XV&Arp&?+YB<)>nqJIEjnKwZ~|C{ z%p?L+32LxxOiX)F{1gN8+zat-sK;fY(PwN3UdmC_$KsYYHp|P)${tb3`$7)Dp-Fur&a6Km@^H?*LftL#tMG`+@Us}jbi0@P z5-=H&213DsQjI$!IHwcl@mCh_KNRBwlk31~>hXoG2?&b#?JptM?n+4@^IYQmc|;BR z|ME$21fr-sqEw(M*WAA_AWyqy8Kl`~TNZBD!{D@NkPB1z3krFL8#ey0*5q{I1A^lJ^b_ASP3Bm|;-O+6SDGxakD1 z{Td=owhDd_gn}9wjl(;&LqG$1xV^O{3fqTEM;uzfa0%js2=KjBbEuX+H(+YssfA6} zm1s2Liw1#;0)QemOLoT9xhW!{EoSrU} z@U4NR&B)~xs*oe!jxo7E~#&KkwiC_!nnjRF1Zsk=lM;0 zopru-*0;WM*0;_%>#X%zd+o;Zpa0DK=l6Tx=Y5{%eg8{MCPo#pno%*_PuR5Tq9ynA zFS_29bGFw8;v9qHEYY$(@f;rTgz(unGhk~%y5%|8vV}6SQxrzYOBarUzrVkx!`(J` zB`VteNQfg&>qbvR4K%O_Ubkb!j@OngmAz@CS&EU+8B<0?SIBd3kRlKd$oKK@flQCZ2432L}fyr^QUO=OJxFj_C73#TPnzQMA$P z6LVPU-bTk`)ZdARVlaB2@6q7kucIZS#V1dmjJMjb7mH`=!w^<(Sn-Y<5`tnRHFv@t zUf&|Re`6{czd91JTp|SUnHI77)w#M~qk5 zg=}UXLuFD-OhSXyu(b`+s_6Qb^)S;WCMMDiO-^~_)s-tC%T2k;F_*7c@j?^@PMsDG zskqwOw%pl$1^#=-3YDqQXzY_)7sF?NwQ+E`nBuh@5e4fI7Uj?=)j3Cw+);k?G_mgK zXgZ6L_S_>bm2d%!vXYSLC3o)V%+=n73D5Ignti;y%pM!?Y_Oc(zkgp{{Ttb^$j0kF zR-?2b8*81q?Y3?4743g+PJ}nn=ENpIu(GhQKuM5_Cn3JU#wyF~?Cm{p;D8m9xH{kT z?Af!0PFf^#<>Y9^KX3CZM4D!Mn*UZF@)q*&r%x$l-@*oCYx^D9x7%G^(}pOV!Q@6{ zRWVsAL2OiUaUw?j>zDu8Yp~oZC@cjIy&J?70`UUbni!eD-9rjM`magdQz9ld|0eMQ z#HEsUtbk&B2?4o90t|ah3X&O60I3=ft(DMd4?=tka6FHPL){ck56Dk|b|ogssy%ta zeicf7juOgW1FRS2Z_*nx~d~G6pY(OQF|CW<%!{>Tv>~-1_9&gPWnvAt9}W zxF0SxI6jH@J&&1n`}P(ua@(U?XXJX42zJ)g)g49GV8Ok6i-F;xQvK+$f*f#c)i{g6 zTfj&l1qta&z5ffZyZbU62T%xCR>NQ>C^fq{X1>x%)mA#qTdjI;vfx!Y@FCc~9Lg~4;aJR&x3DvLAB zWTzWUGurAc`r$_Nz*Xph3V$3%v^Y`~93F>EW1BCNxi9JbaxE<_dCCd-pbMM$oAo`6 z>S>M@(m**5tBvQmd~%DmT=ui~0vJK&7I2vNzI`Ll&fqd~25lAlL4O6n5h*NKz31icZE+Tj9Rk9NfynWDsq^$D zuHNe{Jv}`HhQAJ7weM|KdEK+;HxNP2ASRs`;R{E{hU#i=$_$Ae+qcV02j07Xe`I82 znW5Dw425g=E5tA4f&s3V7e#E)CqMB@bQ5^>fF@=Nq5&d*va^e#LJ>s46^B3JavnOA z7oRS91Eq(@D&+c*H2`nb;HDbt>mSPOz5?i0?%<|g7vuEhQdY*qnHL!OAkETZpaQ2d zQA?voR%68q=&l<&1!x&Q3Qs3o%LYTHK+?Z-?i!~;6);}-U6xTjz|V|H6$Hk)rPqc! zo0t<56NuvjSYK%cRx`Uo!)xBJGSf=T%b!yttaY!PV_O@8Me~NLr0(IpAd8cK#Y5+u<(Tms=1L*?JGop(bMtO%f|hwLL#1rd$Ve znjUMc|JAEk#B%(ASJ;s%MLVOYuNz%RHmTkz?dH`NU#e#&f5-eCo-{N)+u7HC`gH7r zbD$x{YP7evXQl@2OBVs|oW!neolO-84x|Sc=2A_2jvGdmpi~1@P}%N>M5h3j5hPl? 
zsQWzN4p2Co-8N96O%%`ZCvw)%It#P2dEgBd;cy0k4H%<|LVnS52}VS0<(|?>a%6?8 zRS=czLAjP-5x2|#{|{S=Y?j5F2TiZF_z$&RKq3}wvx)&i*KQF# zGF4~Qt{0tp_mlN}hUn3o!wmj(l3@7@?=B9|5IvfvoZtA4!P-s^FgtlNDrR#T(y?-) ze3YT6+>N#o^mEBeHvcTk<#>|UQ!$@VW-Jvw+hBZd@EHMja~X5ACUtkr)SFr!Z`FGjewtGl~9^hMm2^2f(0yOf3gb1ppR zcVF(@dOu;*)_(O*6)HbfNIPiiw{fAo{=DY=?e=2+62}UqBZB0L_JB9my-h*1AHF^@ ze)-jl;HrhlFPaa_(&GM|>$rAp32&1v8?nUq`VYW8*4EZFt!$VNw`e7L@7-%26f3W; zp>Z=ON19d}GZU25_nMk;V2Fw(-OGwP?id#i&ZDq!Jt?F(%4>CXdrOjkwQ+WKUKWo; z0>EWn6{iXuh(fNYNS{D2cmaG>?SCA(hB(7E4NFO!eB_DU)GR@+_x};c@HfN;Asm+8 zLdHt0P%|{~+P}ZjV^w7C*HV>;tz$j*bP0cOSpNQzRKLRHwX4sg=<@OL!L{S$^UXvN z6SWz>pSY@y6H$pmS7a8h)Xm*g$BM1EMOEaymT(0|i6z(Oq{q}d*Qs}C9g=4r0hNki z9j6qvy$Ga%u+>PgB*GV>N+{^OEkTQI_hS@6qH}-)h1(|-^uZ7V9L%l<>?j$@&yA#> zL&uQvRHvzVjB&jpunuZlfFI#_n(*S#_v}wI6r4CPwouD4sTLS2WoR8$FOv{S|F~2r zd-^P+9PV$VB_;*cpvH=U8?*a7q2ho&wxVE6$Sj0)Yw{Ms^MM=QN#TOj*Z`^~9-pSW zv(vsPU_a$55+Z1A99Po|*{ZL*TSHwP{*rw)d9eY6%f58!5|d)O_#nEZ_SUp+{P?Ym zD|_Vf_m+e_-LR}ACogXhc+DPerbCAY(KB&wUCXbz$9lZ$e$nL>n&pFU6;5=73OdrD z83I091#7!a_`a7OkBdU;?3-2v8LME7Gam0-RXAx4mj zO3(>IH&o>Lp^1YSMraYhV%S08e-7ajk)AQIw#zJ&Vuud1?E(WwVS9K->LnHSTZoDv z@i+?K^_xF{DGS{w!ffkfU0_vna&w80M<{iY9IszKF!3~W@+muW$=Ujb2GaS0OsZx` z$$tO#?cvI`AHYLYCh4@o4XOz83q=}fD}fD+3_^ht1)O0X^g~SxTqeXS23Z|T&lf!U z1vpLe^QZWpM!~qv0Rh%&=vT6u-C9hjP9UqGnr*Qzkzu8vyQLXp&71TONQjmKoEN6D_(f;e)B}Y>#v#n`eZ&ZQ_}DAZH| z^AV~LS?1VFPsVEBT}O|OZ<17$XHxt5Td~BgLk8mO>v#G*=N2Ch4Aj%rMUf0rn{Ft@ z-;vBrOG3x(+6Z4jJ?Whx_LI+U?=D7KC>9!%0y$$=rO6FvNJ5Z$g-~i5L{&2TEsRS8;}384 zlxxwO<;c7)DwEqb9zNRwUE=4yO^+?JY%{Q(VsiPe8d^Tx=MAF|7Cl49UG9N~z^`B?A}Op0tddhXXIL6@BXN zX5{Ajz(y^#Iz=B1u*CUDB-DMgNHmMV}ClO;^o;CWs#?pS!j2 zw84^mGIiM>CAV^ObNdSqHch-Qa570AwWEw-p`X@z;8lNbFUnYC4ea+($Sg-t2qN(C z2*p{mnt6UY%N^VYiDH1;n<}K9Hp1M?u`u{M#Hv{?YT!sJ1_Ol*-*o+ijfa#{hXHgt zm@k0XrYV?e5)ubkD`_2qyAR81Z(xl%UD5xo4IEUwP({w&y93z##DqzdGS|r#1>)a| zPob)+HOsAr%!jvHke~1WdZ0W`cF7M*b>umtUIz{kYXjP@dDL>4PlI2r?|>h21@G#w z8{t#2L%oK`NTedtG#8fH4DV!dM{2M%`YKB&D@rYc=Buv6w@#!@=a3ibnUkOON{{yB ze;AoG;{;Osqt%`23+~)G_<8`ccN7&cm3#?~P&f6_>IX*YFDUE~h`_K$;7d3Q<_NuY zjy5(raQ1J~BeLpmrb_BgKJ%Cq(g4@)M`lgD?)F01z22YjSy?#t^wsIlQ$3|xIB zP4sqcz(62L!~3udYeuX@%jU4OzG)eK5B!}ZrNKU5l#`Pq(0GKK+rg%K|K7bJ3=Lu3 zZ597QTxeKu_%EQ^L_y6^D*HZsEOXK4Sy*@!c2?xxCuNt(fV}2+9y61$i804$rX~vk z4Z^koR!I2_`72k_(_0iu$4tIu#+VIVS>bwyB7GVvA$`rl&AH2Y;v>%SbZf%YptKL2 zc#}3?YrNszyL{MmQd3hgaX;to-BmpCaC^AqzK`(zV}WN|qj8Ny+;mizBV>@ zkT+QjS7&EKB4B4+2}@CFjz3%`E;-poH!8$uObr!`<6aB9VaZ0r!VnUVl=2K2j@<0w z3q$goul<9S&H0cRD2nWtw+GKx}krObc zaKPtSwvn-6Hvm%S$T*-w8N=j2)j+lLEBv@O zGi3zOW0|2mE=UW+ldq?+vJbslpIJ~an)LIz@#>fm)D@U%2{V7H=>2S694_hR`X!~0 z)d9{!TT{~+=fsyHEM>=#)oL^|9#SzDlax zgKP{l=*vdv%!W1BHEzvr=a&iTFX=l#6zd*1Uq@8@^^sL$iRulu^M`+lDLy1&==x+e6b zkuD#PFwd%0tN8Twj-Fn%YBg-tDyZHXD9|EjnPdH}{$~Yu2t^%frL7ZrwUwUS2*vzV++Z^Yim>*s$TJpMKi7apR^KIW zyk*Olty{MW2nYxY3JM7c2@4Bv+qP}{_U$4fBBG+AVq#)H|NQfg9XrIu#dq%9DIp=T zd-rZhNy$BX_DD%d?cKX~-@bkO_wSdMmX?u`k(HG_aNvNPoSeM8yn=$l!Gi~tl$4Z} zl~q(!R8>{g)YR0~)ipIWVKCU?!-tO?Iijttt)ru(tE;Q0r)OYbU}$J~{P^(`Cr%g{ z85tWJpFDZ;)TvXaPoFk1F*$SQjH#*V*|TTO%*@W6J7;cgZed|zX=!=>{CO)YD{E_O z8ylMo7cN}9c=6JuOSZPQmoHzov$M0ew}->w4h{~Ej*eHZTyb)8a&~rhadB~Vb#-%d zyL$ENwQJYh-Q7JrJUl%;y}Z1xU%&3{?d{{^0|Ej90|SGCf{;jL zaB%RA8#iv=ym{-^t=qS6hlGUOy?Zw_H1yuRd-w0(fAHWzSXkJ@hY!QU!yi3*^!V}P zh=_Tr`~3Oy7cX8U zBqSszCMG2%B_}7Rq@=uj`SR7PSE;F~X=!Qc>FF668JU@xSy@@HU%$@I&VKXeO-@cu zZfC>m0nwrm_KiAgQ*45S3*VhvWgocI&B9YkG*!bnkm!_ts=H}*> zmX_Am*0#2`_V)IUj*iaG&aSSm?(S|9iS+gB*Kgmx_4fAm_4W1l_YVvV3=R&G$z%$J zGBh+aJUl!yGD4+NM@L7;#>T#X|2{rGJ~1&dIXOwA(Wa)Rrl+T8W@ct*XXobT=yW=R z!I+<)XEK>A7K_bhi$O!cPkilFJ#&v$t3*P;e~|G0al2KkeqN=2^za#yr;o(F 
zcU%HqI{T92#6RS_9<{u$zS(yVhbEF!kL$?BjrI|~Dnak-I;&Bi%Na-9-3J&N6~Cu0EZxBE$Iw1nd6C{b8Mv5nA4{VN}l2B)>T&O`{yVQwksG0L7@%iCmd{}g_;48*R9g@9EXZ;l;YxJAn#lN9*zNe3G<6nZ-icfX&(Rvi35Wi0{@noB`}DHs2tW~ z$rf21SV2&ah8E+V?L%fZMvHM@&7fW$oD7^*nrf1jmORfW*{@NHgxG=lpcc(gw^#PF zQ)AKSj|=kvVndj~)VTgB??`m@@<<8`^R%lz6+tcF;kR3|cPot?KQNt>8ffR&_G)>v zZfG81rl)F2sYNZV-=a=iwij7^#j+uLS;k#vAnQjLtR+o})4bdne zpIg*j`Y9F?ppv10o%~i)byX|8FmJgBqlUFH7O-ZZS>A5*`4E>5{q2Ot8l%NcrM-jV zb|vMGxJXGe=Sy#MlQbx=peG(QO-+zf=5Bb@p2#f4vOz{h44wnTT z33>1(N%E;YYx{nUvPc!aKGE@>c*f> zQZ(AYqR4`u?Cpy2f9!V(ch$WVnLn7|!Gjptk_3;~n%xX-_cF6km@EA_s&9PoO-Ux^ zNa%S1&{euylPoGJvO~#}+7Hsc-Em zYq_&Tr`TQ;^WBSwGoTLnFvFn0OS3i79!f==F26&&Dj|u~g6t04MMpC+(^bVfw$#L;ROGvcJsyJ+Fm-VK;RzvV(V5 z4k6w)Je<7XoJ-@h+Uccn8gVDHA)C0#ldr`KtM#0?>9|nPXq4!4Clx|@zT+?NP7NM( zSwWSF&o?RciOpz#HmYpf?|xQpttC^%O2`3XMKrn#-*rW7*wW=G$3Z7Bf|BEC+BL;K z6@J?Ym5WzR#0Cg{)~;zs)w9{Kot<4sCHsY|E0S{!d1r}vEp1|3_{jPd0H8mIbY*5F9)6Yip@V|doVOJu z41L@AmLJ;wbny~At%?=v3p|_UL;F5f{zLJ4a{bgA2d}OkQpgXr+X;maDO(5yVl0&4 z$~wv*%p7ZGdi3_+G&gLWXR7p`eA4flBExf z{`{NKveUd|x8F?D#hS2&;77iXRs67>r9(H>pwAUC*0VK#D5W3+sVeKqpMJA==)P=> zY|-$V(XtfHdYo``>1!gb}Rd3%m96?b3ZMw5$?D z1+q>Ie0TC#pr<7z79ygekYp<5(rODE3|qv{8+TzDN-5?L4cQE_mjxLoVgw1kOILQq z8u9{G+d^Ovr8Z@0h}pCklq2f|mNe?Xvd+JAw290DMjG$>pclK7zoypPYP?kT3xRDv)g)aaQ zg?4$8B)MHtlmLVm8dh57h+CQC$Y{Vp!tp7zHCAEe@+1h=nBObEAUW^JI`93lxx+6@ zA|!4T5wMtKDvFACtnIr2iOPlLi4sgwtb@@S@4nZXK0yn+UP{l@6{f22SLzrV6BI34 zuGtIi18|m}6?a?`jnkwOb2CZz=|*yO=fVAxB7_{G28!BDdmdXbDSq0o=69^UF>)5QiZZe;P)GF`+s0`vb#)?}jjf9j zZE>{>dr}=TzY6e~K)4gSiBN90g0%jbIOqStnfL>v#=my+KLWNyZ)ZE(=tsQyC_qTr z1_WjSwbRZdaxe&=!b+O=0&GNZ1MmkmZMX+;7ktfrhT#Oov*93~a?>~mxQ@z;Qd>_N z_jL41sgG|K_2n@xW*hbJIruF)t3M?H@z8H6veR;o4cW2G_-9Lv#;PGSI=QirT_ud1$3!&!Tn zB5w%;)EWHHG9EZ?X^9aP91#S>0DZJlv6snyjaEtA^1wY`=V_yA_bqR**N}02yTy4F z*LWp(k9p!;B+YW>TLCTvWV%!v2RyIA3wOB0T6B=S_mWz{i_54*s}N)ARc6Lh57K=o zN>OfJ4&J`~(0sWg3W9Ru@{F4t1XwWZ$&5GJU_1i1K#h(%7bJV6G#8^~il#M~W8JgV zS68i-Db*?Qf`D5HyZ($jfg2zx70yS6RxJvx&3rXVSoHpwk8dQRT4rNMvKD{hrw%=dY2L;mCs zUte z;v2`siN^&JL0-rwAU6m5&h)+BnZ|PqyVB zKkv;xZ{Y>E;tx!A`(VQ5Su9A9lBEg}Y78IR!Kz=ZbzpuCAsrYORPV`^efXVkw+3!6 zs>sz>HDR*xX>^KHRglQLy-8b~=)`*+_Fc^jAChkX5O3KnSgQ^Z@t$GWD2yNQ|Ik*T zX?Pe@%_h zFK)4LSqpah*d1ZQGhXrz_7le3la6};6567wbMJ;!y(OD?=Jh_6r==B3b+Fff{T* z0n?g^mqq(*E}rdkN-oxB)uqzY7o=t=y@Rw#>F_PU2?SqxXw_GK41JEme%pXf#kC>1 zF2dlg6FYP0@8|%f2)r~O4tjzUYvBFoB&&F(ajmUzkXKje8sFB)))@?EJ5qdk_gEf; zFo~zEuPI|EYmjx*%7(^I(+B0W7(KcM{0bLfZ72Nx z-m?p#)KbC#JkEa_s{h|{x_@EIKWBaiv3dHS{P`BYcxqd50~2tx!l}ch05%j6lm=qG zVw~nl!0DFKDFB%s)?^EKSp+UHdEr7hy&r4aRvaN=_gEIIw(Zc#mclqtM=5|WHB4F< z02Qxz@H7yoc6s4TfH>7}IK=<}o1hoJm1#BA2&fX~uNl=*+q&-MSa5#T3@mgkSeyji zMG6`V7XICtJ0Q{YpUs2){lme@L;jk7Alz~}msWRen43Lmn3js5#B2HMQceN+g+=2$ zqdQ{6EWJb+mfq2#RY2nJXV4!gFdN33JY`$YAbti^*n;UeqXMQ#lQVpDkNT74oM8&1 z>kyFlnp_8fghh`*aMbkW<)pQi>`{wcmg4s%_Y;6DT2uZD6QQM9X^&|tXl4N1V9PVrAhXFuF zzi9BCy$$ew5xU@Qw4_BlFh>_=CV=|1Y+FcYp!)PA_n3@o)QT~_vRt0pZO>P1skJTP zWX?BH?Lt;o;K^Tf(MXHk>kUG!CEY@z^eJj$ZcSY{Fn&$K;$2tF|ElF(sLx8C_ z!M9R3jSrtEUPB>7*);+&%qsQW%UitJpW&9tp0(*ZL$9sBxop9`#U#~=5a{gx6Diui z|7n7$q{U_au9;-d{3TYc?@$Y`L)r?{walqaZg=9!UC4z249^23Iwo4GYt^bvXTe&4 zm}dSj9nHyxXxmz3KA7qRSevy!E%Is#Ag|VI4(O~|(zyZB^-N~eSvgU<9%m1;1le_v zUs^-4huI>58ihXjX)_%P+BWN0))E%UIsnJS!2U zed0m$6bpo&*^d`84^vkuLO7jD5HGWy0gh3-&a5q&oE=xjxe7h;k7=_65y*{Q0-Rl^v3ZUJnQ3`;}`KPQq*dG%muPa@{W_l~O zUlV=68;!;e$Ktxp0Dt=&oR%sY^g0R+1m?^$EBQykh3c)sB z&1Wh4EXN);czdaLCpJ9OKr+h`T}PhpOX{bzA9CXIv|dU>%5hj|`OXd5IVp{$rL6X> zjDw&AmN>XaNKpv1U377q{{jGiP5|%^3q1aC@A0qIq5R=y()REw$*Zyw9e|RA$~z!M ze!hhpL_P8r0O}bZtk46T#(GXfYcmL&gn4MMU4)}AK{O6@R}6YqD%6|< 
zii9dLn}Y47G)g1K2dybTC37O=mH7;_kq91=sBuqd%J+{ZJ`+rD9=E^vYrxI_-&Q^S z;a}T-$)WgH%skAY>eC;`nlcMNZi7=5q22wF{ds^~|16f68mTlAn?EfFRUKP_5-Ul# zUy*LVLJJF7xL(4yp@Vmol~#&q01)7Tdz;hsDF1i{bxbYF&3WZG{eTS~vmuSDP*u_j z%CPzyjG7BnHE2a(ImKq;WO>@4raPKQtQR}~*0ws)N_R3!LJFP0Lnh|pbMVR;u@;P@ zs{KyoQ{BeNo5Y@-KY)$c`V#1VWs(J}M_VxJs83|wWeo&?dZ`X&$3eYKwq9KhC37dk zqtIBV+)Q0Vz{PB(c<&N zcN8}Kk0>nwe*(phK=lMU)pG*ZPkB&hY-@qe6k&VyrAD4jrx@;YjrX!#0TB{9uwhAA z8@zMvK)}>Kr)QnJS#F5O#?*H$cc4?R0Inew*H;;;#`2=+(@Vj zO4Nm=?GqMpw_I7sF0AtO5IirpVMoD-Wpm9r5We-u;_TnF1p;kFziGP;v|)eKb`fa9 z{kjeJ4h|!IAwTL^tT+W$xgUa00NR><-6rCrorPqPbV53}{$hy?KiXtexT0!b{$fEM zP$X*%D`6yTCm0HgTyt7kizF7hEgO?1JHIoEZ}~+@VbzRo1s3-?g{^qqi6}AiS=^cma_p7Azz~Yfgosf>*%O=>bBfo#B-4 z1n2!7YjXea+ljVk#Q#Mos_?pEyPGDjmI(-eNKulYtMb7Dq^V4B`YkK@=B1oDP5iwQ_(U#ML!A7r!}}q*!A@f*bO0{KPhLY*UL)E&xKmM^~pyz%gCG7+*&w4DaHCOQii= zI?@5q7YHOlcFRXbX8pB`JNR|PXPD)VKmeKG;+Nq%9d+Cj|$Hbi#;~AUkV`m>0+!iCO zBi5p*Q;sG*`eguX?`(q#tSWI+Xha9SwVk*bbQ~Uyl8A_J+`%tOMIBoYl((3}{SMQQ zEJ){2{mgJQ7-h$cHZ)x_tJuw@zp0ty+OV3MkzoyKt}cPaSp67^@K$wk@Xui+_5)e@ zjbRWJb+Wi#p{4Z`tscd%lw4~kxz?cvQ`Rz=9JX_caL3b;au~2AhiOEwBp*)FUE62k zSgYZMXXjL7x)rN{#V4c@tB0dg6y_a|=K$ZFj?puO!g?^h1FvT7$-T-GZ7pL|HSVh{?Sh+nmE3H zc1mmE^U|cBF|{)F5tJ6^ImR<_7&!%#^S`q=%ukfs~2^ARko-gwg{!(ubU z<^{1bw3(Tka7GDvZP6Qftc9R!YHB!@QGkBGZyz*u^(C*VZ&ARrEciUQZ=dnks8~qT zk!vod%ow7A!(%zD#U7TfDU+}>DTMIt5-bnadF{NHbCVf@M_L9jqMW6C){WJhic6+# zw>ZQ5a_gPRwB9ih^V9fCaLVD_r-CP?*A$4ESsh>vPo^p+Nj6*pbZi)5^EZ`rXcaT@ zsr1tn^W3|D80T2l!*{-y3TK*{oX}Dl54Q3HCS924WId=3XW>%2&8}~i-u=)1(&QiQ g>~DXs!VLP_svcUF_+SS33C=40V@5|ak6gI%KL8-ZDgXcg literal 0 HcmV?d00001 diff --git a/docs/050-predictive-inference_files/figure-html/ch050-Olive-Screwdriver-1.png b/docs/050-predictive-inference_files/figure-html/ch050-Olive-Screwdriver-1.png new file mode 100644 index 0000000000000000000000000000000000000000..994a454d74ef0b532d56a9709539fe064faddabc GIT binary patch literal 91902 zcmc$`bx>As)HeDk0@BhU-QC^N-Q9?QbayKa0!nw+LrFJCNOw0#cXyus@O$5x^WXXF ze4jH8dziiNSod1jx>nqTDJe)I!Q;V$Kp-S(DKQlg2wE5ff|7dw7Whrr&IBLu1I}Jb z+X)0B2!Z^66TNk23IY*>q{Tj|xu+d2o2Ox@FL0hdq=EOj1;WydL*c1EiJ^*j`+wWT zL)HI@Ib@7}b8ZpG*q;S4SL&X+4si)s`m2&$u z!BRTAximZzJ$9(?)g|vGHy=##$i4prECcc{W}#sk`oBlepZ@Fs*#DlTpuVoX`R{Q8 zgy;U>lRvvZ4B>x|#7J5Hf4>TzhR&;;m{1`JtmW9;-d?cgB&B6y;zI0N_{=J8Y;4So zbLrWtf&Gg|k1=s@i!^6{f8U7ts|!zZ*T$Ls2pt_=86&yV*OY|BL`ks#;jDs!o_$Bw zkjXvg=H=cfg6gK2p$;6Y@f>K>y(R-?WnUS3atf!-ot+Y}O!3)+ z(_jKa83QPb#~$|o-1h&k%m4pt0Yw_vAP{aj7`*>J&iPf*Y}fbR?lfTR#7l{7yPQQ69mCyP$1<$y=^ZEu70v@DY1wBn6%d7 z0t%Sv&9Pu{yY}^Py0*vjK?!}m_h;ScijSvVD5Lxra=FOwAOrN#0^+DrrI-fnf#wzp$-ZJ%}=_sE4~M1+iEwr8_|Z|XMgpZU2R6B zAs>QRG4b5}=|7xSPs@It)Ty)6G&!l^3bmL(Y0iB9+>C40*3^^Mlg96HK9MiGp$;4i zAV^9;E-~YX>1Em@SR;Ah@IZr|l8OCjlmw0!{!}iz=lg>SSY-UUN7dIO`hqhgf)bM* z*w5+P|Ll&EdMA?8>_(b$(KYU#4zD`@`c8Ks-zg~h>jiI{5e&g&_9s3zi6h4##+=z0b9IKDRYv#{cr#1;Xtv^AN`sGF>`%ku^jL>$bfdM?Pk*` zD@JSxLgiIe6YTOMEQ0SLdnJAghzGIb`Xn2ru-fLMVvVXX-MHI_t9>TVD>6l2C#K|7 zrhAm^XHgAUYq5(|uR7lFh=_>r@O5_)$mjlu0IU%oRZgJYui4c7LqIcY<=fhhkgdS( zVj`t@UcK_VIsS11=XN+3@E`mM|AIq$Jl`5DR7k%+tRAhiS*#;@Xn#0vW-}WKZQT2d zZFo4AEvi=nCG~swV6LW+8mGt&Hc?mal8xouv1%S&O+-OK!CNsYbMVUD+_|6)(x#b60$bYTyb-t;t^q3=YaVjGIvDvFfsPM2tUE6!CIw%=xa zc-=l!o*UH3#oda>PP)yaf}o+Mel5}?p#1a2s#F%S%Na$waUh3+kon7<`TO|FdfISq zt2i7dpYLXcR&{TnV7kw&(=L{lmbm|CIJP`1sCMVQ2}QxXFDEOl9Y(o3ZRIv=d@lQ& z_S|Snc*5)I>Zgg%GG6Twm0>I__DMKyWVN=LHI5547O{zmn%dg9iPbJAD*}u=ue=T*zj$=Lzi&w*28DtcBw}i6dUtoXyFwyg`Q$Y>)vk0`xhtPj6y0D8 z?^2ibhEUM^Hb*>S-4Pt(RcD${kt2T4kcRZSXA@wNLLTRv{jpTD&vyl>wqP*WaxFkI z3LjO3-M)#>U#*L?%y=@RKq)eSYCSFD@0F_5^e;n9)Tcc~Kb;nLhb|$R5=E5g3z@`@ 
zXJ;mT*VcwpnNG`7$lxF$nOT^&e#_p`ot>T0bUwGOfp}nGi}@ z`AVCQm-9-lB#L_NE%c^P9TynvSi^hPg|m@nmBKM>Z!MA(v4To8lC?{ztvpv78?U@k zN`25C9v;rvcDqg?o4|g--+r zO|qYRm_Y9sHL2+x+8by*L0MQc?q`xwQC1$WG?A>BUEE}g+#6djGxpi19Ewf6SbMSb zf#q!Gg~-gmCeh`_+Un%b&gLfn^_&Ud5HdJz7D~0N%Ny3=yO<69o~{5$)7B^UAEFo1 zxN2_x?0O>8tkCG^!-3i4MmjJ$Y6u|2(T@nq-S`7fUnxZzjOqAkKVRksQbEPpR|L(g zE&bT-_%8?>nUKH1X=j8%x2{xv$@ZqKenrQ?;Q4MRb2LNXp|Y1S7TIF5V7NOBJy+2b zCwTQl(fu#gm+E=ThkRk;m^a^PR6@G`F(Z=UBkStFPkwL>%J9_dPN`DZ_?ZZ!7WdxnbJp>Oq@lo{>97RH-Fn9*=PrmKOI5SwW(ub~I-1v2mtBC}Ba(*uyC1EU*xj4QKxA%*k-+oSu%Sc4w zH+o*}kLOBis;RM`#*C!%`Q7go1_cHEBQc+BiuWF|+AR5bJuZ}jzEZEdbkEGQe8$Mr z>M+8yli*ioh9*B5#dYd#ywSJkY?%_A$R34b()-aJP{KSgc-i*m;pxW{GoFI*WX%H1 z%Fczh?P$ScJ14Wf;}>_XiBUQ zl@rG6D~HU;p!M5mJV$(7|Gv_sH?#;)j3fsP6%lSR)tAmFf&{1f-j)Xkhn0Mdsj2sA z7ppJNHvpnPJ*>V6+OG*0PI`F9zY|w>baV_x`pD_BKLzj%z-!A3Hi%oDm9&rhSAR~s zSrfdj53$(cf@36t%uI}#ze~{a&Ve@_Q1kNAQtQRKqqUCj2evwupP^B&j+Ym6CP4ci zD7`jl>dZsZ>W{vzoV^se@X=XX!lw>%T1@0g$5NvH9NYrSKl;A(XHM7557R`SLbL5{ z@SGQ<&^|8_{Y7#Mt(K(7`wE%QUoHNh+Zr~5P7Pqw8%~34Jw1`QqGEu@xZEC2wdxWk zW;W@G1OU5x<7~;k3n^h5{*;XKM83}AM;cZg>&c5Mp3BL&9-}6ihM(yYzFTa=R$`t6 zgKcTZ{Nl@Jr&VG(rdOIXPu>N<$63$EY^AU3-C+WrmqqgBt=Bs3t3q}wExkm3OB-k0 z=_9$4(eNbvgrnb41d}o|?QA+g)>l;4d$5lC_F;T}vL4VHYF{WEw(oM(`JN{Xi)Av< zq8T3ik)MevO>`jf52z0d*B4eW_RFsz106{~x1pqaKsL*c%Qt&*) zMZdBZPVTujTNOqIH5f(KejWc7>a1^fJCJU#0-)~B4q$OcxQ*NZx>fi@GGhAr+0Iwx z*FuBXC1;icV5O8zCGQaY!pB<;J@sLnPaKS@Q&kDG$7hu;c~L1sZYQHv#fvdb|Jxl3 zz;ZA?oK|ALaCuTUaBt|bV4>4J7rE**8}f_kOr4)oz|IR1nVM>mzD6mmxIo-ig3If7 z2j^;(nRc^JW!uKogS2x<34_ArcSrT_T8f8`&Uo}OV}OPCl{2zp0bfK^;xdgzF*q*N z#d<8-eJ;&{5t}Njci&}-`&et%JudqALTHlTfPw-!#$>?I{p07>-R2jzRH&B-FGhO% z;!UKb_p)uME)ripn&`Jqpj}_ujdwxdTUbH_3dIPw?o76Bmw=~s{_k?MYKg0cTAap$ zEBnb4nMclXS7O50WIM&>s~e7S09c|{URBz#=g+)&H5@KG5C5j?s-gnt$+qgdwM?1{ zrEqNvIKTZDz&X*=Kq77s@h8YP-$y-vwoI!~m)}7naS#6ToKR0i-hT3nJmdGCSH2$F zz>)oL`R{<`L(kpE*O<0qgkSWA|1@DJrfN-J^Kf4Zfds<-h~EHaxJNrU?^V%nTuAD0 zMNS9qe{aLrbNp{Gs;ba$Fp8iqhY~WeKes5rx2>(W5zf~<)X~vTsKhIaaw5kHzTXK) z?))o0q!xJM1A~A6&)e>RC}Q-iG=G=9B(SniYWeU`gp$V-ZkKizo= z`SwOO9FjKxg<=D-gS49V$W9xhX^!eofUL+4^b40QhK0P)!s`y+JqsL8x5pZ5DWgMP z7xo6ib16d33lEFa7A19-GhY?85}5)1oZqz#FVSjaDg*O%KMvE26XIUY{sT4;-TX2= zas=#+20ify6=Ftov&PYom!G0-G)6U(pefOCT^=c9WdCi*J~84=y%i!mE}T(eMyVtg z71pmMrKj_f7}EaE&{zM@i)N55B0i=k_Gzkeb*$Zqp##BIJ-;yhxxT`C&I|FnC##76 zHusem*c@E%3}fN&!KU*N5)Oan*Uw|jXVqbjmb-`X?bR=bwK@OIUI3ZhDBhIb@|53& z77g3LSe!#!QqMgIhetyJ%J=#)v#6#!JRGu8P|ABGe&NrRd!v#l8C)#xf??ep?H`~< z5+(;&pF1=2{9Xf2f3;xXhy&(QOsyATcnJ93eBX%;cVO9jqdX-!f^uZ8`ZD0W9G>zz zARZDhV5eF_WusjmpbadRX_(VrxP~c@-6I#AV?NheeQdpBhnxJB^WWRhz}q>$_eT54 zYSf2Yw%#CS)JrIoH$1ojr1f*1F~@^Z69wk0X|uxvrrkYPlVGz*ZTk5TX{tPyVhe;C zmS8i~>?_OJ99ONlVs7YdDg9qKh!g@W`HftxAGauS^A#Wj3vmu5$;6EHnUFBa)~nVB zw4-GU@lUU-KneoheV$v`a(4F<%d^}bJ^3-tB{C8AEZTilFHJJ(@;tCw)YSRw*MDn% zd-P7YB}mkIoKJ;P;zw>Ba1^xNjk$j*GzTrjX}q?Gz8I-^LSB_p22^G}vP?^AtCy5i z6B%Z-5Rc`!VE6AoAfU58W~Uo?-5ECzaI_9v7gdTo(#beJNR0vsaZp#E<(9k+FVntR zZ;8~ICI62Q(r^GFxPQ(ZqOfxji(3^*b1N3#`UA(4wr$f+Qm7j~Us`TYza%xoyXy2^nL?C&+46$u9kIZCio;sJP_JkIpjUQE>;f2FXe=4*Toa~>nSloz zO*q9?9!bX)8Gnl|+>u6SbF%hy*7qS9Ryr%g^*JVXRk{4KVi%4Y#)6r@YC5Pa7T6c( zaMmw`bv7nqO(qBr_Dv1v+Y&E+kX?YtVF2;mt`W7!2cx389EcaDTPXAFiP&%aMqBv% zvRE@qz6K97i(>!ezyIu7~!^qVt6pMNV!k zyJ@$Yui^NZJLD|0-KlI0s7$vpxdu@*p-_LW-M^t|k%g?=tM8KXYP~=c&PG#Yk%N@f z3|xj(TVi}-&iycU6g?qC80P+7rWe)swR_ZZqVcx}p?ofihCYwe>t)HP&F`Qdx2T3BpQRZ+ zs$R9tpB%9JF+IRU7aBQO*PZWvV8zFVySob!=zcu)6(^lqVu6!G9b!n*rREXqmLozJ zWc87Ir|7vGmgLKE8m}XkU9J1kN`{m(9{;nQAO&EsIFvQ7heijaJiihePdwId+3&s| zuS~_9&cl*uLc7(u@ayP$z;nqmLRO_?jmPSClFJ}-M|OxdVvODSP_d)XFqVAEHaQrW 
zIfTdf_{FnPxu@&z61P}qsE-=PqWJL|Nr*On=q=IwJtjj2LrbP~yEUWQ-;5c{XQiZ;Fe_Z^eyX>1D z#yop-VPBbZvXSjGRLXCR`=z1;?s~xV)6kR!H)XiOjuGJ9?I|W zd2coS=?ygX7k%v$CdGH7rneYjllGsptKz{H*IxJ$Xs7A0VtF2umsPAL#t5t zL{f!J{_>>n=n|TZ=34of?h>4VmdNUdc`v}TV{HVvnO)cfc61-|ORug!_|!08dMUWG;#TrTw?eGRu%ZLAp~+Uas@ zaWT1f)@}JIe(}e=T!&3c%+2D^ujCHG3&y^d7KFFms=@zl_$b{UY)74aKReeN(O>E| z)M877bFN3?PB{ckoKz6ql!y)cwF}$D$ww9GU|UUbwD^e*{nK!>8j|(zU4ZQdWb>ne7^4 zPnM-kQDY|#B4j-zVeQT)o2l6b2-NLx0#rpZO5~wSh9QngIL!fY3Q}xxv-9Q=`AT|% z0a`M0B^k^j7ruw|esyif`plBlJMe}bUCFbp=%|g3-Asg};>D?9S-2Tqfrxh`Zz*b) zmG8MyZ(?^bw_jad3KXE3WKR!lU~s6^g7XI-SZy_VR%TRtwKn4m#^RGOx2~00!&|92 z7eN+^ZDd?wj4RYceWsx84kdSO?aPtK&XgN6$vt$LuaYWYh)DfQ{YfrCKWg|b&+n8q zKMc-|b~gC_F}Q=_k}?X9>@$^Zgn#6yI}L{5Y?UlcFSVB z;@xiVMJhrfav>|yW3N2AlWlowWxi*~e!_}|qx`B*!vHnTLY=EZ$rnrGO0iCEa7-FI znsz!;pG8qhzB~W*6@6IU^@`<+t^n}(*-6bx3^;XqhxdMm^pmZh@Njdt@);=`U92+It7>8ac8j!8W_F+wQGjGCeKTbJF_-y49*_?+;ex99XW=@SBPx$8}j%Q&Zsd2HWsB zAbZXEQnnzJY6^0P2h42!XTT703q6jISdh90{K<(35suuuRO`?d~u}-@_e+xnMTHVwH zoapUhvG_rs)bC`bn9&E-KT91gVXte|D)T>hafo{sz3|J}Re{(OF%;mwxOwXMEw367 z7^xEARt1Ns-QtA0uDZrkoqN}WJD>_CNbl71kwRqU9`hgLm3$+IeZ?1)-4Nk0_>I5> zIinra)+HI{6`rYqLJsi|RS<^h3q*cLTd$SOZJ2Xd$*OzLN$&_Hrx2HuB~wfopVC9@v?@JVoj#mvSfbid0>_ z&+8YOV>Hs{GM<+iU+$3dSV6zo(AsWl&NRN`cA?(k`wsQ~nx{p1-ufi(jajsKtj;5zmixJGWFzK3TAgU3zmw zgfC@??#$%E7KSFqLEnS{mB)*9> z!&xUVVmNk^HpPDZmJTI6V31J{qz#x#bfp*%LM0m?>jiY(Gf< zNV^_A*|K|y2rN1wRy{am@@C!^Ia{K3(csU0=hoc{)Zl@DMHL{q`^LM67twigddwrg zbk@Hez@}j`v67$+Vaa?VemPFM3CMO>a6vP%i|IP48#9kkbjEOkxv)iU?CnWPU%BL? zH5_jlT-PRtjF-m%?79;7Krr{*1mL$h99Kl2)dhyTubxK2v2%$^I6XdPtr5~+6Edym z(1}O4sr2o%i&MZVy5l1>OG01Rq7Grz_Sbk|fBr8cMhubRxKJNo0I=HsdI8dzBy-D& zGZI!=Wg6y`pG6{%^(`=|8%8&S64c_8aSBsU$ePJW5uxvNM}Jw&I1TyjT~EG+zJa3< z@-^jHI_fI1VM@~WCq#kD z3|m7pBP&ShRx$vZwr(lpNU4P6df@>BvEyO`NeimA`baS+iq^#J6Fx7QX`)r%k^-oDXKGa(<)1&IaYwSJ3YW!Q*m3)KctQ0)i0Sn&AL6nzt zahH?4+F9M@3Rw0W@+Tr7d7e5f0Tk(lXRkK(rdYceHxGo^>V4I@E{1&>mL8mEiWmEl zHHx>MTC905$`d(5m86YMh=#CYgD$2vvb+8nMczF<->M;th37yL?eHO89Y$l2YJY)+ zYjZ0lKyg%c6%>$BlMD7P?n;5u{!|;>8du4!W20W06RFJq=HG*0+V;M6WNmF=Tk_>B zd`5(d?(2c&a}Bh)B_&nCcuI@763)sXQ;BEBk7r_Q6$_WI6(xRAK&?~e_IQy@yI5eF zwYCWFS?zFJRczxWy4d?SHt2`m{v5)u>LYjjk$t5@Lm-p-T)yY>B6l>zu>yr}q!fu= zej^Qb;y8jW$Rb5!@z2NL0BlYmzwX*hz+{2X((_;9T}oZOa1s|ji1lS-o7|BbzZS`x z_VGCqhHyehM{o=wMyn)IDw-}LpsVP2y!sYCk7ln?cgt$0yc%774 zxdc|%tR#ly#Xsl(2ZeS#NS-O*Cz!5pnx}X@#uT7r{Gp$Hz9}9E{tax!X@+y|G`xFXr=!e`+S?G?PzA%XB8 zSL=d-s?Z`?#r>JV9LE$XIGg%u`Z$qKI>c_j)&SZbmp%j(0jSQQXS=-bFR0RgJ{PhT z5j+N=;OZz}QkI!jwr=})L<&HR?F%Dd9zSOo6W;=eoq#d%MNi16B29w4Hk3Z;#!22YtqKzX&lkC4spA_Tl2`k#JlHN?o0VaI^)=taiVnCd-baHv9>=*x za99U~O{y|i4=CT^p_9;{Z0P~$sjH%rfc||X`;ECYi7s<&6rp9ev=q{&JUrj#n>#VF zX+s>?Oq0Om1jx!*iGcAke^+QIbLU)P79wS*>6PqT&{K9sg=;1&Gj198j{UaEqo8<$ znmoIzP~Hb;C;CZn!^3iky@fx{%V*BR_7djb@|OztpfLKRMWDDafi#J-Xer2U#R=DS z51sUe6==T!Ca9&`q)zV{Mew?EE6&5NY@GS^ZLR$rzI&8%;NOVdT;h-Ppr{~z z&7kGnTrTSDbO3?-yH6wlG}W0Ja~-blodMw%` z#*}1i+V00LM64-Fk*M~-rU_tB`AW<1%>zA*2FMcjh8bg5{V*K6mUj{5bW7oO`G8qt zJyk4nEGxGimC+Uza>GNHfv~w6-~s;zsE!=YSMM0U+|NGi(?#59;`3~yyhBnqF4N_e zcSIE2IOIENaoQcziVlAB2ksv-{~y0fj=?;VVL-X^@+1#R?q{u5OdSW7J02G+H@#$x z3`{}|64w14`Ydx&d`Z{-Ma8pA-hMVl;RF=r`)pWd**OnkAID;-9O2=qTT$nJ zEKm06B>>C*no*eP>)97W@{!Aye=py)822a7kEicld{@zq&`SYj+oDRR=J|EP)H6WLpWo=N6Azz zfaMBrJYCnfKdpU7NwHr8%7#p>mlF-MP%%FN_xu6c+j{UmRBU>xdctU#BlX0DaD^%; z_^BqwE0W^sPk-$ZHDjaB0FYiM_krfbadKAWeWvtW=LNPgkhvpaF{yw6Y=gSmY}j)z zKQYDoHi1Ex(0wg{dj=||7&88YB}WkH%-9SJlI+vgYW&#EIQnd|Wg&3(Vne@vWs!96 zT8S5u(ri{2{HiF9_%pu%3xC3e>A~q`rol^_v18_et_YcdxxnK2ezx6qTN-FSs0* z-btapC$1IJHB*Xq`SFd>fJSVQwli<^b%x(ow1WZZYplk1e zC1D%KJz)4Hr&x6h@G0F@Ksy00#X 
zXqMWkr?EE*!g?G(&&kX)*qk{J#L>*06s%~=0KSZH{L|4E-!ZOTn9>tyX$(yTq>%1i z2-C%O`WhovY5nQtEc+p)_9Hm*cTq~Ls>rHE+i2d{SW}4pj;=Ct_xDMgsCIqin?s`# zA&c!W`FxkGHNd~Bj9h&ONFSuiCEW7vdq?calu@uT&{hLVhDGB4!UR;jEbiR)xwIO# zQWI#t!Bw=%7;q^tyD^{aXk7`krn?s4aLWzM!P=?Dw+slIB+E1lccgdg1H(d%3}|k_ zvH<9ok|&PffjYTu-?&Mn3sS-cWc%4FK{|lPIgcWU-T${{Yu6Ah4brv&k$^sK9)ZI$ zhNJij?TyEL+H8uSnd;HnXgi$*3K3(SW?RpDH{htU8W(D<+|JYS=$TO3=Rj%ife|$% zQ7Q%a0JSB&`e?1pL6v>qru1!9D*Kqf_xDYG9${#83!q=$;wmdND(%`VpBo_z!%pTB zKSXBNOLr;>8Hiqc@Yu}c@JC&@lg)enR{}J3u8l2^4>734jgNe(X1cA3ni;|&v$^GrRw`!O^4@nA6UCByJ!Mk zWDo=r)j<0;Gg#fBEHffBD&?lu_Lo|Z3nLH4$qq}d8cZ81)bls6c{4gjMs`l};)uC(`>8A{nd$14Mc zx!tkp97_eUlINI@A$iP)jZ119cP+SiZ^yGEa2%2`s`4-90@fSSR)1f;-(JRls|(6RiHTL}^x}B2q+z#i z?HX}nD27Fok8D=PsEZ0k{66fRojln2;>_yxlC_hvJGcN|QU0d8?g+qtp9s)PLm=Sk z%%%wR{3V`k^!DZx`Xd5~jw(fF;feE;aMIvB16g`!uPbYhW$t6R?UhiM4c3HV1uiiL zgmyQo2S-!o_lZrrh&@Nw+0ugPH^@^^VK5m6<;8No5 zh|{KQ=-!4HrjP}5?fKgN-EVd}*N32Z93K1jB&25Dvi>+)J~IxH`M%BZEFX!(RWV!4jw(bK_BRAtO6}o ze=mDFA?~%}tJiVc%ko3q_qEMTU~zNhO$XX#jsg2!_1Gb(|Jc3)V*l1c$t)cFa4$`a?tsZ&20E|3(1wBzQbXNSG)4TH%`#7P& zAGxU5C@0mLt(bl3Vsc@#wZKWm;=;6Es4RPtyN=iq%-ltJjVvqs+yUz5cl$4-D+Hz9 zavBV1;ZB;<$e#cx26`;wvh08m`p%YV>(QuC>ULL|4utr%gj)lEg|H%~KEdCSnG3nl)FB{*78lG+ zr=);bAux{T%k*GtwJh8qFqK=Oz9lyKSkm}R1W^}M_bG~0B<=_WK2_wMR_T@fTLS=x z;kFe*Jyk^K=n5}R+x{E8x8AV&$NAc)#iF;4G{Fnfzo$~Da3Be^5bKV*XWfjBhs*4| zya9auq0%W!EZOX19P2ri15_U=!HXuEFK$!vKx>vRGutut#1WJzk4b)v@|^NrFgp_@ zhk*K%aNEfsmK7+=AP$g_LT&84K?4C^4WZ5>9Y5VS0W+xEtimHR%t)*|Ej!HtD;}yi z?{_>7n*qRXC~E99mzXeJ^|Saw`e#{z+-OCgVJ2{*S99mk&fJct#8it73AN~m|K7O! ziwOh3NP>}P^N|cKBtik#NST9`qDB<1PZ1B+i4mKB7Vs6*p+8NEeG>cnHcwi_I!kv# z3lO&TFa?gkAHW{Q+sUA_0~AD~gxZv--)Xkke7lO1la#a|sm>Nl4LW8I;xUX?ZS$16 zpG3#)ek-Z2&pT3f*6s7Ce6`efHit+00E(aiH~d5P_#5c6T0NcF^P(dMx2eD$KjR3O zeN>kc#1aiSIQs6DOo!AC12oTUjO zSL-5X`!aRZZH4ABVNT zF9R%L4M6P4(MNM4Z7HF8iGL77w-p_zQtPAM3oO#M?5TacCf{Ueoflm3Y zt|({~sn;U7wE}P(o@;sljo0edt#9;0(Y;ds#)#u+Sg;5?ppMgvA78;D(^m)s$YucO zL+``B!z~}Q_^7SK_2<^ZzZ=MfB-_r`PtY6YX!<#BP1So%d~J&#DlJcaZ-&z3XQnO? 
z!`XC+Vg4hI_4=P2m|k}1N{jjiM)iO`U&g(nOvgX~z;M>3Hht?Ocp6m+6k<&=MdPpH z??7Qd7db0_M24TQ|J7hMOSR5bztBtP@wiAoz_6s4*3Hp_7SU0G;A%xTtOQ7$bh^1&CRV;-M1P0@QM!GZNl&=tqO`2yVHh#3!WFRaRSff{c`+3M#)i;es!37jbn z;!b3kK@~!l;OAR$pj!;%+|7_uRB;M(x-MYpW3I2|d&y};+c?VI96;E#-W-=5&NS$v zl2weZIiX(X4Nu51N&umctsEZT*eT2OGbqufW*{&Jr#px6E8_eKW41N$`*_xOb1M2U zA08CeTwr|iaR8U9uwd5UEQLJ>@XV};hRm_kYT{zZi4E&JGG7yG3bT!2`261iR4zHv zCc(~ma!<63dXtJ@uM}p>VI(_kuy|yl7=$#@*o@;F7FQWmT~tHvFbkA#fnsqUK=iED zt3$=s%K_-t;BAQ3VEW0*at$*;r_|X@W@5I`OjX9{Pag;zBo<#obd(r>oVdPcC{`H* z?M^#@`gwxAzpq%V1i!-3DB+TuLiyF!ZWt4Ces3T`;DjRvcj_NDUF(A^iPwpG0hJ{u5D)Nk zCAlH6sD~4%vo$!-JIn$~_yyti_K_=J4v|oy^&b5v+5(LYt99!yH5}nOh|I$X_=b=% zB*l(+l^3IC)I`O^z-{W;87-jshGt!KHG{CVLN1oU+QR09!{!`DE|^C4Y% zZu>zD-euN9q|#lUuMgDX^b=s__`Z*X@l@Al4afN9d|Rdj4Zht598l;BZ$MwZb-ftV zVM!0lmq-Dr2T5Cbop>sBfgq z6Jyam_mSF@v-zfw;=ZXPtpuqo6@#|J4ycKTwE^yj9M9{7gO!x1&vH3R-`(_xXqvfa z6iN%?F>}IIXrpekN#S)>B^!sO@F-irZjBhhde~^a>}4OQkP|(&hE;kvOAFG^Q-U0N z;*3`EcHxd{QljIs_%{HE&5LC?n7^*bm(%0ZE_z7^1%D8G=~e(zB?m`gi2LM4j{1rl+)}N)yLoqRi<&1@*}88-$g>By z`8g-~PUiHaP#<(e0Iq zc%8(Sg~xALP)k#B>I`u7-^bts63+#iPbmfTxKN)$VsfVH{Yc?-hu_jn5PX-bjm7*c z%xS#TvW}WxCUWJ7j?Ce)mq zb{tw%5YCVmpH>pWKMSw!i=%SEsQPX3mnuMX*|8qJa;j^JL43&h&bcjV*6zarWx<3D zHFOvw3dqt_1nOn4Fcao&K=6s*;anvozfI(OYb-4twrsl!Tmp;4Wh3-^JOl2b#u6bG zTd&@1{&MC`0V!asD^yAU_hQa6W80M28V4F~oR9 z7`f88s*P@%X`=zn%>F$zJlz_MukFF-o3bP*M+t;stTwIax*tJPKUa`iMt8iPu4e0P ztd=DlU} z$v5#CllZnrKyaiIk*EYsViJ@9aCx}qbTdzLW-py77mpg8;pN(wl7T>fSPxq@^Rxqt z)cNpOC_P7UHg;@qgivDmfqPv{`c0QVB?*(c9o8T3Ea252R8K#>rvkGjmC?V6V<459s2-1U})hsPh5+2%J6?(R0F{hC#8Y;KlPTz zJjqMhfyON>C__Cx@6)wU&C407se@W%8fCZVgG^z&k!&NU?)<%7KsGhaaezi7wAN~i zqMO9#)^fA518Ag9z@nE1dkFnRTjoKmU+qB4=`H>|#sidXTcv&N9O2(@=^1}r&QrSb zE56U&P6fb0cGsBm28jm!DI~P*d(Mri8O3b$l)PLSx`XCr1}KIYkd9&ozsG&_e%(4% zt=Ek$DIU9&uPj*JvR;JuGaskypk9Qk$>W+dJ0T;`WJx+eOmX;Y2MSgS&taP3iyKgA z!n8g+B-hWN=~&8?^z%0@$>_R*k_H3m(EZ*7A1Ml0GE#MPO~vrWYP1Duj5B z)OkQgns3^$hNU|nAtvP0-9vp?uR6;%jO zNjAJBS*){3Wi^#?ZnK`RYH4YCH#LV*tQw|H+qQ`2@S;C`9+EMLKSG%~_RpRh|A!UB z(4RIVW=AvZf(AalVfNCpb9CU^q3cG#H*Pv=?tim$F0r(;73|0}$tfLU(~T8gzBh6? 
z2oK}h##>pw&IZe&dlZbTdb+({ztlx@=Z{>QmwnHlvRD+(YvJW{sNoIf8U}}lYnuYU zoQ}9N;+)RPyBip<1TH4AT`TVYsnkckSxruuH6v!x?Wzc^Q1>?*Xd3;~`HvZuC*9F- z$FVV#8V6BF37tK)(l1(DM}=!#fv1dT*=urFaz}b&Ag;w&&MJPB74+qA?P2%-_5y&n zQ`|G-g4*Xl=-g;e$c*Vc$@T9s1xZZvw#sF&0oB`IqA$L>_VJIDhb?;}_f2l7{M4PO zvH^~6nG!B1pfPW4H`tYpGc`4}+b3U|iq;##3kT2WUMC<+E3qRIx)2RTp9i))iXqpZ z6<0|JmXXg!vdRm*;mhTD;RS=?Nf5iZ@5+VHvY{ku*}v17c;qs?kV>QW_LAZS8%vC6 zfQGZF;Z5XbuVN2p?SAuqO=>$EY)ZZ<^HXl`7)mHj9V(+zT|>!BoE`3zG)vaO-lmpO zK|=b2qYAVN$W2;LrGnD5j#5D@XdAMV?4)QSX^-+CywYoV;tRiOw1ShX!u%ldc*4Td zYRTyvA4?6J1VCS4d{v2F;yHkOc9!i9pb51Zj#J=98zv?u2M33WL25Vt^dxXk3ERM( zgqMkdp{%r2PV6TVa4pEHdOozMsfjn$Z+!Eo4IC$Z&rU>6JJ2@=~;Ju z;s8zC=-qMhZmD~eSh|Af?tRCO4>Ss@VmvGzyL^ccH4O&5_{(p9N+;fQ;2!LGW@>UE z%S?_q0x2sZ6q7Z6F*qeN(@<44_8(D1ss4zYU9=PqiXlgF6&Tz`hOe%h7qZ-HW?pS< z=mp%Fteh@qcTvkPgT(*l#OiV;qK0*KVkI~G+agEJKMy{5fFnd6i;E#;SNzyxSY_@Fc?K;*#fSjrsv)I5U-PP-L3vE`=z z15D9X* zs9zTYb(-XpR0($L3$y^WL>t1JLgpbdvtb}cc zXm+j2h8dT0 zRUzp!A9qVSSY+#wR%gHne&E!w5qeC#;nKfOjP4bxeQbz{8Ly1a)|jC zJJFR#jqY7!kqdASyZ{QO75k5oc0r3Ou|R<%!HMuV;0w(m!- z&ef9AabR9NaHuYAiz2DB(#Z|?*m)U6G)AODU4LEBW3iKDx~G(32Yn9OnC>1IyMP~X zA6XJowcAWzpbIHn!b$xjJ9Q1yRZ{(%V(F#xUBQzBRcIBD;@D?NQG7(_%QmEB9;KhP~louxoD35e8m%g9i28HpQXQOd8kOZON{Za61ySFg3>zk`{ zy3*7e4oic$pbM_-Tn4HPm$9bjn3Q)C6=^>>V948uNS%20yWx0wY-2@aQIPs2%x($v zmk=W5$c3BuNoi6maNK1ugKB=je?NS@ga^GOGaS~zdM z74fx`q}0S*^4$0HsJu!loA)iVf#TBoyz;pN7kjg(S8OhmUa9ORW$==7 zH`ep_iCXw#%<_~iZQWg(2Ha^j@1PhcV0+r~pk2^P4_T#6jY{8mRUamcVOfTn&}IA4mDxEJ zpn%%fT|E1}+;Te@B7UDVZzDxiWbHXh;n_4=RI<=|9W-XspslH>t-h_Tq&Zrvfrg(O z(fx@iQe6bOYbFihv!CNXpFFcHapnFZ94u24ry!+pU3+w^x0D-KWbvO2@ttkI`ES}0 zCPdkFdZeu*3e4?$q2DVmW-xQhbSP|#U(?3l_wrY1Z@m`!7Gw%PIa{D(E+Q=IkQ*$dwvWU%I#bu!JtdT@HBphI zQ41=k#WO22Y&vtv_`O5-r5o_7o+0ls3?A>Unr86`%A8S*OfhM>Tx88wb~!t{TNaq0eBVTtNf(i zEOF7>B_)ch?isV4@=P6! zJ#4Ygjv76zjAbQ}6XNL)cI`$Qv0FnVL6Z+?i3xlXPn=Dw*_0mfK|*8#Z-(Ji0BfzqJ&q_I+GxH7tDRv{}FSG&IJf$6@ww z#H0zIzHLr@!GMkJf!>B-@5CW6#>GjEwxY^0&o70Uqti<8&qbBy@>nCRvW9@|&*Vgg z++0iEit-)3`Q{g{(P?$?&H-l5EGanutc$Sk8xEem^+*z=fTTpb<~uqwxnpDI(CGAp z_IAhmc;MAc&oAu%qU4(k_X&g}Z1QdN&mR|tN*A@HMY)BB%wL>TNToe-c`(MfAC`}& zNxpUp8)RZq0Nkvm$^IamvqQ0cqJz^@ngP99> z(G`?l^w(aDWp^K9nN5px&O0cv;ja-B?+D`?cSui-4PO|c2-CFkJc(#&wN5NI@a>%) z_@k*Rd&wEPKkFcSx8->3Sg&wjlK7dBhz%;V5(iD<>H~*ASNxRLDJi(Lc0rl&E5CL& zN^Ar@h&6MLBtaYjqEP_6^ND{_d4NjFlKf;<5_{Ifi<82leQomolj3$`h*IWxz1?3| z+W5oxM;OusgSfIYp4An*sz2+yHs|nJuK;OZ;hV_4#68ug=0CQYbzhoq(lK>lQN==G zd4-pNA9%K_rv;T?`2~yB0{bNP& zFv#Qj)#%*?CBIlWOSi@cvHdJ+R9<{aRVD<}OXC`15V(vNP(yZ#d;5VX!sgTSyOws% z$2w=;1H5#FroKWMHC{ig>m9`+9l=&6-j9}oIt%}t$=<;-kQ0(G%RZDcXQGvnhAG^A zJV1Ib^rmS+I|Q$yx3-hK@Hb8d`THgqV+r7yy)YkFEH7EEwz3VDq|@{=9uFmxjx|dX zW550jYA>&Jde}+Q*i7blnp&zgf>PEln2!0|f#6wbjle(|cc0*UNzW=hu zkoZ~h<5}R_|4}WzupCBztswh#aYwwBdv0RD+9h7@dd$~J#oqBJBk#H$W9S*H0tT=R znR4>KbBA=!9nQbZ(r1;E>41l>RkaN(0&e6>x#UD`TIb3YErHFvC`>+p%xFqukoZ8A z|G?aW93X&usBO-jvosG6V#5aEn)A&~k>*@%$-)W^DJ0Jep$@a^`lX)*?q~gsj@3#3 zZOJt|I`ZPAGESRjJNdW~h0I!A;ed5Q7m9)hts;$_bur(%ipc0X^UJ^BZjdcVC2`rp zGQ*i;{7C8f;;{+Gc-@~165CwY-c~FrrOH8!Yx1T?uPKZLs;g775+1?*z6%v$l z(FY`ZJUpi6G*c(+bap|l!+;%9knsJzKBu`cTYqeKuDQ}cO*6v9n5FxlCs!PGBwsN( zIHV+g=O7iYY{9g0mqH}$LE;8gZhmaS2hMoGIS52g1A;RHI*k)^!uUia>Ju`E{@(}K z16|=qr_6=$NrYh`;s0?sPSOgFLz)YTGHYiB&Iv{hgO&2V-HxxFiI2DD|I_IXySExQ zICuu`xHOaABtZiqpJ7PqJ913suRf21(q}*Xv3)Ob9w-ZD{yaEL3}q-_Y2MiLF7s~% zg$2vA(&xc`9qsmdFXNEo$Dj2DOnxzGC|y_|Z_7QUTfG1)X^l&g8y}9KxOOgEj9XM? 
zyr>{kDbkXLFB|x^j)%_btd7IBpZGdbbidTmEdEvJAxQiOU z5ZplyaKLt~N^1M>yQL_l&Z9osK`2o2lO3CsjG9Hold?yZH>KHV%kxBKh0(ydZ*xCI z5}O+Hm>$t3zki0)q^Sx;;gJs#Gq$vJZ69er)K`yYikr2(d^f&U}RLuqcR>yBY!*25K*eNpCWKL27wuq2zwllEGk3>RzMmF!rdQ z3O0r~9wUJ}L+|;poPd;vYsNiadkdF55%7)p&TozX0wcTvkjpT~7EFz$0kEf0Onu`n za}OZR`S~-Wm4^LvXBoP1_1Of5KXjZhC2yOAu<@>XQa`7b>CKmDTqRNGCL~WUQ4<|E zry+&8AwQ5y4C(WJE*YTR3($g{%L3aT#vlrB%DGmWBS`7Qa4{z`kLAiPW@D5*cdVO< zM;zgv>J#Va#C@2g4pS(Hk>cVLhspgECapsVVQ#gO1!r_>xrsj+6hYPWHmjzw+>noztvGql2?14GQnGc+9*SZMej2AF;W63~g-Ws&6Ef(~6b5 z;yn4t%|0kisO2-5z#quPZoMuY88`krJ~9{DOOZ zkI6UR_LVcatHb3WoOTlqLXqT{ExbY1=qDSPT&FGoWFr?8Dqfo=1)7^Pl;lC_kXSUt zC>qAyUe9om33EqbVf5_kk)3zq5>o&j6_T~#+=G`=(OASjHy*a99m$H~PC~w+)kpb& z!R1>0Dh(?rHPBQ8BMW0th6B}#i^C7)YMqqtxVJ4{-qFO zjqKFiAdlC-5LUWUIw4IYNkdW9?ivEM#3}{}RZ5BxQn4sHX;D>S;7>+Usz2T%jZgkz z>lr=!{?)7s!qJE_#ql?|VHiL){P&=tFi<5PHB(y_yNke3Y^n({f~zs#d+CwHR#oP| z9bq-z9*Wdp{eB|2ADgAvV6DfJZ)9M|+Lii5Kl;cq}lJR0FTUh$+lr2*%(h zkrL8`8?5=}$Ol189z5-rp&ftihCoV{#CF}5x-Cm!yJx6pXA;GA-S%s@tWAH*?N3gv z18;nX@;0dbZLq(nB82KbDe0{0%Vs~ik(EAvxokq;>sM|iU#bklSrr0KMy;Fa0x zC1zeHwOoq!1!Jm73C3Kmqw1d!<17jOaSaE30=(gnyDj@PcBEVHZ*{cizMt}OPkE@7 z7?_Se3s3IH;VJ@dE%0t3)mOs)IgRkZbJt$;w36(=Kk*8w=gh3QlO~x+P#9}j$^$0) zQeVB1D`S?xiTA6kw2)5yc3f&9lIQSpL$Tnb{Cr0iNKZHR@vZjx8N=Cf#mU6 z#_3IoS*wlb*k6o86kdL?# z%in)#xVug&DD|F_)MIBo*{BIRprH>f*`WVrP#RQXsN#%39$JD1`DOdbQ!1;vqG%_r z4|5I)LdL=V9n$?C6Xt=PG(K3@`54D`5mm!N)fkDfK71(PxA!JO7WAkRLH{_$~}UWOV(6&0NrFjGw+FIjfc>TiroA|Ca_ zF9m2Cx(Q3pi~wQ|!fB4*=n+pcg#r0R8Mud3e=i5+&`6V@OhyR*{I#trEz)DHsSYC3 z!>EJy`gu$IO1Zn(B35LE#*jyntQR9(MIzNm+5hHjyhHQV679w~W zWl$fK=|5%u9;T#_uL7&cjo=`u+6{c*fSU_;cbxa?y6W|@*#;XMc2t7c$e?JlBW&~xMD~ylmL%ZcYX`X^H!|~47h||Ol30F2pw&l|1 zigLCg*Q-xX4=tr740DUl+_rS3J&=$$UjMy~b7$~QR496+sHR7Akq|zU{LO}O6c*x< z7u#V*v4GYWue$VQz4sb08vPx6ZsY@a8gc_Q;z?{Z8BxZ!2$nl;5Fr> zIXQr;QIlvBTFH6Es3ai-gS%|78t)lZ43^eGLy=6+i!hzkAKGLclp>j$&=86H3U z<+2l%T#W8b>`vuiO1T6;CmPvs)mNwr930>nxo9w)F(NeBTb{_n)>ANb#8JqT(^BSN zO=X%T4P{i9>X*ysUH>q@^$|$CrP_Y(nPd|=0WTH;@S5^LfrMK*5)md= zPW}dPPETG!dcS9|4{jg>Is}VruUo(~*BN0_vZKif3}$IgY>q6=j+U2sfV@pND4)D6 zikYYT$bTk&5y_qEZsW#lJh8-s7vl@j^ekc3RrjaBl36o4wv^$BB3P(e5nIV!;a3UY zkgV|s(C(L(qY>I>F8sMOmrrE>A$Ux{Ha8-kg7i`=K4pblN&OsrDqIxB%Vo>q5)iyQ z_j{nzeC69Y_bpV9t|z+EplMmE@j=C&x(VDN%Lb~@VT_TK%oAD|Ib3-BSO~pJY1_Gi zo-`@`nG+18j{zMTwlwqjVNOqEnWXZQm`68{0qfnxuAecC1+|hZ|L#_}Hs2E+3y9o8 zlm?&`NozQY{TFvAam^{}njB*1dmGjX;2nM(*)4t+Jo@02tL0FBDXWCu_^%g01=TbK zR{UBt-#l-McgQTus0QO$Jbn5ZYVb*b8j48^QjGFP%9WJpEo;qJ{yYcDgzCuG@&Do1 z-8vwxaaebAa;Vf}Wfc94{RR+m{?`}4g+YyzL3%_3CX?Pwh(-I3vgT~;fkgA3O2-Qo z0|VGc5XpBN!(KJ<+g@nrH?Hn(73t}@psN=a5^>;^Nd&%a=XO;qiJ#=OGfC zaEhgyVpT(-{UUb%S3@~4{{2+i*^sH^Xv^>1PP+%m-+&5dEdE>4FQ^%cc;mPs?J|7D zQ1`tF-ylgz^qS>E%s)!3@@h_$>@mt8PK+A?aAG~ZGz6A2q6Fw&aY+*0+D6b|)|G@k zR>MZ+B^<)rknKlUHK%L`$NZRaqF3YZ)6gB6?NVTr9#byS>(>IHV*YUSxytKytZKfL zzF(0Naiv7akWnEop%8ERLhrFT8`Qtd@kR#ik=mz9xDs$WXmCuJv0zeuXKyb*2O!Fx zJrz~xgnCkPH@`HqPiSaum;JG4q+!}wiY1C70X+sV>P@O@onBsQ7;eyuwSQXn@RSPz zukH@>F;B7a$|os3%Jn-}(3Z9}Y968*!KgJ38)EGQ=RB?IXzhs9i~NzST5Otu;J*~zHhG?k9q`WOLn+b@-2KV#~*tGK{RAYuTb#ykN4EiVIxqPp6nbObSM;8 zF8Qlr!u<7Cy!fwzA+zSRHWb1xl33w$^z8zdv$gf1i!;RnM6|J_@vqNGjSC4&6K?^i z;golM;_+)X#8rF9cmBwON~T|CF=3m@w*aeWO5sLEAUzac7DLwRMYMp8e~v3m;L)QJ z?;N(ox@4AW2psU6(c=lh?hZex8U4$!>S!d8Ag1^%dkjQ>j-93@Oc)9eJ>Y@Pq*n!y zL5D-oIL(={U)e<*IW1CPb9e5}Nu7N1;E(ltc&1BV{k^ZbACmRJ;>Rou;vVtToLFj# zPU6Qq#kYzTIhnL-hlTiJFLHC38kpU_9Gr(2J*X4QNLH^i4_ReeOpHJj&FTX-=Bh^h z5XCr_i^9h0sMOZM*8K1;G+t}I0XMiXv9F$EoD#E-^N09KZ8~>Hx8a}3Gq4*eLsN=b zA~=_z0P60)cM3W5ctk&ZJrk$L?S8WnEYEsgDLioud?)5EldXbPR2KlJE$thW4uzR} z865G#$&=1g7AKq&gi 
z0!ssstG*XD+p+OR@p;4P^>&~<4K*GN{d;#UbJxVO$S?zV>8;e*sm?XLru`K_)Z+L3|3}&mGWOzd*FpE@R4r z!fr!osg8}}-NS}>AZMJ}gQ#)VW@lA5cGg!{3B#xsWG8LVl2p0XfLu`t4d>xR@Wk&= zIg+L{)3{hW*)P`lO3Ubx1n`g1Q1WWQyr@VeJ&CDyP+nXucT}G?GVFzXS0Mtb$he+( z1KW=Quw%fTIx5e$GQKPyOG6a5C|H4jKWN;Csjh8*J;sF`c@Fe`7}0yXT5tC}K0YS) zJgMGzyK79%#vkU`?`D#4vaukn@%8ceemCK`)qCY!^VaQz5H!g>ZUiKR#MiwT`NcWg ze`wAVD6gvb-^K8&>wfRB~?$N$tDA8IYL#KOX1$OwGymuWc z1sLIS+tf_4E*SxK=o^a`748@mmH>B-=>M;DZL3si3{%2byD;}9S^aCN%<2Bic8*3W zPw09T&AU;htfFc(VmVByj>IsLA%;M7LFK!?>YMnQb3|>t6cb-CNe_ zD_^xh5>7-SmS=FEnt8Q6hh`J0n3M)f^;=uS?Y`saYO|xj+ru`{=)bO^f&Y4x4Vmvo z;YFB_M__A(d04T)*w@g^;|ST9MH-o&a5^L+%*SRJTKK?AP+eOLkKb>6yI44^yX`jP z=^(m>3s{Q8&oQG5aBmzR)#z?$fuwKAz=8OI9aCu1d7m>eb8^r53?(e>@F%lp7~KiD zpv^(&3Jl}%CQ*xPw2ho{ja}@7G>_h>(_UPzLntoXK>W~{7#O^d zJMjy;o}UYEfNl||wG;g5eM2;1(+dmg+S(a?+$6vakW(`H;>64^cpaWga@)z!G^jSo z86JNNPxGct!p_4`y#9ndYWr46wOE&Yvi@!V^VoETTpU8Zm6O1q)1+SL1=^NWI`z16 zP0Dcax=?WFR|r{M`Pz*YBDe_e*mg5t8!hbF&?S(eLa490?3wZ_Wyd*ZCVa52Z+xP4t`fuK!E^Ss@)eu z^($Vckt?9>e?{BXa3et89uRoD^oM|c+xLCj2gud9Ym-B|(ILUV;D*I^%;h)J0gmtuy_OX6$d>L;bflMY1sN#|$62~CPE+OfsbhmciX)GCmV1^vD%ECn_ zSN85RVW1^0!45^gd`aaSU1IMGc^XtvgCxrSvr}+f+4sHqa{udb`#2 zyaO0Xv$C=RO%62YUju$;1IF*nWWU`2efoX6y?q`>^nA`eHL=o-KEhBBt|lI!jtQjaS7f^b zuDV++OxF;K=VGQBPPert<<~udl96R)fpPo~J$!DP20q`0S0lZwDl%f&rxkGiWZ?W= zBm5-){I_*OWeQP@J zcm_rXxL$1jRoC-&-stfDqgKwdHu*6cXh_~i>HGJm`bux>r<=q2`ucpFk5&DscRQfs zPJe^)?At_0HQsKgY<3te$C=6Fcm6k?nVCVddvD$0wg z$ZtBR78o78TD4)Y!%F!RwZ1tUyVnG84+$&@Ip{bhX-jGb)v6j+GoLw&eO8^egt_y| zM;106Y^{&irsn_r9fiRIBFdu0f!D+=|X?rv-eCivBe9 zA7iZ*POP9C(e83EJ>b9e0e;d=ejzxE)ajR0e3wju6*3lGLIb3iHQ2_69Dy+m;3`Jz zcE_srCaQM(qQ>&AQUP0ro{^Q6^>;Jc_vHuB`QLt3%eLKlJCdtEpmEJ*MZNQB?oUO_ z5isvT;AtxQtZ6%H(UP_>THx&qXm)ni3lPT}8&fBjeMnPO|EK{!C5rK%iUnRj1_Qm^ zt=O(u71ME0aNN*TNSJ=$BfcY_$#g{zu%g0Fo9y>Ri}tlQF4(;*vXa0o#*pC;te6v z(|{5e5Z(T+S^QIWHg8ab08GEu28_m@Y$&gdyF>C8-g*rcu8Oqm8X8mC&{3z-@HNP1PK#LRaQ4g z$%AI|E5=3pihTm;8*`fJ3cSh6wdwNJ=O)%{d#sV6bARA#5gQnx;||a_L7T^bbVgsA zfX#R~;lp?W({%u9EI^fg1r8oK$hQ4<{_XV{Z1Wd&7S8B8SeF$iFy>+gm=1BnelAp6 z$`AOO%O%em5o%999<~o`#k~%NIwk>~RdsJ(vj^d*i`^|oW9$ZI(}JD#98A?@ z$!33=$D!%-P5c+Yeo#I4CgmTb@fN;l|LvYa)JBV~nn+bSu#0bYh0N~pTu7131zjD~ z>(U~}re(Tx9bTo)ib@v&&48u32hW{qZMyUyCI_VqeRw3#i)1SJD1K*X z5XW`gF9p{*nsf0%ihTFDDX&yVa3IOq+{d+fg;)pKCKI$56wU=1Hf~1`XZMj4fh*&) z#{dCgl(y&AYFofk19(|}EbdM?R(POZnNF)3h8XCfpR0rA=1GRBy)G{=udvW~yFape z5SrKv1VX2lYuih+L}RS@{ad5iVfPSg3+S)G?^v{#@Am($(TjDs1CqJ*H-y4B5U+91 zujl>h*RR5~qD&fcbPGb)9RT#chJ5OI53u4wQ+KL0eide{;o}ENmBZZ#lzet@VktBf?UFM#&V)-eRDFyzGA!60mHKJ!=As|7+!^2eH zQkYLGN;;0ZyXiR&kLFM$ow9(NSpX`vX-k7qu%fb#{4S@_J0WR3-+;Mz6HB=w)x; z@9}pNn)k1u5=~hpT0XR!bKHZquag@Gr`Qs$)4Bz0qjmcW8mDY!?#AM5MKWfyO4AMP z4a=HFbH(N%`y5h19KO;&0ZyF0_>t>p76UV`m|st>+WbE>3A2XEK#xwUE8J1@u-(LS;On4Co>gDmURj#u{06 z2R1SJdAl_Btp8^wt$3baETW$qkD+~_?JQU_Xo&v4n|`-D*RVN3&1cLujV`wJ^mTpz zIU+=DcYw&z@PhZpTUXbB~J&9W5OIa2@{Qw!W5K)X@)H`DLY%AKvx)NO6+Is{XTW)MsBC-7WT7 zj=pz@aTx#(M0vCLz73i3=;-K>yXTgc)-X5{U?YW++Cck&M}4(c`@KNTUn}EsU{pyw z)VI&}0xjGeTt^Icny>er1zzbHMIEtr8E;llL$i}PQG_H2(AVj@ezn>V{6|kG(Y;g2 zH0Alm?HSJ_g-U0t*)1uivuy zy#Y1ErS;-AV#P(Y=>N5sQMl8BjJwbK&4Vt&9Es|f22jQqm(0TE`s_WCu01320|SHm z#52qE1{TH*R2-7*Yh9wqtUzTo2H!0ZOIL8QcIPw3res%R&rI#ejOQd*+KEwjUFz)K$FqZkN9ZAZM)G#lvZC-@#~)M9cY+C$v3BH+}> z7W8tK_orr6*2(J$=uBrgFsNRsJLaQkmIEl2KtSsvQCdP59UVms5J^xkQadn#$7-!* zZr1_PvwdV2&m(k3>f@A1m;(Ha;m-ver-YlK=I=ILcjqvL@H#HIOAn|7zy7#=qrHA& zJG%D)qZm1#Nw~P)6>z*B-6^*-pbqF`s4{kVUH0g$DRhb z3)2-i6GJ`Y1h|B5{~zAR4UGVcs^ZSyC>>QTLBsM-CV-(+(XTb2qT`H3uK8=|?KR`p zUqA(>)oJMW1H^>jATh3IC+q>Xs^YjuKL)~!y8$>yBKRkX2R>@%6Q}gGeOyV8>3y!~ 
zf$M|(XmAke4!z#vu)f#d07%f}+}!O#=Nm^Z7e46b?hXh?o?n4f)J16Zx)nS&qu1kZ za`b=t%oad0!}InJ=;8y+5%k$nmj1s>LYXj~1d$evOa-4np*D^KFMejUgEW>BA&Xk7 z{N(j=<0s>|NM9|F`C*R-+H>HEp_=N=~3 zH9}H}yc`fy@ZGP@tga$}-XEgh-Tp#iBfQNaa;;XGP2~Zz8u>1|Ab|dl%?EkmQ18+h zeb7X1UFY}c!e^cDZyFaJ&u7n#zRz5h3(srkKKB9ejDV)F+KmR|=}5h^l;VboH{U2! z?#{!>1gL&%PrFBadQFq;8=b9~vY*O?MV0Er0W}_zB*^W~*=_Y*JW>Pn!({BkRZ4w! zV@st#itD+IMMR0v$)Qf0<9n;~hLD9WTUUhgwW^PDN=a4`Cor=aK9= zkgLlw6z0^&DPmopU(w%e_dPZX;7D=-G=cxL8BjL96n%5PP$fcYJQ&0|5vse?(Yr~U zx*EC%+PVipOr>r%$LC8+8{7Cb66T8bLmE&#jUR{<0;2eiN-}PCZEbCG@?wZOd$0xp z1dPA};wjM=85mMvl-Le1;qv+UnF*LC`1P#m$KiA#05z=4%|9?)8qmCG6I$TmZ=Ly! z;Q}zy5x{Xz6|HB>oxbmJlrltK7k>f2%g)8sc{45Jd3Vx$(GA~dwFC|du)8Za`2YA} zqXV#q!;&=t`K61fpcuohD`%L(%$6fvlLi6?KnXZvg9x{s79m_8}hMe#kLWj;gj^#mr)^54!XQz`!=KL=W{`2e^dNPXH$ zNDPEC0plIRnULNvdzjyE+dhtE!4hFPdAv zy=bdL+<&3#e?b-u5M=S!LvLyxqU85CdP5~o3@@2(j! zJ50gyRU#WsxM7u{rb`RE!A@&Y|1STs=6c7O)W_2U*wO22IFl841cr|4yxEK3zKQf! z{InKME?2})@$^ml^VisF7=1{ME3z6Eq!O{69SE1QJfJSKqJ_2C4peTI%fKYw8isi~4UT*_Lo>qIK2K-_hjuU)rrsFU7zVG)TAt7e$JdS>#s& zz$psbz4mC=Eyh3OuKeFs;|ZejO+xOwxxF=h)tj)W@8@iBjM0zRr(SmP6A*{b%oXd@??cwn{+axvJzxen@l_L(~ zA`!Uv$=E`^E9T5a$dFX?kxr6MGap@pRMKZ?Fo*2F`pl`{+S}WgvLq{x50K|td5+Ut zTnNtyy;ttS46}L=N%DK`Gl1d>4$e<&9l2z5iPZNOEHi<&sk!LM@CyDl`u?AOMhEU0 zlTD#Qy(kPXs4?GoUFud&tKZg}FCLo#k!maYpb#)*_yWkXeFe;z_cPibiX81gct{%4 zrWKB*ub@ob`0f+J-`!&OW+I~V6EaJ$sKTcG4glqqrSq{fZS!wCP&?;g(%DT5nInWf zCpuW0UugHtkynF8r`W^Jtj$hq)6|zM{3fpUlyu#Nyo^1`)R4k+6ViOw-RuqtoG1Hg z!osk!Qs^l6qx2U3uaoWt1HK=CAD7kEVrn|9`-7JnN-4`GN-LrJ?k#+WR2Xk|ig}11 z`-^DS=p0m2LB+Q8br|v-P(yA<3m}1ry)J~zOH3S7^Jpv=Dj@wH$lPg6w~$W{maG~K zzT%op>(`3@ab9g?!FJ^gsz>d~5Nl~`Q(4nPJV{a=3*%v#UxY70@MP&>_GtH^Fy@_v zL42!9`BsEVR4Gi3`b-&<+>)Je^WDSVOu4*Hr&c-B`x$x9I2kRIe=V@lgAeg6Dr9`S zykjdJhY)j&>0Z9Ivx<_xscY6%C#zLLqiR;s!@&O6j9b#%#etJy=coR$dkgU!x1Q`X z+jpmp%cp+yTr+^22+t4_ZYrJ|H*1QFmTvp7*}b0Wsg~ z6fh>3Ys1UYg(@`8ANJ*-(DldMAWQ&?XL_tL_wjRVzogY_B2H-vt<;cr4mSi(6$^{e zcc+$THiUc{X~q{lukp<tK_a1yz($oTk_H^_KgB$=S$MtrRaaW*S0vA zt{Mvp-6Mn#JKnNn*D}*vcv`wmk8Cln#WxZP_s%TZ_awU)2*yTxzxvaI{OoV!GcAGj zxFALWedmGZU)i`E>bP4A^^8B2{?gv-a#;o_(=x*82nl))ZaqK>s+jKv%0CTogiwq$ zv^8m ze~b?Y`fJ!r#o2T`?g4|^=jZ1?YE7mB&$u84?WT*CgLnzJ_>AT(VpgQX^)60o!+j8UO7sLMXzy3Tau}@%Orv-fL+wIq?;p zGSd5>z_`Qz4;SG5SJ`ZlI>){G!(nU;cuej&LDaS(F@(d`wc`o&KJN|C`9jZWN(^wp zk6XyT4?Vn9l*Xc{I-B}V?0f61Jhh4pd0lZw?x)MiA6zG$0RcgI`Pxu}^#s70RKcqJO{=M< zYK3wEvBDj2^O23sKq*MB3<$Wn>ov&7Eft7%NF6g-0VcbCxK-eN8XFp1TwL~ccL9a1 z>0GHQGVev#;PFKMN%uWO0zMM)H<(GaE#$&94i#M2D3K*W-qUTgoUm{|1 zXqU-$EraoYimOa{G%L@ZocArRH}|4*6nA91uq}O%ec|u~jwg=<-1R42nA+tJt|hrt zV`YK9s?Mr(TI^xCdDmfi)7}G5fExwmkPVC*|D)uO!ny<1oS~LrkthPi@XLh3^?O-4Ra6VGbxGHTN67@2~g8yNh_k2p`1}{ip_tkRr9hqrD&~ zO({oT>6_9HD%E=l7)t9mr4$z8x0<>vrLlb^GG$NV@X5Z@GeR#1sQUzrPxwco=I zD{V2&aaFcm*dw|hTv)*sE=ps7zYb5v=BAFyiR0Z0Cpo7&@TBA)l$LmyG>yBALq!T3 zhX!wLr3<=}!QU z1VRNfs=eQ3T!)@fluQ9p%{BL? 
zMnI-z*DUWgzP?66G{JIRj0FVy01FuXO4dA$bXaKT{9I5yl&ZG|K(K7g?D$fP0?2Sg zeiCM@fC|)@_>dP$QC3Pft!39W?}lw!acm6t581c>t)h6PrmPEluX-(n6oqZ4Ao1f{ ziC^j2)eZYiQ{%0xg!rmuplQmZd$0@~K{#C4*y``bl^8%h^5cC%2r(PZzu>wPZ`>1u zYdSmB!z)%6x)mt=1GcBf&>>UrYOG~fHEJ1eKQ7ELA4$MF)0em1XiW>2#bfBU&dH%g zf@&B<-Z%gx#bC09$=H3iv|xt8Lec0Kg^uVJFQ*7WX*QHKKKwpc;xyDSJWqv8@%z3d zuxCrccecr;`U>H0 zV76pY;tv3fVK29M0Wu?iy?d|WTeDUj@w#eGebA4a&owqsyGrv-M7wEKG`2Qp z>x+OM@(7T{#e0+s>0`??Rg{0|AF(tiW?W`yh|H}iS~)2{9gf}mlcJl_d||PFZ(|r8 z?rMU`We2#X#>7&Ro7_ahMFl(K5vxK*u3ne2!+BBgsjP;Ih~bQE&W^}{@zBoXsp$O1dvL;S8Qi>;IJYif=es4^pr zX{hj@KHs4V9|i*Xj|MxG)wDE5r)hSa*HqQW+|v!r*Y1Q>T+#$IfTP?_wm*^Ld)Z-q zdAWjg@?<4#+&IK~LkVnaie6pYAh4|(`mC{3iv3h+ofHr3n1{H{A~3!TcuW&rpA^2+ z(pjI~^wUi@^FYOUd>)AzM$;;@_H-^GydTWZ9V|YNMNj+A+r@16Odb2e4kt+Dae#?G0usl{t+XSqQOu{-ZD7>I`emqc52FwHLF#CU-mQ*TQFrLHYY{cUXY z!OCgy%^i4*$uEHTa=51V^%_)i1eyr57M&_9{QY8-_*r^r?868uw5-fLo4XDqb=c5W zU}~rAzq8v8adEx#z5Od3K>qy2-s(Jv*6|dwUW^jBzmX-sWwzci9Wk0O-yM1&Xm3myoszJ$rGJ8|`@omvxnB9p(<4}im)CwlFzPy=$l3>3i37k*2!mej)eqC5BX$< z7BiXHVeE(pmH6eZ!}u$O&<3UYy8_SSv&t2GN7Lz2Pp zSdMncxfIPGk2Q*|AKqa|UA1{KwKJH4Q>sc(z9OUvu&k=C?j8cCJk|A`T51w1!>tO% z**a`Y2e*ry717c0^_4S_^$H3ZS*cXAb?g0igk%<*DnIFXHvRx5c(A{Xvl)KYD~gOH z`22qPpO;bcD||jpFR_Y*I~eA>fR~M`n_4&pDYR@BBPPgF7(DU?KktXu`Kmj86#DDJ ztrOn|*UsX_RWL_KFX7?Cm>B6&NTJ!fYO~Pq3@-690|YqJ21_@Wu$0`_L=Lj zjj10Rw>ciUB6)Pd$Wo04BN133zN$OwWJ})*9779-ypSQ=M z4?#Iztr5A zFZ_yL9r%C9dh4*No3Lwi(;+1wAV{bnNFya((hbrrjnW;`DJ3A?E!~}hfOKxU8#dj| zH$3n6o^ze+?7!K;Z)WaTYu#%i1K>^Dh=BrD^tvnlW8dc1i^^kIid(kZ8s9)@TS!~N zo>@kXnjkb~V#syEXu(6Zj>q6_3(kZ3=x}%V*~`gA!eZwJ0IBbD&(rnx)>twPvQAEZ z1haYERb#WxrwnI{QnKVc#^;MdzhJBZ>FvL z#V&AvuD<Z|ah;1|fHp6gaiD?3qQxLgz-=UF4V{X`$@}f_Sn&X)qhNa2(0oo; z*+hOEWAysKjBe&i@rg=}2!ob_iA?SsTpUG*g!Z=%j9@s;IFeqdC+Z5qkc!cWpEHy4 zvHJejw_=IlH5*VE-W1>drCuOih&qxeWg6LGqGfCuV98uBrrJRBfns+Kllxs!!yhBo#fvvwt=$J zNb;}WaHrM}`eJcfn%nggEScSLoF~5e4qvzm zn$Y>RNXU&QTOo+ z@X6b&|2KUdF%xs5flN1+DMOL&l`^dxgyVMmw%x?RTCSt)9dHsq{lEq1uW$pj34q)F z0$4rZxSiGHO?&)YgOiZ6<_()zkFXCmcjWiUg_pmETd#U?KJ>?V6=Z)i2NM!v_KoT- z4~!;L?~b}Ge-dP`hi`4Gc{2ZK$_+7VN!P&vHux#fw zx$T5aIaRB(oZzbbj8LAg*~MgM{f&K+N$|*#uc!KHk3W%)!}HX%85P9B@+dxV!nZd~ z9I#L`tN+Lo(YRNeYQuTwWpUFH&P3rvJyk2&R0X@W4+rN57kp`pQQG8lv|bJNzRF&; zB`2mPYz0LJh7V^;Nz02RMoS#r?xP6@Dcrvjvf`j_hxR}XcCdO~ zU@QxA+b7D6J*Hq~Bj^(irOHDf&%}Xx^_Rf7hq(rJh75G}FVm3eYRzaJAT%8gdpV&a zTnHdCv#*;KZ4Qg!eeCOYkr&%B&M`cG-Z%TgV3x=_bi{SrW~8n4^eQk7@3jU$tc$mk z{=JR%E={r_lZ3|QUKin?&BzSbws%)IRSN7Vh&mR5M69Bm%9BhIE>{17_+uTbR6M2P z-m$0s{zdljAf(3Y>%kfDIj|)XF6`d}Bh4Goh+QQfCrfb+r+QbjeQvk?ef|F7qfgV@ z2mZmuo%g4xsXSoVL1Xn?P_ORYhg{r$>Lk}TQK^t5k?2Y*eu!tWh*?gwj$OJL8?b?N zbM5^k-6#DQFKh8yO!OY zn(O89UO%R{@L+$->LZ6dEDo$&&7R3f9r7?ZH)JA-G79;K3gY9akG^F@orP(o_oy5fklf}cxtD@esJ}9N-(iYRT(}!yUX;$ma z+g81?cKdji6r#WxY^G%vLUS#F!(gz?^tk?18)i~N|iTVM?zlBhBy+&Ytlgb{_^wZ z8Jm;77JH38X=l1`M05PK?k(NF4n0NQ4PNw9jiP!Q7V>4>AIS*L+1Ki>x zDe8j3DJjT-1`8r7++F9GSAzWZng+E$w}r(Q%e*?k~P{Kn085C`L2Fu za{fh?c6i814XO&?bu(oK3up?zZQmfhlkg?NJ&2)<508<43x9;AJD|h=pBhWC;ftg=<-S$cKHM7Jsf|Bh zey}PMut^f*(mNc)ykf*2bjP*2tux2iH1RDeFmtI?X5INUX~af-iOv7gZ7w{(a&Xd=7jJ%TXk z=N|-uBkk0d0 z*`mp8pkGE2S9p)(YFj-s6Lr5bG~a}dX-h?Cuqf*#?8myFhrguV%$1AAmmKnd(De#3 z_{BkAHBVEd(PB}-y^T8~R9S*gCbCpSy71ohDu|mBz;AJ7Wd%6Zc8f**?v!7VP3C9; z-n;!>OyHR&VNr?xjJn&Dk@VIKr9zvB5$xD`O(xn;ReZo_vYT z%kB4T`8Xdkw%l8}Ea>wcVdWJZ-woS91Cqn6fC#3=)p(uRgWVKWmze$9*@2PpSAsyXM{)7S&jE#5r7&^^WnL^U-Z7^4x^a~8eUzn-lAX-35w__%%<=>O zK+$YlW$kFJTuWtvVe;MR|MyWBd6Gf84{ltgoI{~rX`5@ZdPEmNzTG$Jedx-x{pUJ- z_yG`%pI0)^Fi2PM+HIk4FKa#SMkgAMdie5KTn;`Gu|5y@)JR)Ud$v0B>jA6jXRB3N zx3l9RU5;8|vhs_N-Xs89 
[binary patch data omitted]

literal 0
HcmV?d00001

diff --git a/docs/050-predictive-inference_files/figure-html/ch050-Strawberry-Swallow-1.png b/docs/050-predictive-inference_files/figure-html/ch050-Strawberry-Swallow-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..bcaa9e22b5cc2e169bb348f392bae6a0836a482f
GIT binary patch
literal 22594
[binary patch data omitted]

literal 0
HcmV?d00001

diff --git a/docs/conclusion.html b/docs/conclusion.html
index b14543f..a04be72 100644
--- a/docs/conclusion.html
+++ b/docs/conclusion.html
@@ -156,7 +156,11 @@
[sidebar table-of-contents markup from the generated HTML omitted]

@@ -245,30 +249,30 @@

7.2 Data Cleaning and Reproducibility

The next thing I did was to create a table of metadata: information extracted from the directory structure and file names, combined with the subject data and the file path. Regular expressions can be used to extract patterns from a string. With a list of all Matlab files within the RecalibrationData folder, I tried to extract the task, age group, initials, and block using the expression

        "^(\\w+)/(\\w+)/(\\w+)/[A-Z]{2,3}_*[A-Z]*(adapt[0-9]|baseline[0-9]*).*"

Breaking it apart, the ^(\\w+)/ matches any word characters at the start and before the next slash. Since the directory structure is Task/AgeGroup/Subject/file.mat, the regular expression should match three words between slashes. The file name generally follows the pattern Initials__block#__MAT.mat, so [A-Z]{2,3}_*[A-Z]* should match the initials and (adapt[0-9]|baseline[0-9]*) should match the block (baseline or adapt). This method works for \(536\) of the \(580\) individual records. For the records where it failed, the cause was generally a misspelling or irregular capitalization of “baseline” and “adapt”.
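
As a minimal sketch of that extraction step (the object names mat_files and feat are illustrative here, not the names used in the actual cleaning script; the one file path shown is taken from the cleaned metadata below):

# One known path plus the pattern quoted above
mat_files <- c("Audiovisual/MiddleAge/CC/CCadapt1__MAT.mat")
pattern   <- "^(\\w+)/(\\w+)/(\\w+)/[A-Z]{2,3}_*[A-Z]*(adapt[0-9]|baseline[0-9]*).*"

# str_match() returns the full match followed by one column per capture group:
# the task directory, the age-group directory, the subject initials, and the block
feat <- stringr::str_match(mat_files, pattern)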

table(feat_typ[,4])
#> 
#>   AC   AG   BB   BC   BT   CB   CC   CE   CJ   CM   DB   DC   DD   DE  DTF   DW 
#>   13   12   13   13   13   13   10   12   13    4   13   13    7   12   12   13 
#>   EM   ET   GB   GT   HG   IV   JM JM_F   JS   JW   KC   KK   LP   MC   MS   MW 
#>   13   13   13   13   13    4   12   13   13   13   13   11    7   13   13   26 
#>   NP   PB   SB   SJ  SJF   TS   TW   VM   WL   WW   YG 
#>   12   13   12   26   13   13   13   13   13   12    7

table(feat_atyp[,4])
#> 
#>  AG  CC  CE  CM  DD DTF  IV  JM  JS  KK  NP  SB  WW  YG 
#>   1   3   1   9   6   1   9   1   2   2   1   1   1   6

Since there are only a handful of irregular block names, they can be handled with a separate regular expression that properly extracts the block information. Other challenges in cleaning the data include the handling of subjects with the same initials. This becomes a problem because filtering by a subject’s initials is not guaranteed to return a unique subject. Furthermore, there are two middle-age subjects with the same initials of “JM”, so one was also identified with their sex (“JM_F”). The solution is to create a unique identifier (labeled as SID) that is a combination of age group, sex, and initials. For an experiment identifier (labeled as RID), the task and block were prepended to the SID. Each of these IDs uniquely identifies the subjects and their experimental records, making it easier to filter and search.
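
A sketch of how such identifiers could be built is shown here; the helper columns age_code, initials, and task_code are assumptions made for illustration and are not necessarily the columns used in the real script:

features <- features %>%
  mutate(
    # e.g. "M-f-CC": age-group code, sex, and initials
    sid = factor(paste(age_code, tolower(sex), initials, sep = "-")),
    # e.g. "av-post1-M-f-CC": task code and block prepended to the SID
    rid = factor(paste(task_code, trial, sid, sep = "-"))
  )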

glimpse(features)
#> Rows: 580
#> Columns: 8
#> $ rid       <fct> av-post1-M-f-CC, av-post1-M-f-DB, av-post1-M-f-HG, av-post1…
#> $ sid       <fct> M-f-CC, M-f-DB, M-f-HG, M-f-JM, M-f-MS, M-f-SJF, M-f-TS, M-…
#> $ path      <chr> "Audiovisual/MiddleAge/CC/CCadapt1__MAT.mat", "Audiovisual/…
#> $ task      <chr> "audiovisual", "audiovisual", "audiovisual", "audiovisual",…
#> $ trial     <fct> post1, post1, post1, post1, post1, post1, post1, post1, pos…
#> $ age_group <fct> middle_age, middle_age, middle_age, middle_age, middle_age,…
#> $ age       <dbl> 39, 44, 41, 48, 49, 43, 47, 49, 49, 44, 43, 44, 48, 48, 50,…
#> $ sex       <fct> F, F, F, F, F, F, F, F, F, M, M, M, M, M, M, F, F, F, F, F,…

        Then with the table of clean metadata, the task is simply to loop through each row, read the Matlab file given by path, add the unique ID as a column, and then join the experimental data with the metadata to create a data set that is ready for model fitting and data exploration. The full code used to generate the clean data is not yet available online, but can be shared with the committee.
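
Since that code is not shown here, the following is only a rough sketch of such a loop, assuming the R.matlab package and that each .mat file holds a two-column matrix of SOA values and responses; the real files may be structured differently:

library(R.matlab)  # readMat() reads the Matlab files

read_trials <- function(rid, path) {
  m <- readMat(file.path("RecalibrationData", path))[[1]]
  tibble(rid = rid, soa = m[, 1], response = m[, 2])
}

# Read every record listed in the metadata and join it back on the unique ID
trial_data <- purrr::map2_dfr(features$rid, features$path, read_trials) %>%
  dplyr::left_join(features, by = "rid")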

The benefit of writing a script to generate the data is that others can look over my code and verify that it is doing what I intended, and I can go back to any step in the process to make changes if the need comes up. Another tool that contributed to reproducibility is the version control software Git. With Git I can take a snapshot of the changes I make, and revert them if necessary. This thesis is also hosted on GitHub, and the entire history of development can be viewed there.

        @@ -277,17 +281,17 @@

        7.3 Developing a model

        Chapter 3 details the deeper considerations that went into building a model, but doesn’t tell the full story of struggles and setbacks I faced. I find that I learn more from others when they share what didn’t work along with the final path that did work. There is knowledge to be gained in failed experiments, because then there is one more way to not do something, just like a failing outcome reduces the variance of the Beta distribution.

I knew that I wanted to apply Bayesian modeling techniques to the data, because it was something new that I was learning. I tried using a classical GLM to first get a baseline understanding of the data, but the fact that some estimates for certain subjects failed due to complete separation reinforced my enthusiasm to employ non-classical techniques. My first Bayesian model was derived from Lee and Wagenmakers (2014), which used nested loops to iterate over subjects and SOA values. I felt that the data was stored in a complicated way, which made the model difficult to comprehend and extend.

        Next I moved on to using arm::bayesglm to remove convergence issues, but was met with other limitations such as linear parameterization and lack of hierarchical modeling. The book Statistical Rethinking (McElreath 2020) was my first introduction to Bayesian multilevel modeling. His rethinking package accompanies the book, and offers a compact yet expressive syntax for models that get translated into a Stan model. A model with age group and block can be written using rethinking::ulam as

rethinking::ulam(alist(
  # likelihood: k "positive" responses out of n trials at SOA value x, on the logit scale
  k ~ binomial_logit(n, p),
  # logit(p) is a slope term times (x minus a location term); the location terms
  # relate to the PSS and the exponentiated slope terms to the JND
  p = exp(b + bG[G] + bT[trt]) * (x - (a + aG[G] + aT[trt])),
  # population-level location with age-group and treatment offsets
  a ~ normal(0, 0.06),
  aG[G] ~ normal(0, sd_aG),
  aT[trt] ~ normal(0, sd_aT),
  # population-level log-slope with age-group and treatment offsets
  b ~ normal(3, 1),
  bG[G] ~ normal(0, sd_bG),
  bT[trt] ~ normal(0, sd_bT),
  # hyperpriors on the group-level standard deviations
  c(sd_aG, sd_aT, sd_bG, sd_bT) ~ half_cauchy(0, 5)
), data = df, chains = 4, cores = 4, log_lik = TRUE)

During my time learning about multilevel models, I tried writing my own package that generates a Stan program from R formula syntax. At the time I didn’t fully understand the concepts of no pooling, complete pooling, and partial pooling, and my package was plagued by the same lack of flexibility that rstanarm and brms have. I only learned that brms and rstanarm already did what I was attempting after I had started writing my own library, but it was a fun learning and programming experience. The fossilized remains of my attempt can be viewed on GitHub.

I also tried lme4, rstanarm, and brms, and learned more about the concepts of fixed and random effects. It was around this time that I noticed that parameterization can have a significant effect on the efficiency of a model and the inferential power of the estimated parameters. When fitting a classical model there is little practical difference between estimating a + bx and d(x - c), since the latter expands to -cd + dx, which has the same form as the first parameterization, but there is a real difference in the interpretation of the parameters. The second parameterization implies a dependence among the parameters that can be factored out, and in the context of psychometric functions it connects more directly to the quantities of interest: the PSS is tied to c and the JND to d (see the equations below). This parameterization made it easier to specify priors and also improved the model's sampling efficiency. Since only rethinking and Stan allow arbitrary parameterizations, I left the others behind.
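To make that connection concrete, here is the algebra under the logistic link, using the generic symbols from the paragraph above (a, b, c, d) rather than the thesis's own parameter names. Setting the response probability to 0.5 and to 0.84 and solving for x gives

\[
\mathrm{logit}(\pi) = a + bx: \quad \mathrm{PSS} = -\frac{a}{b}, \qquad \mathrm{JND} = \frac{\mathrm{logit}(0.84)}{b}
\]

\[
\mathrm{logit}(\pi) = d(x - c): \quad \mathrm{PSS} = c, \qquad \mathrm{JND} = \frac{\mathrm{logit}(0.84)}{d}
\]

In the second form each physical quantity depends on a single parameter, which is what makes the priors easier to elicit.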

I finally arrived at a model that worked well, but learned that using a binary indicator variable for the treatment implicitly assumes higher uncertainty for one of the conditions. The resulting linear model is displayed in equation (7.1).
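The reason is easy to see with a small illustration (generic notation, not the thesis's parameter names): if the linear predictor is \(\theta = \alpha + \beta \cdot \mathrm{trt}\) with independent priors on \(\alpha\) and \(\beta\), then the prior uncertainty is unequal across the two conditions,

\[
\mathrm{Var}(\theta \mid \mathrm{trt} = 0) = \mathrm{Var}(\alpha), \qquad
\mathrm{Var}(\theta \mid \mathrm{trt} = 1) = \mathrm{Var}(\alpha) + \mathrm{Var}(\beta)
\]

so the post-adaptation condition starts out with more prior variance than the baseline. Indexing the treatment with one parameter per condition avoids this asymmetry.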

        diff --git a/docs/index.html b/docs/index.html index ad52d0b..0e4ead5 100644 --- a/docs/index.html +++ b/docs/index.html @@ -156,7 +156,11 @@
@@ -228,7 +232,7 @@

        4.1.1.1 Trace Plots

        Figure 4.2: An example of healthy chains.

        -

        There is a similar diagnostic plot called the rank histogram plot (or trank plot for trace rank plot). Vehtari et al. (2020) details the motivation for trank plots, but in short if the chains are all exploring the posterior efficiently, then the histograms will be similar and uniform. Figure 4.3 is from the same model as above but for the rank histogram.

        +

        There is a similar diagnostic plot called the rank histogram plot (or trank plot for trace rank plot). Vehtari, Gelman, et al. (2020) details the motivation for trank plots, but in short if the chains are all exploring the posterior efficiently, then the histograms will be similar and uniform. Figure 4.3 is from the same model as above but for the rank histogram.

        A trank plot of healthy chains.

        diff --git a/docs/motivating-data.html b/docs/motivating-data.html index 7c49f4b..8c04d9f 100644 --- a/docs/motivating-data.html +++ b/docs/motivating-data.html @@ -156,7 +156,11 @@

@@ -192,68 +196,104 @@

        5 Predictive Inference

        -

        All models are wrong but some are useful

        -

        The above quote is from George Box, and it is a popular quote that statisticians like to throw around9. All models are wrong because it is nearly impossible to account for the minutiae of every process that contributes to an observed phenomenon, and often trying to results in poorer performing models.

        -

        why is predictive performance the right model selection/comparison criteria

        -
          -
        • idea of “geocentric” models: wrong models that still predict well
          -
        • -
        • notions overfitting/underfitting:
        • -
        • more parameters leads to better in-sample fit
          -
        • -
        • a prefect fit to data is always possible
          -
        • -
        • but predicts poorly (overfit)
          -
        • -
        • underfitting fails to capture the regular features of the data (why regularizing priors are important)
        • -
        -

        I think you covered this already in Ch. 1 and 2 but here is more thoughts: -The PI’s predictive philosophy has evolved to prefer this reference model approach. -Early on statisticians are usually taught to prefer parsimony or simple models. -The idea is that this guards against overfitting and also boosts power to detect statistically significant effects.

        -

        Also computation limitations made small models preferable. -But in modern statistical learning, we tend to include all relevant data with elaborate probabilitistc structures.

        -

        The idea is to include all the data with the aim of squeezing all predictive ability from the data points.

        -
          -
        • not sure where this goes, but make sure you say that 1 model is not sufficient, we need a collection (or series/sequence) of models. that is why we need to fit models fast in stan/HMC
        • -
        -

        transitional sentence: given that we want to compare models (and possibly select), how to quantifying

        -

        Quantifying predictive performance

        -
          -
        • log posterior predictive (more below) and information theory (if you want to talk about that at all)
        • -
        • cross-validation, loo, WAIC
        • -
        • and estimates of loo. loo psis
        • -
        • Vehtari, Gelman, and Gabry (2017)
        • -
        -

        some notes from my grant posterior. rewrite this for your glm based model. -Given a model \(M\) with posterior predictive distribution \(p( \tilde{T} | \tilde{x}, D\) for a new survival time \(\tilde{T}\) with observed data \(D\) with feature vector \(\tilde{x}\). -We evaluate predictive performance using the logarithm of the predictive density (LPD) evaluated pointwise at the actual observation \(( \tilde{t}, \tilde{x}, M)\) (???; ???). -LPD is a proper scoring rule and measures both the calibration and sharpness of the predictive distribution (???). -With omit technical definitions of these concepts, but loosely calibration means the statistical consistency between the predictive distribution and the observations (errors on the order). -Sharpness, on the other hand, refers to how concentrated the predictive posterior (how precisely forecasted). -Typically we don’t have the analytic form of the predictive posterior, so instead we use \(J\) MCMC draws to approximate the LPD (???):

        -

        \[\begin{equation} - LPD(M) \approx \frac{1}{J} \Sigma_{j=1}^{J} log p( \tilde{t} | \tilde{x}, D, \theta^{(j)} ), -\end{equation}\]

        -

        where \(\theta^{(j)}\) is the posterior parameter vector from the \(j\)th posterior sample.

        -

        Further we’ll like a metric of general predictive performance and so compute the average over \(n\) data points:

        - -

        Further, we’d like to compare the MLPD value of a model \(M\) and another model \(M^*\) (possibly a reference model or competing model):

        - -

        A negative difference in \(\Delta MLPD\) for Model \(M\) compared to a reference Model (\(M^*\)) means worse performance for the model while a positive difference indicates better prediction. -We assess the uncertainty in the difference using Bayesian bootstrap (???) samples of \(\Delta MLPD\) between model \(M\) and \(M^*\):

The log pointwise predictive density (lppd) scores a model by the log probability it assigns to each observed data point, averaged over \(S\) posterior draws \(\Theta_s\):

\[
\mathrm{lppd} = \sum_i \log \frac{1}{S} \sum_s p(y_{i} | \Theta_{s})
\]

        +

To estimate LOOCV, the relative “importance” of each observation must be computed. Some observations have more influence on the posterior distribution than others, and so the posterior shifts more when they are left out. The intuition is that highly influential observations are relatively surprising under the model, while less important observations are relatively expected. The lppd can then be recomputed with each observation omitted in turn, yielding the out-of-sample lppd. For each omitted \(y_i\),

        +

\[
\mathrm{lppd}_{CV} = \sum_i \frac{1}{S} \sum_s \log p(y_{i} | \Theta_{-i,s})
\]
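As a rough sketch of how these quantities are computed in practice, suppose log_lik is an \(S \times N\) matrix of pointwise log-likelihoods (rows are posterior draws, columns are observations), like the one produced by the generated quantities block shown below; here the matrix is filled with placeholder values just to keep the snippet self-contained:

S <- 4000; N <- 10
log_lik <- matrix(rnorm(S * N, mean = -1), nrow = S)    # placeholder log-likelihood values

# in-sample lppd: average the pointwise likelihoods over draws, then log and sum
lppd <- sum(log(colMeans(exp(log_lik))))

# the loo package estimates the leave-one-out version (ELPD) via PSIS
library(loo)
psis_loo <- loo(log_lik, r_eff = rep(1, N))              # treat draws as independent for this toy example

Printing psis_loo reports elpd_loo, p_loo, and looic, the quantities that appear in the comparisons below.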

        +

There is a package called loo that can compute the expected log pointwise predictive density (ELPD) using PSIS-LOO, as well as the estimated number of effective parameters and the LOO information criterion (Vehtari, Gabry, et al. 2020). On the researcher's part, the log-likelihood of the observations must be computed in the model. For my models, I added this in the generated quantities block of my Stan program. It is standard practice to name the log-likelihood log_lik in the model.

        +
        generated quantities {
        +  vector[N] log_lik;
        +
        +  for (i in 1:N) {
        +    real alpha = b + bGT[G[i], trt[i]];
        +    real beta = a + aGT[G[i], trt[i]];
        +    real lambda = lG[G[i]];
        +    real p = lambda + (1 - 2*lambda) * inv_logit(exp(beta) * (x[i] - alpha));
        +    log_lik[i] = binomial_lpmf(k[i] | n[i], p);
        +  }
        +}
        +

Models can be compared simply using loo::loo_compare. It takes the ELPD estimate and standard error from each model and calculates the differences in ELPD relative to the best model. The model with the highest ELPD is predicted to have the best out-of-sample predictions. The comparison of the first three iterations of the model from chapter 3, fit to the audiovisual data, is shown below.

        +
        comp_av <- loo_compare(l031_av, l032_av, l032nc_av, l033_av)
        +print(comp_av, simplify = FALSE)
        +#>        elpd_diff se_diff elpd_loo se_elpd_loo p_loo   se_p_loo looic   se_looic
        +#> model4     0.0       0.0 -1615.7     42.8        16.2     0.9   3231.4    85.6 
        +#> model2    -1.0       3.8 -1616.7     42.6        11.3     0.6   3233.3    85.2 
        +#> model3    -1.3       3.8 -1617.0     42.7        11.8     0.6   3234.0    85.3 
        +#> model1   -32.8      10.4 -1648.5     43.0         3.0     0.2   3296.9    86.1
        +

The centered and non-centered parameterizations (models 2 and 3 respectively) have essentially the same ELPD. This is expected since they are the same model expressed two different ways; the reparameterization only helps with fitting efficiency, though that can mean more reliable posteriors. The model with age-block interactions (model 4) has the highest ELPD, but the standard error of the difference is large enough that it cannot be called decisively the best. What can be concluded is that including age and block improves predictive performance substantially over the base model (model 1).
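One informal way to read the comparison table is to put each difference on the scale of its standard error; this is only a heuristic, not a formal test, and the snippet assumes the comp_av object created above:

# ratio of each model's ELPD deficit to its standard error
# (the top row is excluded because its difference is zero by construction)
comp_av[-1, "elpd_diff"] / comp_av[-1, "se_diff"]

Ratios near zero (models 2 and 3 here) suggest the models are practically indistinguishable in predictive performance, while the base model sits several standard errors below.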

        +

What about the visual data? The fourth iteration of the model introduced a lapse rate. Did that change significantly improve the ELPD?

        +
        comp_vis <- loo_compare(l033_vis, l034_vis)
        +print(comp_vis, simplify = FALSE)
        +#>        elpd_diff se_diff elpd_loo se_elpd_loo p_loo   se_p_loo looic   se_looic
        +#> model2     0.0       0.0 -1001.1     44.0        19.2     1.9   2002.2    88.0 
        +#> model1  -259.4      31.9 -1260.5     56.1        23.1     2.3   2520.9   112.2
        +

Absolutely! Something else interesting happened with the introduction of the lapse rate: the effective number of parameters (p_loo) decreased.

        +

Earlier I argued that model selection is out and model comparison is in. At the end of chapter 3 I finished with a model that has age-block interactions and a lapse rate for each age group. There was one more model that I could have specified: one that also estimates at the subject level. There is no domain-specific reason to include the subject-level information, especially since the goal is to make inferences at the age-group level, but there may still be a statistical reason to add in the subjects. For one, adding the subject as another level in a multilevel model induces regularization among the subjects, which can make for better predictions on new data overall.

        +

        I’ve gone ahead and fit the model with subject-level information, and the comparison between this new model and the one from iteration 4 is shown below.

        +
        comp_vis2 <- loo_compare(l034_vis, l034s_vis)
        +print(comp_vis2, simplify = FALSE)
        +#>        elpd_diff se_diff elpd_loo se_elpd_loo p_loo   se_p_loo looic   se_looic
        +#> model2     0.0       0.0  -925.1     38.1        75.6     5.4   1850.3    76.2 
        +#> model1   -76.0      19.1 -1001.1     44.0        19.2     1.9   2002.2    88.0
        +

Including the subject-level information significantly improves the ELPD, and even though there are over 100 parameters in the model (a slope and an intercept for each of the 45 subjects, on top of the group-level terms), the effective number of parameters is far smaller. Since this new model can make inferences at both the age-group level and the subject level, I use it for the results chapter (chapter 6).

        +

One concern does arise when applying LOOCV to multilevel models: what does it mean to leave one out? Should one subject be left out? One age group? Just a single observation? The more levels a model has, the more carefully the scheme for estimating predictive performance must be chosen.
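The loo package has helpers for making that choice explicit when falling back to K-fold cross-validation; a small sketch, assuming the fitting data frame df has a subject column sid (holding out whole subjects asks a different predictive question than holding out single observations):

library(loo)

folds_obs  <- kfold_split_random(K = 10, N = nrow(df))   # folds of individual observations
folds_subj <- kfold_split_grouped(K = 10, x = df$sid)    # folds that hold out whole subjects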

        +
        +

        References

        +
        +

        Vehtari, Aki, Jonah Gabry, Mans Magnusson, Yuling Yao, Paul-Christian Bürkner, Topi Paananen, and Andrew Gelman. 2020. Loo: Efficient Leave-One-Out Cross-Validation and Waic for Bayesian Models. https://CRAN.R-project.org/package=loo.

        +

        Vehtari, Aki, Andrew Gelman, and Jonah Gabry. 2017. “Practical Bayesian Model Evaluation Using Leave-One-Out Cross-Validation and Waic.” Statistics and Computing 27 (5): 1413–32.

        diff --git a/docs/reference-keys.txt b/docs/reference-keys.txt index 7c593cc..41be77c 100644 --- a/docs/reference-keys.txt +++ b/docs/reference-keys.txt @@ -48,6 +48,11 @@ fig:ch040-Dog-Reborn tab:unnamed-chunk-1 fig:ch040-Timely-Nitrogen fig:ch040-Hot-Locomotive +fig:ch050-Moving-Moose +fig:ch050-Olive-Screwdriver +fig:ch050-Cold-Fish +fig:ch050-Strawberry-Swallow +eq:lppd fig:ch060-Eastern-Cat fig:ch060-Beta-Lonesome fig:ch060-Omega-Permanent @@ -87,6 +92,8 @@ effective-sample-size divergent-transitions prior-predictive-checks predictive-inferences +model-comparison-via-predictive-performance +loocv-and-importance-sampling results affect-of-adaptation-across-age-groups on-perceptual-synchrony diff --git a/docs/references.html b/docs/references.html index 535b678..aeea736 100644 --- a/docs/references.html +++ b/docs/references.html @@ -156,7 +156,11 @@

@@ -295,6 +299,9 @@

        References

        Vatakis, Argiro, Linda Bayliss, Massimiliano Zampini, and Charles Spence. 2007. “The Influence of Synchronous Audiovisual Distractors on Audiovisual Temporal Order Judgments.” Perception & Psychophysics 69 (2): 298–309.

        +

        Vehtari, Aki, Jonah Gabry, Mans Magnusson, Yuling Yao, Paul-Christian Bürkner, Topi Paananen, and Andrew Gelman. 2020. Loo: Efficient Leave-One-Out Cross-Validation and Waic for Bayesian Models. https://CRAN.R-project.org/package=loo.

        +
        +

        Vehtari, Aki, Andrew Gelman, and Jonah Gabry. 2017. “Practical Bayesian Model Evaluation Using Leave-One-Out Cross-Validation and Waic.” Statistics and Computing 27 (5): 1413–32.

        diff --git a/docs/results.html b/docs/results.html index 71d9c32..5092105 100644 --- a/docs/results.html +++ b/docs/results.html @@ -156,7 +156,11 @@
      • diff --git a/docs/search_index.json b/docs/search_index.json index e08a47b..6583ff3 100644 --- a/docs/search_index.json +++ b/docs/search_index.json @@ -1 +1 @@ -[["index.html", "Application of a Principaled Bayesian Workflow to Multilevel Modeling 1 Introduction 1.1 Everything can be Blamed on Fisher 1.2 Proposal of New Methods 1.3 Organization", " Application of a Principaled Bayesian Workflow to Multilevel Modeling Alexander D. Knudson December, 2020 1 Introduction With the advances in computational power and high-level programming languages like Python, R, and Julia, statistical methods have evolved to be more flexible and expressive. No longer must we be subjugated by p-values and step-wise regression techniques. Gone are the days of using clever modeling techniques to tame misbehaved data. Now is the time for principled and informed decisions to create bespoke models and domain-motivated analyses. We have the shoulders of giants to stand upon and look out at the vast sea of data science. I want to talk about how the advances in computational power have lead to a sort of mini revolution - resurrection - in statistics where Bayesian modeling has gained an incredible following thanks to projects like Stan. The steady adoption of computer aided statistical workflows also brings the need for multidisciplinary techniques from numerical analysis, probability theory, statistics, computer science, visualizations, and more. And with the age of computers, there is a strong push towards reproducibility. Concepts of modular design, workflows, project history and versioning, virtual environments, and human readable code all contribute to reproducible analyses. And somehow I also want to tie in how data is immutable - raw data should (must) be treated as a constant and unchangeable entity, and merely touching it will cause data mitosis. I will now segue into introducing the intent of this paper. I believe that utilizing the computational ability of modern computers helps strengthen the validity of an analysis. This is achieved by using powerful but expressive tools like Stan to write models that visually match written mathematical models. Classical statistical tools, while fast, require clever mathematics to perform certain routines such as fitting mixed effects models or the interpretation of cryptic p-values to determine if a model is “good”. Instead I believe we should be moving towards probabilistic programming languages like Stan to carry out Statistical analyses. This paper is motivated by an experiment in psychometrics (chapter 2), and by highlighting a principled workflow I seek to convince the reader that Bayesian multilevel modeling should be the default tool for modeling psychometric experiments. In the next section of this introduction, I will list classical tools for statistical modeling [of psychometric experiments] and touch on the limitations of such tools. Following that section, I will introduce the methods I use for building a model that deviate from classical methods. 1.1 Everything can be Blamed on Fisher … or Pearson, or Gauss, or … When I hear the term “regression”, I instantly think about maximum likelihood estimation (MLE) of parameters. And why not? There is an endless wealth of literature on the subject of linear regression and MLE (Johnson, Wichern, and others 2002; Larsen and Marx 2005; Sheather 2009; Navidi 2015). Most introductory courses on statistics and regression center around classical techniques such as MLE, hypothesis testing, and residual analysis. 
For the common student, learning statistical modeling in classical way can feel sterilized and mechanic. Check that the data are normal. Check that the coefficients are significantly different from zero. Check that the residuals are normal. Etc. I’m not trying to say that these methods are not important or that they are deeply flawed - it would be bad for modern society if we were just now finding out that the models are wrong. Instead, I am arguing that because they are so common and easy to apply that they are used without much extra thought. Take variable selection as an example. In a data set where there are a dozen predictors, how does one go about selecting which parameters produce the best model? Without thought, one may reach for a step-wise selection algorithm, and confidently conclude that variables \\(x\\), \\(y\\), and \\(z\\) are significant because the p-values say so. This method does fall apart quickly because as the number of parameters grow, so too does the number of steps needed to find the best subset of variables1, and there is no guarantee that the algorithm actually selects the best2 subset. But even if the best subset of variables is found, one still needs to consider if the variables have a practical effect or if the model omitted an important variable of interest. Sure, the type of analysis is important to the techniques used. Variable selection through step-wise algorithms or penalized maximum likelihood estimation (Hoerl and Kennard 1970; Tibshirani 1996) may be appropriate in an exploratory data analysis, but improper for causal inference and other scientifically motivated experiments. Which brings me to talk next about p-values, confidence intervals, and hypothesis testing. The concept of basing scientific results on the falsifiability (Popper 1959) or refutability of a claim is a strong foundation for the scientific method, and is arguably much better than the previous grounds of verifiability – just because something has been true for a very long time, doesn’t mean it will always be true in the future. But hypothesis testing comes with its own set of problems. Null hypothesis testing for point estimates usually depends on calculating a confidence interval and seeing if the interval contains the point of interest. This can be misleading, as there is more than one confidence interval that can be calculated. For Gaussian distributions, the mean equals the median equals the mode, so a 95% confidence interval is evenly distributed around the central measures. Some distributions are skewed, so an equal tail area confidence interval might not necessarily include the most likely value. Take for example the exponential distribution \\[ X \\sim \\mathrm{exponential} (\\lambda) \\] An equal tail area 95% confidence interval would be \\(\\left(-\\ln(0.975)/\\lambda, -\\ln(0.025)/\\lambda\\right)\\) which would not even contain the most likely value of zero. Should the highest density interval be used? Should skewness be reported with p-values and confidence intervals? Furthermore, confidence intervals are conditional on the model chosen, and that introduces other problems. McElreath (2020) discusses a well-known issue in population biology about comparing a neutral model of the distribution of allele frequencies to a selective model. In short, the two differing hypotheses may suggest different process models which in turn lead to statistical models - some of which are shared by both hypotheses. Rejecting the statistical model doesn’t rule out either of the hypotheses. 
Should we scrap these principles and tools all together? Absolutely not. Most of these wrinkled problems (and others) have been talked about and ironed out through careful discussion and clever techniques, but the damage is done, and hypothesis testing and p-values are widely misunderstood and misused. The problem is that these techniques rest on having a strong foundation of statistical knowledge, both to produce and to properly understand. This requirement is stifling. Communicating statistical results is just as important as producing them, and with modern tools and a vast selection of expressive languages we can analyze data in a more intuitive and natural framework. 1.2 Proposal of New Methods In my biased opinion, the Bayesian framework for modeling is a much more natural way to conduct scientific research where some kind of data analysis is involved. Now of course, I can’t claim as such without some compelling argument or examples. I have already targeted some weak points of classical statistics, and throughout Chapter 3 I will highlight specific examples of where classical techniques are typically applied, and how they may fall short compared to my proposal methods. What I am proposing is a fully Bayesian workflow to build and analyze a statistical model. In this Bayesian workflow (which shall hence be referred to simply as “workflow”) I will highlight a set of principles that utilize domain expertise, and focus around building a multilevel model. My goal is to show that the combination of these two concepts yields better prediction results and greater inferential power. And in lieu of p-values and hypothesis testing, I let predictive inference narrate the statistical results and strength of association within the model. 1.3 Organization I have organized this thesis as follows. In Chapter 2 I introduce the data set that drives the narrative and that motivates the adoption of Bayesian multilevel modeling. In Chapter 3 I describe and work through a principled Bayesian workflow for multilevel modeling. Chapter 4 goes into more depth on checking the model goodness of fit and model diagnostics in a Bayesian setting. In Chapter 5 I demonstrate how to use the Bayesian model from the principled workflow for predictive inference, and use posterior predictive distributions to plot and compare models. Chapters 5 and 6 go over the quantitative results and discuss the qualitative choices in the workflow. Then I conclude this paper in Chapter 7. References "],["motivating-data.html", "2 What is a Model without Data 2.1 Psychometric Experiments 2.2 Temporal Order Judgment Data 2.3 Data Visualizations and Quirks", " 2 What is a Model without Data What is data without a model It was Charles Darwin who in his book On the Origin of Species developed the idea that living organisms adapt in order to better survive in their environment. Sir Francis Galton, inspired by Darwin’s ideas, became interested in the differences in human beings and in how to measure those differences. Though the dark side of statistics and hubris lead Galton to become a pioneer of eugenics, his works on studying and measuring human differences lead to the creation of psychometrics – the science of measuring mental faculties. Around the same time that he was developing his theories, Johann Friedrich Herbart was also interested in studying consciousness through the scientific method, and is responsible for creating mathematical models of the mind. E.H. 
Weber built upon Herbart’s work, and sought out to prove the idea of a psychological threshold. A psychological threshold is a minimum stimulus intensity necessary to activate a sensory system – a liminal stimulus. He paved the way for experimental psychology and is the namesake of Weber’s Law – the change in a stimulus that will be just noticeable is a constant ratio of the original stimulus (Britannica 2014). \\[ \\frac{\\Delta I}{I} = k \\] To put this law into practice, consider holding a 1 kg weight (\\(I = 1\\)), and further suppose that we can just detect the difference between a 1 kg weight and a 1.2 kg weight (\\(\\Delta I = 0.2\\)). Then the constant just noticeable ratio is \\[ k = \\frac{0.2}{1} = 0.2 \\] So now if we pick up a 10 kg weight, we should be able to determine how much more mass is required to just detect a difference: \\[ \\frac{\\Delta I}{10} = 0.2 \\Rightarrow \\Delta I = 2 \\] The difference between a 10 kg and a 12 kg weight should be just barely perceptible. Notice that the difference in the first set of weights is 0.2 and in the second set it is 2. Our perception of the difference in stimulus intensities is not absolute, but relative. G.T. Fechner devised the law (Weber-Fechner Law) that the strength of a sensation grows as the logarithm of the stimulus intensity. \\[S = K \\ln I\\] An example to this law is to consider two light sources, one that is 100 lumens (\\(S_1 = K \\ln 100\\)) and another that is 200 lumens (\\(S_2 = K \\ln 200\\)). The intensity of the second light is not perceived as twice as bright, but only about 1.15 times as bright according to the Weber-Fechner law. \\[\\theta = S_2 / S_1 \\approx 1.15\\] Notice that the value \\(K\\) cancels out when calculating the relative intensity, but knowing \\(K\\) can lead to important psychological insights; insights about differences between persons or groups of people! What biological and contextual factors affect how people perceive different stimuli? How do we measure their perception in a meaningful way? As one might expect, we can collect data from psychometric experiments, fit a model to the data from a family of functions called psychometric functions, and inspect key operating characteristics of those functions. 2.1 Psychometric Experiments Psychometric experiments are devised in a way to examine psychophysical processes, or the response between the world around us and our inward perceptions. A psychometric function relates an observer’s performance to an independent variable, usually some physical quantity of a stimulus in a psychophysical task (Wichmann and Hill 2001a). Psychometric functions were studied as early as the late 1800’s, and Edwin Boring published a chart of the psychometric function in The American Journal of Psychology in 1917 (Boring 1917). Figure 2.1: A chart of the psychometric function. The experiment in this paper places two points on a subject’s skin separated by some distance, and has them answer their impression of whether there is one point or two, recorded as either ‘two points’ or ‘not two points’. As the separation of aesthesiometer points increases, so too does the subject’s confidence in their perception of ‘two-ness’. So at what separation is the impression of two points liminal? Figure 2.1 displays the key aspects of the psychometric function. The most crucial part is the sigmoid function, the S-like non-decreasing curve which in this case is represented by the Normal CDF, \\(\\Phi(\\gamma)\\). 
The horizontal axis represents the stimulus stimulus intensity, the separation of two points in centimeters. The vertical axis represents the probability that a subject has the impression of two points. With only experimental data, the response proportion becomes an approximation for the probability. This leads me to talk about the type of psychometric experiment that this paper deals with called a temporal order judgment (TOJ) experiment. The concept is that if there are two distinct stimuli occurring nearly simultaneously then our brains will bind them into a single percept (perceive them as happening simultaneously). Compensation for small temporal differences is beneficial for coherent multisensory experiences, particularly in visual-speech synthesis as it is necessary to maintain an accurate representation of the sources of multisensory events. The temporal asynchrony between stimuli is called the stimulus onset asynchrony (SOA), and the range of SOAs for which sensory signals are integrated into a global percept is called the temporal binding window. When the SOA grows too large then the brain segregates the two signals and the temporal order can be determined. Our experiences in life as we age shape the mechanisms of processing multisensory signals, and some multisensory signals are integrated much more readily than others. Perceptual synchrony has been previously studied through the point of subjective simultaneity (PSS) – the temporal delay between two signals at which an observer is unsure about their temporal order (Stone et al. 2001). The temporal binding window is the time span over which sensory signals arising from different modalities appear integrated into a global percept. A deficit in temporal sensitivity may lead to a widening of the temporal binding window and reduce the ability to segregate unrelated sensory signals. In temporal order judgment tasks, the ability to discriminate the timing of multiple sensory signals is referred to as temporal sensitivity, and is studied through the measurement of the just noticeable difference (JND) – the smallest lapse in time so that a temporal order can just be determined. Figure 2.2 highlights the features through which we study psychometric functions. The PSS is defined as the point where an observer can do no better at determining temporal order than random guessing (i.e. the response probability is 50%). The JND is defined as the extra temporal delay between stimuli so that the temporal order is just able to be determined. Historically this has been defined as the difference between the 84% level3 and the PSS, though the upper level often depends on domain expertise. Figure 2.2: The PSS is defined as the point where an observer can do no better at determining temporal order than random guessing. The just noticeable difference is defined as the extra temporal delay between stimuli so that the temporal order is just able to be determined. Historically this has been defined as the difference between the 0.84 level and the PSS, though the upper level depends on domain expertise. Perceptual synchrony and temporal sensitivity can be modified through a baseline understanding. In order to perceive physical events as simultaneous, our brains must adjust for differences in temporal delays of transmission of both psychical signals and sensory processing (Fujisaki et al. 2004). 
In some cases such as with audiovisual stimuli, the perception of simultaneity can be modified by repeatedly presenting the audiovisual stimuli at fixed time separations (called an adapter stimulus) to an observer (Vroomen et al. 2004). This repetition of presenting the adapter stimulus is called temporal recalibration. The data set that I introduce in the next section concerns temporal order judgment across various sensory modalities with a temporal recalibration component. 2.2 Temporal Order Judgment Data Which came first, the chicken or the experimentally controlled stimulus The data set that I am using in this paper comes from experiments done by A.N. Scurry and Dr. F. Jiang in the Department of Psychology at the University of Nevada. Reduced temporal sensitivity in the aging population manifests in an impaired ability to perceive synchronous events as simultaneous, and similarly more difficulty in segregating asynchronous sensory signals that belong to different sources. The consequences of a widening of the temporal binding window is considered in Scurry et al. (2019), as well as a complete detailing of the experimental setup and recording process. A shortened summary of the methods is provided below. There are four different tasks in the experiment: audio-visual, visual-visual, visual-motor, and duration, and I will refer to each task respectively as audiovisual, visual, sensorimotor, and duration. The participants consist of 15 young adults (age 20-27), 15 middle age adults (age 39-50), and 15 older adults (age 65-75), all recruited from the University of Nevada, Reno. Additionally all subjects are right handed and were reported to have normal or corrected to normal hearing and vision. Table 2.1: Sample of motivating data. soa response sid task trial age_group age sex -350 0 O-m-BC audiovisual pre older_adult 70 M -200 0 M-m-SJ duration post1 middle_age 48 M 28 1 O-f-KK sensorimotor pre older_adult 66 F 275 1 O-f-MW visual post1 older_adult 69 F In the audiovisual TOJ task, participants were asked to determine the temporal order between an auditory and visual stimulus. Stimulus onset asynchrony values were selected uniformly between -500 to +500 ms with 50 ms steps, where negative SOAs indicated that the visual stimulus was leading, and positive values indicated that the auditory stimulus was leading. Each SOA value was presented 5 times in random order in the initial block. At the end of each trial the subject was asked to report if the auditory stimulus came before the visual, where a \\(1\\) indicates that they perceived the sound first, and a \\(0\\) indicates that they perceived the visual stimulus first. A similar setup is repeated for the visual, sensorimotor, and duration tasks. The visual task presented two visual stimuli on the left and right side of a display with temporal asynchronies that varied between -300 ms to +300 ms with 25 ms steps. Negative SOAs indicated that the left stimulus was first, and positive that the right came first. A positive response indicates that the subject perceived the right stimulus first. The sensorimotor task has subjects focus on a black cross on a screen. When it disappears, they respond by pressing a button. Additionally, when the cross disappears, a visual stimulus was flashed on the screen, and subjects were asked if they perceived the visual stimulus before or after their button press. 
The latency of the visual stimulus was partially determined by individual subject’s average response time, so SOA values are not fixed between subjects and trials. A positive response indicates that the visual stimulus was perceived after the button press. The duration task presents two vertically stacked circles on a screen with one appearing right after the other. The top stimulus appeared for a fixed amount of time of 300 ms, and the bottom was displayed for anywhere between +100 ms to +500 ms in 50 ms steps corresponding to SOA values between -200 ms to +200 ms. The subject then responds to if they perceived the bottom circle as appearing longer than the top circle. Table 2.2: Summary of TOJ Tasks Task Positive Response Positive SOA Truth Audiovisual Perceived audio first Audio came before visual Visual Perceived right first Right came before left Sensorimotor Perceived visual first Visual came before tactile Duration Perceived bottom as longer Bottom lasted longer than top Finally, after the first block of each task was completed, the participants went through an adaptation period where they were presented with the respective stimuli from each task repeatedly at fixed temporal delays, then they repeated the task. To ensure that the adaptation affect persisted, the subject were presented with the adapter stimulus at regular intervals throughout the second block. The blocks are designated as pre and post1, post2, etc. in the data set. In this paper I will only be focusing on the pre and post1 blocks. 2.3 Data Visualizations and Quirks The dependent variable in these experiments is the perceived response which is encoded as a 0 or a 1, and the independent variable is the SOA value. If the response is plotted against the SOA values, then it is difficult to determine any relationship (see figure 2.3). Transparency can be used to better visualize the relationships between SOA value and responses. The center plot in figure 2.3 uses the same data as the left plot, except that the transparency is set to 0.05. As a result, one can see that there is a higher density of “0” responses towards more negative SOAs, and a higher density of “1” responses for more positive SOAs. Taking it a step further, I can compute and plot the proportion of responses for a given SOA. This is displayed in the right panel. Now the relationship between SOA values and responses is clear – as the SOA value goes from more negative to more positive, the proportion of positive responses increases from near 0 to near 1. Figure 2.3: Left: Simple plot of response vs. soa value. Center: A plot of response vs. soa with transparency. Right: A plot of proportions vs. soa with transparency. Subjectively the right plot in figure 2.3 is the easiest to interpret. Because of this, I will often present the observed and predicted data using the proportion of responses rather than the actual response. Proportional data also has the advantage of being bounded on the same interval as the response. For the audiovisual task, the responses can be aggregated into binomial data – the number of positive responses for given SOA value – which is sometimes more efficient to work with than the Bernoulli data (see table 2.3). However the number of times an SOA is presented varies between the pre-adaptation and post-adaptation blocks; 5 and 3 times per SOA respectively. Table 2.3: Audiovisual task with aggregated responses. 
trial soa n k proportion pre 200 5 4 0.80 150 5 5 1.00 -350 5 0 0.00 post1 350 3 3 1.00 -500 3 1 0.33 -200 3 0 0.00 Other quirks about the data pertain to the subjects. There is one younger subject that did not complete the audiovisual task, and one younger subject that did not complete the duration task. Additionally there is one older subject who’s response data for the post-adaptation audiovisual task is unreasonable4 (see figure 2.4). Figure 2.4: Post-adaptation response data for O-f-CE It is unreasonable because, of all the negative SOAs, there were only two correct responses5. If a subject is randomly guessing the temporal order, then a naive estimate for the proportion of correct responses is 0.5. If a subject’s proportion of correct responses is above 0.5, then they are doing better than random guessing. In figure 2.5 it is seen that subject O-f-CE is the only one who’s proportion is below 0.5 (and by a considerable amount). Figure 2.5: Proportion of correct responses for negative SOA values during the post-adaptation audiovisual experiment. The consequences of leaving in this experimental block in the data is considered in the Chapter 6, but it is a clear outlier that must be noted. When this method of detecting outliers is repeated for all tasks and blocks, then I end up with 17 records in total (see figure 2.6), one of which is the aforementioned subject. Figure 2.6: Proportion of correct responses across all tasks and blocks Proportions are calculated individually for positive and negative SOAs. Most of the records that are flagged by this method of outlier detection are from the sensorimotor task, and none are from the visual task. This may be attributed to the perceived difficulty of the task. One consequence of higher temporal sensitivity is that it is easier to determine temporal order. It may also be that determining temporal order is inherently easier for certain multisensory tasks compared to others. Since the sensorimotor task does not have fixed SOA values like the other tasks, it may be perceived as more difficult. Or perhaps the mechanisms that process tactile and visual signals are not as well coupled as those that process audio and visual signals. Once again, I’ll consider the handling of the sensorimotor outliers in the results chapter. Now that I have introduced the motivating data and some of the theory behind psychometric experiments, I am ready to introduce a Bayesian workflow for multilevel modeling of the psychometric function. If the reader is interested in a fun story, in the discussion chapter I talk about the process I went through to read in this psychometric data, clean it up, and produce a tidy data set that is ready for modeling. While data cleaning and transforming is a topic entirely to itself, it is not the main focus of this paper. References "],["workflow.html", "3 Principled Bayesian Workflow 3.1 Iteration 1 (journey of a thousand miles) 3.2 Iteration 2 (electric boogaloo) 3.3 Iteration 3 (the one for me) 3.4 Iteration 4 (what’s one more) 3.5 Celebrate", " 3 Principled Bayesian Workflow The meat, the cheese, the entire sandwich Leading up to now, I haven’t discussed what is a principled Bayesian workflow, nor what multilevel modeling is. I was hoping to build up the suspense. Well I hope you’re now ready for the answer. A principled Bayesian workflow is a method of employing domain expertise and statistical knowledge to iteratively build a statistical model that satisfies the constraints and goals set forth by the researcher. 
Oh, and Bayesian techniques are used in exchange for classical ones. Maybe not worth the suspense, but the simple idea spawns a creative and descriptive way to analyze data. What about the multilevel aspect? While I get into that more in the following sections, the concept is simple. Multilevel models should be the default. The alternatives are models with complete pooling, or models with no pooling. Pooling vs. no pooling is a fancy way of saying that all the data is modeled as a whole, or the smallest component (group) is modeled individually. The former implies that the variation between groups is zero (all groups are the same), and the latter implies that the variation between groups is infinite (no groups are the same). Multilevel models assume that the truth is somewhere in the middle of zero and infinity. That’s not a difficult thing to posit. Hierarchical models are a specific kind of multilevel model where one or more groups are nested within a larger one. In the case of the psychometric data, there are three age groups, and within each age group are individual subjects. Multilevel modeling provides a way to quantify and apportion the variation within the data to each level in the model. For an in-depth introduction to multilevel modeling, see Gelman and Hill (2006). There are many great resources out there for following along with an analysis of some data or problem, and much more is the abundance of tips, tricks, techniques, and testimonies to good modeling practices. The problem is that many of these prescriptions are given without context for when they are appropriate to be taken. According to Betancourt (2020), this leaves “practitioners to piece together their own model building workflows from potentially incomplete or even inconsistent heuristics.” The concept of a principled workflow is that for any given problem, there is not, nor should there be, a default set of steps to take to get from data exploration to predictive inferences. Rather great consideration must be given to domain expertise and the questions that one is trying to answer with the data. Since everyone asks different questions, the value of a model is not in how well it ticks the boxes of goodness-of-fit checks, but in how consistent it is with domain expertise and its ability to answer the unique set of questions. Betancourt suggests answering four questions to evaluate a model by: Domain Expertise Consistency - Is our model consistent with our domain expertise? Computational Faithfulness - Will our computational tools be sufficient to accurately fit our posteriors? Inferential Adequacy - Will our inferences provide enough information to answer our questions? Model Adequacy - Is our model rich enough to capture the relevant structure of the true data generating process? Like any good Bayesian6, much work is done before seeing the data or building a model. This may include talking with experts to gain domain knowledge or to elicit priors. Experts may know something about a particular measure, perhaps the mean or variability of the data from years of research, and different experts may provide different estimates of a measure. The benefit of modeling in a Bayesian framework is that all prior knowledge may be incorporated into the model to be used to estimate the posterior distribution. The same prior knowledge may also be used to check the posterior to ensure that predictions remain within physical or expert-given constraints. Consistency is key. 
The computational tool I will be using to estimate the posterior is a probabilistic programming language (PPL) called Stan (Guo et al. 2020) within the R programming language. Stan uses the No U-Turn Sampler (NUTS) version of Hamiltonian Monte Carlo (HMC) which I will discuss more in chapter 4. For a gentle introduction to Bayesian statistics and sampling methods, see Bolstad and Curran (2016), and for an in-depth review of HMC see Betancourt (2017). The question of inferential adequacy depends on the set of questions that we are seeking to answer with the data from the psychometric experiment. The broad objective is to determine if there are any significant differences between age groups when it comes to temporal sensitivity, perceptual synchrony, and temporal recalibration, and if the task influences the results as well. The specific goals are to estimate and compare the PSS an JND across all age groups, conditions, and tasks, and determine the affect of recalibration between age groups. For the last question, model adequacy, I will be following a set of steps proposed in Betancourt (2020). The purpose of laying out these steps is not to again blindly check them off, but to force the analyst to carefully consider each point and make an informed decision whether the step is necessary or to craft the specifics of how the step should be completed. The steps are listed in table 3.1. These steps are also not meant to be followed linearly. If at any point it is discovered that there is an issue in conceptual understanding or model adequacy or something else, then it is encouraged to go back to a previous step and start with a new understanding. Table 3.1: Principled workflow Part Step Pre-Model, Pre-Data conceptual analysis define observational space construct summary statistics Post-Model, Pre-Data develop model construct summary functions simulate Bayesian ensemble prior checks configure algorithm fit simulated ensemble algorithmic calibration inferential calibration Post-Model, Post-Data fit observed data diagnose posterior fit posterior retrodictive checks celebrate I’ll talk about each step in the first iteration, but may choose to omit steps in subsequent iterations if there are no changes. For the purposes of building a model and being concise, I will focus around the audiovisual TOJ task in this chapter, but the final model will apply similarly to the visual and duration tasks. For the sensorimotor task, the model will be modified to accept Bernoulli data as opposed to aggregated Binomial counts (described more in the next section). 3.1 Iteration 1 (journey of a thousand miles) Pre-Model, Pre-Data I begin the modeling process by modeling the experiment according to the description of how it occurred and how the data were collected. This first part consists of conceptual analysis, defining the observational space, and constructing summary statistics that can help us to identify issues in the model specification. Conceptual Analysis In section 2.2 I discussed the experimental setup and data collection. To reiterate, subjects are presented with two stimuli separated by some temporal delay, and they are asked to respond as to their perception of the temporal order. There are 45 subjects with 15 each in the young, middle, and older age groups. As the SOA becomes larger in the positive direction, subjects are expected to give more “positive” responses, and as the SOA becomes larger in the negative direction, more “negative” responses are expected. 
By the way the experiment and responses are constructed, there is no expectation to see a reversal of this trend unless there was an issue with the subject’s understanding of the directions given to them or an error in the recording device. After the first experimental block the subjects go through a recalibration period, and repeat the experiment again. The interest is in seeing if the recalibration has an effect on temporal sensitivity and perceptual synchrony, and if the effect is different for each age group. Define Observational Space The response that subjects give during a TOJ task is recorded as a zero or a one (see section 2.2), and their relative performance is determined by the SOA value. Let \\(y\\) represent the binary outcome of a trial and let \\(x\\) be the SOA value. \\[\\begin{align*} y_i &\\in \\lbrace 0, 1\\rbrace \\\\ x_i &\\in \\mathbb{R} \\end{align*}\\] If the SOA values are fixed like in the audiovisual task, then the responses can be aggregated into binomial counts, \\(k\\). \\[ k_i, n_i \\in \\mathbb{Z}_0^+, k_i \\le n_i \\] In the above expression, \\(\\mathbb{Z}_0^+\\) represents the set of non-negative integers. Notice that the number of trials \\(n\\) has an index variable \\(i\\). This is because the number of trials per SOA is not fixed between blocks. In the pre-adaptation block, there are five trials per SOA compared to three in the post-adaptation block. So if observation 32 is recorded during a “pre” block, \\(n_{32} = 5\\), and if observation 1156 is during a “post” block, \\(n_{1156} = 3\\). Of course this is assuming that each subject completed all trials in the block, but the flexibility of the indexing can manage even if they didn’t. Then there are also three categorical variables – age group, subject ID, and trial (block). The first two are treated as factor variables7. Rather than using one-hot encoding or dummy variables, the age levels are left as categories and a coefficient is fit for each level. Among the benefits of this approach is the ease of interpretation and ease of working with the data programmatically. This is especially true at the subject level. If a dummy variables was used for all 45 subjects, we would have 44 different dummy variables to work with times the number of coefficients that make estimates at the subject level. The number of parameters in the model grows rapidly as the model complexity grows. Age groups and individual subjects can be indexed in the same way that number of trials is indexed. \\(S_i\\) refers to the subject in record \\(i\\), and similarly \\(G_i\\) refers to the age group of that subject. Observation 63 is for record ID av-post1-M-f-HG, so then \\(S_{63}\\) is M-f-HG and \\(G_{63}\\) is middle_age. Under the hood of R, these factor levels are represented as integers (e.g. middle age group level is stored internally as the number 2). (x <- factor(c("a", "a", "b", "c"))) #> [1] a a b c #> Levels: a b c storage.mode(x) #> [1] "integer" This data storage representation can later be exploited for the Stan model. The pre- and post-adaptation categories are treated as a binary indicator referred to as \\(trt\\) (short for treatment) since there are only two levels in the category. In this setup, a value of 1 indicates a post-adaptation block. I chose this encoding over the reverse because the pre-adaptation block is like the baseline performance, and it seemed more appropriate to interpret the post-adaptation block as turning on some effect. 
Using a binary indicator in a regression setting may not be the best practice as I discuss in section 3.2. In the Stan modeling language, data for a binomial model with subject and age group levels and treatment is specified as data { int N; // Number of observations int N_S; // Number of subject levels int N_G; // Number of age group levels int N_T; // Number of treatment/control groups int n[N]; // Trials per SOA int k[N]; // binomial counts vector[N] x; // SOA values int S[N]; // Subject identifier int G[N]; // Age group identifier int trt[N]; // Treatment indicator } In Stan (and unlike in R), data types must be statically declared. While sometimes a nuisance, this requirement aids in something called type inference, and also lets Stan optimize certain parts of the model. Construct Summary Statistics In order to effectively challenge the validity of the model, a set of summary statistics are constructed that help answer the questions of domain expertise consistency and model adequacy. We are studying the affects of age and temporal recalibration through the PSS and JND (see section 2.1), so it is natural to define summary statistics around these quantities to verify model consistency. Additionally the PSS and JND can be computed regardless of the model parameterization or chosen psychometric function. By the experimental setup and recording process, it is impossible that a properly conducted block would result in a JND less than 0 (i.e. the psychometric function is always non-decreasing), so that can be a lower limit for its threshold. On the other end it is unlikely that it will be beyond the limits of the SOA values, but even more concrete, it seems unlikely (though not impossible) that the just noticeable difference would be more than a second. The lower bound on the JND can be further refined if we draw information from other sources. Some studies show that we cannot perceive time differences below 30 ms, and others show that an input lag as small as 100ms can impair a person’s typing ability. Then according to these studies, a time delay of 100ms is enough to notice, and so a just noticeable difference should be much less than one second – much closer to 100ms. I’ll continue to use one second as an extreme estimate indicator, but will incorporate this knowledge when it comes to selecting priors. As for the point of subjective simultaneity, it can be either positive or negative, with the belief that larger values are more rare. Some studies suggest that for audio-visual TOJ tasks, the separation between stimuli need to be as little as 20 milliseconds for subjects to be able to determine which modality came first (Vatakis et al. 2007). Other studies suggest that our brains can detect temporal differences as small as 30 milliseconds. If these values are to be believed then we should be skeptical of PSS estimates larger than say 150 milliseconds in absolute value, just to be safe. A histogram of computed PSS and JND values will suffice for summary statistics. We can estimate the proportion of values that fall outside of our limits defined above, and use them as indications of problems with the model fitting or conceptual understanding. Post-Model, Pre-Data It is now time to define priors for the model, while still not having looked at the [distribution of] data. The priors should be motivated by domain expertise and prior knowledge, not the data. There are also many choices when it comes to selecting a psychometric (sigmoid) function. Common ones are logistic, Gaussian, and Weibull. 
Figure 3.1: Assortment of psychometric functions. The Weibull psychometric function is more common when it comes to 2-AFC psychometric experiments where the independent variable is a stimulus intensity (non-negative) and the goal is signal detection. The data in this paper includes both positive and negative SOA values, so the Weibull is not a natural choice. In fact, because this is essentially a model for logistic regression, my first choice is the logistic function as it is the canonical choice for Binomial data. Additionally, the data in this study are reversible. The label of a positive response can be swapped with the label of a negative response and the inferences should remain the same. Since there is no natural ordering, it makes more sense for the psychometric function to be symmetric, e.g. the logistic and Gaussian. I use symmetric loosely to mean that probability density function (PDF) is symmetric about its middle. More specifically, the distribution has zero skewness. In practice, there is little difference in inference between the logit and probit links, but computationally the logit link is more efficient. I am also more familiar with working on the log-odds scale compared to the probit scale, so I make the decision to go forward with the logistic function. In chapter 4 I will show how even with a mis-specified link function, we can still achieve accurate predictions. Develop Model Before moving on to specifying priors, I think it is appropriate to provide a little more background into generalized linear models (GLMs) and their role in working with psychometric functions. A GLM allows the linear model to be related to the outcome variable via a link function. An example of this is the logit link - the inverse of the logistic function. The logistic function, \\(F\\), takes \\(x \\in \\mathbb{R}\\) and constrains the output to be in \\((0, 1)\\). \\[\\begin{equation} F(\\theta) = \\frac{1}{1 + \\exp\\left(-\\theta\\right)} \\tag{3.1} \\end{equation}\\] Since \\(F\\) is a strictly increasing and continuous function, it has an inverse, and the link for (3.1) is the log-odds or logit function. \\[\\begin{equation} F^{-1}(\\pi) = \\mathrm{logit}(\\pi) = \\ln\\left(\\frac{\\pi}{1 - \\pi}\\right) \\tag{3.2} \\end{equation}\\] By taking \\((F^{-1} \\circ F)(\\theta)\\) we can arrive at a relationship that is linear in \\(\\theta\\). \\[\\begin{align*} \\pi = F(\\theta) \\Longleftrightarrow F^{-1}(\\pi) &= F^{-1}(F(\\theta)) \\\\ & = \\ln\\left(\\frac{F(\\theta)}{1 - F(\\theta)}\\right) \\\\ &= \\ln(F(\\theta)) - \\ln(1 - F(\\theta)) \\\\ &= \\ln\\left(\\frac{1}{1 + \\exp(-\\theta)}\\right) - \\ln\\left(\\frac{\\exp(-\\theta)}{1 + \\exp(-\\theta)}\\right) \\\\ &= - \\ln(1 + \\exp(-\\theta)) - \\ln(\\exp(-\\theta)) + \\ln(1 + \\exp(-\\theta)) \\\\ &= - \\ln(\\exp(-\\theta)) \\\\ &= \\theta \\end{align*}\\] The purpose of all this setup is to show that a model for the psychometric function can be specified using a linear predictor, \\(\\theta\\). Given a simple slope-intercept model, one would typically write the linear predictor as \\[\\begin{equation} \\theta = \\alpha + \\beta x \\tag{3.3} \\end{equation}\\] This isn’t the only acceptable form; it could be written in the centered parameterization \\[\\begin{equation} \\theta = \\beta(x - a) \\tag{3.4} \\end{equation}\\] Both parameterizations will describe the same geometry, so why should it matter which form is chosen? 
Clearly the interpretation of the parameters change between the two models, but the reason becomes clear when you consider how the linear model relates back to the physical properties that the psychometric model describes. Take equation (3.3), substitute it in to (3.1), and then take the logit of both sides \\[\\begin{equation} \\mathrm{logit}(\\pi) = \\alpha+\\beta x \\tag{3.5} \\end{equation}\\] Now recall that the PSS is defined as the SOA values such that the response probability, \\(\\pi\\), is \\(0.5\\). Substituting \\(\\pi = 0.5\\) into (3.5) and solving for \\(x\\) yields \\[ pss = -\\frac{\\alpha}{\\beta} \\] Similarly, the JND is defined as the difference between the SOA value at the 84% level and the PSS. Substituting \\(\\pi = 0.84\\) into (3.5), solving for \\(x\\), and subtracting off the pss yields \\[\\begin{equation} jnd = \\frac{\\mathrm{logit}(0.84)}{\\beta} \\tag{3.6} \\end{equation}\\] From the conceptual analysis, it is easy to define priors for the PSS and JND, but then how does one set the priors for \\(\\alpha\\) and \\(\\beta\\)? Let’s say the prior for the just noticeable difference is \\(jnd \\sim \\pi_j\\). Then the prior for \\(\\beta\\) would be \\[ \\beta \\sim \\frac{\\mathrm{logit}(0.84)}{\\pi_j} \\] The log-normal distribution has a nice property where its multiplicative inverse is still a log-normal distribution. We could let \\(\\pi_j = \\mathrm{Lognormal}(\\mu, \\sigma^2)\\) and then \\(\\beta\\) would be distributed as \\[ \\beta \\sim \\mathrm{Lognormal}(-\\mu + \\ln(\\mathrm{logit}(0.84)), \\sigma^2) \\] This is acceptable, as it was determined last chapter that the slope must always be positive, and a log-normal distribution constrains the support to postive real numbers. Next suppose that the prior distribution for the PSS is \\(pss \\sim \\pi_p\\). Then the prior for \\(\\alpha\\) is \\[ \\alpha \\sim -\\pi_p \\cdot \\beta \\] If \\(\\pi_p\\) is set to a log-normal distribution as well, then \\(\\pi_p \\cdot \\beta\\) would also be log-normal, but there is still the problem of the negative sign. If \\(\\alpha\\) is always negative, then the PSS will also always be negative, which is certainly not always true. Furthermore, I don’t want to a priori put more weight on positive PSS values compared to negative ones, for which a lognormal distribution would not do. Let’s now go back and consider using equation (3.4) and repeat the above process. \\[\\begin{equation} \\mathrm{logit}(\\pi) = \\beta(x - a) \\tag{3.7} \\end{equation}\\] The just noticeable difference is still given by (3.6) and so the same method for choosing a prior can be used, but the PSS is now given by \\[ pss = \\alpha \\] This is a fortunate consequence of using (3.4) because now the JND only depends on \\(\\beta\\) and the PSS only depends on \\(\\alpha\\), and now \\(\\alpha\\) can literally be interpreted as the PSS of the estimated psychometric function! Also thrown in is the ability to set a prior for \\(\\alpha\\) that is symmetric around \\(0\\) like a Gaussian distribution. This also brings me to point out the first benefit of using a modeling language like Stan over others. For fitting GLMs in R, there are a handful of functions that utilize MLE like stats::glm and others that use Bayesian methods like rstanarm::stan_glm and arm::bayesglm (Gabry and Goodrich 2020; Gelman and Su 2020). Each of these functions requires the linear predictor to be in the form of (3.3). 
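For reference, a call to one of these functions might look like the following sketch, where the data frame name (av_binomial) and its columns are hypothetical stand-ins for the aggregated audiovisual data and the priors are left at their defaults:

```r
library(rstanarm)

# Slope-intercept form of the linear predictor, theta = alpha + beta * x,
# fit to binomial counts k out of n trials at each SOA value x.
fit_av <- stan_glm(cbind(k, n - k) ~ 1 + x,
                   family = binomial("logit"),
                   data   = av_binomial)
```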
The stan_glm function actually uses Stan in the backend to fit a model, but is limited to priors from the Student t family of distributions. By writing the model directly in Stan, the linear model can be parameterized in any way and with any prior distribution, and so allows for much more expressive modeling - a key aspect of this principled workflow. For the first iteration of this model, I am going to start with the simplest model that captures the structure of the data without including information about age group, treatment, or subject. Here is a simple model that draws information from the conceptual analysis. \\[\\begin{align*} k_i &\\sim \\mathrm{Binomial}(n_i, p_i) \\\\ \\mathrm{logit}(p_i) &= \\beta ( x_i - \\alpha ) \\end{align*}\\] Since I am using the linear model from (3.4), setting the priors for \\(\\alpha\\) and \\(\\beta\\) is relatively straightforward. The PSS can be positive or negative without any expected bias towards either, so a symmetric distribution like the Gaussian is a fine choice for \\(\\alpha\\) without having any other knowledge about the distribution of PSS values. Since I said earlier that a PSS value more than 150ms in absolute value is unlikely, I can define a Gaussian prior such that \\(P(|pss| > 0.150) \\approx 0.01\\). Since the prior does not need to be exact, the following mean and variance suffice \\[ pss \\sim \\mathcal{N}(0, 0.06^2) \\Longleftrightarrow \\alpha \\sim \\mathcal{N}(0, 0.06^2) \\] For the just noticeable difference, I will continue to use the log-normal distribution because it is constrained to positive values and has the nice reciprocal property. The JND is expected to be close to 100ms and extremely unlikely to exceed 1 second. This implies a prior such that the mean is around 100ms and the bulk of the distribution is below 1 second - i.e. \\(E[X] \\approx 0.100\\) and \\(P(X < 1) \\approx 0.99\\). This requires solving a system of nonlinear equations in two variables \\[ \\begin{cases} E[X] = 0.100 = \\exp\\left(\\mu + \\sigma^2 / 2\\right) \\\\ P(X < 1) = 0.99 = 0.5 + 0.5 \\cdot \\mathrm{erf}\\left[\\frac{\\ln (1) - \\mu}{\\sqrt{2} \\cdot \\sigma}\\right] \\end{cases} \\] This nonlinear system can be solved using Stan’s algebraic solver. functions { vector system(vector y, vector theta, real[] x_r, int[] x_i) { vector[2] z; z[1] = exp(y[1] + y[2]^2 / 2) - theta[1]; z[2] = 0.5 + 0.5 * erf(-y[1] / (sqrt(2) * y[2])) - theta[2]; return z; } } transformed data { vector[2] y_guess = [1, 1]'; real x_r[0]; int x_i[0]; } transformed parameters { vector[2] theta = [0.100, 0.99]'; vector[2] y; y = algebra_solver(system, y_guess, theta, x_r, x_i); } fit <- sampling(prior_jnd, iter=1, warmup=0, chains=1, refresh=0, seed=31, algorithm="Fixed_param") sol <- extract(fit) sol$y #> #> iterations [,1] [,2] #> [1,] -7.501 3.225 The solver has determined that \\(\\mathrm{Lognormal}(-7.5, 3.2^2)\\) is the appropriate prior. However, simulating some values from this distribution produces a lot of extremely small values (\\(<10^{-5}\\)) and a few extremely large values (\\(\\approx 10^2\\)). This is because the expected value of a log-normal random variable depends on both the mean and standard deviation. If the median is used in place for the mean, then a more acceptable prior may be determined. 
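Before turning back to the algebraic solver, the median-based version can be checked in closed form. This is a sketch under the assumption that the median (rather than the mean) is fixed at 0.100 and that \\(P(X < 1) = 0.99\\); the result should agree with the solver output below.

```r
# For X ~ Lognormal(mu, sigma^2):
#   median(X) = exp(mu)            =>  mu    = log(0.100)
#   P(X < 1)  = pnorm(-mu / sigma) =>  sigma = -mu / qnorm(0.99)
mu    <- log(0.100)
sigma <- -mu / qnorm(0.99)
round(c(mu = mu, sigma = sigma), 4)
```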
fit <- sampling(prior_jnd_using_median, iter=1, warmup=0, chains=1, refresh=0, seed=31, algorithm="Fixed_param") sol <- extract(fit) sol$y #> #> iterations [,1] [,2] #> [1,] -2.303 0.9898 Sampling from a log-normal distribution with these parameters and plotting the histogram shows no inconsistency with the domain expertise. So now with a prior for the JND, the prior for \\(\\beta\\) can be determined. \\[ jnd \\sim \\mathrm{Lognormal}(-2.3, 0.99^2) \\Longleftrightarrow \\frac{1}{jnd} \\sim \\mathrm{Lognormal}(2.3, 0.99^2) \\] and \\[ \\beta = \\frac{\\mathrm{logit}(0.84)}{jnd} \\sim \\mathrm{Lognormal}(2.8, 0.99^2) \\] The priors do not need to be too exact. Rounding the parameters for \\(\\beta\\), the simple model is \\[\\begin{align*} k_i &\\sim \\mathrm{Binomial}(n_i, p_i) \\\\ \\mathrm{logit}(p_i) &= \\beta ( x_i - \\alpha ) \\\\ \\alpha &\\sim \\mathcal{N}(0, 0.06^2) \\\\ \\beta &\\sim \\mathrm{Lognormal}(3, 1^2) \\end{align*}\\] and in Stan, the model code is data { int N; int n[N]; int k[N]; vector[N] x; } parameters { real alpha; real<lower=0> beta; } model { vector[N] p = beta * (x - alpha); alpha ~ normal(0, 0.06); beta ~ lognormal(3.0, 1.0); k ~ binomial_logit(n, p); } generated quantities { vector[N] log_lik; vector[N] k_pred; vector[N] theta = beta * (x - alpha); vector[N] p = inv_logit(theta); for (i in 1:N) { log_lik[i] = binomial_logit_lpmf(k[i] | n[i], theta[i]); k_pred[i] = binomial_rng(n[i], p[i]); } } Notice that the model block is nearly identical to the mathematical model! Construct Summary Functions Whew! that was a lot of work to define the priors for just two parameters. Thankfully going forward not as much work will need to be done to expand the model. The next step is to construct any relevant summary functions. Since the distribution of posterior PSS and JND values are needed for the summary statistics, it will be nice to have a function that can take in the posterior samples for \\(\\alpha\\) and \\(\\beta\\) and return the PSS and JND values. I’ll define \\(Q\\) as a more general function that takes in the two parameters and a probability, \\(\\pi\\), and returns the distribution of SOA values at \\(\\pi\\). \\[\\begin{equation} Q(\\pi; \\alpha, \\beta) = \\frac{\\mathrm{logit(\\pi)}}{\\beta} + \\alpha \\tag{3.8} \\end{equation}\\] The function can be defined in R as Q <- function(p, a, b) qlogis(p) / b + a With \\(Q\\), the PSS and JND can be calculated as \\[\\begin{align} pss &= Q(0.5) \\\\ jnd &= Q(0.84) - Q(0.5) \\end{align}\\] Simulate Bayesian Ensemble During this step, I simulate the Bayesian ensemble and later feed the prior values into the summary functions in order to verify that there are no other inconsistencies with domain knowledge. Since the model is fairly simple, I will simulate directly in R. set.seed(124) n <- 10000 a <- rnorm(n, 0, 0.06) b <- rlnorm(n, 3.0, 1) dat <- with(av_dat, list(N = N, x = x, n = n)) n_obs <- length(dat$x) idx <- sample(1:n, n_obs, replace = TRUE) probs <- logistic(b[idx] * (dat$x - a[idx])) sim_k <- rbinom(n_obs, dat$n, probs) Prior Checks This step pertains to ensuring that prior estimates are consistent with domain expertise. I already did that in the model construction step by sampling values for the just noticeable difference. The first prior chosen was not producing JND estimates that were consistent with domain knowledge, so I adjusted accordingly. That check would normally be done during this step, and I would have had to return to the model development step. 
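Those checks can be written directly against the simulated ensemble. Here is a small sketch using the Q function and the a and b draws from above, with the cutoffs chosen in the summary statistics step (|PSS| greater than 0.150 seconds and JND greater than 1 second):

```r
# Prior PSS and JND implied by the simulated ensemble
prior_pss <- Q(0.5,  a, b)
prior_jnd <- Q(0.84, a, b) - prior_pss

# Proportion of prior draws beyond the extreme cutoffs
mean(abs(prior_pss) > 0.150)
mean(prior_jnd > 1)
```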
Figure 3.2 shows the distribution of prior psychometric functions derived from the simulated ensemble. There are a few very steep and very shallow curves, but the majority fall within a range that appears likely. Figure 3.2: Prior distribution of psychometric functions using the priors for alpha and beta. Additionally most of the PSS values are within \\(\\pm 0.1\\) with room to allow for some larger values. Let’s check the prior distribution of PSS and JND values. Figure 3.3: PSS prior distribution. Figure 3.4: JND prior distribution. I am satisfied with the prior coverage of the PSS and JND values, and there are only a few samples that go beyond the extremes that were specified in the summary statistics step. Configure Algorithm There are a few parameters that can be set for Stan. On the user side, the main parameters are the number of iterations, the number of warm-up iterations, the target acceptance rate, and the number of chains to run. The NUTS algorithm samples in two phases: a warm-up phase and a sampling phase. During the warm-up phase, the sampler is automatically tuning three internal parameters that can significantly affect the sampling efficiency. By default, the Stan function will use half the number of iterations for warm-up and the other half for actual sampling. The full details of Stan’s HMC algorithm is described in the Stan reference manual. For now I am going to use the default algorithm parameters in Stan, and will tweak them later if and when issues arise. Fit Simulated Ensemble Nothing to say here. Only code. sim_dat <- with(av_dat, list(N = N, x = x, n = n, k = sim_k)) m031 <- sampling(m031_stan, data = sim_dat, chains = 4, cores = 4, refresh = 0) Algorithmic Calibration One benefit of using HMC over other samplers like Gibbs sampling is that HMC offers diagnostic tools for the health of chains and the ability to check for divergent transitions (discussed in 4.1.1.4). To check the basic diagnostics of the model, I run the following code. check_hmc_diagnostics(m031) #> #> Divergences: #> 0 of 4000 iterations ended with a divergence. #> #> Tree depth: #> 0 of 4000 iterations saturated the maximum tree depth of 10. #> #> Energy: #> E-BFMI indicated no pathological behavior. There is no undesirable behavior from this model, so next I check the summary statistics of the estimated parameters. The \\(\\hat{R}\\) statistic is a comparison of the measure of variance within chains and between chains. When chains have converged to a stationary distribution, the variance within and between chains is the same, and the ratio is one. Values of \\(\\hat{R} > 1.1\\) are usually indicative of chains that have not converged to a common distribution. Lastly there is the effective sample size (\\(N_{\\mathrm{eff}}\\)) which is a loose measure for the autocorrelation within the parameter samples. As autocorrelation generally decreases as the lag increases, one can achieve a higher \\(N_{\\mathrm{eff}}\\) by running a chain with more samples and then thinning the samples, i.e. saving only every \\(n^{th}\\) sample. Table 3.2: Summary statistics of the fitted Bayesian ensemble. parameter mean se_mean sd 2.5% 97.5% n_eff Rhat alpha 0.0061 0.0001 0.0038 -0.0012 0.0136 4039 0.9995 beta 10.7681 0.0051 0.2404 10.3043 11.2313 2202 1.0003 Both the \\(\\hat{R}\\) and \\(N_{\\mathrm{eff}}\\) look fine for both \\(\\alpha\\) and \\(\\beta\\), thought it is slightly concerning that \\(\\alpha\\) is centered relatively far from zero. 
This could just be due to sampling variance, so I will continue on to the next step. Post-Model, Post-Data Fit Observed Data All of the work up until now has been done without peeking at the observed data. Satisfied with the model so far, I can now go ahead and run the data through. m031 <- sampling(m031_stan, data = obs_dat, chains = 4, cores = 4, refresh = 200) Diagnose Posterior Fit Here I repeat the diagnostic checks that I used after fitting the simulated Bayesian ensemble. check_hmc_diagnostics(m031) #> #> Divergences: #> 0 of 4000 iterations ended with a divergence. #> #> Tree depth: #> 0 of 4000 iterations saturated the maximum tree depth of 10. #> #> Energy: #> E-BFMI indicated no pathological behavior. Table 3.3: Summary statistics of the fitted Bayesian ensemble. parameter mean se_mean sd 2.5% 97.5% n_eff Rhat alpha 0.0373 0.0001 0.0043 0.029 0.0458 3765 1.000 beta 8.4259 0.0039 0.1839 8.070 8.7897 2249 1.001 No indications of an ill-behaved posterior fit! Let’s also check the posterior distributions of \\(\\alpha\\) and \\(\\beta\\) against their prior densities (figure 3.5). Figure 3.5: Comparison of posterior distributions for alpha and beta to their respective prior distributions. The posterior distributions for \\(\\alpha\\) and \\(\\beta\\) are well within the range determined by domain knowledge, and highly concentrated due to both the large amount of data and the fact that this is a completely pooled model - no stratification. As expected, the prior for the JND could have been tighter with more weight below half a second compared to the one second limit used, but this is not prior information, so it is not prudent to change the prior in this manner after having seen the posterior. As a rule of thumb, priors should only be updated as motivated by domain expertise and not by posterior distributions. Posterior Retrodictive Checks Finally it is time to run the posterior samples through the summary functions and then perform retrodictive checks. A retrodiction uses the posterior model to predict and compare against the observed data. This is simply done by drawing samples from the posterior and feeding in the observational data. This may be repeated to gain a retrodictive distribution. posterior_pss <- Q(0.5, p031$alpha, p031$beta) posterior_jnd <- Q(0.84, p031$alpha, p031$beta) - posterior_pss Figure 3.6: Posterior distribution of the PSS and JND. Neither of the posterior estimates for the PSS or JND exceeds the extreme cutoffs set in the earlier steps, so I can be confident that the model is consistent with domain expertise. Let’s also take a second to appreciate how simple it is to visualize and summarize the distribution of values for these measures. Using classical techniques like MLE might require using bootstrap methods to estimate the distribution of parameter values, or one might approximate the parameter distributions using the mean and standard error of the mean to simulate new values. Since we have the entire posterior distribution, we can calculate the distribution of transformed parameters by working directly with the posterior samples and be sure that the intervals are credible. Next is to actually do the posterior retrodictions. I will do this in two steps to better show how the distribution of posterior psychometric functions relates to the observed data, and then compare the observed data to the retrodictions. Figure 3.7 shows the result of the first step. Figure 3.7: Posterior distribution of psychometric functions using pooled observations.
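Roughly, a figure like 3.7 can be produced by pushing a grid of SOA values through a subsample of the posterior draws; the sketch below is illustrative and differs from the code behind the actual figure.

```r
library(ggplot2)

x_grid <- seq(-0.5, 0.5, length.out = 101)
draws  <- sample(seq_along(p031$alpha), 200)

post_curves <- do.call(rbind, lapply(draws, function(i) {
  data.frame(draw = i, x = x_grid,
             p = plogis(p031$beta[i] * (x_grid - p031$alpha[i])))  # inverse logit
}))

ggplot(post_curves, aes(x, p, group = draw)) +
  geom_line(alpha = 0.05) +
  labs(x = "SOA", y = "P(positive response)")
```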
Next I sample parameter values from the posterior distribution and use them to simulate a new data set. In the next iteration I will show how I can get Stan to automatically produce retrodictions for me in the model fitting step. The results of the posterior retrodictions are shown in figure 3.8. alpha <- sample(p031$alpha, n_obs, replace = TRUE) beta <- sample(p031$beta, n_obs, replace = TRUE) logodds <- beta * (av_dat$x - alpha) probs <- logistic(logodds) sim_k <- rbinom(n_obs, av_dat$n, probs) Figure 3.8: Observed data compared to the posterior retrodictions. The data is post-stratified by block for easier visualization. I want to make it clear exactly what the first iteration of this model tells us. It is the average distribution of underlying psychometric functions across all subjects and blocks. It cannot tell us what the differences are between pre- and post-adaptation blocks are, or even what the variation between subjects is. As such, it is only useful in determining if the average value for the PSS is different from 0 or if the average JND is different from some other predetermined level. This model is still useful given the right question, but this model cannot answer questions about group-level effects. Figure 3.8 shows that the model captures the broad structure of the observed data, but is perhaps a bit under-dispersed in the tail ends of the SOA values. Besides this one issue, I am satisfied with the first iteration of this model and am ready to proceed to the next iteration. 3.2 Iteration 2 (electric boogaloo) In this iteration I will be adding in the treatment and age groups into the model. There are no changes with the conceptual understanding of the experiment, and nothing to change with the observational space. As such I will be skipping the first three steps and go straight to the model development step. As I build the model, the number of changes from one iteration to the next should go to zero as the model expands to become only as complex as necessary to answer the research questions. Post-Model, Pre-Data Develop Model To start, let’s add in the treatment indicator and put off consideration of adding in the age group levels. In classical statistics, it is added as an indicator variable (zero or one) for both the slope and intercept (varying slopes, varying intercepts model). Let \\(trt\\) be \\(0\\) if it is the pre-adaptation block and \\(1\\) if the observation comes from the post-adaptation block. \\[ \\theta = \\alpha + \\alpha_{trt} \\times trt + \\beta \\times x + \\beta_{trt}\\times trt \\times x \\] Now when an observation comes from the pre-adaptation block (\\(trt=0\\)) the linear predictor is given by \\[ \\theta_{pre} = \\alpha + \\beta \\times x \\] and when an observation comes from the post-adaptation block (\\(trt=1\\)) the linear predictor is \\[ \\theta_{post} = (\\alpha + \\alpha_{trt}) + (\\beta + \\beta_{trt}) \\times x \\] This might seem like a natural way to introduce an indicator variable, but it comes with serious implications. This model implies that there is more uncertainty about the post-adaptation block compared to the baseline block, and this is not necessarily true. 
\\[\\begin{align*} \\mathrm{Var}(\\theta_{post}) &= \\mathrm{Var}((\\alpha + \\alpha_{trt}) + (\\beta + \\beta_{trt}) \\times x) \\\\ &= \\mathrm{Var}(\\alpha) + \\mathrm{Var}(\\alpha_{trt}) + x^2 \\mathrm{Var}(\\beta) + x^2\\mathrm{Var}(\\beta_{trt}) \\end{align*}\\] On the other hand, the variance of \\(\\theta_{pre}\\) is \\[ \\mathrm{Var}(\\theta_{pre}) = \\mathrm{Var}(\\alpha) + x^2 \\mathrm{Var}(\\beta) \\le \\mathrm{Var}(\\theta_{post}) \\] Furthermore, the intercept, \\(\\alpha\\), is no longer the average response probability at \\(x=0\\) for the entire data set, but is instead exclusively the average for the pre-adaptation block. This may not matter in certain analyses, but one nice property of multilevel models is the separation of population level estimates and group level estimates (fixed vs. mixed effects). So instead the treatment variable is introduced into the linear model as a factor variable. This essentially means that each level in the treatment gets its own parameter estimate, and this also makes it easier to set priors when there are many levels in a group (such as for the subject level). The linear model, using equation (3.4), with the treatment is written as \\[\\begin{equation} \\theta = (\\beta + \\beta_{trt[i]}) \\left[x_i - (\\alpha + \\alpha_{trt[i]})\\right] \\tag{3.9} \\end{equation}\\] As I add in more predictors and groups, equation (3.9) will start to be more difficult to read. What I can do is break up the slope and intercept parameters and write the linear model as \\[\\begin{align*} \\mu_\\alpha &= \\alpha + \\alpha_{trt[i]} \\\\ \\mu_\\beta &= \\beta + \\beta_{trt[i]} \\\\ \\theta &= \\mu_\\beta (x - \\mu_\\alpha) \\end{align*}\\] In this way the combined parameters can be considered separately from the linear parameterization. Which leads me to consider the priors for \\(\\alpha_{trt}\\) and \\(\\beta_{trt}\\). The way that we can turn an normal model with categorical predictors into a multilevel model is by allowing the priors to borrow information from other groups. This is accomplished by putting priors on priors. It is easier to write down the model first before explaining how it works. \\[\\begin{align*} k_i &\\sim \\mathrm{Binomial}(n_i, p_i) \\\\ \\mu_\\alpha &= \\alpha + \\alpha_{trt[i]} \\\\ \\mu_\\beta &= \\beta + \\beta_{trt[i]} \\\\ \\mathrm{logit}(p_i) &= \\mu_\\beta (x_i - \\mu_\\alpha) \\\\ \\alpha &\\sim \\mathcal{N}(0, 0.06^2) \\\\ \\alpha_{trt} &\\sim \\mathcal{N}(0, \\sigma_{trt}^2) \\\\ \\sigma_{trt} &\\sim \\textrm{to be defined} \\end{align*}\\] In the above model, \\(\\alpha\\) gets a fixed prior (the same as in the first iteration), and \\(\\alpha_{trt}\\) gets a Gaussian prior with an adaptive variance term that is allowed to be learned from the data. This notation is compact, but \\(\\alpha_{trt}\\) is actually two parameters - one each for the pre- and post-adaptation blocks - but they both share the same variance term \\(\\sigma_{trt}\\). This produces a regularizing effect where both treatment estimates are shrunk towards the mean, \\(\\alpha\\). I’ll discuss selecting a prior for the variance term shortly, but now I want to discuss setting the prior for the slope terms. Instead of modeling \\(\\beta\\) with a log-normal prior, I can sample from a normal distribution and take the exponential of it to produce a log-normal distribution. I.e. 
\\[ X \\sim \\mathcal{N}(3, 1^2) \\\\ Y = \\exp\\left\\lbrace X \\right\\rbrace \\Longleftrightarrow Y \\sim \\mathrm{Lognormal}(3, 1^2) \\] The motivation behind this transformation is that it is now easier to include new slope variables as an additive effect. If both \\(\\beta\\) and \\(\\beta_{trt}\\) are specified with Gaussian priors, then the exponential of the sum will be a log-normal distribution! So the model now gains \\[\\begin{align*} \\mathrm{logit}(p_i) &= \\exp(\\mu_\\beta) (x_i - \\mu_\\alpha) \\\\ \\beta &\\sim \\mathcal{N}(3, 1^2) \\\\ \\beta_{trt} &\\sim \\mathcal{N}(0, \\gamma_{trt}^2) \\\\ \\gamma_{trt} &\\sim \\textrm{to be defined} \\end{align*}\\] Deciding on priors for the variance term requires some careful consideration. In one sense, the variance term is the within-group variance. Gelman and others (2006) recommend using a half Cauchy prior for multilevel models with groups that have fewer than, say, five levels. This weakly informative prior still has a regularizing effect and dissuades larger variance estimates. Even though the treatment group only has two levels, there is still value in specifying an adaptive prior for them, and there is also a lot of data for each treatment, so partial pooling will make little practical difference anyway. \\[\\begin{align*} \\sigma_{trt} &\\sim \\mathrm{HalfCauchy}(0, 1) \\\\ \\gamma_{trt} &\\sim \\mathrm{HalfCauchy}(0, 1) \\end{align*}\\] Finally I can add in the age group level effects and specify the variance terms. \\[\\begin{align*} \\alpha_{G} &\\sim \\mathcal{N}(0, \\tau_{G}^2)\\\\ \\beta_{G} &\\sim \\mathcal{N}(0, \\nu_{G}^2) \\\\ \\tau_{G} &\\sim \\mathrm{HalfCauchy}(0, 2) \\\\ \\nu_{G} &\\sim \\mathrm{HalfCauchy}(0, 2) \\end{align*}\\] The corresponding Stan model is becoming quite long, so I omit it from here on out. The final Stan model code may be found in the supplementary code of the appendix. Post-Model, Post-Data Fit Observed Data I’m choosing to skip the prior checks this time around and use the observed data to configure the algorithm and diagnose the posterior fit. m032 <- sampling(m032_stan, data = obs_dat, seed = 124, chains = 4, cores = 4, refresh = 100) Diagnose Posterior Fit check_hmc_diagnostics(m032) #> #> Divergences: #> 4 of 4000 iterations ended with a divergence (0.1%). #> Try increasing 'adapt_delta' to remove the divergences. #> #> Tree depth: #> 0 of 4000 iterations saturated the maximum tree depth of 10. #> #> Energy: #> E-BFMI indicated no pathological behavior. In addition to the 4 divergent transitions, there was also a message about the effective sample size (ESS) being too low. The recommended prescription for low ESS is to run the chains for more iterations. The posterior summary shows that \\(N_{\\mathrm{eff}}\\) is low for the age group level parameters (table 3.4). Table 3.4: Summary statistics of the second iteration.
parameter mean se_mean sd 2.5% 97.5% n_eff Rhat a 0.0222 0.0014 0.0412 -0.0683 0.1024 824.6 1.002 aG[1] -0.0009 0.0012 0.0313 -0.0531 0.0714 703.5 1.003 aG[2] 0.0274 0.0012 0.0316 -0.0218 0.0990 698.3 1.003 aG[3] -0.0078 0.0012 0.0311 -0.0609 0.0609 714.3 1.004 b 2.4114 0.0216 0.5665 1.4902 3.8499 688.2 1.003 bG[1] 0.0030 0.0170 0.2942 -0.7681 0.5013 301.3 1.004 bG[2] 0.0538 0.0170 0.2940 -0.7101 0.5499 299.9 1.004 bG[3] -0.2223 0.0172 0.2955 -1.0150 0.2597 296.9 1.004 So I can go back to the algorithm configuration step and increase the number of iterations and warm-up iterations, as well as increase the adapt delta parameter to reduce the number of divergent transitions (which really isn’t a problem right now). Another technique I can employ is non-centered parameterization, and now is as good a time as any to introduce it. I have actually already used a non-centered parameterization in this iteration of the model without addressing it - the transformation of \\(\\beta\\) from a Gaussian to a log-normal distribution. Because HMC is a physics simulation, complicated geometry or posteriors with steep slopes can be difficult to traverse if the step size is too coarse. The solution is to explore a simpler geometry, and then transform the samples into the target distribution. Reparameterization is especially important for hierarchical models. The Cauchy distribution used for the variance term can be reparameterized by first drawing from a uniform distribution on \\((-\\pi/2, \\pi/2)\\). For a half Cauchy distribution, just sample from \\(\\mathcal{U}(0, \\pi/2)\\). \\[\\begin{align*} X &\\sim \\mathcal{U}(-\\pi/2, \\pi/2) \\\\ Y &= \\mu + \\tau \\cdot \\tan(X) \\Longrightarrow Y \\sim \\mathrm{Cauchy}(\\mu, \\tau) \\end{align*}\\] The Gaussian distributions can be reparameterized in a similar way. If \\(Z\\) is a standard normal random variable, then \\(\\mu + \\sigma Z \\sim \\mathcal{N}(\\mu, \\sigma^2)\\). For Stan, sampling from a standard normal or uniform distribution is very easy, and so the non-centered parameterization can alleviate divergent transitions. I now return to the model development step and incorporate the new methods. Develop Model The model changes consist of using the non-centered parameterizations discussed in the previous step. An example is in the parameterization of \\(\\tau_{G}\\). The other variance terms are parameterized in the same fashion. \\[\\begin{align*} U_\\tau &\\sim \\mathcal{U}(0, \\pi/2) \\\\ \\tau_{G} &= 2 \\cdot \\tan(U_\\tau) \\Longrightarrow \\tau_G \\sim \\mathrm{HalfCauchy}(0, 2) \\end{align*}\\] As an aside, a multilevel model can be fit in R using lme4::glmer, brms::brm, or rstanarm::stan_glmer, and they all use nearly the same formula notation to specify the model. The notation is very compact, but easy to unpack. Values not in a grouping term are fixed effects and values in a grouping term (e.g. (1 + x | G)) are mixed or random effects depending on which textbook you read. The main difference between the packages is how the binomial outcome is written: lme4 and rstanarm accept cbind(k, n - k), while brms uses k | trials(n). f <- cbind(k, n - k) ~ 1 + x + (1 + x | G) + (1 + x | trt) lme4::glmer(f, data = data, family = binomial("logit")) rstanarm::stan_glmer(f, data = data, family = binomial("logit")) brms::brm(k | trials(n) ~ 1 + x + (1 + x | G) + (1 + x | trt), data = data, family = binomial("logit")) The simpler notation and compactness of these methods are very attractive, and for certain analyses they may be more than sufficient. The goal here is to decide early on if these methods satisfy the model adequacy requirements, and to use more flexible modeling tools like Stan if necessary.
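Returning briefly to the half Cauchy reparameterization above, a quick sanity check in R shows that the uniform-tangent construction reproduces the half Cauchy distribution (compared here through quantiles of a large sample):

```r
set.seed(1)
n_draws <- 1e5

u      <- runif(n_draws, 0, pi / 2)
tau    <- 2 * tan(u)                                       # reparameterized HalfCauchy(0, 2)
direct <- abs(rcauchy(n_draws, location = 0, scale = 2))   # direct half Cauchy draws

round(rbind(reparameterized = quantile(tau,    c(0.25, 0.5, 0.75)),
            direct          = quantile(direct, c(0.25, 0.5, 0.75))), 2)
```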
Fit Observed Data Moving on to refitting the data, this time with more iterations and with the non-centered parameterization. Since this model is sampling from intermediate parameters, I can choose to keep only the transformed parameters. m032nc <- sampling(m032nc_stan, data = obs_dat, seed = 143, thin = 2, iter = 4000, warmup = 2000, pars = keep_pars, control = list(adapt_delta = 0.95), chains = 4, cores = 4, refresh = 100) Diagnose Posterior Fit check_hmc_diagnostics(m032nc) #> #> Divergences: #> 32 of 4000 iterations ended with a divergence (0.8%). #> Try increasing 'adapt_delta' to remove the divergences. #> #> Tree depth: #> 0 of 4000 iterations saturated the maximum tree depth of 10. #> #> Energy: #> E-BFMI indicated no pathological behavior. There are still a few divergent transitions (\\(<1\\%\\)), but the effective sample size increased significantly (table 3.5). Table 3.5: Summary statistics of the second iteration with non-centered parameterization. parameter mean se_mean sd 2.5% 97.5% n_eff Rhat a 0.0192 0.0008 0.0419 -0.0744 0.0956 2509 1.0005 aG[1] -0.0025 0.0006 0.0326 -0.0636 0.0739 2737 1.0014 aG[2] 0.0262 0.0006 0.0328 -0.0342 0.1044 2644 1.0014 aG[3] -0.0093 0.0006 0.0326 -0.0713 0.0652 2752 1.0011 aT[1] 0.0185 0.0009 0.0425 -0.0546 0.1242 2338 1.0005 aT[2] 0.0039 0.0009 0.0419 -0.0679 0.1089 2404 1.0005 b 2.3841 0.0115 0.5284 1.4762 3.6952 2109 1.0010 bG[1] 0.0170 0.0049 0.2730 -0.6323 0.4979 3106 1.0004 bG[2] 0.0678 0.0049 0.2728 -0.5773 0.5671 3113 1.0005 bG[3] -0.2075 0.0050 0.2741 -0.8506 0.2767 3026 1.0004 bT[1] -0.2764 0.0106 0.4914 -1.6338 0.5427 2141 0.9999 bT[2] -0.0501 0.0106 0.4909 -1.4120 0.7778 2125 1.0000 Now is also a good time to introduce a diagnostic tool called the trace plot. The trace plot is a way to visualize the sampling path of different parameters across all the chains. A healthy set of chains will look like a fuzzy caterpillar, bouncing around the posterior without any visual patterns or long sequences of being in the same place. Figure 3.9 shows the trace plot for the slope and intercept parameters. Each chain looks like it is sampling around the same average value as the others with identical spreads (stationary and homoscedastic). This also helps to solidify the idea that the \\(\\hat{R}\\) statistic is the ratio of between-chain variance to within-chain variance. Figure 3.9: Traceplot for the slope and intercept parameters. The chains in figure 3.9 look healthy, as do the chains for the other parameters not shown. Since there are no algorithm issues, I can proceed to the posterior retrodictive checks. Posterior Retrodictive Checks In this iteration of the model, I now have estimates for the age groups and the treatment. The posterior estimates for the PSS and JND are shown in figure 3.10. There are many ways to visualize and compare the distributions across age groups and conditions, and it really depends on what question is being asked. If, for example, the question is “what is the qualitative difference between pre- and post-adaptation across age groups?”, then figure 3.10 could answer that because it juxtaposes the two blocks in the same panel. I will consider alternative ways of arranging the plots in chapter 5. Figure 3.10: Posterior distribution of the PSS and JND. As for the posterior retrodictions, I can do something similar to last time. First I’d like to point out that I had Stan perform posterior retrodictions during the fitting step.
This was achieved by adding a generated quantities block to the Stan program that takes the posterior samples for the parameters, and then randomly generates a value from a binomial distribution for each observation in the data. In effect, I now have \\(4,000\\) simulated data sets! str(p032$k_pred) #> num [1:4000, 1:1827] 0 0 0 0 0 0 0 0 0 0 ... #> - attr(*, "dimnames")=List of 2 #> ..$ iterations: NULL #> ..$ : NULL I only need one to compare to the observed data, so I will select it randomly from the posterior. Figure 3.11: Observed data compared to the posterior retrodictions. The posterior retrodictions show no disagreement between the model and the observed data. I would almost say that this model is complete, but it has one more problem - it measures the average difference in blocks and the average difference in age groups, but does not consider any interaction between the two! Implicitly it assumes that temporal recalibration affects all age groups the same, which may not be true, so in the next iteration I will need to address that. 3.3 Iteration 3 (the one for me) Since there is no change in the pre-model analysis, I’ll again jump straight to the model development step, after which I will jump right to the posterior retrodictive checks. The changes to the model going forward are minor, and subsequent steps are mostly repetitions of the ones taken in the first two iterations. Develop Model This time around I need to model an interaction between age group and treatment. In a simple model in R, interactions between factor variable \\(A\\) and factor variable \\(B\\) can be accomplished by taking the cross-product of all the factor levels. For example, if \\(A\\) has levels \\(a, b, c\\) and \\(B\\) has levels \\(x, y\\), then the interaction variable \\(C=A:B\\) will have levels \\(ax, ay, bx, by, cx, cy\\). The concept is similar in Stan - create a new variable that is indexed by the cross of the two other factor variables. \\[ \\beta_{G[i] \\times trt[i]} \\Longrightarrow bGT[G[i], trt[i]] \\] In the above expression, the interaction variable \\(\\beta_{G[i] \\times trt[i]}\\) is between age group and treatment. The right hand side is the corresponding Stan parameter. Notice that it is an array-like object that is indexed by the age group at observation \\(i\\) and the treatment at observation \\(i\\). For example, observation \\(51\\) is from a middle age adult subject during the post-adaptation block, so \\(bGT[G[51], trt[51]] = bGT[2, 2]\\). An interaction term is added for both the slope and intercept in this iteration. Post-Model, Post-Data Posterior Retrodictive Checks Again, I’ll start with the PSS and JND posterior densities. Because the model now allows for the interaction of age group and block, there is no longer a fixed shift in the posterior distribution of the PSS and JND values. Figure 3.12 shows that temporal recalibration had no discernible effect on the PSS estimates for the middle age group. Figure 3.12: Posterior distribution of the PSS and JND. The posterior retrodictions for this model are going to be similar to the last iteration. Instead, I want to see how this model performs when it comes to the posterior retrodictions of the visual TOJ data. There is something peculiar about that data that is readily apparent when I try to fit a GLM using classical MLE. vis_mle <- glm(cbind(k, n-k) ~ 0 + sid + sid:soa, data = visual_binomial, family = binomial("logit")) I get a message saying that the fitted probabilities are numerically 0 or 1.
What does this mean? First this model estimates a slope and an intercept for each subject individually (no pooling model), so we can look at the estimates for each subject. Table 3.6 shows the top 3 coefficients sorted by largest standard error of the estimate for both slope and intercept. Table 3.6: Coefficients with the largest standard errors. Subject Coefficient Estimate Std. Error z value Pr(>|z|) O-f-MW Intercept -3.6313 1.2170 -2.9837 0.0028 M-f-CC Intercept -2.4925 1.0175 -2.4497 0.0143 M-f-DB Intercept -1.0928 0.6389 -1.7105 0.0872 Y-m-CB Slope 0.6254 12.7380 0.0491 0.9608 M-f-DB Slope 0.1434 0.0442 3.2471 0.0012 M-f-CC Slope 0.1434 0.0442 3.2471 0.0012 The standard error of the slope estimate for subject Y-m-CB is incredibly large in comparison to its own estimate and in comparison to the slope with the next largest standard error. To see what’s going wrong, let’s look at the graph for this subject. Figure 3.13: There is almost complete separation in the data. Figure 3.13 shows that there is almost perfect separation in the data for this subject, and that is giving the MLE algorithm trouble. It also has serious consequences on the estimated JND as the estimated JND for this subject is just 3ms which is suspect. Of course one remedy for this is to pool observations together as I have done for the model in this iteration. The data is pooled together at the age group level and variation in the subjects’ responses removes the separation. This isn’t always ideal, as sometimes we may be interested in studying the individuals within the experiment. If we can’t get accurate inferences about the individual, then the results are not valid. The better solution is to use a hierarchical model! With a hierarchical model, individual estimates are shrunk towards the group mean, and so inferences about individuals may be made along with inferences about the group that contains them. I am interested only in the group level inferences right now, but in chapter 5 I will compare the group level model at the end of this chapter to a model that includes individual estimates. Figure 3.14 shows the posterior distribution of psychometric functions for the visual TOJ data. Notice that there is almost no difference between the pre- and post-adaptation blocks. Figure 3.14: Posterior distribution of psychometric functions for the visual TOJ data. There is almost no visual difference between the pre- and post-adaptation blocks. Furthermore, as shown by the posterior retrodictions (figure 3.15), the model is not fully capturing the variation in the responses near the outer SOA values. I.e. the posterior retrodictions are tight around SOA values near zero. Figure 3.15: Observed visual TOJ data compared to the posterior retrodictions. The retrodictions are not capturing the variation at the outer SOA values. So why is the model having difficulty expressing the data? Well as it turns out, there is one more concept pertaining to psychometric experiments that I have left out until now, and that is a lapse in judgment. Not a lapse in judgment on my part, but the actual act of having a lapse while performing an experiment. So now, dear reader, I hope you have it in you for one last iteration of this model before moving on to read about the long sought after results. 3.4 Iteration 4 (what’s one more) Pre-Model, Pre-Data Conceptual Analysis A lapse in judgment can happen for any reason, and is assumed to be random and independent of other lapses. 
They can come in the form of the subject accidentally blinking during the presentation of a visual stimulus, or unintentionally pressing the wrong button to respond. Whatever the case is, lapses can have a significant affect on estimating the psychometric function. Post-Model, Pre-Data Develop Model Lapses can be modeled as occurring independently at some fixed rate. Fundamentally this means that the underlying performance function, \\(F\\), is bounded by some lower and upper lapse rate. This manifests as a scaling and translation of \\(F\\). For a given lower and upper lapse rate \\(\\lambda\\) and \\(\\gamma\\), the performance function \\(\\Psi\\) is \\[ \\Psi(x; \\alpha, \\beta, \\lambda, \\gamma) = \\lambda + (1 - \\lambda - \\gamma) F(x; \\alpha, \\beta) \\] Figure 3.16: Psychometric function with lower and upper performance bounds. In certain psychometric experiments, \\(\\lambda\\) is interpreted as the lower performance bound or the guessing rate. For example, in certain 2-alternative forced choice (2-AFC) tasks, subjects are asked to respond which of two masses is heavier, and the correctness of their response is recorded. When the masses are the same, the subject can do no better than random guessing. In this task, the lower performance bound is assumed to be 50% as their guess is split between two choices. As the absolute difference in mass grows, the subject’s correctness rate increases, though lapses can still happen. In this scenario, \\(\\lambda\\) is fixed at \\(0.5\\) and the lapse rate \\(\\gamma\\) is a parameter in the model. The model I am building for this data does not explicitly record correctness, so I do not give \\(\\lambda\\) the interpretation of a guessing rate. Since the data are recorded as proportion of positive responses, I instead treat \\(\\lambda\\) and \\(\\gamma\\) as lapse rates for negative and positive SOAs. But why should the upper and lower lapse rates be treated separately? A lapse in judgment can occur independently of the SOA, so \\(\\lambda\\) and \\(\\gamma\\) should be the same no matter what. With this assumption in mind, I can throw away \\(\\gamma\\) and assume that the lower and upper performance bounds are restricted by the same amount. I.e. \\[\\begin{equation} \\Psi(x; \\alpha, \\beta, \\lambda) = \\lambda + (1 - 2\\lambda) F(x; \\alpha, \\beta) \\tag{3.10} \\end{equation}\\] While I’m throwing in a lapse rate, I’ll also ask the question if different age groups have different lapse rates. To answer this (or rather have the model answer this), I include the new parameter \\(\\lambda_{G[i]}\\) into the model so that the lapse rate is estimated for each age group. It’s okay to assume that lapses in judgment are rare, and it’s also true that the rate (or probability) of a lapse is bounded in the interval \\([0, 1]\\). Because of this, I put a \\(\\mathrm{Beta(4, 96)}\\) prior on \\(\\lambda\\) which a priori puts 99% of the weight below \\(0.1\\) and an expected lapse rate of \\(0.04\\). I could also set up the model so that information about the lapse rate is shared between age groups (i.e. multilevel), but I’ll leave that as an exercise for the reader. Construct Summary Functions Since the fundamental structure of the linear model has changed, it is worth updating the summary function that computes the distribution of SOA values for a given response probability. 
Given equation (3.10), the summary function \\(Q\\) is \\[ Q(\\pi; \\alpha, \\beta, \\lambda) = F_{\\alpha, \\beta}^{-1}\\left(\\frac{\\pi - \\lambda}{1 - 2\\lambda}\\right) = \\frac{1}{\\exp(\\beta)} \\cdot \\mathrm{logit}\\left(\\frac{\\pi - \\lambda}{1-2\\lambda}\\right) + \\alpha \\] Post-Model, Post-Data Fit Observed Data Because it is the visual data that motivated this iteration, I will finish up using that data to fit the model and perform posterior retrodictive checks. Posterior Retrodictive Checks The plot for the distribution of psychometric functions is repeated one more time below (figure 3.17). There is now visual separation between the pre- and post-adaptation blocks, with the latter exhibiting a higher slope, which in turn implies a reduced just noticeable difference, consistent with the audiovisual data in the previous model. Figure 3.17: There is now a visual distinction between the two blocks unlike in the model without lapse rate. The lapse rate acts as a balance between steep slopes near the PSS and variation near the outer SOA values. As for the posterior retrodictions, the model is now better capturing the outer SOA variation. This can best be seen in the comparison of the younger adult pre-adaptation block of figure 3.18. Figure 3.18: The lapse rate produces posterior retrodictions that are visually more similar to the observed data than in the previous model, suggesting that the model is now just complex enough to capture the relevant details of the data generating process. 3.5 Celebrate 4 Model Fitting/Checking Check your model before you wreck your model This chapter serves as the formal home of definitions and explanations of concepts relating to Markov Chain Monte Carlo (MCMC) and other diagnostic tools when working with Bayesian inference models. I touched on the physics of Hamiltonian Monte Carlo (HMC) and the diagnostic tools that come with it in the previous chapter, but now I will go into more detail. 4.1 Fitting using HMC Why do we need a sampler at all? Bayesian statistics and modeling stem from Bayes’ theorem (Equation (4.1)). The prior \\(P(\\theta)\\) is some distribution over the parameter space and the likelihood \\(P(X | \\theta)\\) is the probability of an outcome in the sample space given a value in the parameter space. To keep things simple, we generally say that the posterior is proportional to the prior times the likelihood. Why proportional? The posterior distribution is a probability distribution, which means that the sum or integral over the parameter space must evaluate to one. Because of this constraint, the denominator in (4.1) acts as a scale factor to ensure that the posterior is valid. \\[\\begin{equation} P(\\theta | X) = \\frac{P(X | \\theta)\\cdot P(\\theta)}{\\sum_i P(X | \\theta_i)\\cdot P(\\theta_i)} = \\frac{P(X | \\theta)\\cdot P(\\theta)}{\\int_\\Omega P(X | \\theta) P(\\theta)\\, d\\theta} \\tag{4.1} \\end{equation}\\] For simple models, the posterior distribution can sometimes be evaluated analytically. An example of this is in conjugate models, where the resulting posterior distribution is of the same type as the prior distribution, and an example of a conjugate model is the Beta distribution for inference about a proportion statistic. This is common in baseball for a player’s batting average.
I don’t know a lot about baseball, but I know that hitting a baseball is a little less common than one in three swings, so a priori I believe the probability of hitting a baseball is distributed as \\(\\mathrm{Beta}(2, 5)\\) because the expected value is \\(\\approx 0.29\\) and not a lot of weight is given to any particular value. Throughout a game I follow one player and he hits four balls and misses six - data that can be modeled as a Binomial observation. To figure out the posterior distribution for batting average, I use Bayes’ theorem - the posterior is proportional to the prior times the likelihood. \\[\\begin{align*} P(\\pi | y) &\\propto P(y | \\pi) \\cdot P(\\pi) \\\\ &= {10 \\choose 4}\\pi^{4} (1-\\pi)^{6} \\cdot \\frac{\\Gamma(2+5)}{\\Gamma(2)\\Gamma(5)} \\pi^{2-1}(1-\\pi)^{5-1} \\\\ &\\propto \\pi^{4+2-1}(1-\\pi)^{6+5-1} \\\\ &= \\pi^{6-1}(1-\\pi)^{11-1} \\end{align*}\\] The final line is the shape of a Beta distribution with parameters \\(6=2+4\\) and \\(11=5+6\\). The simple update rule is that for a prior \\(\\mathrm{Beta}(a, b)\\) and observed data with \\(y\\) successes in \\(n\\) observations, the posterior distribution is \\(\\mathrm{Beta}(a + y, b + n - y)\\). For the baseball player, the Bayesian estimate of his batting average is \\(6/(6+11) \\approx 0.353\\), but still with a good amount of uncertainty as shown in figure 4.1. Figure 4.1: After observing 4 hits in 10, the Beta(2,5) prior gets updated to become a Beta(6,11) posterior. Conjugate models are great for simple observational data, but often it happens that the posterior distribution cannot be deduced from the model or that the integral in the denominator is complex or of a high dimension. In the former situation, the integral may not be possible to evaluate, and in the latter there may not be enough computational resources in the world to perform a simple grid approximation. The solution is to use Markov Chain Monte Carlo (MCMC). The idea is that we can draw samples from the posterior distribution in a way that samples proportional to the density. This sampling is a form of approximation to the area under the curve (i.e. an approximation to the denominator in (4.1)). Rejection sampling (Gilks and Wild 1992) and slice sampling (Neal 2003) are basic methods for sampling from a target distribution; however, they can often be inefficient. NUTS is a much more complex algorithm that can be compared to a physics simulation. A massless “particle” is flicked in a random direction with some amount of kinetic energy in a probability field, and is stopped randomly. The stopping point is the new proposal sample. The No U-Turn part means that when the algorithm detects that the particle is turning around, it will stop so as not to return to the starting position. This sampling scheme has a much higher rate of accepted samples, and also comes with many built-in diagnostic tools that let us know when the sampler is having trouble efficiently exploring the posterior. I’ll talk more about these diagnostic tools throughout the remaining sections. 4.1.1 Diagnostic Tools 4.1.1.1 Trace Plots Trace plots are the first line of defense against misbehaved samplers. They are visual aids that let the practitioner assess the qualitative health of the chains, looking for properties such as autocorrelation, heteroskedasticity, non-stationarity, and convergence. Healthy chains are well-mixing and stationary. It’s often better to run more chains during the model building process so that issues with mixing and convergence can be diagnosed sooner.
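For reference, both kinds of plots can be produced with a few lines of R; this sketch assumes the bayesplot package is available and uses the fit from the second iteration.

```r
library(bayesplot)

draws <- as.array(m032nc, pars = c("a", "b"))
mcmc_trace(draws)      # trace plots (the kind shown in figure 4.2)
mcmc_rank_hist(draws)  # rank histograms, or "trank" plots (the kind shown in figure 4.3)
```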
Even one unhealthy chain can be indicative of a poorly specified model. The addition of more chains also contributes to the estimation of the Split \\(\\hat{R}\\) statistic, which I discuss in 4.1.1.2. Figure 4.2 shows what a set of healthy chains looks like. Figure 4.2: An example of healthy chains. There is a similar diagnostic plot called the rank histogram plot (or trank plot for trace rank plot). Vehtari et al. (2020) detail the motivation for trank plots, but in short, if the chains are all exploring the posterior efficiently, then the histograms will be similar and uniform. Figure 4.3 is from the same model as above but for the rank histogram. Figure 4.3: A trank plot of healthy chains. As the number of parameters in a model grows, it becomes exceedingly tedious to check the trace and trank plots of all parameters, and so numerical summaries are required to flag potential issues within the model. 4.1.1.2 \\(\\hat{R}\\) and Split \\(\\hat{R}\\) The most common summary statistic for chain health is the potential scale reduction factor (Gelman, Rubin, and others 1992), which measures the ratio of between-chain variance to within-chain variance. When the two have converged, the ratio is one. I’ve already shared examples of healthy chains which would also have healthy \\(\\hat{R}\\) values, but it’s valuable to also share an example of a bad model. Below is the 8 Schools example (Gelman et al. 2013) which is a classical example for introducing Stan and testing the operating characteristics of a model. schools_dat <- list( J = 8, y = c(28, 8, -3, 7, -1, 1, 18, 12), sigma = c(15, 10, 16, 11, 9, 11, 10, 18) ) The initial starting parameters for this model are intentionally set to vary between \\(-10\\) and \\(10\\) (in contrast to the default range of \\((-2, 2)\\)) and with only a few samples drawn in order to artificially drive up the split \\(\\hat{R}\\) statistic. The model is provided as supplementary code in the appendix. fit_cp <- sampling(schools_mod_cp, data = schools_dat, refresh = 0, iter = 50, init_r = 10, seed = 671254821) Stan instantly warns about many different issues with this model, but the R-hat is the one of interest. The largest is \\(1.68\\), which is incredibly large. These chains do not look good at all! Let’s take a look at the \\(\\hat{R}\\) values and see if we can calculate one of the values manually. Table 4.1: Split R-hat values from the 8 Schools example. Parameter Rhat mu 1.234 tau 1.596 To calculate the (non-split) \\(\\hat{R}\\), first calculate the between-chain variance, and then the average within-chain variance. For \\(M\\) independent Markov chains, \\(\\theta_m\\), with \\(N\\) samples each, the between-chain variance is \\[ B = \\frac{N}{M-1}\\sum_{m=1}^{M}\\left(\\bar{\\theta}_m - \\bar{\\theta}\\right)^2 \\] where \\[ \\bar{\\theta}_m = \\frac{1}{N}\\sum_{n=1}^{N}\\theta_{m}^{(n)} \\] and \\[ \\bar{\\theta} = \\frac{1}{M}\\sum_{m=1}^{M}\\bar{\\theta}_m \\] The within-chain variance, \\(W\\), is the variance averaged over all the chains.
\\[ W = \\frac{1}{M}\\sum_{m=1}^{M} s_{m}^2 \\] where \\[ s_{m}^2 = \\frac{1}{N-1}\\sum_{n=1}^{N}\\left(\\theta_{m}^{(n)} - \\bar{\\theta}_m\\right)^2 \\] The variance estimator is a weighted mixture of the within-chain and cross-chain variation \\[ \\hat{var} = \\frac{N-1}{N} W + \\frac{1}{N} B \\] and finally \\[ \\hat{R} = \\sqrt{\\frac{\\hat{var}}{W}} \\] Here is the calculation in R param <- "mu" theta <- p_cp[,,param] N <- nrow(theta) M <- ncol(theta) theta_bar_m <- colMeans(theta) theta_bar <- mean(theta_bar_m) B <- N / (M - 1) * sum((theta_bar_m - theta_bar)^2) s_sq_m <- apply(theta, 2, var) W <- mean(s_sq_m) var_hat <- W * (N - 1) / N + B / N (mu_Rhat <- sqrt(var_hat / W)) #> [1] 1.134 The \\(\\hat{R}\\) statistic is smaller than the split \\(\\hat{R}\\) value provided by Stan. This is a consequence of steadily increasing or decreasing chains. The split value does what it sounds like, and splits the chains in half and measures each half separately. In this way, the measure is more robust in detecting unhealthy chains. This also highlights the utility in using both visual and statistical tools to evaluate models. 4.1.1.3 Effective Sample Size Samples from Markov Chains are typically autocorrelated, which can increase uncertainty of posterior estimates. I encountered this issue in the second iteration of the model building process, and the solution I used was to reparameterize the model to avoid steep log-posterior densities - the benefit of reparameterization is conveyed by the ratio of effective sample size to actual sample size in figure 4.4. When the HMC algorithm is exploring difficult geometry, it can get stuck in regions of high densities, which means that there is more correlation between successive samples. Figure 4.4: Ratio of N_eff to actual sample size. Low ratios imply high autocorrelation which can be alleviated by reparameterizing the model or by thinning. As the strength of autocorrelation generally decreases at larger lags, a simple prescription to decrease autocorrelation between samples and increase the effective sample size is to use thinning. Thinning means saving every \\(k^{th}\\) sample and throwing the rest away. If one desired to have 2000 posterior draws, it could be done in two of many possible ways Generate 2000 draws after warmup and save all of them Generate 10,000 draws after warmup and save every \\(5^{th}\\) sample. Both will produce 2000 samples, but the method using thinning will have less autocorrelation and a higher effective number of samples. Though it should be noted that generating 10,000 draws and saving all of them will have a higher number of effective samples than the second method with thinning, so thinning should only be favored to save memory. 4.1.1.4 Divergent Transitions Unlike the previous tools for algorithmic faithfulness which can be used for any MCMC sampler, information about divergent transitions is intrinsic to Hamiltonian Monte Carlo. Recall that the HMC and NUTS algorithm can be imagined as a physics simulation of a particle in a potential energy field, and a random momentum is imparted on the particle. The sum of the potential energy and the kinetic energy of the system is called the Hamiltonian, and is conserved along the trajectory of the particle (Stan Development Team 2020). 
The path that the particle takes is a discrete approximation to the actual path where the position of the particle is updated in small steps called leapfrog steps (see Leimkuhler and Reich (2004) for a detailed explanation of the leapfrog algorithm). A divergent transition happens when the simulated trajectory is far from the true trajectory as measured by the Hamiltonian. A few divergent transitions is not indicative of a poorly performing model, and often divergent transitions can be reduced by reducing the step size and increasing the adapt delta parameter. On the other hand, a bad model may never be improved just by tweaking some parameters. This is the folk theorem of statistical computing - if there is a problem with the sampling, blame the model, not the algorithm. Divergent transitions are never saved in the posterior samples, but they are saved internally to the Stan fit object and can be compared against good samples. Sometimes this can give insight into which parameters and which regions of the posterior the divergent transitions are coming from. Figure 4.5: Divergent transitions highlighted for some parameters from the second iteration model. Divergent transitions tend to occur when both the hierarchical variance terms are near zero. 4.2 Prior Predictive Checks I used prior predictive checks in the first iteration of the model to establish a few things pertaining to model adequacy and computational faithfulness. The first reason is to ensure that the selected priors do not put too much mass in completely implausible regions (such as really large JND estimates). Data simulated from the priors can also be used to check that the software works. When you have the exact priors that were used to generate the data, the fitting algorithm should be able to accurately recover the priors. transition to posterior predictive checks chapter fig 10 in for posterior predictive Gabry et al. (2019) References "],["predictive-inferences.html", "5 Predictive Inference", " 5 Predictive Inference All models are wrong but some are useful The above quote is from George Box, and it is a popular quote that statisticians like to throw around9. All models are wrong because it is nearly impossible to account for the minutiae of every process that contributes to an observed phenomenon, and often trying to results in poorer performing models. why is predictive performance the right model selection/comparison criteria idea of “geocentric” models: wrong models that still predict well notions overfitting/underfitting: more parameters leads to better in-sample fit a prefect fit to data is always possible but predicts poorly (overfit) underfitting fails to capture the regular features of the data (why regularizing priors are important) I think you covered this already in Ch. 1 and 2 but here is more thoughts: The PI’s predictive philosophy has evolved to prefer this reference model approach. Early on statisticians are usually taught to prefer parsimony or simple models. The idea is that this guards against overfitting and also boosts power to detect statistically significant effects. Also computation limitations made small models preferable. But in modern statistical learning, we tend to include all relevant data with elaborate probabilitistc structures. The idea is to include all the data with the aim of squeezing all predictive ability from the data points. not sure where this goes, but make sure you say that 1 model is not sufficient, we need a collection (or series/sequence) of models. 
that is why we need to fit models fast in stan/HMC transitional sentence: given that we want to compare models (and possibly select), how to quantify predictive performance. Quantifying predictive performance: log posterior predictive (more below) and information theory (if you want to talk about that at all); cross-validation, loo, WAIC and estimates of loo; loo, PSIS, Vehtari, Gelman, and Gabry (2017). Some notes from my grant follow (rewrite this for your glm based model). Given a model \\(M\\) with posterior predictive distribution \\(p( \\tilde{T} | \\tilde{x}, D)\\) for a new survival time \\(\\tilde{T}\\) with feature vector \\(\\tilde{x}\\) and observed data \\(D\\), we evaluate predictive performance using the logarithm of the predictive density (LPD) evaluated pointwise at the actual observation \\(( \\tilde{t}, \\tilde{x}, M)\\) (???; ???). LPD is a proper scoring rule and measures both the calibration and sharpness of the predictive distribution (???). I omit technical definitions of these concepts, but loosely, calibration means the statistical consistency between the predictive distribution and the observations, while sharpness refers to how concentrated the predictive distribution is (how precise the forecast is). Typically we don’t have the analytic form of the predictive posterior, so instead we use \\(J\\) MCMC draws to approximate the LPD (???): \\[\\begin{equation} \\mathrm{LPD}(M) \\approx \\log \\left( \\frac{1}{J} \\sum_{j=1}^{J} p( \\tilde{t} | \\tilde{x}, D, \\theta^{(j)} ) \\right), \\end{equation}\\] where \\(\\theta^{(j)}\\) is the posterior parameter vector from the \\(j\\)th posterior sample. Further, we’d like a metric of general predictive performance, and so we compute the mean LPD (MLPD) over \\(n\\) data points: \\[ \\mathrm{MLPD}(M) = \\frac{1}{n} \\sum_{i=1}^{n} \\mathrm{LPD}_i(M) \\] where \\(\\mathrm{LPD}_i\\) is the log predictive density evaluated at the \\(i\\)th observation. Further, we’d like to compare the MLPD value of a model \\(M\\) and another model \\(M^*\\) (possibly a reference model or competing model) through the difference \\[ \\Delta \\mathrm{MLPD} = \\mathrm{MLPD}(M) - \\mathrm{MLPD}(M^*). \\] A negative difference in \\(\\Delta \\mathrm{MLPD}\\) for Model \\(M\\) compared to a reference Model (\\(M^*\\)) means worse performance for the model, while a positive difference indicates better prediction. We assess the uncertainty in the difference using Bayesian bootstrap (???) samples of \\(\\Delta \\mathrm{MLPD}\\) between model \\(M\\) and \\(M^*\\). References "],["results.html", "6 Psychometric Results 6.1 Effect of Adaptation across Age Groups 6.2 Lapse Rate across Age Groups", " 6 Psychometric Results What was the point of going through all the work of building a model if not to answer the questions that motivated the model in the first place? To reiterate, the questions pertain to how the brain reconciles stimuli originating from different sources, and if biological (age) and contextual (task, temporal recalibration) factors contribute to global percepts. These questions are answered by way of a psychometric experiment and the resulting psychometric function (chapter 2). I’ve divided this chapter into two sections - the effects of temporal recalibration and the consideration of a lapse rate. Temporal recalibration is considered in the context of perceptual synchrony and temporal sensitivity, and the results are broken down by age group. Also recall that there are four separate tasks - audiovisual, visual, duration, and sensorimotor. 6.1 Effect of Adaptation across Age Groups Temporal recalibration consists of presenting a subject with an adapting stimulus throughout a block of a psychometric experiment.
Depending on the mechanisms at work, the resulting psychometric function can either be shifted (biased) towards the adapting stimulus (lag adaptation) or away from it (Bayesian adaptation). The theory of integrating sensory signals is beyond my scope, but some papers discussing sensory adaptation in more detail are Miyazaki et al. (2006), Sato and Aihara (2011), and Stocker and Simoncelli (2005). I will be discussing the statistical results without considering the deeper psychological theory. 6.1.1 On Perceptual Synchrony Perceptual synchrony occurs when the temporal delay between two stimuli is small enough that the brain integrates the two signals into a global percept - perceived as happening simultaneously. Perceptual synchrony is studied through the point of subjective simultaneity (PSS), and in a simple sense represents the bias towards a given stimulus. Ideally the bias would be zero, but human perception is liable to change due to everyday experiences. The pre-adaptation block is a proxy for implicit bias, and the post-adaptation block indicates whether lag or Bayesian adaptation is taking place. Some researchers believe that both forms of adaptation are taking place at all times and that the mixture rates are determined by biological and contextual factors. I will try to stay away from making any strong determinations and will only present the results conditional on the model and the data. Audiovisual TOJ Task There are two ways that we can visually draw inferences across the 6 different age-block combinations. The distributions can either be faceted by age group, or they can be faceted by block. There are actually many ways that the data can be presented, but these two methods of juxtaposition help to answer two questions - how does the effect of adaptation vary by age group, and is there a difference in age groups by block? The left hand plot of figure 6.1 answers the former, and the right hand plot answers the latter. Figure 6.1: Posterior distribution of PSS values for the audiovisual task. Across all age groups, temporal recalibration results in a negative shift towards zero in the PSS (as shown by the left hand plot), but there is no significant difference in the PSS between age groups (right hand plot). A very convenient consequence of using MCMC is that the samples from the posterior can be recombined in many ways to describe new phenomena. The PSS values can even be pooled across age groups so that the marginal effect of recalibration may be considered (left hand plot of figure 6.2). Figure 6.2: Posterior distribution of PSS values for the audiovisual task. Left: Marginal over age group. Right: Marginal over block. Now with the marginal of age group, the distribution of differences between pre- and post-adaptation blocks can be calculated. I could report a simple credible interval, but it almost seems disingenuous given that the entire distribution is available. I could report that the \\(90\\%\\) highest posterior density interval (HPDI) of the difference is \\((-0.036, 0.003)\\), but consider the following figure instead. Figure 6.3: Distribution of differences for pre- and post-adaptation PSS values with 90% HPDI. Figure 6.3 shows the distribution of differences with the \\(90\\%\\) HPDI region shaded. From this figure, one might conclude that the effect of recalibration, while small, is still noticeable for the audiovisual task.
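To make the idea of recombining posterior samples concrete, here is a minimal sketch (not the thesis's actual code) of computing the pre/post difference and its 90% HPDI; the data frame `draws` and its columns `pss_pre` and `pss_post` are hypothetical stand-ins for however the marginal PSS draws are stored.

```r
# Hedged sketch: `draws`, `pss_pre`, and `pss_post` are placeholders for the
# marginal posterior draws of the PSS in each block.
pss_diff <- draws$pss_post - draws$pss_pre

# 90% highest posterior density interval via rethinking::HPDI();
# coda::HPDinterval() would work as well.
rethinking::HPDI(pss_diff, prob = 0.90)

# The full distribution of differences can be plotted directly.
hist(pss_diff, breaks = 40, main = "", xlab = "Post - pre PSS difference")
```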
While this could be done for every task in the rest of this chapter, I do not think it is worth repeating as I am not trying to prove anything about the psychometric experiment itself (that is for a later paper). The point of this demonstration is simply that it can be done (and easily), and how to summarize the data both visually and quantitatively. Visual TOJ Task Figure 6.4: Posterior distribution of PSS values for the visual task. Here there is no clear determination if recalibration has an effect on perceptual synchrony, as it is only the middle age group that shows a shift in bias. Even more, there is a lot of overlap between age group. Looking at the marginal distributions (figure 6.5), there may be a difference between the younger and older age groups, and the middle age and older age groups. Figure 6.5: The difference between the older age group and the two others is noticeable, but not likely significant. These plots are useful for quickly being able to determine if there is a difference in factors. If there is a suspected difference, then the distribution can be calculated from the posterior samples as needed. I suspect that there may be a difference between the older age group and the other two, so I calculated the differences, and summarize them with the histogram in figure 6.6. Figure 6.6: The bulk of the distribution is above zero, but there is still a chance that there is no difference in the distribution of PSS values between the age groups during the visual TOJ experiment. The bulk of the distribution is above zero, but there is still a chance that there is no difference in the distribution of PSS values between the age groups during the visual TOJ experiment. Duration TOJ Task Figure 6.7: Posterior distribution of PSS values for the duration task. The duration TOJ task is very interesting because 1) recalibration had a visually significant effect across all age groups, and 2) there is virtually no difference between the age groups. I could plot the marginal distribution, but it wouldn’t likely give any more insight. What I might ask is what is it about the duration task that lets temporal recalibration have such a significant effect? Is human perception of time duration more malleable than our perception to other sensory signals? Sensorimotor TOJ Task Figure 6.8: Posterior distribution of PSS values for the sensorimotor task. There are no differences between age groups or blocks when it comes to perceptual synchrony in the sensorimotor task. 6.1.2 On Temporal Sensitivity Temporal sensitivity is the ability to successfully integrate signals arising from the same event, or segregate signals from different events. When the stimulus onset asynchrony increases, the ability to bind the signals into a single percept is reduced until they are perceived as distinct events with a temporal order. Those that are more readily able to determine temporal order have a higher temporal sensitivity, and it is measured through the slope of a psychometric function - specifically the quantity known as the just noticeable difference. Audiovisual TOJ Task Figure 6.9: Posterior distribution of JND values for the audiovisual task. All age groups experienced an increase in temporal sensitivity, but the effect is largest in the older age group which also had the largest pre-adaptation JND estimates. There also appears to be some distinction between the older age group and the younger ones in the pre-adaptation block, but recalibration closes the gap. 
Visual TOJ Task Figure 6.10: Posterior distribution of JND values for the visual task. The story for the visual TOJ task is similar to the audiovisual one - each age group experience heightened temporal sensitivity after recalibration, with the two older age groups receiving more benefit than the younger age group. It’s also worth noting that the younger age groups have higher baseline temporal sensitivity, so there may not be as much room for improvement. Duration TOJ Task Figure 6.11: Posterior distribution of JND values for the duration task. This time the effects of recalibration are not so strong, and just like for the PSS, there is no significant difference between age groups in the duration task. Sensorimotor TOJ Task Figure 6.12: Posterior distribution of JND values for the sensorimotor task. Finally in the sensorimotor task there are mixed results. Temporal recalibration increased the temporal sensitivity in the younger age group, reduced it in the middle age group, and had no effect on the older age group. Clearly the biological factors at play are complex, and the data here is a relatively thin slice of the population. More data and a better calibrated experiment may give better insights into the effects of temporal recalibration. 6.2 Lapse Rate across Age Groups Figure 6.13: Process model of the result of a psychometric experiment with the assumption that lapses occur at random and at a fixed rate, and that the subject guesses randomly in the event of a lapse. In the above figure, the outcome of one experiment can be represented as a directed acyclic graph (DAG) where at the start of the experiment, the subject either experiences a lapse in judgment with probability \\(\\gamma\\) or they do not experience a lapse in judgment. If there is no lapse, then they will give a positive response with probability \\(F(x)\\). If there is a lapse in judgment, then it is assumed that they will respond randomly - e.g. a fifty-fifty chance of a positive response. In this model of an experiment, the probability of a positive response is the sum of the two paths. \\[\\begin{align} \\mathrm{P}(\\textrm{positive}) &= \\mathrm{P}(\\textrm{lapse}) \\cdot \\mathrm{P}(\\textrm{positive} | \\textrm{lapse}) \\\\ &\\quad + \\mathrm{P}(\\textrm{no lapse}) \\cdot \\mathrm{P}(\\textrm{positive} | \\textrm{no lapse}) \\\\ &= \\frac{1}{2} \\gamma + (1 - \\gamma) \\cdot F(x) \\end{align}\\] If we then let \\(\\gamma = 2\\lambda\\) then the probability of a positive response becomes \\[ \\mathrm{P}(\\textrm{positive}) = \\lambda + (1 - 2\\lambda) \\cdot F(x) \\] This is the lapse model described in (3.10)! But now there is a little bit more insight into what the parameter \\(\\lambda\\) is. If \\(\\gamma\\) is the true lapse rate, then \\(\\lambda\\) is half the lapse rate. This may sound strange at first, but remember that equation (3.10) was motivated as a lower and upper bound to the psychometric function, and where the bounds are constrained by the same amount. Here the motivation is from a process model, yet the two lines of reasoning arrive at the same model. Figure 6.14 shows the distribution of lapse rates for each age group across the four separate tasks. There is no visual trend in the ranks of lapse rates, meaning that no single age group definitively experiences a lower lapse rate than the others, though the middle age group comes close to being the winner and the older age group is more likely to be trailing behind. The distribution of lapse rates does reveal something about the tasks themselves. 
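As a quick illustration of the lapse model just derived, the sketch below is illustrative only: it assumes a logistic psychometric function and made-up parameter values, and simply shows how a small lapse rate pulls the asymptotes in from 0 and 1.

```r
# Hedged sketch of P(positive) = lambda + (1 - 2*lambda) * F(x) with a toy
# logistic F over SOA values in milliseconds; all values here are made up.
F_psy <- function(x) plogis(0.05 * x)
lapse <- function(x, lambda) lambda + (1 - 2 * lambda) * F_psy(x)

soa <- seq(-300, 300, by = 10)
plot(soa, F_psy(soa), type = "l", ylim = c(0, 1),
     xlab = "SOA (ms)", ylab = "P(positive response)")
lines(soa, lapse(soa, lambda = 0.05), lty = 2)  # asymptotes move to 0.05 and 0.95
```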
Figure 6.14: Lapse rates for the different age groups across the four separate tasks. Visually there is no clear trend in lapses by age group, but the concentration of the distributions gives insight into the perceived difficulty of a task, where more diffuse distributions may indicate more difficult tasks. I used the audiovisual data in the first few iterations of building a model and there were no immediate issues, but when I tested the model on the visual data it had trouble expressing the variability at outer SOA values. I noted that one subject had a near perfect response set, and many others had equally impressive performance. The model without a lapse rate was being torn between a very steep slope near the PSS and random variability near the outer SOAs. The remedy was to include a lapse rate (motivated by domain expertise) which allowed for that one extra degree of freedom necessary to reconcile the opposing forces. Why did the visual data behave this way when the audiovisual data had no issue? That gets deep into the theory of how our brains integrate signals arising from different modalities. Detecting the temporal order of two visual stimuli may be an easier mental task than that of heterogeneous signals. Then consider audiovisual versus duration or sensorimotor. Visual-speech synthesis is a much more common task throughout the day than visual-tactile (sensorimotor) interaction, and so perhaps we are better adjusted to a task like the audiovisual one. The latent measure of relative performance or task difficulty might be picked up through the lapse rate. To test this idea, the TOJ experiment could be repeated, asking the subjects afterwards how they would rate the difficulty of each task. For now, a post-hoc test can be done by comparing the mean and spread of the lapse rates to a pseudo difficulty measure defined as the mean of the incorrect responses. A response is correct when the sign of the SOA value is concordant with the response, e.g. a positive SOA and the subject gives the “positive” response, or a negative SOA and the subject gives the “negative” response. Looking at figure 6.14, I would subjectively rate the tasks from easiest to hardest, based on ocular analysis, as visual, audiovisual, duration, sensorimotor. Again, this ranking is based on the mean (lower intrinsically meaning easier) and the spread (less diffuse implying more agreement about difficulty between age groups). The visual task has the tightest distribution of lapse rates, and the sensorimotor has the widest spread, so I can rank those first and last respectively. Audiovisual and duration are very similar in mean and spread, but the audiovisual has a bit more agreement between the young and middle age groups, so second and third go to audiovisual and duration. Table 6.1 shows the results arranged by increasing pseudo difficulty. As predicted, the visual task is squarely at the top and the sensorimotor is firmly at the bottom. The only out of place group is the audiovisual task for the older age group, which is about equal to the older age group during the duration task. In fact, within tasks, the older age group always comes in last in terms of proportion of correct responses, while the young and middle age groups trade back and forth. Table 6.1: Relative difficulty of the different tasks by age group. The difficulty is measured by the proportion of incorrect responses.
Task Age Group Pseudo Difficulty visual Middle Age 0.03 visual Young Adult 0.03 visual Older Adult 0.06 audiovisual Young Adult 0.12 audiovisual Middle Age 0.12 duration Middle Age 0.14 duration Young Adult 0.16 duration Older Adult 0.17 audiovisual Older Adult 0.17 sensorimotor Young Adult 0.22 sensorimotor Middle Age 0.24 sensorimotor Older Adult 0.29 One way to reduce the uncertainty in the lapse rate could be to have some trials with very large SOA values. The reasoning is that if the difficulty of a task (given an SOA value) is lowered, then an incorrect response is more likely to be due to a true lapse in judgment as opposed to a genuinely incorrect response. Wichmann and Hill (2001b) recommend that at least one sample at \\(\\pi \\ge 0.95\\) is necessary for reliable bootstrap confidence intervals, and the same reasoning can be applied when using Bayesian credible intervals. For a task such as visual TOJ, the \\(90\\%\\) level may occur at an SOA of \\(\\approx 40\\)ms while for the audiovisual TOJ it may be \\(\\approx 220\\)ms, so the sampling scheme for psychometric experiments must be tuned to the task. Wichmann and Hill (2001a) experimentally determined that the lapse rate for trained observers is between \\(0\\%\\) and \\(5\\%\\), and the data in this paper loosely agree with that conclusion. Any excess in lapse rate may be attributed to the perceived task difficulty and a sub-optimal sampling scheme. Since the visual TOJ task is relatively the easiest, the estimated lapse rates are more believable as true lapse rates, and fall closely within the \\((0, 0.05)\\) range. References "],["discussion.html", "7 Discussion 7.1 Model selection is not always the goal 7.2 Data Cleaning and Reproducibility 7.3 Developing a model", " 7 Discussion 7.1 Model selection is not always the goal Building a model motivated by a set of principles and domain expertise should be the preferred way of performing an analysis. The next important principle is model comparison, especially in terms of predictive inference. One model also doesn’t always work for everything. In the course of building a model that is just complex enough to answer questions about age and temporal recalibration, I mentioned that intermediate models could be used to answer questions about average effects at different levels. For purely predictive inference, there is also the possibility of Bayesian model averaging (BMA) and other ensemble methods. 7.2 Data Cleaning and Reproducibility Data doesn’t always come in a nice tidy format, and I had the pleasure of turning the raw experimental data into a clean data set that is ready for modeling. Sometimes the process is quick and straightforward, but other times, like with this psychometric data, it takes more effort and clever techniques. There is academic value in describing the steps I took up front to reduce the headache later. To begin, there is a strong push in recent years for reproducible data science. Scientific methods and results should be able to be replicated by other researchers, and part of that includes being able to replicate the process that takes the raw data and produces the tidy data that is ready for analysis.
Tidy data is described by Wickham and others (2014) and can be summed up by three principles: each variable forms a column, each observation forms a row, and each type of observational unit forms a table. One problem I have come across and have been guilty of in the past is having data in a spreadsheet, modifying it, and then having no way of recovering the original data. Spreadsheets are a convenient way to organize, transform, and lightly analyze data, but problems can quickly arise unless there is a good backup/snapshot system in place. Data is immutable, or at least that is the mindset that researchers must adopt in order to have truly reproducible workflows. The raw data that is collected or produced by a measurement device should never be modified without first being copied, even if for trivial reasons such as correcting a spelling mistake. To begin the data cleaning journey, I’ll introduce the directory system that I had been given to work with. Each task is separated into its own folder, and within each folder is a subdirectory of age groups. RecalibrationData ├── ParticipantAgeSex.xlsx ├── Audiovisual │ ├── MiddleAge │ ├── Older │ └── Young ├── Duration │ ├── MiddleAge │ ├── Older │ └── Young ├── Sensorimotor │ ├── MiddleAge │ ├── Older │ └── Young └── Visual ├── MiddleAge ├── Older └── Young Within each age group subdirectory are the subdirectories for each subject, named by their initials, which in turn contain the experimental data in Matlab files. ├── MiddleAge │ ├── BT │ │ ├── BTadapt1__MAT.mat │ │ ├── ... │ ├── ... ├── Older │ ├── BB │ │ ├── BBadapt1__MAT.mat │ │ ├── ... │ ├── ... └── Young ├── AC │ ├── ACadapt1__MAT.mat │ ├── ... ├── ... At this point the data appears manageable: there is information contained in the directory structure such as task, age group, and initials, and the file name contains information about the experimental block. There is also an Excel file that I was later given that contains more subject information like age and sex, though that information is not used in the model. The columns of the Matlab files depend on the task, but generally contain an SOA value and a response, with no column or row name information - that was provided by the researcher who collected the data. The next thing I did was to create a table of metadata - information extracted from the directory structure and file names combined with the subject data and the file path. Regular expressions can be used to extract patterns from a string. With a list of all Matlab files within the RecalibrationData folder, I tried to extract the task, age group, initials, and block using the expression "^(\\\\w+)/(\\\\w+)/(\\\\w+)/[A-Z]{2,3}_*[A-Z]*(adapt[0-9]|baseline[0-9]*).*" Breaking it apart, the ^(\\\\w+)/ matches any word characters at the start and before the next slash. Since the directory structure is Task/AgeGroup/Subject/file.mat, the regular expression should match three words between slashes. The file name generally follows the pattern of Initials__block#__MAT.mat, so [A-Z]{2,3}_*[A-Z]* should match the initials, and (adapt[0-9]|baseline[0-9]*) should match the block (baseline or adapt). This method works for \\(536\\) of the \\(580\\) individual records. For the ones where it failed, it was generally due to misspellings or irregular capitalization of “baseline” and “adapt”.
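As a hedged illustration of how such an expression can be applied (this is not the thesis's actual cleaning code, and the example path is just one file from the directory tree above), base R's regexec() pulls out the captured groups for a single path:

```r
# Illustrative only: apply the regular expression described above to one path.
path    <- "Audiovisual/MiddleAge/CC/CCadapt1__MAT.mat"
pattern <- "^(\\w+)/(\\w+)/(\\w+)/[A-Z]{2,3}_*[A-Z]*(adapt[0-9]|baseline[0-9]*).*"

groups <- regmatches(path, regexec(pattern, path))[[1]]
groups[-1]  # c("Audiovisual", "MiddleAge", "CC", "adapt1"): task, age group, initials, block
```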
table(feat_typ[,4]) #> #> AC AG BB BC BT CB CC CE CJ CM DB DC DD DE DTF DW #> 13 12 13 13 13 13 10 12 13 4 13 13 7 12 12 13 #> EM ET GB GT HG IV JM JM_F JS JW KC KK LP MC MS MW #> 13 13 13 13 13 4 12 13 13 13 13 11 7 13 13 26 #> NP PB SB SJ SJF TS TW VM WL WW YG #> 12 13 12 26 13 13 13 13 13 12 7 table(feat_atyp[,4]) #> #> AG CC CE CM DD DTF IV JM JS KK NP SB WW YG #> 1 3 1 9 6 1 9 1 2 2 1 1 1 6 Since there is only a handful of irregular block names, they can be dealt with a separate regular expression that properly extracts the block information. Other challenges in cleaning the data include the handling of subjects with the same initials. This becomes a problem because filtering by a subject’s initials is not guaranteed to return a unique subject. Furthermore there are two middle age subjects with the same initials of “JM”, so one was also identified with their sex “JM_F”. The solution is to create a unique identifier (labeled as SID) that is a combination of age group, sex, and initials. For an experiment identifier (labeled as RID), the task and block were prepended to the SID. Each of these IDs uniquely identify the subjects and their experimental records making it easier to filter and search. glimpse(features) #> Rows: 580 #> Columns: 8 #> $ rid <fct> av-post1-M-f-CC, av-post1-M-f-DB, av-post1-M-f-HG, av-post1… #> $ sid <fct> M-f-CC, M-f-DB, M-f-HG, M-f-JM, M-f-MS, M-f-SJF, M-f-TS, M-… #> $ path <chr> "Audiovisual/MiddleAge/CC/CCadapt1__MAT.mat", "Audiovisual/… #> $ task <chr> "audiovisual", "audiovisual", "audiovisual", "audiovisual",… #> $ trial <fct> post1, post1, post1, post1, post1, post1, post1, post1, pos… #> $ age_group <fct> middle_age, middle_age, middle_age, middle_age, middle_age,… #> $ age <dbl> 39, 44, 41, 48, 49, 43, 47, 49, 49, 44, 43, 44, 48, 48, 50,… #> $ sex <fct> F, F, F, F, F, F, F, F, F, M, M, M, M, M, M, F, F, F, F, F,… Then with the table of clean metadata, the task is simply to loop through each row, read the Matlab file given by path, add the unique ID as a column, and then join the experimental data with the metadata to create a data set that is ready for model fitting and data exploration. The full code used to generate the clean data is not yet available online, but can be shared with the committee. The benefit of writing a script to generate the data is that others can look over my code and verify that it is doing what I intended for it to do, and I can go back to any step within the process to make changes if the need comes up. Another tool that contributed to the reproducibility is the version control management software, Git. With Git I can take a snapshot of the changes I make, and revert if necessary. This thesis is also hosted on Github, and the entire history of development can be viewed there. 7.3 Developing a model Chapter 3 details the deeper considerations that went into building a model, but doesn’t tell the full story of struggles and setbacks I faced. I find that I learn more from others when they share what didn’t work along with the final path that did work. There is knowledge to be gained in failed experiments, because then there is one more way to not do something, just like a failing outcome reduces the variance of the Beta distribution. I knew that I wanted to apply Bayesian modeling techniques to the data, because it was something knew that I was learning. 
I tried using a classical GLM to first get a baseline understanding of the data, but the fact that some estimates for certain subjects failed due to complete separation reinforced my enthusiasm to employ non-classical techniques. My first Bayesian model was derived from Lee and Wagenmakers (2014) which used nested loops to iterate over subjects and SOA values. I felt that the data was stored in a complicated way and made it difficult to comprehend and extend. Next I moved on to using arm::bayesglm to remove convergence issues, but was met with other limitations such as linear parameterization and lack of hierarchical modeling. The book Statistical Rethinking (McElreath 2020) was my first introduction to Bayesian multilevel modeling. His rethinking package accompanies the book, and offers a compact yet expressive syntax for models that get translated into a Stan model. A model with age group and block can be written using rethinking::ulam as rethinking::ulam(alist( k ~ binomial_logit(n, p), p = exp(b + bG[G] + bT[trt]) * (x - (a + aG[G] + aT[trt])), a ~ normal(0, 0.06), aG[G] ~ normal(0, sd_aG), aT[trt] ~ normal(0, sd_aT), b ~ normal(3, 1), bG[G] ~ normal(0, sd_bG), bT[trt] ~ normal(0, sd_bT), c(sd_aG, sd_aT, sd_bG, sd_bT) ~ half_cauchy(0, 5) ), data = df, chains = 4, cores = 4, log_lik = TRUE) During my time learning about multilevel models, I tried writing my own package that generates a Stan program based on R formula syntax. At the time I didn’t fully understand the concepts of no-pooling, complete pooling, and partial pooling, and my package was plagued by the same lack of flexibility that rstanarm and brms have. In fact I learned that brms and rstanarm already did what I was trying to do after I had already started making my library, but it was a fun learning and programming experience. The fossilized remains of my attempt can be viewed on github. I also tried using lme4, rstanarm, and brms, and learned more about the concepts of fixed and random effects. It was around this time that I noticed that parameterization can have a significant affect on the efficiency of a model and the inferential power of the estimated parameters. When fitting a classical model, there is little difference in estimating a + bx vs. d(x - c) since the latter can just be expanded as -cd + dx which is essentially the same as the first parameterization, but there is a practical difference in the interpretation of the parameters. The second parameterization implies that there is a dependence among the parameters that can be factored out. In the context of psychometric functions, there is a stronger connection between PSS and c and the JND and d. This parameterization made it easier to specify priors and also increased the model efficiency. Since only rethinking and Stan allow for arbitrary parameterization, I left the others behind. I finally arrived at a model that worked well, but learned that using a binary indicator variable for the treatment comes with the assumption of higher uncertainty for one of the conditions. The linear model that I arrived at is displayed in equation (7.1). \\[\\begin{equation} \\theta = \\exp(\\beta + \\beta_G +(\\beta_T + \\beta_{TG})\\times trt) \\left[x - (\\alpha + \\alpha_G + (\\alpha_T + \\alpha_{TG})\\times trt)\\right] \\tag{7.1} \\end{equation}\\] Using an indicator variable in this fashion also introduced an interaction effect into the model that I almost did not account for after I switched to using a factor variable. 
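To see concretely why the \\(d(x - c)\\) form connects more directly to the quantities of interest, here is a small sketch; it assumes a logistic psychometric function purely for illustration, and the function names are mine rather than anything from the thesis code.

```r
# Hedged sketch: two parameterizations of a logistic psychometric function.
pf_slope_intercept <- function(x, a, b)     plogis(a + b * x)        # a + b*x
pf_location_scale  <- function(x, loc, scl) plogis(scl * (x - loc))  # d*(x - c)

# In the location-scale form the PSS (50% point) is the location itself, and
# the JND (distance from the 50% to the 84% level) depends only on the scale:
pss_ls <- function(loc) loc
jnd_ls <- function(scl) qlogis(0.84) / scl

# In the slope-intercept form the same quantities mix both parameters,
# which makes priors harder to reason about:
pss_si <- function(a, b) -a / b
jnd_si <- function(b)    qlogis(0.84) / b
```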
Interaction effects between factors are handled by creating a new factor that is essentially the cross-product of other factor variables. E.g. for factor variables \\(x\\) and \\(y\\) \\[ x = \\begin{bmatrix} a \\\\ b \\\\ c \\end{bmatrix}, y = \\begin{bmatrix} i \\\\ j \\end{bmatrix}\\Longrightarrow x\\times y = \\begin{bmatrix} ai & aj \\\\ bi & bj \\\\ ci & cj \\end{bmatrix} \\] The final round of reparameterization came in the form of adopting a non-centered parameterization for more efficient models. To us, \\(Z \\sim N(0, 1^2);\\quad X = 3 + 2Z\\) is the same as \\(X \\sim N(3, 2^2)\\), but to a computer the process of sampling from \\(X\\) can be more difficult than sampling from \\(Z\\) (discussed in chapter 4). References "],["conclusion.html", "8 Conclusion", " 8 Conclusion The real story of developing this psychometric model (the one here and not in the workflow section) is more revealing of the real struggles of performing data analysis than the principled workflow would let on. Often I found myself putting in vast amounts of unnecessary work - but necessary for me to do in order to realize that it is unnecessary - just because I hadn’t yet learned what the likely paths to take were. On a more personal level, my struggles were not ever really in developing a model or coding it up - in fact I could tinker with a program for hours, improving it in this way or that. For me, programming was a puzzle and an art, and I internalized the idea that I could figure out any numerical task given enough time and focus. To the contrary, I believed that writing was a chore, that writing this thesis would be like trying to dam up all the deltas in an attempt to keep the main river on course. I think of learning mathematics as learning a puzzle game. The first few puzzles are easy, but then get progressively harder. But even as the puzzles get harder, your intuitive understanding of the game improves, and you can throw away the obviously poor moves from consideration, and try paths that are more likely to move you in the direction of the correct solution. I started my journey towards obtaining a Master of Science in Statistics precisely because I knew that I could obtain a more intuitive understanding of the quantitative world that I live in, and pick up some new tools along the way to create and solve more impressive puzzles. "],["supplementary-code.html", "A Supplementary Code", " A Supplementary Code Eight Schools Model data { int<lower=0> J; vector[J] y; vector<lower=0>[J] sigma; } parameters { real mu; real<lower=0> tau; vector[J] theta; } model { mu ~ normal(0, 10); tau ~ cauchy(0, 10); theta ~ normal(mu, tau); y ~ normal(theta, sigma); } "],["references.html", "References", " References "]] +[["index.html", "Application of a Principled Bayesian Workflow to Multilevel Modeling 1 Introduction 1.1 Everything can be Blamed on Fisher 1.2 Proposal of New Methods 1.3 Organization", " Application of a Principled Bayesian Workflow to Multilevel Modeling Alexander D. Knudson December, 2020 1 Introduction With the advances in computational power and high-level programming languages like Python, R, and Julia, statistical methods have evolved to be more flexible and expressive. No longer must we be subjugated by p-values and step-wise regression techniques. Gone are the days of using clever modeling techniques to tame misbehaved data. Now is the time for principled and informed decisions to create bespoke models and domain-motivated analyses.
We have the shoulders of giants to stand upon and look out at the vast sea of data science. I want to talk about how the advances in computational power have led to a sort of mini revolution - resurrection - in statistics where Bayesian modeling has gained an incredible following thanks to projects like Stan. The steady adoption of computer-aided statistical workflows also brings the need for multidisciplinary techniques from numerical analysis, probability theory, statistics, computer science, visualizations, and more. And with the age of computers, there is a strong push towards reproducibility. Concepts of modular design, workflows, project history and versioning, virtual environments, and human-readable code all contribute to reproducible analyses. And somehow I also want to tie in how data is immutable - raw data should (must) be treated as a constant and unchangeable entity, and merely touching it will cause data mitosis. I will now segue into introducing the intent of this paper. I believe that utilizing the computational ability of modern computers helps strengthen the validity of an analysis. This is achieved by using powerful but expressive tools like Stan to write models that visually match written mathematical models. Classical statistical tools, while fast, require clever mathematics to perform certain routines such as fitting mixed effects models, or the interpretation of cryptic p-values to determine if a model is “good”. Instead I believe we should be moving towards probabilistic programming languages like Stan to carry out statistical analyses. This paper is motivated by an experiment in psychometrics (chapter 2), and by highlighting a principled workflow I seek to convince the reader that Bayesian multilevel modeling should be the default tool for modeling psychometric experiments. In the next section of this introduction, I will list classical tools for statistical modeling [of psychometric experiments] and touch on the limitations of such tools. Following that section, I will introduce the methods I use for building a model that deviate from classical methods. 1.1 Everything can be Blamed on Fisher … or Pearson, or Gauss, or … When I hear the term “regression”, I instantly think about maximum likelihood estimation (MLE) of parameters. And why not? There is an endless wealth of literature on the subject of linear regression and MLE (Johnson, Wichern, and others 2002; Larsen and Marx 2005; Sheather 2009; Navidi 2015). Most introductory courses on statistics and regression center around classical techniques such as MLE, hypothesis testing, and residual analysis. For the common student, learning statistical modeling in the classical way can feel sterilized and mechanical. Check that the data are normal. Check that the coefficients are significantly different from zero. Check that the residuals are normal. Etc. I’m not trying to say that these methods are not important or that they are deeply flawed - it would be bad for modern society if we were just now finding out that the models are wrong. Instead, I am arguing that because they are so common and easy to apply, they are often used without much extra thought. Take variable selection as an example. In a data set where there are a dozen predictors, how does one go about selecting which parameters produce the best model? Without thought, one may reach for a step-wise selection algorithm, and confidently conclude that variables \\(x\\), \\(y\\), and \\(z\\) are significant because the p-values say so.
This method does fall apart quickly because as the number of parameters grow, so too does the number of steps needed to find the best subset of variables1, and there is no guarantee that the algorithm actually selects the best2 subset. But even if the best subset of variables is found, one still needs to consider if the variables have a practical effect or if the model omitted an important variable of interest. Sure, the type of analysis is important to the techniques used. Variable selection through step-wise algorithms or penalized maximum likelihood estimation (Hoerl and Kennard 1970; Tibshirani 1996) may be appropriate in an exploratory data analysis, but improper for causal inference and other scientifically motivated experiments. Which brings me to talk next about p-values, confidence intervals, and hypothesis testing. The concept of basing scientific results on the falsifiability (Popper 1959) or refutability of a claim is a strong foundation for the scientific method, and is arguably much better than the previous grounds of verifiability – just because something has been true for a very long time, doesn’t mean it will always be true in the future. But hypothesis testing comes with its own set of problems. Null hypothesis testing for point estimates usually depends on calculating a confidence interval and seeing if the interval contains the point of interest. This can be misleading, as there is more than one confidence interval that can be calculated. For Gaussian distributions, the mean equals the median equals the mode, so a 95% confidence interval is evenly distributed around the central measures. Some distributions are skewed, so an equal tail area confidence interval might not necessarily include the most likely value. Take for example the exponential distribution \\[ X \\sim \\mathrm{exponential} (\\lambda) \\] An equal tail area 95% confidence interval would be \\(\\left(-\\ln(0.975)/\\lambda, -\\ln(0.025)/\\lambda\\right)\\) which would not even contain the most likely value of zero. Should the highest density interval be used? Should skewness be reported with p-values and confidence intervals? Furthermore, confidence intervals are conditional on the model chosen, and that introduces other problems. McElreath (2020) discusses a well-known issue in population biology about comparing a neutral model of the distribution of allele frequencies to a selective model. In short, the two differing hypotheses may suggest different process models which in turn lead to statistical models - some of which are shared by both hypotheses. Rejecting the statistical model doesn’t rule out either of the hypotheses. Should we scrap these principles and tools all together? Absolutely not. Most of these wrinkled problems (and others) have been talked about and ironed out through careful discussion and clever techniques, but the damage is done, and hypothesis testing and p-values are widely misunderstood and misused. The problem is that these techniques rest on having a strong foundation of statistical knowledge, both to produce and to properly understand. This requirement is stifling. Communicating statistical results is just as important as producing them, and with modern tools and a vast selection of expressive languages we can analyze data in a more intuitive and natural framework. 1.2 Proposal of New Methods In my biased opinion, the Bayesian framework for modeling is a much more natural way to conduct scientific research where some kind of data analysis is involved. 
Now of course, I can’t claim as such without some compelling argument or examples. I have already targeted some weak points of classical statistics, and throughout Chapter 3 I will highlight specific examples of where classical techniques are typically applied, and how they may fall short compared to my proposed methods. What I am proposing is a fully Bayesian workflow to build and analyze a statistical model. In this Bayesian workflow (which shall henceforth be referred to simply as “workflow”) I will highlight a set of principles that utilize domain expertise, and focus on building a multilevel model. My goal is to show that the combination of these two concepts yields better prediction results and greater inferential power. And in lieu of p-values and hypothesis testing, I let predictive inference narrate the statistical results and the strength of association within the model. 1.3 Organization I have organized this thesis as follows. In Chapter 2 I introduce the data set that drives the narrative and that motivates the adoption of Bayesian multilevel modeling. In Chapter 3 I describe and work through a principled Bayesian workflow for multilevel modeling. Chapter 4 goes into more depth on checking the model goodness of fit and model diagnostics in a Bayesian setting. In Chapter 5 I demonstrate how to use the Bayesian model from the principled workflow for predictive inference, and use posterior predictive distributions to plot and compare models. Chapters 6 and 7 go over the quantitative results and discuss the qualitative choices in the workflow. Then I conclude this paper in Chapter 8. References "],["motivating-data.html", "2 What is a Model without Data 2.1 Psychometric Experiments 2.2 Temporal Order Judgment Data 2.3 Data Visualizations and Quirks", " 2 What is a Model without Data What is data without a model It was Charles Darwin who in his book On the Origin of Species developed the idea that living organisms adapt in order to better survive in their environment. Sir Francis Galton, inspired by Darwin’s ideas, became interested in the differences in human beings and in how to measure those differences. Though the dark side of statistics and hubris led Galton to become a pioneer of eugenics, his work on studying and measuring human differences led to the creation of psychometrics – the science of measuring mental faculties. Around the same time that he was developing his theories, Johann Friedrich Herbart was also interested in studying consciousness through the scientific method, and is responsible for creating mathematical models of the mind. E.H. Weber built upon Herbart’s work, and set out to prove the idea of a psychological threshold. A psychological threshold is a minimum stimulus intensity necessary to activate a sensory system – a liminal stimulus. He paved the way for experimental psychology and is the namesake of Weber’s Law – the change in a stimulus that will be just noticeable is a constant ratio of the original stimulus (Britannica 2014). \\[ \\frac{\\Delta I}{I} = k \\] To put this law into practice, consider holding a 1 kg weight (\\(I = 1\\)), and further suppose that we can just detect the difference between a 1 kg weight and a 1.2 kg weight (\\(\\Delta I = 0.2\\)).
Then the constant just noticeable ratio is \\[ k = \\frac{0.2}{1} = 0.2 \\] So now if we pick up a 10 kg weight, we should be able to determine how much more mass is required to just detect a difference: \\[ \\frac{\\Delta I}{10} = 0.2 \\Rightarrow \\Delta I = 2 \\] The difference between a 10 kg and a 12 kg weight should be just barely perceptible. Notice that the difference in the first set of weights is 0.2 and in the second set it is 2. Our perception of the difference in stimulus intensities is not absolute, but relative. G.T. Fechner devised the law (Weber-Fechner Law) that the strength of a sensation grows as the logarithm of the stimulus intensity. \\[S = K \\ln I\\] An example to this law is to consider two light sources, one that is 100 lumens (\\(S_1 = K \\ln 100\\)) and another that is 200 lumens (\\(S_2 = K \\ln 200\\)). The intensity of the second light is not perceived as twice as bright, but only about 1.15 times as bright according to the Weber-Fechner law. \\[\\theta = S_2 / S_1 \\approx 1.15\\] Notice that the value \\(K\\) cancels out when calculating the relative intensity, but knowing \\(K\\) can lead to important psychological insights; insights about differences between persons or groups of people! What biological and contextual factors affect how people perceive different stimuli? How do we measure their perception in a meaningful way? As one might expect, we can collect data from psychometric experiments, fit a model to the data from a family of functions called psychometric functions, and inspect key operating characteristics of those functions. 2.1 Psychometric Experiments Psychometric experiments are devised in a way to examine psychophysical processes, or the response between the world around us and our inward perceptions. A psychometric function relates an observer’s performance to an independent variable, usually some physical quantity of a stimulus in a psychophysical task (Wichmann and Hill 2001a). Psychometric functions were studied as early as the late 1800’s, and Edwin Boring published a chart of the psychometric function in The American Journal of Psychology in 1917 (Boring 1917). Figure 2.1: A chart of the psychometric function. The experiment in this paper places two points on a subject’s skin separated by some distance, and has them answer their impression of whether there is one point or two, recorded as either ‘two points’ or ‘not two points’. As the separation of aesthesiometer points increases, so too does the subject’s confidence in their perception of ‘two-ness’. So at what separation is the impression of two points liminal? Figure 2.1 displays the key aspects of the psychometric function. The most crucial part is the sigmoid function, the S-like non-decreasing curve which in this case is represented by the Normal CDF, \\(\\Phi(\\gamma)\\). The horizontal axis represents the stimulus stimulus intensity, the separation of two points in centimeters. The vertical axis represents the probability that a subject has the impression of two points. With only experimental data, the response proportion becomes an approximation for the probability. This leads me to talk about the type of psychometric experiment that this paper deals with called a temporal order judgment (TOJ) experiment. The concept is that if there are two distinct stimuli occurring nearly simultaneously then our brains will bind them into a single percept (perceive them as happening simultaneously). 
Compensation for small temporal differences is beneficial for coherent multisensory experiences, particularly in visual-speech synthesis as it is necessary to maintain an accurate representation of the sources of multisensory events. The temporal asynchrony between stimuli is called the stimulus onset asynchrony (SOA), and the range of SOAs for which sensory signals are integrated into a global percept is called the temporal binding window. When the SOA grows too large then the brain segregates the two signals and the temporal order can be determined. Our experiences in life as we age shape the mechanisms of processing multisensory signals, and some multisensory signals are integrated much more readily than others. Perceptual synchrony has been previously studied through the point of subjective simultaneity (PSS) – the temporal delay between two signals at which an observer is unsure about their temporal order (Stone et al. 2001). The temporal binding window is the time span over which sensory signals arising from different modalities appear integrated into a global percept. A deficit in temporal sensitivity may lead to a widening of the temporal binding window and reduce the ability to segregate unrelated sensory signals. In temporal order judgment tasks, the ability to discriminate the timing of multiple sensory signals is referred to as temporal sensitivity, and is studied through the measurement of the just noticeable difference (JND) – the smallest lapse in time so that a temporal order can just be determined. Figure 2.2 highlights the features through which we study psychometric functions. The PSS is defined as the point where an observer can do no better at determining temporal order than random guessing (i.e. the response probability is 50%). The JND is defined as the extra temporal delay between stimuli so that the temporal order is just able to be determined. Historically this has been defined as the difference between the 84% level3 and the PSS, though the upper level often depends on domain expertise. Figure 2.2: The PSS is defined as the point where an observer can do no better at determining temporal order than random guessing. The just noticeable difference is defined as the extra temporal delay between stimuli so that the temporal order is just able to be determined. Historically this has been defined as the difference between the 0.84 level and the PSS, though the upper level depends on domain expertise. Perceptual synchrony and temporal sensitivity can be modified through a baseline understanding. In order to perceive physical events as simultaneous, our brains must adjust for differences in temporal delays of transmission of both psychical signals and sensory processing (Fujisaki et al. 2004). In some cases such as with audiovisual stimuli, the perception of simultaneity can be modified by repeatedly presenting the audiovisual stimuli at fixed time separations (called an adapter stimulus) to an observer (Vroomen et al. 2004). This repetition of presenting the adapter stimulus is called temporal recalibration. The data set that I introduce in the next section concerns temporal order judgment across various sensory modalities with a temporal recalibration component. 2.2 Temporal Order Judgment Data Which came first, the chicken or the experimentally controlled stimulus The data set that I am using in this paper comes from experiments done by A.N. Scurry and Dr. F. Jiang in the Department of Psychology at the University of Nevada. 
Reduced temporal sensitivity in the aging population manifests as an impaired ability to perceive synchronous events as simultaneous, and similarly as more difficulty in segregating asynchronous sensory signals that belong to different sources. The consequences of a widening of the temporal binding window are considered in Scurry et al. (2019), as well as a complete detailing of the experimental setup and recording process. A shortened summary of the methods is provided below.

There are four different tasks in the experiment: audio-visual, visual-visual, visual-motor, and duration. I will refer to each task respectively as audiovisual, visual, sensorimotor, and duration. The participants consist of 15 young adults (age 20-27), 15 middle age adults (age 39-50), and 15 older adults (age 65-75), all recruited from the University of Nevada, Reno. Additionally, all subjects are right handed and were reported to have normal or corrected-to-normal hearing and vision.

Table 2.1: Sample of motivating data.

| soa  | response | sid    | task         | trial | age_group   | age | sex |
|------|----------|--------|--------------|-------|-------------|-----|-----|
| -350 | 0        | O-m-BC | audiovisual  | pre   | older_adult | 70  | M   |
| -200 | 0        | M-m-SJ | duration     | post1 | middle_age  | 48  | M   |
| 28   | 1        | O-f-KK | sensorimotor | pre   | older_adult | 66  | F   |
| 275  | 1        | O-f-MW | visual       | post1 | older_adult | 69  | F   |

In the audiovisual TOJ task, participants were asked to determine the temporal order between an auditory and a visual stimulus. Stimulus onset asynchrony values were selected uniformly from -500 to +500 ms in 50 ms steps, where negative SOAs indicate that the visual stimulus was leading and positive values indicate that the auditory stimulus was leading. Each SOA value was presented 5 times in random order in the initial block. At the end of each trial the subject was asked to report if the auditory stimulus came before the visual, where a \(1\) indicates that they perceived the sound first and a \(0\) indicates that they perceived the visual stimulus first.

A similar setup is repeated for the visual, sensorimotor, and duration tasks. The visual task presented two visual stimuli on the left and right sides of a display with temporal asynchronies that varied from -300 ms to +300 ms in 25 ms steps. Negative SOAs indicate that the left stimulus came first, and positive that the right came first. A positive response indicates that the subject perceived the right stimulus first.

The sensorimotor task has subjects focus on a black cross on a screen. When it disappears, they respond by pressing a button. Additionally, when the cross disappears, a visual stimulus is flashed on the screen, and subjects were asked whether they perceived the visual stimulus before or after their button press. The latency of the visual stimulus was partially determined by each subject's average response time, so SOA values are not fixed between subjects and trials. A positive response indicates that the visual stimulus was perceived after the button press.

The duration task presents two vertically stacked circles on a screen, one appearing right after the other. The top stimulus appeared for a fixed 300 ms, and the bottom was displayed for anywhere from +100 ms to +500 ms in 50 ms steps, corresponding to SOA values between -200 ms and +200 ms. The subject then responds as to whether they perceived the bottom circle as appearing for longer than the top circle.
Table 2.2: Summary of TOJ tasks.

| Task         | Positive Response          | Positive SOA Truth            |
|--------------|----------------------------|-------------------------------|
| Audiovisual  | Perceived audio first      | Audio came before visual      |
| Visual       | Perceived right first      | Right came before left        |
| Sensorimotor | Perceived visual first     | Visual came before tactile    |
| Duration     | Perceived bottom as longer | Bottom lasted longer than top |

Finally, after the first block of each task was completed, the participants went through an adaptation period in which they were presented with the respective stimuli from each task repeatedly at fixed temporal delays, and then they repeated the task. To ensure that the adaptation effect persisted, the subjects were presented with the adapter stimulus at regular intervals throughout the second block. The blocks are designated as pre and post1, post2, etc. in the data set. In this paper I will only be focusing on the pre and post1 blocks.

## 2.3 Data Visualizations and Quirks

The dependent variable in these experiments is the perceived response, which is encoded as a 0 or a 1, and the independent variable is the SOA value. If the response is plotted against the SOA values, then it is difficult to determine any relationship (see figure 2.3). Transparency can be used to better visualize the relationship between SOA value and response. The center plot in figure 2.3 uses the same data as the left plot, except that the transparency is set to 0.05. As a result, one can see that there is a higher density of "0" responses towards more negative SOAs, and a higher density of "1" responses for more positive SOAs. Taking it a step further, I can compute and plot the proportion of responses for a given SOA. This is displayed in the right panel. Now the relationship between SOA values and responses is clear: as the SOA value goes from more negative to more positive, the proportion of positive responses increases from near 0 to near 1.

Figure 2.3: Left: Simple plot of response vs. SOA value. Center: A plot of response vs. SOA with transparency. Right: A plot of proportions vs. SOA with transparency.

Subjectively, the right plot in figure 2.3 is the easiest to interpret. Because of this, I will often present the observed and predicted data using the proportion of responses rather than the individual responses. Proportional data also has the advantage of being bounded on the same interval as the response. For the audiovisual task, the responses can be aggregated into binomial data (the number of positive responses for a given SOA value), which is sometimes more efficient to work with than the Bernoulli data (see table 2.3). However, the number of times an SOA is presented varies between the pre-adaptation and post-adaptation blocks: 5 and 3 times per SOA, respectively.

Table 2.3: Audiovisual task with aggregated responses.

| trial | soa  | n | k | proportion |
|-------|------|---|---|------------|
| pre   | 200  | 5 | 4 | 0.80       |
| pre   | 150  | 5 | 5 | 1.00       |
| pre   | -350 | 5 | 0 | 0.00       |
| post1 | 350  | 3 | 3 | 1.00       |
| post1 | -500 | 3 | 1 | 0.33       |
| post1 | -200 | 3 | 0 | 0.00       |
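The aggregation into table 2.3 can be sketched with a few lines of dplyr. The data frame name is an assumption for illustration; the column names follow the sample shown in table 2.1.

```r
library(dplyr)

# Roll the Bernoulli responses (one row per trial) up into binomial counts per
# block and SOA, as in table 2.3. `audiovisual` is an assumed name for the
# audiovisual subset of the tidy data set.
audiovisual %>%
  group_by(trial, soa) %>%
  summarise(n = n(),             # number of presentations of this SOA
            k = sum(response),   # number of positive responses
            proportion = k / n,
            .groups = "drop")
```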
Other quirks about the data pertain to the subjects. There is one younger subject that did not complete the audiovisual task, and one younger subject that did not complete the duration task. Additionally, there is one older subject whose response data for the post-adaptation audiovisual task is unreasonable (see figure 2.4).

Figure 2.4: Post-adaptation response data for O-f-CE.

It is unreasonable because, of all the negative SOAs, there were only two correct responses. If a subject is randomly guessing the temporal order, then a naive estimate for the proportion of correct responses is 0.5. If a subject's proportion of correct responses is above 0.5, then they are doing better than random guessing. Figure 2.5 shows that subject O-f-CE is the only one whose proportion is below 0.5 (and by a considerable amount).

Figure 2.5: Proportion of correct responses for negative SOA values during the post-adaptation audiovisual experiment.

The consequences of leaving this experimental block in the data are considered in Chapter 6, but it is a clear outlier that must be noted. When this method of detecting outliers is repeated for all tasks and blocks, I end up with 17 flagged records in total (see figure 2.6), one of which is the aforementioned subject.

Figure 2.6: Proportion of correct responses across all tasks and blocks. Proportions are calculated individually for positive and negative SOAs.

Most of the records that are flagged by this method of outlier detection are from the sensorimotor task, and none are from the visual task. This may be attributed to the perceived difficulty of the task. One consequence of higher temporal sensitivity is that it is easier to determine temporal order. It may also be that determining temporal order is inherently easier for certain multisensory tasks compared to others. Since the sensorimotor task does not have fixed SOA values like the other tasks, it may be perceived as more difficult. Or perhaps the mechanisms that process tactile and visual signals are not as well coupled as those that process audio and visual signals. Once again, I'll consider the handling of the sensorimotor outliers in the results chapter.
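The outlier screen described above can be sketched in a few lines. The data frame and column names are assumptions, and a response is counted as correct when its sign agrees with the sign of the SOA.

```r
library(dplyr)

# Flag task/block/subject records whose proportion of correct responses falls
# below chance (0.5), computed separately for negative and positive SOAs.
# `toj` is an assumed name for the full tidy data set.
flagged <- toj %>%
  filter(soa != 0) %>%
  mutate(correct = if_else(soa > 0, response == 1, response == 0)) %>%
  group_by(task, trial, sid, side = sign(soa)) %>%
  summarise(prop_correct = mean(correct), .groups = "drop") %>%
  filter(prop_correct < 0.5)
```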
Now that I have introduced the motivating data and some of the theory behind psychometric experiments, I am ready to introduce a Bayesian workflow for multilevel modeling of the psychometric function. If the reader is interested in a fun story, in the discussion chapter I talk about the process I went through to read in this psychometric data, clean it up, and produce a tidy data set that is ready for modeling. While data cleaning and transforming is a topic entirely of its own, it is not the main focus of this paper.

# 3 Principled Bayesian Workflow

_The meat, the cheese, the entire sandwich_

Leading up to now, I haven't discussed what a principled Bayesian workflow is, nor what multilevel modeling is. I was hoping to build up the suspense. Well, I hope you're now ready for the answer. A principled Bayesian workflow is a method of employing domain expertise and statistical knowledge to iteratively build a statistical model that satisfies the constraints and goals set forth by the researcher. Oh, and Bayesian techniques are used in exchange for classical ones. Maybe not worth the suspense, but the simple idea spawns a creative and descriptive way to analyze data.

What about the multilevel aspect? While I get into that more in the following sections, the concept is simple: multilevel models should be the default. The alternatives are models with complete pooling or models with no pooling. Pooling vs. no pooling is a fancy way of saying that all the data is modeled as a whole, or that the smallest component (group) is modeled individually. The former implies that the variation between groups is zero (all groups are the same), and the latter implies that the variation between groups is infinite (no groups are the same). Multilevel models assume that the truth is somewhere in the middle of zero and infinity. That's not a difficult thing to posit. Hierarchical models are a specific kind of multilevel model where one or more groups are nested within a larger one. In the case of the psychometric data, there are three age groups, and within each age group are individual subjects. Multilevel modeling provides a way to quantify and apportion the variation within the data to each level in the model. For an in-depth introduction to multilevel modeling, see Gelman and Hill (2006).

There are many great resources out there for following along with an analysis of some data or problem, and even more abundant are the tips, tricks, techniques, and testimonies to good modeling practices. The problem is that many of these prescriptions are given without context for when they are appropriate. According to Betancourt (2020), this leaves "practitioners to piece together their own model building workflows from potentially incomplete or even inconsistent heuristics." The concept of a principled workflow is that for any given problem there is not, nor should there be, a default set of steps to take to get from data exploration to predictive inferences. Rather, great consideration must be given to domain expertise and the questions that one is trying to answer with the data. Since everyone asks different questions, the value of a model is not in how well it ticks the boxes of goodness-of-fit checks, but in how consistent it is with domain expertise and in its ability to answer the unique set of questions. Betancourt suggests evaluating a model with four questions:

1. Domain Expertise Consistency - Is our model consistent with our domain expertise?
2. Computational Faithfulness - Will our computational tools be sufficient to accurately fit our posteriors?
3. Inferential Adequacy - Will our inferences provide enough information to answer our questions?
4. Model Adequacy - Is our model rich enough to capture the relevant structure of the true data generating process?

Like any good Bayesian, much work is done before seeing the data or building a model. This may include talking with experts to gain domain knowledge or to elicit priors. Experts may know something about a particular measure, perhaps the mean or variability of the data from years of research, and different experts may provide different estimates of a measure. The benefit of modeling in a Bayesian framework is that all prior knowledge may be incorporated into the model and used to estimate the posterior distribution. The same prior knowledge may also be used to check the posterior to ensure that predictions remain within physical or expert-given constraints. Consistency is key.

The computational tool I will be using to estimate the posterior is a probabilistic programming language (PPL) called Stan (Guo et al. 2020), used from within the R programming language. Stan uses the No-U-Turn Sampler (NUTS) version of Hamiltonian Monte Carlo (HMC), which I will discuss more in chapter 4. For a gentle introduction to Bayesian statistics and sampling methods, see Bolstad and Curran (2016), and for an in-depth review of HMC see Betancourt (2017). The question of inferential adequacy depends on the set of questions that we are seeking to answer with the data from the psychometric experiment.
The broad objective is to determine whether there are any significant differences between age groups when it comes to temporal sensitivity, perceptual synchrony, and temporal recalibration, and whether the task influences the results as well. The specific goals are to estimate and compare the PSS and JND across all age groups, conditions, and tasks, and to determine the effect of recalibration between age groups.

For the last question, model adequacy, I will be following a set of steps proposed in Betancourt (2020). The purpose of laying out these steps is not to blindly check them off, but to force the analyst to carefully consider each point and make an informed decision as to whether the step is necessary, or to craft the specifics of how the step should be completed. The steps are listed in table 3.1. These steps are also not meant to be followed linearly. If at any point it is discovered that there is an issue in conceptual understanding, model adequacy, or something else, then it is encouraged to go back to a previous step and start again with the new understanding.

Table 3.1: Principled workflow.

| Part                  | Step                          |
|-----------------------|-------------------------------|
| Pre-Model, Pre-Data   | conceptual analysis           |
|                       | define observational space    |
|                       | construct summary statistics  |
| Post-Model, Pre-Data  | develop model                 |
|                       | construct summary functions   |
|                       | simulate Bayesian ensemble    |
|                       | prior checks                  |
|                       | configure algorithm           |
|                       | fit simulated ensemble        |
|                       | algorithmic calibration       |
|                       | inferential calibration       |
| Post-Model, Post-Data | fit observed data             |
|                       | diagnose posterior fit        |
|                       | posterior retrodictive checks |
|                       | celebrate                     |

I'll talk about each step in the first iteration, but may choose to omit steps in subsequent iterations if there are no changes. For the purposes of building a model and being concise, I will focus on the audiovisual TOJ task in this chapter, but the final model will apply similarly to the visual and duration tasks. For the sensorimotor task, the model will be modified to accept Bernoulli data as opposed to aggregated binomial counts (described more in the next section).

## 3.1 Iteration 1 (journey of a thousand miles)

### Pre-Model, Pre-Data

I begin the modeling process by modeling the experiment according to the description of how it occurred and how the data were collected. This first part consists of conceptual analysis, defining the observational space, and constructing summary statistics that can help identify issues in the model specification.

#### Conceptual Analysis

In section 2.2 I discussed the experimental setup and data collection. To reiterate, subjects are presented with two stimuli separated by some temporal delay, and they are asked to respond as to their perception of the temporal order. There are 45 subjects, with 15 each in the young, middle age, and older age groups. As the SOA becomes larger in the positive direction, subjects are expected to give more "positive" responses, and as the SOA becomes larger in the negative direction, more "negative" responses are expected. By the way the experiment and responses are constructed, there is no expectation of seeing a reversal of this trend unless there was an issue with the subject's understanding of the directions given to them or an error in the recording device.

After the first experimental block the subjects go through a recalibration period and then repeat the experiment. The interest is in seeing whether the recalibration has an effect on temporal sensitivity and perceptual synchrony, and whether the effect is different for each age group.
#### Define Observational Space

The response that subjects give during a TOJ task is recorded as a zero or a one (see section 2.2), and their relative performance is determined by the SOA value. Let \(y\) represent the binary outcome of a trial and let \(x\) be the SOA value.

\[\begin{align*}
y_i &\in \lbrace 0, 1\rbrace \\
x_i &\in \mathbb{R}
\end{align*}\]

If the SOA values are fixed, as in the audiovisual task, then the responses can be aggregated into binomial counts, \(k\).

\[ k_i, n_i \in \mathbb{Z}_0^+, \quad k_i \le n_i \]

In the above expression, \(\mathbb{Z}_0^+\) represents the set of non-negative integers. Notice that the number of trials \(n\) has an index variable \(i\). This is because the number of trials per SOA is not fixed between blocks. In the pre-adaptation block there are five trials per SOA, compared to three in the post-adaptation block. So if observation 32 is recorded during a "pre" block, \(n_{32} = 5\), and if observation 1156 is during a "post" block, \(n_{1156} = 3\). Of course this assumes that each subject completed all trials in the block, but the flexibility of the indexing can manage even if they didn't.

Then there are also three categorical variables: age group, subject ID, and trial (block). The first two are treated as factor variables. Rather than using one-hot encoding or dummy variables, the age levels are left as categories and a coefficient is fit for each level. Among the benefits of this approach are the ease of interpretation and the ease of working with the data programmatically. This is especially true at the subject level. If dummy variables were used for all 45 subjects, there would be 44 different dummy variables to work with, times the number of coefficients that make estimates at the subject level. The number of parameters in the model grows rapidly as the model complexity grows.

Age groups and individual subjects can be indexed in the same way that the number of trials is indexed. \(S_i\) refers to the subject in record \(i\), and similarly \(G_i\) refers to the age group of that subject. Observation 63 is for record ID av-post1-M-f-HG, so \(S_{63}\) is M-f-HG and \(G_{63}\) is middle_age. Under the hood of R, these factor levels are represented as integers (e.g. the middle age group level is stored internally as the number 2).

```r
(x <- factor(c("a", "a", "b", "c")))
#> [1] a a b c
#> Levels: a b c
storage.mode(x)
#> [1] "integer"
```

This data storage representation can later be exploited for the Stan model. The pre- and post-adaptation categories are treated as a binary indicator referred to as \(trt\) (short for treatment), since there are only two levels in the category. In this setup, a value of 1 indicates a post-adaptation block. I chose this encoding over the reverse because the pre-adaptation block acts as the baseline performance, and it seemed more appropriate to interpret the post-adaptation block as turning on some effect. Using a binary indicator in a regression setting may not be the best practice, as I discuss in section 3.2.
In the Stan modeling language, the data for a binomial model with subject and age group levels and a treatment indicator is specified as

```stan
data {
  int N;        // Number of observations
  int N_S;      // Number of subject levels
  int N_G;      // Number of age group levels
  int N_T;      // Number of treatment/control groups
  int n[N];     // Trials per SOA
  int k[N];     // Binomial counts
  vector[N] x;  // SOA values
  int S[N];     // Subject identifier
  int G[N];     // Age group identifier
  int trt[N];   // Treatment indicator
}
```

In Stan (and unlike in R), data types must be statically declared. While sometimes a nuisance, this requirement aids in type inference and also lets Stan optimize certain parts of the model.
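A sketch of how the aggregated data might be packaged into this format from R, exploiting the integer storage of factors shown earlier. The data frame name, the unit conversion, and the treatment coding here are assumptions for illustration, not the exact code used for this analysis.

```r
# Assemble the Stan data list matching the data block above (names assumed).
obs_dat <- with(audiovisual_binomial, list(
  N   = length(k),
  N_S = nlevels(factor(sid)),
  N_G = nlevels(factor(age_group)),
  N_T = nlevels(factor(trial)),
  n   = n,
  k   = k,
  x   = soa / 1000,                    # milliseconds to seconds (assumed scale)
  S   = as.integer(factor(sid)),
  G   = as.integer(factor(age_group)),
  trt = as.integer(trial == "post1")   # 1 indicates the post-adaptation block
))
```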
#### Construct Summary Statistics

In order to effectively challenge the validity of the model, a set of summary statistics is constructed to help answer the questions of domain expertise consistency and model adequacy. We are studying the effects of age and temporal recalibration through the PSS and JND (see section 2.1), so it is natural to define summary statistics around these quantities to verify model consistency. Additionally, the PSS and JND can be computed regardless of the model parameterization or chosen psychometric function.

By the experimental setup and recording process, it is impossible that a properly conducted block would result in a JND less than 0 (i.e. the psychometric function is always non-decreasing), so that can be a lower limit for its threshold. On the other end, it is unlikely that the JND will be beyond the limits of the SOA values, and, even more concretely, it seems unlikely (though not impossible) that the just noticeable difference would be more than a second. These limits can be refined further by drawing information from other sources. Some studies show that we cannot perceive time differences below 30 ms, and others show that an input lag as small as 100 ms can impair a person's typing ability. According to these studies, a time delay of 100 ms is enough to notice, and so a just noticeable difference should be much less than one second, and much closer to 100 ms. I'll continue to use one second as an extreme estimate indicator, but will incorporate this knowledge when it comes to selecting priors.

As for the point of subjective simultaneity, it can be either positive or negative, with the belief that larger values are more rare. Some studies suggest that for audio-visual TOJ tasks the separation between stimuli needs to be as little as 20 milliseconds for subjects to be able to determine which modality came first (Vatakis et al. 2007). Other studies suggest that our brains can detect temporal differences as small as 30 milliseconds. If these values are to be believed, then we should be skeptical of PSS estimates larger than, say, 150 milliseconds in absolute value, just to be safe.

A histogram of computed PSS and JND values will suffice for summary statistics. We can estimate the proportion of values that fall outside of the limits defined above, and use them as indications of problems with the model fitting or with the conceptual understanding.

### Post-Model, Pre-Data

It is now time to define priors for the model, while still not having looked at the data. The priors should be motivated by domain expertise and prior knowledge, not by the data. There are also many choices when it comes to selecting a psychometric (sigmoid) function. Common ones are the logistic, Gaussian, and Weibull.

Figure 3.1: Assortment of psychometric functions.

The Weibull psychometric function is more common in 2-AFC psychometric experiments where the independent variable is a stimulus intensity (non-negative) and the goal is signal detection. The data in this paper include both positive and negative SOA values, so the Weibull is not a natural choice. In fact, because this is essentially a model for logistic regression, my first choice is the logistic function, as it is the canonical choice for binomial data. Additionally, the data in this study are reversible: the label of a positive response can be swapped with the label of a negative response, and the inferences should remain the same. Since there is no natural ordering, it makes more sense for the psychometric function to be symmetric, e.g. the logistic or the Gaussian. I use "symmetric" loosely to mean that the probability density function (PDF) is symmetric about its middle; more specifically, the distribution has zero skewness. In practice there is little difference in inference between the logit and probit links, but computationally the logit link is more efficient. I am also more familiar with working on the log-odds scale compared to the probit scale, so I make the decision to go forward with the logistic function. In chapter 4 I will show how, even with a mis-specified link function, we can still achieve accurate predictions.

#### Develop Model

Before moving on to specifying priors, I think it is appropriate to provide a little more background on generalized linear models (GLMs) and their role in working with psychometric functions. A GLM allows the linear model to be related to the outcome variable via a link function. An example of this is the logit link, the inverse of the logistic function. The logistic function, \(F\), takes \(x \in \mathbb{R}\) and constrains the output to be in \((0, 1)\).

\[\begin{equation}
F(\theta) = \frac{1}{1 + \exp\left(-\theta\right)}
\tag{3.1}
\end{equation}\]

Since \(F\) is a strictly increasing and continuous function, it has an inverse, and the link for (3.1) is the log-odds or logit function.

\[\begin{equation}
F^{-1}(\pi) = \mathrm{logit}(\pi) = \ln\left(\frac{\pi}{1 - \pi}\right)
\tag{3.2}
\end{equation}\]

By taking \((F^{-1} \circ F)(\theta)\) we can arrive at a relationship that is linear in \(\theta\).

\[\begin{align*}
\pi = F(\theta) \Longleftrightarrow F^{-1}(\pi) &= F^{-1}(F(\theta)) \\
&= \ln\left(\frac{F(\theta)}{1 - F(\theta)}\right) \\
&= \ln(F(\theta)) - \ln(1 - F(\theta)) \\
&= \ln\left(\frac{1}{1 + \exp(-\theta)}\right) - \ln\left(\frac{\exp(-\theta)}{1 + \exp(-\theta)}\right) \\
&= - \ln(1 + \exp(-\theta)) - \ln(\exp(-\theta)) + \ln(1 + \exp(-\theta)) \\
&= - \ln(\exp(-\theta)) \\
&= \theta
\end{align*}\]

The purpose of all this setup is to show that a model for the psychometric function can be specified using a linear predictor, \(\theta\). Given a simple slope-intercept model, one would typically write the linear predictor as

\[\begin{equation}
\theta = \alpha + \beta x
\tag{3.3}
\end{equation}\]

This isn't the only acceptable form; it could also be written in the centered parameterization

\[\begin{equation}
\theta = \beta(x - \alpha)
\tag{3.4}
\end{equation}\]

Both parameterizations describe the same geometry, so why should it matter which form is chosen?
Clearly the interpretation of the parameters changes between the two models, but the reason it matters becomes clear when you consider how the linear model relates back to the physical quantities that the psychometric model describes. Take equation (3.3), substitute it into (3.1), and then take the logit of both sides:

\[\begin{equation}
\mathrm{logit}(\pi) = \alpha + \beta x
\tag{3.5}
\end{equation}\]

Now recall that the PSS is defined as the SOA value such that the response probability, \(\pi\), is \(0.5\). Substituting \(\pi = 0.5\) into (3.5) and solving for \(x\) yields

\[ pss = -\frac{\alpha}{\beta} \]

Similarly, the JND is defined as the difference between the SOA value at the 84% level and the PSS. Substituting \(\pi = 0.84\) into (3.5), solving for \(x\), and subtracting off the PSS yields

\[\begin{equation}
jnd = \frac{\mathrm{logit}(0.84)}{\beta}
\tag{3.6}
\end{equation}\]

From the conceptual analysis it is easy to define priors for the PSS and JND, but then how does one set the priors for \(\alpha\) and \(\beta\)? Let's say the prior for the just noticeable difference is \(jnd \sim \pi_j\). Then the prior for \(\beta\) would be

\[ \beta \sim \frac{\mathrm{logit}(0.84)}{\pi_j} \]

The log-normal distribution has a nice property: its multiplicative inverse is still a log-normal distribution. We could let \(\pi_j = \mathrm{Lognormal}(\mu, \sigma^2)\), and then \(\beta\) would be distributed as

\[ \beta \sim \mathrm{Lognormal}(-\mu + \ln(\mathrm{logit}(0.84)), \sigma^2) \]

This is acceptable, as it was determined last chapter that the slope must always be positive, and a log-normal distribution constrains the support to positive real numbers. Next suppose that the prior distribution for the PSS is \(pss \sim \pi_p\). Then the prior for \(\alpha\) is

\[ \alpha \sim -\pi_p \cdot \beta \]

If \(\pi_p\) is set to a log-normal distribution as well, then \(\pi_p \cdot \beta\) would also be log-normal, but there is still the problem of the negative sign. If \(\alpha\) is always negative, then the PSS will also always be negative, which is certainly not always true. Furthermore, I don't want to put more weight a priori on positive PSS values compared to negative ones, for which a log-normal distribution would not do.

Let's now go back, consider using equation (3.4), and repeat the above process.

\[\begin{equation}
\mathrm{logit}(\pi) = \beta(x - \alpha)
\tag{3.7}
\end{equation}\]

The just noticeable difference is still given by (3.6), so the same method for choosing a prior can be used, but the PSS is now given by

\[ pss = \alpha \]

This is a fortunate consequence of using (3.4): the JND only depends on \(\beta\), the PSS only depends on \(\alpha\), and \(\alpha\) can literally be interpreted as the PSS of the estimated psychometric function! Also gained is the ability to set a prior for \(\alpha\) that is symmetric around \(0\), like a Gaussian distribution.

This also brings me to the first benefit of using a modeling language like Stan over other options. For fitting GLMs in R, there are a handful of functions that use MLE, like stats::glm, and others that use Bayesian methods, like rstanarm::stan_glm and arm::bayesglm (Gabry and Goodrich 2020; Gelman and Su 2020). Each of these functions requires the linear predictor to be in the form of (3.3).
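As a concrete illustration of (3.5) and (3.6), here is how the PSS and JND could be recovered from an ordinary slope-intercept logistic regression fit with stats::glm. The data frame name is an assumption; this is only meant to show the back-transformation, not the modeling approach taken in this paper.

```r
# Fit a plain logistic regression in the slope-intercept form of (3.3), then
# back-transform the coefficients into the PSS and JND (data frame name assumed).
fit <- glm(cbind(k, n - k) ~ soa, data = audiovisual_binomial,
           family = binomial("logit"))
a <- coef(fit)[1]
b <- coef(fit)[2]
-a / b            # PSS, from pss = -alpha / beta
qlogis(0.84) / b  # JND, from equation (3.6)
```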
The stan_glm function actually uses Stan in the backend to fit the model, but it is limited to priors from the Student-t family of distributions. By writing the model directly in Stan, the linear model can be parameterized in any way and with any prior distribution, which allows for much more expressive modeling - a key aspect of this principled workflow.

For the first iteration of this model, I am going to start with the simplest model that captures the structure of the data, without including information about age group, treatment, or subject. Here is a simple model that draws information from the conceptual analysis.

\[\begin{align*}
k_i &\sim \mathrm{Binomial}(n_i, p_i) \\
\mathrm{logit}(p_i) &= \beta ( x_i - \alpha )
\end{align*}\]

Since I am using the linear model from (3.4), setting the priors for \(\alpha\) and \(\beta\) is relatively straightforward. The PSS can be positive or negative without any expected bias towards either, so a symmetric distribution like the Gaussian is a fine choice for \(\alpha\), absent any other knowledge about the distribution of PSS values. Since I said earlier that a PSS value of more than 150 ms in absolute value is unlikely, I can define a Gaussian prior such that \(P(|pss| > 0.150) \approx 0.01\). Since the prior does not need to be exact, the following mean and variance suffice:

\[ pss \sim \mathcal{N}(0, 0.06^2) \Longleftrightarrow \alpha \sim \mathcal{N}(0, 0.06^2) \]

For the just noticeable difference, I will continue to use the log-normal distribution because it is constrained to positive values and has the nice reciprocal property. The JND is expected to be close to 100 ms and extremely unlikely to exceed 1 second. This implies a prior such that the mean is around 100 ms and the bulk of the distribution is below 1 second, i.e. \(E[X] \approx 0.100\) and \(P(X < 1) \approx 0.99\). This requires solving a system of nonlinear equations in two variables:

\[
\begin{cases}
E[X] = 0.100 = \exp\left(\mu + \sigma^2 / 2\right) \\
P(X < 1) = 0.99 = 0.5 + 0.5 \cdot \mathrm{erf}\left[\frac{\ln (1) - \mu}{\sqrt{2} \cdot \sigma}\right]
\end{cases}
\]

This nonlinear system can be solved using Stan's algebraic solver.

```stan
functions {
  vector system(vector y, vector theta, real[] x_r, int[] x_i) {
    vector[2] z;
    z[1] = exp(y[1] + y[2]^2 / 2) - theta[1];
    z[2] = 0.5 + 0.5 * erf(-y[1] / (sqrt(2) * y[2])) - theta[2];
    return z;
  }
}
transformed data {
  vector[2] y_guess = [1, 1]';
  real x_r[0];
  int x_i[0];
}
transformed parameters {
  vector[2] theta = [0.100, 0.99]';
  vector[2] y;
  y = algebra_solver(system, y_guess, theta, x_r, x_i);
}
```

```r
fit <- sampling(prior_jnd, iter=1, warmup=0, chains=1, refresh=0,
                seed=31, algorithm="Fixed_param")
sol <- extract(fit)
sol$y
#>           
#> iterations   [,1]  [,2]
#>       [1,] -7.501 3.225
```

The solver has determined that \(\mathrm{Lognormal}(-7.5, 3.2^2)\) is the appropriate prior.
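A quick sanity check of the solver's answer with base R, verifying that it satisfies the two constraints:

```r
# A Lognormal(-7.501, 3.225^2) prior should have mean ~0.100 s
# and place ~99% of its mass below 1 s.
mu <- -7.501
sigma <- 3.225
exp(mu + sigma^2 / 2)  # E[X], approximately 0.100
plnorm(1, mu, sigma)   # P(X < 1), approximately 0.99
```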
However, simulating some values from this distribution produces a lot of extremely small values (\(<10^{-5}\)) and a few extremely large values (\(\approx 10^2\)). This is because the expected value of a log-normal random variable depends on both the mean and the standard deviation of the underlying normal distribution. If the median is used in place of the mean, then a more acceptable prior may be determined.

```r
fit <- sampling(prior_jnd_using_median, iter=1, warmup=0, chains=1,
                refresh=0, seed=31, algorithm="Fixed_param")
sol <- extract(fit)
sol$y
#>           
#> iterations   [,1]   [,2]
#>       [1,] -2.303 0.9898
```

Sampling from a log-normal distribution with these parameters and plotting the histogram shows no inconsistency with the domain expertise. So now, with a prior for the JND, the prior for \(\beta\) can be determined:

\[ jnd \sim \mathrm{Lognormal}(-2.3, 0.99^2) \Longleftrightarrow \frac{1}{jnd} \sim \mathrm{Lognormal}(2.3, 0.99^2) \]

and

\[ \beta = \frac{\mathrm{logit}(0.84)}{jnd} \sim \mathrm{Lognormal}(2.8, 0.99^2) \]

The priors do not need to be too exact. Rounding the parameters for \(\beta\), the simple model is

\[\begin{align*}
k_i &\sim \mathrm{Binomial}(n_i, p_i) \\
\mathrm{logit}(p_i) &= \beta ( x_i - \alpha ) \\
\alpha &\sim \mathcal{N}(0, 0.06^2) \\
\beta &\sim \mathrm{Lognormal}(3, 1^2)
\end{align*}\]

and in Stan, the model code is

```stan
data {
  int N;
  int n[N];
  int k[N];
  vector[N] x;
}
parameters {
  real alpha;
  real<lower=0> beta;
}
model {
  vector[N] p = beta * (x - alpha);
  alpha ~ normal(0, 0.06);
  beta ~ lognormal(3.0, 1.0);
  k ~ binomial_logit(n, p);
}
generated quantities {
  vector[N] log_lik;
  vector[N] k_pred;
  vector[N] theta = beta * (x - alpha);
  vector[N] p = inv_logit(theta);
  for (i in 1:N) {
    log_lik[i] = binomial_logit_lpmf(k[i] | n[i], theta[i]);
    k_pred[i] = binomial_rng(n[i], p[i]);
  }
}
```

Notice that the model block is nearly identical to the mathematical model!

#### Construct Summary Functions

Whew! That was a lot of work to define the priors for just two parameters. Thankfully, going forward, not as much work will need to be done to expand the model. The next step is to construct any relevant summary functions. Since the distributions of posterior PSS and JND values are needed for the summary statistics, it will be nice to have a function that can take in the posterior samples for \(\alpha\) and \(\beta\) and return the PSS and JND values. I'll define \(Q\) as a more general function that takes in the two parameters and a probability, \(\pi\), and returns the distribution of SOA values at \(\pi\).

\[\begin{equation}
Q(\pi; \alpha, \beta) = \frac{\mathrm{logit(\pi)}}{\beta} + \alpha
\tag{3.8}
\end{equation}\]

The function can be defined in R as

```r
Q <- function(p, a, b) qlogis(p) / b + a
```

With \(Q\), the PSS and JND can be calculated as

\[\begin{align}
pss &= Q(0.5) \\
jnd &= Q(0.84) - Q(0.5)
\end{align}\]

#### Simulate Bayesian Ensemble

During this step, I simulate the Bayesian ensemble and later feed the prior values into the summary functions in order to verify that there are no other inconsistencies with domain knowledge. Since the model is fairly simple, I will simulate directly in R.

```r
set.seed(124)
n <- 10000
a <- rnorm(n, 0, 0.06)
b <- rlnorm(n, 3.0, 1)

dat <- with(av_dat, list(N = N, x = x, n = n))
n_obs <- length(dat$x)
idx <- sample(1:n, n_obs, replace = TRUE)

probs <- logistic(b[idx] * (dat$x - a[idx]))
sim_k <- rbinom(n_obs, dat$n, probs)
```

#### Prior Checks

This step pertains to ensuring that prior estimates are consistent with domain expertise. I already did that in the model construction step by sampling values for the just noticeable difference. The first prior chosen was not producing JND estimates that were consistent with domain knowledge, so I adjusted it accordingly. That check would normally be done during this step, and I would have had to return to the model development step.
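The simulated prior draws can also be pushed through the summary function \(Q\) to check the implied PSS and JND against the limits set in the summary statistics step. This sketch uses the objects a, b, and Q defined above.

```r
# Prior distributions of the PSS and JND implied by the priors on alpha and beta,
# and the proportion of draws that exceed the extreme limits set earlier.
prior_pss <- Q(0.5, a, b)
prior_jnd <- Q(0.84, a, b) - prior_pss
mean(abs(prior_pss) > 0.150)  # PSS draws beyond +/- 150 ms
mean(prior_jnd > 1)           # JND draws beyond 1 s
```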
Figure 3.2 shows the distribution of prior psychometric functions derived from the simulated ensemble. There are a few very steep and very shallow curves, but the majority fall within a plausible range.

Figure 3.2: Prior distribution of psychometric functions using the priors for alpha and beta.

Additionally, most of the PSS values are within \(\pm 0.1\), with room to allow for some larger values. Let's also check the prior distributions of the PSS and JND values.

Figure 3.3: PSS prior distribution.

Figure 3.4: JND prior distribution.

I am satisfied with the prior coverage of the PSS and JND values, and there are only a few samples that go beyond the extremes that were specified in the summary statistics step.

#### Configure Algorithm

There are a few parameters that can be set for Stan. On the user side, the main parameters are the number of iterations, the number of warm-up iterations, the target acceptance rate, and the number of chains to run. The NUTS algorithm samples in two phases: a warm-up phase and a sampling phase. During the warm-up phase, the sampler automatically tunes three internal parameters that can significantly affect the sampling efficiency. By default, Stan will use half the number of iterations for warm-up and the other half for actual sampling. The full details of Stan's HMC algorithm are described in the Stan reference manual. For now I am going to use the default algorithm parameters in Stan, and will tweak them later if and when issues arise.

#### Fit Simulated Ensemble

Nothing to say here. Only code.

```r
sim_dat <- with(av_dat, list(N = N, x = x, n = n, k = sim_k))
m031 <- sampling(m031_stan, data = sim_dat, chains = 4, cores = 4, refresh = 0)
```

#### Algorithmic Calibration

One benefit of using HMC over other samplers like Gibbs sampling is that HMC offers diagnostic tools for the health of the chains and the ability to check for divergent transitions (discussed in 4.1.1.4). To check the basic diagnostics of the model, I run the following code.

```r
check_hmc_diagnostics(m031)
#> 
#> Divergences:
#> 0 of 4000 iterations ended with a divergence.
#> 
#> Tree depth:
#> 0 of 4000 iterations saturated the maximum tree depth of 10.
#> 
#> Energy:
#> E-BFMI indicated no pathological behavior.
```

There is no undesirable behavior from this model, so next I check the summary statistics of the estimated parameters. The \(\hat{R}\) statistic is a comparison of the variance within chains to the variance between chains. When the chains have converged to a stationary distribution, the within-chain and between-chain variances are the same, and the ratio is one. Values of \(\hat{R} > 1.1\) are usually indicative of chains that have not converged to a common distribution. Lastly, there is the effective sample size (\(N_{\mathrm{eff}}\)), which is a loose measure of the autocorrelation within the parameter samples. As autocorrelation generally decreases as the lag increases, one can achieve a higher \(N_{\mathrm{eff}}\) by running a chain with more samples and then thinning the samples, i.e. saving only every \(n^{th}\) sample.

Table 3.2: Summary statistics of the fitted Bayesian ensemble.

| parameter | mean    | se_mean | sd     | 2.5%    | 97.5%   | n_eff | Rhat   |
|-----------|---------|---------|--------|---------|---------|-------|--------|
| alpha     | 0.0061  | 0.0001  | 0.0038 | -0.0012 | 0.0136  | 4039  | 0.9995 |
| beta      | 10.7681 | 0.0051  | 0.2404 | 10.3043 | 11.2313 | 2202  | 1.0003 |
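Tables like 3.2 can be pulled directly from the stanfit object; something like the following produces the same columns.

```r
# Posterior summary (mean, se_mean, sd, quantiles, n_eff, Rhat) for the
# two model parameters.
summary(m031, pars = c("alpha", "beta"), probs = c(0.025, 0.975))$summary
```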
Both the \(\hat{R}\) and \(N_{\mathrm{eff}}\) look fine for \(\alpha\) and \(\beta\), though it is slightly concerning that \(\alpha\) is centered relatively far from zero. This could just be due to sampling variance, so I will continue on to the next step.

### Post-Model, Post-Data

#### Fit Observed Data

All of the work up until now has been done without peeking at the observed data. Satisfied with the model so far, I can now go ahead and run the data through it.

```r
m031 <- sampling(m031_stan, data = obs_dat, chains = 4, cores = 4, refresh = 200)
```

#### Diagnose Posterior Fit

Here I repeat the diagnostic checks that I used after fitting the simulated Bayesian ensemble.

```r
check_hmc_diagnostics(m031)
#> 
#> Divergences:
#> 0 of 4000 iterations ended with a divergence.
#> 
#> Tree depth:
#> 0 of 4000 iterations saturated the maximum tree depth of 10.
#> 
#> Energy:
#> E-BFMI indicated no pathological behavior.
```

Table 3.3: Summary statistics of the model fit to the observed data.

| parameter | mean   | se_mean | sd     | 2.5%  | 97.5%  | n_eff | Rhat  |
|-----------|--------|---------|--------|-------|--------|-------|-------|
| alpha     | 0.0373 | 0.0001  | 0.0043 | 0.029 | 0.0458 | 3765  | 1.000 |
| beta      | 8.4259 | 0.0039  | 0.1839 | 8.070 | 8.7897 | 2249  | 1.001 |

There are no indications of an ill-behaved posterior fit! Let's also check the posterior distributions of \(\alpha\) and \(\beta\) against their prior densities (figure 3.5).

Figure 3.5: Comparison of posterior distributions for alpha and beta to their respective prior distributions.

The posterior distributions for \(\alpha\) and \(\beta\) are well within the range determined by domain knowledge, and they are highly concentrated due to both the large amount of data and the fact that this is a completely pooled model with no stratification. As expected, the prior for the JND could have been tighter, with more weight below half a second compared to the one second limit used, but this is not prior information, so it is not prudent to change the prior in this manner after having seen the posterior. As a rule of thumb, priors should only be updated as motivated by domain expertise, not by posterior distributions.

#### Posterior Retrodictive Checks

Finally it is time to run the posterior samples through the summary functions and then perform retrodictive checks. A retrodiction uses the posterior model to predict and compare against the observed data. This is done by drawing samples from the posterior and feeding in the observational data, and it may be repeated to gain a retrodictive distribution.

```r
posterior_pss <- Q(0.5, p031$alpha, p031$beta)
posterior_jnd <- Q(0.84, p031$alpha, p031$beta) - posterior_pss
```

Figure 3.6: Posterior distribution of the PSS and JND.

Neither the posterior estimates for the PSS nor those for the JND exceed the extreme cutoffs set in the earlier steps, so I can be confident that the model is consistent with domain expertise. Let's also take a second to appreciate how simple it is to visualize and summarize the distribution of values for these measures. Using classical techniques like MLE might require bootstrap methods to estimate the distribution of parameter values, or one might approximate the parameter distributions using the mean and standard error of the mean to simulate new values. Since we have the entire posterior distribution, we can calculate the distribution of transformed parameters by working directly with the posterior samples and be sure that the intervals are credible.

Next is to actually do the posterior retrodictions. I will do this in two steps to better show how the distribution of posterior psychometric functions relates to the observed data, and then compare the observed data to the retrodictions. Figure 3.7 shows the result of the first step.

Figure 3.7: Posterior distribution of psychometric functions using pooled observations.
Next I sample parameter values from the posterior distribution and use them to simulate a new data set. In the next iteration I will show how Stan can automatically produce retrodictions during the model fitting step. The results of the posterior retrodictions are shown in figure 3.8.

```r
alpha <- sample(p031$alpha, n_obs, replace = TRUE)
beta <- sample(p031$beta, n_obs, replace = TRUE)

logodds <- beta * (av_dat$x - alpha)
probs <- logistic(logodds)
sim_k <- rbinom(n_obs, av_dat$n, probs)
```

Figure 3.8: Observed data compared to the posterior retrodictions. The data are post-stratified by block for easier visualization.

I want to make it clear exactly what this first iteration of the model tells us. It is the average distribution of underlying psychometric functions across all subjects and blocks. It cannot tell us what the differences between the pre- and post-adaptation blocks are, or what the variation between subjects is. As such, it is only useful in determining whether the average value for the PSS is different from 0, or whether the average JND is different from some other predetermined level. This model is still useful given the right question, but it cannot answer questions about group-level effects. Figure 3.8 shows that the model captures the broad structure of the observed data, but it is perhaps a bit under-dispersed at the tail ends of the SOA values. Besides this one issue, I am satisfied with the first iteration of this model and am ready to proceed to the next iteration.

## 3.2 Iteration 2 (electric boogaloo)

In this iteration I will be adding the treatment and age groups into the model. There are no changes to the conceptual understanding of the experiment, and nothing to change about the observational space. As such, I will skip the first three steps and go straight to the model development step. As the model is built, the number of changes from one iteration to the next should go to zero as the model expands to become only as complex as necessary to answer the research questions.

### Post-Model, Pre-Data

#### Develop Model

To start, let's add in the treatment indicator and put off consideration of the age group levels. In classical statistics, the treatment would be added as an indicator variable (zero or one) for both the slope and the intercept (a varying slopes, varying intercepts model). Let \(trt\) be \(0\) if the observation comes from the pre-adaptation block and \(1\) if it comes from the post-adaptation block.

\[ \theta = \alpha + \alpha_{trt} \times trt + \beta \times x + \beta_{trt}\times trt \times x \]

Now when an observation comes from the pre-adaptation block (\(trt=0\)), the linear predictor is given by

\[ \theta_{pre} = \alpha + \beta \times x \]

and when an observation comes from the post-adaptation block (\(trt=1\)), the linear predictor is

\[ \theta_{post} = (\alpha + \alpha_{trt}) + (\beta + \beta_{trt}) \times x \]

This might seem like a natural way to introduce an indicator variable, but it comes with serious implications. This model implies that there is more uncertainty about the post-adaptation block compared to the baseline block, and this is not necessarily true.
Assuming the parameters have independent priors,

\[\begin{align*}
\mathrm{Var}(\theta_{post}) &= \mathrm{Var}((\alpha + \alpha_{trt}) + (\beta + \beta_{trt}) \times x) \\
&= \mathrm{Var}(\alpha) + \mathrm{Var}(\alpha_{trt}) + x^2 \mathrm{Var}(\beta) + x^2\mathrm{Var}(\beta_{trt})
\end{align*}\]

On the other hand, the variance of \(\theta_{pre}\) is

\[ \mathrm{Var}(\theta_{pre}) = \mathrm{Var}(\alpha) + x^2 \mathrm{Var}(\beta) \le \mathrm{Var}(\theta_{post}) \]
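A quick prior simulation illustrates the point, using illustrative standard-normal priors on all four parameters (not the priors used in this model):

```r
# With independent priors, the indicator parameterization gives the post block
# a wider prior spread than the pre block at any fixed x.
set.seed(1)
n <- 1e5
x <- 0.25
alpha <- rnorm(n); alpha_trt <- rnorm(n)
beta  <- rnorm(n); beta_trt  <- rnorm(n)
var(alpha + beta * x)                             # Var(theta_pre),  ~ 1 + x^2
var((alpha + alpha_trt) + (beta + beta_trt) * x)  # Var(theta_post), ~ 2 * (1 + x^2)
```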
Furthermore, the intercept, \(\alpha\), is no longer the average response probability at \(x=0\) for the entire data set, but is instead exclusively the average for the pre-adaptation block. This may not matter in certain analyses, but one nice property of multilevel models is the separation of population-level estimates and group-level estimates (fixed vs. mixed effects).

So instead, the treatment variable is introduced into the linear model as a factor variable. This essentially means that each level in the treatment gets its own parameter estimate, and it also makes it easier to set priors when there are many levels in a group (such as for the subject level). The linear model, using equation (3.4), with the treatment is written as

\[\begin{equation}
\theta = (\beta + \beta_{trt[i]}) \left[x_i - (\alpha + \alpha_{trt[i]})\right]
\tag{3.9}
\end{equation}\]

As I add in more predictors and groups, equation (3.9) will become more difficult to read. What I can do is break up the slope and intercept parameters and write the linear model as

\[\begin{align*}
\mu_\alpha &= \alpha + \alpha_{trt[i]} \\
\mu_\beta &= \beta + \beta_{trt[i]} \\
\theta &= \mu_\beta (x - \mu_\alpha)
\end{align*}\]

In this way the combined parameters can be considered separately from the linear parameterization. This leads me to consider the priors for \(\alpha_{trt}\) and \(\beta_{trt}\). The way to turn a normal model with categorical predictors into a multilevel model is by allowing the priors to borrow information from other groups. This is accomplished by putting priors on priors. It is easier to write down the model first and then explain how it works.

\[\begin{align*}
k_i &\sim \mathrm{Binomial}(n_i, p_i) \\
\mu_\alpha &= \alpha + \alpha_{trt[i]} \\
\mu_\beta &= \beta + \beta_{trt[i]} \\
\mathrm{logit}(p_i) &= \mu_\beta (x_i - \mu_\alpha) \\
\alpha &\sim \mathcal{N}(0, 0.06^2) \\
\alpha_{trt} &\sim \mathcal{N}(0, \sigma_{trt}^2) \\
\sigma_{trt} &\sim \textrm{to be defined}
\end{align*}\]

In the above model, \(\alpha\) gets a fixed prior (the same as in the first iteration), and \(\alpha_{trt}\) gets a Gaussian prior with an adaptive variance term that is allowed to be learned from the data. The notation is compact, but \(\alpha_{trt}\) is actually two parameters - one each for the pre- and post-adaptation blocks - and they both share the same variance term \(\sigma_{trt}\). This produces a regularizing effect where both treatment estimates are shrunk towards the mean, \(\alpha\). I'll discuss selecting a prior for the variance term shortly, but first I want to discuss setting the prior for the slope terms. Instead of modeling \(\beta\) with a log-normal prior, I can sample from a normal distribution and take the exponential of it to produce a log-normal distribution, i.e.

\[
X \sim \mathcal{N}(3, 1^2), \quad Y = \exp\left\lbrace X \right\rbrace \Longleftrightarrow Y \sim \mathrm{Lognormal}(3, 1^2)
\]

The motivation behind this transformation is that it is now easier to include new slope variables as an additive effect. If both \(\beta\) and \(\beta_{trt}\) are specified with Gaussian priors, then the exponential of their sum will be log-normal! So the model now gains

\[\begin{align*}
\mathrm{logit}(p_i) &= \exp(\mu_\beta) (x_i - \mu_\alpha) \\
\beta &\sim \mathcal{N}(3, 1^2) \\
\beta_{trt} &\sim \mathcal{N}(0, \gamma_{trt}^2) \\
\gamma_{trt} &\sim \textrm{to be defined}
\end{align*}\]

Deciding on priors for the variance terms requires some careful consideration. In one sense, the variance term is the within-group variance. Gelman and others (2006) recommend a half-Cauchy prior for multilevel models with groups that have fewer than, say, 5 levels. This weakly informative prior still has a regularizing effect and dissuades larger variance estimates. Even though the treatment group only has two levels, there is still value in specifying an adaptive prior for them, and there is also a lot of data for each treatment, so partial pooling won't make much of a difference anyway.

\[\begin{align*}
\sigma_{trt} &\sim \mathrm{HalfCauchy}(0, 1) \\
\gamma_{trt} &\sim \mathrm{HalfCauchy}(0, 1)
\end{align*}\]

Finally, I can add in the age group level effects and specify their variance terms.

\[\begin{align*}
\alpha_{G} &\sim \mathcal{N}(0, \tau_{G}^2)\\
\beta_{G} &\sim \mathcal{N}(0, \nu_{G}^2) \\
\tau_{G} &\sim \mathrm{HalfCauchy}(0, 2) \\
\nu_{G} &\sim \mathrm{HalfCauchy}(0, 2)
\end{align*}\]

The corresponding Stan model is becoming quite long, so I omit it from here on out. The final Stan model code may be found in the supplementary code of the appendix.

### Post-Model, Post-Data

#### Fit Observed Data

I'm choosing to skip the prior checks this time around and use the observed data to configure the algorithm and diagnose the posterior fit.

```r
m032 <- sampling(m032_stan, data = obs_dat, seed = 124,
                 chains = 4, cores = 4, refresh = 100)
```

#### Diagnose Posterior Fit

```r
check_hmc_diagnostics(m032)
#> 
#> Divergences:
#> 4 of 4000 iterations ended with a divergence (0.1%).
#> Try increasing 'adapt_delta' to remove the divergences.
#> 
#> Tree depth:
#> 0 of 4000 iterations saturated the maximum tree depth of 10.
#> 
#> Energy:
#> E-BFMI indicated no pathological behavior.
```

As well as the 4 divergent transitions, there was also a message about the effective sample size (ESS) being too low. The recommended prescription for low ESS is to run the chains for more iterations. The posterior summary shows that \(N_{\mathrm{eff}}\) is low for the age group level parameters (table 3.4).

Table 3.4: Summary statistics of the second iteration.

| parameter | mean    | se_mean | sd     | 2.5%    | 97.5%  | n_eff | Rhat  |
|-----------|---------|---------|--------|---------|--------|-------|-------|
| a         | 0.0222  | 0.0014  | 0.0412 | -0.0683 | 0.1024 | 824.6 | 1.002 |
| aG[1]     | -0.0009 | 0.0012  | 0.0313 | -0.0531 | 0.0714 | 703.5 | 1.003 |
| aG[2]     | 0.0274  | 0.0012  | 0.0316 | -0.0218 | 0.0990 | 698.3 | 1.003 |
| aG[3]     | -0.0078 | 0.0012  | 0.0311 | -0.0609 | 0.0609 | 714.3 | 1.004 |
| b         | 2.4114  | 0.0216  | 0.5665 | 1.4902  | 3.8499 | 688.2 | 1.003 |
| bG[1]     | 0.0030  | 0.0170  | 0.2942 | -0.7681 | 0.5013 | 301.3 | 1.004 |
| bG[2]     | 0.0538  | 0.0170  | 0.2940 | -0.7101 | 0.5499 | 299.9 | 1.004 |
| bG[3]     | -0.2223 | 0.0172  | 0.2955 | -1.0150 | 0.2597 | 296.9 | 1.004 |
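For reference, the per-parameter effective sample sizes behind a table like 3.4 can be inspected directly from the fit; something along these lines works with rstan.

```r
# Order the posterior summary by effective sample size to see which
# parameters are mixing the worst.
s <- summary(m032)$summary
head(s[order(s[, "n_eff"]), c("n_eff", "Rhat")])
```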
So I can go back to the algorithm configuration step and increase the number of iterations and warm-up iterations, as well as increase the adapt_delta parameter to reduce the number of divergent transitions (which really isn't a problem right now). Another technique I can employ is non-centered parameterization, and now is as good a time as any to introduce it. I have actually already used a non-centered parameterization in this iteration of the model without addressing it: the transformation of \(\beta\) from a Gaussian to a log-normal distribution. Because HMC is a physics simulation, complicated geometry or posteriors with steep slopes can be difficult to traverse if the step size is too coarse. The solution is to explore a simpler geometry and then transform the samples into the target distribution. Reparameterization is especially important for hierarchical models.

The Cauchy distribution used for the variance terms can be reparameterized by first drawing from a uniform distribution on \((-\pi/2, \pi/2)\). For a half-Cauchy distribution, one samples from \(\mathcal{U}(0, \pi/2)\) instead.

\[\begin{align*}
X &\sim \mathcal{U}(-\pi/2, \pi/2) \\
Y &= \mu + \tau \cdot \tan(X) \Longrightarrow Y \sim \mathrm{Cauchy}(\mu, \tau)
\end{align*}\]

The Gaussian distributions can be reparameterized in a similar way. If \(Z\) is a standard normal random variable, then \(\mu + \sigma Z \sim \mathcal{N}(\mu, \sigma^2)\). For Stan, sampling from a standard normal or uniform distribution is very easy, and so the non-centered parameterization can alleviate divergent transitions. I now return to the model development step and incorporate the new methods.

#### Develop Model

The model changes consist of using the non-centered parameterizations discussed in the previous step. An example is the parameterization of \(\tau_{G}\); the other variance terms are parameterized in the same fashion.

\[\begin{align*}
U_\tau &\sim \mathcal{U}(0, \pi/2) \\
\tau_{G} &= 2 \cdot \tan(U_\tau) \Longrightarrow \tau_G \sim \mathrm{HalfCauchy}(0, 2)
\end{align*}\]
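A quick numerical check of this reparameterization (not part of the model code): scaling the tangent of a uniform draw reproduces the half-Cauchy quantiles.

```r
# tan of a Uniform(0, pi/2) draw, scaled by 2, matches a half-Cauchy(0, 2).
set.seed(1)
u <- runif(1e5, 0, pi / 2)
tau <- 2 * tan(u)
quantile(tau, c(0.25, 0.5, 0.75))     # empirical quartiles
2 * tan(pi / 2 * c(0.25, 0.5, 0.75))  # theoretical half-Cauchy(0, 2) quartiles
```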
Fit Observed Data Moving on to refitting the data, this time with more iterations and with the non-centered parameterization. Since this model is sampling from intermediate parameters, I can choose to keep only the transformed parameters. m032nc <- sampling(m032nc_stan, data = obs_dat, seed = 143, thin = 2, iter = 4000, warmup = 2000, pars = keep_pars, control = list(adapt_delta = 0.95), chains = 4, cores = 4, refresh = 100) Diagnose Posterior Fit check_hmc_diagnostics(m032nc) #> #> Divergences: #> 32 of 4000 iterations ended with a divergence (0.8%). #> Try increasing 'adapt_delta' to remove the divergences. #> #> Tree depth: #> 0 of 4000 iterations saturated the maximum tree depth of 10. #> #> Energy: #> E-BFMI indicated no pathological behavior. There are still a few divergent transitions (\\(<1\\%\\)), but the effective sample size increased significantly (table 3.5). Table 3.5: Summary statistics of the second iteration with non-centered parameterization. parameter mean se_mean sd 2.5% 97.5% n_eff Rhat a 0.0192 0.0008 0.0419 -0.0744 0.0956 2509 1.0005 aG[1] -0.0025 0.0006 0.0326 -0.0636 0.0739 2737 1.0014 aG[2] 0.0262 0.0006 0.0328 -0.0342 0.1044 2644 1.0014 aG[3] -0.0093 0.0006 0.0326 -0.0713 0.0652 2752 1.0011 aT[1] 0.0185 0.0009 0.0425 -0.0546 0.1242 2338 1.0005 aT[2] 0.0039 0.0009 0.0419 -0.0679 0.1089 2404 1.0005 b 2.3841 0.0115 0.5284 1.4762 3.6952 2109 1.0010 bG[1] 0.0170 0.0049 0.2730 -0.6323 0.4979 3106 1.0004 bG[2] 0.0678 0.0049 0.2728 -0.5773 0.5671 3113 1.0005 bG[3] -0.2075 0.0050 0.2741 -0.8506 0.2767 3026 1.0004 bT[1] -0.2764 0.0106 0.4914 -1.6338 0.5427 2141 0.9999 bT[2] -0.0501 0.0106 0.4909 -1.4120 0.7778 2125 1.0000 Now is also a good time to introduce a diagnostic tool called the trace plot. The trace plot is a way to visualize the sampling path of different parameters across all the chains. A healthy set of chains will look like a fuzzy caterpillar, bouncing around the posterior without any visual patterns or long sequences of being in the same place. Figure 3.9 shows the trace plot for the slope and intercept parameters. Each chain looks like it is sampling around the same average value as the others with similar spreads (stationary and homoscedastic). This also helps to solidify the idea that the \\(\\hat{R}\\) statistic is a measure of the between-chain variance compared to the within-chain variance. Figure 3.9: Traceplot for the slope and intercept parameters. The chains in figure 3.9 look healthy, as do those of the other parameters not shown. Since there are no algorithm issues, I can proceed to the posterior retrodictive checks. Posterior Retrodictive Checks In this iteration of the model, I now have estimates for the age groups and the treatment. The posterior estimates for the PSS and JND are shown in figure 3.10. There are many ways to visualize and compare the distributions across age groups and conditions, and it really depends on what question is being asked. If for example the question is “what is the qualitative difference between pre- and post-adaptation across age groups?”, then figure 3.10 could answer that because it juxtaposes the two blocks in the same panel. I will consider alternative ways of arranging the plots in chapter 5. Figure 3.10: Posterior distribution of the PSS and JND. As for the posterior retrodictions, I can do something similar to last time. First I’d like to point out that I had Stan perform posterior retrodictions during the fitting step.
This was achieved by adding a generated quantities block to the Stan program that takes the posterior samples for the parameters, and then randomly generates a value from a binomial distribution for each observation in the data. In effect, I now have \\(4,000\\) simulated data sets! str(p032$k_pred) #> num [1:4000, 1:1827] 0 0 0 0 0 0 0 0 0 0 ... #> - attr(*, "dimnames")=List of 2 #> ..$ iterations: NULL #> ..$ : NULL I only need one to compare to the observed data, so I will select it randomly from the posterior. Figure 3.11: Observed data compared to the posterior retrodictions. The posterior retrodictions show no disagreement between the model and the observed data. I would almost say that this model is complete, but this model has one more problem - it measures the average difference in blocks, and the average difference in age groups, but does not consider any interaction between the two! Implicitly it assumes that temporal recalibration affects all age groups the same, which may not be true, so in the next iteration I will need to address that. 3.3 Iteration 3 (the one for me) Since there is no change in the pre-model analysis, I’ll again go straight to the model development step and then move directly to the posterior retrodictive checks. The changes to the model going forward are minor, and subsequent steps are mostly repetitions of the ones taken in the first two iterations. Develop Model This time around I need to model an interaction between age group and treatment. In a simple model in R, interactions between factor variable \\(A\\) and factor variable \\(B\\) can be accomplished by taking the cross-product of all the factor levels. For example, if \\(A\\) has levels \\(a, b, c\\) and \\(B\\) has levels \\(x, y\\), then the interaction variable \\(C=A:B\\) will have levels \\(ax, ay, bx, by, cx, cy\\). The concept is similar in Stan - create a new variable that is indexed by the cross of the two other factor variables. \\[ \\beta_{G[i] \\times trt[i]} \\Longrightarrow bGT[G[i], trt[i]] \\] In the above expression, the interaction variable \\(\\beta_{G[i] \\times trt[i]}\\) is between age group and treatment. The right hand side is the corresponding Stan parameter. Notice that it is an array-like object that is indexed by the age group at observation \\(i\\) and the treatment at observation \\(i\\). For example, observation \\(51\\) is from a middle age subject during the post-adaptation block, so \\(bGT[G[51], trt[51]] = bGT[2, 2]\\). An interaction term is added for both the slope and intercept in this iteration. Post-Model, Post-Data Posterior Retrodictive Checks Again, I’ll start with the PSS and JND posterior densities. Because the model now allows for the interaction of age group and block, there is no longer a fixed shift in the posterior distribution of the PSS and JND values. Figure 3.12 shows that temporal recalibration had no discernible effect on the PSS estimates for the middle age group. Figure 3.12: Posterior distribution of the PSS and JND. The posterior retrodictions for this model are going to be similar to the last iteration. Instead, I want to see how this model performs when it comes to the posterior retrodictions of the visual TOJ data. There is something peculiar about that data that is readily apparent when I try to fit a GLM using classical MLE. vis_mle <- glm(cbind(k, n-k) ~ 0 + sid + sid:soa, data = visual_binomial, family = binomial("logit")) I get a message saying that the fitted probabilities are numerically 0 or 1.
What does this mean? First this model estimates a slope and an intercept for each subject individually (no pooling model), so we can look at the estimates for each subject. Table 3.6 shows the top 3 coefficients sorted by largest standard error of the estimate for both slope and intercept. Table 3.6: Coefficients with the largest standard errors. Subject Coefficient Estimate Std. Error z value Pr(>|z|) O-f-MW Intercept -3.6313 1.2170 -2.9837 0.0028 M-f-CC Intercept -2.4925 1.0175 -2.4497 0.0143 M-f-DB Intercept -1.0928 0.6389 -1.7105 0.0872 Y-m-CB Slope 0.6254 12.7380 0.0491 0.9608 M-f-DB Slope 0.1434 0.0442 3.2471 0.0012 M-f-CC Slope 0.1434 0.0442 3.2471 0.0012 The standard error of the slope estimate for subject Y-m-CB is incredibly large in comparison to its own estimate and in comparison to the slope with the next largest standard error. To see what’s going wrong, let’s look at the graph for this subject. Figure 3.13: There is almost complete separation in the data. Figure 3.13 shows that there is almost perfect separation in the data for this subject, and that is giving the MLE algorithm trouble. It also has serious consequences on the estimated JND as the estimated JND for this subject is just 3ms which is suspect. Of course one remedy for this is to pool observations together as I have done for the model in this iteration. The data is pooled together at the age group level and variation in the subjects’ responses removes the separation. This isn’t always ideal, as sometimes we may be interested in studying the individuals within the experiment. If we can’t get accurate inferences about the individual, then the results are not valid. The better solution is to use a hierarchical model! With a hierarchical model, individual estimates are shrunk towards the group mean, and so inferences about individuals may be made along with inferences about the group that contains them. I am interested only in the group level inferences right now, but in chapter 5 I will compare the group level model at the end of this chapter to a model that includes individual estimates. Figure 3.14 shows the posterior distribution of psychometric functions for the visual TOJ data. Notice that there is almost no difference between the pre- and post-adaptation blocks. Figure 3.14: Posterior distribution of psychometric functions for the visual TOJ data. There is almost no visual difference between the pre- and post-adaptation blocks. Furthermore, as shown by the posterior retrodictions (figure 3.15), the model is not fully capturing the variation in the responses near the outer SOA values. I.e. the posterior retrodictions are tight around SOA values near zero. Figure 3.15: Observed visual TOJ data compared to the posterior retrodictions. The retrodictions are not capturing the variation at the outer SOA values. So why is the model having difficulty expressing the data? Well as it turns out, there is one more concept pertaining to psychometric experiments that I have left out until now, and that is a lapse in judgment. Not a lapse in judgment on my part, but the actual act of having a lapse while performing an experiment. So now, dear reader, I hope you have it in you for one last iteration of this model before moving on to read about the long sought after results. 3.4 Iteration 4 (what’s one more) Pre-Model, Pre-Data Conceptual Analysis A lapse in judgment can happen for any reason, and is assumed to be random and independent of other lapses. 
They can come in the form of the subject accidentally blinking during the presentation of a visual stimulus, or unintentionally pressing the wrong button to respond. Whatever the case is, lapses can have a significant affect on estimating the psychometric function. Post-Model, Pre-Data Develop Model Lapses can be modeled as occurring independently at some fixed rate. Fundamentally this means that the underlying performance function, \\(F\\), is bounded by some lower and upper lapse rate. This manifests as a scaling and translation of \\(F\\). For a given lower and upper lapse rate \\(\\lambda\\) and \\(\\gamma\\), the performance function \\(\\Psi\\) is \\[ \\Psi(x; \\alpha, \\beta, \\lambda, \\gamma) = \\lambda + (1 - \\lambda - \\gamma) F(x; \\alpha, \\beta) \\] Figure 3.16: Psychometric function with lower and upper performance bounds. In certain psychometric experiments, \\(\\lambda\\) is interpreted as the lower performance bound or the guessing rate. For example, in certain 2-alternative forced choice (2-AFC) tasks, subjects are asked to respond which of two masses is heavier, and the correctness of their response is recorded. When the masses are the same, the subject can do no better than random guessing. In this task, the lower performance bound is assumed to be 50% as their guess is split between two choices. As the absolute difference in mass grows, the subject’s correctness rate increases, though lapses can still happen. In this scenario, \\(\\lambda\\) is fixed at \\(0.5\\) and the lapse rate \\(\\gamma\\) is a parameter in the model. The model I am building for this data does not explicitly record correctness, so I do not give \\(\\lambda\\) the interpretation of a guessing rate. Since the data are recorded as proportion of positive responses, I instead treat \\(\\lambda\\) and \\(\\gamma\\) as lapse rates for negative and positive SOAs. But why should the upper and lower lapse rates be treated separately? A lapse in judgment can occur independently of the SOA, so \\(\\lambda\\) and \\(\\gamma\\) should be the same no matter what. With this assumption in mind, I can throw away \\(\\gamma\\) and assume that the lower and upper performance bounds are restricted by the same amount. I.e. \\[\\begin{equation} \\Psi(x; \\alpha, \\beta, \\lambda) = \\lambda + (1 - 2\\lambda) F(x; \\alpha, \\beta) \\tag{3.10} \\end{equation}\\] While I’m throwing in a lapse rate, I’ll also ask the question if different age groups have different lapse rates. To answer this (or rather have the model answer this), I include the new parameter \\(\\lambda_{G[i]}\\) into the model so that the lapse rate is estimated for each age group. It’s okay to assume that lapses in judgment are rare, and it’s also true that the rate (or probability) of a lapse is bounded in the interval \\([0, 1]\\). Because of this, I put a \\(\\mathrm{Beta(4, 96)}\\) prior on \\(\\lambda\\) which a priori puts 99% of the weight below \\(0.1\\) and an expected lapse rate of \\(0.04\\). I could also set up the model so that information about the lapse rate is shared between age groups (i.e. multilevel), but I’ll leave that as an exercise for the reader. Construct Summary Functions Since the fundamental structure of the linear model has changed, it is worth updating the summary function that computes the distribution of SOA values for a given response probability. 
Given equation (3.10), the summary function \\(Q\\) is \\[ Q(\\pi; \\alpha, \\beta, \\lambda) = F_{\\alpha, \\beta}^{-1}\\left(\\frac{\\pi - \\lambda}{1 - 2\\lambda}\\right) = \\frac{1}{\\exp(\\beta)} \\cdot \\mathrm{logit}\\left(\\frac{\\pi - \\lambda}{1-2\\lambda}\\right) + \\alpha \\] Post-Model, Post-Data Fit Observed Data Because it is the visual data that motivated this iteration, I will finish up using that data to fit the model and perform posterior retrodictive checks. Posterior Retrodictive Checks The plot for the distribution of psychometric functions is repeated one more time below (figure 3.17). There is now visual separation between the pre- and post-adaptation blocks, with the latter exhibiting a steeper slope, which in turn implies a reduced just noticeable difference, consistent with the audiovisual data in the previous model. Figure 3.17: There is now a visual distinction between the two blocks unlike in the model without a lapse rate. The lapse rate acts as a balance between steep slopes near the PSS and variation near the outer SOA values. As for the posterior retrodictions, the model is now better capturing the outer SOA variation. This can best be seen in the comparison of the younger adult pre-adaptation block of figure 3.18. Figure 3.18: The lapse rate produces posterior retrodictions that are visually more similar to the observed data than in the previous model, suggesting that the model is now just complex enough to capture the relevant details of the data generating process. 3.5 Celebrate 4 Model Fitting/Checking Check your model before you wreck your model This chapter serves as the formal home of definitions and explanations of concepts relating to Markov Chain Monte Carlo (MCMC) and other diagnostic tools when working with Bayesian inference models. I touched on the physics of Hamiltonian Monte Carlo (HMC) and the diagnostic tools that come with it in the previous chapter, but now I will go into more detail. 4.1 Fitting using HMC Why do we need a sampler at all? Bayesian statistics and modeling stem from Bayes’ theorem (Equation (4.1)). The prior \\(P(\\theta)\\) is some distribution over the parameter space and the likelihood \\(P(X | \\theta)\\) is the probability of an outcome in the sample space given a value in the parameter space. To keep things simple, we generally say that the posterior is proportional to the prior times the likelihood. Why proportional? The posterior distribution is a probability distribution, which means that the sum or integral over the parameter space must evaluate to one. Because of this constraint, the denominator in (4.1) acts as a scale factor to ensure that the posterior is valid. \\[\\begin{equation} P(\\theta | X) = \\frac{P(X | \\theta)\\cdot P(\\theta)}{\\sum_i P(X | \\theta_i)\\, P(\\theta_i)} = \\frac{P(X | \\theta)\\cdot P(\\theta)}{\\int_\\Omega P(X | \\theta)\\, P(\\theta)\\, d\\theta} \\tag{4.1} \\end{equation}\\] For simple models, the posterior distribution can sometimes be evaluated analytically. An example of this is a conjugate model, where the resulting posterior distribution is of the same family as the prior distribution; the Beta-Binomial model for inference about a proportion is one such case. This is common in baseball for a player’s batting average.
I don’t know a lot about baseball, but I know that hitting a baseball is a little less common than one in three swings, so a priori I believe the probability of hitting a baseball is distributed as \\(\\mathrm{Beta}(2, 5)\\) because the expected value is \\(\\approx 0.29\\) and not a lot of weight is given to any particular value. Throughout a game I follow one player and he hits four balls and misses six - data that can be modeled as a Binomial observation. To figure out the posterior distribution for batting average, I use Bayes’ theorem - the posterior is proportional to the prior times the likelihood. \\[\\begin{align*} P(\\pi | y) &\\propto P(y | \\pi) \\cdot P(\\pi) \\\\ &= {10 \\choose 4}\\pi^{4} (1-\\pi)^{6} \\cdot \\frac{\\Gamma(2+5)}{\\Gamma(2)\\Gamma(5)} \\pi^{2-1}(1-\\pi)^{5-1} \\\\ &\\propto \\pi^{4+2-1}(1-\\pi)^{6+5-1} \\\\ &= \\pi^{6-1}(1-\\pi)^{11-1} \\end{align*}\\] The final line is the shape of a Beta distribution with parameters \\(6=2+4\\) and \\(11=5+6\\). The simple update rule is that for a prior \\(\\mathrm{Beta}(a, b)\\) and observed data with \\(y\\) successes in \\(n\\) observations, the posterior distribution is \\(\\mathrm{Beta}(a + y, b + n - y)\\). For the baseball player, the Bayesian estimate of his batting average is \\(6/(6+11) \\approx 0.353\\), but still with a good amount of uncertainty as shown in figure 4.1. Figure 4.1: After observing 4 hits in 10, the Beta(2,5) prior gets updated to become a Beta(6,11) posterior. Conjugate models are great for simple observational data, but often it happens that the posterior distribution cannot be deduced from the model or that the integral in the denominator is complex or of a high dimension. In the former situation, the integral may not be possible to evaluate, and in the latter there may not be enough computational resources in the world to perform a simple grid approximation. The solution is to use Markov Chain Monte Carlo (MCMC). The idea is that we can draw samples from the posterior distribution in a way that is proportional to the density. This sampling is a form of approximation to the area under the curve (i.e. an approximation to the denominator in (4.1)). Rejection sampling (Gilks and Wild 1992) and slice sampling (Neal 2003) are basic methods for sampling from a target distribution, however they can often be inefficient8. NUTS (the No-U-Turn Sampler) is a much more complex algorithm that can be compared to a physics simulation. A massless “particle” is flicked in a random direction with some amount of kinetic energy in a probability field, and is stopped randomly. The stopping point is the new proposal sample. The No U-Turn part means that when the algorithm detects that the particle is turning around, it will stop so as not to return to the starting position. This sampling scheme has a much higher rate of accepted samples, and also comes with many built-in diagnostic tools that let us know when the sampler is having trouble efficiently exploring the posterior. I’ll talk more about these diagnostic tools throughout the remaining sections. 4.1.1 Diagnostic Tools 4.1.1.1 Trace Plots Trace plots are the first line of defense against misbehaved samplers. They are visual aids that let the practitioner assess the qualitative health of the chains, looking for properties such as autocorrelation, heteroskedasticity, non-stationarity, and convergence. Healthy chains are well-mixing and stationary. It’s often better to run more chains during the model building process so that issues with mixing and convergence can be diagnosed sooner.
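As a concrete illustration of how such trace plots (and the rank histograms discussed below) can be drawn, here is a minimal sketch; it assumes a fitted stanfit object such as m032nc from the previous chapter and uses the bayesplot package, with rstan::traceplot being an equivalent shortcut for the trace plot.

```r
library(bayesplot)

# Trace plot of the population-level intercept and slope across all chains
# (assumes a fitted stanfit object, e.g. m032nc from chapter 3)
posterior_draws <- as.array(m032nc)
mcmc_trace(posterior_draws, pars = c("a", "b"))

# The rank-histogram ("trank") version of the same diagnostic,
# available in recent versions of bayesplot
mcmc_rank_hist(posterior_draws, pars = c("a", "b"))
```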
Even one unhealthy chain can be indicative of a poorly specified model. The addition of more chains also contributes to the estimation of the Split \\(\\hat{R}\\) statistic, which I discuss in 4.1.1.2. Figure 4.2 shows what a set of healthy chains looks like. Figure 4.2: An example of healthy chains. There is a similar diagnostic plot called the rank histogram plot (or trank plot for trace rank plot). Vehtari, Gelman, et al. (2020) details the motivation for trank plots, but in short if the chains are all exploring the posterior efficiently, then the histograms will be similar and uniform. Figure 4.3 is from the same model as above but for the rank histogram. Figure 4.3: A trank plot of healthy chains. As the number of parameters in a model grows, it becomes exceedingly tedious to check the trace and trank plots of all parameters, and so numerical summaries are required to flag potential issues within the model. 4.1.1.2 \\(\\hat{R}\\) and Split \\(\\hat{R}\\) The most common summary statistic for chain health is the potential scale reduction factor (Gelman, Rubin, and others 1992) that measures the ratio of between chain variance and within chain variance. When the two have converged, the ratio is one. I’ve already shared examples of healthy chains which would also have healthy \\(\\hat{R}\\) values, but it’s valuable to also share an example of a bad model. Below is the 8 Schools example (Gelman et al. 2013) which is a classical example for introducing Stan and testing the operating characteristics of a model. schools_dat <- list( J = 8, y = c(28, 8, -3, 7, -1, 1, 18, 12), sigma = c(15, 10, 16, 11, 9, 11, 10, 18) ) The initial starting parameters for this model are intentionally set to vary between \\(-10\\) and \\(10\\) (in contrast to the default range of \\((-2, 2)\\)) and with only a few samples drawn in order to artificially drive up the split \\(\\hat{R}\\) statistic. The model is provided as supplementary code in the appendix. fit_cp <- sampling(schools_mod_cp, data = schools_dat, refresh = 0, iter = 50, init_r = 10, seed = 671254821) Stan instantly warns about many different issues with this model, but the R-hat is the one of interest. The largest is \\(1.68\\) which is incredibly large These chains do not look good at all! Let’s take a look at the \\(\\hat{R}\\) values and see if we can calculate one of the values manually. Table 4.1: Split R-hat values from the 8 Schools example. Parameter Rhat mu 1.234 tau 1.596 To calculate the (non split) \\(\\hat{R}\\), first calculate the between-chain variance, and then the average chain variance. For \\(M\\) independent Markov chains, \\(\\theta_m\\), with \\(N\\) samples each, the between-chain variance is \\[ B = \\frac{N}{M-1}\\sum_{m=1}^{M}\\left(\\bar{\\theta}_m - \\bar{\\theta}\\right)^2 \\] where \\[ \\bar{\\theta}_m = \\frac{1}{N}\\sum_{n=1}^{N}\\theta_{m}^{(n)} \\] and \\[ \\bar{\\theta} = \\frac{1}{M}\\sum_{m=1}^{M}\\bar{\\theta}_m \\] The within-chain variance, \\(W\\), is the variance averaged over all the chains. 
\\[ W = \\frac{1}{M}\\sum_{m=1}^{M} s_{m}^2 \\] where \\[ s_{m}^2 = \\frac{1}{N-1}\\sum_{n=1}^{N}\\left(\\theta_{m}^{(n)} - \\bar{\\theta}_m\\right)^2 \\] The variance estimator is a weighted mixture of the within-chain and cross-chain variation \\[ \\hat{var} = \\frac{N-1}{N} W + \\frac{1}{N} B \\] and finally \\[ \\hat{R} = \\sqrt{\\frac{\\hat{var}}{W}} \\] Here is the calculation in R param <- "mu" theta <- p_cp[,,param] N <- nrow(theta) M <- ncol(theta) theta_bar_m <- colMeans(theta) theta_bar <- mean(theta_bar_m) B <- N / (M - 1) * sum((theta_bar_m - theta_bar)^2) s_sq_m <- apply(theta, 2, var) W <- mean(s_sq_m) var_hat <- W * (N - 1) / N + B / N (mu_Rhat <- sqrt(var_hat / W)) #> [1] 1.134 The \\(\\hat{R}\\) statistic is smaller than the split \\(\\hat{R}\\) value provided by Stan. This is a consequence of steadily increasing or decreasing chains. The split value does what it sounds like, and splits the chains in half and measures each half separately. In this way, the measure is more robust in detecting unhealthy chains. This also highlights the utility in using both visual and statistical tools to evaluate models. 4.1.1.3 Effective Sample Size Samples from Markov Chains are typically autocorrelated, which can increase uncertainty of posterior estimates. I encountered this issue in the second iteration of the model building process, and the solution I used was to reparameterize the model to avoid steep log-posterior densities - the benefit of reparameterization is conveyed by the ratio of effective sample size to actual sample size in figure 4.4. When the HMC algorithm is exploring difficult geometry, it can get stuck in regions of high densities, which means that there is more correlation between successive samples. Figure 4.4: Ratio of N_eff to actual sample size. Low ratios imply high autocorrelation which can be alleviated by reparameterizing the model or by thinning. As the strength of autocorrelation generally decreases at larger lags, a simple prescription to decrease autocorrelation between samples and increase the effective sample size is to use thinning. Thinning means saving every \\(k^{th}\\) sample and throwing the rest away. If one desired to have 2000 posterior draws, it could be done in two of many possible ways Generate 2000 draws after warmup and save all of them Generate 10,000 draws after warmup and save every \\(5^{th}\\) sample. Both will produce 2000 samples, but the method using thinning will have less autocorrelation and a higher effective number of samples. Though it should be noted that generating 10,000 draws and saving all of them will have a higher number of effective samples than the second method with thinning, so thinning should only be favored to save memory. 4.1.1.4 Divergent Transitions Unlike the previous tools for algorithmic faithfulness which can be used for any MCMC sampler, information about divergent transitions is intrinsic to Hamiltonian Monte Carlo. Recall that the HMC and NUTS algorithm can be imagined as a physics simulation of a particle in a potential energy field, and a random momentum is imparted on the particle. The sum of the potential energy and the kinetic energy of the system is called the Hamiltonian, and is conserved along the trajectory of the particle (Stan Development Team 2020). 
The path that the particle takes is a discrete approximation to the actual path where the position of the particle is updated in small steps called leapfrog steps (see Leimkuhler and Reich (2004) for a detailed explanation of the leapfrog algorithm). A divergent transition happens when the simulated trajectory is far from the true trajectory as measured by the Hamiltonian. A few divergent transitions are not indicative of a poorly performing model, and often divergent transitions can be reduced by reducing the step size and increasing the adapt delta parameter. On the other hand, a bad model may never be improved just by tweaking some parameters. This is the folk theorem of statistical computing - if there is a problem with the sampling, blame the model, not the algorithm. Divergent transitions are never saved in the posterior samples, but they are saved internally to the Stan fit object and can be compared against good samples. Sometimes this can give insight into which parameters and which regions of the posterior the divergent transitions are coming from. Figure 4.5: Divergent transitions highlighted for some parameters from the second iteration model. Divergent transitions tend to occur when both the hierarchical variance terms are near zero. 4.2 Prior Predictive Checks I used prior predictive checks in the first iteration of the model to establish a few things pertaining to model adequacy and computational faithfulness. The first reason is to ensure that the selected priors do not put too much mass in completely implausible regions (such as really large JND estimates). Data simulated from the priors can also be used to check that the software works. When the data are simulated using parameters drawn from the exact priors used in the model, the fitting algorithm should be able to accurately recover those parameters. The natural next step is posterior predictive checking, which serves the same purpose after the model has been fit to the observed data; see figure 10 of Gabry et al. (2019) for an overview of posterior predictive checks. 5 Predictive Inference All models are wrong, but some are useful The above quote is from George Box, and it is a popular quote that statisticians like to throw around9. All models are wrong because it is nearly impossible to account for the minutiae of every process that contributes to an observed phenomenon, and often trying to do so results in poorer performing models. Also, is it ever truly possible to prove that a model is correct? At best our scientific method can falsify certain hypotheses, but it cannot ever tell us if a model is universally correct. That doesn’t matter. What does matter is if the model is useful and can make accurate predictions. Why is predictive performance so important? Consider five points of data (figure 5.1). I have simulated values from some polynomial equation of degree less than five, but with no more information other than that, how can the best polynomial model be selected? Figure 5.1: Five points from a polynomial model. One thing to try is to fit a handful of linear models, check the parameters’ p-values, the \\(R^2\\) statistic, and perform other goodness of fit tests, but there is a problem. As you increase the degree of the polynomial fit, the \\(R^2\\) statistic will always increase. In fact with five data points, a fourth degree polynomial will fit the data perfectly (figure 5.2). Figure 5.2: Data points with various polynomial regression lines.
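The claim that \\(R^2\\) never decreases with the polynomial degree is easy to check directly; the sketch below assumes the five simulated points x and y used for figure 5.1.

```r
# R^2 for polynomial fits of increasing degree; the last value is exactly 1
# because a degree-4 polynomial interpolates five points
sapply(1:4, function(d) {
  fit <- lm(y ~ poly(x, d))
  summary(fit)$r.squared
})
```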
If I were to add a \\(6^{th}\\) point - a new observation - which of the models would you expect to do best? Can it be estimated which model will predict best before testing with new data? One guess is that the quadratic or cubic model will do well because the linear model is potentially underfit to the data and the quartic is overfit to the data. Figure 5.3 shows the new data point from the polynomial model. Now the linear and cubic models are trending in the wrong direction. The quadratic and quartic models are both trending down, so either may be the correct form for the model. Figure 5.3: The fitted polynomial models with a new observation. Figure 5.4 shows the 80% and 95% prediction intervals for a new observation given \\(x = 5\\) as well as the true outcome as a dashed line at \\(y = -3.434\\). The linear model has the smallest prediction interval (PI), but completely misses the target. The remaining three models all include the observed value in their 95% PIs, but the quadratic has the smallest PI of the three. The actual data generating polynomial is \\[ y \\sim \\mathcal{N}(\\mu, 1^2) \\\\ \\mu = -0.5(x - 2)^2 + 2 \\] Figure 5.4: 95% Prediction intervals for the four polynomial models, as well as the true value (dashed line). This is just a toy example, and real-world models are often more complex, but they do present the same headaches when it comes to model/feature selection and goodness of fit checks. Clearly the quartic model has the best fit to the data, but it is too variable and doesn’t capture the regular features of the data, so it does poorly for the out-of-sample prediction. The linear model suffers as well: it is too biased and inflexible to capture the structure of the data. The quadratic and cubic are in the middle of the road, but the quadratic predicts well while making fewer assumptions about the data - it is just complex enough to capture the structure without overfitting. Information criteria are a way of weighing the prediction quality of a model against its complexity, and are arguably a better system for model selection/comparison than other goodness of fit statistics such as \\(R^2\\) or p-values. 5.1 Model Comparison via Predictive Performance We don’t always have the observed data to compare predictions against (nor the data generating model). Some techniques to compensate for this limitation include cross validation, where the data is split into training data and testing data. The model is fit to the training data, and then predictions are made with the testing data and compared to the observed values. This can often give a good estimate for out-of-sample prediction error. Cross validation can be extended into k-fold cross validation. The idea is to fold the data into \\(k\\) disjoint partitions, and predict partition \\(i\\) using the rest of the data to train on. The prediction error of the \\(k\\) folds can then be averaged over to get an estimate for out-of-sample prediction error. Taking \\(k\\)-fold CV to the limit by letting \\(k\\) equal the number of observations results in something called leave one out cross validation (LOOCV), where for each observation in the data, the model is fit to the remaining data and a prediction is made for the left-out observation. The downside of \\(k\\)-fold cross validation is that it requires fitting the model \\(k\\) times, which can be computationally expensive for complex Bayesian models. Thankfully there is a way to approximate LOOCV without having to refit the model many times.
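Before introducing that approximation, a brute-force LOOCV on the toy polynomial example makes the idea (and the computational cost) concrete. The sketch below assumes the six simulated values x0 and y0 from earlier in this chapter and refits each candidate model once per held-out point.

```r
# Brute-force leave-one-out cross validation for the four polynomial models.
# Each model is refit once per observation, which is the cost PSIS-LOO avoids.
loocv_rmse <- sapply(1:4, function(d) {
  errs <- sapply(seq_along(x0), function(i) {
    fit <- lm(y ~ poly(x, d, raw = TRUE),
              data = data.frame(x = x0[-i], y = y0[-i]))
    y0[i] - predict(fit, newdata = data.frame(x = x0[i]))
  })
  sqrt(mean(errs^2))   # root mean squared out-of-sample error per degree
})
round(loocv_rmse, 2)
```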
5.1.1 LOOCV and Importance Sampling LOOCV and many other evaluation tools such as WAIC rest on the log-pointwise-predictive-density (lppd), which is a loose measure of deviance from some “true” probability distribution. Typically we don’t have the analytic form of the predictive posterior, so instead we use \\(S\\) MCMC draws to approximate the lppd (Vehtari, Gelman, and Gabry 2017): \\[ \\begin{equation} \\mathrm{lppd}(y, \\Theta) = \\sum_i \\log \\frac{1}{S} \\sum_s p(y_i | \\Theta_s) \\tag{5.1} \\end{equation} \\] To estimate LOOCV, the relative “importance” of each observation must be computed. Certain observations have more influence on the posterior distribution, and so removing them changes the posterior more. The intuition behind measuring importance is that more influential observations are relatively unlikely under the model, whereas less influential observations are relatively expected. Then by omitting an observation, its out-of-sample predictive density can be measured. For each omitted \\(y_i\\), \\[ \\mathrm{lppd}_{CV} = \\sum_i \\frac{1}{S} \\sum_s \\log p(y_{i} | \\Theta_{-i,s}) \\] There is a package called loo that can compute the expected log-pointwise-predictive-density (ELPD) using PSIS-LOO, as well as the estimated number of effective parameters and LOO information criterion (Vehtari, Gabry, et al. 2020). On the researcher’s part, the log-likelihood of the observations must be computed in the model. For my models, I added this in the generated quantities block of my Stan program. It is standard practice to name the log-likelihood as log_lik in the model. generated quantities { vector[N] log_lik; for (i in 1:N) { real alpha = a + aGT[G[i], trt[i]]; real beta = b + bGT[G[i], trt[i]]; real lambda = lG[G[i]]; real p = lambda + (1 - 2*lambda) * inv_logit(exp(beta) * (x[i] - alpha)); log_lik[i] = binomial_lpmf(k[i] | n[i], p); } } Models can be compared simply using loo::loo_compare. It takes the estimated ELPD and its standard error for each model, then calculates the relative differences between all the models. The model with the highest ELPD is predicted to have the best out-of-sample predictions. The comparison of the first three iterations of the model from chapter 3 for the audiovisual data are shown below. comp_av <- loo_compare(l031_av, l032_av, l032nc_av, l033_av) print(comp_av, simplify = FALSE) #> elpd_diff se_diff elpd_loo se_elpd_loo p_loo se_p_loo looic se_looic #> model4 0.0 0.0 -1615.7 42.8 16.2 0.9 3231.4 85.6 #> model2 -1.0 3.8 -1616.7 42.6 11.3 0.6 3233.3 85.2 #> model3 -1.3 3.8 -1617.0 42.7 11.8 0.6 3234.0 85.3 #> model1 -32.8 10.4 -1648.5 43.0 3.0 0.2 3296.9 86.1 The centered and non-centered parameterizations (models 2 and 3 respectively) have essentially the same ELPD. This is expected since they are mathematically the same model. The reparameterization only helps with model fitting efficiency, though that can mean more reliable posteriors. The model with age-block interactions (model 4) has the highest ELPD, but is not decisively the best as determined by the standard error of the ELPD. The only thing that can be determined is that including age and block improves performance significantly over the base model (model 1). But how about for the visual data? The fourth iteration of the model introduced a lapse rate. Did the change significantly improve the ELPD?
comp_vis <- loo_compare(l033_vis, l034_vis) print(comp_vis, simplify = FALSE) #> elpd_diff se_diff elpd_loo se_elpd_loo p_loo se_p_loo looic se_looic #> model2 0.0 0.0 -1001.1 44.0 19.2 1.9 2002.2 88.0 #> model1 -259.4 31.9 -1260.5 56.1 23.1 2.3 2520.9 112.2 Absolutely! Something else interesting also happened with the introduction of the lapse rate - the effective number of parameters decreased (p_loo). Earlier I argued that model selection is out, model comparison is in. At the end of chapter 3 I finished with a model that has age-block interactions and a lapse rate for each age group. There was one more model that I could have specified - one that estimates at the subject level. There is no domain-specific reason to include the subject level information, especially since the goal is to make inferences at the age group level, but there may still be statistical reason to add in the subjects. For one, adding in the subject as another level in a multilevel model can induce regularization among the subjects, which can overall make for better predictions on new data. I’ve gone ahead and fit the model with subject-level information, and the comparison between this new model and the one from iteration 4 is shown below. comp_vis2 <- loo_compare(l034_vis, l034s_vis) print(comp_vis2, simplify = FALSE) #> elpd_diff se_diff elpd_loo se_elpd_loo p_loo se_p_loo looic se_looic #> model2 0.0 0.0 -925.1 38.1 75.6 5.4 1850.3 76.2 #> model1 -76.0 19.1 -1001.1 44.0 19.2 1.9 2002.2 88.0 Including the subject-level information significantly improves the ELPD, and even though there are over 100 parameters in the model (slope and intercept for each of the 45 subjects), the effective number of parameters is much smaller. Since this new model is capable of making inferences at both the age group level and the subject level, I use it for the results chapter (chapter 6). One concern comes up when it comes to LOOCV and multilevel models. What does it mean to leave one out? Should one subject be left out? One age group? Just one observation? With more levels in a model, more care must be taken when estimating prediction performance. 6 Psychometric Results What was the point of going through all the work of building a model if not to answer the questions that motivated the model in the first place? To reiterate, the questions pertain to how the brain reconciles stimuli originating from different sources, and if biological (age) and contextual (task, temporal recalibration) factors contribute to global percepts. These questions are answered through a psychometric experiment and the resulting psychometric function (chapter 2). I’ve divided this chapter into two sections - the effects of temporal recalibration and the consideration of a lapse rate. Temporal recalibration is considered in the context of perceptual synchrony and temporal sensitivity, and the results are broken down by age group. Also recall that there are four separate tasks - audiovisual, visual, duration, and sensorimotor. 6.1 Effect of Adaptation across Age Groups Temporal recalibration consists of presenting a subject with an adapting stimulus throughout a block of a psychometric experiment. Depending on the mechanisms at work, the resulting psychometric function can either be shifted (biased) towards the adapting stimulus (lag adaptation) or away (Bayesian adaptation).
The theory of integrating sensory signals is beyond my scope, but some papers discussing sensory adaptation in more detail are Miyazaki et al. (2006), Sato and Aihara (2011), and Stocker and Simoncelli (2005). I will be discussing the statistical results without considering the deeper psychological theory. 6.1.1 On Perceptual Synchrony Perceptual synchrony is when the temporal delay between two stimuli is small enough so that the brain integrates the two signals into a global percept - perceived as happening simultaneously. Perceptual synchrony is studied through the point of subjective simultaneity (PSS), and in a simple sense represents the bias towards a given stimulus. Ideally the bias would be zero, but human perception is liable to change due to every day experiences. The pre-adaptation block is a proxy for implicit bias, and the post-adaptation indicates whether lag or Bayesian adaptation is taking place. Some researchers believe that both forms of adaptation are taking place at all times and that the mixture rates are determined by biological and contextual factors. I will try to stay away from making any strong determinations and will only present the results conditional on the model and the data. Audiovisual TOJ Task There are two ways that we can visually draw inferences across the 6 different age-block combinations. The distributions can either be faceted by age group, or they can be faceted by block. There are actually many ways that the data can be presented, but these two methods of juxtaposition help to answer two questions - how does the effect of adaptation vary by age group, and is there a difference in age groups by block? The left hand plot of figure 6.1 answers the former, and the right hand plot answers the latter. Figure 6.1: Posterior distribution of PSS values for the audiovisual task. Across all age groups, temporal recalibration results in a negative shift towards zero in the PSS (as shown by the left hand plot), but there is no significant difference in the PSS between age groups (right hand plot). A very convenient consequence of using MCMC is that the samples from the posterior can be recombined in many ways to describe new phenomena. The PSS values can even be pooled across age groups so that the marginal affect of recalibration may be considered (left hand plot of figure 6.2). Figure 6.2: Posterior distribution of PSS values for the audiovisual task. Left: Marginal over age group. Right: Marginal over block. Now with the marginal of age group, the distribution of differences between pre- and post-adaptation blocks can be calculated. I could report a simple credible interval, but it almost seems disingenuous given that the entire distribution is available. I could report that the \\(90\\%\\) highest posterior density interval (HPDI) of the difference is \\((-0.036, 0.003)\\), but consider the following figure instead. Figure 6.3: Distribution of differences for pre- and post-adaptation PSS values with 90% HPDI. Figure 6.3 shows the distribution of differences with the \\(90\\%\\) HPDI region shaded. From this figure, one might conclude that the effect of recalibration, while small, is still noticeable for the audiovisual task. While this could be done for every task in the rest of this chapter, I do not think it is worth repeating as I am not trying to prove anything about the psychometric experiment itself (that is for a later paper). 
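As a sketch of the computation behind figure 6.3 (the object names here are illustrative, not the ones used in the thesis code), the difference distribution and its HPDI can be obtained directly from the posterior draws:

```r
# pss_pre and pss_post are hypothetical vectors of posterior PSS draws for the
# pre- and post-adaptation blocks, marginal over age group
pss_diff <- pss_post - pss_pre

# 90% highest posterior density interval (rethinking is loaded in the setup chunk)
rethinking::HPDI(pss_diff, prob = 0.90)

# A central 90% interval for comparison
quantile(pss_diff, c(0.05, 0.95))
```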
The point of this demonstration is simply that it can be done (and easily), and how to summarize the data both visually and quantitatively. Visual TOJ Task Figure 6.4: Posterior distribution of PSS values for the visual task. Here there is no clear determination if recalibration has an effect on perceptual synchrony, as it is only the middle age group that shows a shift in bias. Even more, there is a lot of overlap between age group. Looking at the marginal distributions (figure 6.5), there may be a difference between the younger and older age groups, and the middle age and older age groups. Figure 6.5: The difference between the older age group and the two others is noticeable, but not likely significant. These plots are useful for quickly being able to determine if there is a difference in factors. If there is a suspected difference, then the distribution can be calculated from the posterior samples as needed. I suspect that there may be a difference between the older age group and the other two, so I calculated the differences, and summarize them with the histogram in figure 6.6. Figure 6.6: The bulk of the distribution is above zero, but there is still a chance that there is no difference in the distribution of PSS values between the age groups during the visual TOJ experiment. The bulk of the distribution is above zero, but there is still a chance that there is no difference in the distribution of PSS values between the age groups during the visual TOJ experiment. Duration TOJ Task Figure 6.7: Posterior distribution of PSS values for the duration task. The duration TOJ task is very interesting because 1) recalibration had a visually significant effect across all age groups, and 2) there is virtually no difference between the age groups. I could plot the marginal distribution, but it wouldn’t likely give any more insight. What I might ask is what is it about the duration task that lets temporal recalibration have such a significant effect? Is human perception of time duration more malleable than our perception to other sensory signals? Sensorimotor TOJ Task Figure 6.8: Posterior distribution of PSS values for the sensorimotor task. There are no differences between age groups or blocks when it comes to perceptual synchrony in the sensorimotor task. 6.1.2 On Temporal Sensitivity Temporal sensitivity is the ability to successfully integrate signals arising from the same event, or segregate signals from different events. When the stimulus onset asynchrony increases, the ability to bind the signals into a single percept is reduced until they are perceived as distinct events with a temporal order. Those that are more readily able to determine temporal order have a higher temporal sensitivity, and it is measured through the slope of a psychometric function - specifically the quantity known as the just noticeable difference. Audiovisual TOJ Task Figure 6.9: Posterior distribution of JND values for the audiovisual task. All age groups experienced an increase in temporal sensitivity, but the effect is largest in the older age group which also had the largest pre-adaptation JND estimates. There also appears to be some distinction between the older age group and the younger ones in the pre-adaptation block, but recalibration closes the gap. Visual TOJ Task Figure 6.10: Posterior distribution of JND values for the visual task. 
The story for the visual TOJ task is similar to the audiovisual one - each age group experience heightened temporal sensitivity after recalibration, with the two older age groups receiving more benefit than the younger age group. It’s also worth noting that the younger age groups have higher baseline temporal sensitivity, so there may not be as much room for improvement. Duration TOJ Task Figure 6.11: Posterior distribution of JND values for the duration task. This time the effects of recalibration are not so strong, and just like for the PSS, there is no significant difference between age groups in the duration task. Sensorimotor TOJ Task Figure 6.12: Posterior distribution of JND values for the sensorimotor task. Finally in the sensorimotor task there are mixed results. Temporal recalibration increased the temporal sensitivity in the younger age group, reduced it in the middle age group, and had no effect on the older age group. Clearly the biological factors at play are complex, and the data here is a relatively thin slice of the population. More data and a better calibrated experiment may give better insights into the effects of temporal recalibration. 6.2 Lapse Rate across Age Groups Figure 6.13: Process model of the result of a psychometric experiment with the assumption that lapses occur at random and at a fixed rate, and that the subject guesses randomly in the event of a lapse. In the above figure, the outcome of one experiment can be represented as a directed acyclic graph (DAG) where at the start of the experiment, the subject either experiences a lapse in judgment with probability \\(\\gamma\\) or they do not experience a lapse in judgment. If there is no lapse, then they will give a positive response with probability \\(F(x)\\). If there is a lapse in judgment, then it is assumed that they will respond randomly - e.g. a fifty-fifty chance of a positive response. In this model of an experiment, the probability of a positive response is the sum of the two paths. \\[\\begin{align} \\mathrm{P}(\\textrm{positive}) &= \\mathrm{P}(\\textrm{lapse}) \\cdot \\mathrm{P}(\\textrm{positive} | \\textrm{lapse}) \\\\ &\\quad + \\mathrm{P}(\\textrm{no lapse}) \\cdot \\mathrm{P}(\\textrm{positive} | \\textrm{no lapse}) \\\\ &= \\frac{1}{2} \\gamma + (1 - \\gamma) \\cdot F(x) \\end{align}\\] If we then let \\(\\gamma = 2\\lambda\\) then the probability of a positive response becomes \\[ \\mathrm{P}(\\textrm{positive}) = \\lambda + (1 - 2\\lambda) \\cdot F(x) \\] This is the lapse model described in (3.10)! But now there is a little bit more insight into what the parameter \\(\\lambda\\) is. If \\(\\gamma\\) is the true lapse rate, then \\(\\lambda\\) is half the lapse rate. This may sound strange at first, but remember that equation (3.10) was motivated as a lower and upper bound to the psychometric function, and where the bounds are constrained by the same amount. Here the motivation is from a process model, yet the two lines of reasoning arrive at the same model. Figure 6.14 shows the distribution of lapse rates for each age group across the four separate tasks. There is no visual trend in the ranks of lapse rates, meaning that no single age group definitively experiences a lower lapse rate than the others, though the middle age group comes close to being the winner and the older age group is more likely to be trailing behind. The distribution of lapse rates does reveal something about the tasks themselves. Figure 6.14: Lapse rates for the different age groups across the four separate tasks. 
Visually there is no clear trend in lapses by age group, but the concentration of the distributions give insight into the perceived difficulty of a task where more diffuse distributions may indiciate more difficult tasks. I used the audiovisual data in the first few iterations of building a model and there were no immediate issues, but when I tested the model on the visual data it had trouble expressing the variability at outer SOA values. I noted that one subject had a near perfect response set, and many others had equally impressive performance. The model without a lapse rate was being torn between a very steep slope near the PSS and random variability near the outer SOAs. The remedy was to include a lapse rate (motivated by domain expertise) which allowed for that one extra degree of freedom necessary to reconcile the opposing forces. Why did the visual data behave this way when the audiovisual data had no issue? That gets deep into the theory of how our brains integrate signals arising from different modalities. Detecting the temporal order of two visual stimuli may be an easier mental task than that of heterogeneous signals. Then consider audiovisual versus duration or sensorimotor. Visual-speech synthesis is a much more common task throughout the day than visual-tactile (sensorimotor), and so perhaps we are better adjusted to such a task as audiovisual. The latent measure of relative performance or task difficulty might be picked up through the lapse rate. To test this idea, the TOJ experiment could be repeated, and then ask the subject afterwards how they would rate the difficulty of each task. For now, a post-hoc test can be done by comparing the mean and spread of the lapse rates to a pseuedo difficulty measure as defined by the mean of the incorrect responses. A response is correct when the sign of the SOA value is concordant with the response, e.g. a positive SOA and the subject gives the “positive” response or a negative SOA and the subject gives the “negative” response. Looking at figure 6.14, I would subjectively rate the tasks from easiest to hardest based on ocular analysis as Visual Audiovisual Duration Sensorimotor Again, this ranking is based on the mean (lower intrinsically meaning easier) and the spread (less diffuse implying more agreement of difficulty between age groups). The visual task has the tightest distribution of lapse rates, and the sensorimotor has the widest spread, so I can rank those first and last respectively. Audiovisual and duration are very similar in mean and spread, but the audiovisual has a bit more agreement between the young and middle age groups, so second and third go to audiovisual and duration. Table 6.1 shows the results arranged by increasing pseudo difficulty. As predicted, the visual task is squarely at the top and the sensorimotor is fully at the bottom. The only out of place group is the audiovisual task for the older age group, which is about equal to the older age group during the duration task. In fact, within tasks, the older age group always comes in last in terms of proportion of correct responses, while the young and middle age groups trade back and forth. Table 6.1: Relative difficulty of the different tasks by age group. The difficulty is measured by the proportion of incorrect responses. 
| Task | Age Group | Pseudo Difficulty |
|--------------|-------------|-------------------|
| visual | Middle Age | 0.03 |
| visual | Young Adult | 0.03 |
| visual | Older Adult | 0.06 |
| audiovisual | Young Adult | 0.12 |
| audiovisual | Middle Age | 0.12 |
| duration | Middle Age | 0.14 |
| duration | Young Adult | 0.16 |
| duration | Older Adult | 0.17 |
| audiovisual | Older Adult | 0.17 |
| sensorimotor | Young Adult | 0.22 |
| sensorimotor | Middle Age | 0.24 |
| sensorimotor | Older Adult | 0.29 |

One way to remove the uncertainty of the lapse rate could be to have some trials with very large SOA values. The reasoning is that if the difficulty of a task (given an SOA value) is lowered, then an incorrect response is more likely to be due to a true lapse in judgment as opposed to a genuinely incorrect response. Wichmann and Hill (2001b) recommend that at least one sample at \\(\\pi \\ge 0.95\\) is necessary for reliable bootstrap confidence intervals, so the same reasoning can be applied when using Bayesian credible intervals. For a task such as visual TOJ, the \\(90\\%\\) level may occur at an SOA of \\(\\approx 40\\)ms while for the audiovisual TOJ it may be \\(\\approx 220\\)ms, so the sampling scheme for psychometric experiments must be tuned to the task. Wichmann and Hill (2001a) experimentally determined that the lapse rate for trained observers is between \\(0\\%\\) and \\(5\\%\\), and the data in this paper loosely agree with that conclusion. Any excess in lapse rate may be attributed to the perceived task difficulty and a sub-optimal sampling scheme. Since the visual TOJ task is relatively the easiest, the estimated lapse rates are more believable as true lapse rates, and fall closely within the \\((0, 0.05)\\) range. 7 Discussion 7.1 Model selection is not always the goal Building a model motivated by a set of principles and domain expertise should be the preferred way of performing an analysis. The next important principle is model comparison, especially in terms of predictive inference. One model also doesn’t always work for everything. In the course of building a model that is just complex enough to answer questions about age and temporal recalibration, I mentioned that intermediate models could be used to answer questions about average effects at different levels. For purely predictive inference, there is also the possibility of Bayesian model averaging (BMA) and other ensemble methods. 7.2 Data Cleaning and Reproducibility Data doesn’t always come in a nice tidy10 format, and I had the pleasure of turning the raw experimental data into a clean data set that is ready for modeling. Sometimes the process is quick and straightforward, but other times, like with this psychometric data, it takes more effort and clever techniques. There is academic value in describing the steps I took up front to reduce the headache later. To begin, there is a strong push in recent years for reproducible data science. Scientific methods and results should be able to be replicated by other researchers, and part of that includes being able to replicate the process that takes the raw data and produces the tidy data that is ready for analysis.
## 7.2 Data Cleaning and Reproducibility

Data doesn't always come in a nice tidy format, and I had the pleasure of turning the raw experimental data into a clean data set that is ready for modeling. Sometimes the process is quick and straightforward, but other times, like with this psychometric data, it takes more effort and some clever techniques. There is academic value in describing the steps I took up front to reduce the headache later.

To begin, there has been a strong push in recent years for reproducible data science. Scientific methods and results should be replicable by other researchers, and part of that includes being able to replicate the process that takes the raw data and produces the tidy data that is ready for analysis. Tidy data is described by Wickham and others (2014) and can be summed up by three principles:

1. Each variable forms a column
2. Each observation forms a row
3. Each type of observational unit forms a table

One problem I have come across, and have been guilty of in the past, is having data in a spreadsheet, modifying it, and then having no way of recovering the original data. Spreadsheets are a convenient way to organize, transform, and lightly analyze data, but problems can quickly arise unless there is a good backup or snapshot system in place. Data is immutable, or at least that is the mindset that researchers must adopt in order to have truly reproducible workflows. The raw data that is collected or produced by a measurement device should never be modified without first being copied, even if for trivial reasons such as correcting a spelling mistake.

To begin the data cleaning journey, I'll introduce the directory system that I had been given to work with. Each task is separated into its own folder, and within each folder is a subdirectory for each age group.

```
RecalibrationData
├── ParticipantAgeSex.xlsx
├── Audiovisual
│   ├── MiddleAge
│   ├── Older
│   └── Young
├── Duration
│   ├── MiddleAge
│   ├── Older
│   └── Young
├── Sensorimotor
│   ├── MiddleAge
│   ├── Older
│   └── Young
└── Visual
    ├── MiddleAge
    ├── Older
    └── Young
```

Within each age group subdirectory are subdirectories for each subject, named by their initials, which in turn contain the experimental data as Matlab files.

```
├── MiddleAge
│   ├── BT
│   │   ├── BTadapt1__MAT.mat
│   │   ├── ...
│   ├── ...
├── Older
│   ├── BB
│   │   ├── BBadapt1__MAT.mat
│   │   ├── ...
│   ├── ...
└── Young
    ├── AC
    │   ├── ACadapt1__MAT.mat
    │   ├── ...
    ├── ...
```

At this point the data appears manageable: there is information contained in the directory structure, such as task, age group, and initials, and the file name contains information about the experimental block. There is also an Excel file that I was later given that contains more subject information like age and sex, though that information is not used in the model. The columns of the Matlab files depend on the task, but generally contain an SOA value and a response, with no column or row name information - that was provided by the researcher who collected the data.

The next step was to create a table of metadata - information extracted from the directory structure and file names, combined with the subject data and the file path. Regular expressions can be used to extract patterns from a string. With a list of all Matlab files within the `RecalibrationData` folder, I tried to extract the task, age group, initials, and block using the expression

```
"^(\\w+)/(\\w+)/(\\w+)/[A-Z]{2,3}_*[A-Z]*(adapt[0-9]|baseline[0-9]*).*"
```

Breaking it apart, the `^(\\w+)/` matches any word characters at the start and before the next slash. Since the directory structure is `Task/AgeGroup/Subject/file.mat`, the regular expression should match three words between slashes. The file name generally follows the pattern `Initials__block#__MAT.mat`, so `[A-Z]{2,3}_*[A-Z]*` should match the initials, and `(adapt[0-9]|baseline[0-9]*)` should match the block (baseline or adapt). This method works for $536$ of the $580$ individual records. Where it failed, it was generally due to misspellings or irregular capitalization of "baseline" and "adapt".
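A minimal sketch of how this extraction might look with `stringr` is given below. The vector `paths`, the matrix `m`, and this particular reconstruction of `feat_typ` are assumptions for illustration (chosen to be consistent with the output shown next), not the exact cleaning code.

```r
library(stringr)

# `paths` is an assumed character vector of Matlab file paths,
# relative to the RecalibrationData/ folder.
pattern <- "^(\\w+)/(\\w+)/(\\w+)/[A-Z]{2,3}_*[A-Z]*(adapt[0-9]|baseline[0-9]*).*"
m <- str_match(paths, pattern)
# columns of m: full match, task, age group, subject initials, block
feat_typ <- m[!is.na(m[, 1]), ]    # records matched by the expression
atypical <- paths[is.na(m[, 1])]   # records needing a looser follow-up expression
```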
```r
table(feat_typ[,4])
#> 
#>   AC   AG   BB   BC   BT   CB   CC   CE   CJ   CM   DB   DC   DD   DE  DTF   DW 
#>   13   12   13   13   13   13   10   12   13    4   13   13    7   12   12   13 
#>   EM   ET   GB   GT   HG   IV   JM JM_F   JS   JW   KC   KK   LP   MC   MS   MW 
#>   13   13   13   13   13    4   12   13   13   13   13   11    7   13   13   26 
#>   NP   PB   SB   SJ  SJF   TS   TW   VM   WL   WW   YG 
#>   12   13   12   26   13   13   13   13   13   12    7 

table(feat_atyp[,4])
#> 
#>  AG  CC  CE  CM  DD DTF  IV  JM  JS  KK  NP  SB  WW  YG 
#>   1   3   1   9   6   1   9   1   2   2   1   1   1   6 
```

Since there is only a handful of irregular block names, they can be handled with a separate regular expression that properly extracts the block information. Other challenges in cleaning the data include the handling of subjects with the same initials. This becomes a problem because filtering by a subject's initials is not guaranteed to return a unique subject. Furthermore, there are two middle age subjects with the same initials "JM", so one was also identified by their sex as "JM_F". The solution is to create a unique subject identifier (labeled `SID`) that is a combination of age group, sex, and initials. For an experiment identifier (labeled `RID`), the task and block are prepended to the `SID`. Each of these IDs uniquely identifies the subjects and their experimental records, making it easier to filter and search.

```r
glimpse(features)
#> Rows: 580
#> Columns: 8
#> $ rid       <fct> av-post1-M-f-CC, av-post1-M-f-DB, av-post1-M-f-HG, av-post1…
#> $ sid       <fct> M-f-CC, M-f-DB, M-f-HG, M-f-JM, M-f-MS, M-f-SJF, M-f-TS, M-…
#> $ path      <chr> "Audiovisual/MiddleAge/CC/CCadapt1__MAT.mat", "Audiovisual/…
#> $ task      <chr> "audiovisual", "audiovisual", "audiovisual", "audiovisual",…
#> $ trial     <fct> post1, post1, post1, post1, post1, post1, post1, post1, pos…
#> $ age_group <fct> middle_age, middle_age, middle_age, middle_age, middle_age,…
#> $ age       <dbl> 39, 44, 41, 48, 49, 43, 47, 49, 49, 44, 43, 44, 48, 48, 50,…
#> $ sex       <fct> F, F, F, F, F, F, F, F, F, M, M, M, M, M, M, F, F, F, F, F,…
```

Then, with the table of clean metadata, the task is simply to loop through each row, read the Matlab file given by `path`, add the unique ID as a column, and then join the experimental data with the metadata to create a data set that is ready for model fitting and data exploration.
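A rough sketch of that last step is shown below. The use of `R.matlab::readMat`, the helper `read_one`, the assumed layout of the Matlab variables, and the working directory are illustrative assumptions; the actual cleaning script may differ.

```r
library(dplyr)
library(purrr)
library(R.matlab)

# Assumed helper: read one Matlab file and return a data frame of trials.
# The first Matlab variable is assumed to be a matrix with the SOA in
# column 1 and the response in column 2 (task-dependent in reality).
read_one <- function(i) {
  mat <- readMat(file.path("RecalibrationData", features$path[i]))
  tibble(rid      = features$rid[i],
         soa      = mat[[1]][, 1],
         response = mat[[1]][, 2])
}

# Loop over the metadata rows, read each file, and join back to the metadata.
trials   <- map_dfr(seq_len(nrow(features)), read_one)
toj_data <- left_join(trials, features, by = "rid")
```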
The full code used to generate the clean data is not yet available online, but it can be shared with the committee. The benefit of writing a script to generate the data is that others can look over my code and verify that it is doing what I intended it to do, and I can go back to any step within the process to make changes if the need arises. Another tool that contributed to the reproducibility is the version control software Git. With Git I can take a snapshot of the changes I make, and revert them if necessary. This thesis is also hosted on GitHub, and the entire history of its development can be viewed there.

## 7.3 Developing a model

Chapter 3 details the deeper considerations that went into building a model, but it doesn't tell the full story of the struggles and setbacks I faced. I find that I learn more from others when they share what didn't work along with the final path that did work. There is knowledge to be gained in failed experiments, because then there is one more way known not to do something, just as a failing outcome reduces the variance of the Beta distribution. I knew that I wanted to apply Bayesian modeling techniques to the data, because it was something new that I was learning.

I first tried using a classical GLM to get a baseline understanding of the data, but the fact that the estimates for certain subjects failed due to complete separation reinforced my enthusiasm to employ non-classical techniques. My first Bayesian model was derived from Lee and Wagenmakers (2014), which used nested loops to iterate over subjects and SOA values. I felt that the data was stored in a complicated way that made the model difficult to comprehend and extend. Next I moved on to using `arm::bayesglm` to remove the convergence issues, but was met with other limitations, such as a strictly linear parameterization and the lack of hierarchical modeling. The book Statistical Rethinking (McElreath 2020) was my first introduction to Bayesian multilevel modeling. His `rethinking` package accompanies the book, and offers a compact yet expressive syntax for models that get translated into a Stan model. A model with age group and block can be written using `rethinking::ulam` as

```r
rethinking::ulam(alist(
  k ~ binomial_logit(n, p),
  p = exp(b + bG[G] + bT[trt]) * (x - (a + aG[G] + aT[trt])),
  a ~ normal(0, 0.06),
  aG[G] ~ normal(0, sd_aG),
  aT[trt] ~ normal(0, sd_aT),
  b ~ normal(3, 1),
  bG[G] ~ normal(0, sd_bG),
  bT[trt] ~ normal(0, sd_bT),
  c(sd_aG, sd_aT, sd_bG, sd_bT) ~ half_cauchy(0, 5)
), data = df, chains = 4, cores = 4, log_lik = TRUE)
```

During my time learning about multilevel models, I tried writing my own package that generates a Stan program based on R formula syntax. At the time I didn't fully understand the concepts of no pooling, complete pooling, and partial pooling, and my package was plagued by the same lack of flexibility that `rstanarm` and `brms` have. In fact, I learned that `brms` and `rstanarm` already did what I was trying to do only after I had started making my library, but it was a fun learning and programming experience. The fossilized remains of my attempt can be viewed on GitHub.

I also tried using `lme4`, `rstanarm`, and `brms`, and learned more about the concepts of fixed and random effects. It was around this time that I noticed that parameterization can have a significant effect on the efficiency of a model and the inferential power of the estimated parameters. When fitting a classical model, there is little difference between estimating $a + bx$ and $d(x - c)$, since the latter can be expanded as $-cd + dx$, which is essentially the same as the first parameterization, but there is a practical difference in the interpretation of the parameters. The second parameterization implies that there is a dependence among the parameters that can be factored out. In the context of psychometric functions, there is a stronger connection between the PSS and $c$, and between the JND and $d$. This parameterization made it easier to specify priors and also increased the model's efficiency. Since only `rethinking` and Stan allow for arbitrary parameterization, I left the others behind.

I finally arrived at a model that worked well, but learned that using a binary indicator variable for the treatment comes with the assumption of higher uncertainty for one of the conditions. The linear model that I arrived at is displayed in equation (7.1).

\begin{equation}
  \theta = \exp\left(\beta + \beta_G + (\beta_T + \beta_{TG}) \times trt\right) \left[x - \left(\alpha + \alpha_G + (\alpha_T + \alpha_{TG}) \times trt\right)\right]
  \tag{7.1}
\end{equation}

Using an indicator variable in this fashion also introduced an interaction effect into the model that I almost did not account for after I switched to using a factor variable. Interaction effects between factors are handled by creating a new factor that is essentially the cross-product of the other factor variables, e.g. for factor variables $x$ and $y$

$$
x = \begin{bmatrix} a \\ b \\ c \end{bmatrix}, \quad
y = \begin{bmatrix} i \\ j \end{bmatrix}
\Longrightarrow
x \times y = \begin{bmatrix} ai & aj \\ bi & bj \\ ci & cj \end{bmatrix}
$$
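In R, one way to build such a crossed factor is with the base function `interaction()`. The factors and level names below are hypothetical stand-ins for the model's grouping variables, used only to illustrate the idea.

```r
# Two illustrative factors standing in for the model's grouping variables.
df <- expand.grid(
  age_group = factor(c("young", "middle", "older")),
  trt       = factor(c("pre", "post"))
)

# The crossed factor: one level for every (age_group, trt) combination,
# which is what the interaction term in the model indexes over.
df$age_trt <- interaction(df$age_group, df$trt, sep = ":")
nlevels(df$age_trt)   # 3 x 2 = 6 levels
```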
The final round of reparameterization came in the form of adopting a non-centered parameterization for more efficient models. To us, $Z \sim N(0, 1^2), \; X = 3 + 2Z$ is the same as $X \sim N(3, 2^2)$, but to a computer, sampling from $X$ can be more difficult than sampling from $Z$ (discussed in chapter 4).

# 8 Conclusion

The real story of developing this psychometric model (the one told here, not the one in the workflow section) is more revealing of the real struggles of performing data analysis than the principled workflow would let on. Often I found myself putting in vast amounts of unnecessary work - work that was nonetheless necessary for me to do in order to realize that it was unnecessary - just because I hadn't yet learned which paths were likely to pay off. On a more personal level, my struggles were never really in developing a model or coding it up - in fact, I could tinker with a program for hours, improving it in this way or that. For me, programming was a puzzle and an art, and I internalized the idea that I could figure out any numerical task given enough time and focus. To the contrary, I believed that writing was a chore, that writing this thesis would be like trying to dam up all the deltas in an attempt to keep the main river on course.

I think of learning mathematics as learning a puzzle game. The first few puzzles are easy, but they get progressively harder. Yet even as the puzzles get harder, your intuitive understanding of the game improves, and you can throw away the obviously poor moves from consideration and try paths that are more likely to move you in the direction of the correct solution. I started my journey towards obtaining a Master of Science in Statistics precisely because I knew that I could obtain a more intuitive understanding of the quantitative world that I live in, and pick up some new tools along the way to create and solve more impressive puzzles.

# A Supplementary Code

Eight Schools Model

```stan
data {
  int<lower=0> J;
  vector[J] y;
  vector<lower=0>[J] sigma;
}
parameters {
  real mu;
  real<lower=0> tau;
  vector[J] theta;
}
model {
  mu ~ normal(0, 10);
  tau ~ cauchy(0, 10);
  theta ~ normal(mu, tau);
  y ~ normal(theta, sigma);
}
```

# References
diff --git a/index.Rmd b/index.Rmd
index 70532c2..8cd5f34 100644
--- a/index.Rmd
+++ b/index.Rmd
@@ -12,7 +12,8 @@ github-repo: adkudson/thesis
 
 ```{r ch010-setup, include=FALSE}
 knitr::write_bib(c(
-  'knitr', 'bookdown', 'rstan', 'rethinking', 'rstanarm', 'stats', 'arm'
+  'knitr', 'bookdown', 'rstan', 'rethinking', 'rstanarm', 'stats', 'arm',
+  'loo'
 ), 'packages.bib', width = 80)
 ```
 
diff --git a/packages.bib b/packages.bib
index f59f32a..4c99519 100644
--- a/packages.bib
+++ b/packages.bib
@@ -23,6 +23,16 @@ @Manual{R-knitr
   url = {https://yihui.org/knitr/},
 }
 
+@Manual{R-loo,
+  title = {loo: Efficient Leave-One-Out Cross-Validation and WAIC for Bayesian
+    Models},
+  author = {Aki Vehtari and Jonah Gabry and Mans Magnusson and Yuling Yao and
+    Paul-Christian Bürkner and Topi Paananen and Andrew Gelman},
+  year = {2020},
+  note = {R package version 2.3.1},
+  url = {https://CRAN.R-project.org/package=loo},
+}
+
 @Manual{R-rethinking,
   title = {rethinking: Statistical Rethinking book package},
   author = {Richard McElreath},
@@ -78,6 +88,26 @@ @InCollection{knitr2014
   url = {http://www.crcpress.com/product/isbn/9781466561595},
 }
 
+@Article{loo2017a,
+  title = {Practical Bayesian model evaluation using leave-one-out
+    cross-validation and WAIC},
+  author = {Aki Vehtari and Andrew Gelman and Jonah Gabry},
+  year = {2017},
+  journal = {Statistics and Computing},
+  volume = {27},
+  issue = {5},
+  pages = {1413--1432},
+  doi = {10.1007/s11222-016-9696-4},
+}
+
+@Article{loo2017b,
+  title = {Using stacking to average Bayesian predictive distributions},
+  author = {Yuling Yao and Aki Vehtari and Daniel Simpson and Andrew Gelman},
+  year = {2017},
+  journal = {Bayesian Analysis},
+  doi = {10.1214/17-BA1091},
+}
+
 @Misc{rstanarm2018,
   title = {Joint longitudinal and time-to-event models via {Stan}.},
   author = {SL Brilleman and MJ Crowther and M Moreno-Betancur and J {Buros