Checked everything from clean start

adknudson · Nov 10, 2020 · 0bab947 · 0bab947
1 parent 417597c
commit 0bab947
Show file tree

Hide file tree

Showing 22 changed files with 146 additions and 98 deletions.
diff --git a/030-workflow.Rmd b/030-workflow.Rmd
@@ -452,10 +452,20 @@ parameters {
 }
 model {
   vector[N] p = beta * (x - alpha);
-  alpha ~ normal(0, 0.05);
-  beta ~ lognormal(3.0, 1.5);
+  alpha ~ normal(0, 0.06);
+  beta ~ lognormal(3.0, 1.0);
   k ~ binomial_logit(n, p);
 }
+generated quantities {
+  vector[N] log_lik;
+  vector[N] k_pred;
+  vector[N] theta = beta * (x - alpha);
+  vector[N] p = inv_logit(theta);
+  for (i in 1:N) {
+    log_lik[i] = binomial_logit_lpmf(k[i] | n[i], theta[i]);
+    k_pred[i]  = binomial_rng(n[i], p[i]);
+  }
+}
 ```
 \setstretch{2.0}
 
@@ -606,17 +616,15 @@ obs_dat <- with(av_dat, list(N = N, x = x, n = n, k = k))
 if (file.exists("models/m031.rds")) {
   m031 <- readRDS("models/m031.rds")
 } else {
-  m031 <- sampling(m031_stan, data = obs_dat, 
-                 chains = 4, cores = 4, refresh = 0)
+  <<ch031-Homeless-Tea>>
   saveRDS(m031, "models/m031.rds")
 }
 ```
 
 
-```{r ch031-Homeless Tea, echo=TRUE, eval=FALSE}
-obs_dat <- with(av_dat, list(N = N, x = x, n = n, k = k)) 
+```{r ch031-Homeless-Tea, echo=TRUE, eval=FALSE}
 m031 <- sampling(m031_stan, data = obs_dat, 
-                 chains = 4, cores = 4, refresh = 0)
+                 chains = 4, cores = 4, refresh = 200)
 ```
 
 
@@ -932,21 +940,24 @@ generated quantities {
   matrix[N_G, N_T] pss;
   matrix[N_G, N_T] jnd;
   vector[N] k_pred;
+  vector[N] log_lik;
 
   for (i in 1:N_G) {
     for (j in 1:N_T) {
-      real mu_b = exp(b + bT[j] + bG[i]);
+      real mu_b = b + bT[j] + bG[i];
       real mu_a = a + aT[j] + aG[i];
       pss[i, j] = mu_a;
-      jnd[i, j] = logit(0.84) / mu_b;
+      jnd[i, j] = logit(0.84) / exp(mu_b);
     }
   }
   
   for (i in 1:N) {
     real mu_a = a + aT[trt[i]] + aG[G[i]];
     real mu_b = b + bT[trt[i]] + bG[G[i]];
-    real p = inv_logit(exp(mu_b) * (x[i] - mu_a));
+    real theta = exp(mu_b) * (x[i] - mu_a);
+    real p = inv_logit(theta);
     k_pred[i] = binomial_rng(n[i], p);
+    log_lik[i] = binomial_logit_lpmf(k[i] | n[i], theta);
   }
 }
 ```
@@ -1096,21 +1107,24 @@ generated quantities {
   matrix[N_G, N_T] pss;
   matrix[N_G, N_T] jnd;
   vector[N] k_pred;
+  vector[N] log_lik;
 
   for (i in 1:N_G) {
     for (j in 1:N_T) {
-      real mu_b = exp(b + bG[i] + bT[j]);
+      real mu_b = b + bG[i] + bT[j];
       real mu_a = a + aG[i] + aT[j];
       pss[i, j] = mu_a;
-      jnd[i, j] = logit(0.84) / mu_b;
+      jnd[i, j] = logit(0.84) / exp(mu_b);
     }
   }
   
   for (i in 1:N) {
     real mu_a = a + aT[trt[i]] + aG[G[i]];
     real mu_b = b + bT[trt[i]] + bG[G[i]];
-    real p = inv_logit(exp(mu_b) * (x[i] - mu_a));
+    real theta = exp(mu_b) * (x[i] - mu_a);
+    real p = inv_logit(theta);
     k_pred[i] = binomial_rng(n[i], p);
+    log_lik[i] = binomial_logit_lpmf(k[i] | n[i], theta);
   }
 }
 ```
@@ -1144,7 +1158,7 @@ keep_pars <- c(
   "sd_aG", "sd_bG",
   "sd_aT", "sd_bT",
   "pss", "jnd",
-  "k_pred"
+  "k_pred", "log_lik"
 )
 
 obs_dat <- with(av_dat, list(N = N, x = x, n = n, k = k, 
@@ -1368,21 +1382,24 @@ generated quantities {
   matrix[N_G, N_T] pss;
   matrix[N_G, N_T] jnd;
   vector[N] k_pred;
+  vector[N] log_lik;
 
   for (i in 1:N_G) {
     for (j in 1:N_T) {
-      real mu_b = exp(b + bGT[i, j]);
-      real mu_a =     a + aGT[i, j];
+      real mu_b = b + bGT[i, j];
+      real mu_a = a + aGT[i, j];
       pss[i, j] = mu_a;
-      jnd[i, j] = logit(0.84) / mu_b;
+      jnd[i, j] = logit(0.84) / exp(mu_b);
     }
   }
   
   for (i in 1:N) {
     real mu_b = b + bGT[G[i], trt[i]];
     real mu_a = a + aGT[G[i], trt[i]];
-    real p = inv_logit(exp(mu_b) * (x[i] - mu_a));
+    real theta = exp(mu_b) * (x[i] - mu_a);
+    real p = inv_logit(theta);
     k_pred[i] = binomial_rng(n[i], p);
+    log_lik[i] = binomial_logit_lpmf(k[i] | n[i], theta);
   }
 }
 ```
@@ -1410,7 +1427,7 @@ if (file.exists("models/m033.rds")) {
   m033 <- sampling(m033_stan, data = obs_dat, seed = 143, thin = 2,
                    iter = 4000, warmup = 2000, pars = keep_pars,
                    control = list(adapt_delta = 0.95),
-                   chains = 4, cores = 4, refresh = 0)
+                   chains = 4, cores = 4, refresh = 200)
   saveRDS(m033, "models/m033.rds")
 }
 ```
@@ -1529,7 +1546,7 @@ if (file.exists("models/m033vis.rds")) {
   m033vis <- sampling(m033_stan, data = obs_dat, seed = 143, thin = 2,
                       iter = 4000, warmup = 2000, pars = keep_pars,
                       control = list(adapt_delta = 0.95),
-                      chains = 4, cores = 4, refresh = 0)
+                      chains = 4, cores = 4, refresh = 200)
   saveRDS(m033vis, "models/m033vis.rds")
 }
 ```
@@ -1801,14 +1818,14 @@ init <- with(obs_dat, replicate(n_chains, list(
   lG = runif(N_G, 0, 0.05)),
 simplify = FALSE))
 
-if (file.exists("models/m034.rds")) {
-  m034 <- readRDS("models/m034.rds")
+if (file.exists("models/m034vis.rds")) {
+  m034vis <- readRDS("models/m034vis.rds")
 } else {
-  m034 <- sampling(m034_stan, data = obs_dat, seed = 626, init = init,
+  m034vis <- sampling(m034_stan, data = obs_dat, seed = 626, init = init,
                    iter = 4000, warmup = 2000, pars = keep_pars,
                    control = list(adapt_delta = 0.95), thin = 2,
                    chains = 4, cores = 4, refresh = 200)
-  saveRDS(m034, "models/m034.rds")
+  saveRDS(m034vis, "models/m034vis.rds")
 }
 ```
 
@@ -1818,7 +1835,7 @@ _Posterior Retrodictive Checks_
 The plot for the distribution of psychometric functions is repeated one more time below (figure \@ref(fig:ch034-Screaming-Proton)). There is now visual separation between the pre- and post-adaptation blocks, with the latter exhibiting a higher slope, which in turn implies a reduced just noticeable difference which is consistent with the audiovisual data in the previous model.
 
 ```{r ch034-Screaming-Proton, fig.cap="There is now a visual distinction between the two blocks unlike in the model without lapse rate. The lapse rate acts as a balance between steep slopes near the PSS and variation near the outer SOA values."}
-p034 <- extract(m034) 
+p034 <- extract(m034vis) 
 if (!file.exists("figures/ch034-Screaming-Proton.png")) {
   n <- 100
   idx <- sample(1:length(p034$a), n, replace = TRUE)

diff --git a/040-model-checking.Rmd b/040-model-checking.Rmd
@@ -16,7 +16,7 @@ p032nc <- as.array(m032nc)
 l032nc <- log_posterior(m032nc)
 n032nc <- nuts_params(m032nc)
 
-m034 <- readRDS("models/m034.rds")
+m034 <- readRDS("models/m034vis.rds")
 p034 <- as.array(m034)
 l034 <- log_posterior(m034)
 n034 <- nuts_params(m034)

diff --git a/050-predictive-inference.Rmd b/050-predictive-inference.Rmd
@@ -1,3 +1,16 @@
+```{r ch050-setup, include=FALSE}
+library(loo)
+
+l031_av   <- loo(readRDS("models/m031.rds"))
+l032_av   <- loo(readRDS("models/m032.rds"))
+l032nc_av <- loo(readRDS("models/m032nc.rds"))
+l033_av   <- loo(readRDS("models/m033.rds"))
+l033_vis  <- loo(readRDS("models/m033vis.rds"))
+l034_vis  <- loo(readRDS("models/m034vis.rds"))
+l034s_vis <- loo(readRDS("models/m034s_vis.rds"))
+```
+
+
 # Predictive Inference {#predictive-inferences}
 
 _All models are wrong but some are useful_

diff --git a/docs/030-workflow.md b/docs/030-workflow.md
@@ -436,10 +436,20 @@ parameters {
 }
 model {
   vector[N] p = beta * (x - alpha);
-  alpha ~ normal(0, 0.05);
-  beta ~ lognormal(3.0, 1.5);
+  alpha ~ normal(0, 0.06);
+  beta ~ lognormal(3.0, 1.0);
   k ~ binomial_logit(n, p);
 }
+generated quantities {
+  vector[N] log_lik;
+  vector[N] k_pred;
+  vector[N] theta = beta * (x - alpha);
+  vector[N] p = inv_logit(theta);
+  for (i in 1:N) {
+    log_lik[i] = binomial_logit_lpmf(k[i] | n[i], theta[i]);
+    k_pred[i]  = binomial_rng(n[i], p[i]);
+  }
+}
 ```
 \setstretch{2.0}
 
@@ -581,21 +591,21 @@ There is no undesirable behavior from this model, so next I check the summary st
    <td style="text-align:left;"> alpha </td>
    <td style="text-align:right;"> 0.0061 </td>
    <td style="text-align:right;"> 0.0001 </td>
-   <td style="text-align:right;"> 0.0035 </td>
-   <td style="text-align:right;"> -0.0007 </td>
-   <td style="text-align:right;"> 0.0129 </td>
-   <td style="text-align:right;"> 3728 </td>
-   <td style="text-align:right;"> 1.000 </td>
+   <td style="text-align:right;"> 0.0038 </td>
+   <td style="text-align:right;"> -0.0012 </td>
+   <td style="text-align:right;"> 0.0136 </td>
+   <td style="text-align:right;"> 4039 </td>
+   <td style="text-align:right;"> 0.9995 </td>
   </tr>
   <tr>
    <td style="text-align:left;"> beta </td>
-   <td style="text-align:right;"> 10.7726 </td>
-   <td style="text-align:right;"> 0.0054 </td>
-   <td style="text-align:right;"> 0.2473 </td>
-   <td style="text-align:right;"> 10.3054 </td>
-   <td style="text-align:right;"> 11.2600 </td>
-   <td style="text-align:right;"> 2073 </td>
-   <td style="text-align:right;"> 1.001 </td>
+   <td style="text-align:right;"> 10.7681 </td>
+   <td style="text-align:right;"> 0.0051 </td>
+   <td style="text-align:right;"> 0.2404 </td>
+   <td style="text-align:right;"> 10.3043 </td>
+   <td style="text-align:right;"> 11.2313 </td>
+   <td style="text-align:right;"> 2202 </td>
+   <td style="text-align:right;"> 1.0003 </td>
   </tr>
 </tbody>
 </table>
@@ -614,9 +624,8 @@ All of the work up until now has been done without peaking at the observed data.
 
 
 ```r
-obs_dat <- with(av_dat, list(N = N, x = x, n = n, k = k)) 
 m031 <- sampling(m031_stan, data = obs_dat, 
-                 chains = 4, cores = 4, refresh = 0)
+                 chains = 4, cores = 4, refresh = 200)
 ```
 
 
@@ -659,21 +668,21 @@ check_hmc_diagnostics(m031)
    <td style="text-align:left;"> alpha </td>
    <td style="text-align:right;"> 0.0373 </td>
    <td style="text-align:right;"> 0.0001 </td>
-   <td style="text-align:right;"> 0.0042 </td>
-   <td style="text-align:right;"> 0.0291 </td>
-   <td style="text-align:right;"> 0.0455 </td>
-   <td style="text-align:right;"> 3992 </td>
-   <td style="text-align:right;"> 1 </td>
+   <td style="text-align:right;"> 0.0043 </td>
+   <td style="text-align:right;"> 0.029 </td>
+   <td style="text-align:right;"> 0.0458 </td>
+   <td style="text-align:right;"> 3765 </td>
+   <td style="text-align:right;"> 1.000 </td>
   </tr>
   <tr>
    <td style="text-align:left;"> beta </td>
-   <td style="text-align:right;"> 8.4220 </td>
-   <td style="text-align:right;"> 0.0038 </td>
+   <td style="text-align:right;"> 8.4259 </td>
+   <td style="text-align:right;"> 0.0039 </td>
    <td style="text-align:right;"> 0.1839 </td>
-   <td style="text-align:right;"> 8.0591 </td>
-   <td style="text-align:right;"> 8.7779 </td>
-   <td style="text-align:right;"> 2393 </td>
-   <td style="text-align:right;"> 1 </td>
+   <td style="text-align:right;"> 8.070 </td>
+   <td style="text-align:right;"> 8.7897 </td>
+   <td style="text-align:right;"> 2249 </td>
+   <td style="text-align:right;"> 1.001 </td>
   </tr>
 </tbody>
 </table>

diff --git a/docs/030-workflow_files/figure-html/ch031-m031-posterior-alpha-beta-1.png b/docs/030-workflow_files/figure-html/ch031-m031-posterior-alpha-beta-1.png
diff --git a/docs/030-workflow_files/figure-html/ch031-obs-vs-retro-plot-1.png b/docs/030-workflow_files/figure-html/ch031-obs-vs-retro-plot-1.png
diff --git a/docs/030-workflow_files/figure-html/ch031-posterior-pf-plot-1.png b/docs/030-workflow_files/figure-html/ch031-posterior-pf-plot-1.png
diff --git a/docs/030-workflow_files/figure-html/ch031-posterior-pss-jnd-plot-1.png b/docs/030-workflow_files/figure-html/ch031-posterior-pss-jnd-plot-1.png
diff --git a/docs/030-workflow_files/figure-html/ch032-obs-vs-retro-plot-1.png b/docs/030-workflow_files/figure-html/ch032-obs-vs-retro-plot-1.png
diff --git a/docs/030-workflow_files/figure-html/ch033-obs-vs-retro-plot-1.png b/docs/030-workflow_files/figure-html/ch033-obs-vs-retro-plot-1.png
diff --git a/docs/030-workflow_files/figure-html/ch034-Insane-Metaphor-1.png b/docs/030-workflow_files/figure-html/ch034-Insane-Metaphor-1.png
diff --git a/docs/060-results_files/figure-html/ch060-Angry-Storm-1.png b/docs/060-results_files/figure-html/ch060-Angry-Storm-1.png
diff --git a/docs/060-results_files/figure-html/ch060-Empty-Silly-Clown-1.png b/docs/060-results_files/figure-html/ch060-Empty-Silly-Clown-1.png
diff --git a/docs/060-results_files/figure-html/ch060-Fish-Strawberry-1.png b/docs/060-results_files/figure-html/ch060-Fish-Strawberry-1.png
diff --git a/docs/060-results_files/figure-html/ch060-Stormy-Fist-1.png b/docs/060-results_files/figure-html/ch060-Stormy-Fist-1.png
diff --git a/docs/060-results_files/figure-html/unnamed-chunk-1-1.png b/docs/060-results_files/figure-html/unnamed-chunk-1-1.png
diff --git a/docs/060-results_files/figure-html/unnamed-chunk-2-1.png b/docs/060-results_files/figure-html/unnamed-chunk-2-1.png
diff --git a/docs/060-results_files/figure-html/unnamed-chunk-2-2.png b/docs/060-results_files/figure-html/unnamed-chunk-2-2.png
diff --git a/docs/070-discussion.md b/docs/070-discussion.md
@@ -76,20 +76,20 @@ Breaking it apart, the `^(\\w+)/` matches any word characters at the start and b
 ```r
 table(feat_typ[,4])
 #> 
-#>     adapt1     adapt2     adapt3   baseline  baseline2 baseline43 
-#>        165        162         43        162          3          1
+#>   AC   AG   BB   BC   BT   CB   CC   CE   CJ   CM   DB   DC   DD   DE  DTF   DW 
+#>   13   12   13   13   13   13   10   12   13    4   13   13    7   12   12   13 
+#>   EM   ET   GB   GT   HG   IV   JM JM_F   JS   JW   KC   KK   LP   MC   MS   MW 
+#>   13   13   13   13   13    4   12   13   13   13   13   11    7   13   13   26 
+#>   NP   PB   SB   SJ  SJF   TS   TW   VM   WL   WW   YG 
+#>   12   13   12   26   13   13   13   13   13   12    7
 ```
 
 
 ```r
 table(feat_atyp[,4])
 #> 
-#>                         adapat1      adapation1    Adaptation_1    Adaptation_2 
-#>               1               1               1               9               7 
-#>     Adaptation1     adaptation2     Adaptation2     adaptation3          adpat1 
-#>               2               1               3               1               1 
-#>          adpat2          adpat3        baseline        Baseline Visual_Baseline 
-#>               4               1               1              10               1
+#>  AG  CC  CE  CM  DD DTF  IV  JM  JS  KK  NP  SB  WW  YG 
+#>   1   3   1   9   6   1   9   1   2   2   1   1   1   6
 ```
 
 Since there is only a handful of irregular block names, they can be dealt with a separate regular expression that properly extracts the block information. Other challenges in cleaning the data include the handling of subjects with the same initials. This becomes a problem because filtering by a subject's initials is not guaranteed to return a unique subject. Furthermore there are two middle age subjects with the same initials of "JM", so one was also identified with their sex "JM_F". The solution is to create a unique identifier (labeled as SID) that is a combination of age group, sex, and initials. For an experiment identifier (labeled as RID), the task and block were prepended to the SID. Each of these IDs uniquely identify the subjects and their experimental records making it easier to filter and search.

diff --git a/docs/discussion.html b/docs/discussion.html
@@ -247,16 +247,16 @@ <h2><span class="header-section-number">7.2</span> Data Cleaning and Reproducibi
 <p>Breaking it apart, the <code>^(\\w+)/</code> matches any word characters at the start and before the next slash. Since the directory structure is <code>Task/AgeGroup/Subject/file.mat</code>, the regular expression should match three words between slashes. The file name generally follows the pattern of <code>Initials__block#__MAT.mat</code>, so <code>[A-Z]{2,3}_*[A-Z]*</code> should match the initials, and <code>(adapt[0-9]|baseline[0-9]*)</code> should match the block (baseline or adapt). This method works for <span class="math inline">\(536\)</span> of the <span class="math inline">\(580\)</span> individual records. For the ones it failed, it was generally do to misspellings or irregular capitalizing of “baseline” and “adapt”.</p>
 <div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="discussion.html#cb28-1"></a><span class="kw">table</span>(feat_typ[,<span class="dv">4</span>])</span>
 <span id="cb28-2"><a href="discussion.html#cb28-2"></a><span class="co">#&gt; </span></span>
-<span id="cb28-3"><a href="discussion.html#cb28-3"></a><span class="co">#&gt;     adapt1     adapt2     adapt3   baseline  baseline2 baseline43 </span></span>
-<span id="cb28-4"><a href="discussion.html#cb28-4"></a><span class="co">#&gt;        165        162         43        162          3          1</span></span></code></pre></div>
+<span id="cb28-3"><a href="discussion.html#cb28-3"></a><span class="co">#&gt;   AC   AG   BB   BC   BT   CB   CC   CE   CJ   CM   DB   DC   DD   DE  DTF   DW </span></span>
+<span id="cb28-4"><a href="discussion.html#cb28-4"></a><span class="co">#&gt;   13   12   13   13   13   13   10   12   13    4   13   13    7   12   12   13 </span></span>
+<span id="cb28-5"><a href="discussion.html#cb28-5"></a><span class="co">#&gt;   EM   ET   GB   GT   HG   IV   JM JM_F   JS   JW   KC   KK   LP   MC   MS   MW </span></span>
+<span id="cb28-6"><a href="discussion.html#cb28-6"></a><span class="co">#&gt;   13   13   13   13   13    4   12   13   13   13   13   11    7   13   13   26 </span></span>
+<span id="cb28-7"><a href="discussion.html#cb28-7"></a><span class="co">#&gt;   NP   PB   SB   SJ  SJF   TS   TW   VM   WL   WW   YG </span></span>
+<span id="cb28-8"><a href="discussion.html#cb28-8"></a><span class="co">#&gt;   12   13   12   26   13   13   13   13   13   12    7</span></span></code></pre></div>
 <div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="discussion.html#cb29-1"></a><span class="kw">table</span>(feat_atyp[,<span class="dv">4</span>])</span>
 <span id="cb29-2"><a href="discussion.html#cb29-2"></a><span class="co">#&gt; </span></span>
-<span id="cb29-3"><a href="discussion.html#cb29-3"></a><span class="co">#&gt;                         adapat1      adapation1    Adaptation_1    Adaptation_2 </span></span>
-<span id="cb29-4"><a href="discussion.html#cb29-4"></a><span class="co">#&gt;               1               1               1               9               7 </span></span>
-<span id="cb29-5"><a href="discussion.html#cb29-5"></a><span class="co">#&gt;     Adaptation1     adaptation2     Adaptation2     adaptation3          adpat1 </span></span>
-<span id="cb29-6"><a href="discussion.html#cb29-6"></a><span class="co">#&gt;               2               1               3               1               1 </span></span>
-<span id="cb29-7"><a href="discussion.html#cb29-7"></a><span class="co">#&gt;          adpat2          adpat3        baseline        Baseline Visual_Baseline </span></span>
-<span id="cb29-8"><a href="discussion.html#cb29-8"></a><span class="co">#&gt;               4               1               1              10               1</span></span></code></pre></div>
+<span id="cb29-3"><a href="discussion.html#cb29-3"></a><span class="co">#&gt;  AG  CC  CE  CM  DD DTF  IV  JM  JS  KK  NP  SB  WW  YG </span></span>
+<span id="cb29-4"><a href="discussion.html#cb29-4"></a><span class="co">#&gt;   1   3   1   9   6   1   9   1   2   2   1   1   1   6</span></span></code></pre></div>
 <p>Since there is only a handful of irregular block names, they can be dealt with a separate regular expression that properly extracts the block information. Other challenges in cleaning the data include the handling of subjects with the same initials. This becomes a problem because filtering by a subject’s initials is not guaranteed to return a unique subject. Furthermore there are two middle age subjects with the same initials of “JM”, so one was also identified with their sex “JM_F”. The solution is to create a unique identifier (labeled as SID) that is a combination of age group, sex, and initials. For an experiment identifier (labeled as RID), the task and block were prepended to the SID. Each of these IDs uniquely identify the subjects and their experimental records making it easier to filter and search.</p>
 <div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="discussion.html#cb30-1"></a><span class="kw">glimpse</span>(features)</span>
 <span id="cb30-2"><a href="discussion.html#cb30-2"></a><span class="co">#&gt; Rows: 580</span></span>

diff --git a/docs/search_index.json b/docs/search_index.json