HB Part-Worth Estimation results.txt
> # @file Hierarchical Bayes Part-Worth Estimation.R
> # @brief This program trains and tests a Hierarchical Bayes Part-Worth Estimation model. The accompanying dataset is fictitious: it shifts the context from the original computer choice study to music streaming apps while reusing the same attribute values.
> # @date 12/11/17
> # @author edited by Justin Wang, originally Thomas Miller
> # @resource ftpress.com/miller
>
> load(file="mtpa_market_simulation_utilities.RData") # load market simulation utilities
> library(ChoiceModelR) # for Hierarchical Bayes Estimation
> library(caret) # for confusion matrix function
>
> complete.data.frame <- read.csv("computer_choice_study.csv")
> # complete.data.frame <- read.csv("music_app_choice_study.csv")
>
> # set printing precision for conjoint measures
> pretty.print <- function(x) {sprintf("%1.2f",round(x,digits = 2))}
> # set up sum contrasts for effects coding
> options(contrasts=c("contr.sum","contr.poly"))
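# A minimal sketch (not executed in this session) of what the sum contrasts
# requested above do; the factor name brand.example is hypothetical.
brand.example <- factor(c("Spotify", "Deezer", "Apple"))
contr.sum(nlevels(brand.example))
# effects coding: one column per non-reference level, with the last level
# coded -1 in every column, so the estimated level effects (part-worths)
# sum to zero across the levels of the factor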
>
> # employ a training-and-test regimen across survey sets/items
> test.set.ids <- c("3","7","11","15") # select four sets/items
> training.set.ids <- setdiff(unique(complete.data.frame$setid),test.set.ids)
> training.data.frame <-
+ subset(complete.data.frame,subset=(setid %in% training.set.ids))
> test.data.frame <-
+ subset(complete.data.frame,subset=(setid %in% test.set.ids))
>
> UniqueID <- unique(training.data.frame$id)
> # set up zero priors
> cc.priors <- matrix(0,nrow=length(UniqueID),ncol=13)
>
> # we could use coefficients from an aggregate model as starting values
> # the code below attempts that, but conjoint.results was not created in
> # this session, so the next three statements fail with errors
> aggregate.cc.betas <- c(as.numeric(conjoint.results$coefficients)[2:7],
+ -sum(as.numeric(conjoint.results$coefficients)[2:7]),
+ as.numeric(conjoint.results$coefficients)[8:13])
Error: object 'conjoint.results' not found
> # clone aggregate part-worths across the individuals in the study
> # set up Bayesian priors
> cc.priors <- matrix(0,nrow=length(UniqueID),ncol=length(aggregate.cc.betas))
Error in matrix(0, nrow = length(UniqueID), ncol = length(aggregate.cc.betas)) :
object 'aggregate.cc.betas' not found
> for(index.for.ID in seq(along=UniqueID))
+ cc.priors[index.for.ID,] <- aggregate.cc.betas
Error in aggregate.cc.betas : object 'aggregate.cc.betas' not found
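# Where conjoint.results could come from: the aggregate part-worths are
# expected from a companion aggregate conjoint analysis that is not part of
# this transcript. A minimal sketch (not run here), assuming a simple linear
# model on the training data with the music app attribute columns present;
# the formula is an assumption for illustration, not the original aggregate
# program.
conjoint.results <-
  lm(choice ~ brand + catalog + features + bitrate + radio + price,
     data = training.data.frame)
# conjoint.results$coefficients would then supply the aggregate betas that
# the statements above try to clone across respondents as starting values.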
>
> colnames(cc.priors) <- c("A1B1","A1B2","A1B3","A1B4","A1B5","A1B6","A1B7",
+ "A1B8","A2B1","A3B1","A4B1","A5B1","A6B1")
>
> # note that the actual names are as follows:
> AB.names <- c("Spotify","Deezer","Apple","Google","Pandora","AMZN","SoundCloud","Tidal",
+ "Catalog","Features","Bitrate","Radio","Price")
>
> # set up run parameters for the MCMC
> # the intent is to use aggregate beta estimates to get started; in this
> # session cc.priors still holds zeros because the aggregate betas above
> # were unavailable
> truebetas <- cc.priors
> cc.xcoding <- c(0,1,1,1,1,1) # first variable categorical, the others continuous
> cc.attlevels <- c(8,8,4,2,8,8) # test run with all attributes and levels
> # no constraint for order on brand so 8x8 matrix of zeroes
> c1 <- matrix(0,ncol=8,nrow=8)
>
> # first continuous attribute (catalog) is ordered: higher numbers are better
> # continuous attributes have 1x1 matrix representation
> c2 <- matrix(1, ncol = 1, nrow = 1, byrow = TRUE)
>
> # second continuous attribute (features) is ordered: higher numbers are better
> # continuous attributes have 1x1 matrix representation
> c3 <- matrix(1, ncol = 1, nrow = 1, byrow = TRUE)
>
> # third continuous attribute (bitrate) is ordered: higher numbers are better
> # continuous attributes have 1x1 matrix representation
> c4 <- matrix(1, ncol = 1, nrow = 1, byrow = TRUE)
>
> # fourth continuous attribute (radio) is ordered: lower numbers are better
> # continuous attributes have 1x1 matrix representation
> c5 <- matrix(-1, ncol = 1, nrow = 1, byrow = TRUE)
>
> # price has the expected order: higher prices are less valued
> # continuous attributes have 1x1 matrix representation
> c6 <- matrix(-1, ncol = 1, nrow = 1, byrow = TRUE)
>
> cc.constraints <- list(c1,c2,c3,c4,c5,c6)
>
> # controls for length of run and sampling from end of run
> # cc.mcmc <- list(R = 10, use = 10) # fast trial run
> # set run parameters 10000 total iterations with estimates based on last 2000
> cc.mcmc <- list(R = 10000, use = 2000) # run parameters
>
> # run options
> cc.options <- list(none=FALSE, save=TRUE, keep=1)
>
> # set up the data frame for analysis
> # redefine set ids so they are a complete set 1-12 as needed for HB functions
> training.data.frame$newsetid <- training.data.frame$setid
> training.data.frame$newsetid <- ifelse((training.data.frame$newsetid == 16),
+ 3,training.data.frame$newsetid)
> training.data.frame$newsetid <- ifelse((training.data.frame$newsetid == 14),
+ 7,training.data.frame$newsetid)
> training.data.frame$newsetid <- ifelse((training.data.frame$newsetid == 13),
+ 11,training.data.frame$newsetid)
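# An alternative renumbering (a sketch, not run here): map the surviving
# training set ids onto 1..12 in a single step instead of hand-coded
# ifelse() calls. The result is a complete 1..12 sequence, although each
# original set may receive a different label than in the recoding above.
training.data.frame$newsetid <-
  match(training.data.frame$setid,
        sort(unique(training.data.frame$setid)))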
>
> UnitID <- training.data.frame$id
> Set <- as.integer(training.data.frame$newsetid)
> Alt <- as.integer(training.data.frame$position)
> X_1 <- as.integer(training.data.frame$brand) # categories by brand
> X_2 <- as.integer(training.data.frame$catalog) # integer values 1 to 8
> X_3 <- as.integer(training.data.frame$features) # integer values 1 to 4
> X_4 <- as.integer(training.data.frame$bitrate) # integer values 1 to 2
> X_5 <- as.integer(training.data.frame$radio) # integer values 1 to 8
> X_6 <- as.integer(training.data.frame$price) # integer values 1 to 8
> y <- as.numeric(training.data.frame$choice) # using special response coding
>
> cc.data <- data.frame(UnitID,Set,Alt,X_1,X_2,X_3,X_4,X_5,X_6,y)
Error in data.frame(UnitID, Set, Alt, X_1, X_2, X_3, X_4, X_5, X_6, y) :
arguments imply differing number of rows: 10752, 0
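# A short diagnostic sketch (not run in this session). The error above occurs
# because several referenced columns (catalog, features, bitrate, radio) come
# back with length zero, which is what happens when the loaded CSV does not
# contain them (for example, when computer_choice_study.csv is read instead
# of the commented-out music_app_choice_study.csv). That choicemodelr() still
# runs below suggests cc.data survived from an earlier session in which the
# matching file had been read.
needed.columns <- c("id", "setid", "position", "brand", "catalog",
                    "features", "bitrate", "radio", "price", "choice")
setdiff(needed.columns, names(training.data.frame))  # names listed here are missing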
>
> # now for the estimation... be patient
> set.seed(9999) # for reproducible results
> out <- choicemodelr(data=cc.data, xcoding = cc.xcoding,
+ mcmc = cc.mcmc, options = cc.options, constraints = cc.constraints)
Logit Data
==================================================
Attribute Type Levels
-----------------------------------
Attribute 1 Part Worth 8
Attribute 2 Linear 1
Attribute 3 Linear 1
Attribute 4 Linear 1
Attribute 5 Linear 1
Attribute 6 Linear 1
12 parameters to be estimated.
224 total units.
Average of 4 alternatives in each of 12 sets per unit.
2688 expanded tasks in total.
Table of choice data pooled across units:
Choice Count Pct.
--------------------
1 746 27.75%
2 872 32.44%
3 507 18.86%
4 563 20.94%
MCMC Inference for Hierarchical Logit
==================================================
Total Iterations: 10000
Draws used in estimation: 2000
Units: 224
Parameters per unit: 12
Task weight: 5
Constraints in effect.
Draws are to be saved.
Prior degrees of freedom: 5
Prior variance: 2
MCMC Iteration Beginning...
Iteration Acceptance RLH Pct. Cert. Avg. Var. RMS Time to End
100 0.339 0.412 0.319 0.37 0.40 2:53
200 0.305 0.570 0.561 1.15 0.96 2:13
300 0.307 0.665 0.689 2.39 1.52 1:60
400 0.303 0.717 0.752 3.64 1.96 1:55
500 0.305 0.746 0.785 5.31 2.40 1:49
600 0.308 0.764 0.805 6.87 2.75 1:48
700 0.311 0.778 0.818 8.40 3.05 1:47
800 0.304 0.787 0.827 10.20 3.38 1:45
900 0.308 0.794 0.833 11.69 3.64 1:46
1000 0.305 0.798 0.837 13.47 3.92 1:46
1100 0.306 0.802 0.841 15.82 4.25 1:45
1200 0.302 0.806 0.844 17.52 4.49 1:46
1300 0.305 0.809 0.847 18.58 4.62 1:45
1400 0.310 0.811 0.849 20.29 4.84 1:45
1500 0.306 0.815 0.852 22.46 5.10 1:44
1600 0.310 0.816 0.854 23.57 5.21 1:44
1700 0.306 0.817 0.854 24.63 5.33 1:44
1800 0.306 0.818 0.855 25.47 5.41 1:47
1900 0.312 0.819 0.856 26.30 5.52 1:48
2000 0.306 0.821 0.857 27.36 5.62 1:48
2100 0.302 0.821 0.858 28.05 5.66 1:48
2200 0.303 0.822 0.858 30.54 5.90 1:48
2300 0.306 0.822 0.859 31.87 6.02 1:48
2400 0.304 0.823 0.860 32.73 6.14 1:48
2500 0.304 0.824 0.861 34.64 6.30 1:47
2600 0.302 0.826 0.862 35.99 6.41 1:47
2700 0.308 0.827 0.863 37.58 6.53 1:47
2800 0.303 0.827 0.863 39.48 6.70 1:48
2900 0.304 0.829 0.864 42.10 6.95 1:47
3000 0.303 0.828 0.864 43.81 7.10 1:48
3100 0.298 0.829 0.864 44.41 7.17 1:48
3200 0.302 0.830 0.865 45.20 7.23 1:48
3300 0.305 0.830 0.866 44.69 7.20 1:47
3400 0.305 0.832 0.867 45.87 7.27 1:47
3500 0.307 0.832 0.867 47.70 7.42 1:46
3600 0.308 0.832 0.867 49.01 7.54 1:46
3700 0.307 0.832 0.868 50.25 7.62 1:45
3800 0.307 0.832 0.867 50.75 7.65 1:45
3900 0.306 0.833 0.868 51.00 7.66 1:45
4000 0.304 0.833 0.868 51.83 7.70 1:45
4100 0.306 0.833 0.869 54.25 7.86 1:44
4200 0.303 0.835 0.870 54.55 7.90 1:43
4300 0.303 0.835 0.870 54.47 7.91 1:42
4400 0.304 0.835 0.870 55.56 8.01 1:42
4500 0.303 0.835 0.870 57.28 8.14 1:41
4600 0.304 0.836 0.871 57.41 8.12 1:40
4700 0.301 0.836 0.871 57.84 8.17 1:39
4800 0.304 0.837 0.872 60.85 8.39 1:38
4900 0.300 0.836 0.871 59.78 8.36 1:37
5000 0.306 0.836 0.871 59.02 8.28 1:36
5100 0.309 0.837 0.871 59.96 8.36 1:35
5200 0.301 0.837 0.871 62.16 8.50 1:34
5300 0.308 0.836 0.871 61.82 8.46 1:33
5400 0.301 0.835 0.870 59.05 8.26 1:33
5500 0.304 0.835 0.870 58.30 8.21 1:32
5600 0.308 0.836 0.871 59.58 8.28 1:31
5700 0.303 0.836 0.871 60.43 8.33 1:30
5800 0.306 0.837 0.872 61.17 8.42 1:28
5900 0.304 0.836 0.871 62.82 8.53 1:27
6000 0.297 0.836 0.871 64.02 8.61 1:26
6100 0.305 0.837 0.872 63.46 8.59 1:24
6200 0.305 0.837 0.871 63.39 8.60 1:23
6300 0.309 0.837 0.872 64.17 8.68 1:21
6400 0.303 0.838 0.873 65.03 8.73 1:20
6500 0.300 0.838 0.872 66.64 8.83 1:18
6600 0.303 0.838 0.873 66.28 8.82 1:17
6700 0.302 0.839 0.873 65.92 8.82 1:16
6800 0.309 0.839 0.873 66.27 8.82 1:14
6900 0.306 0.839 0.873 64.31 8.66 1:12
7000 0.309 0.838 0.873 61.09 8.42 1:11
7100 0.306 0.838 0.872 59.40 8.32 1:09
7200 0.302 0.838 0.872 60.36 8.39 1:07
7300 0.302 0.837 0.872 60.94 8.44 1:05
7400 0.302 0.838 0.872 59.93 8.36 1:03
7500 0.304 0.838 0.872 60.77 8.38 1:01
7600 0.303 0.837 0.872 60.98 8.40 0:59
7700 0.304 0.838 0.872 61.26 8.45 0:57
7800 0.313 0.838 0.873 61.15 8.47 0:55
7900 0.308 0.838 0.872 61.70 8.50 0:53
8000 0.306 0.838 0.873 62.66 8.54 0:51
8100 0.309 0.839 0.873 63.26 8.60 0:49
8200 0.301 0.839 0.874 63.99 8.67 0:47
8300 0.299 0.840 0.874 66.20 8.83 0:45
8400 0.311 0.840 0.875 67.24 8.87 0:42
8500 0.306 0.840 0.875 68.29 8.93 0:40
8600 0.310 0.840 0.874 67.27 8.86 0:38
8700 0.299 0.840 0.874 66.83 8.80 0:35
8800 0.304 0.840 0.874 68.46 8.88 0:33
8900 0.305 0.840 0.874 70.62 8.98 0:30
9000 0.308 0.839 0.874 69.66 8.94 0:28
9100 0.304 0.839 0.873 68.17 8.83 0:25
9200 0.308 0.839 0.874 67.17 8.78 0:23
9300 0.304 0.839 0.874 68.08 8.86 0:20
9400 0.304 0.840 0.874 70.94 9.04 0:17
9500 0.304 0.840 0.875 73.45 9.18 0:14
9600 0.302 0.840 0.874 75.90 9.33 0:12
9700 0.300 0.840 0.874 77.00 9.42 0:09
9800 0.302 0.840 0.874 76.85 9.40 0:06
9900 0.301 0.839 0.873 76.13 9.38 0:03
10000 0.306 0.840 0.874 78.29 9.52 0:00
Total Time Elapsed: 4:57
Writing estimated unit-level betas to Rbetas.csv in the working directory
>
> # out is a list holding the posterior parameter draws
> # for the iterations retained in estimation (use = 2000)
>
> # the MCMC beta parameter estimates are traced on the screen as it runs
>
> # individual part-worth estimates are provided in the output file RBetas.csv
> # the final estimates are printed to RBetas.csv with columns labeled as
> # A1B1 = first attribute first level
> # A1B2 = first attribute second level
> # ....
> # A2B1 = second attribute first level
> # ....
> # gather data from HB posterior parameter distributions
> # we imposed constraints on all continuous parameters so we use betadraw.c
> posterior.mean <- matrix(0, nrow = dim(out$betadraw.c)[1],
+ ncol = dim(out$betadraw.c)[2])
> posterior.sd <- matrix(0, nrow = dim(out$betadraw.c)[1],
+ ncol = dim(out$betadraw.c)[2])
> for(index.row in 1:dim(out$betadraw.c)[1])
+ for(index.col in 1:dim(out$betadraw.c)[2]) {
+ posterior.mean[index.row,index.col] <-
+ mean(out$betadraw.c[index.row,index.col,])
+ posterior.sd[index.row,index.col] <-
+ sd(out$betadraw.c[index.row,index.col,])
+ }
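# An equivalent, more compact computation (a sketch, not run here):
# out$betadraw.c is a units x parameters x saved-draws array, so the nested
# loops above can be replaced by apply() over the first two margins.
posterior.mean <- apply(out$betadraw.c, c(1, 2), mean)
posterior.sd   <- apply(out$betadraw.c, c(1, 2), sd)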
>
> # HB program uses effects coding for categorical variables and
> # mean-centers continuous variables across the levels appearing in the data
> # working with data for one respondent at a time we compute predicted choices
> # for both the training and test choice sets
>
> create.design.matrix <- function(input.data.frame.row) {
+ xdesign.row <- numeric(12)
+ if (input.data.frame.row$brand == "Spotify")
+ xdesign.row[1:7] <- c(1,0,0,0,0,0,0)
+ if (input.data.frame.row$brand == "Deezer")
+ xdesign.row[1:7] <- c(0,1,0,0,0,0,0)
+ if (input.data.frame.row$brand == "Apple")
+ xdesign.row[1:7] <- c(0,0,1,0,0,0,0)
+ if (input.data.frame.row$brand == "Google")
+ xdesign.row[1:7] <- c(0,0,0,1,0,0,0)
+ if (input.data.frame.row$brand == "Pandora")
+ xdesign.row[1:7] <- c(0,0,0,0,1,0,0)
+ if (input.data.frame.row$brand == "AMZN")
+ xdesign.row[1:7] <- c(0,0,0,0,0,1,0)
+ if (input.data.frame.row$brand == "SoundCloud")
+ xdesign.row[1:7] <- c(0,0,0,0,0,0,1)
+ if (input.data.frame.row$brand == "TIDAL")
+ xdesign.row[1:7] <- c(-1,-1,-1,-1,-1,-1,-1)
+
+ xdesign.row[8] <- input.data.frame.row$catalog -4.5
+ xdesign.row[9] <- input.data.frame.row$features -2.5
+ xdesign.row[10] <- input.data.frame.row$bitrate -1.5
+ xdesign.row[11] <- input.data.frame.row$radio -4.5
+ xdesign.row[12] <- input.data.frame.row$price -4.5
+ t(as.matrix(xdesign.row)) # return row of design matrix
+ }
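# A usage sketch with a hypothetical single-profile row (not part of this
# run). Note that the brand strings tested inside the function must match
# the coding in the data exactly; the function tests for "TIDAL" while
# AB.names above lists "Tidal", and a non-matching brand would leave all
# seven brand columns at zero.
example.profile <- data.frame(brand = "Spotify", catalog = 6, features = 3,
                              bitrate = 2, radio = 5, price = 4)
create.design.matrix(example.profile)
# returns a 1 x 12 row: seven effects-coded brand columns followed by five
# mean-centered continuous attributes (for example, catalog 6 becomes
# 6 - 4.5 = 1.5)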
>
> # evaluate performance in the training set
> training.choice.utility <- NULL # initialize utility vector
> # work with one row of respondent training data frame at a time
> # create choice prediction using the individual part-worths
> list.of.ids <- unique(training.data.frame$id)
> for (index.for.id in seq(along=list.of.ids)) {
+ this.id.part.worths <- posterior.mean[index.for.id,]
+ this.id.data.frame <- subset(training.data.frame,
+ subset=(id == list.of.ids[index.for.id]))
+ for (index.for.profile in 1:nrow(this.id.data.frame)) {
+ training.choice.utility <- c(training.choice.utility,
+ create.design.matrix(this.id.data.frame[index.for.profile,]) %*%
+ this.id.part.worths)
+ }
+ }
Error in xdesign.row[8] <- input.data.frame.row$catalog - 4.5 :
replacement has length zero
>
> training.predicted.choice <-
+ choice.set.predictor(training.choice.utility)
> training.actual.choice <- factor(training.data.frame$choice, levels = c(0,1),
+ labels = c("NO","YES"))
> # look for sensitivity > 0.25 for four-profile choice sets
> training.set.performance <- confusionMatrix(data = training.predicted.choice,
+ reference = training.actual.choice, positive = "YES")
Error in table(data, reference, dnn = dnn, ...) :
all arguments must have the same length
> # report choice prediction sensitivity for training data
> cat("\n\nTraining choice set sensitivity = ",
+ sprintf("%1.1f",training.set.performance$byClass[1]*100)," Percent",sep="")
Training choice set sensitivity = 93.7 Percent>
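# choice.set.predictor() comes from mtpa_market_simulation_utilities.RData
# and its source is not shown in this transcript. A sketch of the general
# idea (an assumption, not the loaded implementation): flag the
# highest-utility alternative within each four-profile choice set as "YES".
# The sensitivity reported above comes from a training.set.performance
# object that appears to survive from an earlier session, since the
# confusionMatrix() call just above failed.
choice.set.predictor.sketch <- function(choice.utility, set.size = 4) {
  predicted <- rep("NO", length(choice.utility))
  for (first in seq(1, length(choice.utility), by = set.size)) {
    block <- first:(first + set.size - 1)
    predicted[block[which.max(choice.utility[block])]] <- "YES"
  }
  factor(predicted, levels = c("NO", "YES"))
}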
> # evaluate performance in the test set
> test.choice.utility <- NULL # initialize utility vector
> # work with one row of respondent test data frame at a time
> # create choice prediction using the individual part-worths
> list.of.ids <- unique(test.data.frame$id)
> for (index.for.id in seq(along=list.of.ids)) {
+ this.id.part.worths <- posterior.mean[index.for.id,]
+ this.id.data.frame <- subset(test.data.frame,
+ subset=(id == list.of.ids[index.for.id]))
+ for (index.for.profile in 1:nrow(this.id.data.frame)) {
+ test.choice.utility <- c(test.choice.utility,
+ create.design.matrix(this.id.data.frame[index.for.profile,]) %*%
+ this.id.part.worths)
+ }
+ }
Error in xdesign.row[8] <- input.data.frame.row$catalog - 4.5 :
replacement has length zero
>
> test.predicted.choice <-
+ choice.set.predictor(test.choice.utility)
> test.actual.choice <- factor(test.data.frame$choice, levels = c(0,1),
+ labels = c("NO","YES"))
> # look for sensitivity > 0.25 for four-profile choice sets
> test.set.performance <- confusionMatrix(data = test.predicted.choice,
+ reference = test.actual.choice, positive = "YES")
Error in table(data, reference, dnn = dnn, ...) :
all arguments must have the same length
> # report choice prediction sensitivity for test data
> cat("\n\nTest choice set sensitivity = ",
+ sprintf("%1.1f",test.set.performance$byClass[1]*100)," Percent",sep="")
Test choice set sensitivity = 52.6 Percent