diff --git a/data b/data index 126500ba4b8..8b407372f39 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit 126500ba4b8fec148a5e43f5376938c0b351d675 +Subproject commit 8b407372f396a95f4b8fafdbee5e7b7755fddef9 diff --git a/doc/ipython-notebooks/classification/Classification.ipynb b/doc/ipython-notebooks/classification/Classification.ipynb index 06c36aae98c..56277fdd9ac 100644 --- a/doc/ipython-notebooks/classification/Classification.ipynb +++ b/doc/ipython-notebooks/classification/Classification.ipynb @@ -212,10 +212,9 @@ "epsilon = 1e-3\n", "\n", "svm_linear = sg.create_machine(\"LibLinear\", C1=c, C2=c, \n", - " labels=shogun_labels_linear, \n", " epsilon=epsilon,\n", " liblinear_solver_type=\"L2R_L2LOSS_SVC\")\n", - "svm_linear.train(shogun_feats_linear)\n", + "svm_linear.train(shogun_feats_linear, shogun_labels_linear)\n", "classifiers_linear.append(svm_linear)\n", "classifiers_names.append(\"SVM Linear\")\n", "fadings.append(True)\n", @@ -224,11 +223,10 @@ "plt.title(\"Linear SVM - Linear Features\")\n", "plot_model(plt,svm_linear,feats_linear,labels_linear)\n", "\n", - "svm_non_linear = sg.create_machine(\"LibLinear\", C1=c, C2=c, \n", - " labels=shogun_labels_non_linear,\n", + "svm_non_linear = sg.create_machine(\"LibLinear\", C1=c, C2=c,\n", " epsilon=epsilon,\n", " liblinear_solver_type=\"L2R_L2LOSS_SVC\")\n", - "svm_non_linear.train(shogun_feats_non_linear)\n", + "svm_non_linear.train(shogun_feats_non_linear, shogun_labels_non_linear)\n", "classifiers_non_linear.append(svm_non_linear)\n", "\n", "plt.subplot(122)\n", @@ -405,9 +403,7 @@ "shogun_multiclass_labels_non_linear = sg.MulticlassLabels(multiclass_labels_non_linear)\n", "\n", "naive_bayes_linear = sg.create_machine(\"GaussianNaiveBayes\")\n", - "naive_bayes_linear.put('features', shogun_feats_linear)\n", - "naive_bayes_linear.put('labels', shogun_multiclass_labels_linear)\n", - "naive_bayes_linear.train()\n", + "naive_bayes_linear.train(shogun_feats_linear, shogun_multiclass_labels_linear)\n", "classifiers_linear.append(naive_bayes_linear)\n", "classifiers_names.append(\"Naive Bayes\")\n", "fadings.append(False)\n", @@ -418,9 +414,7 @@ "plot_model(plt,naive_bayes_linear,feats_linear,labels_linear,fading=False)\n", "\n", "naive_bayes_non_linear = sg.create_machine(\"GaussianNaiveBayes\")\n", - "naive_bayes_non_linear.put('features', shogun_feats_non_linear)\n", - "naive_bayes_non_linear.put('labels', shogun_multiclass_labels_non_linear)\n", - "naive_bayes_non_linear.train()\n", + "naive_bayes_non_linear.train(shogun_feats_non_linear, shogun_multiclass_labels_non_linear)\n", "classifiers_non_linear.append(naive_bayes_non_linear)\n", "\n", "plt.subplot(122)\n", @@ -447,7 +441,7 @@ "distances_linear.init(shogun_feats_linear, shogun_feats_linear)\n", "knn_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_linear, \n", " labels=shogun_labels_linear)\n", - "knn_linear.train()\n", + "knn_linear.train(shogun_feats_linear)\n", "classifiers_linear.append(knn_linear)\n", "classifiers_names.append(\"Nearest Neighbors\")\n", "fadings.append(False)\n", @@ -461,7 +455,7 @@ "distances_non_linear.init(shogun_feats_non_linear, shogun_feats_non_linear)\n", "knn_non_linear = sg.create_machine(\"KNN\", k=number_of_neighbors, distance=distances_non_linear, \n", " labels=shogun_labels_non_linear)\n", - "knn_non_linear.train()\n", + "knn_non_linear.train(shogun_feats_non_linear)\n", "classifiers_non_linear.append(knn_non_linear)\n", "\n", "plt.subplot(122)\n", @@ -484,8 +478,8 @@ "source": [ "gamma = 0.1\n", "\n", - "lda_linear = sg.create_machine('LDA', gamma=gamma, labels=shogun_labels_linear)\n", - "lda_linear.train(shogun_feats_linear)\n", + "lda_linear = sg.create_machine('LDA', gamma=gamma)\n", + "lda_linear.train(shogun_feats_linear, shogun_labels_linear)\n", "classifiers_linear.append(lda_linear)\n", "classifiers_names.append(\"LDA\")\n", "fadings.append(True)\n", @@ -495,8 +489,8 @@ "plt.title(\"LDA - Linear Features\")\n", "plot_model(plt,lda_linear,feats_linear,labels_linear)\n", "\n", - "lda_non_linear = sg.create_machine('LDA', gamma=gamma, labels=shogun_labels_non_linear)\n", - "lda_non_linear.train(shogun_feats_non_linear)\n", + "lda_non_linear = sg.create_machine('LDA', gamma=gamma)\n", + "lda_non_linear.train(shogun_feats_non_linear, shogun_labels_non_linear)\n", "classifiers_non_linear.append(lda_non_linear)\n", "\n", "plt.subplot(122)\n", @@ -517,8 +511,8 @@ "metadata": {}, "outputs": [], "source": [ - "qda_linear = sg.create_machine(\"QDA\", labels=shogun_multiclass_labels_linear)\n", - "qda_linear.train(shogun_feats_linear)\n", + "qda_linear = sg.create_machine(\"QDA\")\n", + "qda_linear.train(shogun_feats_linear, shogun_multiclass_labels_linear)\n", "classifiers_linear.append(qda_linear)\n", "classifiers_names.append(\"QDA\")\n", "fadings.append(False)\n", @@ -528,8 +522,8 @@ "plt.title(\"QDA - Linear Features\")\n", "plot_model(plt,qda_linear,feats_linear,labels_linear,fading=False)\n", "\n", - "qda_non_linear = sg.create_machine(\"QDA\", labels=shogun_multiclass_labels_non_linear)\n", - "qda_non_linear.train(shogun_feats_non_linear)\n", + "qda_non_linear = sg.create_machine(\"QDA\")\n", + "qda_non_linear.train(shogun_feats_non_linear, shogun_multiclass_labels_non_linear)\n", "classifiers_non_linear.append(qda_non_linear)\n", "\n", "plt.subplot(122)\n", @@ -682,8 +676,8 @@ "plot_binary_data(plt,feats_non_linear, labels_non_linear)\n", "\n", "for i in range(0,10):\n", - " plt.subplot(2,11,13+i)\n", - " plot_model(plt,classifiers_non_linear[i],feats_non_linear,labels_non_linear,fading=fadings[i])" + " plt.subplot(2,11,13+i)\n", + " plot_model(plt,classifiers_non_linear[i],feats_non_linear,labels_non_linear,fading=fadings[i])" ] }, { @@ -710,7 +704,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb b/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb index bc001cf9be1..b81728bfb61 100644 --- a/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb +++ b/doc/ipython-notebooks/classification/HashedDocDotFeatures.ipynb @@ -190,7 +190,7 @@ "source": [ "C = 0.1\n", "epsilon = 0.01\n", - "svm = sg.create_machine(\"SVMOcas\", C1=C, C2=C, labels=labels, epsilon=epsilon)" + "svm = sg.create_machine(\"SVMOcas\", C1=C, C2=C, epsilon=epsilon)" ] }, { @@ -207,7 +207,7 @@ "metadata": {}, "outputs": [], "source": [ - "_=svm.train(hashed_feats)" + "_=svm.train(hashed_feats, labels)" ] }, { @@ -224,7 +224,7 @@ "metadata": {}, "outputs": [], "source": [ - "predicted_labels = svm.apply()\n", + "predicted_labels = svm.apply(hashed_feats)\n", "print(predicted_labels.get(\"labels\"))" ] }, @@ -286,8 +286,8 @@ "metadata": {}, "outputs": [], "source": [ - "svm.train(hashed_feats_quad)\n", - "predicted_labels = svm.apply()\n", + "svm.train(hashed_feats_quad, labels)\n", + "predicted_labels = svm.apply(hashed_feats_quad)\n", "print(predicted_labels.get(\"labels\"))" ] }, @@ -454,4 +454,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file diff --git a/doc/ipython-notebooks/classification/MKL.ipynb b/doc/ipython-notebooks/classification/MKL.ipynb index cda6f40a2e3..25ae1941792 100644 --- a/doc/ipython-notebooks/classification/MKL.ipynb +++ b/doc/ipython-notebooks/classification/MKL.ipynb @@ -253,10 +253,10 @@ "kernel.add(\"kernel_array\", kernel1)\n", "kernel.init(feats_train, feats_train)\n", "\n", - "mkl = sg.create_machine(\"MKLClassification\", mkl_norm=1, C1=1, C2=1, kernel=kernel, labels=labels)\n", + "mkl = sg.create_machine(\"MKLClassification\", mkl_norm=1, C1=1, C2=1, kernel=kernel)\n", "\n", "#train to get weights\n", - "mkl.train() \n", + "mkl.train(feats_train, labels) \n", "\n", "w=kernel.get_subkernel_weights()\n", "print(w)" @@ -490,9 +490,9 @@ " kernel.add(\"kernel_array\", kernel3)\n", " \n", " kernel.init(feats_tr, feats_tr)\n", - " mkl = sg.create_machine(\"MKLClassification\", mkl_norm=1, C1=1, C2=2, kernel=kernel, labels=lab)\n", + " mkl = sg.create_machine(\"MKLClassification\", mkl_norm=1, C1=1, C2=2, kernel=kernel)\n", " \n", - " mkl.train()\n", + " mkl.train(feats_tr, lab)\n", " \n", " w=kernel.get_subkernel_weights()\n", " return w, mkl\n", @@ -704,17 +704,17 @@ "kernel.init(feats_train, feats_train)\n", "\n", "mkl = sg.create_machine(\"MKLMulticlass\", C=1.2, kernel=kernel, \n", - " labels=labels, mkl_eps=0.001, mkl_norm=1)\n", + " mkl_eps=0.001, mkl_norm=1)\n", "\n", "# set epsilon of SVM\n", "mkl.get(\"machine\").put(\"epsilon\", 1e-2)\n", "\n", - "mkl.train()\n", + "mkl.train(feats_train, labels)\n", "\n", "#initialize with test features\n", "kernel.init(feats_train, feats_test) \n", "\n", - "out = mkl.apply()\n", + "out = mkl.apply(feats_test)\n", "evaluator = sg.create_evaluation(\"MulticlassAccuracy\")\n", "accuracy = evaluator.evaluate(out, labels_rem)\n", "print(\"Accuracy = %2.2f%%\" % (100*accuracy))\n", @@ -748,8 +748,8 @@ "\n", "pk = sg.create_kernel('PolyKernel', degree=10, c=2) \n", "\n", - "svm = sg.create_machine(\"GMNPSVM\", C=C, kernel=pk, labels=labels)\n", - "_=svm.train(feats)\n", + "svm = sg.create_machine(\"GMNPSVM\", C=C, kernel=pk)\n", + "_=svm.train(feats, labels)\n", "out=svm.apply(feats_rem)\n", "evaluator = sg.create_evaluation(\"MulticlassAccuracy\")\n", "accuracy = evaluator.evaluate(out, labels_rem)\n", @@ -776,8 +776,8 @@ "\n", "gk=sg.create_kernel(\"GaussianKernel\", width=width)\n", "\n", - "svm=sg.create_machine(\"GMNPSVM\", C=C, kernel=gk, labels=labels)\n", - "_=svm.train(feats)\n", + "svm=sg.create_machine(\"GMNPSVM\", C=C, kernel=gk)\n", + "_=svm.train(feats, labels)\n", "out=svm.apply(feats_rem)\n", "evaluator = sg.create_evaluation(\"MulticlassAccuracy\")\n", "accuracy = evaluator.evaluate(out, labels_rem)\n", @@ -984,7 +984,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb b/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb index b58eabeab28..286de351266 100644 --- a/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb +++ b/doc/ipython-notebooks/classification/SupportVectorMachines.ipynb @@ -164,8 +164,7 @@ "svm=sg.create_machine('LibLinear', C1=C, C2=C, liblinear_solver_type='L2R_L2LOSS_SVC', epsilon=epsilon)\n", "\n", "#train\n", - "svm.put('labels', labels)\n", - "svm.train(feats_train)\n", + "svm.train(feats_train, labels)\n", "w=svm.get('w')\n", "b=svm.get('bias')" ] @@ -933,8 +932,8 @@ "metadata": {}, "outputs": [], "source": [ - "svm=sg.create_machine(\"GMNPSVM\", C=1, kernel=gaussian_kernel, labels=labels)\n", - "_=svm.train(feats_train)\n", + "svm=sg.create_machine(\"GMNPSVM\", C=1, kernel=gaussian_kernel)\n", + "_=svm.train(feats_train, labels)\n", "\n", "size=100\n", "x1=np.linspace(-6, 6, size)\n", @@ -948,7 +947,7 @@ " plt.subplot(1,len(kernels),i+1)\n", " plt.title(kernels[i].get_name())\n", " svm.put(\"kernel\", kernels[i])\n", - " svm.train(feats_train)\n", + " svm.train(feats_train, labels)\n", " grid_out=svm.apply(grid)\n", " z=grid_out.get(\"labels\").reshape((size, size))\n", " plt.pcolor(x, y, z)\n", @@ -1001,7 +1000,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/intro/Introduction.ipynb b/doc/ipython-notebooks/intro/Introduction.ipynb index 94029f81a2f..c9feae74d1f 100644 --- a/doc/ipython-notebooks/intro/Introduction.ipynb +++ b/doc/ipython-notebooks/intro/Introduction.ipynb @@ -338,10 +338,10 @@ "#prameters to svm\n", "C=0.9\n", "\n", - "svm=sg.create_machine(\"LibLinear\", C1=C, C2=C, labels=labels, \n", + "svm=sg.create_machine(\"LibLinear\", C1=C, C2=C,\n", " liblinear_solver_type=\"L2R_L2LOSS_SVC\")\n", "#train\n", - "svm.train(feats_train)\n", + "svm.train(feats_train, labels)\n", "\n", "size=100" ] @@ -495,11 +495,11 @@ "label_e=trainlab[num_train:]\n", "labels_true=sg.create_labels(label_e)\n", "\n", - "svm=sg.create_machine(\"LibLinear\", C1=C, C2=C, labels=labels, \n", + "svm=sg.create_machine(\"LibLinear\", C1=C, C2=C,\n", " liblinear_solver_type=\"L2R_L2LOSS_SVC\")\n", "\n", "#train and evaluate\n", - "svm.train(feats_train)\n", + "svm.train(feats_train, labels)\n", "output=svm.apply(feats_evaluate)\n", "\n", "#use AccuracyMeasure to get accuracy\n", @@ -688,7 +688,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/multiclass/KNN.ipynb b/doc/ipython-notebooks/multiclass/KNN.ipynb index 063a2569883..0587182462f 100644 --- a/doc/ipython-notebooks/multiclass/KNN.ipynb +++ b/doc/ipython-notebooks/multiclass/KNN.ipynb @@ -286,8 +286,7 @@ " labels.add_subset(idx_train)\n", "\n", " dist = sg.create_distance('EuclideanDistance')\n", - " dist.init(feats, feats)\n", - " knn = sg.create_machine(\"KNN\", k=k, distance=dist, labels=labels)\n", + " knn = sg.create_machine(\"KNN\", k=k, distance=dist)\n", " #knn.set_store_model_features(True)\n", " #FIXME: causes SEGFAULT\n", " if use_cover_tree:\n", @@ -295,10 +294,10 @@ " # knn.put('knn_solver', \"KNN_COVER_TREE\")\n", " else:\n", " knn.put('knn_solver', \"KNN_BRUTE\")\n", - " knn.train()\n", + " knn.train(feats, labels)\n", "\n", " evaluator = sg.create_evaluation(\"MulticlassAccuracy\")\n", - " pred = knn.apply()\n", + " pred = knn.apply(feats)\n", " acc_train[i, j] = evaluator.evaluate(pred, labels)\n", "\n", " feats.remove_subset()\n", @@ -409,8 +408,8 @@ "\n", "gk=sg.create_kernel(\"GaussianKernel\", width=width)\n", "\n", - "svm=sg.create_machine(\"GMNPSVM\", C=C, kernel=gk, labels=labels)\n", - "_=svm.train(feats)" + "svm=sg.create_machine(\"GMNPSVM\", C=C, kernel=gk)\n", + "_=svm.train(feats, labels)" ] }, { @@ -490,7 +489,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb b/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb index 29de81445cf..2c0d16ae009 100644 --- a/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb +++ b/doc/ipython-notebooks/multiclass/Tree/DecisionTrees.ipynb @@ -197,10 +197,10 @@ "outputs": [], "source": [ "# create ID3ClassifierTree object\n", - "id3 = sg.create_machine(\"ID3ClassifierTree\", labels=labels)\n", + "id3 = sg.create_machine(\"ID3ClassifierTree\")\n", "\n", "# learn the tree from training features\n", - "is_successful = id3.train(train_feats)" + "is_successful = id3.train(train_feats, labels)" ] }, { @@ -412,10 +412,10 @@ " train_lab = sg.create_labels(labels)\n", "\n", " # create ID3ClassifierTree object\n", - " id3 = sg.create_machine(\"ID3ClassifierTree\", labels=train_lab)\n", + " id3 = sg.create_machine(\"ID3ClassifierTree\")\n", "\n", " # learn the tree from training features\n", - " id3.train(train_feats)\n", + " id3.train(train_feats, train_lab)\n", "\n", " # apply to test dataset\n", " output = id3.apply(test_feats)\n", @@ -610,9 +610,9 @@ "# steps in C4.5 Tree training bundled together in a python method\n", "def train_tree(feats,types,labels):\n", " # C4.5 Tree object\n", - " tree = sg.create_machine(\"C45ClassifierTree\", labels=labels, m_nominal=types)\n", + " tree = sg.create_machine(\"C45ClassifierTree\", m_nominal=types)\n", " # supply training matrix and train\n", - " tree.train(feats)\n", + " tree.train(feats, labels)\n", " \n", " return tree\n", "\n", @@ -974,10 +974,9 @@ " c = sg.create_machine(\"CARTree\", nominal=feat_types,\n", " mode=problem_type,\n", " folds=num_folds,\n", - " apply_cv_pruning=use_cv_pruning,\n", - " labels=labels)\n", + " apply_cv_pruning=use_cv_pruning)\n", " # train using training features\n", - " c.train(feats)\n", + " c.train(feats, labels)\n", " \n", " return c\n", "\n", @@ -1407,10 +1406,9 @@ " # create CHAID tree object\n", " c = sg.create_machine(\"CHAIDTree\", dependent_vartype=dependent_var_type,\n", " feature_types=feature_types,\n", - " num_breakpoints=num_bins,\n", - " labels=labels)\n", + " num_breakpoints=num_bins)\n", " # train using training features\n", - " c.train(feats)\n", + " c.train(feats, labels)\n", " \n", " return c\n", "\n", @@ -1722,9 +1720,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb b/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb index d2991dacf25..84231482df7 100644 --- a/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb +++ b/doc/ipython-notebooks/multiclass/Tree/TreeEnsemble.ipynb @@ -112,8 +112,7 @@ "outputs": [], "source": [ "# train forest\n", - "rand_forest.put('labels', train_labels)\n", - "rand_forest.train(train_feats)\n", + "rand_forest.train(train_feats, train_labels)\n", "\n", "# load test dataset\n", "testfeat_file= os.path.join(SHOGUN_DATA_DIR, 'uci/letter/test_fm_letter.dat')\n", @@ -142,9 +141,8 @@ " c=sg.create_machine(\"CARTree\", nominal=feature_types,\n", " mode=problem_type,\n", " folds=2,\n", - " apply_cv_pruning=False,\n", - " labels=train_labels)\n", - " c.train(train_feats)\n", + " apply_cv_pruning=False)\n", + " c.train(train_feats, train_labels)\n", " \n", " return c\n", "\n", @@ -213,8 +211,7 @@ "source": [ "def get_rf_accuracy(num_trees,rand_subset_size):\n", " rf=setup_random_forest(num_trees,rand_subset_size,comb_rule,feat_types)\n", - " rf.put('labels', train_labels)\n", - " rf.train(train_feats)\n", + " rf.train(train_feats, train_labels)\n", " out_test=rf.apply_multiclass(test_feats)\n", " acc=sg.create_evaluation(\"MulticlassAccuracy\")\n", " return acc.evaluate(out_test,test_labels)" @@ -365,8 +362,7 @@ "outputs": [], "source": [ "rf=setup_random_forest(100,2,comb_rule,feat_types)\n", - "rf.put('labels', train_labels)\n", - "rf.train(train_feats)\n", + "rf.train(train_feats, train_labels)\n", " \n", "# set evaluation strategy\n", "rf.put(\"oob_evaluation_metric\", sg.create_evaluation(\"MulticlassAccuracy\"))\n", @@ -411,8 +407,7 @@ "def get_oob_errors_wine(num_trees,rand_subset_size):\n", " feat_types=np.array([False]*13)\n", " rf=setup_random_forest(num_trees,rand_subset_size,sg.create_combination_rule(\"MajorityVote\"),feat_types)\n", - " rf.put('labels', train_labels)\n", - " rf.train(train_feats)\n", + " rf.train(train_feats, train_labels)\n", " rf.put(\"oob_evaluation_metric\", sg.create_evaluation(\"MulticlassAccuracy\"))\n", " return rf.get(\"oob_error\") \n", "\n", @@ -494,7 +489,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb b/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb index 8d86f6a5bbd..7152032694c 100644 --- a/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb +++ b/doc/ipython-notebooks/multiclass/multiclass_reduction.ipynb @@ -205,11 +205,10 @@ "\n", " mc_machine = sg.create_machine(\"LinearMulticlassMachine\",\n", " multiclass_strategy=strategy, \n", - " machine=bin_machine, \n", - " labels=lab_train)\n", + " machine=bin_machine)\n", "\n", " t_begin = time.process_time()\n", - " mc_machine.train(feats_train)\n", + " mc_machine.train(feats_train, lab_train)\n", " t_train = time.process_time() - t_begin\n", "\n", " t_begin = time.process_time()\n", @@ -259,11 +258,10 @@ "metadata": {}, "outputs": [], "source": [ - "mcsvm = sg.create_machine(\"MulticlassLibLinear\", C=5.0, \n", - " labels=lab_train, use_bias=True)\n", + "mcsvm = sg.create_machine(\"MulticlassLibLinear\", C=5.0, use_bias=True)\n", "\n", "t_begin = time.process_time()\n", - "mcsvm.train(feats_train)\n", + "mcsvm.train(feats_train, lab_train)\n", "t_train = time.process_time() - t_begin\n", "\n", "t_begin = time.process_time()\n", @@ -472,11 +470,10 @@ " mc_machine = sg.create_machine(\"KernelMulticlassMachine\",\n", " multiclass_strategy=strategy, \n", " kernel=kernel, \n", - " machine=classifier,\n", - " labels=lab_train)\n", + " machine=classifier)\n", "\n", " t_begin = time.process_time()\n", - " mc_machine.train()\n", + " mc_machine.train(feats_train, lab_train)\n", " t_train = time.process_time() - t_begin\n", "\n", " t_begin = time.process_time()\n", @@ -609,10 +606,9 @@ "mc_machine=sg.create_machine(\"KernelMulticlassMachine\",\n", " multiclass_strategy=sg.create_multiclass_strategy(\"MulticlassOneVsRestStrategy\"),\n", " kernel=kernel, \n", - " machine=classifier, \n", - " labels=labels)\n", + " machine=classifier)\n", "\n", - "mc_machine.train()\n", + "mc_machine.train(feats_tr, labels)\n", "\n", "size=100\n", "x1=linspace(-10, 10, size)\n", @@ -668,9 +664,8 @@ "\n", "mc_machine1 = sg.create_machine(\"LinearMulticlassMachine\",\n", " multiclass_strategy=sg.create_multiclass_strategy(\"MulticlassOneVsOneStrategy\"),\n", - " machine=bin_machine, \n", - " labels=labels)\n", - "mc_machine1.train(feats_tr)\n", + " machine=bin_machine)\n", + "mc_machine1.train(feats_tr, labels)\n", "\n", "out1=mc_machine1.apply_multiclass(grid) #main output\n", "z1=out1.get_labels().reshape((size, size))\n", @@ -728,7 +723,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/multiclass/naive_bayes.ipynb b/doc/ipython-notebooks/multiclass/naive_bayes.ipynb index e9d5a4abeb1..4ed06769292 100644 --- a/doc/ipython-notebooks/multiclass/naive_bayes.ipynb +++ b/doc/ipython-notebooks/multiclass/naive_bayes.ipynb @@ -135,9 +135,9 @@ "source": [ "X_train, Y_train = gen_samples(n_train)\n", "\n", - "machine = sg.create_machine(\"GaussianNaiveBayes\", labels=sg.create_labels(Y_train))\n", + "machine = sg.create_machine(\"GaussianNaiveBayes\")\n", "\n", - "machine.train(sg.create_features(X_train))" + "machine.train(sg.create_features(X_train), sg.create_labels(Y_train))" ] }, { @@ -283,7 +283,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/neuralnets/autoencoders.ipynb b/doc/ipython-notebooks/neuralnets/autoencoders.ipynb index 32327fee204..ff6f4808fc1 100644 --- a/doc/ipython-notebooks/neuralnets/autoencoders.ipynb +++ b/doc/ipython-notebooks/neuralnets/autoencoders.ipynb @@ -276,8 +276,7 @@ "\n", "nn.put('max_num_epochs', 50)\n", "\n", - "nn.put('labels', Ytrain)\n", - "_ = nn.train(Xtrain)" + "_ = nn.train(Xtrain, Ytrain)" ] }, { @@ -404,10 +403,9 @@ "# train the network\n", "conv_nn.put('epsilon', 0.0)\n", "conv_nn.put('max_num_epochs', 50)\n", - "conv_nn.put('labels', Ytrain)\n", "\n", "# start training. this might take some time\n", - "_ = conv_nn.train(Xtrain)" + "_ = conv_nn.train(Xtrain, Ytrain)" ] }, { @@ -462,7 +460,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb b/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb index 4dca02f606a..0e15ba56f1c 100644 --- a/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb +++ b/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb @@ -236,8 +236,7 @@ "# uncomment this line to allow the training progress to be printed on the console\n", "#from shogun import MSG_INFO; net_no_reg.io.put('loglevel', MSG_INFO)\n", "\n", - "net_no_reg.put('labels', Ytrain)\n", - "net_no_reg.train(Xtrain) # this might take a while, depending on your machine\n", + "net_no_reg.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n", "\n", "# compute accuracy on the validation set\n", "print(\"Without regularization, accuracy on the validation set =\", compute_accuracy(net_no_reg, Xval, Yval), \"%\")" @@ -265,8 +264,7 @@ "net_l2.put('max_num_epochs', 600)\n", "net_l2.put('seed', 10)\n", "\n", - "net_l2.put('labels', Ytrain)\n", - "net_l2.train(Xtrain) # this might take a while, depending on your machine\n", + "net_l2.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n", "\n", "# compute accuracy on the validation set\n", "print(\"With L2 regularization, accuracy on the validation set =\", compute_accuracy(net_l2, Xval, Yval), \"%\")" @@ -294,8 +292,7 @@ "net_l1.put('max_num_epochs', 600)\n", "net_l1.put('seed', 10)\n", "\n", - "net_l1.put('labels', Ytrain)\n", - "net_l1.train(Xtrain) # this might take a while, depending on your machine\n", + "net_l1.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n", "\n", "# compute accuracy on the validation set\n", "print(\"With L1 regularization, accuracy on the validation set =\", compute_accuracy(net_l1, Xval, Yval), \"%\")" @@ -336,8 +333,7 @@ "net_dropout.put('gd_learning_rate', 0.5)\n", "net_dropout.put('gd_mini_batch_size', 100)\n", "\n", - "net_dropout.put('labels', Ytrain)\n", - "net_dropout.train(Xtrain) # this might take a while, depending on your machine\n", + "net_dropout.train(Xtrain, Ytrain) # this might take a while, depending on your machine\n", "\n", "# compute accuracy on the validation set\n", "print(\"With dropout, accuracy on the validation set =\", compute_accuracy(net_dropout, Xval, Yval), \"%\")" @@ -431,8 +427,7 @@ "net_conv.put(\"seed\", 10)\n", "\n", "# start training\n", - "net_conv.put('labels', Ytrain)\n", - "net_conv.train(Xtrain)\n", + "net_conv.train(Xtrain, Ytrain)\n", "\n", "# compute accuracy on the validation set\n", "print(\"With a convolutional network, accuracy on the validation set =\", compute_accuracy(net_conv, Xval, Yval), \"%\")" @@ -511,7 +506,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb b/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb index 6adb5c1b07a..36c28d0c5c5 100644 --- a/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb +++ b/doc/ipython-notebooks/neuralnets/rbms_dbns.ipynb @@ -370,8 +370,7 @@ "nn.put(\"l2_coefficient\", 0.0001)\n", "\n", "# start training\n", - "nn.put('labels', sg.create_labels(Ytrain))\n", - "nn.train(sg.create_features(Xtrain))" + "nn.train(sg.create_features(Xtrain), sg.create_labels(Ytrain))" ] }, { @@ -426,7 +425,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/doc/ipython-notebooks/regression/Regression.ipynb b/doc/ipython-notebooks/regression/Regression.ipynb index 7f8b7b48de4..2c5d84a750e 100644 --- a/doc/ipython-notebooks/regression/Regression.ipynb +++ b/doc/ipython-notebooks/regression/Regression.ipynb @@ -142,8 +142,8 @@ "metadata": {}, "outputs": [], "source": [ - "ls = sg.create_machine(\"LeastSquaresRegression\", labels=labels_train, features=feats_train)\n", - "ls.train(feats_train)\n", + "ls = sg.create_machine(\"LeastSquaresRegression\")\n", + "ls.train(feats_train, labels_train)\n", "w = ls.get('w')\n", "print('Weights:')\n", "print(w)" @@ -244,8 +244,8 @@ "outputs": [], "source": [ "tau = 0.8\n", - "rr = sg.create_machine(\"LinearRidgeRegression\", tau=tau, features=feats_train, labels=labels_train)\n", - "rr.train(feats_train)\n", + "rr = sg.create_machine(\"LinearRidgeRegression\", tau=tau)\n", + "rr.train(feats_train, labels_train)\n", "w = rr.get('w')\n", "print(w)\n", "out = rr.apply(feats_test).get(\"labels\")" @@ -311,12 +311,12 @@ " preproc.fit(feats_train)\n", " processed_feats = preproc.transform(feats_train) \n", " weights = []\n", - " rr = sg.create_machine(\"LinearRidgeRegression\", tau=tau, labels=labels_train, use_bias=use_bias)\n", + " rr = sg.create_machine(\"LinearRidgeRegression\", tau=tau, use_bias=use_bias)\n", " \n", " #vary regularization\n", " for t in taus:\n", " rr.put('tau', t)\n", - " rr.train(processed_feats)\n", + " rr.train(processed_feats, labels_train)\n", " weights.append(rr.get(\"w\"))\n", " return weights, rr\n", "\n", @@ -553,8 +553,7 @@ "source": [ "#Train and generate weights\n", "la=sg.create_machine(\"LeastAngleRegression\")\n", - "la.put('labels', labels_train)\n", - "la.train(feats_train)\n", + "la.train(feats_train, labels_train)\n", "\n", "size=la.get(\"path_size\")\n", "print (\"Size of path is %s\" %size)" @@ -674,8 +673,8 @@ "width=0.5\n", "tau=0.5\n", "kernel=sg.create_kernel(\"GaussianKernel\", width=width)\n", - "krr=sg.create_machine(\"KernelRidgeRegression\", tau=tau, kernel=kernel, labels=train_labels)\n", - "krr.train(feats_train)\n", + "krr=sg.create_machine(\"KernelRidgeRegression\", tau=tau, kernel=kernel)\n", + "krr.train(feats_train, train_labels)\n", "\n", "feats_test=sg.create_features(x1.reshape(1,len(x1)))\n", "kernel.init(feats_train, feats_test)\n", @@ -887,7 +886,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/examples/meta/src/base_api/dense_dispatching.sg.in b/examples/meta/src/base_api/dense_dispatching.sg.in index eef4d5ff3a7..9539d9a9666 100644 --- a/examples/meta/src/base_api/dense_dispatching.sg.in +++ b/examples/meta/src/base_api/dense_dispatching.sg.in @@ -9,13 +9,13 @@ Labels labels_train = create_labels(f_labels_train) #![create_features] #![create_instance] -Machine lda = create_machine("LDA", labels=labels_train) +Machine lda = create_machine("LDA") #![create_instance] #![train_with_double] -lda.train(features_double) +lda.train(features_double, labels_train) #![train_with_double] #![train_with_float] -lda.train(features_float) +lda.train(features_float, labels_train) #![train_with_float] diff --git a/examples/meta/src/binary/averaged_perceptron.sg.in b/examples/meta/src/binary/averaged_perceptron.sg.in index d40c9ed1f29..d2a133529f8 100644 --- a/examples/meta/src/binary/averaged_perceptron.sg.in +++ b/examples/meta/src/binary/averaged_perceptron.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![set_parameters] -Machine perceptron = create_machine("AveragedPerceptron", labels=labels_train, learn_rate=1.0, max_iterations=1000) +Machine perceptron = create_machine("AveragedPerceptron", learn_rate=1.0, max_iterations=1000) #![set_parameters] #![train_and_apply] -perceptron.train(features_train) +perceptron.train(features_train, labels_train) Labels labels_predict = perceptron.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/binary/domainadaptationsvm.sg.in b/examples/meta/src/binary/domainadaptationsvm.sg.in index 3dd0984b04a..6ed4b58a31f 100644 --- a/examples/meta/src/binary/domainadaptationsvm.sg.in +++ b/examples/meta/src/binary/domainadaptationsvm.sg.in @@ -14,8 +14,8 @@ svm_kernel.init(feats_train, feats_train) #![create_kernel] #![create_svm_and_train] -Machine svm = create_machine("SVMLight", kernel=svm_kernel, labels=labels_train, C1=1.0, C2=1.0) -svm.train() +Machine svm = create_machine("SVMLight", kernel=svm_kernel, C1=1.0, C2=1.0) +svm.train(feats_train, labels_train) #![create_svm_and_train] #![create_kernel] @@ -24,11 +24,11 @@ svm_kernel2.init(feats_train, feats_train) #![create_kernel] #![obtain_dasvm_from_the_previous_svm] -Machine dasvm = create_machine("DomainAdaptationSVM", C1=1.0, C2=1.0, kernel=svm_kernel2, labels=labels_train, presvm=as_svm(svm), B=1.0) +Machine dasvm = create_machine("DomainAdaptationSVM", C1=1.0, C2=1.0, kernel=svm_kernel2, presvm=as_svm(svm), B=1.0) #![obtain_dasvm_from_the_previous_svm] #![train_and_apply] -dasvm.train() +dasvm.train(feats_train, labels_train) Labels labels_predict = dasvm.apply(feats_test) RealVector labels_vector = labels_predict.get_real_vector("labels") RealVector weights = svm.get_real_vector("m_alpha") diff --git a/examples/meta/src/binary/linear_discriminant_analysis.sg.in b/examples/meta/src/binary/linear_discriminant_analysis.sg.in index 02b6f65229a..628abd8d622 100644 --- a/examples/meta/src/binary/linear_discriminant_analysis.sg.in +++ b/examples/meta/src/binary/linear_discriminant_analysis.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine lda = create_machine("LDA", labels=labels_train) +Machine lda = create_machine("LDA") #![create_instance] #![train_and_apply] -lda.train(features_train) +lda.train(features_train, labels_train) Labels labels_predict = lda.apply(features_test) RealVector labels = labels_predict.get_real_vector("labels") #![train_and_apply] diff --git a/examples/meta/src/binary/linear_support_vector_machine.sg.in b/examples/meta/src/binary/linear_support_vector_machine.sg.in index 00ed07bd8b8..49e00c825b4 100644 --- a/examples/meta/src/binary/linear_support_vector_machine.sg.in +++ b/examples/meta/src/binary/linear_support_vector_machine.sg.in @@ -16,11 +16,11 @@ real epsilon = 0.001 #![set_parameters] #![create_instance] -Machine svm = create_machine("LibLinear", C1=C, C2=C, labels=labels_train, epsilon=epsilon, liblinear_solver_type="L2R_L2LOSS_SVC", use_bias=True) +Machine svm = create_machine("LibLinear", C1=C, C2=C, epsilon=epsilon, liblinear_solver_type="L2R_L2LOSS_SVC", use_bias=True) #![create_instance] #![train_and_apply] -svm.train(features_train) +svm.train(features_train, labels_train) Labels labels_predict = svm.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/binary/newton_support_vector_machine.sg.in b/examples/meta/src/binary/newton_support_vector_machine.sg.in index 2bf77e06695..d919fed7de9 100644 --- a/examples/meta/src/binary/newton_support_vector_machine.sg.in +++ b/examples/meta/src/binary/newton_support_vector_machine.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine svm = create_machine("NewtonSVM", labels=labels_train) +Machine svm = create_machine("NewtonSVM") #![create_instance] #![train_and_apply] -svm.train(features_train) +svm.train(features_train, labels_train) BinaryLabels labels_predict = svm.apply_binary(features_test) #![train_and_apply] diff --git a/examples/meta/src/binary/perceptron.sg.in b/examples/meta/src/binary/perceptron.sg.in index 5710db4be7c..807c6434ff0 100644 --- a/examples/meta/src/binary/perceptron.sg.in +++ b/examples/meta/src/binary/perceptron.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine perceptron = create_machine("Perceptron", labels=labels_train, learn_rate=1.0, max_iterations=1000) +Machine perceptron = create_machine("Perceptron", learn_rate=1.0, max_iterations=1000) #![create_instance] #![train_and_apply] -perceptron.train(features_train) +perceptron.train(features_train, labels_train) Labels labels_predict = perceptron.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/binary/svmlin.sg.in b/examples/meta/src/binary/svmlin.sg.in index 1c241d6665a..8d361932b91 100644 --- a/examples/meta/src/binary/svmlin.sg.in +++ b/examples/meta/src/binary/svmlin.sg.in @@ -8,8 +8,8 @@ Features feats_test = create_features(f_feats_test) Labels labels_train = create_labels(f_labels_train) Labels labels_test = create_labels(f_labels_test) -Machine svm = create_machine("SVMLin", C1=0.9, C2=0.9, epsilon=0.00001, labels=labels_train) -svm.train(feats_train) +Machine svm = create_machine("SVMLin", C1=0.9, C2=0.9, epsilon=0.00001) +svm.train(feats_train, labels_train) RealVector weights = svm.get_real_vector("w") real bias = svm.get_real("bias") diff --git a/examples/meta/src/binary/svmocas.sg.in b/examples/meta/src/binary/svmocas.sg.in index 124f25fb132..19e640c7f18 100644 --- a/examples/meta/src/binary/svmocas.sg.in +++ b/examples/meta/src/binary/svmocas.sg.in @@ -9,11 +9,11 @@ Labels labels_train = create_labels(f_labels_train) #![create_features] #![create_classifier] -Machine svm = create_machine("SVMOcas", features=feats_train, labels=labels_train, C1=0.9, C2=0.9, epsilon=0.00001, use_bias=True) +Machine svm = create_machine("SVMOcas", C1=0.9, C2=0.9, epsilon=0.00001, use_bias=True) #![create_classifier] #![train_and_extract_weights] -svm.train() +svm.train(feats_train, labels_train) RealVector weights = svm.get_real_vector("w") real bias = svm.get_real("bias") #![train_and_extract_weights] diff --git a/examples/meta/src/binary/svmsgd.sg.in b/examples/meta/src/binary/svmsgd.sg.in index 75c158e1c06..29a7bb74129 100644 --- a/examples/meta/src/binary/svmsgd.sg.in +++ b/examples/meta/src/binary/svmsgd.sg.in @@ -8,8 +8,8 @@ Features feats_test = create_features(f_feats_test) Labels labels_train = create_labels(f_labels_train) Labels labels_test = create_labels(f_labels_test) -Machine svm = create_machine("SVMSGD", C1=0.9, C2=0.9, epochs=5, labels=labels_train) -svm.train(feats_train) +Machine svm = create_machine("SVMSGD", C1=0.9, C2=0.9, epochs=5) +svm.train(feats_train, labels_train) RealVector weights = svm.get_real_vector("w") real bias = svm.get_real("bias") diff --git a/examples/meta/src/composite/ensemble.sg.in b/examples/meta/src/composite/ensemble.sg.in index b0649d819a1..d1e3523c1f9 100644 --- a/examples/meta/src/composite/ensemble.sg.in +++ b/examples/meta/src/composite/ensemble.sg.in @@ -11,7 +11,7 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create machine] -Machine ensemble = create_machine("EnsembleMachine", labels = labels_train) +Machine ensemble = create_machine("EnsembleMachine") Machine submachine1 = create_machine("MulticlassOCAS") Machine submachine2 = create_machine("MulticlassLibLinear") ensemble.add("machines", submachine1) @@ -21,7 +21,7 @@ ensemble.put("combination_rule", c) #![create machine] #![train_and_apply] -ensemble.train(features_train) +ensemble.train(features_train, labels_train) Labels labels_predict = ensemble.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/evaluation/accuracy_multiclass.sg.in b/examples/meta/src/evaluation/accuracy_multiclass.sg.in index d57830112ea..76a7dd4ff55 100644 --- a/examples/meta/src/evaluation/accuracy_multiclass.sg.in +++ b/examples/meta/src/evaluation/accuracy_multiclass.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_classifier] -Machine svm= create_machine("MulticlassLibLinear", C=1.0, labels=labels_train) +Machine svm= create_machine("MulticlassLibLinear", C=1.0) #![create_classifier] #![train_and_apply] -svm.train(feats_train) +svm.train(feats_train, labels_train) Labels predicted_labels = svm.apply(feats_test) #![train_and_apply] diff --git a/examples/meta/src/evaluation/clustering.sg.in b/examples/meta/src/evaluation/clustering.sg.in index a040d69bde7..5160da9fd49 100644 --- a/examples/meta/src/evaluation/clustering.sg.in +++ b/examples/meta/src/evaluation/clustering.sg.in @@ -18,8 +18,8 @@ RealMatrix centers = kmeans.get_real_matrix("cluster_centers") Labels labels_centroids = create_labels(f_labels_centroids) Features fea_centroids = create_features(centers) Distance d2 = create_distance("EuclideanDistance", lhs=fea_centroids, rhs=fea_centroids) -Machine knn = create_machine("KNN", k=1, distance=d2, labels=labels_centroids) -knn.train() +Machine knn = create_machine("KNN", k=1, distance=d2) +knn.train(fea_centroids, labels_centroids) Labels gnd_hat = knn.apply(features_train) #![assign_labels] diff --git a/examples/meta/src/evaluation/cross_validation.sg.in b/examples/meta/src/evaluation/cross_validation.sg.in index a33b205b1e9..448f62f782b 100644 --- a/examples/meta/src/evaluation/cross_validation.sg.in +++ b/examples/meta/src/evaluation/cross_validation.sg.in @@ -16,7 +16,7 @@ real epsilon = 0.001 #![set_parameters] #![create_instance] -Machine svm = create_machine("LibLinear", labels=labels_train, epsilon=epsilon, C1=C, C2=C, liblinear_solver_type="L2R_L2LOSS_SVC", seed=2) +Machine svm = create_machine("LibLinear", epsilon=epsilon, C1=C, C2=C, liblinear_solver_type="L2R_L2LOSS_SVC", seed=2) #![create_instance] #![create_cross_validation] @@ -32,7 +32,7 @@ real stddev = result.get_real("std_dev") #![evaluate_and_get_result] #![get_results_test_data] -svm.train(features_train) +svm.train(features_train, labels_train) Labels labels_predict = svm.apply(features_test) real accuracy_test = evaluation_criterion.evaluate(labels_predict, labels_test) #![get_results_test_data] @@ -51,7 +51,7 @@ Labels reg_labels_test = create_labels(reg_lab_test) #![create_machine_REGRESSION] real tau = 0.001 -Machine lrr = create_machine("LinearRidgeRegression", tau=tau, labels=reg_labels_train) +Machine lrr = create_machine("LinearRidgeRegression", tau=tau) #![create_instance_REGRESSION] #![create_cross_validation_REGRESSION] @@ -65,7 +65,7 @@ EvaluationResult result_lrr = cross_validation.evaluate() #![evaluate_and_get_result_REGRESSION] #![evaluate_error_REGRESSION] -lrr.train(reg_features_train) +lrr.train(reg_features_train, reg_labels_train) Labels reg_labels_predict = lrr.apply(reg_features_test) real mse = MSE_evaluation.evaluate(reg_labels_predict, reg_labels_test) #![evaluate_error_REGRESSION] diff --git a/examples/meta/src/evaluation/multiclass_ovr.sg.in b/examples/meta/src/evaluation/multiclass_ovr.sg.in index bb1fdd1d00b..e072e244ed5 100644 --- a/examples/meta/src/evaluation/multiclass_ovr.sg.in +++ b/examples/meta/src/evaluation/multiclass_ovr.sg.in @@ -7,12 +7,12 @@ Labels labels_train = create_labels(f_labels_train) #![create_features] #![create_classifier] -Machine svm= create_machine("MulticlassLibLinear", C=1.0, labels=labels_train) +Machine svm= create_machine("MulticlassLibLinear", C=1.0) #![create_classifier] #![train_and_apply] -svm.train(feats_train) -Labels labels_predicted = svm.apply() +svm.train(feats_train, labels_train) +Labels labels_predicted = svm.apply(feats_train) RealVector labels = labels_predicted.get_real_vector("labels") #![train_and_apply] diff --git a/examples/meta/src/gaussian_process/classifier.sg.in b/examples/meta/src/gaussian_process/classifier.sg.in index cc007183063..02d93825ecc 100644 --- a/examples/meta/src/gaussian_process/classifier.sg.in +++ b/examples/meta/src/gaussian_process/classifier.sg.in @@ -18,11 +18,11 @@ MeanFunction mean_function = create_gp_mean("ConstMean") #![create_instance] LikelihoodModel gauss_likelihood = create_gp_likelihood("SoftMaxLikelihood") Inference inference_method = create_gp_inference("MultiLaplaceInferenceMethod", kernel=k, mean_function=mean_function, likelihood_model=gauss_likelihood) -GaussianProcess gp_classifier = create_gaussian_process("GaussianProcessClassification", inference_method=inference_method, seed=1, labels=labels_train) +GaussianProcess gp_classifier = create_gaussian_process("GaussianProcessClassification", inference_method=inference_method, seed=1) #![create_instance] #![train_and_apply] -gp_classifier.train(features_train) +gp_classifier.train(features_train, labels_train) MulticlassLabels labels_predict = gp_classifier.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/gaussian_process/sparse_regression.sg.in b/examples/meta/src/gaussian_process/sparse_regression.sg.in index b070b7e4990..7cdee6dc614 100644 --- a/examples/meta/src/gaussian_process/sparse_regression.sg.in +++ b/examples/meta/src/gaussian_process/sparse_regression.sg.in @@ -30,11 +30,11 @@ Inference inference_method = create_gp_inference("FITCInferenceMethod", kernel=k #![create_inference] #![create_instance] -GaussianProcess gp_regression = create_gaussian_process("GaussianProcessRegression", inference_method=inference_method, labels=labels_train, inducing_features=inducing_features) +GaussianProcess gp_regression = create_gaussian_process("GaussianProcessRegression", inference_method=inference_method, inducing_features=inducing_features) #![create_instance] #![train_and_apply] -gp_regression.train(features_train) +gp_regression.train(features_train, labels_train) RegressionLabels labels_predict = gp_regression.apply_regression(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/cartree.sg.in b/examples/meta/src/multiclass/cartree.sg.in index d842ac93624..2064342f8a3 100644 --- a/examples/meta/src/multiclass/cartree.sg.in +++ b/examples/meta/src/multiclass/cartree.sg.in @@ -19,11 +19,10 @@ ft[1] = False #![create_instance] Machine classifier = create_machine("CARTree", nominal = ft,mode = enum EProblemType.PT_MULTICLASS, folds=5, apply_cv_pruning=True, seed=1) -classifier.set_labels(labels_train) #![create_instance] #![train_and_apply] -classifier.train(features_train) +classifier.train(features_train, labels_train) MulticlassLabels labels_predict = classifier.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/chaid_tree.sg.in b/examples/meta/src/multiclass/chaid_tree.sg.in index cf43fc41b45..eebbfbd806a 100644 --- a/examples/meta/src/multiclass/chaid_tree.sg.in +++ b/examples/meta/src/multiclass/chaid_tree.sg.in @@ -18,11 +18,10 @@ ft[1] = 2 #![create_instance] CHAIDTree classifier(0, ft, 10) -classifier.set_labels(labels_train) #![create_instance] #![train_and_apply] -classifier.train(features_train) +classifier.train(features_train, labels_train) MulticlassLabels labels_predict = classifier.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_discriminant_aed.sg.in b/examples/meta/src/multiclass/ecoc_discriminant_aed.sg.in index b1804b4fa54..4dd87e8fadd 100644 --- a/examples/meta/src/multiclass/ecoc_discriminant_aed.sg.in +++ b/examples/meta/src/multiclass/ecoc_discriminant_aed.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_discriminant_ed.sg.in b/examples/meta/src/multiclass/ecoc_discriminant_ed.sg.in index 9d70fe306dc..b44f265cbbf 100644 --- a/examples/meta/src/multiclass/ecoc_discriminant_ed.sg.in +++ b/examples/meta/src/multiclass/ecoc_discriminant_ed.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_discriminant_hd.sg.in b/examples/meta/src/multiclass/ecoc_discriminant_hd.sg.in index 66729334f5d..6e8186cfc1e 100644 --- a/examples/meta/src/multiclass/ecoc_discriminant_hd.sg.in +++ b/examples/meta/src/multiclass/ecoc_discriminant_hd.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_discriminant_ihd.sg.in b/examples/meta/src/multiclass/ecoc_discriminant_ihd.sg.in index ea28f9d2aeb..a2215264934 100644 --- a/examples/meta/src/multiclass/ecoc_discriminant_ihd.sg.in +++ b/examples/meta/src/multiclass/ecoc_discriminant_ihd.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_discriminant_llb.sg.in b/examples/meta/src/multiclass/ecoc_discriminant_llb.sg.in index 3ee82d36705..cb7287ab646 100644 --- a/examples/meta/src/multiclass/ecoc_discriminant_llb.sg.in +++ b/examples/meta/src/multiclass/ecoc_discriminant_llb.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_forest_aed.sg.in b/examples/meta/src/multiclass/ecoc_forest_aed.sg.in index 009c9e80592..7938cec73e2 100644 --- a/examples/meta/src/multiclass/ecoc_forest_aed.sg.in +++ b/examples/meta/src/multiclass/ecoc_forest_aed.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_forest_ed.sg.in b/examples/meta/src/multiclass/ecoc_forest_ed.sg.in index a90e1a83b05..c5fa5c93c48 100644 --- a/examples/meta/src/multiclass/ecoc_forest_ed.sg.in +++ b/examples/meta/src/multiclass/ecoc_forest_ed.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_forest_hd.sg.in b/examples/meta/src/multiclass/ecoc_forest_hd.sg.in index 7b869c1ad4e..26fa6d3fb70 100644 --- a/examples/meta/src/multiclass/ecoc_forest_hd.sg.in +++ b/examples/meta/src/multiclass/ecoc_forest_hd.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_forest_ihd.sg.in b/examples/meta/src/multiclass/ecoc_forest_ihd.sg.in index ded419c6d92..ab6e5ccbbcb 100644 --- a/examples/meta/src/multiclass/ecoc_forest_ihd.sg.in +++ b/examples/meta/src/multiclass/ecoc_forest_ihd.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_forest_llb.sg.in b/examples/meta/src/multiclass/ecoc_forest_llb.sg.in index 50711a9eeb0..0bd0893aa7c 100644 --- a/examples/meta/src/multiclass/ecoc_forest_llb.sg.in +++ b/examples/meta/src/multiclass/ecoc_forest_llb.sg.in @@ -23,11 +23,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovo_aed.sg.in b/examples/meta/src/multiclass/ecoc_ovo_aed.sg.in index 60a1a05d306..f2902224260 100644 --- a/examples/meta/src/multiclass/ecoc_ovo_aed.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovo_aed.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovo_ed.sg.in b/examples/meta/src/multiclass/ecoc_ovo_ed.sg.in index 46301177202..99e21b96a8b 100644 --- a/examples/meta/src/multiclass/ecoc_ovo_ed.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovo_ed.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovo_hd.sg.in b/examples/meta/src/multiclass/ecoc_ovo_hd.sg.in index 8fe8d4f40cc..428b3fc4edc 100644 --- a/examples/meta/src/multiclass/ecoc_ovo_hd.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovo_hd.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovo_ihd.sg.in b/examples/meta/src/multiclass/ecoc_ovo_ihd.sg.in index ca6fb711422..b688533d3bc 100644 --- a/examples/meta/src/multiclass/ecoc_ovo_ihd.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovo_ihd.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovo_llb.sg.in b/examples/meta/src/multiclass/ecoc_ovo_llb.sg.in index 4baefb7b9a6..b62ed72f0dd 100644 --- a/examples/meta/src/multiclass/ecoc_ovo_llb.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovo_llb.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovr_aed.sg.in b/examples/meta/src/multiclass/ecoc_ovr_aed.sg.in index 46e30f98353..bfc23865b9a 100644 --- a/examples/meta/src/multiclass/ecoc_ovr_aed.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovr_aed.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovr_ed.sg.in b/examples/meta/src/multiclass/ecoc_ovr_ed.sg.in index 2bb4f1cd1c5..3592c33fdef 100644 --- a/examples/meta/src/multiclass/ecoc_ovr_ed.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovr_ed.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovr_hd.sg.in b/examples/meta/src/multiclass/ecoc_ovr_hd.sg.in index 878372b9a7b..3319cf4a22e 100644 --- a/examples/meta/src/multiclass/ecoc_ovr_hd.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovr_hd.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovr_ihd.sg.in b/examples/meta/src/multiclass/ecoc_ovr_ihd.sg.in index a600caf88ac..14a510db114 100644 --- a/examples/meta/src/multiclass/ecoc_ovr_ihd.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovr_ihd.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_ovr_llb.sg.in b/examples/meta/src/multiclass/ecoc_ovr_llb.sg.in index 29f3ae28400..2b5f8bde0ac 100644 --- a/examples/meta/src/multiclass/ecoc_ovr_llb.sg.in +++ b/examples/meta/src/multiclass/ecoc_ovr_llb.sg.in @@ -21,11 +21,11 @@ MulticlassStrategy strategy = create_multiclass_strategy("ECOCStrategy", encoder #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_dense_aed.sg.in b/examples/meta/src/multiclass/ecoc_random_dense_aed.sg.in index ace60664712..d3c98ffcb38 100644 --- a/examples/meta/src/multiclass/ecoc_random_dense_aed.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_dense_aed.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_dense_ed.sg.in b/examples/meta/src/multiclass/ecoc_random_dense_ed.sg.in index 0db262c1f26..0f47968be09 100644 --- a/examples/meta/src/multiclass/ecoc_random_dense_ed.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_dense_ed.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_dense_hd.sg.in b/examples/meta/src/multiclass/ecoc_random_dense_hd.sg.in index 945cb5390a2..2061ef0eab1 100644 --- a/examples/meta/src/multiclass/ecoc_random_dense_hd.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_dense_hd.sg.in @@ -22,12 +22,12 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) -Labels labels_predict = mc_classifier.apply(features_test) +mc_classifier.train(features_train, labels_train) +MulticlassLabels labels_predict = mc_classifier.apply_multiclass(features_test) #![train_and_apply] #![evaluate_accuracy] diff --git a/examples/meta/src/multiclass/ecoc_random_dense_ihd.sg.in b/examples/meta/src/multiclass/ecoc_random_dense_ihd.sg.in index 6f914c05f23..7d67bfcf27c 100644 --- a/examples/meta/src/multiclass/ecoc_random_dense_ihd.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_dense_ihd.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_dense_llb.sg.in b/examples/meta/src/multiclass/ecoc_random_dense_llb.sg.in index 521af659c05..aa68ddeee44 100644 --- a/examples/meta/src/multiclass/ecoc_random_dense_llb.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_dense_llb.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_sparse_aed.sg.in b/examples/meta/src/multiclass/ecoc_random_sparse_aed.sg.in index 3eb830c9cf3..373b3635c35 100644 --- a/examples/meta/src/multiclass/ecoc_random_sparse_aed.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_sparse_aed.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_sparse_ed.sg.in b/examples/meta/src/multiclass/ecoc_random_sparse_ed.sg.in index 9d40f6f296b..6b9c9ffec9a 100644 --- a/examples/meta/src/multiclass/ecoc_random_sparse_ed.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_sparse_ed.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_sparse_hd.sg.in b/examples/meta/src/multiclass/ecoc_random_sparse_hd.sg.in index ea5c2cee788..0dd38f099db 100644 --- a/examples/meta/src/multiclass/ecoc_random_sparse_hd.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_sparse_hd.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_sparse_ihd.sg.in b/examples/meta/src/multiclass/ecoc_random_sparse_ihd.sg.in index c7fbca0933d..817d5f7ed14 100644 --- a/examples/meta/src/multiclass/ecoc_random_sparse_ihd.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_sparse_ihd.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/ecoc_random_sparse_llb.sg.in b/examples/meta/src/multiclass/ecoc_random_sparse_llb.sg.in index 5738b69eb98..8d3c0ad8efe 100644 --- a/examples/meta/src/multiclass/ecoc_random_sparse_llb.sg.in +++ b/examples/meta/src/multiclass/ecoc_random_sparse_llb.sg.in @@ -22,11 +22,11 @@ MulticlassStrategy rnd_dense_strategy=create_multiclass_strategy("ECOCStrategy", #![choose_strategy] #![create_instance] -Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier, labels=labels_train) +Machine mc_classifier=create_machine("LinearMulticlassMachine", multiclass_strategy=rnd_dense_strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) Labels labels_predict = mc_classifier.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/gaussian_naive_bayes.sg.in b/examples/meta/src/multiclass/gaussian_naive_bayes.sg.in index 47c50613df0..9709698d1c1 100644 --- a/examples/meta/src/multiclass/gaussian_naive_bayes.sg.in +++ b/examples/meta/src/multiclass/gaussian_naive_bayes.sg.in @@ -10,11 +10,11 @@ Labels labels_train = create_labels(f_labels_train) #![create_instance] -Machine gnb = create_machine("GaussianNaiveBayes", features=features_train, labels=labels_train) +Machine gnb = create_machine("GaussianNaiveBayes") #![create_instance] #![train_and_apply] -gnb.train() +gnb.train(features_train, labels_train) Labels labels_predict = gnb.apply(features_test) RealVector labels = labels_predict.get_real_vector("labels") #![train_and_apply] diff --git a/examples/meta/src/multiclass/gmnpsvm.sg.in b/examples/meta/src/multiclass/gmnpsvm.sg.in index d29a1a4dc27..5e28fb1ab0b 100644 --- a/examples/meta/src/multiclass/gmnpsvm.sg.in +++ b/examples/meta/src/multiclass/gmnpsvm.sg.in @@ -10,11 +10,11 @@ Labels labels_train = create_labels(f_labels_train) #![create_machine] Kernel gaussian_kernel = create_kernel("GaussianKernel", width=2.1) -Machine gmnpsvm = create_machine("GMNPSVM", C=1.0, kernel=gaussian_kernel, labels=labels_train) +Machine gmnpsvm = create_machine("GMNPSVM", C=1.0, kernel=gaussian_kernel) #![create_machine] #![train_and_apply] -gmnpsvm.train(feats_train) +gmnpsvm.train(feats_train, labels_train) Labels test_labels = gmnpsvm.apply(feats_test) RealVector test_labels_vector = test_labels.get_real_vector("labels") #![train_and_apply] diff --git a/examples/meta/src/multiclass/k_nearest_neighbours.sg.in b/examples/meta/src/multiclass/k_nearest_neighbours.sg.in index 390f044b78f..332866a9e83 100644 --- a/examples/meta/src/multiclass/k_nearest_neighbours.sg.in +++ b/examples/meta/src/multiclass/k_nearest_neighbours.sg.in @@ -11,7 +11,7 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![choose_distance] -Distance d = create_distance("EuclideanDistance", lhs=features_train, rhs=features_train) +Distance d = create_distance("EuclideanDistance") #![choose_distance] #![create_instance] @@ -20,7 +20,7 @@ Machine knn = create_machine("KNN", k=k, distance=d, labels=labels_train) #![create_instance] #![train_and_apply] -knn.train() +knn.train(features_train, labels_train) MulticlassLabels labels_predict = knn.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/large_margin_nearest_neighbours.sg.in b/examples/meta/src/multiclass/large_margin_nearest_neighbours.sg.in index 5f9cd3b672c..5b254385b75 100644 --- a/examples/meta/src/multiclass/large_margin_nearest_neighbours.sg.in +++ b/examples/meta/src/multiclass/large_margin_nearest_neighbours.sg.in @@ -21,8 +21,8 @@ Distance lmnn_distance = lmnn.get_distance() #![train_metric] #![train_and_apply] -Machine knn = create_machine("KNN", k=k, distance=lmnn_distance, labels=labels_train) -knn.train() +Machine knn = create_machine("KNN", k=k, distance=lmnn_distance) +knn.train(features_train, labels_train) MulticlassLabels labels_predict = knn.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/linear.sg.in b/examples/meta/src/multiclass/linear.sg.in index a58892de26f..18e6328f7e6 100644 --- a/examples/meta/src/multiclass/linear.sg.in +++ b/examples/meta/src/multiclass/linear.sg.in @@ -19,11 +19,11 @@ MulticlassStrategy strategy=create_multiclass_strategy("MulticlassOneVsOneStrate #![choose_strategy] #![create_instance] -Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier, labels=labels_train) +Machine mc_classifier = create_machine("LinearMulticlassMachine", multiclass_strategy=strategy, machine=classifier) #![create_instance] #![train_and_apply] -mc_classifier.train(features_train) +mc_classifier.train(features_train, labels_train) MulticlassLabels labels_predict = mc_classifier.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/linear_discriminant_analysis.sg.in b/examples/meta/src/multiclass/linear_discriminant_analysis.sg.in index 6b2c00d6fed..b25e95eb6f2 100644 --- a/examples/meta/src/multiclass/linear_discriminant_analysis.sg.in +++ b/examples/meta/src/multiclass/linear_discriminant_analysis.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine mc_lda = create_machine("MCLDA", labels=labels_train, m_tolerance=0.0001, m_store_cov=True) +Machine mc_lda = create_machine("MCLDA", m_tolerance=0.0001, m_store_cov=True) #![create_instance] #![train_and_apply] -mc_lda.train(features_train) +mc_lda.train(features_train, labels_train) MulticlassLabels labels_predict = mc_lda.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/lmnn.sg.in b/examples/meta/src/multiclass/lmnn.sg.in index ab64a312efa..ccd6c00dc53 100644 --- a/examples/meta/src/multiclass/lmnn.sg.in +++ b/examples/meta/src/multiclass/lmnn.sg.in @@ -20,7 +20,7 @@ Machine knn = create_machine("KNN", k=k,distance=lmnn_distance,labels=labels_tra #![create_instance] #![train_and_apply] -knn.train() +knn.train(features_train) Labels labels_predict = knn.apply(features_test) RealVector output = labels_predict.get_real_vector("labels") #![train_and_apply] diff --git a/examples/meta/src/multiclass/logistic_regression.sg.in b/examples/meta/src/multiclass/logistic_regression.sg.in index 8550ec5d469..1e3a9915458 100644 --- a/examples/meta/src/multiclass/logistic_regression.sg.in +++ b/examples/meta/src/multiclass/logistic_regression.sg.in @@ -12,11 +12,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_instance] -Machine classifier = create_machine("MulticlassLogisticRegression", m_z=1.0, labels=labels_train) +Machine classifier = create_machine("MulticlassLogisticRegression", m_z=1.0) #![create_instance] #![train_and_apply] -classifier.train(features_train) +classifier.train(features_train, labels_train) MulticlassLabels labels_predict = classifier.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/multiclassliblinear.sg.in b/examples/meta/src/multiclass/multiclassliblinear.sg.in index 5ce2e770f0c..204510c0a6d 100644 --- a/examples/meta/src/multiclass/multiclassliblinear.sg.in +++ b/examples/meta/src/multiclass/multiclassliblinear.sg.in @@ -9,11 +9,11 @@ Labels labels_train = create_labels(label_train_multiclass) #![create_features] #![create_machine] -Machine classifier = create_machine("MulticlassLibLinear", C=1.0, labels=labels_train) +Machine classifier = create_machine("MulticlassLibLinear", C=1.0) #![create_machine] #![train_and_apply] -classifier.train(feats_train) +classifier.train(feats_train, labels_train) Labels labels_train_predict = classifier.apply(feats_train) Labels labels_test_predict = classifier.apply(feats_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/one_versus_rest.sg.in b/examples/meta/src/multiclass/one_versus_rest.sg.in index 9913830aab9..8a358b4fc78 100644 --- a/examples/meta/src/multiclass/one_versus_rest.sg.in +++ b/examples/meta/src/multiclass/one_versus_rest.sg.in @@ -21,11 +21,11 @@ Machine classifier = create_machine("LibSVM") #![create_classifier] #![create_machine] -Machine multiclass_machine = create_machine("KernelMulticlassMachine", multiclass_strategy=one_versus_rest, kernel=k, machine=classifier, labels=labels_train) +Machine multiclass_machine = create_machine("KernelMulticlassMachine", multiclass_strategy=one_versus_rest, kernel=k, machine=classifier) #![create_machine] #![train_and_apply] -multiclass_machine.train(features_train) +multiclass_machine.train(features_train, labels_train) Labels labels_predict = multiclass_machine.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/quadratic_discriminant_analysis.sg.in b/examples/meta/src/multiclass/quadratic_discriminant_analysis.sg.in index 5a8152d0561..fb885b3162d 100644 --- a/examples/meta/src/multiclass/quadratic_discriminant_analysis.sg.in +++ b/examples/meta/src/multiclass/quadratic_discriminant_analysis.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine qda = create_machine("QDA", labels=labels_train, m_tolerance=0.0001, m_store_covs=True) +Machine qda = create_machine("QDA", m_tolerance=0.0001, m_store_covs=True) #![create_instance] #![train_and_apply] -qda.train(features_train) +qda.train(features_train, labels_train) MulticlassLabels labels_predict = qda.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/random_forest.sg.in b/examples/meta/src/multiclass/random_forest.sg.in index ed6a09bee40..540c8b75e6b 100644 --- a/examples/meta/src/multiclass/random_forest.sg.in +++ b/examples/meta/src/multiclass/random_forest.sg.in @@ -15,13 +15,13 @@ CombinationRule m_vote = create_combination_rule("MajorityVote") #![create_combination_rule] #![create_instance] -Machine rand_forest = create_machine("RandomForest", labels=labels_train, num_bags=100, combination_rule=m_vote, seed=1) +Machine rand_forest = create_machine("RandomForest", num_bags=100, combination_rule=m_vote, seed=1) Parallel p = rand_forest.get_global_parallel() p.set_num_threads(1) #![create_instance] #![train_and_apply] -rand_forest.train(features_train) +rand_forest.train(features_train, labels_train) MulticlassLabels labels_predict = rand_forest.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/relaxed_tree.sg.in b/examples/meta/src/multiclass/relaxed_tree.sg.in index 0aaf98d5b7f..29a5e0d1683 100644 --- a/examples/meta/src/multiclass/relaxed_tree.sg.in +++ b/examples/meta/src/multiclass/relaxed_tree.sg.in @@ -17,13 +17,12 @@ Kernel k = create_kernel("GaussianKernel") #![create_instance] RelaxedTree machine() -machine.set_labels(labels_train) machine.set_machine_for_confusion_matrix(mll) machine.set_kernel(k) #![create_instance] #![train_and_apply] -machine.train(features_train) +machine.train(features_train, labels_train) MulticlassLabels labels_predict = machine.apply_multiclass(features_test) #![train_and_apply] diff --git a/examples/meta/src/multiclass/shareboost.sg.in b/examples/meta/src/multiclass/shareboost.sg.in index 9c6bb7065cf..9ba14445974 100644 --- a/examples/meta/src/multiclass/shareboost.sg.in +++ b/examples/meta/src/multiclass/shareboost.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine shareboost = create_machine("ShareBoost", labels=labels_train, nonzero_feas=2) +Machine shareboost = create_machine("ShareBoost", nonzero_feas=2) #![create_instance] #![train_and_apply] -shareboost.train(features_train) +shareboost.train(features_train, labels_train) Features features_test_sub = create_subset_features(features_test, shareboost.get_int_vector("active_set")) MulticlassLabels labels_predict = shareboost.apply_multiclass(features_test_sub) #![train_and_apply] diff --git a/examples/meta/src/multiclass/support_vector_machine.sg.in b/examples/meta/src/multiclass/support_vector_machine.sg.in index 1ca37f9ebfb..27a38640588 100644 --- a/examples/meta/src/multiclass/support_vector_machine.sg.in +++ b/examples/meta/src/multiclass/support_vector_machine.sg.in @@ -16,11 +16,11 @@ Kernel gauss_kernel = create_kernel("GaussianKernel", width=1.0) #![set_parameters] #![create_instance] -Machine svm = create_machine("MulticlassLibSVM", C=C, kernel=gauss_kernel, labels=labels_train) +Machine svm = create_machine("MulticlassLibSVM", C=C, kernel=gauss_kernel) #![create_instance] #![train_and_apply] -svm.train(features_train) +svm.train(features_train, labels_train) Labels labels_predict = svm.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/neural_nets/convolutional_net_classification.sg.in b/examples/meta/src/neural_nets/convolutional_net_classification.sg.in index 18b8158e5cd..0ab8bbb6355 100644 --- a/examples/meta/src/neural_nets/convolutional_net_classification.sg.in +++ b/examples/meta/src/neural_nets/convolutional_net_classification.sg.in @@ -11,7 +11,7 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine network = create_machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, max_num_epochs=4, epsilon=0.0, optimization_method="NNOM_GRADIENT_DESCENT", gd_learning_rate=0.01, gd_mini_batch_size=3, max_norm=1.0, dropout_input=0.5) +Machine network = create_machine("NeuralNetwork", auto_quick_initialize=True, max_num_epochs=4, epsilon=0.0, optimization_method="NNOM_GRADIENT_DESCENT", gd_learning_rate=0.01, gd_mini_batch_size=3, max_norm=1.0, dropout_input=0.5) #![create_instance] #![add_layers] @@ -27,7 +27,7 @@ network.put("seed", 10) #![add_layers] #![train_and_apply] -network.train(features_train) +network.train(features_train, labels_train) Labels labels_predict = network.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/neural_nets/feedforward_net_classification.sg.in b/examples/meta/src/neural_nets/feedforward_net_classification.sg.in index 8d396b8b2f1..beedfbad974 100644 --- a/examples/meta/src/neural_nets/feedforward_net_classification.sg.in +++ b/examples/meta/src/neural_nets/feedforward_net_classification.sg.in @@ -12,7 +12,7 @@ Labels labels_test = create_labels(f_labels_test) #![create_instance] int num_feats = features_train.get_int("num_features") -Machine network = create_machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, l2_coefficient=0.01, dropout_hidden=0.5, max_num_epochs=50, gd_mini_batch_size=num_feats, gd_learning_rate=0.1, gd_momentum=0.9) +Machine network = create_machine("NeuralNetwork", auto_quick_initialize=True, l2_coefficient=0.01, dropout_hidden=0.5, max_num_epochs=50, gd_mini_batch_size=num_feats, gd_learning_rate=0.1, gd_momentum=0.9) #![create_instance] #![add_layers] @@ -26,7 +26,7 @@ network.put("seed", 1) #![add_layers] #![train_and_apply] -network.train(features_train) +network.train(features_train, labels_train) Labels labels_predict = network.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/neural_nets/feedforward_net_regression.sg.in b/examples/meta/src/neural_nets/feedforward_net_regression.sg.in index 53154541fd6..213138de542 100644 --- a/examples/meta/src/neural_nets/feedforward_net_regression.sg.in +++ b/examples/meta/src/neural_nets/feedforward_net_regression.sg.in @@ -13,7 +13,7 @@ Labels labels_test = create_labels(f_labels_test) #![create_instance] int num_feats = features_train.get_int("num_features") -Machine network = create_machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, l2_coefficient=0.1, epsilon=0.0, max_num_epochs=40, gd_learning_rate=0.1, gd_momentum=0.9) +Machine network = create_machine("NeuralNetwork", auto_quick_initialize=True, l2_coefficient=0.1, epsilon=0.0, max_num_epochs=40, gd_learning_rate=0.1, gd_momentum=0.9) #![create_instance] #![add_layers] @@ -27,7 +27,7 @@ network.put("seed", 1) #![add_layers] #![train_and_apply] -network.train(features_train) +network.train(features_train, labels_train) Labels labels_predict = network.apply(features_test) #![train_and_apply] diff --git a/examples/meta/src/observers/least_angle_regression.sg.in b/examples/meta/src/observers/least_angle_regression.sg.in index 046238dafdf..f368487ab7d 100644 --- a/examples/meta/src/observers/least_angle_regression.sg.in +++ b/examples/meta/src/observers/least_angle_regression.sg.in @@ -22,7 +22,7 @@ Features normalized_features_test = Normalize.transform(pruned_features_test) #![preprocess_features] #![create_instance] -Machine lars = create_machine("LeastAngleRegression", labels=labels_train, lasso=False, max_l1_norm=0.01) +Machine lars = create_machine("LeastAngleRegression", lasso=False, max_l1_norm=0.01) #![create_instance] #![create_observer] @@ -31,7 +31,7 @@ lars.subscribe(logger) #![create_observer] #![train_and_apply] -lars.train(normalized_features_train) +lars.train(normalized_features_train, labels_train) Labels labels_predict = lars.apply(normalized_features_test) #![train_and_apply] diff --git a/examples/meta/src/regression/cartree.sg.in b/examples/meta/src/regression/cartree.sg.in index 8c9846b8f8e..faea334f478 100644 --- a/examples/meta/src/regression/cartree.sg.in +++ b/examples/meta/src/regression/cartree.sg.in @@ -14,11 +14,11 @@ ft[0] = False #![set_attribute_types] #![create_machine] -Machine cartree = create_machine("CARTree", labels=labels_train, nominal=ft, mode=enum EProblemType.PT_REGRESSION, folds=5, apply_cv_pruning=True, seed=1) +Machine cartree = create_machine("CARTree", nominal=ft, mode=enum EProblemType.PT_REGRESSION, folds=5, apply_cv_pruning=True, seed=1) #![create_machine] #![train_and_apply] -cartree.train(feats_train) +cartree.train(feats_train, labels_train) Labels labels_predict = cartree.apply(feats_test) #![train_and_apply] diff --git a/examples/meta/src/regression/chaid_tree.sg.in b/examples/meta/src/regression/chaid_tree.sg.in index 90c173ecc90..5fd6c37e816 100644 --- a/examples/meta/src/regression/chaid_tree.sg.in +++ b/examples/meta/src/regression/chaid_tree.sg.in @@ -14,11 +14,11 @@ ft[0] = 2 #![set_feature_types] #![create_machine] -Machine chaidtree = create_machine("CHAIDTree", labels=labels_train, dependent_vartype=2, feature_types=ft, num_breakpoints=50) +Machine chaidtree = create_machine("CHAIDTree", dependent_vartype=2, feature_types=ft, num_breakpoints=50) #![create_machine] #![train_and_apply] -chaidtree.train(feats_train) +chaidtree.train(feats_train, labels_train) Labels labels_predict = chaidtree.apply(feats_test) #![train_and_apply] diff --git a/examples/meta/src/regression/least_angle_regression.sg.in b/examples/meta/src/regression/least_angle_regression.sg.in index 235373b013b..e47d70fb7c1 100644 --- a/examples/meta/src/regression/least_angle_regression.sg.in +++ b/examples/meta/src/regression/least_angle_regression.sg.in @@ -22,11 +22,11 @@ Features normalized_features_test = Normalize.transform(pruned_features_test) #![preprocess_features] #![create_instance] -Machine lars = create_machine("LeastAngleRegression", labels=labels_train, lasso=False, max_l1_norm=0.01) +Machine lars = create_machine("LeastAngleRegression", lasso=False, max_l1_norm=0.01) #![create_instance] #![train_and_apply] -lars.train(normalized_features_train) +lars.train(normalized_features_train, labels_train) Labels labels_predict = lars.apply(normalized_features_test) #[!extract_w] diff --git a/examples/meta/src/regression/linear_ridge_regression.sg.in b/examples/meta/src/regression/linear_ridge_regression.sg.in index 5be74a891a0..186366bf4ff 100644 --- a/examples/meta/src/regression/linear_ridge_regression.sg.in +++ b/examples/meta/src/regression/linear_ridge_regression.sg.in @@ -11,11 +11,11 @@ Labels labels_test = create_labels(f_labels_test) #![create_features] #![create_instance] -Machine lrr = create_machine("LinearRidgeRegression", tau=0.001, labels=labels_train) +Machine lrr = create_machine("LinearRidgeRegression", tau=0.001) #![create_instance] #![train_and_apply] -lrr.train(features_train) +lrr.train(features_train, labels_train) Labels labels_predict = lrr.apply(features_test) #![train_and_apply] @@ -25,8 +25,8 @@ RealVector w = lrr.get_real_vector("w") #[!extract_w] #[!manual_bias] -Machine lrr2 = create_machine("LinearRidgeRegression", tau=0.001, labels=labels_train, use_bias=False) -lrr2.train(features_train) +Machine lrr2 = create_machine("LinearRidgeRegression", tau=0.001, use_bias=False) +lrr2.train(features_train, labels_train) real my_bias = 0.1 lrr2.put("bias", my_bias) Labels labels_predict2 = lrr2.apply(features_test) diff --git a/examples/meta/src/regression/random_forest_regression.sg.in b/examples/meta/src/regression/random_forest_regression.sg.in index 346d73a0119..1828ebb69aa 100644 --- a/examples/meta/src/regression/random_forest_regression.sg.in +++ b/examples/meta/src/regression/random_forest_regression.sg.in @@ -15,12 +15,12 @@ CombinationRule mean_rule = create_combination_rule("MeanRule") #![create_combination_rule] #![create_instance] -Machine rand_forest = create_machine("RandomForest", labels=labels_train, num_bags=5, seed=1, combination_rule=mean_rule) +Machine rand_forest = create_machine("RandomForest", num_bags=5, seed=1, combination_rule=mean_rule) #![create_instance] #![train_and_apply] -rand_forest.train(features_train) -RegressionLabels labels_predict = rand_forest.apply_regression(features_test) +rand_forest.train(features_train, labels_train) +Labels labels_predict = rand_forest.apply_regression(features_test) #![train_and_apply] #![evaluate_error] @@ -32,3 +32,4 @@ real mserror = mse.evaluate(labels_predict, labels_test) # additional integration testing variables RealVector output = labels_predict.get_real_vector("labels") + diff --git a/examples/undocumented/python/classifier_multiclassocas.py b/examples/undocumented/python/classifier_multiclassocas.py index 8febc57ec6a..1746833a475 100644 --- a/examples/undocumented/python/classifier_multiclassocas.py +++ b/examples/undocumented/python/classifier_multiclassocas.py @@ -28,8 +28,8 @@ def classifier_multiclassocas (num_vec=10,num_class=3,distance=15,width=2.1,C=1, labels=sg.create_labels(label_train) - classifier = sg.create_machine("MulticlassOCAS", labels=labels, C=C) - classifier.train(feats_train) + classifier = sg.create_machine("MulticlassOCAS", C=C) + classifier.train(feats_train, labels) out = classifier.apply(feats_test).get("labels") #print label_test diff --git a/examples/undocumented/python/classifier_multilabeloutputliblinear.py b/examples/undocumented/python/classifier_multilabeloutputliblinear.py index 1bd417d8c19..11aae702253 100644 --- a/examples/undocumented/python/classifier_multilabeloutputliblinear.py +++ b/examples/undocumented/python/classifier_multilabeloutputliblinear.py @@ -14,8 +14,8 @@ def classifier_multilabeloutputliblinear (fm_train_real=traindat,fm_test_real=te labels=MulticlassLabels(label_train_multiclass) - classifier = sg.create_machine("MulticlassLibLinear", C=C, labels=labels) - classifier.train(feats_train) + classifier = sg.create_machine("MulticlassLibLinear", C=C) + classifier.train(feats_train, labels) # TODO: figure out the new style API for the below call, disabling for now #label_pred = classifier.apply_multilabel_output(feats_test,2) diff --git a/examples/undocumented/python/evaluation_clustering_simple.py b/examples/undocumented/python/evaluation_clustering_simple.py index b0ddfcb03a2..64c6cccc9d9 100644 --- a/examples/undocumented/python/evaluation_clustering_simple.py +++ b/examples/undocumented/python/evaluation_clustering_simple.py @@ -25,8 +25,8 @@ def assign_labels(data, centroids, ncenters): fea_centroids = sg.create_features(centroids) distance = sg.create_distance('EuclideanDistance') distance.init(fea_centroids, fea_centroids) - knn = sg.create_machine("KNN", k=1, distance=distance, labels=labels) - knn.train() + knn = sg.create_machine("KNN", k=1, distance=distance) + knn.train(fea_centroids, labels) return knn.apply(data) def evaluation_clustering_simple (n_data=100, sqrt_num_blobs=4, distance=5): diff --git a/examples/undocumented/python/kernel_histogram_word_string.py b/examples/undocumented/python/kernel_histogram_word_string.py index bb66029f25d..902b9fc212f 100644 --- a/examples/undocumented/python/kernel_histogram_word_string.py +++ b/examples/undocumented/python/kernel_histogram_word_string.py @@ -17,8 +17,8 @@ def kernel_histogram_word_string (fm_train_dna=traindat,fm_test_dna=testdat,labe feats_test=sg.create_string_features(charfeat, order-1, order, 0, False) labels=sg.create_labels(label_train_dna) - pie=sg.create_machine("PluginEstimate", pos_pseudo=ppseudo_count, neg_pseudo=npseudo_count, labels=labels) - pie.train(feats_train) + pie=sg.create_machine("PluginEstimate", pos_pseudo=ppseudo_count, neg_pseudo=npseudo_count) + pie.train(feats_train, labels) kernel=sg.create_kernel("HistogramWordStringKernel", estimate=pie) kernel.init(feats_train, feats_train) diff --git a/examples/undocumented/python/kernel_salzberg_word_string.py b/examples/undocumented/python/kernel_salzberg_word_string.py index ff5c16037b0..60c8b5e23ba 100644 --- a/examples/undocumented/python/kernel_salzberg_word_string.py +++ b/examples/undocumented/python/kernel_salzberg_word_string.py @@ -17,8 +17,8 @@ def kernel_salzberg_word_string (fm_train_dna=traindat,fm_test_dna=testdat,label feats_test=sg.create_string_features(charfeat, order-1, order, gap, reverse) labels=sg.create_labels(label_train_dna) - pie=sg.create_machine("PluginEstimate", labels=labels) - pie.train(feats_train) + pie=sg.create_machine("PluginEstimate") + pie.train(feats_train, labels) kernel=sg.create_kernel("SalzbergWordStringKernel", plugin_estimate=pie, labels=labels) kernel.init(feats_train, feats_train) diff --git a/examples/undocumented/python/mkl_multiclass.py b/examples/undocumented/python/mkl_multiclass.py index a3d583fd41e..5990c245eb6 100644 --- a/examples/undocumented/python/mkl_multiclass.py +++ b/examples/undocumented/python/mkl_multiclass.py @@ -44,14 +44,14 @@ def mkl_multiclass (fm_train_real, fm_test_real, label_train_multiclass, labels = MulticlassLabels(label_train_multiclass) - mkl = sg.create_machine("MKLMulticlass", C=C, kernel=kernel, labels=labels, + mkl = sg.create_machine("MKLMulticlass", C=C, kernel=kernel, mkl_eps=mkl_epsilon, mkl_norm=mkl_norm) mkl.get("machine").put("epsilon", epsilon) mkl.get_global_parallel().set_num_threads(num_threads) - mkl.train() + mkl.train(feats_train, labels) kernel.init(feats_train, feats_test) diff --git a/examples/undocumented/python/multiclass_c45classifiertree.py b/examples/undocumented/python/multiclass_c45classifiertree.py index d1f2d860a99..07c89904ec1 100644 --- a/examples/undocumented/python/multiclass_c45classifiertree.py +++ b/examples/undocumented/python/multiclass_c45classifiertree.py @@ -34,9 +34,8 @@ def multiclass_c45classifiertree(train=traindat,test=testdat,labels=label_traind feats_train.add_subset(trsubset) c=C45ClassifierTree() - c.set_labels(train_labels) c.set_feature_types(ft) - c.train(feats_train) + c.train(feats_train, train_labels) train_labels.remove_subset() feats_train.remove_subset() diff --git a/examples/undocumented/python/multiclass_id3classifiertree.py b/examples/undocumented/python/multiclass_id3classifiertree.py index 6b1effe229b..b0ce96ca763 100644 --- a/examples/undocumented/python/multiclass_id3classifiertree.py +++ b/examples/undocumented/python/multiclass_id3classifiertree.py @@ -30,8 +30,7 @@ def multiclass_id3classifiertree(train=train_data,labels=train_labels,test=test_ # ID3 Tree formation id3=ID3ClassifierTree() - id3.set_labels(feats_labels) - id3.train(feats_train) + id3.train(feats_train, feats_labels) # Classify test data output=id3.apply_multiclass(feats_test).get_labels() diff --git a/examples/undocumented/python/stochasticgbmachine.py b/examples/undocumented/python/stochasticgbmachine.py index 04b2609b1fd..993d4274e12 100644 --- a/examples/undocumented/python/stochasticgbmachine.py +++ b/examples/undocumented/python/stochasticgbmachine.py @@ -28,8 +28,7 @@ def stochasticgbmachine(train=traindat,train_labels=label_traindat,ft=feat_types # train feats.add_subset(np.int32(p[0:int(num)])) labels.add_subset(np.int32(p[0:int(num)])) - s.set_labels(labels) - s.train(feats) + s.train(feats, labels) feats.remove_subset() labels.remove_subset() diff --git a/examples/undocumented/python/structure_discrete_hmsvm_bmrm.py b/examples/undocumented/python/structure_discrete_hmsvm_bmrm.py index 7907fb9f3bf..28a644d1bf5 100644 --- a/examples/undocumented/python/structure_discrete_hmsvm_bmrm.py +++ b/examples/undocumented/python/structure_discrete_hmsvm_bmrm.py @@ -29,8 +29,8 @@ def structure_discrete_hmsvm_bmrm (m_data_dict=data_dict): model = sg.create_structured_model("HMSVMModel", features=features, labels=labels, state_model_type="SMT_TWO_STATE", num_obs=num_obs) - sosvm = sg.create_machine("DualLibQPBMSOSVM", model=model, labels=labels, m_lambda=5000.0) - sosvm.train() + sosvm = sg.create_machine("DualLibQPBMSOSVM", model=model, m_lambda=5000.0) + sosvm.train(features, labels) #print sosvm.get_w() predicted = sosvm.apply(features) diff --git a/examples/undocumented/python/structure_factor_graph_model.py b/examples/undocumented/python/structure_factor_graph_model.py index e666ffdb85a..56c32f415f5 100644 --- a/examples/undocumented/python/structure_factor_graph_model.py +++ b/examples/undocumented/python/structure_factor_graph_model.py @@ -112,9 +112,9 @@ def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w model.add("factor_types", ftype[2]) # --- training with BMRM --- - bmrm = sg.create_machine("DualLibQPBMSOSVM", model=model, labels=tr_labels, m_lambda=0.01) + bmrm = sg.create_machine("DualLibQPBMSOSVM", model=model, m_lambda=0.01) #bmrm.set_verbose(True) - bmrm.train() + bmrm.train(tr_samples, tr_labels) #print 'learned weights:' #print bmrm.get_w() #print 'ground truth weights:' @@ -142,9 +142,9 @@ def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w #print hbm.get_train_errors() # --- training with SGD --- - sgd = sg.create_machine("StochasticSOSVM", model=model, labels=tr_labels, m_lambda=0.01) + sgd = sg.create_machine("StochasticSOSVM", model=model, m_lambda=0.01) #sgd.set_verbose(True) - sgd.train() + sgd.train(tr_samples, tr_labels) # evaluation #print('SGD: Average training error is %.4f' % SOSVMHelper.average_loss(sgd.get_w(), model)) @@ -154,9 +154,9 @@ def structure_factor_graph_model(tr_samples = samples, tr_labels = labels, w = w #print hp.get_train_errors() # --- training with FW --- - fw = sg.create_machine("FWSOSVM", model=model, labels=tr_labels, m_lambda=0.01, + fw = sg.create_machine("FWSOSVM", model=model, m_lambda=0.01, gap_threshold=0.01) - fw.train() + fw.train(tr_samples, tr_labels) # evaluation #print('FW: Average training error is %.4f' % SOSVMHelper.average_loss(fw.get_w(), model)) diff --git a/examples/undocumented/python/structure_graphcuts.py b/examples/undocumented/python/structure_graphcuts.py index 2da38de1a1b..fd6adec03e9 100644 --- a/examples/undocumented/python/structure_graphcuts.py +++ b/examples/undocumented/python/structure_graphcuts.py @@ -180,12 +180,12 @@ def graphcuts_sosvm(num_train_samples = 10, len_label = 5, len_feat = 20, num_te # the 3rd parameter is do_weighted_averaging, by turning this on, # a possibly faster convergence rate may be achieved. # the 4th parameter controls outputs of verbose training information - sgd = sg.create_machine("StochasticSOSVM", model=model, labels=labels_fg, do_weighted_averaging=True, + sgd = sg.create_machine("StochasticSOSVM", model=model, do_weighted_averaging=True, num_iter=150, m_lambda=0.0001) # train t0 = time.time() - sgd.train() + sgd.train(feats_fg, labels_fg) t1 = time.time() w_sgd = sgd.get("w") #print "SGD took", t1 - t0, "seconds." diff --git a/examples/undocumented/python/structure_hierarchical_multilabel_classification.py b/examples/undocumented/python/structure_hierarchical_multilabel_classification.py index a675a6b6ded..3cb94dc377b 100644 --- a/examples/undocumented/python/structure_hierarchical_multilabel_classification.py +++ b/examples/undocumented/python/structure_hierarchical_multilabel_classification.py @@ -110,7 +110,7 @@ def structure_hierarchical_multilabel_classification(train_file_name, features=train_features, labels=train_labels, taxonomy=train_taxonomy) - sgd = sg.create_machine("StochasticSOSVM", model=model, labels=train_labels) + sgd = sg.create_machine("StochasticSOSVM", model=model) # t1 = time.time() # sgd.train() # print('>>> Took %f time for training' % (time.time() - t1)) diff --git a/examples/undocumented/python/transfer_multitask_clustered_logistic_regression.py b/examples/undocumented/python/transfer_multitask_clustered_logistic_regression.py index b202cd0782b..a84a57534d1 100644 --- a/examples/undocumented/python/transfer_multitask_clustered_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_clustered_logistic_regression.py @@ -30,14 +30,14 @@ def transfer_multitask_clustered_logistic_regression (fm_train=traindat,fm_test= task_group.append_task(task_two) task_group.append_task(task_three) - mtlr = sg.MultitaskClusteredLogisticRegression(1.0,100.0,features,labels,task_group,2) + mtlr = sg.MultitaskClusteredLogisticRegression(1.0,100.0,task_group,2) #mtlr.io.set_loglevel(MSG_DEBUG) mtlr.set_tolerance(1e-3) # use 1e-2 tolerance mtlr.set_max_iter(100) - mtlr.train() + mtlr.train(features,labels) mtlr.set_current_task(0) #print mtlr.get_w() - out = mtlr.apply_regression().get("labels") + out = mtlr.apply_regression(features).get("labels") return out diff --git a/examples/undocumented/python/transfer_multitask_l12_logistic_regression.py b/examples/undocumented/python/transfer_multitask_l12_logistic_regression.py index 364721911be..bc1f3c774f0 100644 --- a/examples/undocumented/python/transfer_multitask_l12_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_l12_logistic_regression.py @@ -29,12 +29,12 @@ def transfer_multitask_l12_logistic_regression (fm_train=traindat,fm_test=testda task_group.append_task(task_one) task_group.append_task(task_two) - mtlr = MultitaskL12LogisticRegression(0.1,0.1,features,labels,task_group) + mtlr = MultitaskL12LogisticRegression(0.1,0.1,task_group) mtlr.set_tolerance(1e-2) # use 1e-2 tolerance mtlr.set_max_iter(10) - mtlr.train() + mtlr.train(features,labels) mtlr.set_current_task(0) - out = mtlr.apply_regression().get_labels() + out = mtlr.apply_regression(features).get_labels() return out diff --git a/examples/undocumented/python/transfer_multitask_leastsquares_regression.py b/examples/undocumented/python/transfer_multitask_leastsquares_regression.py index ac8518f4b2f..b143eda13c8 100644 --- a/examples/undocumented/python/transfer_multitask_leastsquares_regression.py +++ b/examples/undocumented/python/transfer_multitask_leastsquares_regression.py @@ -29,12 +29,12 @@ def transfer_multitask_leastsquares_regression (fm_train=traindat,fm_test=testda task_group.append_task(task_one) task_group.append_task(task_two) - mtlsr = MultitaskLeastSquaresRegression(0.1,features,labels,task_group) + mtlsr = MultitaskLeastSquaresRegression(0.1,task_group) mtlsr.set_regularization(1) # use regularization ratio mtlsr.set_tolerance(1e-2) # use 1e-2 tolerance - mtlsr.train() + mtlsr.train(features,labels) mtlsr.set_current_task(0) - out = mtlsr.apply_regression().get_labels() + out = mtlsr.apply_regression(features).get_labels() return out if __name__=='__main__': diff --git a/examples/undocumented/python/transfer_multitask_logistic_regression.py b/examples/undocumented/python/transfer_multitask_logistic_regression.py index 24991dffbe8..b204f20ffb6 100644 --- a/examples/undocumented/python/transfer_multitask_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_logistic_regression.py @@ -29,12 +29,12 @@ def transfer_multitask_logistic_regression (fm_train=traindat,fm_test=testdat,la task_group.append_task(task_one) task_group.append_task(task_two) - mtlr = MultitaskLogisticRegression(0.1,features,labels,task_group) + mtlr = MultitaskLogisticRegression(0.1,task_group) mtlr.set_regularization(1) # use regularization ratio mtlr.set_tolerance(1e-2) # use 1e-2 tolerance - mtlr.train() + mtlr.train(features,labels) mtlr.set_current_task(0) - out = mtlr.apply().get("labels") + out = mtlr.apply(features).get("labels") return out diff --git a/examples/undocumented/python/transfer_multitask_trace_logistic_regression.py b/examples/undocumented/python/transfer_multitask_trace_logistic_regression.py index 5f8a1c99fdb..00a296ff137 100644 --- a/examples/undocumented/python/transfer_multitask_trace_logistic_regression.py +++ b/examples/undocumented/python/transfer_multitask_trace_logistic_regression.py @@ -29,12 +29,12 @@ def transfer_multitask_trace_logistic_regression (fm_train=traindat,fm_test=test task_group.append_task(task_one) task_group.append_task(task_two) - mtlr = MultitaskTraceLogisticRegression(0.1,features,labels,task_group) + mtlr = MultitaskTraceLogisticRegression(0.1,task_group) mtlr.set_tolerance(1e-2) # use 1e-2 tolerance mtlr.set_max_iter(10) - mtlr.train() + mtlr.train(features,labels) mtlr.set_current_task(0) - out = mtlr.apply_regression().get_labels() + out = mtlr.apply_regression(features).get_labels() return out diff --git a/src/gpl b/src/gpl index 8e361a17b48..e2c1db008aa 160000 --- a/src/gpl +++ b/src/gpl @@ -1 +1 @@ -Subproject commit 8e361a17b48c17ebd6a0255a4f53a19fd84ea27f +Subproject commit e2c1db008aa05266f97e7f5f4e1ceb38003b6d13 diff --git a/src/interfaces/swig/Classifier.i b/src/interfaces/swig/Classifier.i index 9da45046125..48002eae9c6 100644 --- a/src/interfaces/swig/Classifier.i +++ b/src/interfaces/swig/Classifier.i @@ -30,6 +30,9 @@ /* Include Class Headers to make them visible from within the target language */ %include +%include +%include +%include %include %include %include diff --git a/src/interfaces/swig/Clustering.i b/src/interfaces/swig/Clustering.i index 8029f2d1beb..42634009a20 100644 --- a/src/interfaces/swig/Clustering.i +++ b/src/interfaces/swig/Clustering.i @@ -17,6 +17,8 @@ SHARED_RANDOM_INTERFACE(shogun::DistanceMachine) %shared_ptr(shogun::GMM) /* Include Class Headers to make them visible from within the target language */ +%include +%include %include RANDOM_INTERFACE(DistanceMachine) %include diff --git a/src/interfaces/swig/GaussianProcess.i b/src/interfaces/swig/GaussianProcess.i index f31adfb4416..fb4af0bd84f 100644 --- a/src/interfaces/swig/GaussianProcess.i +++ b/src/interfaces/swig/GaussianProcess.i @@ -12,10 +12,10 @@ SHARED_RANDOM_INTERFACE(shogun::Inference) SHARED_RANDOM_INTERFACE(shogun::LikelihoodModel) %shared_ptr(shogun::GaussianProcess) SHARED_RANDOM_INTERFACE(shogun::GaussianProcess) - +SHARED_RANDOM_INTERFACE(shogun::NonParametricMachine) /* These functions return new Objects */ -RANDOM_INTERFACE(Machine) +RANDOM_INTERFACE(NonParametricMachine) /* Include Class Headers to make them visible from within the target language */ %include diff --git a/src/interfaces/swig/Machine.i b/src/interfaces/swig/Machine.i index c676dcd49a8..5aeb1546984 100644 --- a/src/interfaces/swig/Machine.i +++ b/src/interfaces/swig/Machine.i @@ -9,3 +9,4 @@ SHARED_RANDOM_INTERFACE(shogun::Machine) %shared_ptr(shogun::LinearMachine) %shared_ptr(shogun::DistanceMachine) %shared_ptr(shogun::IterativeMachine) +%shared_ptr(shogun::NonParametricMachine) diff --git a/src/shogun/classifier/AveragedPerceptron.cpp b/src/shogun/classifier/AveragedPerceptron.cpp index b2fcc9bc577..ae13718c544 100644 --- a/src/shogun/classifier/AveragedPerceptron.cpp +++ b/src/shogun/classifier/AveragedPerceptron.cpp @@ -40,22 +40,9 @@ void AveragedPerceptron::init() ParameterProperties::MODEL) } -void AveragedPerceptron::init_model(const std::shared_ptr data) +void AveragedPerceptron::init_model(const std::shared_ptr& features) { - ASSERT(m_labels) - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); - set_features(std::static_pointer_cast(data)); - } - ASSERT(features) - - SGVector train_labels = binary_labels(m_labels)->get_int_labels(); int32_t num_feat = features->get_dim_feature_space(); - int32_t num_vec = features->get_num_vectors(); - ASSERT(num_vec == train_labels.vlen) - SGVector w(num_feat); cached_w = SGVector(num_feat); // start with uniform w, bias=0, tmp_bias=0 @@ -66,13 +53,13 @@ void AveragedPerceptron::init_model(const std::shared_ptr data) set_w(w); } -void AveragedPerceptron::iteration() +void AveragedPerceptron::iteration( + const std::shared_ptr& features, const std::shared_ptr& labs) { bool converged = true; SGVector w = get_w(); - auto labels = binary_labels(m_labels)->get_int_labels(); - + auto labels = binary_labels(labs)->get_int_labels(); int32_t num_vec = features->get_num_vectors(); // this assumes that m_current_iteration starts at 0 int32_t num_prev_weights = num_vec * m_current_iteration + 1; diff --git a/src/shogun/classifier/AveragedPerceptron.h b/src/shogun/classifier/AveragedPerceptron.h index 08d91866985..8307d9ff601 100644 --- a/src/shogun/classifier/AveragedPerceptron.h +++ b/src/shogun/classifier/AveragedPerceptron.h @@ -69,8 +69,10 @@ namespace shogun /** registers and initializes parameters */ void init(); - void init_model(std::shared_ptr data) override; - void iteration() override; + void init_model(const std::shared_ptr& data) override; + void iteration( + const std::shared_ptr& data, + const std::shared_ptr& labs) override; protected: /** learning rate */ diff --git a/src/shogun/classifier/GaussianProcessClassification.cpp b/src/shogun/classifier/GaussianProcessClassification.cpp index 58dd4d9f7c7..a058e38d4cc 100644 --- a/src/shogun/classifier/GaussianProcessClassification.cpp +++ b/src/shogun/classifier/GaussianProcessClassification.cpp @@ -71,18 +71,6 @@ std::shared_ptr GaussianProcessClassification::apply_multiclas require(m_method->supports_multiclass(), "{} with {} doesn't support " "multi classification\n", m_method->get_name(), lik->get_name()); - // if regression data equals to NULL, then apply classification on training - // features - if (!data) - { - if (m_method->get_inference_type()==INF_SPARSE) - { - not_implemented(SOURCE_LOCATION); - } - else - data=m_method->get_features(); - } - const index_t n=data->get_num_vectors(); SGVector mean=get_mean_vector(data); const index_t C=mean.vlen/n; @@ -110,21 +98,6 @@ std::shared_ptr GaussianProcessClassification::apply_binary( require(m_method->supports_binary(), "{} with {} doesn't support " "binary classification\n", m_method->get_name(), lik->get_name()); - if (!data) - { - if (m_method->get_inference_type()== INF_FITC_LAPLACE_SINGLE) - { -#ifdef USE_GPL_SHOGUN - auto fitc_method = m_method->as(); - data=fitc_method->get_inducing_features(); -#else - gpl_only(SOURCE_LOCATION); -#endif //USE_GPL_SHOGUN - } - else - data=m_method->get_features(); - } - auto result=std::make_shared(get_mean_vector(data)); if (m_compute_variance) result->put("current_values", get_variance_vector(data)); diff --git a/src/shogun/classifier/LDA.cpp b/src/shogun/classifier/LDA.cpp index 2da9f9b940f..9a4c6b65533 100644 --- a/src/shogun/classifier/LDA.cpp +++ b/src/shogun/classifier/LDA.cpp @@ -31,19 +31,6 @@ LDA::LDA(float64_t gamma, ELDAMethod method, bool bdc_svd) m_bdc_svd = bdc_svd; } -LDA::LDA( - float64_t gamma, const std::shared_ptr>& traindat, std::shared_ptr trainlab, - ELDAMethod method, bool bdc_svd) - : DenseRealDispatch(), m_gamma(gamma) -{ - init(); - - features = traindat; - m_labels = std::move(trainlab); - m_method = method; - m_gamma = gamma; - m_bdc_svd = bdc_svd; -} void LDA::init() { @@ -63,12 +50,10 @@ void LDA::init() SG_OPTIONS(AUTO_LDA, SVD_LDA, FLD_LDA)) } -LDA::~LDA() -{ -} - template -bool LDA::train_machine_templated(const std::shared_ptr>& data) +bool LDA::train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs) { index_t num_feat = data->get_num_features(); index_t num_vec = data->get_num_vectors(); @@ -76,15 +61,17 @@ bool LDA::train_machine_templated(const std::shared_ptr>& data bool lda_more_efficient = (m_method == AUTO_LDA && num_vec <= num_feat); if (m_method == SVD_LDA || lda_more_efficient) - return solver_svd(data); + return solver_svd(data, labs); else - return solver_classic(data); + return solver_classic(data, labs); } template -bool LDA::solver_svd(std::shared_ptr> data) +bool LDA::solver_svd( + const std::shared_ptr>& data, + const std::shared_ptr& labs) { - auto labels = multiclass_labels(m_labels); + auto labels = multiclass_labels(labs); require( labels->get_num_classes() == 2, "Number of classes ({}) must be 2", labels->get_num_classes()); @@ -118,9 +105,11 @@ bool LDA::solver_svd(std::shared_ptr> data) } template -bool LDA::solver_classic(std::shared_ptr> data) +bool LDA::solver_classic( + const std::shared_ptr>& data, + const std::shared_ptr& labs) { - auto labels = multiclass_labels(m_labels); + auto labels = multiclass_labels(labs); require( labels->get_num_classes() == 2, "Number of classes ({}) must be 2", labels->get_num_classes()); diff --git a/src/shogun/classifier/LDA.h b/src/shogun/classifier/LDA.h index 1618be76cdc..073feb13385 100644 --- a/src/shogun/classifier/LDA.h +++ b/src/shogun/classifier/LDA.h @@ -112,25 +112,6 @@ class LDA : public DenseRealDispatch LDA( float64_t gamma = 0, ELDAMethod method = AUTO_LDA, bool bdc_svd = true); - - /** constructor - * - * @param gamma gamma - * @param traindat training features - * @param trainlab labels for training features - * @param method LDA using Fisher's algorithm or Singular Value - * Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default] - * @param bdc_svd when using SVD solver switch between - * Bidiagonal Divide and Conquer algorithm (BDC-SVD) and - * Jacobi's algorithm, for the differences @see linalg::SVDAlgorithm. - * [default = BDC-SVD] - */ - LDA( - float64_t gamma, const std::shared_ptr>& traindat, - std::shared_ptr trainlab, ELDAMethod method = AUTO_LDA, - bool bdc_svd = true); - ~LDA() override; - /** get classifier type * * @return classifier type LDA @@ -152,9 +133,12 @@ class LDA : public DenseRealDispatch * * @return whether training was successful */ - template ::value>> - bool train_machine_templated(const std::shared_ptr>& data); + template < + typename ST, typename U = typename std::enable_if_t< + std::is_floating_point::value>> + bool train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs); /** * Train the machine with the svd-based solver (@see CFisherLDA). @@ -162,7 +146,9 @@ class LDA : public DenseRealDispatch * @param labels labels for training data */ template - bool solver_svd(std::shared_ptr> data); + bool solver_svd( + const std::shared_ptr>& data, + const std::shared_ptr& labs); /** * Train the machine with the classic method based on the cholesky @@ -171,7 +157,9 @@ class LDA : public DenseRealDispatch * @param labels labels for training data */ template - bool solver_classic(std::shared_ptr> data); + bool solver_classic( + const std::shared_ptr>& data, + const std::shared_ptr& labs); protected: diff --git a/src/shogun/classifier/NearestCentroid.cpp b/src/shogun/classifier/NearestCentroid.cpp index 1598df603d7..16ff2913059 100644 --- a/src/shogun/classifier/NearestCentroid.cpp +++ b/src/shogun/classifier/NearestCentroid.cpp @@ -17,43 +17,24 @@ namespace shogun{ NearestCentroid::NearestCentroid() : DistanceMachine() { - init(); } - NearestCentroid::NearestCentroid(const std::shared_ptr& d, const std::shared_ptr& trainlab) : DistanceMachine() + NearestCentroid::NearestCentroid(const std::shared_ptr& d) : DistanceMachine() { - init(); ASSERT(d) - ASSERT(trainlab) set_distance(d); - set_labels(trainlab); } NearestCentroid::~NearestCentroid() { } - void NearestCentroid::init() - { - m_shrinking=0; - m_is_trained=false; - } - - bool NearestCentroid::train_machine(std::shared_ptr data) { - ASSERT(m_labels) - ASSERT(distance) - if (data) - { - if (m_labels->get_num_labels() != data->get_num_vectors()) - error("Number of training vectors does not match number of labels"); - distance->init(data, data); - } - else - { - data = distance->get_lhs(); - } + require(distance, "Distance not set"); + require(m_labels->get_num_labels() == data->get_num_vectors(), + "Number of training vectors does not match number of labels"); + distance->init(data, data); auto multiclass_labels = m_labels->as(); auto dense_data = data->as>(); @@ -83,7 +64,7 @@ namespace shogun{ linalg::scale(centroids, centroids, scale); auto centroids_feats = std::make_shared>(centroids); - + m_centroids = centroids_feats; m_is_trained=true; distance->init(centroids_feats, distance->get_rhs()); diff --git a/src/shogun/classifier/NearestCentroid.h b/src/shogun/classifier/NearestCentroid.h index ecb4e87653d..14fd4af4489 100644 --- a/src/shogun/classifier/NearestCentroid.h +++ b/src/shogun/classifier/NearestCentroid.h @@ -45,7 +45,7 @@ class NearestCentroid : public DistanceMachine{ * @param distance distance * @param trainlab labels for training */ - NearestCentroid(const std::shared_ptr& distance, const std::shared_ptr& trainlab); + NearestCentroid(const std::shared_ptr& distance); /** Destructor */ @@ -92,26 +92,19 @@ class NearestCentroid : public DistanceMachine{ */ bool train_machine(std::shared_ptr data=NULL) override; - /** Stores feature data of underlying model. - * - * Sets centroids as lhs - */ - -private: - void init(); protected: /// number of classes (i.e. number of values labels can take) int32_t m_num_classes; /// Shrinking parameter - float64_t m_shrinking; + float64_t m_shrinking = 0; /// The centroids of the trained features std::shared_ptr> m_centroids; /// Tells if the classifier has been trained or not - bool m_is_trained; + bool m_is_trained = false; }; } diff --git a/src/shogun/classifier/Perceptron.cpp b/src/shogun/classifier/Perceptron.cpp index 72b0118cfd0..cd8c66340e9 100644 --- a/src/shogun/classifier/Perceptron.cpp +++ b/src/shogun/classifier/Perceptron.cpp @@ -34,15 +34,8 @@ Perceptron::~Perceptron() { } -void Perceptron::init_model(const std::shared_ptr data) +void Perceptron::init_model(const std::shared_ptr& features) { - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); - set_features(std::static_pointer_cast(data)); - } - int32_t num_feat = features->get_dim_feature_space(); SGVector w; @@ -57,13 +50,13 @@ void Perceptron::init_model(const std::shared_ptr data) } } -void Perceptron::iteration() +void Perceptron::iteration( + const std::shared_ptr& features, const std::shared_ptr& labs) { bool converged = true; SGVector w = get_w(); - auto labels = binary_labels(m_labels)->get_int_labels(); - + auto labels = labs->as()->get_int_labels(); for (const auto& [v, true_label] : zip_iterator(DotIterator(features), labels)) { const auto predicted_label = v.dot(w) + bias; diff --git a/src/shogun/classifier/Perceptron.h b/src/shogun/classifier/Perceptron.h index 7c74984d939..4ca077c8855 100644 --- a/src/shogun/classifier/Perceptron.h +++ b/src/shogun/classifier/Perceptron.h @@ -59,8 +59,10 @@ class Perceptron : public IterativeMachine const char* get_name() const override { return "Perceptron"; } protected: - void init_model(std::shared_ptr data) override; - void iteration() override; + void init_model(const std::shared_ptr& data) override; + void iteration( + const std::shared_ptr& data, + const std::shared_ptr& labs) override; protected: /** learning rate */ diff --git a/src/shogun/classifier/PluginEstimate.cpp b/src/shogun/classifier/PluginEstimate.cpp index 0ad4266fbde..22112217dfd 100644 --- a/src/shogun/classifier/PluginEstimate.cpp +++ b/src/shogun/classifier/PluginEstimate.cpp @@ -39,10 +39,8 @@ PluginEstimate::~PluginEstimate() { } -bool PluginEstimate::train_machine(std::shared_ptr data) +bool PluginEstimate::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - ASSERT(m_labels) - ASSERT(m_labels->get_label_type() == LT_BINARY) if (data) { if (data->get_feature_class() != C_STRING || @@ -55,21 +53,19 @@ bool PluginEstimate::train_machine(std::shared_ptr data) } ASSERT(features) - - pos_model=std::make_shared(features); neg_model=std::make_shared(features); int32_t* pos_indizes=SG_MALLOC(int32_t, std::static_pointer_cast>(features)->get_num_vectors()); int32_t* neg_indizes=SG_MALLOC(int32_t, std::static_pointer_cast>(features)->get_num_vectors()); - ASSERT(m_labels->get_num_labels() == features->get_num_vectors()) + ASSERT(labs->get_num_labels() == features->get_num_vectors()) int32_t pos_idx = 0; int32_t neg_idx = 0; - auto binary_labels = std::static_pointer_cast(m_labels); - for (int32_t i=0; iget_num_labels(); i++) + auto binary_labels = std::static_pointer_cast(labs); + for (int32_t i=0; iget_num_labels(); i++) { if (binary_labels->get_label(i) > 0) pos_indizes[pos_idx++]=i; diff --git a/src/shogun/classifier/PluginEstimate.h b/src/shogun/classifier/PluginEstimate.h index ee40dbc3240..e9dfbc6224a 100644 --- a/src/shogun/classifier/PluginEstimate.h +++ b/src/shogun/classifier/PluginEstimate.h @@ -49,7 +49,7 @@ class PluginEstimate: public Machine * @param data (test)data to be classified * @return classified labels */ - std::shared_ptr apply_binary(std::shared_ptr data=NULL) override; + std::shared_ptr apply_binary(std::shared_ptr data) override; /** set features * @@ -206,7 +206,7 @@ class PluginEstimate: public Machine * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; protected: /** pseudo count for positive class */ diff --git a/src/shogun/classifier/mkl/MKLMulticlass.cpp b/src/shogun/classifier/mkl/MKLMulticlass.cpp index b50876f1459..c47a0e8f93e 100644 --- a/src/shogun/classifier/mkl/MKLMulticlass.cpp +++ b/src/shogun/classifier/mkl/MKLMulticlass.cpp @@ -26,8 +26,8 @@ MKLMulticlass::MKLMulticlass() init(); } -MKLMulticlass::MKLMulticlass(float64_t C, std::shared_ptr k, std::shared_ptr lab) -: MulticlassSVM(std::make_shared(), C, std::move(k), std::move(lab)) +MKLMulticlass::MKLMulticlass(float64_t C, std::shared_ptr k ) +: MulticlassSVM(std::make_shared(), C, std::move(k) ) { svm=NULL; lpw=NULL; @@ -72,9 +72,9 @@ MKLMulticlass MKLMulticlass::operator=( const MKLMulticlass & cm) } -void MKLMulticlass::initsvm() +void MKLMulticlass::initsvm( const std::shared_ptr& labs) { - if (!m_labels) + if (!labs) { error("MKLMulticlass::initsvm(): the set labels is NULL"); } @@ -84,13 +84,11 @@ void MKLMulticlass::initsvm() svm->set_C(get_C()); svm->set_epsilon(get_epsilon()); - if (m_labels->get_num_labels()<=0) + if (labs->get_num_labels()<=0) { error("MKLMulticlass::initsvm(): the number of labels is " "nonpositive, do not know how to handle this!\n"); } - - svm->set_labels(m_labels); } void MKLMulticlass::initlpsolver() @@ -210,8 +208,8 @@ bool MKLMulticlass::evaluatefinishcriterion(const int32_t return false; } -void MKLMulticlass::addingweightsstep( const std::vector & - curweights) +void MKLMulticlass::addingweightsstep( const std::vector & curweights, + const std::shared_ptr& data, const std::shared_ptr& labs) { if (weightshistory.size()>2) @@ -228,12 +226,12 @@ void MKLMulticlass::addingweightsstep( const std::vector & //delete[] weights; //weights=NULL; - initsvm(); + initsvm(labs); svm->set_kernel(m_kernel); - svm->train(); + svm->train(data, labs); - float64_t sumofsignfreealphas=getsumofsignfreealphas(); + float64_t sumofsignfreealphas=getsumofsignfreealphas(labs); curalphaterm=sumofsignfreealphas; int32_t numkernels= @@ -243,23 +241,23 @@ void MKLMulticlass::addingweightsstep( const std::vector & normweightssquared.resize(numkernels); for (int32_t ind=0; ind < numkernels; ++ind ) { - normweightssquared[ind]=getsquarenormofprimalcoefficients( ind ); + normweightssquared[ind]=getsquarenormofprimalcoefficients(ind, labs); } lpw->addconstraint(normweightssquared,sumofsignfreealphas); } -float64_t MKLMulticlass::getsumofsignfreealphas() +float64_t MKLMulticlass::getsumofsignfreealphas( const std::shared_ptr& labs) { - std::vector trainlabels2(m_labels->get_num_labels()); - SGVector lab=(std::static_pointer_cast(m_labels))->get_int_labels(); + std::vector trainlabels2(labs->get_num_labels()); + SGVector lab=(std::static_pointer_cast(labs))->get_int_labels(); std::copy(lab.vector,lab.vector+lab.vlen, trainlabels2.begin()); ASSERT (trainlabels2.size()>0) float64_t sum=0; - for (int32_t nc=0; nc< (std::static_pointer_cast(m_labels))->get_num_classes();++nc) + for (int32_t nc=0; nc< (std::static_pointer_cast(labs))->get_num_classes();++nc) { auto sm=svm->get_svm(nc); @@ -275,7 +273,7 @@ float64_t MKLMulticlass::getsumofsignfreealphas() for (size_t lb=0; lb< trainlabels2.size();++lb) { - for (int32_t nc=0; nc< (std::static_pointer_cast(m_labels))->get_num_classes();++nc) + for (int32_t nc=0; nc< (std::static_pointer_cast(labs))->get_num_classes();++nc) { auto sm=svm->get_svm(nc); @@ -297,13 +295,13 @@ float64_t MKLMulticlass::getsumofsignfreealphas() } float64_t MKLMulticlass::getsquarenormofprimalcoefficients( - const int32_t ind) + const int32_t ind, const std::shared_ptr& labs) { auto ker=std::dynamic_pointer_cast(m_kernel)->get_kernel(ind); float64_t tmp=0; - for (int32_t classindex=0; classindex< (std::static_pointer_cast(m_labels))->get_num_classes(); + for (int32_t classindex=0; classindex< (std::static_pointer_cast(labs))->get_num_classes(); ++classindex) { auto sm=svm->get_svm(classindex); @@ -332,22 +330,22 @@ float64_t MKLMulticlass::getsquarenormofprimalcoefficients( } -bool MKLMulticlass::train_machine(std::shared_ptr data) +bool MKLMulticlass::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { ASSERT(m_kernel) - ASSERT(m_labels && m_labels->get_num_labels()) - ASSERT(m_labels->get_label_type() == LT_MULTICLASS) - init_strategy(); + ASSERT(labs && labs->get_num_labels()) + ASSERT(labs->get_label_type() == LT_MULTICLASS) + init_strategy(labs); - int numcl=(std::static_pointer_cast(m_labels))->get_num_classes(); + int numcl=(std::static_pointer_cast(labs))->get_num_classes(); if (data) { - if (m_labels->get_num_labels() != data->get_num_vectors()) + if (labs->get_num_labels() != data->get_num_vectors()) { error("{}::train_machine(): Number of training vectors ({}) does" " not match number of labels ({})\n", get_name(), - data->get_num_vectors(), m_labels->get_num_labels()); + data->get_num_vectors(), labs->get_num_labels()); } m_kernel->init(data, data); } @@ -362,7 +360,7 @@ bool MKLMulticlass::train_machine(std::shared_ptr data) ::std::vector curweights(numkernels,1.0/numkernels); weightshistory.push_back(curweights); - addingweightsstep(curweights); + addingweightsstep(curweights, data, labs); oldalphaterm=curalphaterm; oldnormweightssquared=normweightssquared; @@ -377,7 +375,7 @@ bool MKLMulticlass::train_machine(std::shared_ptr data) lpw->computeweights(curweights); weightshistory.push_back(curweights); - addingweightsstep(curweights); + addingweightsstep(curweights, data, labs); //new weights new biasterm diff --git a/src/shogun/classifier/mkl/MKLMulticlass.h b/src/shogun/classifier/mkl/MKLMulticlass.h index 652c7b553ed..566a6643aee 100644 --- a/src/shogun/classifier/mkl/MKLMulticlass.h +++ b/src/shogun/classifier/mkl/MKLMulticlass.h @@ -42,7 +42,7 @@ class MKLMulticlass : public MulticlassSVM * @param k kernel * @param lab labels */ - MKLMulticlass(float64_t C, std::shared_ptr k, std::shared_ptr lab); + MKLMulticlass(float64_t C, std::shared_ptr k ); /** Class default Destructor */ ~MKLMulticlass() override; @@ -109,7 +109,7 @@ class MKLMulticlass : public MulticlassSVM /** inits the underlying Multiclass SVM * */ - void initsvm(); + void initsvm( const std::shared_ptr& labs); /** checks MKL for convergence @@ -130,13 +130,14 @@ class MKLMulticlass : public MulticlassSVM * and * float64_t getsumofsignfreealphas(); */ - void addingweightsstep( const std::vector & curweights); + void addingweightsstep( const std::vector & curweights, + const std::shared_ptr& data, const std::shared_ptr& labs); /** computes the first svm-dependent part used for generating MKL constraints * it is * \f$ \sum_y b_y^2-\sum_i \sum_{ y | y \neq y_i} \alpha_{iy}(b_{y_i}-b_y-1) \f$ */ - float64_t getsumofsignfreealphas(); + float64_t getsumofsignfreealphas( const std::shared_ptr& labs); /** computes the second svm-dependent part used for generating MKL * constraints @@ -145,7 +146,7 @@ class MKLMulticlass : public MulticlassSVM * to compute \f$ \|w \|^2 \f$ */ float64_t getsquarenormofprimalcoefficients( - const int32_t ind); + const int32_t ind, const std::shared_ptr& labs); /** train Multiclass MKL classifier * @@ -155,7 +156,7 @@ class MKLMulticlass : public MulticlassSVM * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** @return object name */ const char* get_name() const override { return "MKLMulticlass"; } diff --git a/src/shogun/classifier/svm/LibLinear.cpp b/src/shogun/classifier/svm/LibLinear.cpp index d6793ed17f9..4ba93d24b98 100644 --- a/src/shogun/classifier/svm/LibLinear.cpp +++ b/src/shogun/classifier/svm/LibLinear.cpp @@ -35,16 +35,11 @@ LibLinear::LibLinear(LIBLINEAR_SOLVER_TYPE l) : RandomMixin() set_liblinear_solver_type(l); } -LibLinear::LibLinear(float64_t C, std::shared_ptr traindat, std::shared_ptr trainlab) - : RandomMixin() +LibLinear::LibLinear(float64_t C) : RandomMixin() { init(); set_C(C, C); set_bias_enabled(true); - - set_features(std::move(traindat)); - set_labels(std::move(trainlab)); - init_linear_term(); } void LibLinear::init() @@ -73,22 +68,19 @@ LibLinear::~LibLinear() { } -bool LibLinear::train_machine(std::shared_ptr data) +bool LibLinear::train(const std::shared_ptr& data, const std::shared_ptr& labs) { + m_num_labels = labs->get_num_labels(); + return train_machine(data->as(), labs); +} - ASSERT(m_labels) - init_linear_term(); - - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); +bool LibLinear::train_machine( + const std::shared_ptr& features, const std::shared_ptr& labs) +{ - set_features(std::static_pointer_cast(data)); - } - ASSERT(features) + init_linear_term(labs); - int32_t num_train_labels = m_labels->get_num_labels(); + int32_t num_train_labels = labs->get_num_labels(); int32_t num_feat = features->get_dim_feature_space(); int32_t num_vec = features->get_num_vectors(); @@ -144,7 +136,7 @@ bool LibLinear::train_machine(std::shared_ptr data) double Cp = get_C1(); double Cn = get_C2(); - auto labels = binary_labels(m_labels); + auto labels = binary_labels(labs); for (int32_t i = 0; i < prob.l; i++) { prob.y[i] = labels->get_int_label(i); @@ -1372,19 +1364,9 @@ void LibLinear::solve_l2r_lr_dual( void LibLinear::set_linear_term(const SGVector linear_term) { - if (!m_labels) - error("Please assign labels first!"); - - int32_t num_labels = m_labels->get_num_labels(); - - if (num_labels != linear_term.vlen) - { - error( - "Number of labels ({}) does not match number" + require(m_num_labels == linear_term.vlen, "Number of labels ({}) does not match number" " of entries ({}) in linear term \n", - num_labels, linear_term.vlen); - } - + m_num_labels, linear_term.vlen); m_linear_term = linear_term; } @@ -1396,12 +1378,10 @@ SGVector LibLinear::get_linear_term() return m_linear_term; } -void LibLinear::init_linear_term() +void LibLinear::init_linear_term(const std::shared_ptr& labs) { - if (!m_labels) - error("Please assign labels first!"); - m_linear_term = SGVector(m_labels->get_num_labels()); + m_linear_term = SGVector(labs->get_num_labels()); SGVector::fill_vector( m_linear_term.vector, m_linear_term.vlen, -1.0); } diff --git a/src/shogun/classifier/svm/LibLinear.h b/src/shogun/classifier/svm/LibLinear.h index 12ec174aa65..ecee5992a8a 100644 --- a/src/shogun/classifier/svm/LibLinear.h +++ b/src/shogun/classifier/svm/LibLinear.h @@ -75,10 +75,8 @@ namespace shogun /** constructor (using L2R_L1LOSS_SVC_DUAL as default) * * @param C constant C - * @param traindat training features - * @param trainlab training labels */ - LibLinear(float64_t C, std::shared_ptr traindat, std::shared_ptr trainlab); + LibLinear(float64_t C); /** destructor */ ~LibLinear() override; @@ -199,7 +197,7 @@ namespace shogun SGVector get_linear_term(); /** set the linear term for qp */ - void init_linear_term(); + void init_linear_term(const std::shared_ptr&); /** check if linear_term been inited * @return if linear_term been inited @@ -212,6 +210,8 @@ namespace shogun return true; } + bool train(const std::shared_ptr& data, const std::shared_ptr& labs) override; + protected: /** train linear SVM classifier * @@ -221,7 +221,9 @@ namespace shogun * * @return whether training was successful */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine( + const std::shared_ptr& data, + const std::shared_ptr& labs) override; private: /** set up parameters */ @@ -261,6 +263,8 @@ namespace shogun /** solver type */ LIBLINEAR_SOLVER_TYPE liblinear_solver_type; + + int32_t m_num_labels; }; } /* namespace shogun */ diff --git a/src/shogun/classifier/svm/NewtonSVM.cpp b/src/shogun/classifier/svm/NewtonSVM.cpp index bfb0c65e42a..a90dae08d9e 100644 --- a/src/shogun/classifier/svm/NewtonSVM.cpp +++ b/src/shogun/classifier/svm/NewtonSVM.cpp @@ -30,8 +30,7 @@ NewtonSVM::NewtonSVM() : IterativeMachine() t = 0; } -NewtonSVM::NewtonSVM( - float64_t c, std::shared_ptr traindat, std::shared_ptr trainlab, int32_t itr) +NewtonSVM::NewtonSVM(float64_t c, int32_t itr) : IterativeMachine() { lambda=1/c; @@ -39,8 +38,6 @@ NewtonSVM::NewtonSVM( prec=1e-6; C=c; t = 0; - set_features(std::move(traindat)); - set_labels(std::move(trainlab)); } @@ -48,18 +45,8 @@ NewtonSVM::~NewtonSVM() { } -void NewtonSVM::init_model(const std::shared_ptr data) +void NewtonSVM::init_model(const std::shared_ptr& features) { - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); - set_features(std::static_pointer_cast(data)); - } - - ASSERT(features) - - SGVector train_labels = binary_labels(m_labels)->get_labels(); int32_t num_feat=features->get_dim_feature_space(); int32_t num_vec=features->get_num_vectors(); @@ -67,8 +54,6 @@ void NewtonSVM::init_model(const std::shared_ptr data) x_n=num_vec; x_d=num_feat; - ASSERT(num_vec==train_labels.vlen) - SGVector weights(x_d); set_w(weights); out = SGVector(x_n); @@ -81,9 +66,10 @@ void NewtonSVM::init_model(const std::shared_ptr data) grad.set_const(0.0); } -void NewtonSVM::iteration() +void NewtonSVM::iteration( + const std::shared_ptr& features, const std::shared_ptr& labs) { - obj_fun_linear(); + obj_fun_linear(features, labs); SGVector weights = get_w(); SGVector sgv; @@ -132,7 +118,7 @@ void NewtonSVM::iteration() for (int32_t i = 0; i < x_d + 1; i++) step[i] = -s2[i]; - line_search_linear(step); + line_search_linear(step, features, labs); SGVector tmp_step(step.data(), x_d, false); linalg::add(weights, tmp_step, weights, 1.0, t); @@ -143,9 +129,11 @@ void NewtonSVM::iteration() m_complete = true; } -void NewtonSVM::line_search_linear(const SGVector& d) +void NewtonSVM::line_search_linear( + const SGVector& d, const std::shared_ptr& features, + const std::shared_ptr& labs) { - SGVector Y = binary_labels(m_labels)->get_labels(); + SGVector Y = binary_labels(labs)->get_labels(); SGVector outz(x_n); SGVector temp1(x_n); SGVector temp1forout(x_n); @@ -213,11 +201,11 @@ void NewtonSVM::line_search_linear(const SGVector& d) out = outz.clone(); } -void NewtonSVM::obj_fun_linear() +void NewtonSVM::obj_fun_linear( + const std::shared_ptr& features, const std::shared_ptr& labs) { SGVector weights = get_w(); - SGVector v = binary_labels(m_labels)->get_labels(); - + SGVector v = binary_labels(labs)->get_labels(); for (int32_t i=0; i /** constructor * @param C constant C * @param itr constant no of iterations - * @param traindat training features - * @param trainlab labels for features */ - NewtonSVM(float64_t C, std::shared_ptr traindat, std::shared_ptr trainlab, int32_t itr=20); + NewtonSVM(float64_t C, int32_t itr = 20); ~NewtonSVM() override; @@ -93,13 +91,19 @@ class NewtonSVM : public IterativeMachine const char* get_name() const override { return "NewtonSVM"; } protected: - void init_model(std::shared_ptr data) override; - void iteration() override; + void init_model(const std::shared_ptr& data) override; + virtual void iteration( + const std::shared_ptr& data, + const std::shared_ptr& labs) override; private: - void obj_fun_linear(); + void obj_fun_linear( + const std::shared_ptr& data, + const std::shared_ptr& labs); - void line_search_linear(const SGVector& d); + void line_search_linear( + const SGVector& d, const std::shared_ptr& data, + const std::shared_ptr& labs); protected: /** lambda=1/C */ diff --git a/src/shogun/classifier/svm/SGDQN.cpp b/src/shogun/classifier/svm/SGDQN.cpp index 6be3b020730..912b9f36696 100644 --- a/src/shogun/classifier/svm/SGDQN.cpp +++ b/src/shogun/classifier/svm/SGDQN.cpp @@ -31,16 +31,6 @@ SGDQN::SGDQN(float64_t C) C2=C; } -SGDQN::SGDQN(float64_t C, std::shared_ptr traindat, std::shared_ptr trainlab) -: LinearMachine() -{ - init(); - C1=C; - C2=C; - - set_features(std::move(traindat)); - set_labels(std::move(trainlab)); -} SGDQN::~SGDQN() { @@ -77,27 +67,14 @@ void SGDQN::combine_and_clip(float64_t* Bc,float64_t* B,int32_t dim,float64_t c1 } } } - -bool SGDQN::train(std::shared_ptr data) +bool SGDQN::train_machine( + const std::shared_ptr& features, const std::shared_ptr& labs) { - ASSERT(m_labels) - ASSERT(m_labels->get_label_type() == LT_BINARY) + const auto binary_labels = labs->as(); - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); - set_features(std::static_pointer_cast(data)); - } - - ASSERT(features) - - int32_t num_train_labels=m_labels->get_num_labels(); - int32_t num_vec=features->get_num_vectors(); - - ASSERT(num_vec==num_train_labels) - ASSERT(num_vec>0) + int32_t num_train_labels = binary_labels->get_num_labels(); + int32_t num_vec = features->get_num_vectors(); SGVector w(features->get_dim_feature_space()); w.zero(); @@ -122,7 +99,7 @@ bool SGDQN::train(std::shared_ptr data) float64_t* B=SG_MALLOC(float64_t, w.vlen); //Calibrate - calibrate(); + calibrate(features); io::info("Training on {} vectors", num_vec); @@ -131,7 +108,6 @@ bool SGDQN::train(std::shared_ptr data) if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN)) is_log_loss = true; - auto binary_labels = std::static_pointer_cast(m_labels); for (auto e : SG_PROGRESS(range(epochs))) { COMPUTATION_CONTROLLERS @@ -192,11 +168,8 @@ bool SGDQN::train(std::shared_ptr data) return true; } - - -void SGDQN::calibrate() +void SGDQN::calibrate(const std::shared_ptr& features) { - ASSERT(features) int32_t num_vec=features->get_num_vectors(); int32_t c_dim=features->get_dim_feature_space(); diff --git a/src/shogun/classifier/svm/SGDQN.h b/src/shogun/classifier/svm/SGDQN.h index 904980f999f..7e41fa99ac9 100644 --- a/src/shogun/classifier/svm/SGDQN.h +++ b/src/shogun/classifier/svm/SGDQN.h @@ -35,16 +35,6 @@ class SGDQN : public LinearMachine */ SGDQN(float64_t C); - /** constructor - * - * @param C constant C - * @param traindat training features - * @param trainlab labels for training features - */ - SGDQN( - float64_t C, std::shared_ptr traindat, - std::shared_ptr trainlab); - ~SGDQN() override; /** get classifier type @@ -52,17 +42,6 @@ class SGDQN : public LinearMachine * @return classifier type SVMSGDQN */ EMachineType get_classifier_type() override { return CT_SGDQN; } - - /** train classifier - * - * @param data training data (parameter can be avoided if distance or - * kernel-based classifiers are used and distance/kernels are - * initialized with train data) - * - * @return whether training was successful - */ - bool train(std::shared_ptr data=NULL) override; - /** set C * * @param c_neg new C constant for negatively labeled examples @@ -117,8 +96,12 @@ class SGDQN : public LinearMachine const char* get_name() const override { return "SGDQN"; } protected: + bool train_machine( + const std::shared_ptr&, + const std::shared_ptr&) override; + /** calibrate */ - void calibrate(); + void calibrate(const std::shared_ptr& features); private: void init(); diff --git a/src/shogun/classifier/svm/SVM.cpp b/src/shogun/classifier/svm/SVM.cpp index c8cdb086592..2b1c8f296fd 100644 --- a/src/shogun/classifier/svm/SVM.cpp +++ b/src/shogun/classifier/svm/SVM.cpp @@ -34,6 +34,14 @@ SVM::SVM(float64_t C, std::shared_ptr k, std::shared_ptr lab) set_kernel(std::move(k)); } +SVM::SVM(float64_t C, std::shared_ptr k) +: KernelMachine() +{ + set_defaults(); + set_C(C,C); + set_kernel(std::move(k)); +} + SVM::~SVM() { diff --git a/src/shogun/classifier/svm/SVM.h b/src/shogun/classifier/svm/SVM.h index 648f2b806bf..57720acec01 100644 --- a/src/shogun/classifier/svm/SVM.h +++ b/src/shogun/classifier/svm/SVM.h @@ -64,6 +64,8 @@ class SVM : public KernelMachine */ SVM(float64_t C, std::shared_ptr k, std::shared_ptr lab); + SVM(float64_t C, std::shared_ptr k); + ~SVM() override; /** set default values for members a SVM object diff --git a/src/shogun/classifier/svm/SVMOcas.cpp b/src/shogun/classifier/svm/SVMOcas.cpp index adf241c843d..8715d6477f5 100644 --- a/src/shogun/classifier/svm/SVMOcas.cpp +++ b/src/shogun/classifier/svm/SVMOcas.cpp @@ -33,16 +33,12 @@ SVMOcas::SVMOcas(E_SVM_TYPE type) method=type; } -SVMOcas::SVMOcas( - float64_t C, const std::shared_ptr& traindat, std::shared_ptr trainlab) -: LinearMachine() +SVMOcas::SVMOcas(float64_t C) : LinearMachine() { init(); C1=C; C2=C; - set_features(std::dynamic_pointer_cast(traindat)); - set_labels(std::move(trainlab)); } @@ -50,24 +46,16 @@ SVMOcas::~SVMOcas() { } -bool SVMOcas::train_machine(std::shared_ptr data) +bool SVMOcas::train_machine( + const std::shared_ptr& features, const std::shared_ptr& labs) { io::info("C={}, epsilon={}, bufsize={}", get_C1(), get_epsilon(), bufsize); SG_DEBUG("use_bias = {}", get_bias_enabled()) - ASSERT(m_labels) - ASSERT(m_labels->get_label_type() == LT_BINARY) - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); - set_features(std::static_pointer_cast(data)); - } - ASSERT(features) - + m_features = features; int32_t num_vec=features->get_num_vectors(); lab = SGVector(num_vec); - auto labels = binary_labels(m_labels); + auto labels = binary_labels(labs); for (int32_t i=0; iget_label(i); @@ -185,7 +173,7 @@ int SVMOcas::add_new_cut( uint32_t nSel, void* ptr) { auto o = (SVMOcas*)ptr; - auto f = o->features; + auto f = o->m_features; uint32_t nDim=(uint32_t) o->current_w.vlen; float64_t* y = o->lab.vector; @@ -270,7 +258,7 @@ int SVMOcas::sort(float64_t* vals, float64_t* data, uint32_t size) int SVMOcas::compute_output(float64_t *output, void* ptr) { auto o = (SVMOcas*)ptr; - auto f=o->features; + auto f = o->m_features; int32_t nData=f->get_num_vectors(); float64_t* y = o->lab.vector; diff --git a/src/shogun/classifier/svm/SVMOcas.h b/src/shogun/classifier/svm/SVMOcas.h index 8644350c8a2..8975bf9dc7a 100644 --- a/src/shogun/classifier/svm/SVMOcas.h +++ b/src/shogun/classifier/svm/SVMOcas.h @@ -47,12 +47,8 @@ class SVMOcas : public LinearMachine /** constructor * * @param C constant C - * @param traindat training features - * @param trainlab labels for training features */ - SVMOcas( - float64_t C, const std::shared_ptr& traindat, - std::shared_ptr trainlab); + SVMOcas(float64_t C); ~SVMOcas() override; /** get classifier type @@ -187,7 +183,9 @@ class SVMOcas : public LinearMachine * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine( + const std::shared_ptr& data, + const std::shared_ptr& labs) override; private: void init(); @@ -229,6 +227,8 @@ class SVMOcas : public LinearMachine /** primal objective */ float64_t primal_objective; + + std::shared_ptr m_features; }; } #endif diff --git a/src/shogun/classifier/svm/WDSVMOcas.cpp b/src/shogun/classifier/svm/WDSVMOcas.cpp index c7dc47b652c..aa2ea22d8aa 100644 --- a/src/shogun/classifier/svm/WDSVMOcas.cpp +++ b/src/shogun/classifier/svm/WDSVMOcas.cpp @@ -76,8 +76,7 @@ WDSVMOcas::WDSVMOcas(E_SVM_TYPE type) } WDSVMOcas::WDSVMOcas( - float64_t C, int32_t d, int32_t from_d, std::shared_ptr> traindat, - std::shared_ptr trainlab) + float64_t C, int32_t d, int32_t from_d, std::shared_ptr> traindat) : Machine(), use_bias(false), bufsize(3000), C1(C), C2(C), epsilon(1e-3), degree(d), from_degree(from_d) { @@ -85,7 +84,6 @@ WDSVMOcas::WDSVMOcas( old_w=NULL; method=SVM_OCAS; features=std::move(traindat); - set_labels(std::move(trainlab)); wd_weights=NULL; w_offsets=NULL; normalization_const=1.0; @@ -158,29 +156,24 @@ int32_t WDSVMOcas::set_wd_weights() return w_dim_single_c; } -bool WDSVMOcas::train_machine(std::shared_ptr data) +bool WDSVMOcas::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { io::info("C={}, epsilon={}, bufsize={}", get_C1(), get_epsilon(), bufsize); - ASSERT(m_labels) - ASSERT(m_labels->get_label_type() == LT_BINARY) - if (data) - { - if (data->get_feature_class() != C_STRING || + if (data->get_feature_class() != C_STRING || data->get_feature_type() != F_BYTE) - { - error("Features not of class string type byte"); - } - set_features(std::static_pointer_cast>(data)); + { + error("Features not of class string type byte"); } ASSERT(get_features()) - auto alphabet=get_features()->get_alphabet(); + features = data->as>(); + auto alphabet=features->get_alphabet(); ASSERT(alphabet && alphabet->get_alphabet()==RAWDNA) alphabet_size=alphabet->get_num_symbols(); string_length=features->get_num_vectors(); - SGVector labvec=(std::static_pointer_cast(m_labels))->get_labels(); + SGVector labvec=(std::static_pointer_cast(labs))->get_labels(); lab=labvec.vector; w_dim_single_char=set_wd_weights(); @@ -188,7 +181,7 @@ bool WDSVMOcas::train_machine(std::shared_ptr data) SG_DEBUG("w_dim_single_char={}", w_dim_single_char) w_dim=string_length*w_dim_single_char; SG_DEBUG("cutting plane has {} dims", w_dim) - num_vec=get_features()->get_max_vector_length(); + num_vec=features->get_max_vector_length(); set_normalization_const(); io::info("num_vec: {} num_lab: {}", num_vec, labvec.vlen); diff --git a/src/shogun/classifier/svm/WDSVMOcas.h b/src/shogun/classifier/svm/WDSVMOcas.h index 8220b279116..dd798f613cd 100644 --- a/src/shogun/classifier/svm/WDSVMOcas.h +++ b/src/shogun/classifier/svm/WDSVMOcas.h @@ -46,7 +46,7 @@ class WDSVMOcas : public Machine */ WDSVMOcas( float64_t C, int32_t d, int32_t from_d, - std::shared_ptr> traindat, std::shared_ptr trainlab); + std::shared_ptr> traindat); ~WDSVMOcas() override; /** get classifier type @@ -311,7 +311,7 @@ class WDSVMOcas : public Machine * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; protected: /** features */ diff --git a/src/shogun/clustering/GMM.cpp b/src/shogun/clustering/GMM.cpp index af2eddc78a4..66c96ae2c57 100644 --- a/src/shogun/clustering/GMM.cpp +++ b/src/shogun/clustering/GMM.cpp @@ -774,8 +774,8 @@ SGMatrix GMM::alpha_init(SGMatrix init_means) SGVector label_num(init_means.num_cols); linalg::range_fill(label_num); - auto knn=std::make_shared(1, std::make_shared(), std::make_shared(label_num)); - knn->train(std::make_shared>(init_means)); + auto knn=std::make_shared(1, std::make_shared()); + knn->train(std::make_shared>(init_means), std::make_shared(label_num)); auto init_labels = knn->apply(features)->as(); SGMatrix alpha(num_vectors, index_t(m_components.size())); diff --git a/src/shogun/clustering/KMeans.cpp b/src/shogun/clustering/KMeans.cpp index f3922077b29..01f06fb4a06 100644 --- a/src/shogun/clustering/KMeans.cpp +++ b/src/shogun/clustering/KMeans.cpp @@ -181,6 +181,7 @@ void KMeans::Lloyd_KMeans(SGMatrix centers, int32_t num_centers) bool KMeans::train_machine(std::shared_ptr data) { + m_features = data; initialize_training(data); Lloyd_KMeans(cluster_centers, k); compute_cluster_variances(); diff --git a/src/shogun/evaluation/CrossValidation.cpp b/src/shogun/evaluation/CrossValidation.cpp index baaaf7621b3..af5e83deee7 100644 --- a/src/shogun/evaluation/CrossValidation.cpp +++ b/src/shogun/evaluation/CrossValidation.cpp @@ -116,8 +116,7 @@ float64_t CrossValidation::evaluate_one_run(int64_t index) const auto evaluation_criterion = make_clone(m_evaluation_criterion); - machine->set_labels(labels_train); - machine->train(features_train); + machine->train(features_train, labels_train); auto result_labels = machine->apply(features_test); diff --git a/src/shogun/latent/LatentSVM.cpp b/src/shogun/latent/LatentSVM.cpp index f6f857ef111..e334b246a0b 100644 --- a/src/shogun/latent/LatentSVM.cpp +++ b/src/shogun/latent/LatentSVM.cpp @@ -59,13 +59,13 @@ std::shared_ptr LatentSVM::apply_latent() float64_t LatentSVM::do_inner_loop(float64_t cooling_eps) { auto ys = m_model->get_labels()->get_labels(); - auto feats = (m_model->get_caching() ? + std::shared_ptr dot_feats = (m_model->get_caching() ? m_model->get_cached_psi_features() : m_model->get_psi_feature_vectors()); - SVMOcas svm(m_C, feats, ys); + const auto feats = std::static_pointer_cast(dot_feats); + SVMOcas svm(m_C); svm.set_epsilon(cooling_eps); - svm.train(); - + svm.train(feats, ys); /* copy the resulting w */ set_w(svm.get_w().clone()); diff --git a/src/shogun/machine/BaggingMachine.cpp b/src/shogun/machine/BaggingMachine.cpp index 632a5150ad6..a41289edfa9 100644 --- a/src/shogun/machine/BaggingMachine.cpp +++ b/src/shogun/machine/BaggingMachine.cpp @@ -24,12 +24,6 @@ BaggingMachine::BaggingMachine() : RandomMixin() register_parameters(); } -BaggingMachine::BaggingMachine(std::shared_ptr features, std::shared_ptr labels) - : BaggingMachine() -{ - set_labels(std::move(labels)); - m_features = std::move(features); -} std::shared_ptr BaggingMachine::apply_binary(std::shared_ptr data) { @@ -48,21 +42,12 @@ std::shared_ptr BaggingMachine::apply_multiclass(std::shared_p { SGMatrix bagged_outputs = apply_outputs_without_combination(data); - - require(m_labels, "Labels not set."); - require( - m_labels->get_label_type() == LT_MULTICLASS, - "Labels ({}) are not compatible with multiclass.", - m_labels->get_name()); - - auto labels_multiclass = std::dynamic_pointer_cast(m_labels); auto num_samples = bagged_outputs.size() / m_num_bags; - auto num_classes = labels_multiclass->get_num_classes(); auto pred = std::make_shared(num_samples); - pred->allocate_confidences_for(num_classes); + pred->allocate_confidences_for(m_num_classes); - SGMatrix class_probabilities(num_classes, num_samples); + SGMatrix class_probabilities(m_num_classes, num_samples); class_probabilities.zero(); for (auto i = 0; i < num_samples; ++i) @@ -125,27 +110,24 @@ BaggingMachine::apply_outputs_without_combination(std::shared_ptr data return output; } -bool BaggingMachine::train_machine(std::shared_ptr data) +bool BaggingMachine::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { require(m_machine != NULL, "Machine is not set!"); require(m_num_bags > 0, "Number of bag is not set!"); - - if (data) + m_num_vectors = data->get_num_vectors(); + if(auto multiclass_labs = std::dynamic_pointer_cast(labs)) { - m_features = data; - - ASSERT(m_features->get_num_vectors() == m_labels->get_num_labels()); + m_num_classes = multiclass_labs->get_num_classes(); } - // if bag size is not provided, set it equal to number of training vectors if (m_bag_size == 0) - m_bag_size = m_features->get_num_vectors(); + m_bag_size = data->get_num_vectors(); // clear the array, if previously trained m_bags.clear(); // reset the oob index vector - m_all_oob_idx = SGVector(m_features->get_num_vectors()); + m_all_oob_idx = SGVector(data->get_num_vectors()); m_all_oob_idx.zero(); @@ -160,24 +142,27 @@ bool BaggingMachine::train_machine(std::shared_ptr data) { auto c=std::dynamic_pointer_cast(m_machine->clone()); ASSERT(c != NULL); - SGVector idx( - rnd_indicies.get_column_vector(i), m_bag_size, false); + SGVector idx(rnd_indicies.get_column_vector(i), m_bag_size, false); std::shared_ptr features; std::shared_ptr labels; if (env()->get_num_threads() == 1) { - features = m_features; - labels = m_labels; + features = data; + labels = labs; } else { - features = m_features->shallow_subset_copy(); - labels = m_labels->shallow_subset_copy(); + features = data->shallow_subset_copy(); + labels = labs->shallow_subset_copy(); } - - labels->add_subset(idx); +#pragma omp critical + { + labels->add_subset(idx); + features->add_subset(idx); + } + /* TODO: if it's a binary labeling ensure that there's always samples of both classes @@ -194,12 +179,15 @@ bool BaggingMachine::train_machine(std::shared_ptr data) } } */ - features->add_subset(idx); + set_machine_parameters(c, idx); - c->set_labels(labels); - c->train(features); - features->remove_subset(); - labels->remove_subset(); + c->train(features, labels); +#pragma omp critical + { + features->remove_subset(); + labels->remove_subset(); + } + #pragma omp critical { @@ -214,7 +202,7 @@ bool BaggingMachine::train_machine(std::shared_ptr data) pb.print_progress(); } pb.complete(); - + get_oob_error_lambda = [=](){return get_oob_error_impl(data, labs);}; return true; } @@ -224,7 +212,6 @@ void BaggingMachine::set_machine_parameters(std::shared_ptr m, SGVector void BaggingMachine::register_parameters() { - SG_ADD(&m_features, kFeatures, "Train features for bagging"); SG_ADD( &m_num_bags, kNBags, "Number of bags", ParameterProperties::HYPER); SG_ADD( @@ -275,9 +262,7 @@ void BaggingMachine::set_machine(std::shared_ptr machine) void BaggingMachine::init() { m_machine = nullptr; - m_features = nullptr; m_combination_rule = nullptr; - m_labels = nullptr; m_num_bags = 0; m_bag_size = 0; m_all_oob_idx = SGVector(); @@ -294,7 +279,7 @@ std::shared_ptr BaggingMachine::get_combination_rule() const return m_combination_rule; } -float64_t BaggingMachine::get_oob_error() const +float64_t BaggingMachine::get_oob_error_impl(const std::shared_ptr& data, const std::shared_ptr& labs) const { require( m_oob_evaluation_metric, "Out of bag evaluation metric is not set!"); @@ -302,8 +287,8 @@ float64_t BaggingMachine::get_oob_error() const require(m_bags.size() > 0, "BaggingMachine is not trained!"); SGMatrix output( - m_features->get_num_vectors(), m_bags.size()); - if (m_labels->get_label_type() == LT_REGRESSION) + m_num_vectors, m_bags.size()); + if (labs->get_label_type() == LT_REGRESSION) output.zero(); else output.set_const(NAN); @@ -318,9 +303,9 @@ float64_t BaggingMachine::get_oob_error() const auto current_oob = m_oob_indices[i]; SGVector oob(current_oob.data(), current_oob.size(), false); - m_features->add_subset(oob); + data->add_subset(oob); - auto l = m->apply(m_features); + auto l = m->apply(data); SGVector lv; if (l!=NULL) lv = std::dynamic_pointer_cast(l)->get_labels(); @@ -331,14 +316,14 @@ float64_t BaggingMachine::get_oob_error() const for (index_t j = 0; j < oob.vlen; j++) output(oob[j], i) = lv[j]; - m_features->remove_subset(); + data->remove_subset(); } std::vector idx; - for (index_t i = 0; i < m_features->get_num_vectors(); i++) + for (index_t i = 0; i < data->get_num_vectors(); i++) { if (m_all_oob_idx[i]) idx.push_back(i); @@ -350,7 +335,7 @@ float64_t BaggingMachine::get_oob_error() const lab[i] = combined[idx[i]]; std::shared_ptr predicted = NULL; - switch (m_labels->get_label_type()) + switch (labs->get_label_type()) { case LT_BINARY: predicted = std::make_shared(lab); @@ -369,16 +354,16 @@ float64_t BaggingMachine::get_oob_error() const } - m_labels->add_subset(SGVector(idx.data(), idx.size(), false)); - float64_t res = m_oob_evaluation_metric->evaluate(predicted, m_labels); - m_labels->remove_subset(); + labs->add_subset(SGVector(idx.data(), idx.size(), false)); + float64_t res = m_oob_evaluation_metric->evaluate(predicted, labs); + labs->remove_subset(); return res; } std::vector BaggingMachine::get_oob_indices(const SGVector& in_bag) { - SGVector out_of_bag(m_features->get_num_vectors()); + SGVector out_of_bag(m_num_vectors); out_of_bag.set_const(true); // mark the ones that are in_bag diff --git a/src/shogun/machine/BaggingMachine.h b/src/shogun/machine/BaggingMachine.h index a08ff0fb1f2..a4693ce891d 100644 --- a/src/shogun/machine/BaggingMachine.h +++ b/src/shogun/machine/BaggingMachine.h @@ -30,19 +30,11 @@ namespace shogun /** default ctor */ BaggingMachine(); - /** - * constructor - * - * @param features training features - * @param labels training labels - */ - BaggingMachine(std::shared_ptr features, std::shared_ptr labels); - ~BaggingMachine() override = default; - std::shared_ptr apply_binary(std::shared_ptr data=NULL) override; - std::shared_ptr apply_multiclass(std::shared_ptr data=NULL) override; - std::shared_ptr apply_regression(std::shared_ptr data=NULL) override; + std::shared_ptr apply_binary(std::shared_ptr data) override; + std::shared_ptr apply_multiclass(std::shared_ptr data) override; + std::shared_ptr apply_regression(std::shared_ptr data) override; /** * Set number of bags/machine to create @@ -118,8 +110,10 @@ namespace shogun * @param eval Evaluation method to use for calculating the error * @return out-of-bag error. */ - float64_t get_oob_error() const; - + float64_t get_oob_error() const + { + return get_oob_error_lambda(); + } /** name **/ const char* get_name() const override { @@ -127,7 +121,7 @@ namespace shogun } protected: - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr&, const std::shared_ptr& labs) override; /** * sets parameters of Machine - useful in Random Forest @@ -170,13 +164,11 @@ namespace shogun std::vector get_oob_indices(const SGVector& in_bag); + float64_t get_oob_error_impl(const std::shared_ptr& data, const std::shared_ptr& labs) const; protected: /** bags array */ std::vector> m_bags; - /** features to train on */ - std::shared_ptr m_features; - /** machine to use for bagging */ std::shared_ptr m_machine; @@ -198,9 +190,15 @@ namespace shogun /** metric to calculate the oob error */ std::shared_ptr m_oob_evaluation_metric; + int32_t m_num_classes; + + int32_t m_num_vectors; + + std::function get_oob_error_lambda; + + #ifndef SWIG public: - static constexpr std::string_view kFeatures = "features"; static constexpr std::string_view kNBags = "num_bags"; static constexpr std::string_view kBagSize = "bag_size"; static constexpr std::string_view kBags = "bags"; @@ -208,8 +206,8 @@ namespace shogun static constexpr std::string_view kAllOobIdx = "all_oob_idx"; static constexpr std::string_view kOobIndices = "oob_indices"; static constexpr std::string_view kMachine = "machine"; + static constexpr std::string_view kOobEvaluationMetric = "oob_evaluation_metric"; static constexpr std::string_view kOobError = "oob_error"; - static constexpr std::string_view kOobEvaluationMetric = "oob_evaluation_metric"; #endif }; } // namespace shogun diff --git a/src/shogun/machine/Composite.h b/src/shogun/machine/Composite.h index 4b7471589ea..000b1b619c9 100644 --- a/src/shogun/machine/Composite.h +++ b/src/shogun/machine/Composite.h @@ -56,7 +56,7 @@ namespace shogun m_stages = std::forward(stages); } - std::shared_ptr train( + bool train( const std::shared_ptr& data, const std::shared_ptr& labs) { @@ -78,7 +78,7 @@ namespace shogun }, v.second); } m_ensemble_machine->train(current_data, labs); - return m_ensemble_machine; + return true; } std::shared_ptr apply_multiclass(std::shared_ptr data) override diff --git a/src/shogun/machine/DirectorLinearMachine.h b/src/shogun/machine/DirectorLinearMachine.h index a702e4da564..d969f11a2a6 100644 --- a/src/shogun/machine/DirectorLinearMachine.h +++ b/src/shogun/machine/DirectorLinearMachine.h @@ -36,42 +36,12 @@ IGNORE_IN_CLASSLIST class DirectorLinearMachine : public LinearMachine } - /** train machine - * - * @param data training data (parameter can be avoided if distance or - * kernel-based classifiers are used and distance/kernels are - * initialized with train data). - * - * @return whether training was successful - */ - virtual bool train(std::shared_ptr data=NULL) - { - return LinearMachine::train(data); - } - virtual bool train_function(std::shared_ptr data=NULL) { error("Train function of Director Linear Machine needs to be overridden."); return false; } - /** set features - * - * @param feat features to set - */ - virtual void set_features(std::shared_ptr feat) - { - LinearMachine::set_features(feat); - } - - /** get features - * - * @return features - */ - virtual std::shared_ptr get_features() - { - return LinearMachine::get_features(); - } /** apply machine to data * if data is not specified apply to the current features @@ -99,27 +69,9 @@ IGNORE_IN_CLASSLIST class DirectorLinearMachine : public LinearMachine /** apply machine to data in means of multiclass classification problem */ using LinearMachine::apply_multiclass; - virtual float64_t apply_one(int32_t vec_idx) + virtual float64_t apply_one(const std::shared_ptr& features, int32_t vec_idx) { - return LinearMachine::apply_one(vec_idx); - } - - /** set labels - * - * @param lab labels - */ - virtual void set_labels(std::shared_ptr lab) - { - LinearMachine::set_labels(lab); - } - - /** get labels - * - * @return labels - */ - virtual std::shared_ptr get_labels() - { - return LinearMachine::get_labels(); + return LinearMachine::apply_one(features, vec_idx); } /** get classifier type @@ -143,13 +95,12 @@ IGNORE_IN_CLASSLIST class DirectorLinearMachine : public LinearMachine * kernel-based classifiers are used and distance/kernels are * initialized with train data) * - * NOT IMPLEMENTED! - * + * NOT IMPLEMENTED! * @return whether training was successful */ - virtual bool train_machine(std::shared_ptr data=NULL) + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override { - return train_function(data); + return LinearMachine::train_machine(data, labs); } }; diff --git a/src/shogun/machine/DistanceMachine.cpp b/src/shogun/machine/DistanceMachine.cpp index 75310c62149..4c571e6c9a1 100644 --- a/src/shogun/machine/DistanceMachine.cpp +++ b/src/shogun/machine/DistanceMachine.cpp @@ -17,7 +17,7 @@ using namespace shogun; DistanceMachine::DistanceMachine() -: Machine() +: NonParametricMachine() { init(); } @@ -99,6 +99,7 @@ void DistanceMachine::distances_rhs(SGVector& result, index_t idx_b1, std::shared_ptr DistanceMachine::apply_multiclass(std::shared_ptr data) { + if (data) { /* set distance features to given ones and apply to all */ @@ -118,30 +119,20 @@ std::shared_ptr DistanceMachine::apply_multiclass(std::shared_ return apply_multiclass(all); } return NULL; + } float64_t DistanceMachine::apply_one(int32_t num) { /* number of clusters */ - auto lhs=distance->get_lhs(); + const auto& lhs=distance->get_lhs(); int32_t num_clusters=lhs->get_num_vectors(); /* (multiple threads) calculate distances to all cluster centers */ SGVector dists(num_clusters); distances_lhs(dists, 0, num_clusters-1, num); - - /* find cluster index with smallest distance */ - float64_t result=dists.vector[0]; - index_t best_index=0; - for (index_t i=1; i #include - +#include namespace shogun { @@ -24,7 +24,7 @@ namespace shogun * * A distance machine is based on a a-priori choosen distance. */ -class DistanceMachine : public Machine +class DistanceMachine : public NonParametricMachine { public: /** default constructor */ diff --git a/src/shogun/machine/EnsembleMachine.h b/src/shogun/machine/EnsembleMachine.h index b21e6227828..bf0880f46dc 100644 --- a/src/shogun/machine/EnsembleMachine.h +++ b/src/shogun/machine/EnsembleMachine.h @@ -61,14 +61,14 @@ namespace shogun m_machines.push_back(machine); } - bool train_machine(std::shared_ptr data) override{ - require(m_labels, "Labels not set"); - train(data, m_labels); - return true; + bool train_machine( + const std::shared_ptr& data, const std::shared_ptr& labs) override { + return train(data, labs); } - void train( + + bool train( const std::shared_ptr& data, - const std::shared_ptr& labs) + const std::shared_ptr& labs) override { const int32_t& num_threads = env()->get_num_threads(); if (num_threads > 1) @@ -86,8 +86,7 @@ namespace shogun [&](int32_t start, int32_t end) { for (auto i = start; i < end; i++) { - m_machines[i]->set_labels(labs); - m_machines[i]->train(data); + m_machines[i]->train(data, labs); } }, t, t + machine_per_thread); @@ -98,18 +97,17 @@ namespace shogun } for (int i = machine_per_thread * num_threads; i < num_machine; i++) { - m_machines[i]->set_labels(labs); - m_machines[i]->train(data); + m_machines[i]->train(data, labs); } } else { for (auto&& machine : m_machines) { - machine->set_labels(labs); - machine->train(data); + machine->train(data, labs); } } + return true; } const char* get_name() const override diff --git a/src/shogun/machine/FeatureDispatchCRTP.h b/src/shogun/machine/FeatureDispatchCRTP.h index 591ee7d5459..2c2598f2c64 100644 --- a/src/shogun/machine/FeatureDispatchCRTP.h +++ b/src/shogun/machine/FeatureDispatchCRTP.h @@ -33,21 +33,27 @@ namespace shogun } protected: - bool train_dense(std::shared_ptr data) override + bool train_dense( + const std::shared_ptr& data, + const std::shared_ptr& labs) override { auto* this_casted = static_cast(this); switch (data->get_feature_type()) { case F_DREAL: return this_casted->template train_machine_templated( - std::dynamic_pointer_cast>(data)); + std::dynamic_pointer_cast>(data), + labs); case F_SHORTREAL: return this_casted->template train_machine_templated( - std::dynamic_pointer_cast>(data)); + std::dynamic_pointer_cast>(data), + labs); case F_LONGREAL: return this_casted ->template train_machine_templated( - std::dynamic_pointer_cast>(data)); + std::dynamic_pointer_cast>( + data), + labs); default: error( "Training with {} of provided type {} is not " @@ -83,20 +89,22 @@ namespace shogun } protected: - virtual bool train_string(std::shared_ptr data) + virtual bool train_string( + const std::shared_ptr& data, + const std::shared_ptr& labs) { auto this_casted = this->template as

(); switch (data->get_feature_type()) { case F_BYTE: return this_casted->template train_machine_templated( - data->as>()); + data->as>(), labs); case F_CHAR: return this_casted->template train_machine_templated( - data->as>()); + data->as>(), labs); case F_WORD: return this_casted->template train_machine_templated( - data->as>()); + data->as>(), labs); default: error( "Training with {} of provided type {} is " diff --git a/src/shogun/machine/GLM.cpp b/src/shogun/machine/GLM.cpp index 9f0d5a09799..534649a3b99 100644 --- a/src/shogun/machine/GLM.cpp +++ b/src/shogun/machine/GLM.cpp @@ -71,15 +71,14 @@ GLM::GLM( std::shared_ptr GLM::apply_regression(std::shared_ptr data) { + std::shared_ptr features; if (data) { if (!data->has_property(FP_DOT)) error("Specified features are not of type CDotFeatures"); - set_features(std::static_pointer_cast(data)); + features = std::static_pointer_cast(data); } - require(features, "Features are not provided"); - auto num = features->get_num_vectors(); ASSERT(num > 0) ASSERT(m_w.vlen == features->get_dim_feature_space()) @@ -92,19 +91,10 @@ GLM::apply_regression(std::shared_ptr data) return std::make_shared(result); } -void GLM::init_model(const std::shared_ptr data) +void GLM::init_model(const std::shared_ptr& data) { - ASSERT(m_labels) - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); - set_features(std::static_pointer_cast(data)); - } - ASSERT(features) - NormalDistribution normal_dist; - const auto& n_features = features->get_dim_feature_space(); + const auto& n_features = data->get_dim_feature_space(); if (m_w.vlen == 0) { @@ -123,12 +113,13 @@ void GLM::init_model(const std::shared_ptr data) } } -void GLM::iteration() +void GLM::iteration(const std::shared_ptr& features, + const std::shared_ptr& labs) { SGVector w_old = m_w.clone(); - auto X = get_features()->get_computed_dot_feature_matrix(); - auto y = regression_labels(get_labels())->get_labels(); + auto X = features->get_computed_dot_feature_matrix(); + auto y = regression_labels(labs)->get_labels(); auto gradient_w = m_cost_function->get_gradient_weights( X, y, m_w, bias, m_lambda, m_alpha, m_compute_bias, m_eta, diff --git a/src/shogun/machine/GLM.h b/src/shogun/machine/GLM.h index 6b616cd11f5..948521795c2 100644 --- a/src/shogun/machine/GLM.h +++ b/src/shogun/machine/GLM.h @@ -90,9 +90,10 @@ namespace shogun } protected: - void init_model(const std::shared_ptr data) override; + void init_model(const std::shared_ptr& data) override; - void iteration() override; + void iteration(const std::shared_ptr& features, + const std::shared_ptr& labs) override; private: /** Distribution type */ diff --git a/src/shogun/machine/GaussianProcess.cpp b/src/shogun/machine/GaussianProcess.cpp index b6f6636d052..fb5aea01a02 100644 --- a/src/shogun/machine/GaussianProcess.cpp +++ b/src/shogun/machine/GaussianProcess.cpp @@ -23,7 +23,7 @@ using namespace shogun; using namespace Eigen; -GaussianProcess::GaussianProcess() : RandomMixin() +GaussianProcess::GaussianProcess() : RandomMixin() { init(); } diff --git a/src/shogun/machine/GaussianProcess.h b/src/shogun/machine/GaussianProcess.h index 6e4b936e693..032cb859e3f 100644 --- a/src/shogun/machine/GaussianProcess.h +++ b/src/shogun/machine/GaussianProcess.h @@ -18,6 +18,7 @@ #include #include #include +#include namespace shogun { @@ -33,7 +34,7 @@ namespace shogun * * where \f$m(x)\f$ - mean function, \f$k(x, x')\f$ - covariance function. */ - class GaussianProcess : public RandomMixin + class GaussianProcess : public RandomMixin { public: /** default constructor */ @@ -104,7 +105,7 @@ namespace shogun */ void set_labels(std::shared_ptr lab) override { - Machine::set_labels(lab); + NonParametricMachine::set_labels(lab); m_method->set_labels(lab); } diff --git a/src/shogun/machine/IterativeMachine.h b/src/shogun/machine/IterativeMachine.h index 393ce7512c3..e9407228566 100644 --- a/src/shogun/machine/IterativeMachine.h +++ b/src/shogun/machine/IterativeMachine.h @@ -45,10 +45,7 @@ namespace shogun SG_ADD( &m_continue_features, "continue_features", "Continue Features"); } - ~IterativeMachine() override - { - - } + ~IterativeMachine() override = default; /** Returns convergence status */ bool is_complete() @@ -56,7 +53,9 @@ namespace shogun return m_complete; } - bool continue_train() override + virtual bool continue_train( + const std::shared_ptr& data, + const std::shared_ptr& labs) { this->reset_computation_variables(); //this->put("features", m_continue_features); @@ -65,7 +64,7 @@ namespace shogun while (m_current_iteration < m_max_iterations && !m_complete) { COMPUTATION_CONTROLLERS - iteration(); + iteration(data, labs); m_current_iteration++; pb.print_progress(); } @@ -92,28 +91,27 @@ namespace shogun } protected: - bool train_machine(std::shared_ptr data = NULL) override + bool train_machine( + const std::shared_ptr& data, + const std::shared_ptr& lab) override { - if (data) - { - - - m_continue_features = data; - } + m_continue_features = data; m_current_iteration = 0; m_complete = false; init_model(data); - return continue_train(); + return continue_train(data, lab); } /** To be overloaded by sublcasses to implement custom single * iterations of training loop. */ - virtual void iteration() = 0; + virtual void iteration( + const std::shared_ptr& data, + const std::shared_ptr& labs) = 0; /** To be overloaded in subclasses to initialize the model for training */ - virtual void init_model(const std::shared_ptr data = NULL) = 0; + virtual void init_model(const std::shared_ptr& data) = 0; /** Can be overloaded in subclasses to show more information * and/or clean up states diff --git a/src/shogun/machine/KernelMachine.cpp b/src/shogun/machine/KernelMachine.cpp index d8813cabb54..3a7239bf1b0 100644 --- a/src/shogun/machine/KernelMachine.cpp +++ b/src/shogun/machine/KernelMachine.cpp @@ -38,16 +38,14 @@ struct S_THREAD_PARAM_KERNEL_MACHINE }; #endif // DOXYGEN_SHOULD_SKIP_THIS -KernelMachine::KernelMachine() : Machine() +KernelMachine::KernelMachine() : NonParametricMachine() { init(); } KernelMachine::KernelMachine(const std::shared_ptr& k, SGVector alphas, - SGVector svs, float64_t b) : Machine() + SGVector svs, float64_t b) : KernelMachine() { - init(); - int32_t num_sv=svs.vlen; ASSERT(num_sv == alphas.vlen) create_new_model(num_sv); @@ -57,10 +55,8 @@ KernelMachine::KernelMachine(const std::shared_ptr& k, SGVector& machine) : Machine() +KernelMachine::KernelMachine(const std::shared_ptr& machine) : KernelMachine() { - init(); - SGVector alphas = machine->get_alphas().clone(); SGVector svs = machine->get_support_vectors().clone(); float64_t bias = machine->get_bias(); diff --git a/src/shogun/machine/KernelMachine.h b/src/shogun/machine/KernelMachine.h index f79b310b71f..15fc312215b 100644 --- a/src/shogun/machine/KernelMachine.h +++ b/src/shogun/machine/KernelMachine.h @@ -15,7 +15,7 @@ #include #include #include - +#include namespace shogun { @@ -41,7 +41,7 @@ class Features; * Using an a-priori choosen kernel, the \f$\alpha_i\f$ and bias are determined * in a training procedure. */ -class KernelMachine : public Machine +class KernelMachine : public NonParametricMachine { public: /** default constructor */ diff --git a/src/shogun/machine/KernelMulticlassMachine.cpp b/src/shogun/machine/KernelMulticlassMachine.cpp index e25f3966393..08ebc0bfb90 100644 --- a/src/shogun/machine/KernelMulticlassMachine.cpp +++ b/src/shogun/machine/KernelMulticlassMachine.cpp @@ -84,8 +84,8 @@ KernelMulticlassMachine::KernelMulticlassMachine() : MulticlassMachine(), m_kern * @param machine kernel machine * @param labs labels */ -KernelMulticlassMachine::KernelMulticlassMachine(std::shared_ptrstrategy, std::shared_ptr kernel, std::shared_ptr machine, std::shared_ptr labs) : - MulticlassMachine(std::move(strategy),std::move(machine),std::move(labs)), m_kernel(NULL) +KernelMulticlassMachine::KernelMulticlassMachine(std::shared_ptrstrategy, std::shared_ptr kernel, std::shared_ptr machine ) : + MulticlassMachine(std::move(strategy),std::move(machine)), m_kernel(NULL) { set_kernel(std::move(kernel)); SG_ADD(&m_kernel,"kernel", "The kernel to be used", ParameterProperties::HYPER); diff --git a/src/shogun/machine/KernelMulticlassMachine.h b/src/shogun/machine/KernelMulticlassMachine.h index ac50837495a..448d32e3103 100644 --- a/src/shogun/machine/KernelMulticlassMachine.h +++ b/src/shogun/machine/KernelMulticlassMachine.h @@ -32,7 +32,7 @@ class KernelMulticlassMachine : public MulticlassMachine * @param machine kernel machine * @param labs labels */ - KernelMulticlassMachine(std::shared_ptrstrategy, std::shared_ptr kernel, std::shared_ptr machine, std::shared_ptr labs); + KernelMulticlassMachine(std::shared_ptrstrategy, std::shared_ptr kernel, std::shared_ptr machine ); /** destructor */ ~KernelMulticlassMachine() override; diff --git a/src/shogun/machine/LinearMachine.cpp b/src/shogun/machine/LinearMachine.cpp index e9cc65e6362..f236230864a 100644 --- a/src/shogun/machine/LinearMachine.cpp +++ b/src/shogun/machine/LinearMachine.cpp @@ -33,13 +33,8 @@ LinearMachine::LinearMachine(const std::shared_ptr& machine) : Ma void LinearMachine::init() { - bias = 0; - features = NULL; - SG_ADD(&m_w, "w", "Parameter vector w.", ParameterProperties::MODEL); SG_ADD(&bias, "bias", "Bias b.", ParameterProperties::MODEL); - SG_ADD( - (std::shared_ptr*)&features, "features", "Feature object."); } @@ -48,7 +43,8 @@ LinearMachine::~LinearMachine() } -float64_t LinearMachine::apply_one(int32_t vec_idx) +float64_t LinearMachine::apply_one( + const std::shared_ptr& features, int32_t vec_idx) { return features->dot(vec_idx, m_w) + bias; } @@ -67,20 +63,11 @@ std::shared_ptr LinearMachine::apply_binary(std::shared_ptr LinearMachine::apply_get_outputs(std::shared_ptr data) { - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type CDotFeatures"); - - set_features(std::static_pointer_cast(data)); - } - - if (!features) - return SGVector(); - + const auto features = data->as(); int32_t num=features->get_num_vectors(); - ASSERT(num>0) - ASSERT(m_w.vlen==features->get_dim_feature_space()) + require( + m_w.vlen == features->get_dim_feature_space(), + "Fetures expected to have {} dimentions", m_w.vlen); SGVector out(num); features->dense_dot_range(out.vector, 0, num, NULL, m_w.vector, m_w.vlen, bias); return out; @@ -106,16 +93,4 @@ float64_t LinearMachine::get_bias() const return bias; } -void LinearMachine::set_features(std::shared_ptr feat) -{ - - - features=std::move(feat); -} - -std::shared_ptr LinearMachine::get_features() -{ - - return features; -} diff --git a/src/shogun/machine/LinearMachine.h b/src/shogun/machine/LinearMachine.h index 3d4c51185ea..2f2d1458696 100644 --- a/src/shogun/machine/LinearMachine.h +++ b/src/shogun/machine/LinearMachine.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace shogun @@ -95,19 +96,14 @@ class LinearMachine : public Machine */ virtual float64_t get_bias() const; - /** set features - * - * @param feat features to set - */ - virtual void set_features(std::shared_ptr feat); - /** apply linear machine to data * for binary classification problem * * @param data (test)data to be classified * @return classified labels */ - std::shared_ptr apply_binary(std::shared_ptr data=NULL) override; + std::shared_ptr + apply_binary(std::shared_ptr data) override; /** apply linear machine to data * for regression problem @@ -115,16 +111,12 @@ class LinearMachine : public Machine * @param data (test)data to be classified * @return classified labels */ - std::shared_ptr apply_regression(std::shared_ptr data=NULL) override; + std::shared_ptr + apply_regression(std::shared_ptr data) override; /** applies to one vector */ - float64_t apply_one(int32_t vec_idx) override; - - /** get features - * - * @return features - */ - virtual std::shared_ptr get_features(); + virtual float64_t apply_one( + const std::shared_ptr& features, int32_t vec_idx); /** Returns the name of the SGSerializable instance. It MUST BE * the CLASS NAME without the prefixed `C'. @@ -142,6 +134,17 @@ class LinearMachine : public Machine */ virtual SGVector apply_get_outputs(std::shared_ptr data); + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) final + { + const auto dot_feat = data->as(); + return train_machine(dot_feat, labs); + } + + virtual bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) + { + not_implemented(SOURCE_LOCATION); + return false; + } private: void init(); @@ -151,10 +154,7 @@ class LinearMachine : public Machine SGVector m_w; /** bias */ - float64_t bias; - - /** features */ - std::shared_ptr features; + float64_t bias = 0.0; }; } #endif diff --git a/src/shogun/machine/LinearMulticlassMachine.h b/src/shogun/machine/LinearMulticlassMachine.h index 27ae4d36aeb..6c9c3938748 100644 --- a/src/shogun/machine/LinearMulticlassMachine.h +++ b/src/shogun/machine/LinearMulticlassMachine.h @@ -30,20 +30,16 @@ class LinearMulticlassMachine : public MulticlassMachine /** default constructor */ LinearMulticlassMachine() : MulticlassMachine() { - SG_ADD(&m_features, "m_features", "Feature object."); + } /** standard constructor * @param strategy multiclass strategy - * @param features features * @param machine linear machine - * @param labs labels */ - LinearMulticlassMachine(std::shared_ptr strategy, std::shared_ptr features, std::shared_ptr machine, std::shared_ptr labs) : - MulticlassMachine(strategy, machine,labs) + LinearMulticlassMachine(std::shared_ptr strategy, std::shared_ptr machine ) : + MulticlassMachine(strategy, machine) { - set_features(features->as()); - SG_ADD(&m_features, "m_features", "Feature object."); } /** destructor */ @@ -57,81 +53,83 @@ class LinearMulticlassMachine : public MulticlassMachine return "LinearMulticlassMachine"; } - /** set features - * - * @param f features - */ - void set_features(std::shared_ptr f) - { - m_features = f; - for (auto m: m_machines) - { - auto machine = m->as(); - machine->set_features(f); - } + virtual int32_t get_num_classes() const { + return m_num_classes; } - /** get features - * - * @return features - */ - std::shared_ptr get_features() const - { - return m_features; + virtual int32_t get_dim_feature_space() const{ + return m_dim_feature_space; } protected: - /** init machine for train with setting features */ - bool init_machine_for_train(std::shared_ptr data) override + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override { - if (!m_machine) - error("No machine given in Multiclass constructor"); + m_num_vectors = data->get_num_vectors(); + m_num_classes = multiclass_labels(labs)->get_num_classes(); + m_dim_feature_space = data->as()->get_dim_feature_space(); + + require(m_multiclass_strategy, "Multiclass strategy not set"); + int32_t num_classes = labs->as()->get_num_classes(); + m_multiclass_strategy->set_num_classes(num_classes); + + m_machines.clear(); + auto train_labels = std::make_shared(get_num_rhs_vectors()); + m_multiclass_strategy->train_start( + multiclass_labels(labs), train_labels); + while (m_multiclass_strategy->train_has_more()) + { + SGVector subset=m_multiclass_strategy->train_prepare_next(); + if (subset.vlen) + { + train_labels->add_subset(subset); + data->add_subset(subset); + } + m_machine->train(data, train_labels); + m_machines.push_back(get_machine_from_trained(m_machine)); + + if (subset.vlen) + { + train_labels->remove_subset(); + data->remove_subset(); + } + } - if (data) - set_features(data->as()); + m_multiclass_strategy->train_stop(); - m_machine->as()->set_features(m_features); return true; } + /** init machine for train with setting features */ + bool init_machine_for_train(std::shared_ptr data) override + { + require(m_machine, "No machine given in Multiclass constructor"); + return true; + } /** init machines for applying with setting features */ bool init_machines_for_apply(std::shared_ptr data) override { - if (data) - set_features(data->as()); - - for (auto m: m_machines) - { - auto machine = m->as(); - ASSERT(m_features) - ASSERT(machine) - machine->set_features(m_features); - } - return true; } /** check features availability */ bool is_ready() override { - if (m_features) - return true; + return true; - return false; } /** construct linear machine from given linear machine */ std::shared_ptr get_machine_from_trained(std::shared_ptr machine) const override { - return std::make_shared(machine->as()); + return machine->clone(ParameterProperties::MODEL)->as(); } /** get number of rhs feature vectors */ int32_t get_num_rhs_vectors() const override { - return m_features->get_num_vectors(); + return m_num_vectors; } /** set subset to the features of the machine, deletes old one @@ -140,23 +138,19 @@ class LinearMulticlassMachine : public MulticlassMachine */ void add_machine_subset(SGVector subset) override { - /* changing the subset structure to use subset stacks. This might - * have to be revised. Heiko Strathmann */ - m_features->add_subset(subset); + } /** deletes any subset set to the features of the machine */ void remove_machine_subset() override { - /* changing the subset structure to use subset stacks. This might - * have to be revised. Heiko Strathmann */ - m_features->remove_subset(); + } protected: - - /** features */ - std::shared_ptr m_features; + int32_t m_num_vectors; + int32_t m_dim_feature_space; + int32_t m_num_classes; }; } #endif diff --git a/src/shogun/machine/Machine.cpp b/src/shogun/machine/Machine.cpp index 09bce852c85..a9af08d68ca 100644 --- a/src/shogun/machine/Machine.cpp +++ b/src/shogun/machine/Machine.cpp @@ -13,11 +13,10 @@ using namespace shogun; Machine::Machine() - : StoppableSGObject(), m_max_train_time(0), m_labels(NULL), + : StoppableSGObject(), m_max_train_time(0), m_solver_type(ST_AUTO) { SG_ADD(&m_max_train_time, "max_train_time", "Maximum training time."); - SG_ADD(&m_labels, "labels", "Labels to be used."); SG_ADD_OPTIONS( (machine_int_t*)&m_solver_type, "solver_type", "Type of solver.", ParameterProperties::NONE, @@ -33,26 +32,11 @@ Machine::~Machine() bool Machine::train(std::shared_ptr data) { - if (train_require_labels()) - { - if (m_labels == NULL) - error("{}@{}: No labels given", get_name(), fmt::ptr(this)); - - m_labels->ensure_valid(get_name()); - } - auto sub = connect_to_signal_handler(); bool result = false; if (support_feature_dispatching()) { - require(data != NULL, "Features not provided!"); - require( - data->get_num_vectors() == m_labels->get_num_labels(), - "Number of training vectors ({}) does not match number of " - "labels ({})", - data->get_num_vectors(), m_labels->get_num_labels()); - if (support_dense_dispatching() && data->get_feature_class() == C_DENSE) result = train_dense(data); else if ( @@ -71,22 +55,41 @@ bool Machine::train(std::shared_ptr data) return result; } -void Machine::set_labels(std::shared_ptr lab) +bool Machine::train( + const std::shared_ptr& data, const std::shared_ptr& labs) { - if (lab != NULL) - { - if (!is_label_valid(lab)) - error("Invalid label for {}", get_name()); + if(data) + { + require(data->get_num_vectors() == labs->get_num_labels(), + "Number of training vectors ({}) does not match number of " + "labels ({})", + data->get_num_vectors(), labs->get_num_labels()); + } + + auto sub = connect_to_signal_handler(); + bool result = false; - m_labels = lab; - } -} + if (support_feature_dispatching()) + { + if (support_dense_dispatching() && data->get_feature_class() == C_DENSE) + result = train_dense(data, labs); + else if ( + support_string_dispatching() && + data->get_feature_class() == C_STRING) + result = train_string(data, labs); + else + error("Training with {} is not implemented!", data->get_name()); + } + else + result = train_machine(data, labs); -std::shared_ptr Machine::get_labels() -{ - return m_labels; + sub.unsubscribe(); + reset_computation_variables(); + + return result; } + void Machine::set_max_train_time(float64_t t) { m_max_train_time = t; diff --git a/src/shogun/machine/Machine.h b/src/shogun/machine/Machine.h index 98b2627c208..3b4f615c2f0 100644 --- a/src/shogun/machine/Machine.h +++ b/src/shogun/machine/Machine.h @@ -154,6 +154,17 @@ class Machine : public StoppableSGObject */ virtual bool train(std::shared_ptr data=NULL); + /** train machine + * + * @param data training data + * @param lab training label + * + * @return whether training was successful + */ + virtual bool train( + const std::shared_ptr& data, + const std::shared_ptr& lab); + /** apply machine to data * if data is not specified apply to the current features * @@ -173,18 +184,6 @@ class Machine : public StoppableSGObject /** apply machine to data in means of latent problem */ virtual std::shared_ptr apply_latent(std::shared_ptr data=NULL); - /** set labels - * - * @param lab labels - */ - virtual void set_labels(std::shared_ptr lab); - - /** get labels - * - * @return labels - */ - virtual std::shared_ptr get_labels(); - /** set maximum training time * * @param t maximimum training time @@ -255,11 +254,33 @@ class Machine : public StoppableSGObject return false; } + virtual bool train_machine( + const std::shared_ptr& data, const std::shared_ptr& labs) + { + error("train_machine is not yet implemented for {}!", get_name()); + return false; + } + virtual bool train_dense(std::shared_ptr data) { not_implemented(SOURCE_LOCATION); return false; } + virtual bool train_dense( + const std::shared_ptr& data, + const std::shared_ptr& labs) + { + not_implemented(SOURCE_LOCATION); + return false; + } + + virtual bool train_string( + const std::shared_ptr& data, + const std::shared_ptr& labs) + { + not_implemented(SOURCE_LOCATION); + return false; + } virtual bool train_string(std::shared_ptr data) { @@ -313,7 +334,7 @@ class Machine : public StoppableSGObject float64_t m_max_train_time; /** labels */ - std::shared_ptr m_labels; + //std::shared_ptr m_labels; /** solver type */ ESolverType m_solver_type; diff --git a/src/shogun/machine/MulticlassMachine.cpp b/src/shogun/machine/MulticlassMachine.cpp index 96b9800caa9..0e4bbcf3692 100644 --- a/src/shogun/machine/MulticlassMachine.cpp +++ b/src/shogun/machine/MulticlassMachine.cpp @@ -27,11 +27,9 @@ MulticlassMachine::MulticlassMachine() MulticlassMachine::MulticlassMachine( std::shared_ptrstrategy, - std::shared_ptr machine, std::shared_ptr labs) + std::shared_ptr machine ) : BaseMulticlassMachine(), m_multiclass_strategy(std::move(strategy)) { - set_labels(std::move(labs)); - m_machine = std::move(machine); register_parameters(); } @@ -40,28 +38,23 @@ MulticlassMachine::~MulticlassMachine() { } -void MulticlassMachine::set_labels(std::shared_ptr lab) -{ - Machine::set_labels(lab); -} - void MulticlassMachine::register_parameters() { SG_ADD(&m_multiclass_strategy,"multiclass_strategy", "Multiclass strategy"); SG_ADD(&m_machine, "machine", "The base machine"); } -void MulticlassMachine::init_strategy() +void MulticlassMachine::init_strategy( const std::shared_ptr& labs) { - int32_t num_classes = m_labels->as()->get_num_classes(); + int32_t num_classes = labs->as()->get_num_classes(); m_multiclass_strategy->set_num_classes(num_classes); } -std::shared_ptr MulticlassMachine::get_submachine_outputs(int32_t i) +std::shared_ptr MulticlassMachine::get_submachine_outputs(const std::shared_ptr& data, int32_t i) { auto machine = m_machines.at(i); ASSERT(machine) - return machine->apply_binary(); + return machine->apply_binary(data); } float64_t MulticlassMachine::get_submachine_output(int32_t i, int32_t num) @@ -76,7 +69,7 @@ std::shared_ptr MulticlassMachine::apply_multiclass(std::share SG_TRACE("entering {}::apply_multiclass({} at {})", get_name(), data ? data->get_name() : "NULL", fmt::ptr(data.get())); - std::shared_ptr return_labels=NULL; + std::shared_ptr return_labels; if (data) init_machines_for_apply(data); @@ -88,7 +81,6 @@ std::shared_ptr MulticlassMachine::apply_multiclass(std::share /* num vectors depends on whether data is provided */ int32_t num_vectors=data ? data->get_num_vectors() : get_num_rhs_vectors(); - int32_t num_machines=m_machines.size(); if (num_machines <= 0) error("num_machines = {}, did you train your machine?", num_machines); @@ -107,11 +99,9 @@ std::shared_ptr MulticlassMachine::apply_multiclass(std::share std::vector> outputs(num_machines); SGVector As(num_machines); SGVector Bs(num_machines); - for (int32_t i=0; iget_values()); @@ -122,7 +112,6 @@ std::shared_ptr MulticlassMachine::apply_multiclass(std::share if (heuris!=PROB_HEURIS_NONE && heuris!=OVA_SOFTMAX) outputs[i]->scores_to_probabilities(0,0); } - SGVector output_for_i(num_machines); SGVector r_output_for_i(num_machines); if (heuris!=PROB_HEURIS_NONE) @@ -180,10 +169,11 @@ std::shared_ptr MulticlassMachine::apply_multilabel_output(std if (is_ready()) { + /* num vectors depends on whether data is provided */ int32_t num_vectors=data ? data->get_num_vectors() : get_num_rhs_vectors(); - + int32_t num_machines=m_machines.size(); if (num_machines <= 0) error("num_machines = {}, did you train your machine?", num_machines); @@ -191,16 +181,14 @@ std::shared_ptr MulticlassMachine::apply_multilabel_output(std auto result=std::make_shared(num_vectors, n_outputs); std::vector> outputs(num_machines); - + for (int32_t i=0; i < num_machines; ++i) - outputs[i] = get_submachine_outputs(i); - + outputs[i] = get_submachine_outputs(data, i); SGVector output_for_i(num_machines); for (int32_t i=0; iget_value(i); - result->set_label(i, m_multiclass_strategy->decide_label_multiple_output(output_for_i, n_outputs)); } for (int32_t i=0; i < num_machines; ++i) @@ -214,10 +202,10 @@ std::shared_ptr MulticlassMachine::apply_multilabel_output(std return return_labels; } -bool MulticlassMachine::train_machine(std::shared_ptr data) +bool MulticlassMachine::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { ASSERT(m_multiclass_strategy) - init_strategy(); + init_strategy(labs); if ( !data && !is_ready() ) error("Please provide training data."); @@ -227,10 +215,8 @@ bool MulticlassMachine::train_machine(std::shared_ptr data) m_machines.clear(); auto train_labels = std::make_shared(get_num_rhs_vectors()); - m_machine->set_labels(train_labels); - m_multiclass_strategy->train_start( - multiclass_labels(m_labels), train_labels); + multiclass_labels(labs), train_labels); while (m_multiclass_strategy->train_has_more()) { SGVector subset=m_multiclass_strategy->train_prepare_next(); @@ -240,7 +226,7 @@ bool MulticlassMachine::train_machine(std::shared_ptr data) add_machine_subset(subset); } - m_machine->train(); + m_machine->train(data, train_labels); m_machines.push_back(get_machine_from_trained(m_machine)); if (subset.vlen) diff --git a/src/shogun/machine/MulticlassMachine.h b/src/shogun/machine/MulticlassMachine.h index 2a08c39313a..6b11735a54c 100644 --- a/src/shogun/machine/MulticlassMachine.h +++ b/src/shogun/machine/MulticlassMachine.h @@ -36,17 +36,11 @@ class MulticlassMachine : public BaseMulticlassMachine * @param machine machine * @param labels labels */ - MulticlassMachine(std::shared_ptr strategy, std::shared_ptr machine, std::shared_ptr labels); + MulticlassMachine(std::shared_ptr strategy, std::shared_ptr machine ); /** destructor */ ~MulticlassMachine() override; - /** set labels - * - * @param lab labels - */ - void set_labels(std::shared_ptr lab) override; - /** set machine * * @param num index of machine @@ -74,10 +68,11 @@ class MulticlassMachine : public BaseMulticlassMachine } /** get outputs of i-th submachine + * @param data features to be trained * @param i number of submachine * @return outputs */ - virtual std::shared_ptr get_submachine_outputs(int32_t i); + virtual std::shared_ptr get_submachine_outputs(const std::shared_ptr& data, int32_t i); /** get output of i-th submachine for num-th vector * @param i number of submachine @@ -90,13 +85,13 @@ class MulticlassMachine : public BaseMulticlassMachine * * @return resulting labels */ - std::shared_ptr apply_multiclass(std::shared_ptr data=NULL) override; + std::shared_ptr apply_multiclass(std::shared_ptr data) override; /** classify all examples with multiple output * * @return resulting labels */ - virtual std::shared_ptr apply_multilabel_output(std::shared_ptr data=NULL, int32_t n_outputs=5); + virtual std::shared_ptr apply_multilabel_output(std::shared_ptr data, int32_t n_outputs=5); /** classify one example * @param vec_idx @@ -154,13 +149,13 @@ class MulticlassMachine : public BaseMulticlassMachine protected: /** init strategy */ - void init_strategy(); + void init_strategy( const std::shared_ptr& labs); /** clear machines */ void clear_machines(); /** train machine */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr&, const std::shared_ptr& labs) override; /** abstract init machine for training method */ virtual bool init_machine_for_train(std::shared_ptr data) = 0; diff --git a/src/shogun/machine/NonParametricMachine.h b/src/shogun/machine/NonParametricMachine.h new file mode 100644 index 00000000000..2d78be4da2d --- /dev/null +++ b/src/shogun/machine/NonParametricMachine.h @@ -0,0 +1,70 @@ +/* + * This software is distributed under BSD 3-clause license (see LICENSE file). + * + * Authors: Yuhui Liu + */ + +#ifndef NONPARAMETRCMACHINE_H_ +#define NONPARAMETRCMACHINE_H_ + +#include + +namespace shogun +{ + + class NonParametricMachine : public Machine + { + public: + NonParametricMachine() : Machine() + { + // TODO : when all refactor is done, m_labels should be removed from + // Machine Class + SG_ADD( + &m_labels, "labels", "labels used in train machine algorithm"); + SG_ADD( + &m_features, "features_train", + "Training features of nonparametric model", + ParameterProperties::READONLY); + } + virtual ~NonParametricMachine() + { + } + using Machine::train; + + bool train( + const std::shared_ptr& data, + const std::shared_ptr& lab) override + { + m_labels = lab; + require( + data->get_num_vectors() == m_labels->get_num_labels(), + "Number of training vectors ({}) does not match number of " + "labels ({})", + data->get_num_vectors(), m_labels->get_num_labels()); + return Machine::train(data); + } + + const char* get_name() const override + { + return "NonParametricMachine"; + } + + virtual void set_labels(std::shared_ptr lab) + { + m_labels = lab; + } + + /** get labels + * + * @return labels + */ + virtual std::shared_ptr get_labels() + { + return m_labels; + } + protected: + std::shared_ptr m_features; + std::shared_ptr m_labels; + }; +} // namespace shogun +#endif \ No newline at end of file diff --git a/src/shogun/machine/Pipeline.cpp b/src/shogun/machine/Pipeline.cpp index 9dfc49c6a0e..8ae0d31c5a8 100644 --- a/src/shogun/machine/Pipeline.cpp +++ b/src/shogun/machine/Pipeline.cpp @@ -122,31 +122,12 @@ namespace shogun bool Pipeline::train_machine(std::shared_ptr data) { - if (train_require_labels()) - { - require(m_labels, "No labels given."); - } - auto current_data = data; - for (auto&& stage : m_stages) - { - if (holds_alternative>(stage.second)) - { - auto transformer = shogun::get>(stage.second); - transformer->train_require_labels() - ? transformer->fit(current_data, m_labels) - : transformer->fit(current_data); - - current_data = transformer->transform(current_data); - } - else - { - auto machine = shogun::get>(stage.second); - if (machine->train_require_labels()) - machine->set_labels(m_labels); - machine->train(current_data); - } - } - return true; + return train_machine_impl(data); + } + bool Pipeline::train_machine(const std::shared_ptr& data, + const std::shared_ptr& labs) + { + return train_machine_impl(data, labs); } std::shared_ptr Pipeline::apply(std::shared_ptr data) diff --git a/src/shogun/machine/Pipeline.h b/src/shogun/machine/Pipeline.h index 69944bb1504..ca814e6db37 100644 --- a/src/shogun/machine/Pipeline.h +++ b/src/shogun/machine/Pipeline.h @@ -123,8 +123,32 @@ namespace shogun EProblemType get_machine_problem_type() const override; protected: - bool train_machine(std::shared_ptr data = NULL) override; - + template + bool train_machine_impl(std::shared_ptr data, Args&& ... args) + { + require(data, "Data should not be NULL"); + auto current_data = data; + for (auto&& stage : m_stages) + { + if (holds_alternative>(stage.second)) + { + auto transformer = shogun::get>(stage.second); + transformer->train_require_labels() + ? transformer->fit(current_data, args...) + : transformer->fit(current_data); + current_data = transformer->transform(current_data); + } + else + { + auto machine = shogun::get>(stage.second); + machine->train(current_data, args...); + } + } + return true; + } + bool train_machine(std::shared_ptr data) override; + bool train_machine(const std::shared_ptr& data, + const std::shared_ptr& labs) override; std::vector, std::shared_ptr>>> m_stages; bool train_require_labels() const override; diff --git a/src/shogun/machine/RandomForest.cpp b/src/shogun/machine/RandomForest.cpp index 410d99379f0..436fe1b78c6 100644 --- a/src/shogun/machine/RandomForest.cpp +++ b/src/shogun/machine/RandomForest.cpp @@ -53,26 +53,12 @@ RandomForest::RandomForest(int32_t rand_numfeats, int32_t num_bags) m_machine->as()->set_feature_subset_size(rand_numfeats); } -RandomForest::RandomForest(std::shared_ptr features, std::shared_ptr labels, int32_t num_bags, int32_t rand_numfeats) -: BaggingMachine() -{ - init(); - m_features=std::move(features); - set_labels(std::move(labels)); - - set_num_bags(num_bags); - - if (rand_numfeats>0) - m_machine->as()->set_feature_subset_size(rand_numfeats); -} -RandomForest::RandomForest(std::shared_ptr features, std::shared_ptr labels, SGVector weights, int32_t num_bags, int32_t rand_numfeats) +RandomForest::RandomForest(SGVector weights, int32_t num_bags, int32_t rand_numfeats) : BaggingMachine() { init(); - m_features=std::move(features); - set_labels(std::move(labels)); m_weights=weights; set_num_bags(num_bags); @@ -163,24 +149,17 @@ void RandomForest::set_machine_parameters(std::shared_ptr m, SGVectorset_machine_problem_type(m_machine->as()->get_machine_problem_type()); } -bool RandomForest::train_machine(std::shared_ptr data) +bool RandomForest::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - if (data) - { - m_features = data; - } - - require(m_features, "Training features not set!"); - - m_machine->as()->pre_sort_features(m_features, m_sorted_transposed_feats, m_sorted_indices); - return BaggingMachine::train_machine(); + m_machine->as()->pre_sort_features(data, m_sorted_transposed_feats, m_sorted_indices); + m_num_features = data->as>()->get_num_features(); + return BaggingMachine::train_machine(data, labs); } SGVector RandomForest::get_feature_importances() const { - auto num_feats = - m_features->as>()->get_num_features(); + const auto& num_feats = m_num_features; SGVector feat_importances(num_feats); feat_importances.zero(); for (size_t i = 0; i < m_bags.size(); i++) diff --git a/src/shogun/machine/RandomForest.h b/src/shogun/machine/RandomForest.h index 8990d5d25ee..e7eebbe93df 100644 --- a/src/shogun/machine/RandomForest.h +++ b/src/shogun/machine/RandomForest.h @@ -56,15 +56,6 @@ class RandomForest : public BaggingMachine */ RandomForest(int32_t num_rand_feats, int32_t num_bags=10); - /** constructor - * - * @param features training features - * @param labels training labels - * @param num_bags number of trees in forest - * @param num_rand_feats number of attributes chosen randomly during node split in candidate trees - */ - RandomForest(std::shared_ptr features, std::shared_ptr labels, int32_t num_bags=10, int32_t num_rand_feats=0); - /** constructor * * @param features training features @@ -73,7 +64,7 @@ class RandomForest : public BaggingMachine * @param num_bags number of trees in forest * @param num_rand_feats number of attributes chosen randomly during node split in candidate trees */ - RandomForest(std::shared_ptr features, std::shared_ptr labels, SGVector weights, int32_t num_bags=10, int32_t num_rand_feats=0); + RandomForest(SGVector weights, int32_t num_bags=10, int32_t num_rand_feats=0); /** destructor */ ~RandomForest() override; @@ -146,7 +137,7 @@ class RandomForest : public BaggingMachine protected: - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** sets parameters of CARTree - sets machine labels and weights here * * @param m machine @@ -159,6 +150,7 @@ class RandomForest : public BaggingMachine void init(); private: + int32_t m_num_features; /** weights */ SGVector m_weights; diff --git a/src/shogun/machine/StochasticGBMachine.cpp b/src/shogun/machine/StochasticGBMachine.cpp index 9b8cee1bbce..7cbb8991418 100644 --- a/src/shogun/machine/StochasticGBMachine.cpp +++ b/src/shogun/machine/StochasticGBMachine.cpp @@ -161,12 +161,11 @@ std::shared_ptr StochasticGBMachine::apply_regression(std::sha return std::make_shared(retlabs); } -bool StochasticGBMachine::train_machine(std::shared_ptr data) +bool StochasticGBMachine::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { require(data,"training data not supplied!"); require(m_machine,"machine not set!"); require(m_loss,"loss function not specified"); - require(m_labels, "labels not specified"); auto feats=data->as>(); @@ -181,7 +180,7 @@ bool StochasticGBMachine::train_machine(std::shared_ptr data) for (auto i : SG_PROGRESS(range(m_num_iter))) { - const auto result = get_subset(feats, interf); + const auto result = get_subset(feats, interf, labs); const auto& feats_iter = std::get<0>(result); const auto& interf_iter = std::get<1>(result); const auto& labels_iter = std::get<2>(result); @@ -237,8 +236,7 @@ std::shared_ptr StochasticGBMachine::fit_model(const std::shared_ptrclone()->as(); // train cloned machine - c->set_labels(labels); - c->train(feats); + c->train(feats, labels); return c; } @@ -259,10 +257,10 @@ std::shared_ptr StochasticGBMachine::compute_pseudo_residuals( std::tuple>, std::shared_ptr, std::shared_ptr> StochasticGBMachine::get_subset( - std::shared_ptr> f, std::shared_ptr interf) + std::shared_ptr> f, std::shared_ptr interf, std::shared_ptr labs) { if (m_subset_frac == 1.0) - return std::make_tuple(f, interf, m_labels); + return std::make_tuple(f, interf, labs); int32_t subset_size=m_subset_frac*(f->get_num_vectors()); SGVector idx(f->get_num_vectors()); @@ -274,7 +272,7 @@ StochasticGBMachine::get_subset( return std::make_tuple( view(f, subset), view(interf, subset), - view(m_labels, subset)); + view(labs, subset)); } void StochasticGBMachine::initialize_learners() diff --git a/src/shogun/machine/StochasticGBMachine.h b/src/shogun/machine/StochasticGBMachine.h index 75086b08526..c8887972495 100644 --- a/src/shogun/machine/StochasticGBMachine.h +++ b/src/shogun/machine/StochasticGBMachine.h @@ -148,7 +148,7 @@ class StochasticGBMachine : public RandomMixin * @param data training data * @return true */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** compute gamma values * @@ -185,7 +185,8 @@ class StochasticGBMachine : public RandomMixin */ std::tuple>, std::shared_ptr, std::shared_ptr> - get_subset(std::shared_ptr> f, std::shared_ptr interf); + get_subset(std::shared_ptr> f, std::shared_ptr interf, + std::shared_ptr labs); /** reset arrays of weak learners and gamma values */ void initialize_learners(); diff --git a/src/shogun/machine/StructuredOutputMachine.cpp b/src/shogun/machine/StructuredOutputMachine.cpp index b3d42f8396a..f4200a9c0f7 100644 --- a/src/shogun/machine/StructuredOutputMachine.cpp +++ b/src/shogun/machine/StructuredOutputMachine.cpp @@ -27,7 +27,6 @@ StructuredOutputMachine::StructuredOutputMachine( const std::shared_ptr& labs) : Machine(), m_model(std::move(model)), m_surrogate_loss(NULL) { - set_labels(labs); register_parameters(); } @@ -56,13 +55,6 @@ void StructuredOutputMachine::register_parameters() m_helper = NULL; } -void StructuredOutputMachine::set_labels(std::shared_ptr lab) -{ - Machine::set_labels(lab); - require(m_model != NULL, "please call set_model() before set_labels()"); - m_model->set_labels(lab->as()); -} - void StructuredOutputMachine::set_features(std::shared_ptr f) { m_model->set_features(std::move(f)); diff --git a/src/shogun/machine/StructuredOutputMachine.h b/src/shogun/machine/StructuredOutputMachine.h index d0ce6d4f716..3ac0e1a73ad 100644 --- a/src/shogun/machine/StructuredOutputMachine.h +++ b/src/shogun/machine/StructuredOutputMachine.h @@ -77,12 +77,6 @@ class StructuredOutputMachine : public Machine return "StructuredOutputMachine"; } - /** set labels - * - * @param lab labels - */ - void set_labels(std::shared_ptr lab) override; - /** set features * * @param f features diff --git a/src/shogun/mathematics/Seedable.h b/src/shogun/mathematics/Seedable.h index 1bd02e7c990..d6fd90b294e 100644 --- a/src/shogun/mathematics/Seedable.h +++ b/src/shogun/mathematics/Seedable.h @@ -13,7 +13,7 @@ namespace shogun #ifndef SWIG static constexpr std::string_view kSetRandomSeed = "set_random_seed"; static constexpr std::string_view kSeed = "seed"; -#endif // SWIG +#endif // SWIG /** Seeds an SGObject using a specific seed */ template < @@ -46,7 +46,6 @@ namespace shogun { return "Seedable"; } - protected: /** Seeds an SGObject using the current object seed * This is intended to seed non-parameter SGObjects created inside diff --git a/src/shogun/metric/LMNNImpl.cpp b/src/shogun/metric/LMNNImpl.cpp index c533b87491b..1ac1fbeefd0 100644 --- a/src/shogun/metric/LMNNImpl.cpp +++ b/src/shogun/metric/LMNNImpl.cpp @@ -141,7 +141,8 @@ SGMatrix LMNNImpl::find_target_nn(const std::shared_ptr>(slice_mat); auto labels_slice = std::make_shared(labels_vec); - auto knn = std::make_shared(k+1, std::make_shared(features_slice, features_slice), labels_slice); + auto knn = std::make_shared(k+1, std::make_shared()); + knn->train(features_slice, labels_slice); SGMatrix target_slice = knn->nearest_neighbors(); // sanity check ASSERT(target_slice.num_rows==k+1 && target_slice.num_cols==slice_size) diff --git a/src/shogun/multiclass/GMNPSVM.cpp b/src/shogun/multiclass/GMNPSVM.cpp index eec13580a2c..f6878f08e36 100644 --- a/src/shogun/multiclass/GMNPSVM.cpp +++ b/src/shogun/multiclass/GMNPSVM.cpp @@ -26,8 +26,8 @@ GMNPSVM::GMNPSVM() init(); } -GMNPSVM::GMNPSVM(float64_t C, std::shared_ptr k, std::shared_ptr lab) -: MulticlassSVM(std::make_shared(), C, std::move(k), std::move(lab)) +GMNPSVM::GMNPSVM(float64_t C, std::shared_ptr k ) +: MulticlassSVM(std::make_shared(), C, std::move(k) ) { init(); } @@ -50,32 +50,32 @@ GMNPSVM::init() m_basealphas = NULL, m_basealphas_y = 0, m_basealphas_x = 0; } -bool GMNPSVM::train_machine(std::shared_ptr data) +bool GMNPSVM::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { ASSERT(m_kernel) - ASSERT(m_labels && m_labels->get_num_labels()) - ASSERT(m_labels->get_label_type() == LT_MULTICLASS) - init_strategy(); + ASSERT(labs && labs->get_num_labels()) + ASSERT(labs->get_label_type() == LT_MULTICLASS) + init_strategy(labs); if (data) { - if (m_labels->get_num_labels() != data->get_num_vectors()) + if (labs->get_num_labels() != data->get_num_vectors()) { error("{}::train_machine(): Number of training vectors ({}) does" " not match number of labels ({})", get_name(), - data->get_num_vectors(), m_labels->get_num_labels()); + data->get_num_vectors(), labs->get_num_labels()); } m_kernel->init(data, data); } - int32_t num_data = m_labels->get_num_labels(); + int32_t num_data = labs->get_num_labels(); int32_t num_classes = m_multiclass_strategy->get_num_classes(); int32_t num_virtual_data= num_data*(num_classes-1); io::info("{} trainlabels, {} classes", num_data, num_classes); float64_t* vector_y = SG_MALLOC(float64_t, num_data); - auto mc = multiclass_labels(m_labels); + auto mc = multiclass_labels(labs); for (int32_t i=0; iget_label(i)+1; diff --git a/src/shogun/multiclass/GMNPSVM.h b/src/shogun/multiclass/GMNPSVM.h index 645027b9593..60f3d4ef401 100644 --- a/src/shogun/multiclass/GMNPSVM.h +++ b/src/shogun/multiclass/GMNPSVM.h @@ -33,7 +33,7 @@ class GMNPSVM : public MulticlassSVM * @param k kernel * @param lab labels */ - GMNPSVM(float64_t C, std::shared_ptr k, std::shared_ptr lab); + GMNPSVM(float64_t C, std::shared_ptr k ); /** default destructor */ ~GMNPSVM() override; @@ -67,7 +67,7 @@ class GMNPSVM : public MulticlassSVM * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr&, const std::shared_ptr& labs) override; protected: /** required for MKLMulticlass diff --git a/src/shogun/multiclass/GaussianNaiveBayes.cpp b/src/shogun/multiclass/GaussianNaiveBayes.cpp index 61881d3e851..8090de32793 100644 --- a/src/shogun/multiclass/GaussianNaiveBayes.cpp +++ b/src/shogun/multiclass/GaussianNaiveBayes.cpp @@ -25,15 +25,12 @@ GaussianNaiveBayes::GaussianNaiveBayes() : NativeMulticlassMachine(), m_features init(); }; -GaussianNaiveBayes::GaussianNaiveBayes(const std::shared_ptr& train_examples, - const std::shared_ptr& train_labels) : NativeMulticlassMachine(), m_features(NULL), +GaussianNaiveBayes::GaussianNaiveBayes(const std::shared_ptr& train_examples) + : NativeMulticlassMachine(), m_features(NULL), m_min_label(0), m_num_classes(0), m_dim(0), m_means(), m_variances(), m_label_prob(), m_rates() { init(); - ASSERT(train_examples->get_num_vectors() == train_labels->get_num_labels()) - set_labels(train_labels); - if (!train_examples->has_property(FP_DOT)) error("Specified features are not of type CDotFeatures"); @@ -59,7 +56,7 @@ void GaussianNaiveBayes::set_features(std::shared_ptr features) m_features = features->as(); } -bool GaussianNaiveBayes::train_machine(std::shared_ptr data) +bool GaussianNaiveBayes::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { // init features with data if necessary and assure type is correct if (data) @@ -70,9 +67,9 @@ bool GaussianNaiveBayes::train_machine(std::shared_ptr data) } // get int labels to train_labels and check length equality - ASSERT(m_labels) + ASSERT(labs) SGVector train_labels = - multiclass_labels(m_labels)->get_int_labels(); + multiclass_labels(labs)->get_int_labels(); ASSERT(m_features->get_num_vectors()==train_labels.vlen) // find minimal and maximal label diff --git a/src/shogun/multiclass/GaussianNaiveBayes.h b/src/shogun/multiclass/GaussianNaiveBayes.h index d2fab8c5d7a..5f4438b87f1 100644 --- a/src/shogun/multiclass/GaussianNaiveBayes.h +++ b/src/shogun/multiclass/GaussianNaiveBayes.h @@ -46,7 +46,7 @@ class GaussianNaiveBayes : public NativeMulticlassMachine * @param train_examples train examples * @param train_labels labels corresponding to train_examples */ - GaussianNaiveBayes(const std::shared_ptr& train_examples, const std::shared_ptr& train_labels); + GaussianNaiveBayes(const std::shared_ptr& train_examples); /** destructor * @@ -91,7 +91,7 @@ class GaussianNaiveBayes : public NativeMulticlassMachine * @param data train examples * @return true if successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: void init(); diff --git a/src/shogun/multiclass/KNN.cpp b/src/shogun/multiclass/KNN.cpp index 6e0190a858d..b490a01cb0b 100644 --- a/src/shogun/multiclass/KNN.cpp +++ b/src/shogun/multiclass/KNN.cpp @@ -27,7 +27,7 @@ KNN::KNN() init(); } -KNN::KNN(int32_t k, const std::shared_ptr& d, const std::shared_ptr& trainlab, KNN_SOLVER knn_solver) +KNN::KNN(int32_t k, const std::shared_ptr& d, KNN_SOLVER knn_solver) : DistanceMachine() { init(); @@ -35,11 +35,8 @@ KNN::KNN(int32_t k, const std::shared_ptr& d, const std::shared_ptrget_num_labels(); m_knn_solver=knn_solver; } @@ -76,16 +73,13 @@ bool KNN::train_machine(std::shared_ptr data) { require(m_labels, "No training labels provided."); require(distance, "No training distance provided."); - - if (data) - { - require( + require( m_labels->get_num_labels() == data->get_num_vectors(), "Number of training vectors ({}) does not match number of labels " "({})", data->get_num_vectors(), m_labels->get_num_labels()); - distance->init(data, data); - } + m_features = data; + distance->init(data, data); SGVector lab=multiclass_labels(m_labels)->get_int_labels(); m_train_labels=lab.clone(); @@ -158,9 +152,8 @@ SGMatrix KNN::nearest_neighbors() std::shared_ptr KNN::apply_multiclass(std::shared_ptr data) { - if (data) - init_distance(data); - + init_distance(data); + m_features = data; //redirecting to fast (without sorting) classify if k==1 if (m_k == 1) return classify_NN(); @@ -206,21 +199,9 @@ std::shared_ptr KNN::classify_NN() COMPUTATION_CONTROLLERS // get distances from i-th test example to 0..num_m_train_labels-1 train examples distances_lhs(distances,0,m_train_labels.vlen-1,i); - int32_t j; - - // assuming 0th train examples as nearest to i-th test example - int32_t out_idx = 0; - float64_t min_dist = distances.vector[0]; - - // searching for nearest neighbor by comparing distances - for (j=0; jset_label(i,m_train_labels.vector[out_idx]+m_min_label); diff --git a/src/shogun/multiclass/KNN.h b/src/shogun/multiclass/KNN.h index f12c9a1388c..e0f98f367ca 100644 --- a/src/shogun/multiclass/KNN.h +++ b/src/shogun/multiclass/KNN.h @@ -80,7 +80,7 @@ class KNN : public DistanceMachine * @param d distance * @param trainlab labels for training */ - KNN(int32_t k, const std::shared_ptr& d, const std::shared_ptr& trainlab, KNN_SOLVER knn_solver=KNN_BRUTE); + KNN(int32_t k, const std::shared_ptr& d, KNN_SOLVER knn_solver=KNN_BRUTE); ~KNN() override; diff --git a/src/shogun/multiclass/MCLDA.cpp b/src/shogun/multiclass/MCLDA.cpp index bc7de2ee8cc..2105534dccd 100644 --- a/src/shogun/multiclass/MCLDA.cpp +++ b/src/shogun/multiclass/MCLDA.cpp @@ -31,7 +31,7 @@ MCLDA::MCLDA(float64_t tolerance, bool store_cov) } -MCLDA::MCLDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, float64_t tolerance, bool store_cov) +MCLDA::MCLDA(const std::shared_ptr>& traindat, float64_t tolerance, bool store_cov) : NativeMulticlassMachine() { init(); @@ -40,7 +40,6 @@ MCLDA::MCLDA(const std::shared_ptr>& traindat, std::sha m_store_cov=store_cov; set_features(traindat); - set_labels(std::move(trainlab)); } MCLDA::~MCLDA() @@ -149,9 +148,9 @@ std::shared_ptr MCLDA::apply_multiclass(std::shared_ptr data) +bool MCLDA::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - if (!m_labels) + if (!labs) error("No labels allocated in MCLDA training"); if (data) @@ -165,14 +164,14 @@ bool MCLDA::train_machine(std::shared_ptr data) if (!m_features) error("No features allocated in MCLDA training"); - SGVector< int32_t > train_labels = multiclass_labels(m_labels)->get_int_labels(); + SGVector< int32_t > train_labels = multiclass_labels(labs)->get_int_labels(); if (!train_labels.vector) error("No train_labels allocated in MCLDA training"); cleanup(); - m_num_classes = multiclass_labels(m_labels)->get_num_classes(); + m_num_classes = multiclass_labels(labs)->get_num_classes(); m_dim = m_features->get_dim_feature_space(); int32_t num_vec = m_features->get_num_vectors(); diff --git a/src/shogun/multiclass/MCLDA.h b/src/shogun/multiclass/MCLDA.h index 6ca77fc0011..5dd67a69eba 100644 --- a/src/shogun/multiclass/MCLDA.h +++ b/src/shogun/multiclass/MCLDA.h @@ -48,7 +48,7 @@ class MCLDA : public NativeMulticlassMachine * @param tolerance tolerance used in training * @param store_cov whether to store the within class covariances */ - MCLDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, float64_t tolerance = 1e-4, bool store_cov = false); + MCLDA(const std::shared_ptr>& traindat, float64_t tolerance = 1e-4, bool store_cov = false); ~MCLDA() override; @@ -131,7 +131,7 @@ class MCLDA : public NativeMulticlassMachine * * @return whether training was successful */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: void init(); diff --git a/src/shogun/multiclass/MulticlassLibLinear.cpp b/src/shogun/multiclass/MulticlassLibLinear.cpp index bf976ff4fd2..9295571adaf 100644 --- a/src/shogun/multiclass/MulticlassLibLinear.cpp +++ b/src/shogun/multiclass/MulticlassLibLinear.cpp @@ -24,8 +24,8 @@ MulticlassLibLinear::MulticlassLibLinear() : init_defaults(); } -MulticlassLibLinear::MulticlassLibLinear(float64_t C, std::shared_ptr features, std::shared_ptr labs) : - RandomMixin(std::make_shared(),std::move(features),nullptr,std::move(labs)) +MulticlassLibLinear::MulticlassLibLinear(float64_t C) : + RandomMixin(std::make_shared(), nullptr) { register_parameters(); init_defaults(); @@ -60,19 +60,14 @@ SGVector MulticlassLibLinear::get_support_vectors() const if (!m_train_state) error("Please enable save_train_state option and train machine."); - ASSERT(m_labels && m_labels->get_label_type() == LT_MULTICLASS) - - int32_t num_vectors = m_features->get_num_vectors(); - int32_t num_classes = multiclass_labels(m_labels)->get_num_classes(); - v_array nz_idxs; - nz_idxs.reserve(num_vectors); + nz_idxs.reserve(m_num_vectors); - for (int32_t i=0; ialpha[i*num_classes+y])>1e-6) + if (Math::abs(m_train_state->alpha[i*m_num_classes+y])>1e-6) { nz_idxs.push(i); break; @@ -89,28 +84,23 @@ SGMatrix MulticlassLibLinear::obtain_regularizer_matrix() const return SGMatrix(); } -bool MulticlassLibLinear::train_machine(std::shared_ptr data) +bool MulticlassLibLinear::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - if (data) - set_features(data->as()); - - ASSERT(m_features) - ASSERT(m_labels && m_labels->get_label_type()==LT_MULTICLASS) - ASSERT(m_multiclass_strategy) - init_strategy(); - - int32_t num_vectors = m_features->get_num_vectors(); - int32_t num_classes = multiclass_labels(m_labels)->get_num_classes(); + require(m_multiclass_strategy, "Multiclass strategy not set"); + init_strategy(labs); + auto feats = data->as(); + m_num_vectors = data->get_num_vectors(); + m_num_classes = multiclass_labels(labs)->get_num_classes(); int32_t bias_n = m_use_bias ? 1 : 0; liblinear_problem mc_problem; - mc_problem.l = num_vectors; - mc_problem.n = m_features->get_dim_feature_space() + bias_n; + mc_problem.l = m_num_vectors; + mc_problem.n = feats->get_dim_feature_space() + bias_n; mc_problem.y = SG_MALLOC(float64_t, mc_problem.l); - for (int32_t i=0; iget_int_label(i); + for (int32_t i=0; iget_int_label(i); - mc_problem.x = m_features; + mc_problem.x = feats; mc_problem.use_bias = m_use_bias; SGMatrix w0 = obtain_regularizer_matrix(); @@ -118,27 +108,27 @@ bool MulticlassLibLinear::train_machine(std::shared_ptr data) if (!m_train_state) m_train_state = new mcsvm_state(); - float64_t* C = SG_MALLOC(float64_t, num_vectors); - for (int32_t i=0; i(); SGVector cw(mc_problem.n-bias_n); for (int32_t j=0; jw[j*num_classes+i]; + cw[j] = m_train_state->w[j*m_num_classes+i]; machine->set_w(cw); if (m_use_bias) - machine->set_bias(m_train_state->w[(mc_problem.n-bias_n)*num_classes+i]); + machine->set_bias(m_train_state->w[(mc_problem.n-bias_n)*m_num_classes+i]); m_machines.push_back(machine); } diff --git a/src/shogun/multiclass/MulticlassLibLinear.h b/src/shogun/multiclass/MulticlassLibLinear.h index c60128059c7..b59c51c3291 100644 --- a/src/shogun/multiclass/MulticlassLibLinear.h +++ b/src/shogun/multiclass/MulticlassLibLinear.h @@ -45,7 +45,7 @@ class MulticlassLibLinear : public RandomMixin * @param features features * @param labs labels */ - MulticlassLibLinear(float64_t C, std::shared_ptr features, std::shared_ptr labs); + MulticlassLibLinear(float64_t C); /** destructor */ ~MulticlassLibLinear() override; @@ -143,7 +143,7 @@ class MulticlassLibLinear : public RandomMixin protected: /** train machine */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** obtain regularizer (w0) matrix */ virtual SGMatrix obtain_regularizer_matrix() const; diff --git a/src/shogun/multiclass/MulticlassLibSVM.cpp b/src/shogun/multiclass/MulticlassLibSVM.cpp index 57bb16cc304..7a543a3db49 100644 --- a/src/shogun/multiclass/MulticlassLibSVM.cpp +++ b/src/shogun/multiclass/MulticlassLibSVM.cpp @@ -19,8 +19,8 @@ MulticlassLibSVM::MulticlassLibSVM(LIBSVM_SOLVER_TYPE st) { } -MulticlassLibSVM::MulticlassLibSVM(float64_t C, std::shared_ptr k, std::shared_ptr lab) -: MulticlassSVM(std::make_shared(), C, std::move(k), std::move(lab)), solver_type(LIBSVM_C_SVC) +MulticlassLibSVM::MulticlassLibSVM(float64_t C, std::shared_ptr k ) +: MulticlassSVM(std::make_shared(), C, std::move(k) ), solver_type(LIBSVM_C_SVC) { } @@ -36,7 +36,7 @@ void MulticlassLibSVM::register_params() SG_OPTIONS(LIBSVM_C_SVC, LIBSVM_NU_SVC)); } -bool MulticlassLibSVM::train_machine(std::shared_ptr data) +bool MulticlassLibSVM::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { svm_problem problem; svm_parameter param; @@ -46,17 +46,17 @@ bool MulticlassLibSVM::train_machine(std::shared_ptr data) problem = svm_problem(); - ASSERT(m_labels && m_labels->get_num_labels()) - ASSERT(m_labels->get_label_type() == LT_MULTICLASS) - init_strategy(); + ASSERT(labs && labs->get_num_labels()) + ASSERT(labs->get_label_type() == LT_MULTICLASS) + init_strategy(labs); int32_t num_classes = m_multiclass_strategy->get_num_classes(); - problem.l=m_labels->get_num_labels(); + problem.l=labs->get_num_labels(); io::info("{} trainlabels, {} classes", problem.l, num_classes); if (data) { - if (m_labels->get_num_labels() != data->get_num_vectors()) + if (labs->get_num_labels() != data->get_num_vectors()) { error("Number of training vectors does not match number of " "labels"); @@ -74,7 +74,7 @@ bool MulticlassLibSVM::train_machine(std::shared_ptr data) for (int32_t i=0; iget_label(i); + problem.y[i]=multiclass_labels(labs)->get_label(i); problem.x[i]=&x_space[2*i]; x_space[2*i].index=i; x_space[2*i+1].index=-1; diff --git a/src/shogun/multiclass/MulticlassLibSVM.h b/src/shogun/multiclass/MulticlassLibSVM.h index 095c561eaf3..1bca4776b75 100644 --- a/src/shogun/multiclass/MulticlassLibSVM.h +++ b/src/shogun/multiclass/MulticlassLibSVM.h @@ -30,7 +30,7 @@ class MulticlassLibSVM : public MulticlassSVM * @param k kernel * @param lab labels */ - MulticlassLibSVM(float64_t C, std::shared_ptr k, std::shared_ptr lab); + MulticlassLibSVM(float64_t C, std::shared_ptr k ); /** destructor */ ~MulticlassLibSVM() override; @@ -53,7 +53,7 @@ class MulticlassLibSVM : public MulticlassSVM * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr&, const std::shared_ptr& labs) override; private: void register_params(); diff --git a/src/shogun/multiclass/MulticlassOCAS.cpp b/src/shogun/multiclass/MulticlassOCAS.cpp index d492deda9b2..b519c0b57e5 100644 --- a/src/shogun/multiclass/MulticlassOCAS.cpp +++ b/src/shogun/multiclass/MulticlassOCAS.cpp @@ -40,8 +40,8 @@ MulticlassOCAS::MulticlassOCAS() : set_buf_size(5000); } -MulticlassOCAS::MulticlassOCAS(float64_t C, const std::shared_ptr& train_features, std::shared_ptr train_labels) : - LinearMulticlassMachine(std::make_shared(), train_features->as(), NULL, std::move(train_labels)), m_C(C) +MulticlassOCAS::MulticlassOCAS(float64_t C) : + LinearMulticlassMachine(std::make_shared(), NULL ), m_C(C) { register_parameters(); set_epsilon(1e-2); @@ -65,22 +65,18 @@ MulticlassOCAS::~MulticlassOCAS() { } -bool MulticlassOCAS::train_machine(std::shared_ptr data) +bool MulticlassOCAS::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - if (data) - set_features(data->as()); - ASSERT(m_features) - ASSERT(m_labels) - ASSERT(m_multiclass_strategy) - init_strategy(); - - int32_t num_vectors = m_features->get_num_vectors(); + require(m_multiclass_strategy, "Multiclass strategy not set"); + init_strategy(labs); + auto feats = data->as(); + int32_t num_vectors = feats->get_num_vectors(); int32_t num_classes = m_multiclass_strategy->get_num_classes(); - int32_t num_features = m_features->get_dim_feature_space(); + int32_t num_features = feats->get_dim_feature_space(); float64_t C = m_C; - SGVector labels = multiclass_labels(m_labels)->get_labels(); + SGVector labels = multiclass_labels(labs)->get_labels(); uint32_t nY = num_classes; uint32_t nData = num_vectors; float64_t TolRel = m_epsilon; @@ -91,7 +87,7 @@ bool MulticlassOCAS::train_machine(std::shared_ptr data) uint8_t Method = m_method; mocas_data user_data; - user_data.features = m_features; + user_data.features = feats; user_data.W = SG_CALLOC(float64_t, (int64_t)num_features*num_classes); user_data.oldW = SG_CALLOC(float64_t, (int64_t)num_features*num_classes); user_data.new_a = SG_CALLOC(float64_t, (int64_t)num_features*num_classes); diff --git a/src/shogun/multiclass/MulticlassOCAS.h b/src/shogun/multiclass/MulticlassOCAS.h index 20b63da9e60..21a1e0dfb34 100644 --- a/src/shogun/multiclass/MulticlassOCAS.h +++ b/src/shogun/multiclass/MulticlassOCAS.h @@ -30,7 +30,7 @@ class MulticlassOCAS : public LinearMulticlassMachine * @param features features * @param labs labels */ - MulticlassOCAS(float64_t C, const std::shared_ptr& features, std::shared_ptr labs); + MulticlassOCAS(float64_t C); /** destructor */ ~MulticlassOCAS() override; @@ -109,7 +109,7 @@ class MulticlassOCAS : public LinearMulticlassMachine protected: /** train machine */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** update W */ static float64_t msvm_update_W(float64_t t, void* user_data); diff --git a/src/shogun/multiclass/MulticlassSVM.cpp b/src/shogun/multiclass/MulticlassSVM.cpp index 31cfb63501c..dab92bc0a44 100644 --- a/src/shogun/multiclass/MulticlassSVM.cpp +++ b/src/shogun/multiclass/MulticlassSVM.cpp @@ -19,14 +19,14 @@ MulticlassSVM::MulticlassSVM() } MulticlassSVM::MulticlassSVM(std::shared_ptrstrategy) - :KernelMulticlassMachine(std::move(strategy), NULL, std::make_shared(0), NULL) + :KernelMulticlassMachine(std::move(strategy), NULL, std::make_shared(0) ) { init(); } MulticlassSVM::MulticlassSVM( - std::shared_ptrstrategy, float64_t C, std::shared_ptr k, std::shared_ptr lab) - : KernelMulticlassMachine(std::move(strategy), k, std::make_shared(C, k, lab), lab) + std::shared_ptrstrategy, float64_t C, std::shared_ptr k) + : KernelMulticlassMachine(std::move(strategy), k, std::make_shared(C, k)) { init(); m_C=C; diff --git a/src/shogun/multiclass/MulticlassSVM.h b/src/shogun/multiclass/MulticlassSVM.h index e0053e98e5e..cf2ab2bf357 100644 --- a/src/shogun/multiclass/MulticlassSVM.h +++ b/src/shogun/multiclass/MulticlassSVM.h @@ -44,7 +44,7 @@ class MulticlassSVM : public KernelMulticlassMachine * @param lab labels */ MulticlassSVM( - std::shared_ptrstrategy, float64_t C, std::shared_ptr k, std::shared_ptr lab); + std::shared_ptrstrategy, float64_t C, std::shared_ptr k ); ~MulticlassSVM() override; /** create multiclass SVM. Appends the appropriate number of svm pointer diff --git a/src/shogun/multiclass/QDA.cpp b/src/shogun/multiclass/QDA.cpp index 7b7f672bc2b..ce59a3b6f3d 100644 --- a/src/shogun/multiclass/QDA.cpp +++ b/src/shogun/multiclass/QDA.cpp @@ -36,40 +36,36 @@ QDA::QDA(float64_t tolerance, bool store_covs) m_store_covs = store_covs; } -QDA::QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab) +QDA::QDA(const std::shared_ptr>& traindat ) : NativeMulticlassMachine(), m_num_classes(0), m_dim(0) { init(); set_features(traindat); - set_labels(std::move(trainlab)); } -QDA::QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, float64_t tolerance) +QDA::QDA(const std::shared_ptr>& traindat, float64_t tolerance) : NativeMulticlassMachine(), m_num_classes(0), m_dim(0) { init(); set_features(traindat); - set_labels(std::move(trainlab)); m_tolerance = tolerance; } -QDA::QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, bool store_covs) +QDA::QDA(const std::shared_ptr>& traindat, bool store_covs) : NativeMulticlassMachine(), m_num_classes(0), m_dim(0) { init(); set_features(traindat); - set_labels(std::move(trainlab)); m_store_covs = store_covs; } -QDA::QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, float64_t tolerance, bool store_covs) +QDA::QDA(const std::shared_ptr>& traindat, float64_t tolerance, bool store_covs) : NativeMulticlassMachine(), m_num_classes(0), m_dim(0) { init(); set_features(traindat); - set_labels(std::move(trainlab)); m_tolerance = tolerance; m_store_covs = store_covs; } @@ -170,9 +166,9 @@ std::shared_ptr QDA::apply_multiclass(std::shared_ptr data) +bool QDA::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - if (!m_labels) + if (!labs) error("No labels allocated in QDA training"); if ( data ) @@ -186,14 +182,14 @@ bool QDA::train_machine(std::shared_ptr data) if (!m_features) error("No features allocated in QDA training"); - SGVector< int32_t > train_labels = multiclass_labels(m_labels)->get_int_labels(); + SGVector< int32_t > train_labels = multiclass_labels(labs)->get_int_labels(); if (!train_labels.vector) error("No train_labels allocated in QDA training"); cleanup(); - m_num_classes = multiclass_labels(m_labels)->get_num_classes(); + m_num_classes = multiclass_labels(labs)->get_num_classes(); m_dim = m_features->get_dim_feature_space(); int32_t num_vec = m_features->get_num_vectors(); diff --git a/src/shogun/multiclass/QDA.h b/src/shogun/multiclass/QDA.h index dcebea2c523..400c2eeb24a 100644 --- a/src/shogun/multiclass/QDA.h +++ b/src/shogun/multiclass/QDA.h @@ -49,7 +49,7 @@ class QDA : public NativeMulticlassMachine * @param traindat training features * @param trainlab labels for training features */ - QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab); + QDA(const std::shared_ptr>& traindat ); /** constructor * @@ -57,7 +57,7 @@ class QDA : public NativeMulticlassMachine * @param trainlab labels for training features * @param tolerance tolerance used in training */ - QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, float64_t tolerance); + QDA(const std::shared_ptr>& traindat, float64_t tolerance); /** constructor * @@ -65,7 +65,7 @@ class QDA : public NativeMulticlassMachine * @param trainlab labels for training features * @param store_covs whether to store the within class covariances */ - QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, bool store_covs); + QDA(const std::shared_ptr>& traindat, bool store_covs); /** constructor * @@ -74,7 +74,7 @@ class QDA : public NativeMulticlassMachine * @param tolerance tolerance used in training * @param store_covs whether to store the within class covariances */ - QDA(const std::shared_ptr>& traindat, std::shared_ptr trainlab, float64_t tolerance, bool store_covs); + QDA(const std::shared_ptr>& traindat, float64_t tolerance, bool store_covs); ~QDA() override; @@ -173,7 +173,7 @@ class QDA : public NativeMulticlassMachine * * @return whether training was successful */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: void init(); diff --git a/src/shogun/multiclass/ScatterSVM.cpp b/src/shogun/multiclass/ScatterSVM.cpp index 4a7beb1d916..5183c11401d 100644 --- a/src/shogun/multiclass/ScatterSVM.cpp +++ b/src/shogun/multiclass/ScatterSVM.cpp @@ -32,8 +32,8 @@ ScatterSVM::ScatterSVM(SCATTER_TYPE type) { } -ScatterSVM::ScatterSVM(float64_t C, std::shared_ptr k, std::shared_ptr lab) -: MulticlassSVM(std::make_shared(), C, std::move(k), std::move(lab)), scatter_type(NO_BIAS_LIBSVM), +ScatterSVM::ScatterSVM(float64_t C, std::shared_ptr k ) +: MulticlassSVM(std::make_shared(), C, std::move(k) ), scatter_type(NO_BIAS_LIBSVM), norm_wc(NULL), norm_wc_len(0), norm_wcw(NULL), norm_wcw_len(0), rho(0), m_num_classes(0) { } @@ -69,18 +69,18 @@ void ScatterSVM::register_params() #endif // USE_SVMLIGHT } -bool ScatterSVM::train_machine(std::shared_ptr data) +bool ScatterSVM::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - ASSERT(m_labels && m_labels->get_num_labels()) - ASSERT(m_labels->get_label_type() == LT_MULTICLASS) - init_strategy(); + ASSERT(labs && labs->get_num_labels()) + ASSERT(labs->get_label_type() == LT_MULTICLASS) + init_strategy(labs); m_num_classes = m_multiclass_strategy->get_num_classes(); - int32_t num_vectors = m_labels->get_num_labels(); + int32_t num_vectors = labs->get_num_labels(); if (data) { - if (m_labels->get_num_labels() != data->get_num_vectors()) + if (labs->get_num_labels() != data->get_num_vectors()) error("Number of training vectors does not match number of labels"); m_kernel->init(data, data); } @@ -88,7 +88,7 @@ bool ScatterSVM::train_machine(std::shared_ptr data) int32_t* numc=SG_MALLOC(int32_t, m_num_classes); SGVector::fill_vector(numc, m_num_classes, 0); - auto mc = multiclass_labels(m_labels); + auto mc = multiclass_labels(labs); for (int32_t i=0; iget_int_label(i)]++; @@ -110,12 +110,12 @@ bool ScatterSVM::train_machine(std::shared_ptr data) if (scatter_type==NO_BIAS_LIBSVM) { - result=train_no_bias_libsvm(); + result=train_no_bias_libsvm(labs); } #ifdef USE_SVMLIGHT else if (scatter_type==NO_BIAS_SVMLIGHT) { - result=train_no_bias_svmlight(); + result=train_no_bias_svmlight(labs); } #endif //USE_SVMLIGHT else if (scatter_type==TEST_RULE1 || scatter_type==TEST_RULE2) @@ -128,7 +128,7 @@ bool ScatterSVM::train_machine(std::shared_ptr data) if (get_nu()nu_max) error("nu out of valid range [{} ... {}]", nu_min, nu_max); - result=train_testrule12(); + result=train_testrule12(labs); } else error("Unknown Scatter type"); @@ -136,7 +136,7 @@ bool ScatterSVM::train_machine(std::shared_ptr data) return result; } -bool ScatterSVM::train_no_bias_libsvm() +bool ScatterSVM::train_no_bias_libsvm( const std::shared_ptr& labs) { svm_problem problem; svm_parameter param; @@ -144,7 +144,7 @@ bool ScatterSVM::train_no_bias_libsvm() struct svm_node* x_space; - problem.l=m_labels->get_num_labels(); + problem.l=labs->get_num_labels(); io::info("{} trainlabels", problem.l); problem.y=SG_MALLOC(float64_t, problem.l); @@ -173,7 +173,7 @@ bool ScatterSVM::train_no_bias_libsvm() param.nu = get_nu(); // Nu auto prev_normalizer=m_kernel->get_normalizer(); m_kernel->set_normalizer(std::make_shared( - m_num_classes-1, -1, m_labels, prev_normalizer)); + m_num_classes-1, -1, labs, prev_normalizer)); param.kernel=m_kernel.get(); param.cache_size = m_kernel->get_cache_size(); param.C = 0; @@ -246,11 +246,11 @@ bool ScatterSVM::train_no_bias_libsvm() } #ifdef USE_SVMLIGHT -bool ScatterSVM::train_no_bias_svmlight() +bool ScatterSVM::train_no_bias_svmlight( const std::shared_ptr& labs) { auto prev_normalizer=m_kernel->get_normalizer(); auto n=std::make_shared( - m_num_classes-1, -1, m_labels, prev_normalizer); + m_num_classes-1, -1, labs, prev_normalizer); m_kernel->set_normalizer(n); m_kernel->init_normalizer(); @@ -276,21 +276,21 @@ bool ScatterSVM::train_no_bias_svmlight() } #endif //USE_SVMLIGHT -bool ScatterSVM::train_testrule12() +bool ScatterSVM::train_testrule12( const std::shared_ptr& labs) { svm_problem problem; svm_parameter param; struct svm_model* model = nullptr; struct svm_node* x_space; - problem.l=m_labels->get_num_labels(); + problem.l=labs->get_num_labels(); io::info("{} trainlabels", problem.l); problem.y=SG_MALLOC(float64_t, problem.l); problem.x=SG_MALLOC(struct svm_node*, problem.l); x_space=SG_MALLOC(struct svm_node, 2*problem.l); - auto mc = multiclass_labels(m_labels); + auto mc = multiclass_labels(labs); for (int32_t i=0; iget_label(i); @@ -406,7 +406,7 @@ void ScatterSVM::compute_norm_wc() norm_wc[i] = std::sqrt(norm_wc[i]); } -std::shared_ptr ScatterSVM::classify_one_vs_rest() +std::shared_ptr ScatterSVM::classify_one_vs_rest( const std::shared_ptr& labs) { if (!m_kernel) { @@ -434,7 +434,7 @@ std::shared_ptr ScatterSVM::classify_one_vs_rest() float64_t* outputs=SG_MALLOC(float64_t, num_vectors*m_num_classes); SGVector::fill_vector(outputs,num_vectors*m_num_classes,0.0); - auto mc = multiclass_labels(m_labels); + auto mc = multiclass_labels(labs); for (int32_t i=0; iget_num_support_vectors(); j++) @@ -483,7 +483,7 @@ std::shared_ptr ScatterSVM::classify_one_vs_rest() auto svm = get_svm(i); ASSERT(svm) svm->set_kernel(m_kernel); - svm->set_labels(m_labels); + svm->set_labels(labs); outputs[i]=svm->apply(); } diff --git a/src/shogun/multiclass/ScatterSVM.h b/src/shogun/multiclass/ScatterSVM.h index 0777d2726f1..4e00c1b716c 100644 --- a/src/shogun/multiclass/ScatterSVM.h +++ b/src/shogun/multiclass/ScatterSVM.h @@ -60,7 +60,7 @@ class ScatterSVM : public MulticlassSVM * @param k kernel * @param lab labels */ - ScatterSVM(float64_t C, std::shared_ptr k, std::shared_ptr lab); + ScatterSVM(float64_t C, std::shared_ptr k ); /** default destructor */ ~ScatterSVM() override; @@ -82,7 +82,7 @@ class ScatterSVM : public MulticlassSVM * * @return resulting labels */ - virtual std::shared_ptr classify_one_vs_rest(); + virtual std::shared_ptr classify_one_vs_rest( const std::shared_ptr& labs); /** @return object name */ const char* get_name() const override { return "ScatterSVM"; } @@ -96,15 +96,15 @@ class ScatterSVM : public MulticlassSVM * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr&, const std::shared_ptr& labs) override; private: void compute_norm_wc(); - virtual bool train_no_bias_libsvm(); + virtual bool train_no_bias_libsvm( const std::shared_ptr& labs); #ifdef USE_SVMLIGHT - virtual bool train_no_bias_svmlight(); + virtual bool train_no_bias_svmlight( const std::shared_ptr& labs); #endif //USE_SVMLIGHT - virtual bool train_testrule12(); + virtual bool train_testrule12( const std::shared_ptr& labs); void register_params(); diff --git a/src/shogun/multiclass/ShareBoost.cpp b/src/shogun/multiclass/ShareBoost.cpp index ddcab5ada7d..20bba85af30 100644 --- a/src/shogun/multiclass/ShareBoost.cpp +++ b/src/shogun/multiclass/ShareBoost.cpp @@ -23,8 +23,8 @@ ShareBoost::ShareBoost() init_sb_params(); } -ShareBoost::ShareBoost(const std::shared_ptr >&features, const std::shared_ptr&labs, int32_t num_nonzero_feas) - :LinearMulticlassMachine(std::make_shared(), features, NULL, labs), m_nonzero_feas(num_nonzero_feas) +ShareBoost::ShareBoost(const std::shared_ptr&labs, int32_t num_nonzero_feas) + :LinearMulticlassMachine(std::make_shared(), NULL ), m_nonzero_feas(num_nonzero_feas) { init_sb_params(); } @@ -40,18 +40,12 @@ SGVector ShareBoost::get_activeset() return m_activeset; } -bool ShareBoost::train_machine(std::shared_ptr data) +bool ShareBoost::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - if (data) - set_features(data); - auto fea = m_features->as>(); - - if (m_features == NULL) - error("No features given for training"); - if (m_labels == NULL) - error("No labels given for training"); - - init_strategy(); + m_share_boost_labels = labs; + auto fea = data->as>(); + m_features = fea; + init_strategy(labs); if (m_nonzero_feas <= 0) error("Set a valid (> 0) number of non-zero features to seek before training"); @@ -78,13 +72,13 @@ bool ShareBoost::train_machine(std::shared_ptr data) for (auto t : SG_PROGRESS(range(m_nonzero_feas))) { timer->start(); - compute_rho(); - int32_t i_fea = choose_feature(); + compute_rho(labs); + int32_t i_fea = choose_feature(labs); m_activeset.vector[m_activeset.vlen] = i_fea; m_activeset.vlen += 1; float64_t t_choose_feature = timer->cur_time_diff(); timer->start(); - optimize_coefficients(); + optimize_coefficients(labs); float64_t t_optimize = timer->cur_time_diff(); SG_DEBUG(" SB[round {:03d}]: ({:8.4f} + {:8.4f}) sec.", t, @@ -108,8 +102,7 @@ bool ShareBoost::train_machine(std::shared_ptr data) void ShareBoost::compute_pred() { - auto fea = m_features->as>(); - auto subset_fea = std::make_shared>(fea, m_activeset); + auto subset_fea = std::make_shared>(m_features, m_activeset); for (int32_t i=0; i < m_multiclass_strategy->get_num_classes(); ++i) { auto machine = m_machines.at(i)->as(); @@ -136,9 +129,9 @@ void ShareBoost::compute_pred(const float64_t *W) compute_pred(); } -void ShareBoost::compute_rho() +void ShareBoost::compute_rho( const std::shared_ptr& labs) { - auto lab = multiclass_labels(m_labels); + auto lab = multiclass_labels(labs); for (int32_t i=0; i < m_rho.num_rows; ++i) { // i loop classes @@ -160,10 +153,10 @@ void ShareBoost::compute_rho() } } -int32_t ShareBoost::choose_feature() +int32_t ShareBoost::choose_feature( const std::shared_ptr& labs) { SGVector l1norm(m_fea.num_rows); - auto lab = multiclass_labels(m_labels); + auto lab = multiclass_labels(labs); for (int32_t j=0; j < m_fea.num_rows; ++j) { if (std::find(&m_activeset[0], &m_activeset[m_activeset.vlen], j) != @@ -191,16 +184,10 @@ int32_t ShareBoost::choose_feature() return Math::arg_max(l1norm.vector, 1, l1norm.vlen); } -void ShareBoost::optimize_coefficients() +void ShareBoost::optimize_coefficients(const std::shared_ptr& labs) { ShareBoostOptimizer optimizer(shared_from_this()->as(), false); optimizer.optimize(); } -void ShareBoost::set_features(const std::shared_ptr&f) -{ - auto fea = f->as>(); - if (fea == NULL) - error("Require DenseFeatures"); - LinearMulticlassMachine::set_features(fea); -} + diff --git a/src/shogun/multiclass/ShareBoost.h b/src/shogun/multiclass/ShareBoost.h index b202e7e13d8..9b09b787056 100644 --- a/src/shogun/multiclass/ShareBoost.h +++ b/src/shogun/multiclass/ShareBoost.h @@ -32,7 +32,7 @@ class ShareBoost: public LinearMulticlassMachine ShareBoost(); /** constructor */ - ShareBoost(const std::shared_ptr >&features, const std::shared_ptr&labs, int32_t num_nonzero_feas); + ShareBoost(const std::shared_ptr&labs, int32_t num_nonzero_feas); /** destructor */ ~ShareBoost() override {} @@ -46,9 +46,6 @@ class ShareBoost: public LinearMulticlassMachine /** get number of non-zero features the algorithm should seek */ int32_t get_num_nonzero_feas() const { return m_nonzero_feas; } - /** assign features */ - void set_features(const std::shared_ptr&f); - /** get active set */ SGVector get_activeset(); @@ -56,14 +53,14 @@ class ShareBoost: public LinearMulticlassMachine protected: /** train machine */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: void init_sb_params(); ///< init machine parameters - void compute_rho(); ///< compute the rho matrix - int32_t choose_feature(); ///< choose next feature greedily - void optimize_coefficients(); ///< optimize coefficients with gradient descent + void compute_rho( const std::shared_ptr& labs); ///< compute the rho matrix + int32_t choose_feature( const std::shared_ptr& labs); ///< choose next feature greedily + void optimize_coefficients(const std::shared_ptr& labs); ///< optimize coefficients with gradient descent void compute_pred(); ///< compute predictions on training data, according to W in m_machines void compute_pred(const float64_t *W); ///< compute predictions on training data, according to given W @@ -74,6 +71,8 @@ class ShareBoost: public LinearMulticlassMachine SGMatrix m_rho; ///< cache_matrix for rho SGVector m_rho_norm; ///< column sum of m_rho SGMatrix m_pred; ///< predictions, used in training + std::shared_ptr> m_features; + std::shared_ptr m_share_boost_labels; }; } /* shogun */ diff --git a/src/shogun/multiclass/ShareBoostOptimizer.cpp b/src/shogun/multiclass/ShareBoostOptimizer.cpp index 144a1e800f9..9607055b80e 100644 --- a/src/shogun/multiclass/ShareBoostOptimizer.cpp +++ b/src/shogun/multiclass/ShareBoostOptimizer.cpp @@ -45,13 +45,13 @@ float64_t ShareBoostOptimizer::lbfgs_evaluate(void *userdata, const float64_t *W ShareBoostOptimizer *optimizer = static_cast(userdata); optimizer->m_sb->compute_pred(W); - optimizer->m_sb->compute_rho(); + optimizer->m_sb->compute_rho(optimizer->m_sb->m_share_boost_labels); int32_t m = optimizer->m_sb->m_activeset.vlen; int32_t k = optimizer->m_sb->m_multiclass_strategy->get_num_classes(); SGMatrix fea = optimizer->m_sb->m_fea; - auto lab = multiclass_labels(optimizer->m_sb->m_labels); + auto lab = multiclass_labels(optimizer->m_sb->m_share_boost_labels); // compute gradient for (int32_t i=0; i < m; ++i) diff --git a/src/shogun/multiclass/ShareBoostOptimizer.h b/src/shogun/multiclass/ShareBoostOptimizer.h index 29b09e68a8f..ab8bf1b4f2b 100644 --- a/src/shogun/multiclass/ShareBoostOptimizer.h +++ b/src/shogun/multiclass/ShareBoostOptimizer.h @@ -20,7 +20,7 @@ class ShareBoostOptimizer public: /** constructor */ ShareBoostOptimizer(std::shared_ptrsb, bool verbose=false) - :m_sb(sb), m_verbose(verbose) { } + :m_sb(sb), m_verbose(verbose){} /** destructor */ ~ShareBoostOptimizer() { } diff --git a/src/shogun/multiclass/tree/C45ClassifierTree.cpp b/src/shogun/multiclass/tree/C45ClassifierTree.cpp index 146ea836b7a..ce285a7cef3 100644 --- a/src/shogun/multiclass/tree/C45ClassifierTree.cpp +++ b/src/shogun/multiclass/tree/C45ClassifierTree.cpp @@ -105,7 +105,8 @@ void C45ClassifierTree::clear_feature_types() m_types_set=false; } -bool C45ClassifierTree::train_machine(std::shared_ptr data) +bool C45ClassifierTree::train_machine(const std::shared_ptr& data, + const std::shared_ptr& labs) { require(data,"Data required for training"); require(data->get_feature_class()==C_DENSE,"Dense data required for training"); @@ -140,7 +141,7 @@ bool C45ClassifierTree::train_machine(std::shared_ptr data) SGVector feature_ids(num_features); feature_ids.range_fill(); - set_root(C45train(data, m_weights, multiclass_labels(m_labels), feature_ids, 0)); + set_root(C45train(data, m_weights, multiclass_labels(labs), feature_ids, 0)); if (m_root) { compute_feature_importance(num_features, m_root); diff --git a/src/shogun/multiclass/tree/C45ClassifierTree.h b/src/shogun/multiclass/tree/C45ClassifierTree.h index 22353ff5ddd..c3e32d0341d 100644 --- a/src/shogun/multiclass/tree/C45ClassifierTree.h +++ b/src/shogun/multiclass/tree/C45ClassifierTree.h @@ -157,7 +157,7 @@ class C45ClassifierTree : public FeatureImportanceTree /** train machine - build C4.5 Tree from training data * @param data training data */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: diff --git a/src/shogun/multiclass/tree/CARTree.cpp b/src/shogun/multiclass/tree/CARTree.cpp index 217081ed3cc..780f4b1edc6 100644 --- a/src/shogun/multiclass/tree/CARTree.cpp +++ b/src/shogun/multiclass/tree/CARTree.cpp @@ -75,17 +75,6 @@ CARTree::~CARTree() { } -void CARTree::set_labels(std::shared_ptr lab) -{ - if (lab->get_label_type()==LT_MULTICLASS) - set_machine_problem_type(PT_MULTICLASS); - else if (lab->get_label_type()==LT_REGRESSION) - set_machine_problem_type(PT_REGRESSION); - else - error("label type supplied is not supported"); - - m_labels=lab; -} void CARTree::set_machine_problem_type(EProblemType mode) { @@ -255,11 +244,11 @@ bool CARTree::weights_set() return m_weights.size() != 0; } -bool CARTree::train_machine(std::shared_ptr data) +bool CARTree::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { require(data,"Data required for training"); require(data->get_feature_class()==C_DENSE,"Dense data required for training"); - + set_machine_problem_type(labs); auto dense_features = data->as>(); auto num_features = dense_features->get_num_features(); auto num_vectors = dense_features->get_num_vectors(); @@ -292,12 +281,12 @@ bool CARTree::train_machine(std::shared_ptr data) linalg::set_const(m_nominal, false); } - auto dense_labels = m_labels->as(); + auto dense_labels = labs->as(); set_root(CARTtrain(dense_features,m_weights,dense_labels,0)); if (m_apply_cv_pruning) { - prune_by_cross_validation(dense_features,m_folds); + prune_by_cross_validation(dense_features, labs, m_folds); } // compute feature importances and normalize it if (m_root) @@ -1223,7 +1212,7 @@ std::shared_ptr CARTree::apply_from_current_node(const std::shared_ptr>& data, int32_t folds) +void CARTree::prune_by_cross_validation(const std::shared_ptr>& data, const std::shared_ptr& labs, int32_t folds) { auto num_vecs=data->get_num_vectors(); @@ -1254,7 +1243,7 @@ void CARTree::prune_by_cross_validation(const std::shared_ptr subset(train_indices.data(),train_indices.size(),false); - auto dense_labels = m_labels->as(); + auto dense_labels = labs->as(); auto feats_train = view(data, subset); auto labels_train = view(dense_labels, subset); SGVector subset_weights(train_indices.size()); diff --git a/src/shogun/multiclass/tree/CARTree.h b/src/shogun/multiclass/tree/CARTree.h index 7a90eb6e259..4c341176da5 100644 --- a/src/shogun/multiclass/tree/CARTree.h +++ b/src/shogun/multiclass/tree/CARTree.h @@ -105,11 +105,6 @@ class CARTree : public RandomMixin> /** destructor */ ~CARTree() override; - /** set labels - automagically switch machine problem type based on type of labels supplied - * @param lab labels - */ - void set_labels(std::shared_ptr lab) override; - /** get name * @return class name CARTree */ @@ -248,7 +243,7 @@ class CARTree : public RandomMixin> * @param data training data * @return true */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** CARTtrain - recursive CART training method * @@ -387,7 +382,7 @@ class CARTree : public RandomMixin> * @param data training data * @param folds the integer V for V-fold cross validation */ - void prune_by_cross_validation(const std::shared_ptr>& data, int32_t folds); + void prune_by_cross_validation(const std::shared_ptr>& data, const std::shared_ptr& labs, int32_t folds); /** computes error in classification/regression * for classification it eveluates weight_missclassified/total_weight @@ -429,6 +424,16 @@ class CARTree : public RandomMixin> /** initializes members of class */ void init(); + + void set_machine_problem_type(const std::shared_ptr& labs) + { + if (labs->get_label_type()==LT_MULTICLASS) + set_machine_problem_type(PT_MULTICLASS); + else if (labs->get_label_type()==LT_REGRESSION) + set_machine_problem_type(PT_REGRESSION); + else + error("label type supplied is not supported"); + } public: /** denotes that a feature in a vector is missing MISSING = NOT_A_NUMBER */ static const float64_t MISSING; diff --git a/src/shogun/multiclass/tree/CHAIDTree.cpp b/src/shogun/multiclass/tree/CHAIDTree.cpp index b3973647b6c..7e041b4a09f 100644 --- a/src/shogun/multiclass/tree/CHAIDTree.cpp +++ b/src/shogun/multiclass/tree/CHAIDTree.cpp @@ -152,7 +152,7 @@ void CHAIDTree::set_dependent_vartype(int32_t var) m_dependent_vartype=var; } -bool CHAIDTree::train_machine(std::shared_ptr data) +bool CHAIDTree::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { require(data, "Data required for training"); @@ -188,7 +188,7 @@ bool CHAIDTree::train_machine(std::shared_ptr data) } } - set_root(CHAIDtrain(data,m_weights,m_labels,0)); + set_root(CHAIDtrain(data,m_weights,labs,0)); // restore feature types if (updated) diff --git a/src/shogun/multiclass/tree/CHAIDTree.h b/src/shogun/multiclass/tree/CHAIDTree.h index 2492cc981bd..d53352ae426 100644 --- a/src/shogun/multiclass/tree/CHAIDTree.h +++ b/src/shogun/multiclass/tree/CHAIDTree.h @@ -231,7 +231,7 @@ class CHAIDTree : public TreeMachine * @param data training data * @return true */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: /** CHAIDtrain - recursive CHAID training method diff --git a/src/shogun/multiclass/tree/ID3ClassifierTree.cpp b/src/shogun/multiclass/tree/ID3ClassifierTree.cpp index 715cf51e4e3..d5866bdf545 100644 --- a/src/shogun/multiclass/tree/ID3ClassifierTree.cpp +++ b/src/shogun/multiclass/tree/ID3ClassifierTree.cpp @@ -68,7 +68,7 @@ bool ID3ClassifierTree::prune_tree(std::shared_ptr> val return true; } -bool ID3ClassifierTree::train_machine(std::shared_ptr data) +bool ID3ClassifierTree::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { require(data,"Data required for training"); require(data->get_feature_class()==C_DENSE, "Dense data required for training"); @@ -77,7 +77,7 @@ bool ID3ClassifierTree::train_machine(std::shared_ptr data) SGVector feature_ids = SGVector(num_features); feature_ids.range_fill(); - set_root(id3train(data, multiclass_labels(m_labels), feature_ids, 0)); + set_root(id3train(data, multiclass_labels(labs), feature_ids, 0)); if (m_root) { diff --git a/src/shogun/multiclass/tree/ID3ClassifierTree.h b/src/shogun/multiclass/tree/ID3ClassifierTree.h index f601ba239dc..595301ebdde 100644 --- a/src/shogun/multiclass/tree/ID3ClassifierTree.h +++ b/src/shogun/multiclass/tree/ID3ClassifierTree.h @@ -120,7 +120,7 @@ class ID3ClassifierTree : public FeatureImportanceTree /** train machine - build ID3 Tree from training data * @param data training data */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: diff --git a/src/shogun/multiclass/tree/RelaxedTree.cpp b/src/shogun/multiclass/tree/RelaxedTree.cpp index 70e3a537aa6..425e5796940 100644 --- a/src/shogun/multiclass/tree/RelaxedTree.cpp +++ b/src/shogun/multiclass/tree/RelaxedTree.cpp @@ -21,7 +21,7 @@ using namespace shogun; RelaxedTree::RelaxedTree() :m_max_num_iter(3), m_A(0.5), m_B(5), m_svm_C(1), m_svm_epsilon(0.001), - m_kernel(NULL), m_feats(NULL), m_machine_for_confusion_matrix(NULL), m_num_classes(0) + m_kernel(NULL), m_machine_for_confusion_matrix(NULL), m_num_classes(0) { SG_ADD(&m_max_num_iter, "m_max_num_iter", "max number of iterations in alternating optimization"); SG_ADD(&m_svm_C, "m_svm_C", "C for svm", ParameterProperties::HYPER); @@ -36,22 +36,17 @@ RelaxedTree::~RelaxedTree() std::shared_ptr RelaxedTree::apply_multiclass(std::shared_ptr data) { - if (data != NULL) - { - auto feats = data->as>(); - set_features(feats); - } - + auto feats = data->as>(); // init kernels for all sub-machines for (auto m: m_machines) { auto machine = m->as(); auto kernel = machine->get_kernel(); auto lhs = kernel->get_lhs(); - kernel->init(lhs, m_feats); + kernel->init(lhs, feats); } - auto lab = std::make_shared(m_feats->get_num_vectors()); + auto lab = std::make_shared(feats->get_num_vectors()); for (int32_t i=0; i < lab->get_num_labels(); ++i) { @@ -115,31 +110,23 @@ float64_t RelaxedTree::apply_one(int32_t idx) return klass; } -bool RelaxedTree::train_machine(std::shared_ptr data) +bool RelaxedTree::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { - if (m_machine_for_confusion_matrix == NULL) - error("Call set_machine_for_confusion_matrix before training"); - if (m_kernel == NULL) - error("assign a valid kernel before training"); - - if (data) - { - set_features(data->template as>()); - } - - auto lab = multiclass_labels(m_labels); - + require(m_machine_for_confusion_matrix, + "Call set_machine_for_confusion_matrix before training"); + auto lab = multiclass_labels(labs); + m_num_classes = lab->get_num_classes(); RelaxedTreeUtil util; SGMatrix conf_mat = util.estimate_confusion_matrix( m_machine_for_confusion_matrix->as(), - m_feats, lab, m_num_classes); + data, lab, m_num_classes); // train root SGVector classes(m_num_classes); classes.range_fill(); - m_root = train_node(conf_mat, classes); + m_root = train_node(conf_mat, classes, data, labs); std::queue> node_q; node_q.push(m_root->as()); @@ -163,7 +150,7 @@ bool RelaxedTree::train_machine(std::shared_ptr data) if (left_classes.vlen >= 2) { - auto left_node = train_node(conf_mat, left_classes); + auto left_node = train_node(conf_mat, left_classes, data, labs); node->left(left_node); node_q.push(left_node); } @@ -182,7 +169,7 @@ bool RelaxedTree::train_machine(std::shared_ptr data) if (right_classes.vlen >= 2) { - auto right_node = train_node(conf_mat, right_classes); + auto right_node = train_node(conf_mat, right_classes, data, labs); node->right(right_node); node_q.push(right_node); } @@ -193,7 +180,8 @@ bool RelaxedTree::train_machine(std::shared_ptr data) return true; } -std::shared_ptr RelaxedTree::train_node(const SGMatrix &conf_mat, SGVector classes) +std::shared_ptr RelaxedTree::train_node(const SGMatrix &conf_mat, + SGVector classes, const std::shared_ptr& data, const std::shared_ptr& labs) { SGVector best_mu; std::shared_ptr best_svm = NULL; @@ -204,7 +192,7 @@ std::shared_ptr RelaxedTree::train_node(const SGMatrix(); - SGVector mu = train_node_with_initialization(*it, classes, svm); + SGVector mu = train_node_with_initialization(*it, classes, svm, data, labs); float64_t score = compute_score(mu, svm); if (score < best_score) @@ -255,7 +243,8 @@ float64_t RelaxedTree::compute_score(SGVector mu, const std::shared_ptr return score; } -SGVector RelaxedTree::train_node_with_initialization(const RelaxedTree::entry_t &mu_entry, SGVector classes, const std::shared_ptr&svm) +SGVector RelaxedTree::train_node_with_initialization(const RelaxedTree::entry_t &mu_entry, SGVector classes, + const std::shared_ptr&svm, const std::shared_ptr& data, const std::shared_ptr& labels) { SGVector mu(classes.vlen), prev_mu(classes.vlen); mu.zero(); @@ -266,7 +255,7 @@ SGVector RelaxedTree::train_node_with_initialization(const RelaxedTree: svm->set_C(m_svm_C, m_svm_C); svm->set_epsilon(m_svm_epsilon); - auto labs = multiclass_labels(m_labels); + auto labs = multiclass_labels(labels); for (int32_t iiter=0; iiter < m_max_num_iter; ++iiter) { long_mu.zero(); @@ -278,8 +267,8 @@ SGVector RelaxedTree::train_node_with_initialization(const RelaxedTree: long_mu[classes[i]] = -1; } - SGVector subset(m_feats->get_num_vectors()); - SGVector binlab(m_feats->get_num_vectors()); + SGVector subset(data->get_num_vectors()); + SGVector binlab(data->get_num_vectors()); int32_t k=0; for (int32_t i=0; i < binlab.vlen; ++i) @@ -293,7 +282,7 @@ SGVector RelaxedTree::train_node_with_initialization(const RelaxedTree: subset.vlen = k; auto binary_labels = std::make_shared(binlab); - auto feats_train = view(m_feats, subset); + auto feats_train = view(data, subset); auto labels_train = view(binary_labels, subset); auto kernel = make_clone(m_kernel, ParameterProperties::ALL^ParameterProperties::MODEL); @@ -305,7 +294,7 @@ SGVector RelaxedTree::train_node_with_initialization(const RelaxedTree: std::copy(&mu[0], &mu[mu.vlen], &prev_mu[0]); - mu = color_label_space(svm, classes); + mu = color_label_space(svm, classes, data, labs); bool bbreak = true; for (int32_t i=0; i < mu.vlen; ++i) @@ -369,12 +358,12 @@ std::vector RelaxedTree::init_node(const SGMatrix(entries.begin(), entries.begin() + n_samples); } -SGVector RelaxedTree::color_label_space(std::shared_ptrsvm, SGVector classes) +SGVector RelaxedTree::color_label_space(std::shared_ptrsvm, SGVector classes, const std::shared_ptr& data, const std::shared_ptr& labs) { SGVector mu(classes.vlen); - auto labels = multiclass_labels(m_labels); + auto labels = multiclass_labels(labs); - SGVector resp = eval_binary_model_K(std::move(svm)); + SGVector resp = eval_binary_model_K(std::move(svm), data); ASSERT(resp.vlen == labels->get_num_labels()) SGVector xi_pos_class(classes.vlen), xi_neg_class(classes.vlen); @@ -871,9 +860,9 @@ void RelaxedTree::enforce_balance_constraints_lower(SGVector &mu, SGVec } } -SGVector RelaxedTree::eval_binary_model_K(const std::shared_ptr&svm) +SGVector RelaxedTree::eval_binary_model_K(const std::shared_ptr&svm, const std::shared_ptr& data) { - auto lab = svm->apply_regression(m_feats); + auto lab = svm->apply_regression(data); SGVector resp(lab->get_num_labels()); for (int32_t i=0; i < resp.vlen; ++i) resp[i] = lab->get_label(i) - m_A/m_svm_C; diff --git a/src/shogun/multiclass/tree/RelaxedTree.h b/src/shogun/multiclass/tree/RelaxedTree.h index 1e82fa6f920..12e26b8ae20 100644 --- a/src/shogun/multiclass/tree/RelaxedTree.h +++ b/src/shogun/multiclass/tree/RelaxedTree.h @@ -45,14 +45,6 @@ class RelaxedTree: public TreeMachine /** apply machine to data in means of multiclass classification problem */ std::shared_ptr apply_multiclass(std::shared_ptr data=NULL) override; - /** set features - * @param feats features - */ - void set_features(std::shared_ptr >feats) - { - m_feats = std::move(feats); - } - /** set kernel * @param kernel the kernel to be used */ @@ -61,19 +53,6 @@ class RelaxedTree: public TreeMachine m_kernel = std::move(kernel); } - /** set labels - * - * @param lab labels - */ - void set_labels(std::shared_ptr lab) override - { - auto mlab = multiclass_labels(lab); - require(lab, "requires MulticlassLabes"); - - Machine::set_labels(mlab); - m_num_classes = mlab->get_num_classes(); - } - /** set machine for confusion matrix * @param machine the multiclass machine for initializing the confusion matrix */ @@ -162,20 +141,6 @@ class RelaxedTree: public TreeMachine return m_max_num_iter; } - /** train machine - * - * @param data training data (parameter can be avoided if distance or - * kernel-based classifiers are used and distance/kernels are - * initialized with train data). - * If flag is set, model features will be stored after training. - * - * @return whether training was successful - */ - bool train(std::shared_ptr data=NULL) override - { - return Machine::train(data); - } - /** entry type */ typedef std::pair, float64_t> entry_t; protected: @@ -193,21 +158,24 @@ class RelaxedTree: public TreeMachine * * @return whether training was successful */ - bool train_machine(std::shared_ptr data) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** train node */ - std::shared_ptr train_node(const SGMatrix &conf_mat, SGVector classes); + std::shared_ptr train_node(const SGMatrix &conf_mat, SGVector classes, + const std::shared_ptr&, const std::shared_ptr&); /** init node */ std::vector init_node(const SGMatrix &global_conf_mat, SGVector classes); /** train node with initialization */ - SGVector train_node_with_initialization(const RelaxedTree::entry_t &mu_entry, SGVector classes, const std::shared_ptr&svm); + SGVector train_node_with_initialization(const RelaxedTree::entry_t &mu_entry, SGVector classes, + const std::shared_ptr&svm, const std::shared_ptr&, const std::shared_ptr&); /** compute score */ float64_t compute_score(SGVector mu, const std::shared_ptr&svm); /** color label space */ - SGVector color_label_space(std::shared_ptrsvm, SGVector classes); + SGVector color_label_space(std::shared_ptrsvm, SGVector classes, + const std::shared_ptr&, const std::shared_ptr&); /** evaluate binary model K */ - SGVector eval_binary_model_K(const std::shared_ptr&svm); + SGVector eval_binary_model_K(const std::shared_ptr&svm, const std::shared_ptr& data); /** enforce balance constraints upper */ void enforce_balance_constraints_upper(SGVector &mu, SGVector &delta_neg, SGVector &delta_pos, int32_t B_prime, SGVector& xi_neg_class); diff --git a/src/shogun/multiclass/tree/RelaxedTreeUtil.cpp b/src/shogun/multiclass/tree/RelaxedTreeUtil.cpp index 3335926ca43..d53875393da 100644 --- a/src/shogun/multiclass/tree/RelaxedTreeUtil.cpp +++ b/src/shogun/multiclass/tree/RelaxedTreeUtil.cpp @@ -24,8 +24,7 @@ SGMatrix RelaxedTreeUtil::estimate_confusion_matrix(const std::shared { // subset for training SGVector inverse_subset_indices = split->generate_subset_inverse(i); - machine->set_labels(view(Y, inverse_subset_indices)); - machine->train(view(X, inverse_subset_indices)); + machine->train(view(X, inverse_subset_indices), view(Y, inverse_subset_indices)); // subset for predicting SGVector subset_indices = split->generate_subset_indices(i); diff --git a/src/shogun/neuralnets/NeuralNetwork.cpp b/src/shogun/neuralnets/NeuralNetwork.cpp index 80438378cf5..5a9784f7dbd 100644 --- a/src/shogun/neuralnets/NeuralNetwork.cpp +++ b/src/shogun/neuralnets/NeuralNetwork.cpp @@ -231,8 +231,14 @@ std::shared_ptr> NeuralNetwork::transform( return std::make_shared>(output_activations); } -bool NeuralNetwork::train_machine(std::shared_ptr data) +bool NeuralNetwork::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { + if (labs->get_label_type() == LT_BINARY) + m_problem_type = PT_BINARY; + else if (labs->get_label_type() == LT_REGRESSION) + m_problem_type = PT_REGRESSION; + else + m_problem_type = PT_MULTICLASS; if (m_auto_quick_initialize) { quick_connect(); @@ -243,7 +249,7 @@ bool NeuralNetwork::train_machine(std::shared_ptr data) "Maximum number of epochs ({}) must be >= 0", m_max_num_epochs); SGMatrix inputs = features_to_matrix(data); - SGMatrix targets = labels_to_matrix(m_labels); + SGMatrix targets = labels_to_matrix(labs); for (int32_t i=0; i NeuralNetwork::labels_to_matrix(const std::shared_ptrget_label_type() == LT_BINARY) - return PT_BINARY; - else if (m_labels->get_label_type() == LT_REGRESSION) - return PT_REGRESSION; - else return PT_MULTICLASS; + return m_problem_type; } bool NeuralNetwork::is_label_valid(std::shared_ptr lab) const @@ -703,21 +700,6 @@ bool NeuralNetwork::is_label_valid(std::shared_ptr lab) const lab->get_label_type() == LT_REGRESSION); } -void NeuralNetwork::set_labels(std::shared_ptr lab) -{ - if (lab->get_label_type() == LT_BINARY) - { - require(get_num_outputs() <= 2, "Cannot use {} in a neural network " - "with more that 2 output neurons", lab->get_name()); - } - else if (lab->get_label_type() == LT_REGRESSION) - { - require(get_num_outputs() == 1, "Cannot use {} in a neural network " - "with more that 1 output neuron", lab->get_name()); - } - - Machine::set_labels(lab); -} SGVector* NeuralNetwork::get_layer_parameters(int32_t i) const { diff --git a/src/shogun/neuralnets/NeuralNetwork.h b/src/shogun/neuralnets/NeuralNetwork.h index 1a929337b5e..ab275f8c19d 100644 --- a/src/shogun/neuralnets/NeuralNetwork.h +++ b/src/shogun/neuralnets/NeuralNetwork.h @@ -180,12 +180,6 @@ friend class DeepBeliefNetwork; virtual std::shared_ptr> transform( std::shared_ptr> data); - /** set labels - * - * @param lab labels - */ - void set_labels(std::shared_ptr lab) override; - /** get classifier type * * @return classifier type CT_NEURALNETWORK @@ -469,7 +463,7 @@ friend class DeepBeliefNetwork; protected: /** trains the network */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; /** trains the network using gradient descent*/ virtual bool train_gradient_descent(SGMatrix inputs, @@ -737,6 +731,8 @@ friend class DeepBeliefNetwork; */ const SGMatrix* m_lbfgs_temp_inputs; const SGMatrix* m_lbfgs_temp_targets; + + EProblemType m_problem_type; }; } diff --git a/src/shogun/regression/GaussianProcessRegression.cpp b/src/shogun/regression/GaussianProcessRegression.cpp index 33495b00ebb..e2b56851b38 100644 --- a/src/shogun/regression/GaussianProcessRegression.cpp +++ b/src/shogun/regression/GaussianProcessRegression.cpp @@ -37,37 +37,9 @@ std::shared_ptr GaussianProcessRegression::apply_regression(st require(m_method->supports_regression(), "{} with {} doesn't support " "regression", m_method->get_name(), lik->get_name()); - - std::shared_ptr result; - - // if regression data equals to NULL, then apply regression on training - // features - if (!data) - { - std::shared_ptr feat; - - // use inducing features for FITC inference method - if (m_method->get_inference_type()==INF_FITC_REGRESSION) - { - auto fitc_method = m_method->as(); - feat=fitc_method->get_inducing_features(); - } - else - feat=m_method->get_features(); - - result=std::make_shared(get_mean_vector(feat)); - if (m_compute_variance) - result->put("current_values", get_variance_vector(feat)); - - - } - else - { - result=std::make_shared(get_mean_vector(data)); - if (m_compute_variance) - result->put("current_values", get_variance_vector(data)); - } - + auto result=std::make_shared(get_mean_vector(data)); + if (m_compute_variance) + result->set_values(get_variance_vector(data)); return result; } diff --git a/src/shogun/regression/KRRNystrom.cpp b/src/shogun/regression/KRRNystrom.cpp index 4a4ba94f3c6..43a1c9102e1 100644 --- a/src/shogun/regression/KRRNystrom.cpp +++ b/src/shogun/regression/KRRNystrom.cpp @@ -45,8 +45,8 @@ KRRNystrom::KRRNystrom() : RandomMixin() init(); } -KRRNystrom::KRRNystrom(float64_t tau, int32_t m, std::shared_ptr k, std::shared_ptr lab) -: RandomMixin(tau, std::move(k), std::move(lab)) +KRRNystrom::KRRNystrom(float64_t tau, int32_t m, std::shared_ptr k) +: RandomMixin(tau, std::move(k)) { init(); diff --git a/src/shogun/regression/KRRNystrom.h b/src/shogun/regression/KRRNystrom.h index 07fec24d201..d5e4784b241 100644 --- a/src/shogun/regression/KRRNystrom.h +++ b/src/shogun/regression/KRRNystrom.h @@ -77,7 +77,7 @@ class KRRNystrom : public RandomMixin * @param k kernel * @param lab labels */ - KRRNystrom(float64_t tau, int32_t m, std::shared_ptr k, std::shared_ptr lab); + KRRNystrom(float64_t tau, int32_t m, std::shared_ptr k); /** Default destructor */ ~KRRNystrom() override {} @@ -100,12 +100,12 @@ less than number of data points ({})", m_num_rkhs_basis, n); }; - bool train_machine(std::shared_ptrdata) override; - /** @return object name */ const char* get_name() const override { return "KRRNystrom"; } protected: + bool train_machine(std::shared_ptrdata) override; + /** Train regression using the Nyström method. * * @return boolean to indicate success diff --git a/src/shogun/regression/KernelRidgeRegression.cpp b/src/shogun/regression/KernelRidgeRegression.cpp index ea1768f51ec..118705629bf 100644 --- a/src/shogun/regression/KernelRidgeRegression.cpp +++ b/src/shogun/regression/KernelRidgeRegression.cpp @@ -23,13 +23,13 @@ KernelRidgeRegression::KernelRidgeRegression() init(); } -KernelRidgeRegression::KernelRidgeRegression(float64_t tau, std::shared_ptr k, std::shared_ptr lab) -: KernelMachine() +KernelRidgeRegression::KernelRidgeRegression( + float64_t tau, std::shared_ptr k) + : KernelMachine() { init(); set_tau(tau); - set_labels(std::move(lab)); set_kernel(std::move(k)); } @@ -64,23 +64,10 @@ bool KernelRidgeRegression::solve_krr_system() return true; } -bool KernelRidgeRegression::train_machine(std::shared_ptrdata) +bool KernelRidgeRegression::train_machine(std::shared_ptr data) { - require(m_labels, "No labels set"); - - if (data) - { - if (m_labels->get_num_labels() != data->get_num_vectors()) - error("Number of training vectors does not match number of labels"); - kernel->init(data, data); - } - ASSERT(kernel && kernel->has_features()) - - if (m_labels->get_num_labels() != kernel->get_num_vec_rhs()) - { - error("Number of labels does not match number of kernel" - " columns (num_labels={} cols={}", m_labels->get_num_labels(), kernel->get_num_vec_rhs()); - } + require(kernel, "Kernel not set"); + kernel->init(data, data); // allocate alpha vector set_alphas(SGVector(m_labels->get_num_labels())); diff --git a/src/shogun/regression/KernelRidgeRegression.h b/src/shogun/regression/KernelRidgeRegression.h index 9d4b6b04030..3115d4622fa 100644 --- a/src/shogun/regression/KernelRidgeRegression.h +++ b/src/shogun/regression/KernelRidgeRegression.h @@ -61,7 +61,7 @@ class KernelRidgeRegression : public KernelMachine * @param k kernel * @param lab labels */ - KernelRidgeRegression(float64_t tau, std::shared_ptr k, std::shared_ptr lab); + KernelRidgeRegression(float64_t tau, std::shared_ptr k); /** default destructor */ ~KernelRidgeRegression() override {} diff --git a/src/shogun/regression/LeastAngleRegression.cpp b/src/shogun/regression/LeastAngleRegression.cpp index 08cbb46a328..f75d94620aa 100644 --- a/src/shogun/regression/LeastAngleRegression.cpp +++ b/src/shogun/regression/LeastAngleRegression.cpp @@ -104,7 +104,9 @@ void LeastAngleRegression::plane_rot(ST x0, ST x1, } template -bool LeastAngleRegression::train_machine_templated(const std::shared_ptr>& data) +bool LeastAngleRegression::train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs) { std::vector> m_beta_path_t; @@ -122,7 +124,7 @@ bool LeastAngleRegression::train_machine_templated(const std::shared_ptr y = regression_labels(m_labels)->template get_labels_t(); + SGVector y = regression_labels(labs)->template get_labels_t(); typename SGVector::EigenVectorXtMap map_y(y.vector, y.size()); // transpose(X) is more convenient to work with since we care @@ -429,9 +431,15 @@ SGMatrix LeastAngleRegression::cholesky_delete(SGMatrix& R, int32_t i_ki return nR; } -template bool LeastAngleRegression::train_machine_templated(const std::shared_ptr>& data); -template bool LeastAngleRegression::train_machine_templated(const std::shared_ptr>& data); -template bool LeastAngleRegression::train_machine_templated(const std::shared_ptr>& data); +template bool LeastAngleRegression::train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs); +template bool LeastAngleRegression::train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs); +template bool LeastAngleRegression::train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs); template SGMatrix LeastAngleRegression::cholesky_insert(const SGMatrix& X, const SGMatrix& X_active, SGMatrix& R, int32_t i_max_corr, int32_t num_active); template SGMatrix LeastAngleRegression::cholesky_insert(const SGMatrix& X, const SGMatrix& X_active, SGMatrix& R, int32_t i_max_corr, int32_t num_active); template SGMatrix LeastAngleRegression::cholesky_insert(const SGMatrix& X, const SGMatrix& X_active, SGMatrix& R, int32_t i_max_corr, int32_t num_active); diff --git a/src/shogun/regression/LeastAngleRegression.h b/src/shogun/regression/LeastAngleRegression.h index 85b049b3355..96f99d7d620 100644 --- a/src/shogun/regression/LeastAngleRegression.h +++ b/src/shogun/regression/LeastAngleRegression.h @@ -171,9 +171,12 @@ class LeastAngleRegression: public DenseRealDispatch::value>> - bool train_machine_templated(const std::shared_ptr>& data); + template < + typename ST, typename U = typename std::enable_if_t< + std::is_floating_point::value>> + bool train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs); private: /** Initialize and register parameters */ diff --git a/src/shogun/regression/LeastSquaresRegression.cpp b/src/shogun/regression/LeastSquaresRegression.cpp index b1c660de11d..d0edda066ba 100644 --- a/src/shogun/regression/LeastSquaresRegression.cpp +++ b/src/shogun/regression/LeastSquaresRegression.cpp @@ -22,8 +22,5 @@ LeastSquaresRegression::LeastSquaresRegression() m_tau=0; } -LeastSquaresRegression::LeastSquaresRegression(std::shared_ptr> data, std::shared_ptr lab) -: LinearRidgeRegression(0, std::move(data), std::move(lab)) -{ -} + #endif diff --git a/src/shogun/regression/LeastSquaresRegression.h b/src/shogun/regression/LeastSquaresRegression.h index 0945c9b6035..cff982ebeff 100644 --- a/src/shogun/regression/LeastSquaresRegression.h +++ b/src/shogun/regression/LeastSquaresRegression.h @@ -31,13 +31,7 @@ namespace shogun /** default constructor */ LeastSquaresRegression(); - /** constructor - * - * @param data training data - * @param lab labels - */ - LeastSquaresRegression(std::shared_ptr> data, std::shared_ptr lab); - ~LeastSquaresRegression() override {} + ~LeastSquaresRegression() override = default; /** get classifier type * diff --git a/src/shogun/regression/LinearRidgeRegression.cpp b/src/shogun/regression/LinearRidgeRegression.cpp index bd6e61b7f01..209c8f13ad1 100644 --- a/src/shogun/regression/LinearRidgeRegression.cpp +++ b/src/shogun/regression/LinearRidgeRegression.cpp @@ -21,15 +21,12 @@ LinearRidgeRegression::LinearRidgeRegression() init(); } -LinearRidgeRegression::LinearRidgeRegression( - float64_t tau, const std::shared_ptr>& data, std::shared_ptr lab) +LinearRidgeRegression::LinearRidgeRegression(float64_t tau) : DenseRealDispatch() { init(); set_tau(tau); - set_labels(std::move(lab)); - set_features(data); } void LinearRidgeRegression::init() @@ -45,12 +42,13 @@ void LinearRidgeRegression::init() template bool LinearRidgeRegression::train_machine_templated( - const std::shared_ptr>& feats) + const std::shared_ptr>& feats, + const std::shared_ptr& labs) { auto N = feats->get_num_vectors(); auto D = feats->get_num_features(); - auto y = regression_labels(m_labels)->get_labels().as(); + auto y = regression_labels(labs)->get_labels().as(); T tau = m_tau; SGVector x_mean; diff --git a/src/shogun/regression/LinearRidgeRegression.h b/src/shogun/regression/LinearRidgeRegression.h index 7cc23894e69..ed084c43a36 100644 --- a/src/shogun/regression/LinearRidgeRegression.h +++ b/src/shogun/regression/LinearRidgeRegression.h @@ -69,8 +69,8 @@ namespace shogun * @param data training data * @param lab labels */ - LinearRidgeRegression(float64_t tau, const std::shared_ptr>& data, std::shared_ptr lab); - ~LinearRidgeRegression() override {} + LinearRidgeRegression(float64_t tau); + ~LinearRidgeRegression() override = default; /** set regularization constant * @@ -106,7 +106,9 @@ namespace shogun protected: template - bool train_machine_templated(const std::shared_ptr>& feats); + bool train_machine_templated( + const std::shared_ptr>& feats, + const std::shared_ptr& labs); private: void init(); diff --git a/src/shogun/regression/svr/LibLinearRegression.cpp b/src/shogun/regression/svr/LibLinearRegression.cpp index e33e663570e..2a29ea5a418 100644 --- a/src/shogun/regression/svr/LibLinearRegression.cpp +++ b/src/shogun/regression/svr/LibLinearRegression.cpp @@ -27,14 +27,12 @@ LibLinearRegression::LibLinearRegression() : init_defaults(); } -LibLinearRegression::LibLinearRegression(float64_t C, std::shared_ptr feats, std::shared_ptr labs) : - RandomMixin() +LibLinearRegression::LibLinearRegression(float64_t C) + : RandomMixin() { register_parameters(); init_defaults(); set_C(C); - set_features(std::move(feats)); - set_labels(std::move(labs)); } void LibLinearRegression::init_defaults() @@ -71,26 +69,14 @@ LibLinearRegression::~LibLinearRegression() { } -bool LibLinearRegression::train_machine(std::shared_ptr data) +bool LibLinearRegression::train_machine( + const std::shared_ptr& features, const std::shared_ptr& labs) { - - if (data) - set_features(data->as()); - - ASSERT(features) - ASSERT(m_labels && m_labels->get_label_type()==LT_REGRESSION) - - auto num_train_labels=m_labels->get_num_labels(); + auto labels = labs->as(); + auto num_feat=features->get_dim_feature_space(); auto num_vec=features->get_num_vectors(); - if (num_vec!=num_train_labels) - { - error("number of vectors {} does not match " - "number of training labels {}", - num_vec, num_train_labels); - } - SGVector w; auto prob = liblinear_problem(); prob.use_bias = get_use_bias(); @@ -112,7 +98,6 @@ bool LibLinearRegression::train_machine(std::shared_ptr data) } prob.l=num_vec; prob.x=features; - auto labels = regression_labels(m_labels); // store reference to vector locally in order to prevent free-ing auto lab = labels->get_labels(); diff --git a/src/shogun/regression/svr/LibLinearRegression.h b/src/shogun/regression/svr/LibLinearRegression.h index 7a70e2c4c44..131020110fe 100644 --- a/src/shogun/regression/svr/LibLinearRegression.h +++ b/src/shogun/regression/svr/LibLinearRegression.h @@ -50,10 +50,8 @@ class LibLinearRegression : public RandomMixin /** standard constructor * @param C C regularization constant value - * @param features features - * @param labs labels */ - LibLinearRegression(float64_t C, std::shared_ptr features, std::shared_ptr labs); + LibLinearRegression(float64_t C); /** destructor */ ~LibLinearRegression() override; @@ -148,7 +146,9 @@ class LibLinearRegression : public RandomMixin protected: /** train machine */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine( + const std::shared_ptr& data, + const std::shared_ptr& labs) override; private: /** solve svr with l1 or l2 loss */ diff --git a/src/shogun/structure/FWSOSVM.cpp b/src/shogun/structure/FWSOSVM.cpp index 475d31fbfbf..a3b7d09b4c7 100644 --- a/src/shogun/structure/FWSOSVM.cpp +++ b/src/shogun/structure/FWSOSVM.cpp @@ -61,7 +61,7 @@ EMachineType FWSOSVM::get_classifier_type() return CT_FWSOSVM; } -bool FWSOSVM::train_machine(std::shared_ptr data) +bool FWSOSVM::train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) { SG_TRACE("Entering CFWSOSVM::train_machine."); if (data) @@ -76,7 +76,7 @@ bool FWSOSVM::train_machine(std::shared_ptr data) // Dimensionality of the joint feature space int32_t M = m_model->get_dim(); // Number of training examples - int32_t N = m_labels->as()->get_num_labels(); + int32_t N = labs->as()->get_num_labels(); SG_DEBUG("M={}, N ={}.", M, N); diff --git a/src/shogun/structure/FWSOSVM.h b/src/shogun/structure/FWSOSVM.h index 0995aab3178..70a6d42946e 100644 --- a/src/shogun/structure/FWSOSVM.h +++ b/src/shogun/structure/FWSOSVM.h @@ -89,7 +89,7 @@ class FWSOSVM : public LinearStructuredOutputMachine * @param data training data * @return whether the training was successful */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: /** register and initialize parameters */ diff --git a/src/shogun/structure/FactorGraphDataGenerator.cpp b/src/shogun/structure/FactorGraphDataGenerator.cpp index c3ff583bb2e..f4001ef300a 100644 --- a/src/shogun/structure/FactorGraphDataGenerator.cpp +++ b/src/shogun/structure/FactorGraphDataGenerator.cpp @@ -531,7 +531,7 @@ float64_t FactorGraphDataGenerator::test_sosvm(EMAPInferType infer_type) // 2.2 Train SGD - sgd->train(); + sgd->train(fg_feats_train, fg_labels_train); // 3.1 Evaluation auto labels_sgd = sgd->apply()->as(); diff --git a/src/shogun/structure/StochasticSOSVM.cpp b/src/shogun/structure/StochasticSOSVM.cpp index b953734bb7a..81f903f4c4b 100644 --- a/src/shogun/structure/StochasticSOSVM.cpp +++ b/src/shogun/structure/StochasticSOSVM.cpp @@ -61,7 +61,8 @@ EMachineType StochasticSOSVM::get_classifier_type() return CT_STOCHASTICSOSVM; } -bool StochasticSOSVM::train_machine(std::shared_ptr data) +bool StochasticSOSVM::train_machine(const std::shared_ptr& data, + const std::shared_ptr& labs) { SG_TRACE("Entering CStochasticSOSVM::train_machine."); if (data) @@ -76,7 +77,7 @@ bool StochasticSOSVM::train_machine(std::shared_ptr data) // Dimensionality of the joint feature space int32_t M = m_model->get_dim(); // Number of training examples - int32_t N = m_labels->as()->get_num_labels(); + int32_t N = labs->as()->get_num_labels(); require(M > 0, "StochasticSOSVM underlying model has not been initialized properly." "Expected number of dimensions to be greater than 0."); diff --git a/src/shogun/structure/StochasticSOSVM.h b/src/shogun/structure/StochasticSOSVM.h index ca83b039a68..0e5e4aac2e9 100644 --- a/src/shogun/structure/StochasticSOSVM.h +++ b/src/shogun/structure/StochasticSOSVM.h @@ -88,7 +88,8 @@ class StochasticSOSVM : public RandomMixin * @param data training data * @return whether the training was successful */ - bool train_machine(std::shared_ptr data = NULL) override; + bool train_machine(const std::shared_ptr& data, + const std::shared_ptr& labs) override; private: /** register and initialize parameters */ diff --git a/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.cpp b/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.cpp index facc7f0263b..9fd7f36e6ba 100644 --- a/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.cpp +++ b/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.cpp @@ -20,9 +20,8 @@ DomainAdaptationMulticlassLibLinear::DomainAdaptationMulticlassLibLinear() : } DomainAdaptationMulticlassLibLinear::DomainAdaptationMulticlassLibLinear( - float64_t target_C, std::shared_ptr target_features, std::shared_ptr target_labels, - std::shared_ptr source_machine) : - MulticlassLibLinear(target_C,std::move(target_features),std::move(target_labels)) + float64_t target_C, std::shared_ptr source_machine) : + MulticlassLibLinear(target_C) { init_defaults(); @@ -88,8 +87,8 @@ DomainAdaptationMulticlassLibLinear::~DomainAdaptationMulticlassLibLinear() SGMatrix DomainAdaptationMulticlassLibLinear::obtain_regularizer_matrix() const { ASSERT(get_use_bias()==false) - int32_t n_classes = m_source_machine->get_labels()->as()->get_num_classes(); - int32_t n_features = m_source_machine->get_features()->as()->get_dim_feature_space(); + int32_t n_classes = m_source_machine->get_num_classes(); + int32_t n_features = m_source_machine->get_dim_feature_space(); SGMatrix w0(n_classes,n_features); for (int32_t i=0; i DomainAdaptationMulticlassLibLinear::obtain_regularizer_matr return w0; } -std::shared_ptr DomainAdaptationMulticlassLibLinear::get_submachine_outputs(int32_t i) +std::shared_ptr DomainAdaptationMulticlassLibLinear::get_submachine_outputs(const std::shared_ptr& data, int32_t i) { - auto target_outputs = MulticlassMachine::get_submachine_outputs(i); - auto source_outputs = m_source_machine->get_submachine_outputs(i); + auto target_outputs = MulticlassMachine::get_submachine_outputs(data, i); + auto source_outputs = m_source_machine->get_submachine_outputs(data, i); int32_t n_target_outputs = target_outputs->get_num_labels(); ASSERT(n_target_outputs==source_outputs->get_num_labels()) SGVector result(n_target_outputs); diff --git a/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.h b/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.h index 828589035a2..6135a379dbc 100644 --- a/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.h +++ b/src/shogun/transfer/domain_adaptation/DomainAdaptationMulticlassLibLinear.h @@ -28,15 +28,13 @@ class DomainAdaptationMulticlassLibLinear : public MulticlassLibLinear * @param target_labels target domain labels * @param source_machine source domain machine to regularize against */ - DomainAdaptationMulticlassLibLinear(float64_t target_C, - std::shared_ptr target_features, std::shared_ptr target_labels, - std::shared_ptr source_machine); + DomainAdaptationMulticlassLibLinear(float64_t target_C, std::shared_ptr source_machine); /** destructor */ ~DomainAdaptationMulticlassLibLinear() override; /** get submachine outputs */ - std::shared_ptr get_submachine_outputs(int32_t) override; + std::shared_ptr get_submachine_outputs(const std::shared_ptr& data, int32_t i) override; /** get name */ const char* get_name() const override diff --git a/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.cpp b/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.cpp index abc1efb71b8..ba83915959f 100644 --- a/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.cpp +++ b/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.cpp @@ -25,7 +25,7 @@ DomainAdaptationSVMLinear::DomainAdaptationSVMLinear() : LibLinear(L2R_L1LOSS_SV } -DomainAdaptationSVMLinear::DomainAdaptationSVMLinear(float64_t C, std::shared_ptr f, std::shared_ptr lab, std::shared_ptr pre_svm, float64_t B_param) : LibLinear(C, std::move(f), std::move(lab)) +DomainAdaptationSVMLinear::DomainAdaptationSVMLinear(float64_t C, std::shared_ptr pre_svm, float64_t B_param) : LibLinear(C) { init(std::move(pre_svm), B_param); @@ -80,9 +80,6 @@ bool DomainAdaptationSVMLinear::is_presvm_sane() error("presvm bias not set to zero"); } - if (presvm->get_features()->get_feature_type() != this->get_features()->get_feature_type()) { - error("feature types do not agree"); - } } return true; @@ -90,30 +87,10 @@ bool DomainAdaptationSVMLinear::is_presvm_sane() } -bool DomainAdaptationSVMLinear::train_machine(std::shared_ptr train_data) +bool DomainAdaptationSVMLinear::train_machine(const std::shared_ptr& train_data, const std::shared_ptr& labs) { - std::shared_ptr tmp_data; - - if (m_labels->get_label_type() != LT_BINARY) - error("DomainAdaptationSVMLinear requires binary labels"); - - if (train_data) - { - if (!train_data->has_property(FP_DOT)) - error("DotFeatures expected"); - - if (m_labels->as()->get_num_labels() != train_data->get_num_vectors()) - error("Number of training vectors does not match number of labels"); - - tmp_data = train_data->as(); - } - else - { - tmp_data = features; - } - - auto labels = binary_labels(get_labels()); + auto labels = binary_labels(labs); int32_t num_training_points = labels->get_num_labels(); std::vector lin_term = std::vector(num_training_points); @@ -123,7 +100,7 @@ bool DomainAdaptationSVMLinear::train_machine(std::shared_ptr train_da ASSERT(presvm->get_bias() == 0.0) // bias of parent SVM was set to zero in constructor, already contains B - auto parent_svm_out = presvm->apply_binary(tmp_data); + auto parent_svm_out = presvm->apply_binary(train_data); SG_DEBUG("pre-computing linear term from presvm") @@ -161,20 +138,7 @@ bool DomainAdaptationSVMLinear::train_machine(std::shared_ptr train_da set_w(tmp_w_copy, w_dim); SG_FREE(tmp_w_copy); */ - - bool success = false; - - //train SVM - if (train_data) - { - success = LibLinear::train_machine(train_data); - } else { - success = LibLinear::train_machine(); - } - - //ASSERT(presvm) - - return success; + return LibLinear::train_machine(train_data, labs); } diff --git a/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.h b/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.h index d55e558310d..29b8c50f9fb 100644 --- a/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.h +++ b/src/shogun/transfer/domain_adaptation/DomainAdaptationSVMLinear.h @@ -36,7 +36,7 @@ class DomainAdaptationSVMLinear : public LibLinear * @param presvm trained SVM to regularize against * @param B trade-off constant B */ - DomainAdaptationSVMLinear(float64_t C, std::shared_ptr f, std::shared_ptr lab, std::shared_ptr presvm, float64_t B); + DomainAdaptationSVMLinear(float64_t C, std::shared_ptr presvm, float64_t B); /** destructor */ @@ -62,7 +62,7 @@ class DomainAdaptationSVMLinear : public LibLinear * @param data (test)data to be classified * @return classified labels */ - std::shared_ptr apply_binary(std::shared_ptr data=NULL) override; + std::shared_ptr apply_binary(std::shared_ptr data) override; /** returns SVM that is used as prior information @@ -126,8 +126,7 @@ class DomainAdaptationSVMLinear : public LibLinear * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; - + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; protected: /** SVM to regularize against */ diff --git a/src/shogun/transfer/multitask/LibLinearMTL.cpp b/src/shogun/transfer/multitask/LibLinearMTL.cpp index 97bb869e864..47f415bc838 100644 --- a/src/shogun/transfer/multitask/LibLinearMTL.cpp +++ b/src/shogun/transfer/multitask/LibLinearMTL.cpp @@ -29,18 +29,12 @@ using namespace shogun; init(); } -LibLinearMTL::LibLinearMTL( - float64_t C, std::shared_ptr traindat, std::shared_ptr trainlab) -: RandomMixin() +LibLinearMTL::LibLinearMTL(float64_t C): RandomMixin() { init(); C1=C; C2=C; use_bias=true; - - set_features(std::move(traindat)); - set_labels(std::move(trainlab)); - } @@ -64,33 +58,17 @@ LibLinearMTL::~LibLinearMTL() { } -bool LibLinearMTL::train_machine(std::shared_ptr data) +bool LibLinearMTL::train_machine(const std::shared_ptr& features, const std::shared_ptr& labs) { - - ASSERT(m_labels) - - if (data) - { - if (!data->has_property(FP_DOT)) - error("Specified features are not of type DotFeatures"); - - set_features(data->as()); - } - ASSERT(features) - m_labels->ensure_valid(); - - - int32_t num_train_labels=m_labels->get_num_labels(); + int32_t num_labels=labs->get_num_labels(); + require(num_labels==m_linear_term.vlen, "Number of labels ({}) does not match number" + " of entries ({}) in linear term ", num_labels, + m_linear_term.vlen); + labs->ensure_valid(); + int32_t num_train_labels=labs->get_num_labels(); int32_t num_feat=features->get_dim_feature_space(); int32_t num_vec=features->get_num_vectors(); - if (num_vec!=num_train_labels) - { - error("number of vectors {} does not match " - "number of training labels {}", - num_vec, num_train_labels); - } - float64_t* training_w = NULL; if (use_bias) @@ -114,7 +92,7 @@ bool LibLinearMTL::train_machine(std::shared_ptr data) prob.y=SG_MALLOC(float64_t, prob.l); prob.use_bias=use_bias; - auto bl = binary_labels(m_labels); + auto bl = binary_labels(labs); for (int32_t i=0; iget_label(i); @@ -391,8 +369,9 @@ void LibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, } -float64_t LibLinearMTL::compute_primal_obj() +float64_t LibLinearMTL::compute_primal_obj(const std::shared_ptr& data, const std::shared_ptr& labs) { + /* python protype num_param = param.shape[0] num_dim = len(all_xt[0]) @@ -436,7 +415,7 @@ return obj io::info("DONE to compute Primal OBJ"); // calculate objective value SGMatrix W = get_W(); - + const auto features = data->as(); float64_t obj = 0; int32_t num_vec = features->get_num_vectors(); int32_t w_size = features->get_dim_feature_space(); @@ -469,7 +448,7 @@ return obj } // loss - auto bl = binary_labels(m_labels); + auto bl = binary_labels(labs); for(int32_t i=0; i& data) { /* python prototype num_xt = len(xt) @@ -502,7 +481,7 @@ obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j return obj */ - + const auto features = data->as(); io::info("starting to compute DUAL OBJ"); int32_t num_vec=features->get_num_vectors(); diff --git a/src/shogun/transfer/multitask/LibLinearMTL.h b/src/shogun/transfer/multitask/LibLinearMTL.h index 2c07e52ce82..c5c78dfcd21 100644 --- a/src/shogun/transfer/multitask/LibLinearMTL.h +++ b/src/shogun/transfer/multitask/LibLinearMTL.h @@ -93,12 +93,8 @@ class LibLinearMTL : public RandomMixin /** constructor (using L2R_L1LOSS_SVC_DUAL as default) * * @param C constant C - * @param traindat training features - * @param trainlab training labels */ - LibLinearMTL( - float64_t C, std::shared_ptr traindat, - std::shared_ptr trainlab); + LibLinearMTL(float64_t C); /** destructor */ ~LibLinearMTL() override; @@ -177,18 +173,6 @@ class LibLinearMTL : public RandomMixin /** set the linear term for qp */ inline void set_linear_term(SGVector linear_term) { - if (!m_labels) - error("Please assign labels first!"); - - int32_t num_labels=m_labels->get_num_labels(); - - if (num_labels!=linear_term.vlen) - { - error("Number of labels ({}) does not match number" - " of entries ({}) in linear term ", num_labels, - linear_term.vlen); - } - m_linear_term = linear_term; } @@ -269,13 +253,13 @@ class LibLinearMTL : public RandomMixin * * @return primal objective */ - virtual float64_t compute_primal_obj(); + virtual float64_t compute_primal_obj(const std::shared_ptr& features, const std::shared_ptr& labs); /** compute dual objective * * @return dual objective */ - virtual float64_t compute_dual_obj(); + virtual float64_t compute_dual_obj(const std::shared_ptr& features); /** compute duality gap * @@ -293,7 +277,7 @@ class LibLinearMTL : public RandomMixin * * @return whether training was successful */ - bool train_machine(std::shared_ptr data=NULL) override; + bool train_machine(const std::shared_ptr& data, const std::shared_ptr& labs) override; private: /** set up parameters */ diff --git a/tests/unit/classifier/LDA_unittest.cc b/tests/unit/classifier/LDA_unittest.cc index 7fa5d5e3474..77d033e1991 100644 --- a/tests/unit/classifier/LDA_unittest.cc +++ b/tests/unit/classifier/LDA_unittest.cc @@ -108,8 +108,7 @@ void test_with_method( std::shared_ptr labels = std::make_shared(lab); auto lda = std::make_shared(0, method); - lda->put("labels", labels); - lda->train(features); + lda->train(features, labels); auto results = lda->apply_regression(features); projection = results->get>("labels"); @@ -202,10 +201,9 @@ TEST(LDA, num_classes_in_labels_exception) std::shared_ptr labels = std::make_shared(lab); auto features = std::make_shared>(feat); auto lda = std::make_shared(0, SVD_LDA); - lda->put("labels", labels); // should throw an incorrect number of classes exception (expected value is // 2) - EXPECT_THROW(lda->train(features), ShogunException); + EXPECT_THROW(lda->train(features, labels), ShogunException); } //FLD template testing diff --git a/tests/unit/classifier/NearestCentroid_unittest.cc b/tests/unit/classifier/NearestCentroid_unittest.cc new file mode 100644 index 00000000000..036a433c549 --- /dev/null +++ b/tests/unit/classifier/NearestCentroid_unittest.cc @@ -0,0 +1,31 @@ +/* + * This software is distributed under BSD 3-clause license (see LICENSE file). + * + * Authors: Yuhui Liu + */ +#include +#include +#include +#include + +using namespace shogun; +TEST(NearestCentroid, fit_and_predict) +{ + SGMatrix X{{-10, -1}, {-2, -1}, {-3, -2}, + {1, 1}, {2, 1}, {3, 2}}; + SGVector y{0, 0, 0, 1, 1, 1}; + + auto train_data = std::make_shared>(X); + auto train_labels = std::make_shared(y); + auto distance = std::make_shared(); + + SGMatrix t{{3, 2}, {-10, -1}, {-100, 100}}; + auto test_data = std::make_shared>(t); + auto clf = std::make_shared(distance); + clf->train(train_data, train_labels); + auto result_labels = clf->apply(test_data); + auto result = result_labels->as()->get_labels(); + EXPECT_EQ(result[0], 1); + EXPECT_EQ(result[1], 0); + EXPECT_EQ(result[2], 0); +} \ No newline at end of file diff --git a/tests/unit/classifier/Perceptron_unittest.cc b/tests/unit/classifier/Perceptron_unittest.cc index c822e879a38..bd70e3f55cc 100644 --- a/tests/unit/classifier/Perceptron_unittest.cc +++ b/tests/unit/classifier/Perceptron_unittest.cc @@ -56,8 +56,7 @@ TEST(Perceptron, train) auto test_labels = env->get_labels_test(); auto perceptron = std::make_shared(); - perceptron->set_labels(labels); - EXPECT_TRUE(perceptron->train(features)); + EXPECT_TRUE(perceptron->train(features, labels)); auto results = perceptron->apply(test_features); auto acc = std::make_shared(); @@ -73,8 +72,7 @@ TEST(Perceptron, custom_hyperplane_initialization) auto test_labels = env->get_labels_test(); auto perceptron = std::make_shared(); - perceptron->set_labels(labels); - perceptron->train(features); + perceptron->train(features, labels); auto weights = perceptron->get_w(); @@ -82,9 +80,8 @@ TEST(Perceptron, custom_hyperplane_initialization) perceptron_initialized->set_initialize_hyperplane(false); perceptron_initialized->set_w(weights); perceptron_initialized->put("max_iterations", 1); - perceptron_initialized->set_labels(labels); - perceptron_initialized->train(features); + perceptron_initialized->train(features, labels); EXPECT_TRUE(perceptron_initialized->get_w().equals(weights)); } diff --git a/tests/unit/classifier/svm/LibLinear_unittest.cc b/tests/unit/classifier/svm/LibLinear_unittest.cc index 3e6f045552f..f1688cdc63b 100644 --- a/tests/unit/classifier/svm/LibLinear_unittest.cc +++ b/tests/unit/classifier/svm/LibLinear_unittest.cc @@ -53,11 +53,9 @@ class LibLinearFixture : public ::testing::Test ll->set_bias_enabled(biasEnable); - ll->set_features(train_feats); - ll->set_labels(ground_truth); ll->set_liblinear_solver_type(liblinear_solver_type); - ll->train(); + ll->train(train_feats, ground_truth); auto pred = ll->apply_binary(test_feats); auto liblin_accuracy = eval->evaluate(pred, ground_truth); @@ -82,13 +80,11 @@ class LibLinearFixture : public ::testing::Test ll->set_bias_enabled(biasEnable); - ll->set_features(train_feats); if (C_value) ll->set_C(0.1,0.1); //Only in the case of L2R_L1LOSS_SVC_DUAL - ll->set_labels(ground_truth); ll->set_liblinear_solver_type(liblinear_solver_type); ll->put("seed", seed); - ll->train(); + ll->train(train_feats, ground_truth); auto pred = ll->apply_binary(test_feats); diff --git a/tests/unit/classifier/svm/SVMOcas_unittest.cc b/tests/unit/classifier/svm/SVMOcas_unittest.cc index c11d37844b8..1617bcc5801 100644 --- a/tests/unit/classifier/svm/SVMOcas_unittest.cc +++ b/tests/unit/classifier/svm/SVMOcas_unittest.cc @@ -23,10 +23,10 @@ TEST(SVMOcasTest,train) auto ground_truth = std::static_pointer_cast(mockData->get_labels_test()); - auto ocas = std::make_shared(1.0, train_feats, ground_truth); + auto ocas = std::make_shared(1.0); env()->set_num_threads(1); ocas->set_epsilon(1e-5); - ocas->train(); + ocas->train(train_feats, ground_truth); float64_t objective = ocas->compute_primal_objective(); EXPECT_NEAR(objective, 0.024344632618686062, 1e-2); diff --git a/tests/unit/evaluation/CrossValidation_unittest.cc b/tests/unit/evaluation/CrossValidation_unittest.cc index e69f31133fc..34baa92c92f 100644 --- a/tests/unit/evaluation/CrossValidation_unittest.cc +++ b/tests/unit/evaluation/CrossValidation_unittest.cc @@ -168,9 +168,8 @@ class CrossValidationTests : public ::testing::Test std::shared_ptr machine; }; -typedef ::testing::Types +typedef ::testing::Types MachineTypes; TYPED_TEST_CASE(CrossValidationTests, MachineTypes); diff --git a/tests/unit/lib/observers/ParameterObserverCV_unittest.cc b/tests/unit/lib/observers/ParameterObserverCV_unittest.cc index d71e2fef1a5..18ef81aa1b3 100644 --- a/tests/unit/lib/observers/ParameterObserverCV_unittest.cc +++ b/tests/unit/lib/observers/ParameterObserverCV_unittest.cc @@ -53,7 +53,7 @@ std::shared_ptr generate(bool locked = true) /* kernel ridge regression*/ float64_t tau = 0.0001; auto krr = - std::make_shared(tau, kernel, labels); + std::make_shared(tau, kernel); /* evaluation criterion */ auto eval_crit = std::make_shared(); diff --git a/tests/unit/machine/EnsembleMachine_unittest.cc b/tests/unit/machine/EnsembleMachine_unittest.cc index 81dcf3b2f21..25d4720cbee 100644 --- a/tests/unit/machine/EnsembleMachine_unittest.cc +++ b/tests/unit/machine/EnsembleMachine_unittest.cc @@ -55,11 +55,12 @@ TEST(Composite, train) std::static_pointer_cast(mockData->get_labels_test()); auto composite = std::make_shared(); - auto pred = composite->over(std::make_shared()) - ->over(std::make_shared()) - ->then(std::make_shared()) - ->train(train_feats, train_labels) - ->apply_multiclass(test_feats); + composite->over(std::make_shared()) + ->over(std::make_shared()) + ->then(std::make_shared()) + ->train(train_feats, train_labels); + + auto pred = composite->apply_multiclass(test_feats); MulticlassAccuracy evaluate; float64_t result = evaluate.evaluate(pred, ground_truth); @@ -77,13 +78,15 @@ TEST(combinate_composite_and_pipeline, train) auto ground_truth = std::static_pointer_cast(mockData->get_labels_test()); auto pipeline = std::make_shared(); - auto pred = pipeline ->over(std::make_shared()) - ->composite() - ->over(std::make_shared()) - ->over(std::make_shared()) - ->then(std::make_shared()) - ->train(train_feats, train_labels) - ->apply_multiclass(test_feats); + auto machine = pipeline->over(std::make_shared()) + ->composite() + ->over(std::make_shared()) + ->over(std::make_shared()) + ->then(std::make_shared()); + + machine->train(train_feats, train_labels); + + auto pred = machine->apply_multiclass(test_feats); MulticlassAccuracy evaluate; float64_t result = evaluate.evaluate(pred, ground_truth); diff --git a/tests/unit/machine/FeatureDispatchCRTP_unittest.cc b/tests/unit/machine/FeatureDispatchCRTP_unittest.cc index a389cc903ba..78ce9bfe37a 100644 --- a/tests/unit/machine/FeatureDispatchCRTP_unittest.cc +++ b/tests/unit/machine/FeatureDispatchCRTP_unittest.cc @@ -22,7 +22,9 @@ class DenseRealMockMachine { } template - bool train_machine_templated(const std::shared_ptr>& data) + bool train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs) { if (data->get_feature_type() == m_expected_feature_type) return true; @@ -49,7 +51,9 @@ class StringMockMachine { } template - bool train_machine_templated(const std::shared_ptr>& data) + bool train_machine_templated( + const std::shared_ptr>& data, + const std::shared_ptr& labs) { if (data->get_feature_type() == m_expected_feature_type) return true; @@ -80,9 +84,8 @@ TYPED_TEST(DenseDispatchCRTP, train_with_dense) auto mock_machine = std::make_shared(features->get_feature_type()); - mock_machine->set_labels(std::make_shared(labels)); - - EXPECT_TRUE(mock_machine->train(features)); + auto labs = std::make_shared(labels); + EXPECT_TRUE(mock_machine->train(features, labs)); } typedef ::testing::Types SGCharTypes; @@ -103,9 +106,8 @@ TYPED_TEST(StringDispatchCRTP, train_with_string) auto labels = SGVector({1, -1}); auto mock_machine = std::make_shared(features->get_feature_type()); - mock_machine->set_labels(std::make_shared(labels)); - - EXPECT_TRUE(mock_machine->train(features)); + auto labs = std::make_shared(labels); + EXPECT_TRUE(mock_machine->train(features, labs)); } TEST(TrainDense, train_dense_with_wrong_feature_type) @@ -117,9 +119,8 @@ TEST(TrainDense, train_dense_with_wrong_feature_type) auto mock_machine = std::make_shared(features->get_feature_type()); - mock_machine->set_labels(std::make_shared(labels)); - - EXPECT_THROW(mock_machine->train(features), ShogunException); + auto labs = std::make_shared(labels); + EXPECT_THROW(mock_machine->train(features, labs), ShogunException); } TEST(TrainDense, train_dense_with_wrong_feature_class) @@ -132,6 +133,6 @@ TEST(TrainDense, train_dense_with_wrong_feature_class) auto mock_machine = std::make_shared(features->get_feature_type()); - mock_machine->set_labels(std::make_shared(labels)); - EXPECT_THROW(mock_machine->train(features), ShogunException); + auto labs = std::make_shared(labels); + EXPECT_THROW(mock_machine->train(features, labs), ShogunException); } diff --git a/tests/unit/machine/MockMachine.h b/tests/unit/machine/MockMachine.h index d29c8532925..6b8479308ff 100644 --- a/tests/unit/machine/MockMachine.h +++ b/tests/unit/machine/MockMachine.h @@ -10,6 +10,7 @@ namespace shogun { public: MOCK_METHOD1(apply, std::shared_ptr(std::shared_ptr)); MOCK_METHOD1(train_machine, bool(std::shared_ptr)); + MOCK_METHOD2(train_machine, bool(const std::shared_ptr&, const std::shared_ptr&)); MOCK_CONST_METHOD1(clone, std::shared_ptr(ParameterProperties)); virtual const char* get_name() const { return "MockMachine"; } diff --git a/tests/unit/machine/Pipeline_unittest.cc b/tests/unit/machine/Pipeline_unittest.cc index 23002ceda46..c8db5c75328 100644 --- a/tests/unit/machine/Pipeline_unittest.cc +++ b/tests/unit/machine/Pipeline_unittest.cc @@ -50,7 +50,8 @@ TEST_F(PipelineTest, fit_predict) ->then(machine); // no labels given - EXPECT_THROW(pipeline->train(features), ShogunException); + //EXPECT_THROW(pipeline->train(features), ShogunException); + //pipeline->train(features, labels); InSequence s; @@ -60,10 +61,9 @@ TEST_F(PipelineTest, fit_predict) EXPECT_CALL(*transformer2, fit(_)).Times(0); EXPECT_CALL(*transformer2, fit(_, _)); EXPECT_CALL(*transformer2, transform(_, _)); - EXPECT_CALL(*machine, train_machine(_)); + EXPECT_CALL(*machine, train_machine(_,_)); - pipeline->set_labels(labels); - pipeline->train(features); + pipeline->train(features, labels); Mock::VerifyAndClearExpectations(transformer1.get()); Mock::VerifyAndClearExpectations(transformer2.get()); diff --git a/tests/unit/machine/StochasticGBMachine_unittest.cc b/tests/unit/machine/StochasticGBMachine_unittest.cc index c241cf34d7f..563682c9633 100644 --- a/tests/unit/machine/StochasticGBMachine_unittest.cc +++ b/tests/unit/machine/StochasticGBMachine_unittest.cc @@ -129,8 +129,7 @@ TEST_F(StochasticGBMachineTest, sinusoid_curve_fitting) auto sq=std::make_shared(); auto sgbm = std::make_shared(tree, sq, 100, 0.1, 1.0); sgbm->put("seed", seed); - sgbm->set_labels(train_labels); - sgbm->train(train_feats); + sgbm->train(train_feats, train_labels); auto ret_labels = sgbm->apply_regression(test_feats); SGVector ret=ret_labels->get_labels(); @@ -160,8 +159,7 @@ TEST_F(StochasticGBMachineTest, sinusoid_curve_fitting_subset_fraction) auto sgbm = std::make_shared(tree, sq, 100, 0.1, fraction); sgbm->put("seed", seed); - sgbm->set_labels(train_labels); - sgbm->train(train_feats); + sgbm->train(train_feats, train_labels); auto ret_labels = sgbm->apply_regression(test_feats); SGVector ret = ret_labels->get_labels(); diff --git a/tests/unit/machine/glm_unittest.cc b/tests/unit/machine/glm_unittest.cc index 7be84b7cab1..317bd1b165c 100644 --- a/tests/unit/machine/glm_unittest.cc +++ b/tests/unit/machine/glm_unittest.cc @@ -67,9 +67,7 @@ TEST(GLM, GLM_basic_test) glm->set_bias(0.44101309); glm->set_w(SGVector({0.1000393, 0.2446845, 0.5602233})); - glm->set_labels(labels_train); - - glm->train(features_train); + glm->train(features_train, labels_train); auto labels_predict = glm->apply_regression(features_test); diff --git a/tests/unit/multiclass/BaggingMachine_unittest.cc b/tests/unit/multiclass/BaggingMachine_unittest.cc index ea3e3388c93..3a2145eb1ba 100644 --- a/tests/unit/multiclass/BaggingMachine_unittest.cc +++ b/tests/unit/multiclass/BaggingMachine_unittest.cc @@ -79,7 +79,7 @@ TEST_F(BaggingMachineTest, mock_train) auto features = std::make_shared>(); auto labels = std::make_shared>(); - auto bm = std::make_shared(features, labels); + auto bm = std::make_shared(); auto mm = std::make_shared>(); auto mv = std::make_shared(); @@ -90,7 +90,7 @@ TEST_F(BaggingMachineTest, mock_train) bm->set_combination_rule(mv); bm->put("seed", seed); - ON_CALL(*mm, train_machine(_)) + ON_CALL(*mm, train_machine(_, _)) .WillByDefault(Return(true)); ON_CALL(*features, get_num_vectors()) @@ -103,13 +103,13 @@ TEST_F(BaggingMachineTest, mock_train) .Times(1) .WillRepeatedly(Return(mm)); - EXPECT_CALL(*mm, train_machine(_)) + EXPECT_CALL(*mm, train_machine(_, _)) .Times(1) .WillRepeatedly(Return(true)); } } - bm->train(); + bm->train(features_train, labels_train); EXPECT_TRUE(Mock::VerifyAndClearExpectations(mm.get())); } @@ -120,7 +120,7 @@ TEST_F(BaggingMachineTest, classify_CART) auto cv=std::make_shared(); cart->set_feature_types(ft); - auto c = std::make_shared(features_train, labels_train); + auto c = std::make_shared(); env()->set_num_threads(1); c->set_machine(cart); @@ -128,7 +128,7 @@ TEST_F(BaggingMachineTest, classify_CART) c->set_num_bags(10); c->set_combination_rule(cv); c->put("seed", seed); - c->train(features_train); + c->train(features_train, labels_train); auto result = c->apply_multiclass(features_test); SGVector res_vector=result->get_labels(); @@ -151,14 +151,14 @@ TEST_F(BaggingMachineTest, output_binary) auto cv = std::make_shared(); cart->set_feature_types(ft); - auto c = std::make_shared(features_train, labels_train); + auto c = std::make_shared(); env()->set_num_threads(1); c->set_machine(cart); c->set_bag_size(14); c->set_num_bags(10); c->set_combination_rule(cv); c->put("seed", seed); - c->train(features_train); + c->train(features_train, labels_train); auto result = c->apply_binary(features_test); SGVector res_vector = result->get_labels(); @@ -185,13 +185,13 @@ TEST_F(BaggingMachineTest, output_multiclass_probs_sum_to_one) auto cv = std::make_shared(); cart->set_feature_types(ft); - auto c = std::make_shared(features_train, labels_train); + auto c = std::make_shared(); c->set_machine(cart); c->set_bag_size(14); c->set_num_bags(10); c->set_combination_rule(cv); c->put("seed", seed); - c->train(features_train); + c->train(features_train, labels_train); auto result = c->apply_multiclass(features_test); diff --git a/tests/unit/multiclass/KNN_unittest.cc b/tests/unit/multiclass/KNN_unittest.cc index dd4a23b468e..fd12ac84da3 100644 --- a/tests/unit/multiclass/KNN_unittest.cc +++ b/tests/unit/multiclass/KNN_unittest.cc @@ -85,8 +85,8 @@ class KNNTest : public ::testing::Test // typedef ::testing::Types KNNTypes; TEST_F(KNNTest, brute_solver) { - auto knn = std::make_shared(k, distance, labels, KNN_BRUTE); - knn->train(features); + auto knn = std::make_shared(k, distance, KNN_BRUTE); + knn->train(features, labels); auto output = knn->apply(features_test)->as(); for ( index_t i = 0; i < labels_test->get_num_labels(); ++i ) @@ -95,8 +95,8 @@ TEST_F(KNNTest, brute_solver) TEST_F(KNNTest, kdtree_solver) { - auto knn = std::make_shared(k, distance, labels, KNN_KDTREE); - knn->train(features); + auto knn = std::make_shared(k, distance, KNN_KDTREE); + knn->train(features, labels); auto output = knn->apply(features_test)->as(); for ( index_t i = 0; i < labels_test->get_num_labels(); ++i ) @@ -106,8 +106,8 @@ TEST_F(KNNTest, kdtree_solver) TEST_F(KNNTest, lsh_solver) { - auto knn = std::make_shared(k, distance, labels, KNN_LSH); - knn->train(features); + auto knn = std::make_shared(k, distance, KNN_LSH); + knn->train(features, labels); auto output = knn->apply(features_test)->as(); for ( index_t i = 0; i < labels_test->get_num_labels(); ++i ) @@ -117,11 +117,11 @@ TEST_F(KNNTest, lsh_solver) TEST_F(KNNTest, lsh_solver_sparse) { - auto knn = std::make_shared(k, distance, labels, KNN_LSH); + auto knn = std::make_shared(k, distance, KNN_LSH); // TODO: the sparse features should be actually sparse auto features_sparse = std::make_shared>(features); auto features_test_sparse = std::make_shared>(features_test); - knn->train(features_sparse); + knn->train(features_sparse, labels); auto output = knn->apply(features_test_sparse)->as(); for ( index_t i = 0; i < labels_test->get_num_labels(); ++i ) @@ -153,12 +153,12 @@ TEST(KNN, classify_multiple_brute) int32_t k=4; auto distance = std::make_shared(); - auto knn=std::make_shared (k, distance, labels, KNN_BRUTE); + auto knn=std::make_shared (k, distance, KNN_BRUTE); features->add_subset(train); labels->add_subset(train); - knn->train(features); + knn->train(features, labels); // classify for multiple k features_test->add_subset(test); @@ -203,12 +203,12 @@ TEST(KNN, classify_multiple_kdtree) int32_t k=4; auto distance = std::make_shared(); - auto knn=std::make_shared(k, distance, labels, KNN_KDTREE); + auto knn=std::make_shared(k, distance, KNN_KDTREE); features->add_subset(train); labels->add_subset(train); - knn->train(features); + knn->train(features, labels); // classify for multiple k features_test->add_subset(test); diff --git a/tests/unit/multiclass/MCLDA_unittest.cc b/tests/unit/multiclass/MCLDA_unittest.cc index 4a515536b42..f4599393022 100644 --- a/tests/unit/multiclass/MCLDA_unittest.cc +++ b/tests/unit/multiclass/MCLDA_unittest.cc @@ -31,11 +31,11 @@ TEST(MCLDA, train_and_apply) auto labels = std::make_shared(lab); auto features = std::make_shared>(feat); - auto lda = std::make_shared(features, labels); + auto lda = std::make_shared(); - lda->train(); + lda->train(features, labels); - auto output = lda->apply()->as(); + auto output = lda->apply(features)->as(); // Test for ( index_t i = 0; i < CLASSES*NUM; ++i ) EXPECT_EQ(output->get_label(i), labels->get_label(i)); diff --git a/tests/unit/multiclass/MulticlassLibLinear_unittest.cc b/tests/unit/multiclass/MulticlassLibLinear_unittest.cc index 4e853d67ab3..e08ab920110 100644 --- a/tests/unit/multiclass/MulticlassLibLinear_unittest.cc +++ b/tests/unit/multiclass/MulticlassLibLinear_unittest.cc @@ -52,11 +52,10 @@ TEST(MulticlassLibLinearTest,train_and_apply) float64_t C=1.0; - auto mocas=std::make_shared(C, features, - labels); + auto mocas=std::make_shared(C); env()->set_num_threads(1); mocas->set_epsilon(1e-5); - mocas->train(); + mocas->train(features, labels); auto pred=mocas->apply(features_test)->as(); for (int i=0; iget_num_vectors(); ++i) diff --git a/tests/unit/multiclass/MulticlassOCAS_unittest.cc b/tests/unit/multiclass/MulticlassOCAS_unittest.cc index c6409f8a54d..907992dde9a 100644 --- a/tests/unit/multiclass/MulticlassOCAS_unittest.cc +++ b/tests/unit/multiclass/MulticlassOCAS_unittest.cc @@ -21,10 +21,10 @@ TEST(MulticlassOCASTest,train) auto test_feats = mockData->get_features_test(); auto ground_truth = std::static_pointer_cast(mockData->get_labels_test()); - auto mocas = std::make_shared(C, train_feats, ground_truth); + auto mocas = std::make_shared(C); env()->set_num_threads(1); mocas->set_epsilon(1e-5); - mocas->train(); + mocas->train(train_feats, ground_truth); auto pred = mocas->apply(test_feats)->as(); diff --git a/tests/unit/multiclass/QDA_unittest.cc b/tests/unit/multiclass/QDA_unittest.cc index a52b6d379bf..d47f1628f1b 100644 --- a/tests/unit/multiclass/QDA_unittest.cc +++ b/tests/unit/multiclass/QDA_unittest.cc @@ -31,11 +31,11 @@ TEST(QDA, train_and_apply) auto labels = std::make_shared(lab); auto features = std::make_shared>(feat); - auto qda = std::make_shared(features, labels); + auto qda = std::make_shared(); - qda->train(); + qda->train(features, labels); - auto output = qda->apply()->as(); + auto output = qda->apply(features)->as(); // Test for ( index_t i = 0; i < CLASSES*NUM; ++i ) EXPECT_EQ(output->get_label(i), labels->get_label(i)); diff --git a/tests/unit/multiclass/tree/C45ClassifierTree_unittest.cc b/tests/unit/multiclass/tree/C45ClassifierTree_unittest.cc index 9f4bb4eba99..08aecff27d7 100644 --- a/tests/unit/multiclass/tree/C45ClassifierTree_unittest.cc +++ b/tests/unit/multiclass/tree/C45ClassifierTree_unittest.cc @@ -160,9 +160,8 @@ TEST(C45ClassifierTree, classify_equivalence_check_to_id3) auto labels=std::make_shared(lab); auto c45=std::make_shared(); - c45->set_labels(labels); c45->set_feature_types(ft); - c45->train(feats); + c45->train(feats, labels); SGMatrix test(4,5); test(0,0)=overcast; @@ -310,9 +309,8 @@ TEST(C45ClassifierTree, classify_continuous_plus_categorical_data) auto labels=std::make_shared(lab); auto c45=std::make_shared(); - c45->set_labels(labels); c45->set_feature_types(ft); - c45->train(feats); + c45->train(feats, labels); SGMatrix test(4,5); test(0,0)=overcast; @@ -386,9 +384,8 @@ TEST(C45ClassifierTree, missing_attribute) auto labels=std::make_shared(lab); auto c45=std::make_shared(); - c45->set_labels(labels); c45->set_feature_types(ft); - c45->train(feats); + c45->train(feats, labels); SGMatrix test(1,2); test(0,0)=32; @@ -500,9 +497,8 @@ TEST(C45ClassifierTree, tree_prune_categorical_attributes) auto c45tree=std::make_shared(); - c45tree->set_labels(train_lab); c45tree->set_feature_types(feature_types); - c45tree->train(train_features); + c45tree->train(train_features, train_lab); c45tree->prune_tree(train_features,validation_lab); auto result=c45tree->apply(train_features)->as(); @@ -589,9 +585,8 @@ TEST(C45ClassifierTree, tree_prune_continuous_attributes) auto c45tree=std::make_shared(); - c45tree->set_labels(train_lab); c45tree->set_feature_types(feature_types); - c45tree->train(train_features); + c45tree->train(train_features, train_lab); c45tree->prune_tree(validation_features,validation_lab); auto result=c45tree->apply(train_features)->as(); diff --git a/tests/unit/multiclass/tree/CARTree_unittest.cc b/tests/unit/multiclass/tree/CARTree_unittest.cc index 2af2a21702f..d766fb279e7 100644 --- a/tests/unit/multiclass/tree/CARTree_unittest.cc +++ b/tests/unit/multiclass/tree/CARTree_unittest.cc @@ -155,9 +155,8 @@ TEST(CARTree, classify_nominal) auto labels=std::make_shared(lab); auto c=std::make_shared(); - c->set_labels(labels); c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); SGMatrix test(4,5); test(0,0)=overcast; @@ -218,8 +217,7 @@ TEST(CARTree, comparable_with_sklearn) auto labels = std::make_shared(y); auto c = std::make_shared(); - c->set_labels(labels); - c->train(feats); + c->train(feats, labels); auto feat_import = c->get_feature_importance(); // those data are generated by below sklearn program EXPECT_NEAR(0.111111, feat_import[0], 0.00001); @@ -340,9 +338,8 @@ TEST(CARTree, classify_non_nominal) auto labels=std::make_shared(lab); auto c=std::make_shared(); - c->set_labels(labels); c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); SGMatrix test(4,5); test(0,0)=overcast; @@ -443,9 +440,8 @@ TEST(CARTree, handle_missing_nominal) auto labels=std::make_shared(lab); auto c=std::make_shared(); - c->set_labels(labels); c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); auto root=c->get_root()->as>(); auto left=root->left(); @@ -516,9 +512,8 @@ TEST(CARTree, handle_missing_continuous) auto labels=std::make_shared(lab); auto c=std::make_shared(); - c->set_labels(labels); c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); auto root=c->get_root()->as>(); auto left=root->left(); @@ -553,9 +548,8 @@ TEST(CARTree, form_t1_test) auto labels=std::make_shared(lab); auto c=std::make_shared(); - c->set_labels(labels); c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); auto root=c->get_root(); EXPECT_EQ(2,root->data.num_leaves); @@ -643,9 +637,8 @@ TEST(CARTree,cv_prune_simple) auto labels=std::make_shared(lab); auto c=std::make_shared(); - c->set_labels(labels); c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); auto root=c->get_root()->as>(); @@ -654,7 +647,7 @@ TEST(CARTree,cv_prune_simple) c->set_num_folds(2); c->set_cv_pruning(true); - c->train(feats); + c->train(feats, labels); root=c->get_root()->as>(); diff --git a/tests/unit/multiclass/tree/CHAIDTree_unittest.cc b/tests/unit/multiclass/tree/CHAIDTree_unittest.cc index 7f6f39c83f7..4a9d4679649 100644 --- a/tests/unit/multiclass/tree/CHAIDTree_unittest.cc +++ b/tests/unit/multiclass/tree/CHAIDTree_unittest.cc @@ -157,11 +157,10 @@ TEST(CHAIDTree, test_tree_structure) auto labels=std::make_shared(lab); auto c=std::make_shared(0); - c->set_labels(labels); c->set_feature_types(ft); c->set_alpha_merge(Math::MIN_REAL_NUMBER); c->set_alpha_split(Math::MAX_REAL_NUMBER); - c->train(feats); + c->train(feats, labels); auto node=c->get_root(); EXPECT_EQ(2,node->data.attribute_id); @@ -190,7 +189,7 @@ TEST(CHAIDTree, test_tree_structure) ft[2]=1; ft[3]=1; c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); @@ -251,11 +250,10 @@ TEST(CHAIDTree, test_classify_multiclass) auto labels=std::make_shared(lab); auto c=std::make_shared(0); - c->set_labels(labels); c->set_feature_types(ft); c->set_alpha_merge(Math::MIN_REAL_NUMBER); c->set_alpha_split(Math::MAX_REAL_NUMBER); - c->train(feats); + c->train(feats, labels); SGMatrix test(4,5); test(0,0)=overcast; @@ -298,7 +296,7 @@ TEST(CHAIDTree, test_classify_multiclass) ft[2]=1; ft[3]=1; c->set_feature_types(ft); - c->train(feats); + c->train(feats, labels); result=c->apply_multiclass(test_feats); diff --git a/tests/unit/multiclass/tree/ID3ClassifierTree_unittest.cc b/tests/unit/multiclass/tree/ID3ClassifierTree_unittest.cc index faa45c52a75..68fd3199068 100644 --- a/tests/unit/multiclass/tree/ID3ClassifierTree_unittest.cc +++ b/tests/unit/multiclass/tree/ID3ClassifierTree_unittest.cc @@ -154,8 +154,7 @@ TEST(ID3ClassifierTree, classify_simple) auto labels=std::make_shared(lab); auto id3=std::make_shared(); - id3->set_labels(labels); - id3->train(feats); + id3->train(feats, labels); SGMatrix test(4,5); test(0,0)=overcast; @@ -282,8 +281,7 @@ TEST(ID3ClassifierTree, tree_prune) auto id3tree=std::make_shared(); - id3tree->set_labels(train_lab); - id3tree->train(train_features); + id3tree->train(train_features, train_lab); id3tree->prune_tree(train_features,validation_lab); auto result=id3tree->apply(train_features)->as(); diff --git a/tests/unit/multiclass/tree/RandomCARTree_unittest.cc b/tests/unit/multiclass/tree/RandomCARTree_unittest.cc index 599d63fb12d..8961d72d042 100644 --- a/tests/unit/multiclass/tree/RandomCARTree_unittest.cc +++ b/tests/unit/multiclass/tree/RandomCARTree_unittest.cc @@ -153,11 +153,10 @@ TEST(RandomCARTree, classify_nominal) auto labels=std::make_shared(lab); auto c=std::make_shared(); - c->set_labels(labels); c->set_feature_types(ft); c->set_feature_subset_size(4); c->put("seed", seed); - c->train(feats); + c->train(feats, labels); SGMatrix test(4,5); test(0,0)=overcast; diff --git a/tests/unit/multiclass/tree/RandomForest_unittest.cc b/tests/unit/multiclass/tree/RandomForest_unittest.cc index 67766b111a9..82998d3781c 100644 --- a/tests/unit/multiclass/tree/RandomForest_unittest.cc +++ b/tests/unit/multiclass/tree/RandomForest_unittest.cc @@ -93,13 +93,13 @@ TEST_F(RandomForestTest, classify_nominal_test) { int32_t seed = 2343; auto c = - std::make_shared(weather_features_train, weather_labels_train, 100, 2); + std::make_shared(2, 100); c->set_feature_types(weather_ft); auto mv = std::make_shared(); c->set_combination_rule(mv); env()->set_num_threads(1); c->put("seed", seed); - c->train(weather_features_train); + c->train(weather_features_train, weather_labels_train); auto result = c->apply(weather_features_test)->as(); @@ -126,13 +126,13 @@ TEST_F(RandomForestTest, classify_non_nominal_test) weather_ft[3] = false; auto c = - std::make_shared(weather_features_train, weather_labels_train, 100, 2); + std::make_shared(2, 100); c->set_feature_types(weather_ft); auto mv = std::make_shared(); c->set_combination_rule(mv); env()->set_num_threads(1); c->put("seed", seed); - c->train(weather_features_train); + c->train(weather_features_train, weather_labels_train); auto result = c->apply(weather_features_test)->as(); @@ -147,7 +147,7 @@ TEST_F(RandomForestTest, classify_non_nominal_test) std::shared_ptr eval=std::make_shared(); c->put(RandomForest::kOobEvaluationMetric, eval); - EXPECT_NEAR(0.714285,c->get(RandomForest::kOobError),1e-6); + EXPECT_NEAR(0.7142857,c->get(RandomForest::kOobError),1e-6); } TEST_F(RandomForestTest, score_compare_sklearn_toydata) @@ -166,7 +166,7 @@ TEST_F(RandomForestTest, score_compare_sklearn_toydata) SGVector lab {0.0, 0.0, 1.0, 1.0}; auto labels_train = std::make_shared(lab); - auto c = std::make_shared(features_train, labels_train, 10, 2); + auto c = std::make_shared(2, 10); SGVector ft = SGVector(2); ft[0] = false; ft[1] = false; @@ -175,7 +175,7 @@ TEST_F(RandomForestTest, score_compare_sklearn_toydata) auto mr = std::make_shared(); c->set_combination_rule(mr); c->put("seed", seed); - c->train(features_train); + c->train(features_train, labels_train); auto result = c->apply_binary(features_train); SGVector res_vector = result->get_labels(); @@ -226,7 +226,7 @@ TEST_F(RandomForestTest, score_consistent_with_binary_trivial_data) std::make_shared>(test_data); auto c = - std::make_shared(features_train, labels_train, num_trees, 1); + std::make_shared(1, num_trees); SGVector ft = SGVector(1); ft[0] = false; c->set_feature_types(ft); @@ -234,7 +234,7 @@ TEST_F(RandomForestTest, score_consistent_with_binary_trivial_data) auto mr = std::make_shared(); c->set_combination_rule(mr); c->put("seed", seed); - c->train(features_train); + c->train(features_train, labels_train); auto result = c->apply_binary(features_test); SGVector res_vector = result->get_labels(); diff --git a/tests/unit/neuralnets/NeuralNetwork_unittest.cc b/tests/unit/neuralnets/NeuralNetwork_unittest.cc index 1048d3c9628..d292a602f06 100644 --- a/tests/unit/neuralnets/NeuralNetwork_unittest.cc +++ b/tests/unit/neuralnets/NeuralNetwork_unittest.cc @@ -284,8 +284,7 @@ TEST(NeuralNetwork, binary_classification) network->set_epsilon(1e-8); - network->set_labels(labels); - network->train(features); + network->train(features, labels); auto predictions = network->apply_binary(features); @@ -339,8 +338,7 @@ TEST(NeuralNetwork, multiclass_classification) network->set_epsilon(1e-8); - network->set_labels(labels); - network->train(features); + network->train(features, labels); auto predictions = network->apply_multiclass(features); @@ -386,8 +384,7 @@ TEST(NeuralNetwork, regression) network->set_epsilon(1e-6); - network->set_labels(labels); - network->train(features); + network->train(features, labels); auto predictions = network->apply_regression(features); @@ -439,8 +436,7 @@ TEST(NeuralNetwork, gradient_descent) network->set_epsilon(0.0); network->set_max_num_epochs(1000); - network->set_labels(labels); - network->train(features); + network->train(features, labels); auto predictions = network->apply_binary(features); diff --git a/tests/unit/regression/GaussianProcessRegression_unittest.cc b/tests/unit/regression/GaussianProcessRegression_unittest.cc index acf9200bc30..276a990631a 100644 --- a/tests/unit/regression/GaussianProcessRegression_unittest.cc +++ b/tests/unit/regression/GaussianProcessRegression_unittest.cc @@ -198,16 +198,17 @@ TEST(GaussianProcessRegression, apply_regression_on_training_features) auto liklihood=std::make_shared(0.25); // specify GP regression with exact inference - auto inf=std::make_shared(kernel, features_train, - mean, labels_train, liklihood); - + auto inf=std::make_shared(); + inf->set_mean(mean); + inf->set_kernel(kernel); + inf->set_model(liklihood); auto gpr=std::make_shared(inf); // train model - gpr->train(); + gpr->train(features_train, labels_train); // apply regression - auto predictions=gpr->apply_regression(); + auto predictions=gpr->apply_regression(features_train); SGVector prediction_vector=predictions->get_labels(); // comparison of predictions with result from GPML package: @@ -452,16 +453,17 @@ TEST(GaussianProcessRegression,apply_regression_scaled_kernel) auto lik=std::make_shared(0.25); // specify GP regression with exact inference - auto inf=std::make_shared(kernel, features_train, - mean, labels_train, lik); + auto inf=std::make_shared(); inf->set_scale(0.8); - + inf->set_mean(mean); + inf->set_kernel(kernel); + inf->set_model(lik); // create GPR and train auto gpr=std::make_shared(inf); - gpr->train(); + gpr->train(features_train, labels_train); // apply regression to train features - auto predictions=gpr->apply_regression(); + auto predictions=gpr->apply_regression(features_train); // comparison of predictions with result from GPML package SGVector mu=predictions->get_labels(); diff --git a/tests/unit/regression/LibLinearRegression_unittest.cc b/tests/unit/regression/LibLinearRegression_unittest.cc index 043f93d5695..2a57e6c3728 100644 --- a/tests/unit/regression/LibLinearRegression_unittest.cc +++ b/tests/unit/regression/LibLinearRegression_unittest.cc @@ -30,12 +30,11 @@ TEST(LibLinearRegression, lr_with_bias) auto labels_test = mockData->get_labels_test(); auto labels_train = mockData->get_labels_train(); - auto lr = - std::make_shared(1., train_feats, labels_train); + auto lr = std::make_shared(1.); lr->set_use_bias(use_bias); lr->set_epsilon(epsilon); lr->set_tube_epsilon(epsilon); - lr->train(); + lr->train(train_feats, labels_train); auto predicted_labels = lr->apply(test_feats)->as(); @@ -66,12 +65,11 @@ TEST(LibLinearRegression, lr_without_bias) auto labels_test = mockData->get_labels_test(); auto labels_train = mockData->get_labels_train(); - auto lr = - std::make_shared(1., train_feats, labels_train); + auto lr = std::make_shared(1.); lr->set_use_bias(use_bias); lr->set_epsilon(epsilon); lr->set_tube_epsilon(epsilon); - lr->train(); + lr->train(train_feats, labels_train); auto predicted_labels = lr->apply(test_feats)->as(); diff --git a/tests/unit/regression/krrnystrom_unittest.cc b/tests/unit/regression/krrnystrom_unittest.cc index afcd994bfaa..1db56a3ad13 100644 --- a/tests/unit/regression/krrnystrom_unittest.cc +++ b/tests/unit/regression/krrnystrom_unittest.cc @@ -84,11 +84,11 @@ TEST(KRRNystrom, apply_and_compare_to_KRR_with_all_columns) /* kernel ridge regression and the nystrom approximation */ float64_t tau=0.01; - auto nystrom=std::make_shared(tau, num_vectors, kernel, labels); - auto krr=std::make_shared(tau, kernel_krr, labels_krr); + auto nystrom=std::make_shared(tau, num_vectors, kernel); + auto krr=std::make_shared(tau, kernel_krr); - nystrom->train(features); - krr->train(features); + nystrom->train(features, labels); + krr->train(features, labels_krr); SGVector alphas=nystrom->get_alphas(); SGVector alphas_krr=krr->get_alphas(); @@ -151,11 +151,11 @@ TEST(KRRNystrom, apply_and_compare_to_KRR_with_column_subset) /* kernel ridge regression and the nystrom approximation */ float64_t tau=0.01; - auto nystrom=std::make_shared(tau, num_basis_rkhs, kernel, labels); - auto krr=std::make_shared(tau, kernel_krr, labels_krr); + auto nystrom=std::make_shared(tau, num_basis_rkhs, kernel); + auto krr=std::make_shared(tau, kernel_krr); - nystrom->train(features); - krr->train(features); + nystrom->train(features, labels); + krr->train(features, labels_krr); auto result = nystrom->apply_regression(test_features); diff --git a/tests/unit/regression/lars_unittest.cc b/tests/unit/regression/lars_unittest.cc index 2a0745f4361..ba90598a808 100644 --- a/tests/unit/regression/lars_unittest.cc +++ b/tests/unit/regression/lars_unittest.cc @@ -106,8 +106,7 @@ TEST(LeastAngleRegression, lasso_n_greater_than_d) auto labels=std::make_shared(lab); auto lars=std::make_shared(); - lars->set_labels(labels); - lars->train(features); + lars->train(features, labels); SGVector active3=SGVector(lars->get_w_for_var(3)); SGVector active2=SGVector(lars->get_w_for_var(2)); @@ -138,8 +137,7 @@ TEST(LeastAngleRegression, lasso_n_less_than_d) auto labels=std::make_shared(lab); auto lars=std::make_shared(); - lars->set_labels(labels); - lars->train(features); + lars->train(features, labels); SGVector active2=SGVector(lars->get_w_for_var(2)); SGVector active1=SGVector(lars->get_w_for_var(1)); @@ -169,8 +167,7 @@ TEST(LeastAngleRegression, lars_n_greater_than_d) auto labels=std::make_shared(lab); auto lars=std::make_shared(false); - lars->set_labels(labels); - lars->train(features); + lars->train(features, labels); SGVector active3=SGVector(lars->get_w_for_var(3)); SGVector active2=SGVector(lars->get_w_for_var(2)); @@ -201,8 +198,7 @@ TEST(LeastAngleRegression, lars_n_less_than_d) auto labels=std::make_shared(lab); auto lars=std::make_shared(false); - lars->set_labels(labels); - lars->train(features); + lars->train(features, labels); SGVector active2=SGVector(lars->get_w_for_var(2)); SGVector active1=SGVector(lars->get_w_for_var(1)); @@ -240,12 +236,11 @@ void lars_n_less_than_d_feature_test_templated() auto lars=std::make_shared(false); - lars->set_labels(labels); //Catch exceptions thrown when training, clean up try { - lars->train(features); + lars->train(features, labels); } catch(...) { @@ -427,8 +422,7 @@ TEST(LeastAngleRegression, ols_equivalence) auto labels = std::make_shared(lab); auto lars = std::make_shared(false); - lars->set_labels(labels); - lars->train(features); + lars->train(features, labels); // Full LAR model SGVector w=lars->get_w(); Map map_w(w.vector, w.size()); @@ -456,10 +450,9 @@ TEST(LeastAngleRegression, early_stop_l1_norm) auto labels=std::make_shared(lab); auto lars=std::make_shared(false); - lars->set_labels(labels); // set max l1 norm lars->put("max_l1_norm", 1.0); - lars->train(features); + lars->train(features, labels); SGVector active2=SGVector(lars->get_w_for_var(2)); SGVector active1=SGVector(lars->get_w_for_var(1)); diff --git a/tests/unit/structure/DualLibQPBMSOSVM_unittest.cc b/tests/unit/structure/DualLibQPBMSOSVM_unittest.cc index 1f7708da0f9..7b543d6c4b6 100644 --- a/tests/unit/structure/DualLibQPBMSOSVM_unittest.cc +++ b/tests/unit/structure/DualLibQPBMSOSVM_unittest.cc @@ -102,7 +102,7 @@ TEST_P(DualLibQPBMSOSVMTestLoopSolvers,train_small_problem_and_predict) // sosvm->set_verbose(true); sosvm->set_BufSize(8); - sosvm->train(); + sosvm->train(features, labels); BmrmStatistics res = sosvm->get_result(); //SG_PRINT("result = { Fp={}, Fd={}, nIter={}, nCP={}, nzA={}, exitflag={} }\n", diff --git a/tests/unit/structure/SOSVM_unittest.cc b/tests/unit/structure/SOSVM_unittest.cc index 21bb06de02a..9cd13ed65a6 100644 --- a/tests/unit/structure/SOSVM_unittest.cc +++ b/tests/unit/structure/SOSVM_unittest.cc @@ -78,7 +78,7 @@ TEST(SOSVM, sgd_check_w_helper) auto sgd = std::make_shared(model, labels, false, false); sgd->set_num_iter(1); sgd->set_lambda(1.0); - sgd->train(); + sgd->train(instances, labels); w = sgd->get_w(); for (int32_t i = 0; i < w.vlen; i++) @@ -161,7 +161,7 @@ TEST(SOSVM, fw_check_w_helper) fw->set_num_iter(1); fw->set_lambda(1.0); fw->set_gap_threshold(0.0); - fw->train(); + fw->train(instances, labels); w = fw->get_w(); for (int32_t i = 0; i < w.vlen; i++) diff --git a/tests/unit/transfer/MALSAR_unittest.cc b/tests/unit/transfer/MALSAR_unittest.cc index 302ee93a8d3..f7ef9f591c0 100644 --- a/tests/unit/transfer/MALSAR_unittest.cc +++ b/tests/unit/transfer/MALSAR_unittest.cc @@ -60,11 +60,10 @@ TEST(MalsarL12Test, train) auto task = std::make_shared(0, data.second->get_num_labels()); task_group->append_task(task); - auto mtlr = std::make_shared(0.1,0.1,data.first.first,data.second,task_group); - mtlr->train(); - mtlr->set_features(data.first.second); + auto mtlr = std::make_shared(0.1,0.1, task_group); + mtlr->train(data.first.first, data.second); mtlr->set_current_task(0); - auto output = mtlr->apply(); + auto output = mtlr->apply(data.first.second); } @@ -77,11 +76,10 @@ TEST(MalsarClusteredTest, train) auto task = std::make_shared(0, data.second->get_num_labels()); task_group->append_task(task); - auto mtlr = std::make_shared(0.1,0.1,data.first.first,data.second,task_group,1); - mtlr->train(); - mtlr->set_features(data.first.second); + auto mtlr = std::make_shared(0.1,0.1, task_group,1); + mtlr->train(data.first.first, data.second); mtlr->set_current_task(0); - auto output = mtlr->apply(); + auto output = mtlr->apply(data.first.second); } @@ -94,11 +92,10 @@ TEST(MalsarTraceTest, train) auto task = std::make_shared(0, data.second->get_num_labels()); task_group->append_task(task); - auto mtlr = std::make_shared(0.1,data.first.first,data.second,task_group); - mtlr->train(); - mtlr->set_features(data.first.second); + auto mtlr = std::make_shared(0.1, task_group); + mtlr->train(data.first.first, data.second); mtlr->set_current_task(0); - auto output = mtlr->apply(); + auto output = mtlr->apply(data.first.second); }