// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This example program shows you how to create your own custom binary classification
    trainer object and use it with the multiclass classification tools in the dlib C++
    library.  This example assumes you have already become familiar with the concepts
    introduced in the multiclass_classification_ex.cpp example program.

    In this example we will create a very simple trainer object that takes a binary
    classification problem and produces a decision rule which says a test point has
    the same class as whichever centroid it is closest to.

    The multiclass training dataset will consist of four classes.  Each class will be
    a blob of points in one of the quadrants of the cartesian plane.  For fun, we will
    use std::string labels and therefore the labels of these classes will be the
    following:
        "upper_left",
        "upper_right",
        "lower_left",
        "lower_right"
*/

#include <dlib/svm_threaded.h>

#include <iostream>
#include <vector>

#include <dlib/rand.h>

using namespace std;
using namespace dlib;

// Our data will be 2-dimensional data.  So declare an appropriate type to contain these points.
typedef matrix<double,2,1> sample_type;

// ----------------------------------------------------------------------------------------

struct custom_decision_function
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is the representation of our binary decision rule.
    !*/

    // centers of the two classes
    sample_type positive_center, negative_center;

    double operator() (
        const sample_type& x
    ) const
    {
        // if x is closer to the positive class then return +1
        if (length(positive_center - x) < length(negative_center - x))
            return +1;
        else
            return -1;
    }
};

// Later on in this example we will save our decision functions to disk.  This
// pair of routines is needed for this functionality.
void serialize (const custom_decision_function& item, std::ostream& out)
{
    // write the state of item to the output stream
    serialize(item.positive_center, out);
    serialize(item.negative_center, out);
}

void deserialize (custom_decision_function& item, std::istream& in)
{
    // read the data from the input stream and store it in item
    deserialize(item.positive_center, in);
    deserialize(item.negative_center, in);
}

// ----------------------------------------------------------------------------------------
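// As an aside, the two overloads above are all dlib's serialization framework
// needs to know about our type.  A minimal round-trip sketch (not part of the
// original example; it assumes <sstream> has been included) would look like:
//
//    custom_decision_function f, g;
//    f.positive_center = 1, 2;
//    f.negative_center = -1, -2;
//    std::stringstream ss;
//    serialize(f, ss);    // calls our serialize() overload above
//    deserialize(g, ss);  // g now holds the same two centers as f
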
class simple_custom_trainer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is our example custom binary classifier trainer object.  It simply
            computes the means of the +1 and -1 classes, puts them into our
            custom_decision_function, and returns the results.

            Below we define the train() function.  I have also included the
            requires/ensures definition for a generic binary classifier's train()
    !*/

public:

    custom_decision_function train (
        const std::vector<sample_type>& samples,
        const std::vector<double>& labels
    ) const
    /*!
        requires
            - is_binary_classification_problem(samples, labels) == true
              (e.g. labels consists of only +1 and -1 values,
              samples.size() == labels.size())
        ensures
            - returns a decision function F with the following properties:
                - if (new_x is a sample predicted to have a +1 label) then
                    - F(new_x) >= 0
                - else
                    - F(new_x) < 0
    !*/
    {
        sample_type positive_center, negative_center;

        // compute sums of each class
        positive_center = 0;
        negative_center = 0;
        for (unsigned long i = 0; i < samples.size(); ++i)
        {
            if (labels[i] == +1)
                positive_center += samples[i];
            else // this is a -1 sample
                negative_center += samples[i];
        }

        // divide by number of +1 samples
        positive_center /= sum(mat(labels) == +1);
        // divide by number of -1 samples
        negative_center /= sum(mat(labels) == -1);

        custom_decision_function df;
        df.positive_center = positive_center;
        df.negative_center = negative_center;

        return df;
    }
};

// ----------------------------------------------------------------------------------------

void generate_data (
    std::vector<sample_type>& samples,
    std::vector<string>& labels
);
/*!
    ensures
        - make some four class data as described above.
        - each class will have 50 samples in it.
!*/

// ----------------------------------------------------------------------------------------

int main()
{
    std::vector<sample_type> samples;
    std::vector<string> labels;

    // First, get our labeled set of training data
    generate_data(samples, labels);

    cout << "samples.size(): "<< samples.size() << endl;

    // Define the trainer we will use.  The second template argument specifies the
    // type of label used, which is string in this case.
    typedef one_vs_one_trainer<any_trainer<sample_type>, string> ovo_trainer;

    ovo_trainer trainer;

    // Now tell the one_vs_one_trainer that, by default, it should use the
    // simple_custom_trainer to solve the individual binary classification
    // subproblems.
    trainer.set_trainer(simple_custom_trainer());

    // Next, to make things a little more interesting, we will set up the
    // one_vs_one_trainer to use kernel ridge regression to solve the upper_left
    // vs lower_right binary classification subproblem.
    typedef radial_basis_kernel<sample_type> rbf_kernel;
    krr_trainer<rbf_kernel> rbf_trainer;
    rbf_trainer.set_kernel(rbf_kernel(0.1));
    trainer.set_trainer(rbf_trainer, "upper_left", "lower_right");
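
    // As an aside: with N distinct labels the one_vs_one_trainer solves one binary
    // subproblem per pair of labels, i.e. N*(N-1)/2 of them.  For our four classes
    // that is 4*3/2 = 6 subproblems: the upper_left vs lower_right one is handled
    // by rbf_trainer, while the remaining five fall back to the
    // simple_custom_trainer we registered above.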
    // Now let's do 5-fold cross-validation using the one_vs_one_trainer we just
    // set up.  As an aside, always shuffle the order of the samples before doing
    // cross validation.  For a discussion of why this is a good idea see the
    // svm_ex.cpp example.
    randomize_samples(samples, labels);
    cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl;

    // This dataset is very easy and everything is correctly classified.
    // Therefore, the output of cross validation is the following confusion matrix:
    /*
        50  0  0  0 
         0 50  0  0 
         0  0 50  0 
         0  0  0 50 
    */

    // We can also obtain the decision rule as always.
    one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels);

    cout << "predicted label: "<< df(samples[0])  << ", true label: "<< labels[0] << endl;
    cout << "predicted label: "<< df(samples[90]) << ", true label: "<< labels[90] << endl;
    // The output is:
    /*
        predicted label: upper_right, true label: upper_right
        predicted label: lower_left, true label: lower_left
    */

    // Finally, let's save our multiclass decision rule to disk.  Remember that we
    // have to specify the types of binary decision function used inside the
    // one_vs_one_decision_function.
    one_vs_one_decision_function<ovo_trainer,
        custom_decision_function,                             // This is the output of the simple_custom_trainer
        decision_function<radial_basis_kernel<sample_type> >  // This is the output of the rbf_trainer
    > df2, df3;

    df2 = df;
    // save to a file called df.dat
    serialize("df.dat") << df2;

    // load the function back in from disk and store it in df3.
    deserialize("df.dat") >> df3;

    // Test df3 to see that this worked.
    cout << endl;
    cout << "predicted label: "<< df3(samples[0])  << ", true label: "<< labels[0] << endl;
    cout << "predicted label: "<< df3(samples[90]) << ", true label: "<< labels[90] << endl;
    // Test df3 on the samples and labels and print the confusion matrix.
    cout << "test deserialized function: \n" << test_multiclass_decision_function(df3, samples, labels) << endl;
}

// ----------------------------------------------------------------------------------------

void generate_data (
    std::vector<sample_type>& samples,
    std::vector<string>& labels
)
{
    const long num = 50;

    sample_type m;

    dlib::rand rnd;

    // add some points in the upper right quadrant
    m = 10, 10;
    for (long i = 0; i < num; ++i)
    {
        samples.push_back(m + randm(2,1,rnd));
        labels.push_back("upper_right");
    }

    // add some points in the upper left quadrant
    m = -10, 10;
    for (long i = 0; i < num; ++i)
    {
        samples.push_back(m + randm(2,1,rnd));
        labels.push_back("upper_left");
    }

    // add some points in the lower right quadrant
    m = 10, -10;
    for (long i = 0; i < num; ++i)
    {
        samples.push_back(m + randm(2,1,rnd));
        labels.push_back("lower_right");
    }

    // add some points in the lower left quadrant
    m = -10, -10;
    for (long i = 0; i < num; ++i)
    {
        samples.push_back(m + randm(2,1,rnd));
        labels.push_back("lower_left");
    }
}

// ----------------------------------------------------------------------------------------
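// A note on generate_data() (not part of the original example): the statement
// "m = 10, 10;" uses dlib's comma syntax to assign both elements of the 2x1
// matrix m, and randm(2,1,rnd) returns a 2x1 matrix of uniform random values
// in [0,1).  Each class is therefore a tight unit-square blob of 50 points
// near one of the four corners (+/-10, +/-10), which is why this dataset is
// so easy to classify.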