feat: 切换后端至PaddleOCR-NCNN,切换工程为CMake

1.项目后端整体迁移至PaddleOCR-NCNN算法,已通过基本的兼容性测试
2.工程改为使用CMake组织,后续为了更好地兼容第三方库,不再提供QMake工程
3.重整权利声明文件,重整代码工程,确保最小化侵权风险

Log: 切换后端至PaddleOCR-NCNN,切换工程为CMake
Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
This commit is contained in:
wangzhengyang
2022-05-10 09:54:44 +08:00
parent ecdd171c6f
commit 718c41634f
10018 changed files with 3593797 additions and 186748 deletions

View File

@ -0,0 +1,200 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
// #define GENERATE_TESTDATA
namespace opencv_test { namespace {
/// Pairs an ANN_MLP activation-function identifier with a printable label.
struct Activation
{
    int id;            ///< value passed to ANN_MLP::setActivationFunction()
    const char *name;  ///< label used when printing the parameter and in model file names
};

/// Writes the activation's label to the given stream (value-printer hook for test parameters).
void PrintTo(const Activation &activation, std::ostream *os)
{
    (*os) << activation.name;
}
// Activation functions exercised by the ML_ANN_Params test, as { id, label } pairs.
// The label becomes part of the reference model file name ("waveform_<label>.yml").
Activation activation_list[] =
{
{ ml::ANN_MLP::IDENTITY, "identity" },
{ ml::ANN_MLP::SIGMOID_SYM, "sigmoid_sym" },
{ ml::ANN_MLP::GAUSSIAN, "gaussian" },
{ ml::ANN_MLP::RELU, "relu" },
{ ml::ANN_MLP::LEAKYRELU, "leakyrelu" },
};
// Fixture parameterized over a single Activation entry.
typedef testing::TestWithParam< Activation > ML_ANN_Params;
// Trains an MLP on the "waveform" data with the parameterized activation function and
// checks that its predictions match those of the reference model loaded from the test data.
TEST_P(ML_ANN_Params, ActivationFunction)
{
const Activation &activation = GetParam();
const string dataname = "waveform";
const string data_path = findDataFile(dataname + ".data");
const string model_name = dataname + "_" + activation.name + ".yml";
Ptr<TrainData> tdata = TrainData::loadFromCSV(data_path, 0);
ASSERT_FALSE(tdata.empty());
// hack?
// The global RNG state is overwritten only for the train/test split and restored afterwards.
const uint64 old_state = theRNG().state;
theRNG().state = 1027401484159173092;
tdata->setTrainTestSplit(500);
theRNG().state = old_state;
// Four layers: input width, two layers of 100, output width taken from the responses.
Mat_<int> layerSizes(1, 4);
layerSizes(0, 0) = tdata->getNVars();
layerSizes(0, 1) = 100;
layerSizes(0, 2) = 100;
layerSizes(0, 3) = tdata->getResponses().cols;
Mat testSamples = tdata->getTestSamples();
Mat rx, ry;
{
// Freshly trained model -> rx.
Ptr<ml::ANN_MLP> x = ml::ANN_MLP::create();
x->setActivationFunction(activation.id);
x->setLayerSizes(layerSizes);
x->setTrainMethod(ml::ANN_MLP::RPROP, 0.01, 0.1);
x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 300, 0.01));
x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE);
ASSERT_TRUE(x->isTrained());
x->predict(testSamples, rx);
// When GENERATE_TESTDATA is defined, the trained model is written out as the new reference file.
#ifdef GENERATE_TESTDATA
x->save(cvtest::TS::ptr()->get_data_path() + model_name);
#endif
}
{
// Reference model loaded from the test data -> ry; both predictions must agree.
const string model_path = findDataFile(model_name);
Ptr<ml::ANN_MLP> y = Algorithm::load<ANN_MLP>(model_path);
ASSERT_TRUE(y);
y->predict(testSamples, ry);
EXPECT_MAT_NEAR(rx, ry, FLT_EPSILON);
}
}
// One test instance per entry of activation_list.
INSTANTIATE_TEST_CASE_P(/**/, ML_ANN_Params, testing::ValuesIn(activation_list));
//==================================================================================================
// Training methods covered by the ML_ANN_METHOD test.
// NOTE(review): CV_ENUM presumably declares a printable wrapper type for these values - confirm in the ts headers.
CV_ENUM(ANN_MLP_METHOD, ANN_MLP::RPROP, ANN_MLP::ANNEAL)
// Test parameters: (training method, method name used in data file names, number of samples used).
typedef tuple<ANN_MLP_METHOD, string, int> ML_ANN_METHOD_Params;
typedef TestWithParam<ML_ANN_METHOD_Params> ML_ANN_METHOD;
// Two-stage training test: stored initial weights are read back, training continues with the
// parameterized method, and the resulting weights/predictions are compared with a stored model
// and a stored "gold" response.
TEST_P(ML_ANN_METHOD, Test)
{
int methodType = get<0>(GetParam());
string methodName = get<1>(GetParam());
int N = get<2>(GetParam());
String folder = string(cvtest::TS::ptr()->get_data_path());
String original_path = findDataFile("waveform.data");
// All data files for this test share the "waveform_<method>" prefix.
string dataname = "waveform_" + methodName;
string weight_name = dataname + "_init_weight.yml.gz";
string model_name = dataname + ".yml.gz";
string response_name = dataname + "_response.yml.gz";
Ptr<TrainData> tdata2 = TrainData::loadFromCSV(original_path, 0);
ASSERT_FALSE(tdata2.empty());
// Use the first N samples and expand the response value into an N x 3 matrix
// with a 1 in the column given by the response.
Mat samples = tdata2->getSamples()(Range(0, N), Range::all());
Mat responses(N, 3, CV_32FC1, Scalar(0));
for (int i = 0; i < N; i++)
responses.at<float>(i, static_cast<int>(tdata2->getResponses().at<float>(i, 0))) = 1;
Ptr<TrainData> tdata = TrainData::create(samples, ml::ROW_SAMPLE, responses);
ASSERT_FALSE(tdata.empty());
// hack?
// The global RNG state is overwritten only for the train/test split and restored afterwards.
const uint64 old_state = theRNG().state;
theRNG().state = 0;
tdata->setTrainTestSplitRatio(0.8);
theRNG().state = old_state;
Mat testSamples = tdata->getTestSamples();
// train 1st stage
Ptr<ml::ANN_MLP> xx = ml::ANN_MLP::create();
Mat_<int> layerSizes(1, 4);
layerSizes(0, 0) = tdata->getNVars();
layerSizes(0, 1) = 30;
layerSizes(0, 2) = 30;
layerSizes(0, 3) = tdata->getResponses().cols;
xx->setLayerSizes(layerSizes);
xx->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM);
xx->setTrainMethod(ml::ANN_MLP::RPROP);
xx->setTermCriteria(TermCriteria(TermCriteria::COUNT, 1, 0.01));
xx->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE);
// The first-stage network is only written out when regenerating test data;
// in a normal run the second stage reads the stored weight file instead.
#ifdef GENERATE_TESTDATA
{
FileStorage fs;
fs.open(cvtest::TS::ptr()->get_data_path() + weight_name, FileStorage::WRITE + FileStorage::BASE64);
xx->write(fs);
}
#endif
// train 2nd stage
Mat r_gold;
Ptr<ml::ANN_MLP> x = ml::ANN_MLP::create();
{
const string weight_file = findDataFile(weight_name);
FileStorage fs;
fs.open(weight_file, FileStorage::READ);
x->read(fs.root());
}
x->setTrainMethod(methodType);
if (methodType == ml::ANN_MLP::ANNEAL)
{
// Annealing parameters, including an explicitly seeded energy RNG.
x->setAnnealEnergyRNG(RNG(CV_BIG_INT(0xffffffff)));
x->setAnnealInitialT(12);
x->setAnnealFinalT(0.15);
x->setAnnealCoolingRatio(0.96);
x->setAnnealItePerStep(11);
}
x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.01));
// UPDATE_WEIGHTS is added to the flags so training starts from the weights read above.
x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE + ml::ANN_MLP::NO_INPUT_SCALE + ml::ANN_MLP::UPDATE_WEIGHTS);
ASSERT_TRUE(x->isTrained());
#ifdef GENERATE_TESTDATA
x->save(cvtest::TS::ptr()->get_data_path() + model_name);
x->predict(testSamples, r_gold);
{
FileStorage fs_response(cvtest::TS::ptr()->get_data_path() + response_name, FileStorage::WRITE + FileStorage::BASE64);
fs_response << "response" << r_gold;
}
#endif
{
// Load the stored "gold" response for comparison.
const string response_file = findDataFile(response_name);
FileStorage fs_response(response_file, FileStorage::READ);
fs_response["response"] >> r_gold;
}
ASSERT_FALSE(r_gold.empty());
// verify
const string model_file = findDataFile(model_name);
Ptr<ml::ANN_MLP> y = Algorithm::load<ANN_MLP>(model_file);
ASSERT_TRUE(y);
Mat rx, ry;
// The bound 4 equals the number of entries in layerSizes above.
for (int j = 0; j < 4; j++)
{
rx = x->getWeights(j);
ry = y->getWeights(j);
EXPECT_MAT_NEAR(rx, ry, FLT_EPSILON) << "Weights are not equal for layer: " << j;
}
x->predict(testSamples, rx);
y->predict(testSamples, ry);
EXPECT_MAT_NEAR(ry, rx, FLT_EPSILON) << "Predict are not equal to result of the saved model";
EXPECT_MAT_NEAR(r_gold, rx, FLT_EPSILON) << "Predict are not equal to 'gold' response";
}
// Instantiated for RPROP (5000 samples) and ANNEAL (1000 samples); BACKPROP is disabled.
INSTANTIATE_TEST_CASE_P(/*none*/, ML_ANN_METHOD,
testing::Values(
ML_ANN_METHOD_Params(ml::ANN_MLP::RPROP, "rprop", 5000),
ML_ANN_METHOD_Params(ml::ANN_MLP::ANNEAL, "anneal", 1000)
// ML_ANN_METHOD_Params(ml::ANN_MLP::BACKPROP, "backprop", 500) -----> NO BACKPROP TEST
)
);
}} // namespace

View File

@ -0,0 +1,56 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Checks that NormalBayesClassifier::predictProb yields identical results when called
// row by row, on the whole sample matrix, and with non-continuous output matrices.
TEST(ML_NBAYES, regression_5911)
{
    const int sampleCount = 12;

    // Features: three groups of four identical 4-element rows.
    float featureValues[] = {
        1,2,3,4, 1,2,3,4, 1,2,3,4, 1,2,3,4,
        5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5,
        4,3,2,1, 4,3,2,1, 4,3,2,1, 4,3,2,1
    };
    Mat_<float> features(sampleCount, 4, featureValues);

    // One class label per group.
    int labelValues[] = { 0,0,0,0, 1,1,1,1, 2,2,2,2 };
    Mat_<int> classLabels(sampleCount, 1, labelValues);

    Ptr<ml::NormalBayesClassifier> classifier = cv::ml::NormalBayesClassifier::create();
    classifier->train(features, ml::ROW_SAMPLE, classLabels);

    // Reference: predict each row separately and stack the outputs.
    Mat rowResults;
    Mat rowProbs;
    for (int row = 0; row < sampleCount; ++row)
    {
        Mat oneResult;
        Mat oneProb;
        classifier->predictProb(features.row(row), oneResult, oneProb);
        rowResults.push_back(oneResult);
        rowProbs.push_back(oneProb);
    }

    // Bulk prediction into freshly allocated (continuous) outputs.
    // An element-wise == produces 255 for every matching element.
    Mat bulkResults;
    Mat bulkProbs;
    classifier->predictProb(features, bulkResults, bulkProbs);
    EXPECT_EQ(255 * bulkResults.total(), sum(rowResults == bulkResults)[0]);
    EXPECT_EQ(255 * bulkProbs.total(), sum(rowProbs == bulkProbs)[0]);

    // Bulk prediction into column views of wider matrices (non-continuous storage).
    Mat wideResults(sampleCount, 1 + 1, CV_32S);
    Mat wideProbs(sampleCount, 3 + 1, CV_32F);
    classifier->predictProb(features, wideResults.col(0), wideProbs.colRange(0, 3));
    Mat viewResults = wideResults.col(0).clone();
    Mat viewProbs = wideProbs.colRange(0, 3).clone();
    EXPECT_EQ(255 * viewResults.total(), sum(rowResults == viewResults)[0]);
    EXPECT_EQ(255 * viewProbs.total(), sum(rowProbs == viewProbs)[0]);
}
}} // namespace

View File

@ -0,0 +1,186 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// EM start steps and covariance matrix types used as test parameters.
// NOTE(review): CV_ENUM presumably declares printable wrapper types for these values - confirm in the ts headers.
CV_ENUM(EM_START_STEP, EM::START_AUTO_STEP, EM::START_M_STEP, EM::START_E_STEP)
CV_ENUM(EM_COV_MAT, EM::COV_MAT_GENERIC, EM::COV_MAT_DIAGONAL, EM::COV_MAT_SPHERICAL)
// Fixture parameterized over (start step, covariance matrix type).
typedef testing::TestWithParam< tuple<EM_START_STEP, EM_COV_MAT> > ML_EM_Params;
// Trains EM with three clusters on generated 2-D data using the parameterized start step and
// covariance type, then checks the labelling error on both the training and a separate test set.
TEST_P(ML_EM_Params, accuracy)
{
const int nclusters = 3;
const int sizesArr[] = { 500, 700, 800 };
const vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
const int pointsCount = sizesArr[0] + sizesArr[1] + sizesArr[2];
Mat means;
vector<Mat> covs;
defaultDistribs( means, covs, CV_64FC1 );
Mat trainData(pointsCount, 2, CV_64FC1 );
Mat trainLabels;
generateData( trainData, trainLabels, sizes, means, covs, CV_64FC1, CV_32SC1 );
Mat testData( pointsCount, 2, CV_64FC1 );
Mat testLabels;
generateData( testData, testLabels, sizes, means, covs, CV_64FC1, CV_32SC1 );
// Initial probabilities and weights are all ones; they are passed to trainM / trainE below.
Mat probs(trainData.rows, nclusters, CV_64FC1, cv::Scalar(1));
Mat weights(1, nclusters, CV_64FC1, cv::Scalar(1));
TermCriteria termCrit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 100, FLT_EPSILON);
int startStep = get<0>(GetParam());
int covMatType = get<1>(GetParam());
cv::Mat labels;
Ptr<EM> em = EM::create();
em->setClustersNumber(nclusters);
em->setCovarianceMatrixType(covMatType);
em->setTermCriteria(termCrit);
// Pick the training entry point that matches the requested start step.
if( startStep == EM::START_AUTO_STEP )
em->trainEM( trainData, noArray(), labels, noArray() );
else if( startStep == EM::START_E_STEP )
em->trainE( trainData, means, covs, weights, noArray(), labels, noArray() );
else if( startStep == EM::START_M_STEP )
em->trainM( trainData, probs, noArray(), labels, noArray() );
{
SCOPED_TRACE("Train");
// err starts at a large sentinel and is overwritten by calcErr.
float err = 1000;
EXPECT_TRUE(calcErr( labels, trainLabels, sizes, err , false, false ));
EXPECT_LE(err, 0.008f);
}
{
SCOPED_TRACE("Test");
float err = 1000;
labels.create( testData.rows, 1, CV_32SC1 );
for( int i = 0; i < testData.rows; i++ )
{
Mat sample = testData.row(i);
Mat out_probs;
// Element [1] of predict2's result is stored as the predicted label.
labels.at<int>(i) = static_cast<int>(em->predict2( sample, out_probs )[1]);
}
EXPECT_TRUE(calcErr( labels, testLabels, sizes, err, false, false ));
EXPECT_LE(err, 0.008f);
}
}
// All nine combinations of start step and covariance matrix type.
INSTANTIATE_TEST_CASE_P(/**/, ML_EM_Params,
testing::Combine(
testing::Values(EM::START_AUTO_STEP, EM::START_M_STEP, EM::START_E_STEP),
testing::Values(EM::COV_MAT_GENERIC, EM::COV_MAT_DIAGONAL, EM::COV_MAT_SPHERICAL)
));
//==================================================================================================
// Trains a small EM model, writes it to a temporary file, loads it back and checks that
// the reloaded model predicts the same value (element [1] of predict2) for every sample.
TEST(ML_EM, save_load)
{
    const int nclusters = 2;
    Mat_<double> samples(3, 1);
    samples << 1., 2., 3.;
    std::vector<double> firstResult;
    string filename = cv::tempfile(".xml");
    {
        Mat labels;
        Ptr<EM> em = EM::create();
        em->setClustersNumber(nclusters);
        em->trainEM(samples, noArray(), labels, noArray());
        // Remember the prediction of the freshly trained model for each sample.
        for( int i = 0; i < samples.rows; i++)
        {
            Vec2d res = em->predict2(samples.row(i), noArray());
            firstResult.push_back(res[1]);
        }
        {
            // The model is written inside an "em" map node; the FileStorage closes at scope exit.
            FileStorage fs = FileStorage(filename, FileStorage::WRITE);
            ASSERT_NO_THROW(fs << "em" << "{");
            ASSERT_NO_THROW(em->write(fs));
            ASSERT_NO_THROW(fs << "}");
        }
    }
    {
        Ptr<EM> em;
        ASSERT_NO_THROW(em = Algorithm::load<EM>(filename));
        // Loading can yield an empty pointer without throwing; fail the test here
        // instead of dereferencing a null model below.
        ASSERT_TRUE(em);
        for( int i = 0; i < samples.rows; i++)
        {
            SCOPED_TRACE(i);
            Vec2d res = em->predict2(samples.row(i), noArray());
            EXPECT_DOUBLE_EQ(firstResult[i], res[1]);
        }
    }
    remove(filename.c_str());
}
//==================================================================================================
// Classifies the "spambase" samples by comparing the log-likelihoods of two EM models,
// one trained per class, and bounds the error rate on the training and test halves.
TEST(ML_EM, classification)
{
// This test classifies spam by the following way:
// 1. estimates distributions of "spam" / "not spam"
// 2. predict classID using Bayes classifier for estimated distributions.
string dataFilename = findDataFile("spambase.data");
Ptr<TrainData> data = TrainData::loadFromCSV(dataFilename, 0);
ASSERT_FALSE(data.empty());
Mat samples = data->getSamples();
ASSERT_EQ(samples.cols, 57);
Mat responses = data->getResponses();
// Mask with the first half set to 1 (train), then shuffled by random pairwise swaps.
vector<int> trainSamplesMask(samples.rows, 0);
const int trainSamplesCount = (int)(0.5f * samples.rows);
const int testSamplesCount = samples.rows - trainSamplesCount;
for(int i = 0; i < trainSamplesCount; i++)
trainSamplesMask[i] = 1;
RNG &rng = cv::theRNG();
for(size_t i = 0; i < trainSamplesMask.size(); i++)
{
int i1 = rng(static_cast<unsigned>(trainSamplesMask.size()));
int i2 = rng(static_cast<unsigned>(trainSamplesMask.size()));
std::swap(trainSamplesMask[i1], trainSamplesMask[i2]);
}
// Split the training samples by response: 0 -> samples0, anything else -> samples1.
Mat samples0, samples1;
for(int i = 0; i < samples.rows; i++)
{
if(trainSamplesMask[i])
{
Mat sample = samples.row(i);
int resp = (int)responses.at<float>(i);
if(resp == 0)
samples0.push_back(sample);
else
samples1.push_back(sample);
}
}
Ptr<EM> model0 = EM::create();
model0->setClustersNumber(3);
model0->trainEM(samples0, noArray(), noArray(), noArray());
Ptr<EM> model1 = EM::create();
model1->setClustersNumber(3);
model1->trainEM(samples1, noArray(), noArray(), noArray());
// confusion matrices
Mat_<int> trainCM(2, 2, 0);
Mat_<int> testCM(2, 2, 0);
const double lambda = 1.;
for(int i = 0; i < samples.rows; i++)
{
Mat sample = samples.row(i);
// Element [0] of predict2's result is compared between the two models.
double sampleLogLikelihoods0 = model0->predict2(sample, noArray())[0];
double sampleLogLikelihoods1 = model1->predict2(sample, noArray())[0];
int classID = (sampleLogLikelihoods0 >= lambda * sampleLogLikelihoods1) ? 0 : 1;
int resp = (int)responses.at<float>(i);
EXPECT_TRUE(resp == 0 || resp == 1);
// Confusion matrices are indexed as (actual response, predicted class).
if(trainSamplesMask[i])
trainCM(resp, classID)++;
else
testCM(resp, classID)++;
}
// Off-diagonal entries are misclassifications.
EXPECT_LE((double)(trainCM(1,0) + trainCM(0,1)) / trainSamplesCount, 0.23);
EXPECT_LE((double)(testCM(1,0) + testCM(0,1)) / testSamplesCount, 0.26);
}
}} // namespace

View File

@ -0,0 +1,53 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Runs kmeans on generated 2-D data with three clusters using each of the three
// initialization flags and checks the labelling error against the generated labels.
TEST(ML_KMeans, accuracy)
{
    const int iters = 100;
    int sizesArr[] = { 5000, 7000, 8000 };
    int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
    Mat data( pointsCount, 2, CV_32FC1 ), labels;
    vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
    Mat means;
    vector<Mat> covs;
    defaultDistribs( means, covs );
    generateData( data, labels, sizes, means, covs, CV_32FC1, CV_32SC1 );
    TermCriteria termCriteria( TermCriteria::COUNT, iters, 0.0);
    {
        SCOPED_TRACE("KMEANS_PP_CENTERS");
        // err starts at a large sentinel and is overwritten by calcErr.
        float err = 1000;
        Mat bestLabels;
        kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_PP_CENTERS, noArray() );
        EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err , false ));
        EXPECT_LE(err, 0.01f);
    }
    {
        SCOPED_TRACE("KMEANS_RANDOM_CENTERS");
        float err = 1000;
        Mat bestLabels;
        kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_RANDOM_CENTERS, noArray() );
        EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err, false ));
        EXPECT_LE(err, 0.01f);
    }
    {
        SCOPED_TRACE("KMEANS_USE_INITIAL_LABELS");
        float err = 1000;
        Mat bestLabels;
        labels.copyTo( bestLabels );
        // Overwrite randomly chosen entries of the true labels with random labels.
        RNG &rng = cv::theRNG();
        for( int i = 0; i < 0.5f * pointsCount; i++ )
        {
            // Draw the row first and the label second in separate statements: with both
            // rng.next() calls inside one assignment the draw order depended on the
            // compiler and language standard, making the initial labels non-reproducible.
            const int row = static_cast<int>(rng.next() % pointsCount);
            const int randomLabel = static_cast<int>(rng.next() % 3);
            bestLabels.at<int>( row, 0 ) = randomLabel;
        }
        kmeans( data, 3, bestLabels, termCriteria, 0, KMEANS_USE_INITIAL_LABELS, noArray() );
        EXPECT_TRUE(calcErr( bestLabels, labels, sizes, err, false ));
        EXPECT_LE(err, 0.01f);
    }
}
}} // namespace

View File

@ -0,0 +1,112 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
using cv::ml::TrainData;
using cv::ml::EM;
using cv::ml::KNearest;
// Checks KNearest classification error on generated 2-D data, once with the default
// algorithm and once with the KDTREE algorithm type.
TEST(ML_KNearest, accuracy)
{
int sizesArr[] = { 500, 700, 800 };
int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels;
vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
Mat means;
vector<Mat> covs;
defaultDistribs( means, covs );
generateData( trainData, trainLabels, sizes, means, covs, CV_32FC1, CV_32FC1 );
Mat testData( pointsCount, 2, CV_32FC1 );
Mat testLabels;
generateData( testData, testLabels, sizes, means, covs, CV_32FC1, CV_32FC1 );
{
SCOPED_TRACE("Default");
Mat bestLabels;
// err starts at a large sentinel and is overwritten by calcErr.
float err = 1000;
Ptr<KNearest> knn = KNearest::create();
knn->train(trainData, ml::ROW_SAMPLE, trainLabels);
knn->findNearest(testData, 4, bestLabels);
EXPECT_TRUE(calcErr( bestLabels, testLabels, sizes, err, true ));
EXPECT_LE(err, 0.01f);
}
{
SCOPED_TRACE("KDTree");
Mat neighborIndexes;
float err = 1000;
Ptr<KNearest> knn = KNearest::create();
knn->setAlgorithmType(KNearest::KDTREE);
knn->train(trainData, ml::ROW_SAMPLE, trainLabels);
knn->findNearest(testData, 4, neighborIndexes);
Mat bestLabels;
// The output of the KDTree are the neighbor indexes, not actual class labels
// so we need to do some extra work to get actual predictions
for(int row_num = 0; row_num < neighborIndexes.rows; ++row_num){
// Look up the training label of every returned neighbour index.
vector<float> labels;
for(int index = 0; index < neighborIndexes.row(row_num).cols; ++index) {
labels.push_back(trainLabels.at<float>(neighborIndexes.row(row_num).at<int>(0, index) , 0));
}
// computing the mode of the output class predictions to determine overall prediction
// NOTE(review): 4 neighbours are requested above, but this loop only counts labels[0..2];
// the bound 3 may have been meant as labels.size() - confirm before changing, since it
// affects the measured error.
std::vector<int> histogram(3,0);
for( int i=0; i<3; ++i )
++histogram[ static_cast<int>(labels[i]) ];
int bestLabel = static_cast<int>(std::max_element( histogram.begin(), histogram.end() ) - histogram.begin());
bestLabels.push_back(bestLabel);
}
// bestLabels was filled with ints; convert to the type of testLabels before comparing.
bestLabels.convertTo(bestLabels, testLabels.type());
EXPECT_TRUE(calcErr( bestLabels, testLabels, sizes, err, true ));
EXPECT_LE(err, 0.01f);
}
}
// Checks findNearest when K exceeds the number of training samples (output widths are
// expected to be capped at the sample count) and that a small K still classifies correctly.
TEST(ML_KNearest, regression_12347)
{
    Mat trainSamples = (Mat_<float>(5,2) << 1, 1.1, 1.1, 1, 2, 2, 2.1, 2, 2.1, 2.1);
    Mat trainResponses = (Mat_<float>(5,1) << 1, 1, 2, 2, 2);
    Mat querySamples = (Mat_<float>(2,2) << 1.1, 1.1, 2, 2.2);

    Ptr<KNearest> model = KNearest::create();
    model->train(trainSamples, ml::ROW_SAMPLE, trainResponses);

    Mat predicted;
    Mat neighbourResponses;
    Mat distances;

    // Output shapes with K larger than the training set.
    const int oversizedK = 16;
    const int expectedCols = std::min(oversizedK, trainSamples.rows);
    model->findNearest(querySamples, oversizedK, predicted, neighbourResponses, distances);
    EXPECT_EQ(querySamples.rows, predicted.rows);
    EXPECT_EQ(neighbourResponses.cols, expectedCols);
    EXPECT_EQ(distances.cols, expectedCols);

    // Predictions with a small K.
    const int smallK = 2;
    model->findNearest(querySamples, smallK, predicted, neighbourResponses, distances);
    EXPECT_EQ(1, predicted.at<float>(0,0));
    EXPECT_EQ(2, predicted.at<float>(1,0));
}
// KDTREE-based KNearest with K = 1: the result is read as integer row indexes into the
// training set, and the label found at the first returned index must be 0.
TEST(ML_KNearest, bug_11877)
{
    Mat trainData = (Mat_<float>(5,2) << 3, 3, 3, 3, 4, 4, 4, 4, 4, 4);
    Mat trainLabels = (Mat_<float>(5,1) << 0, 0, 1, 1, 1);
    Ptr<KNearest> knnKdt = KNearest::create();
    knnKdt->setAlgorithmType(KNearest::KDTREE);
    knnKdt->setIsClassifier(true);
    knnKdt->train(trainData, ml::ROW_SAMPLE, trainLabels);
    Mat testData = (Mat_<float>(2,2) << 3.1, 3.1, 4, 4.1);
    Mat result;
    knnKdt->findNearest(testData, 1, result);
    EXPECT_EQ(1, result.at<int>(0, 0));
    EXPECT_EQ(2, result.at<int>(1, 0));
    // trainLabels is a float matrix, so it is read as float; the previous at<int> access
    // reinterpreted the float bits and only worked because 0.0f has an all-zero pattern.
    EXPECT_EQ(0, trainLabels.at<float>(result.at<int>(0, 0), 0));
}
}} // namespace

View File

@ -0,0 +1,81 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// AUTHOR: Rahul Kavi rahulkavi[at]live[at]com
//
// Test data uses subset of data from the popular Iris Dataset (1936):
// - http://archive.ics.uci.edu/ml/datasets/Iris
// - https://en.wikipedia.org/wiki/Iris_flower_data_set
//
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Trains logistic regression on the iris data and checks that the error computed by
// calculateError over the same samples does not exceed 0.05.
TEST(ML_LR, accuracy)
{
    const std::string irisPath = findDataFile("iris.data");
    Ptr<TrainData> irisData = TrainData::loadFromCSV(irisPath, 0);
    ASSERT_FALSE(irisData.empty());

    Ptr<LogisticRegression> model = LogisticRegression::create();
    model->setLearningRate(1.0);
    model->setIterations(10001);
    model->setRegularization(LogisticRegression::REG_L2);
    model->setTrainMethod(LogisticRegression::BATCH);
    model->setMiniBatchSize(10);
    model->train(irisData);

    Mat predicted;
    model->predict(irisData->getSamples(), predicted);

    // Large sentinel; calculateError overwrites it.
    float error = 1000;
    EXPECT_TRUE(calculateError(predicted, irisData->getResponses(), error));
    EXPECT_LE(error, 0.05f);
}
//==================================================================================================
// Trains logistic regression, saves it to a temporary file, loads it back and checks that
// predictions and learnt thetas of the reloaded model are identical to the original ones.
TEST(ML_LR, save_load)
{
    string dataFileName = findDataFile("iris.data");
    Ptr<TrainData> tdata = TrainData::loadFromCSV(dataFileName, 0);
    ASSERT_FALSE(tdata.empty());
    Mat responses1, responses2;
    Mat learnt_mat1, learnt_mat2;
    String filename = tempfile(".xml");
    {
        Ptr<LogisticRegression> lr1 = LogisticRegression::create();
        lr1->setLearningRate(1.0);
        lr1->setIterations(10001);
        lr1->setRegularization(LogisticRegression::REG_L2);
        lr1->setTrainMethod(LogisticRegression::BATCH);
        lr1->setMiniBatchSize(10);
        ASSERT_NO_THROW(lr1->train(tdata));
        ASSERT_NO_THROW(lr1->predict(tdata->getSamples(), responses1));
        ASSERT_NO_THROW(lr1->save(filename));
        learnt_mat1 = lr1->get_learnt_thetas();
    }
    {
        Ptr<LogisticRegression> lr2;
        ASSERT_NO_THROW(lr2 = Algorithm::load<LogisticRegression>(filename));
        // Loading can yield an empty pointer without throwing; fail the test here
        // instead of dereferencing a null model below.
        ASSERT_TRUE(lr2);
        ASSERT_NO_THROW(lr2->predict(tdata->getSamples(), responses2));
        learnt_mat2 = lr2->get_learnt_thetas();
    }
    // compare difference in prediction outputs and stored inputs
    EXPECT_MAT_NEAR(responses1, responses2, 0.f);
    // Element-wise == yields 255 per matching element; after dividing by 255 the sum
    // equals the element count only if every element matches.
    Mat comp_learnt_mats;
    comp_learnt_mats = (learnt_mat1 == learnt_mat2);
    comp_learnt_mats = comp_learnt_mats.reshape(1, comp_learnt_mats.rows*comp_learnt_mats.cols);
    comp_learnt_mats.convertTo(comp_learnt_mats, CV_32S);
    comp_learnt_mats = comp_learnt_mats/255;
    // check if there is any difference between computed learnt mat and retrieved mat
    EXPECT_EQ(comp_learnt_mats.rows, sum(comp_learnt_mats)[0]);
    remove( filename.c_str() );
}
}} // namespace

View File

@ -0,0 +1,10 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#if defined(HAVE_HPX)
#include <hpx/hpx_main.hpp>
#endif
// NOTE(review): presumably expands to the test executable's entry point for the "ml" module - confirm in the ts headers.
CV_TEST_MAIN("ml")

View File

@ -0,0 +1,373 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
/// Describes one CSV dataset used by the tests and how to load it.
/// Kept as an aggregate: the `datasets` table initializes it positionally.
struct DatasetDesc
{
    string name;        ///< base name of the "<name>.data" file
    int resp_idx;       ///< response column: passed to loadFromCSV as start and as end - 1
    int train_count;    ///< number of samples put into the training part of the split
    int cat_num;        ///< value given to setMaxCategories() by the tree model tuners
    string type_desc;   ///< variable type specification string passed to loadFromCSV

    /// Loads the dataset, applies the train/test split and shuffles it.
    /// @return the loaded data, or an empty pointer if loading failed (callers check this).
    Ptr<TrainData> load()
    {
        string filename = findDataFile(name + ".data");
        Ptr<TrainData> data = TrainData::loadFromCSV(filename, 0, resp_idx, resp_idx + 1, type_desc);
        // Only configure the split when loading succeeded; previously a failed load was
        // dereferenced here, before the callers' ASSERT_TRUE(data) could report it.
        if (!data.empty())
        {
            data->setTrainTestSplit(train_count);
            data->shuffleTrainTest();
        }
        return data;
    }
};
// see testdata/ml/protocol.txt (?)
// Known datasets. Columns follow the DatasetDesc member order:
// { name, resp_idx, train_count, cat_num, type_desc }.
DatasetDesc datasets[] = {
{ "mushroom", 0, 4000, 16, "cat" },
{ "adult", 14, 22561, 16, "ord[0,2,4,10-12],cat[1,3,5-9,13,14]" },
{ "vehicle", 18, 761, 4, "ord[0-17],cat[18]" },
{ "abalone", 8, 3133, 16, "ord[1-8],cat[0]" },
{ "ringnorm", 20, 300, 2, "ord[0-19],cat[20]" },
{ "spambase", 57, 3221, 3, "ord[0-56],cat[57]" },
{ "waveform", 21, 300, 3, "ord[0-20],cat[21]" },
{ "elevators", 18, 5000, 0, "ord" },
{ "letter", 16, 10000, 26, "ord[0-15],cat[16]" },
{ "twonorm", 20, 300, 3, "ord[0-19],cat[20]" },
{ "poletelecomm", 48, 2500, 0, "ord" },
};
/// Looks up a dataset description by name in the `datasets` table.
/// @param name dataset name to search for (exact match)
/// @return reference to the matching table entry
/// @throws cv::Exception (StsInternal) naming the dataset when no entry matches
static DatasetDesc & getDataset(const string & name)
{
    const int sz = sizeof(datasets)/sizeof(datasets[0]);
    for (int i = 0; i < sz; ++i)
    {
        if (datasets[i].name == name)
            return datasets[i];
    }
    // Include the requested name so a typo in a parameter list is easy to diagnose.
    CV_Error(Error::StsInternal, "Unknown dataset: " + name);
}
//==================================================================================================
// interfaces and templates
/// Printable name of model type T; specializations override the "Unknown" default.
template <typename T> string modelName() { return "Unknown"; }

/// Per-type model configuration hook; the default returns the model unchanged.
template <typename T> Ptr<T> tuneModel(const DatasetDesc &, Ptr<T> m) { return m; }
/// Abstract factory used by the parameterized tests to create, load and name models.
struct IModelFactory
{
    virtual ~IModelFactory() {}

    /// Creates a new, configured model for the given dataset.
    virtual Ptr<StatModel> createNew(const DatasetDesc &dataset) const = 0;

    /// Loads a previously saved model from @p filename.
    virtual Ptr<StatModel> loadFromFile(const string &filename) const = 0;

    /// Name used when printing test parameters.
    virtual string name() const = 0;
};
/// Generic factory: creates models with T::create(), configures them with tuneModel<T>
/// and loads them with T::load().
template <typename T>
struct ModelFactory : public IModelFactory
{
    /// Creates a fresh T and applies the tuning for @p dataset.
    Ptr<StatModel> createNew(const DatasetDesc &dataset) const CV_OVERRIDE
    {
        Ptr<T> freshModel = T::create();
        return tuneModel<T>(dataset, freshModel);
    }

    /// Loads a T from @p filename.
    Ptr<StatModel> loadFromFile(const string & filename) const CV_OVERRIDE
    {
        return T::load(filename);
    }

    /// Returns modelName<T>().
    string name() const CV_OVERRIDE
    {
        return modelName<T>();
    }
};
// implementation
// Printable names for the model types used in the parameterized tests.
template <>
string modelName<NormalBayesClassifier>()
{
    return "NormalBayesClassifier";
}

template <>
string modelName<DTrees>()
{
    return "DTrees";
}

template <>
string modelName<KNearest>()
{
    return "KNearest";
}

template <>
string modelName<RTrees>()
{
    return "RTrees";
}

template <>
string modelName<SVMSGD>()
{
    return "SVMSGD";
}
// Decision-tree configuration for the tests: fixed settings, except the category limit,
// which comes from the dataset description (cat_num). Returns the same model pointer.
template<> Ptr<DTrees> tuneModel<DTrees>(const DatasetDesc &dataset, Ptr<DTrees> m)
{
m->setMaxDepth(10);
m->setMinSampleCount(2);
m->setRegressionAccuracy(0);
m->setUseSurrogates(false);
m->setCVFolds(0);
m->setUse1SERule(false);
m->setTruncatePrunedTree(false);
m->setPriors(Mat());
m->setMaxCategories(dataset.cat_num);
return m;
}
// Random-trees configuration for the tests: fixed settings with a 100-iteration COUNT
// termination criterion; the category limit comes from the dataset description (cat_num).
// Returns the same model pointer.
template<> Ptr<RTrees> tuneModel<RTrees>(const DatasetDesc &dataset, Ptr<RTrees> m)
{
m->setMaxDepth(20);
m->setMinSampleCount(2);
m->setRegressionAccuracy(0);
m->setUseSurrogates(false);
m->setPriors(Mat());
m->setCalculateVarImportance(true);
m->setActiveVarCount(0);
m->setTermCriteria(TermCriteria(TermCriteria::COUNT, 100, 0.0));
m->setMaxCategories(dataset.cat_num);
return m;
}
// SVMSGD configuration for the tests; the dataset description is not used.
// Returns the same model pointer.
template<> Ptr<SVMSGD> tuneModel<SVMSGD>(const DatasetDesc &, Ptr<SVMSGD> m)
{
m->setSvmsgdType(SVMSGD::ASGD);
m->setMarginType(SVMSGD::SOFT_MARGIN);
m->setMarginRegularization(0.00001f);
m->setInitialStepSize(0.1f);
m->setStepDecreasingPower(0.75);
m->setTermCriteria(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 10000, 0.00001));
return m;
}
/// Factory for Boost models; the boost type is fixed at construction, the remaining
/// settings are hard-coded and the dataset description is not used.
template <>
struct ModelFactory<Boost> : public IModelFactory
{
    /// @param boostType_ value passed to Boost::setBoostType() for every created model
    ModelFactory(int boostType_) : boostType(boostType_) {}

    /// Creates a Boost model with the stored boost type and fixed test settings.
    Ptr<StatModel> createNew(const DatasetDesc &) const CV_OVERRIDE
    {
        Ptr<Boost> m = Boost::create();
        m->setBoostType(boostType);
        m->setWeakCount(20);
        m->setWeightTrimRate(0.95);
        m->setMaxDepth(4);
        m->setUseSurrogates(false);
        m->setPriors(Mat());
        return m;
    }

    /// Loads a Boost model from @p filename (marked CV_OVERRIDE like the other overrides).
    Ptr<StatModel> loadFromFile(const string &filename) const CV_OVERRIDE { return Boost::load(filename); }

    string name() const CV_OVERRIDE { return "Boost"; }

    int boostType;
};
/// Factory for SVM models; type, kernel, gamma, C and nu are fixed at construction,
/// degree, coef0 and P are set to 0 and the dataset description is not used.
template <>
struct ModelFactory<SVM> : public IModelFactory
{
    ModelFactory(int svmType_, int kernelType_, double gamma_, double c_, double nu_)
        : svmType(svmType_), kernelType(kernelType_), gamma(gamma_), c(c_), nu(nu_) {}

    /// Creates an SVM configured with the stored parameters.
    Ptr<StatModel> createNew(const DatasetDesc &) const CV_OVERRIDE
    {
        Ptr<SVM> m = SVM::create();
        m->setType(svmType);
        m->setKernel(kernelType);
        m->setDegree(0);
        m->setGamma(gamma);
        m->setCoef0(0);
        m->setC(c);
        m->setNu(nu);
        m->setP(0);
        return m;
    }

    /// Loads an SVM model from @p filename (marked CV_OVERRIDE like the other overrides).
    Ptr<StatModel> loadFromFile(const string &filename) const CV_OVERRIDE { return SVM::load(filename); }

    string name() const CV_OVERRIDE { return "SVM"; }

    int svmType;
    int kernelType;
    double gamma;
    double c;
    double nu;
};
//==================================================================================================
struct ML_Params_t
{
Ptr<IModelFactory> factory;
string dataset;
float mean;
float sigma;
};
void PrintTo(const ML_Params_t & param, std::ostream *os)
{
*os << param.factory->name() << "_" << param.dataset;
}
// Accuracy test cases as { factory, dataset, mean, sigma }; the accuracy test requires the
// measured error to lie within 4 * sigma of mean.
ML_Params_t ML_Params_List[] = {
{ makePtr< ModelFactory<DTrees> >(), "mushroom", 0.027401f, 0.036236f },
{ makePtr< ModelFactory<DTrees> >(), "adult", 14.279000f, 0.354323f },
{ makePtr< ModelFactory<DTrees> >(), "vehicle", 29.761162f, 4.823927f },
{ makePtr< ModelFactory<DTrees> >(), "abalone", 7.297540f, 0.510058f },
{ makePtr< ModelFactory<Boost> >(Boost::REAL), "adult", 13.894001f, 0.337763f },
{ makePtr< ModelFactory<Boost> >(Boost::DISCRETE), "mushroom", 0.007274f, 0.029400f },
{ makePtr< ModelFactory<Boost> >(Boost::LOGIT), "ringnorm", 9.993943f, 0.860256f },
{ makePtr< ModelFactory<Boost> >(Boost::GENTLE), "spambase", 5.404347f, 0.581716f },
{ makePtr< ModelFactory<RTrees> >(), "waveform", 17.100641f, 0.630052f },
{ makePtr< ModelFactory<RTrees> >(), "mushroom", 0.006547f, 0.028248f },
{ makePtr< ModelFactory<RTrees> >(), "adult", 13.5129f, 0.266065f },
{ makePtr< ModelFactory<RTrees> >(), "abalone", 4.745199f, 0.282112f },
{ makePtr< ModelFactory<RTrees> >(), "vehicle", 24.964712f, 4.469287f },
{ makePtr< ModelFactory<RTrees> >(), "letter", 5.334999f, 0.261142f },
{ makePtr< ModelFactory<RTrees> >(), "ringnorm", 6.248733f, 0.904713f },
{ makePtr< ModelFactory<RTrees> >(), "twonorm", 4.506479f, 0.449739f },
{ makePtr< ModelFactory<RTrees> >(), "spambase", 5.243477f, 0.54232f },
};
// Fixture parameterized over one ML_Params_t entry.
typedef testing::TestWithParam<ML_Params_t> ML_Params;

// Trains the model produced by the case's factory on the case's dataset and checks that
// the error reported by calcError lies within 4 * sigma of the expected mean.
TEST_P(ML_Params, accuracy)
{
    const ML_Params_t &testCase = GetParam();
    DatasetDesc &desc = getDataset(testCase.dataset);

    Ptr<TrainData> loaded = desc.load();
    ASSERT_TRUE(loaded);
    ASSERT_TRUE(loaded->getNSamples() > 0);

    Ptr<StatModel> model = testCase.factory->createNew(desc);
    ASSERT_TRUE(model);
    ASSERT_TRUE(model->train(loaded, 0));

    const float measuredError = model->calcError(loaded, true, noArray());
    const float tolerance = 4 * testCase.sigma;
    EXPECT_NEAR(measuredError, testCase.mean, tolerance);
}

// One test instance per entry of ML_Params_List.
INSTANTIATE_TEST_CASE_P(/**/, ML_Params, testing::ValuesIn(ML_Params_List));
//==================================================================================================
struct ML_SL_Params_t
{
Ptr<IModelFactory> factory;
string dataset;
};
void PrintTo(const ML_SL_Params_t & param, std::ostream *os)
{
*os << param.factory->name() << "_" << param.dataset;
}
// Save/load test cases as { factory, dataset }.
// The SVM factory arguments are (svmType, kernelType, gamma, c, nu).
ML_SL_Params_t ML_SL_Params_List[] = {
{ makePtr< ModelFactory<NormalBayesClassifier> >(), "waveform" },
{ makePtr< ModelFactory<KNearest> >(), "waveform" },
{ makePtr< ModelFactory<KNearest> >(), "abalone" },
{ makePtr< ModelFactory<SVM> >(SVM::C_SVC, SVM::LINEAR, 1, 0.5, 0), "waveform" },
{ makePtr< ModelFactory<SVM> >(SVM::NU_SVR, SVM::RBF, 0.00225, 62.5, 0.03), "poletelecomm" },
{ makePtr< ModelFactory<DTrees> >(), "mushroom" },
{ makePtr< ModelFactory<DTrees> >(), "abalone" },
{ makePtr< ModelFactory<Boost> >(Boost::REAL), "adult" },
{ makePtr< ModelFactory<RTrees> >(), "waveform" },
{ makePtr< ModelFactory<RTrees> >(), "abalone" },
{ makePtr< ModelFactory<SVMSGD> >(), "waveform" },
};
// Fixture parameterized over one ML_SL_Params_t entry.
typedef testing::TestWithParam<ML_SL_Params_t> ML_SL_Params;

// Trains a model, saves it, reloads it and saves it again. The responses of the original
// and the reloaded model must be equal, and the two saved files must be byte-identical.
TEST_P(ML_SL_Params, save_load)
{
    const ML_SL_Params_t & param = GetParam();
    DatasetDesc &dataset = getDataset(param.dataset);
    Ptr<TrainData> data = dataset.load();
    ASSERT_TRUE(data);
    ASSERT_TRUE(data->getNSamples() > 0);
    Mat responses1, responses2;
    string file1 = tempfile(".json.gz");
    string file2 = tempfile(".json.gz");
    {
        // First generation: train, record responses, save.
        Ptr<StatModel> m = param.factory->createNew(dataset);
        ASSERT_TRUE(m);
        ASSERT_TRUE(m->train(data, 0));
        m->calcError(data, true, responses1);
        m->save(file1 + "?base64");
    }
    {
        // Second generation: reload, record responses, save again.
        Ptr<StatModel> m = param.factory->loadFromFile(file1);
        ASSERT_TRUE(m);
        m->calcError(data, true, responses2);
        m->save(file2 + "?base64");
    }
    EXPECT_MAT_NEAR(responses1, responses2, 0.0);
    {
        ifstream f1(file1.c_str(), std::ios_base::binary);
        ifstream f2(file2.c_str(), std::ios_base::binary);
        ASSERT_TRUE(f1.is_open() && f2.is_open());
        const size_t BUFSZ = 10000;
        vector<char> buf1(BUFSZ, 0);
        vector<char> buf2(BUFSZ, 0);
        while (true)
        {
            f1.read(&buf1[0], BUFSZ);
            f2.read(&buf2[0], BUFSZ);
            const std::streamsize got1 = f1.gcount();
            const std::streamsize got2 = f2.gcount();
            EXPECT_EQ(got1, got2);
            EXPECT_EQ(f1.eof(), f2.eof());
            if (got1 != got2)
                break;
            // Compare exactly the bytes read in this round *before* looking at the stream
            // state: a short final read sets eof/fail, and breaking first (as the loop used
            // to do) skipped the last chunk - i.e. the whole file when it is under BUFSZ.
            const vector<char> chunk1(buf1.begin(), buf1.begin() + got1);
            const vector<char> chunk2(buf2.begin(), buf2.begin() + got2);
            ASSERT_EQ(chunk1, chunk2);
            if (!f1.good() || !f2.good())
                break;
        }
    }
    remove(file1.c_str());
    remove(file2.c_str());
}

// One test instance per entry of ML_SL_Params_List.
INSTANTIATE_TEST_CASE_P(/**/, ML_SL_Params, testing::ValuesIn(ML_SL_Params_List));
//==================================================================================================
// Row-sample layout: after a 0.9 train/test split of 150 samples, the test subset must
// hold 15 samples whose responses and features keep the values written into the
// source matrices (column 1 of the labels and column 3 of the samples are offset).
TEST(TrainDataGet, layout_ROW_SAMPLE) // Details: #12236
{
    const int sampleCount = 150, featureCount = 30, responseCount = 3;
    const int expectedTestCount = 15;
    const int lastTest = expectedTestCount - 1;

    // Samples: all 2, except column 3 which becomes 5.
    cv::Mat test = cv::Mat::ones(sampleCount, featureCount, CV_32FC1) * 2;
    test.col(3) += Scalar::all(3);

    // Responses: all 5, except column 1 which becomes 6.
    cv::Mat labels = cv::Mat::ones(sampleCount, responseCount, CV_32SC1) * 5;
    labels.col(1) += 1;

    cv::Ptr<cv::ml::TrainData> train_data = cv::ml::TrainData::create(test, cv::ml::ROW_SAMPLE, labels);
    train_data->setTrainTestSplitRatio(0.9);

    Mat tidx = train_data->getTestSampleIdx();
    EXPECT_EQ((size_t)expectedTestCount, tidx.total());

    Mat tresp = train_data->getTestResponses();
    EXPECT_EQ(expectedTestCount, tresp.rows);
    EXPECT_EQ(labels.cols, tresp.cols);
    EXPECT_EQ(5, tresp.at<int>(0, 0)) << tresp;
    EXPECT_EQ(6, tresp.at<int>(0, 1)) << tresp;
    EXPECT_EQ(6, tresp.at<int>(lastTest, 1)) << tresp;
    EXPECT_EQ(5, tresp.at<int>(lastTest, 2)) << tresp;

    Mat tsamples = train_data->getTestSamples();
    EXPECT_EQ(expectedTestCount, tsamples.rows);
    EXPECT_EQ(test.cols, tsamples.cols);
    EXPECT_EQ(2, tsamples.at<float>(0, 0)) << tsamples;
    EXPECT_EQ(5, tsamples.at<float>(0, 3)) << tsamples;
    EXPECT_EQ(2, tsamples.at<float>(lastTest, test.cols - 1)) << tsamples;
    EXPECT_EQ(5, tsamples.at<float>(lastTest, 3)) << tsamples;
}
// Column-sample layout: same scenario as the row-sample test, but samples are stored
// one per column. The test expects responses to come back one sample per row
// (transposed), while test samples keep the one-sample-per-column layout.
TEST(TrainDataGet, layout_COL_SAMPLE) // Details: #12236
{
    const int sampleCount = 150, featureCount = 30, responseCount = 3;
    const int expectedTestCount = 15;
    const int lastTest = expectedTestCount - 1;

    // Samples: all 3, except row 3 (feature 3) which becomes 6.
    cv::Mat test = cv::Mat::ones(featureCount, sampleCount, CV_32FC1) * 3;
    test.row(3) += Scalar::all(3);

    // Responses: all 5, except row 1 which becomes 6.
    cv::Mat labels = cv::Mat::ones(responseCount, sampleCount, CV_32SC1) * 5;
    labels.row(1) += 1;

    cv::Ptr<cv::ml::TrainData> train_data = cv::ml::TrainData::create(test, cv::ml::COL_SAMPLE, labels);
    train_data->setTrainTestSplitRatio(0.9);

    Mat tidx = train_data->getTestSampleIdx();
    EXPECT_EQ((size_t)expectedTestCount, tidx.total());

    Mat tresp = train_data->getTestResponses(); // always row-based, transposed
    EXPECT_EQ(expectedTestCount, tresp.rows);
    EXPECT_EQ(labels.rows, tresp.cols);
    EXPECT_EQ(5, tresp.at<int>(0, 0)) << tresp;
    EXPECT_EQ(6, tresp.at<int>(0, 1)) << tresp;
    EXPECT_EQ(6, tresp.at<int>(lastTest, 1)) << tresp;
    EXPECT_EQ(5, tresp.at<int>(lastTest, 2)) << tresp;

    Mat tsamples = train_data->getTestSamples();
    EXPECT_EQ(expectedTestCount, tsamples.cols);
    EXPECT_EQ(test.rows, tsamples.rows);
    EXPECT_EQ(3, tsamples.at<float>(0, 0)) << tsamples;
    EXPECT_EQ(6, tsamples.at<float>(3, 0)) << tsamples;
    EXPECT_EQ(6, tsamples.at<float>(3, lastTest)) << tsamples;
    EXPECT_EQ(3, tsamples.at<float>(test.rows - 1, lastTest)) << tsamples;
}
}} // namespace

View File

@ -0,0 +1,51 @@
// Common header for the ml module tests: pulls in the test framework and the ml API,
// defines the model-kind name strings and declares helpers shared between test files.
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
#include "opencv2/ts.hpp"
#include <opencv2/ts/cuda_test.hpp> // EXPECT_MAT_NEAR
#include "opencv2/ml.hpp"
#include "opencv2/core/core_c.h"
#include <fstream>
using std::ifstream;
namespace opencv_test {
using namespace cv::ml;
// String identifiers of the model kinds; tests compare against these and use them
// as parts of data file names.
#define CV_NBAYES "nbayes"
#define CV_KNEAREST "knearest"
#define CV_SVM "svm"
#define CV_EM "em"
#define CV_ANN "ann"
#define CV_DTREE "dtree"
#define CV_BOOST "boost"
#define CV_RTREES "rtrees"
#define CV_ERTREES "ertrees"
#define CV_SVMSGD "svmsgd"
using cv::Ptr;
using cv::ml::StatModel;
using cv::ml::TrainData;
using cv::ml::NormalBayesClassifier;
using cv::ml::SVM;
using cv::ml::KNearest;
using cv::ml::ParamGrid;
using cv::ml::ANN_MLP;
using cv::ml::DTrees;
using cv::ml::Boost;
using cv::ml::RTrees;
using cv::ml::SVMSGD;
// Fills `means` and `covs` with three fixed 2-D distributions, converted to `type`.
void defaultDistribs( Mat& means, vector<Mat>& covs, int type=CV_32FC1 );
// Fills the pre-allocated `data` with points from the given distributions (sizes[k] rows
// for distribution k) and creates `labels` holding the distribution index of every row.
void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int dataType, int labelType );
// Returns the index of the first largest element of `count`.
int maxIdx( const vector<int>& count );
// Maps every consecutive block of `labels` (block k has sizes[k] entries) to its most
// frequent label; the result is written to `labelsMap`.
bool getLabelsMap( const Mat& labels, const vector<int>& sizes, vector<int>& labelsMap, bool checkClusterUniq=true );
// Computes into `err` the fraction of entries of `labels` that disagree with `origLabels`,
// optionally translating the original labels through getLabelsMap() first.
bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, float& err, bool labelsEquivalent = true, bool checkClusterUniq=true );
// used in LR test
bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error);
} // namespace
#endif

View File

@ -0,0 +1,119 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Checks RTrees::getVotes() on a 3-class toy problem: the votes for one sample must add
// up to the number of trees, and the class with the most votes must equal predict().
TEST(ML_RTrees, getVotes)
{
    const int n = 12;
    const int label_size = 3;

    // RTrees for classification
    Ptr<ml::RTrees> rt = cv::ml::RTrees::create();

    // data
    Mat data(n, 4, CV_32F);
    randu(data, 0, 10);

    // labels
    Mat labels = (Mat_<int>(n,1) << 0,0,0,0, 1,1,1,1, 2,2,2,2);
    rt->train(data, ml::ROW_SAMPLE, labels);

    // run function
    Mat test(1, 4, CV_32F);
    Mat result;
    randu(test, 0, 10);
    rt->getVotes(test, result, 0);

    // Row 0 holds the class labels and row 1 the votes for the single test sample; both
    // rows are read as int below, so make sure the matrix really has that layout.
    ASSERT_EQ(CV_32SC1, result.type());
    ASSERT_GE(result.rows, 2);
    ASSERT_GE(result.cols, label_size);

    // count vote amount and find highest vote
    int count = 0;
    int predicted_class = 0;
    int max_votes = -1;
    const int* result_row = result.ptr<int>(1);
    for( int i = 0; i < label_size; i++ )
    {
        const int val = result_row[i];
        if( max_votes < val )
        {
            max_votes = val;
            predicted_class = i;
        }
        count += val;
    }

    EXPECT_EQ(count, (int)rt->getRoots().size());
    // The label row is integer data: read it as int and convert, instead of
    // reinterpreting the int bits as a float (which only works for label 0).
    EXPECT_EQ((float)result.at<int>(0, predicted_class), rt->predict(test));
}
// Issue 11142 (regression case): training with explicit sample weights must not give a
// smaller rounded out-of-bag error than training without them.
TEST(ML_RTrees, 11142_sample_weights_regression)
{
    const int sampleCount = 3;

    // RTrees for regression
    Ptr<ml::RTrees> forest = cv::ml::RTrees::create();

    // simple regression problem of x -> 2x, with the same weight for every sample
    Mat inputs = (Mat_<float>(sampleCount, 1) << 1, 2, 3);
    Mat targets = (Mat_<float>(sampleCount, 1) << 2, 4, 6);
    Mat sampleWeights = (Mat_<float>(sampleCount, 1) << 10, 10, 10);

    // First pass: no weights.
    Ptr<TrainData> plainData = TrainData::create(inputs, ml::ROW_SAMPLE, targets);
    forest->train(plainData);
    const double error_without_weights = round(forest->getOOBError());

    // Second pass: same data, weights attached.
    forest->clear();
    Ptr<TrainData> weightedData = TrainData::create(inputs, ml::ROW_SAMPLE, targets, Mat(), Mat(), sampleWeights);
    forest->train(weightedData);
    const double error_with_weights = round(forest->getOOBError());

    // the weighted error must be at least as large as the unweighted one
    EXPECT_GE(error_with_weights, error_without_weights);
}
// Issue 11142 (classification case): training with explicit sample weights must not give
// a smaller rounded out-of-bag error than training without them.
// Mirrors the regression variant of this test: the unweighted error is read before the
// model is cleared, and the second training actually uses the weighted TrainData.
TEST(ML_RTrees, 11142_sample_weights_classification)
{
    int n = 12;

    // RTrees for classification
    Ptr<ml::RTrees> rt = cv::ml::RTrees::create();

    Mat data(n, 4, CV_32F);
    randu(data, 0, 10);
    Mat labels = (Mat_<int>(n,1) << 0,0,0,0, 1,1,1,1, 2,2,2,2);
    Mat weights = (Mat_<float>(n, 1) << 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10);

    // First pass: no weights. The error has to be read while the trained model still
    // exists, i.e. before clear().
    rt->train(data, ml::ROW_SAMPLE, labels);
    double error_without_weights = round(rt->getOOBError());
    rt->clear();

    // Second pass: same data with the weights attached.
    Ptr<TrainData> trainDataWithWeights = TrainData::create(data, ml::ROW_SAMPLE, labels, Mat(), Mat(), weights );
    rt->train(trainDataWithWeights);
    double error_with_weights = round(rt->getOOBError());

    // the weighted error must be at least as large as the unweighted one
    // (both values are reported by the assertion itself on failure)
    EXPECT_GE(error_with_weights, error_without_weights);
}
// Issue 12974: predict() must throw when the sample has a different number of features
// than the data the model was trained on.
TEST(ML_RTrees, bug_12974_throw_exception_when_predict_different_feature_count)
{
    const int numFeatures = 5;
    const int numSamples = 10;

    // train a two-class model on a random 5-feature dataset
    cv::Ptr<RTrees> model = RTrees::create();
    Mat samples(numSamples, numFeatures, CV_32F);
    randu(samples, 0, 10);
    Mat labels = (Mat_<int>(numSamples, 1) << 0,0,0,0,0,1,1,1,1,1);
    model->train(TrainData::create(samples, cv::ml::ROW_SAMPLE, labels));

    // too few features must be rejected
    // NOTE(review): the bound stops at numFeatures - 2, so a sample with exactly
    // numFeatures - 1 columns is not exercised here.
    for (int cols = 1; cols < numFeatures - 1; ++cols)
    {
        Mat narrow(1, cols, CV_32FC1);
        ASSERT_THROW(model->predict(narrow), Exception);
    }

    // too many features must be rejected as well
    Mat wide(1, numFeatures + 1, CV_32FC1);
    ASSERT_THROW(model->predict(wide), Exception);
}
}} // namespace

View File

@ -0,0 +1,107 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Overwrites the categorical columns of `input` with randomly chosen category values
// taken from the model file `filename`.
// "cat_map", "cat_count" and "var_type" are read from the first top-level node of the
// file. Every variable whose type is ml::VAR_CATEGORICAL owns a consecutive slice of
// cat_map whose length is the matching cat_count entry; each row of `input` receives one
// value picked from that slice. Columns of non-categorical variables are left untouched.
// NOTE(review): neither the result of opening the file nor a zero cat_count entry is
// checked; a zero entry would make the modulo below divide by zero.
void randomFillCategories(const string & filename, Mat & input)
{
Mat catMap;
Mat catCount;
std::vector<uchar> varTypes;
FileStorage fs(filename, FileStorage::READ);
FileNode root = fs.getFirstTopLevelNode();
root["cat_map"] >> catMap;
root["cat_count"] >> catCount;
root["var_type"] >> varTypes;
// offset: start of the current variable's slice inside catMap
int offset = 0;
// countOffset: index of the current categorical variable inside catCount
int countOffset = 0;
uint var = 0, varCount = (uint)varTypes.size();
for (; var < varCount; ++var)
{
if (varTypes[var] == ml::VAR_CATEGORICAL)
{
int size = catCount.at<int>(0, countOffset);
for (int row = 0; row < input.rows; ++row)
{
// draw a random position inside this variable's slice of catMap
int randomChosenIndex = offset + ((uint)cv::theRNG()) % size;
int value = catMap.at<int>(0, randomChosenIndex);
input.at<float>(row, var) = (float)value;
}
// advance to the next categorical variable's slice
offset += size;
++countOffset;
}
}
}
//==================================================================================================
// (model kind, dataset name) pair identifying one legacy model file.
typedef tuple<string, string> ML_Legacy_Param;
typedef testing::TestWithParam< ML_Legacy_Param > ML_Legacy_Params;

// Loads a model stored as "legacy/<model>_<data>.xml" and runs a prediction on random
// input. Only the absence of exceptions is verified, not the predicted values.
TEST_P(ML_Legacy_Params, legacy_load)
{
    const ML_Legacy_Param & param = GetParam();
    const string modelName = get<0>(param);
    const string dataName = get<1>(param);
    const string filename = findDataFile("legacy/" + modelName + "_" + dataName + ".xml");

    // Tree-based models get more input rows, category-aware input and summed prediction.
    const bool isTree = (modelName == CV_BOOST) || (modelName == CV_DTREE) || (modelName == CV_RTREES);

    // Pick the concrete model class from the kind string; an unknown kind leaves the
    // pointer empty and fails the assertion below.
    Ptr<StatModel> model;
    if (modelName == CV_BOOST)       { model = Algorithm::load<Boost>(filename); }
    else if (modelName == CV_ANN)    { model = Algorithm::load<ANN_MLP>(filename); }
    else if (modelName == CV_DTREE)  { model = Algorithm::load<DTrees>(filename); }
    else if (modelName == CV_NBAYES) { model = Algorithm::load<NormalBayesClassifier>(filename); }
    else if (modelName == CV_SVM)    { model = Algorithm::load<SVM>(filename); }
    else if (modelName == CV_RTREES) { model = Algorithm::load<RTrees>(filename); }
    else if (modelName == CV_SVMSGD) { model = Algorithm::load<SVMSGD>(filename); }
    ASSERT_TRUE(model);

    const int rowCount = isTree ? 10 : 1;
    Mat input(rowCount, model->getVarCount(), CV_32F);
    cv::theRNG().fill(input, RNG::UNIFORM, 0, 40);
    if (isTree)
    {
        // categorical variables must hold values that exist in the stored category map
        randomFillCategories(filename, input);
    }

    int predictFlags = StatModel::RAW_OUTPUT;
    if (isTree)
        predictFlags |= DTrees::PREDICT_SUM;

    Mat output;
    // just check if no internal assertions or errors thrown
    EXPECT_NO_THROW(model->predict(input, output, predictFlags));
}
// Legacy model files covered by the legacy_load test: each entry is a (model kind,
// dataset name) pair that the test turns into "legacy/<model>_<data>.xml".
ML_Legacy_Param param_list[] = {
ML_Legacy_Param(CV_ANN, "waveform"),
ML_Legacy_Param(CV_BOOST, "adult"),
ML_Legacy_Param(CV_BOOST, "1"),
ML_Legacy_Param(CV_BOOST, "2"),
ML_Legacy_Param(CV_BOOST, "3"),
ML_Legacy_Param(CV_DTREE, "abalone"),
ML_Legacy_Param(CV_DTREE, "mushroom"),
ML_Legacy_Param(CV_NBAYES, "waveform"),
ML_Legacy_Param(CV_SVM, "poletelecomm"),
ML_Legacy_Param(CV_SVM, "waveform"),
ML_Legacy_Param(CV_RTREES, "waveform"),
ML_Legacy_Param(CV_SVMSGD, "waveform"),
};
INSTANTIATE_TEST_CASE_P(/**/, ML_Legacy_Params, testing::ValuesIn(param_list));
/*TEST(ML_SVM, throw_exception_when_save_untrained_model)
{
Ptr<cv::ml::SVM> svm;
string filename = tempfile("svm.xml");
ASSERT_THROW(svm.save(filename.c_str()), Exception);
remove(filename.c_str());
}*/
}} // namespace

View File

@ -0,0 +1,156 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
// Absolute bound of the generated feature values: features are drawn from
// [-TEST_VALUE_LIMIT, TEST_VALUE_LIMIT] (or a scaled-down version of that range).
static const int TEST_VALUE_LIMIT = 500;
// How the per-feature value ranges are chosen in the parameterized test.
enum
{
// every feature uses the full range
UNIFORM_SAME_SCALE,
// each feature randomly uses either the full range or a range 1000 times smaller
UNIFORM_DIFFERENT_SCALES
};
CV_ENUM(SVMSGD_TYPE, UNIFORM_SAME_SCALE, UNIFORM_DIFFERENT_SCALES)
// One (lower, upper) value range per feature.
typedef std::vector< std::pair<float,float> > BorderList;
// Generates a linearly separable two-class dataset.
//   rng          - random generator used for the feature values
//   samplesCount - number of rows to generate
//   weights      - 1 x featureCount row of the separating hyperplane's normal
//   shift        - offset of the hyperplane
//   borders      - one (lower, upper) range per feature
//   samples      - output, samplesCount x featureCount, CV_32FC1
//   responses    - output, samplesCount x 1, CV_32FC1; +1 where dot(sample, weights) + shift
//                  is positive, -1 otherwise
static void makeData(RNG &rng, int samplesCount, const Mat &weights, float shift, const BorderList & borders, Mat &samples, Mat & responses)
{
    const int nFeatures = weights.cols;

    // Fill the features column by column, each from its own uniform range.
    samples.create(samplesCount, nFeatures, CV_32FC1);
    for (int f = 0; f < nFeatures; f++)
    {
        const std::pair<float,float> & range = borders[f];
        rng.fill(samples.col(f), RNG::UNIFORM, range.first, range.second);
    }

    // Label every sample by the side of the hyperplane it falls on.
    responses.create(samplesCount, 1, CV_32FC1);
    for (int row = 0; row < samplesCount; row++)
    {
        const double margin = samples.row(row).dot(weights) + shift;
        float label = -1.f;
        if (margin > 0)
            label = 1.f;
        responses.at<float>(row) = label;
    }
}
//==================================================================================================
// Test parameter: (feature range mode, number of features, maximum allowed error rate).
typedef tuple<SVMSGD_TYPE, int, double> ML_SVMSGD_Param;
typedef testing::TestWithParam<ML_SVMSGD_Param> ML_SVMSGD_Params;
// Trains SVMSGD on random linearly separable data produced by makeData() and checks
// that the error rate on a separate, larger test set stays within `precision`.
// The statements below consume the global RNG in a fixed order; reordering them
// changes the generated data.
TEST_P(ML_SVMSGD_Params, scale_and_features)
{
const int type = get<0>(GetParam());
const int featureCount = get<1>(GetParam());
const double precision = get<2>(GetParam());
RNG &rng = cv::theRNG();
// Ground-truth separating hyperplane: random weights and a random integer shift.
Mat_<float> weights(1, featureCount);
rng.fill(weights, RNG::UNIFORM, -1, 1);
const float shift = static_cast<float>(rng.uniform(-featureCount, featureCount));
// Per-feature value ranges, depending on the range mode.
BorderList borders;
float lowerLimit = -TEST_VALUE_LIMIT;
float upperLimit = TEST_VALUE_LIMIT;
if (type == UNIFORM_SAME_SCALE)
{
for (int featureIndex = 0; featureIndex < featureCount; featureIndex++)
borders.push_back(std::pair<float,float>(lowerLimit, upperLimit));
}
else if (type == UNIFORM_DIFFERENT_SCALES)
{
for (int featureIndex = 0; featureIndex < featureCount; featureIndex++)
{
// random choice between the full range and a range 1000 times smaller
int crit = rng.uniform(0, 2);
if (crit > 0)
borders.push_back(std::pair<float,float>(lowerLimit, upperLimit));
else
borders.push_back(std::pair<float,float>(lowerLimit/1000, upperLimit/1000));
}
}
ASSERT_FALSE(borders.empty());
Mat trainSamples;
Mat trainResponses;
int trainSamplesCount = 10000;
makeData(rng, trainSamplesCount, weights, shift, borders, trainSamples, trainResponses);
ASSERT_EQ(trainResponses.type(), CV_32FC1);
// The test set comes from the same hyperplane and ranges but is ten times larger.
Mat testSamples;
Mat testResponses;
int testSamplesCount = 100000;
makeData(rng, testSamplesCount, weights, shift, borders, testSamples, testResponses);
ASSERT_EQ(testResponses.type(), CV_32FC1);
Ptr<TrainData> data = TrainData::create(trainSamples, cv::ml::ROW_SAMPLE, trainResponses);
ASSERT_TRUE(data);
cv::Ptr<SVMSGD> svmsgd = SVMSGD::create();
ASSERT_TRUE(svmsgd);
svmsgd->train(data);
Mat responses;
svmsgd->predict(testSamples, responses);
ASSERT_EQ(responses.type(), CV_32FC1);
ASSERT_EQ(responses.rows, testSamplesCount);
// A prediction counts as an error when its sign differs from the expected response
// (the product of the two is negative).
int errCount = 0;
for (int i = 0; i < testSamplesCount; i++)
if (responses.at<float>(i) * testResponses.at<float>(i) < 0)
errCount++;
float err = (float)errCount / testSamplesCount;
EXPECT_LE(err, precision);
}
// Test matrix for scale_and_features: both range modes with 2, 5 and 100 features.
// The last value of each entry is the maximum allowed error rate.
ML_SVMSGD_Param params_list[] = {
ML_SVMSGD_Param(UNIFORM_SAME_SCALE, 2, 0.01),
ML_SVMSGD_Param(UNIFORM_SAME_SCALE, 5, 0.01),
ML_SVMSGD_Param(UNIFORM_SAME_SCALE, 100, 0.02),
ML_SVMSGD_Param(UNIFORM_DIFFERENT_SCALES, 2, 0.01),
ML_SVMSGD_Param(UNIFORM_DIFFERENT_SCALES, 5, 0.01),
ML_SVMSGD_Param(UNIFORM_DIFFERENT_SCALES, 100, 0.01),
};
INSTANTIATE_TEST_CASE_P(/**/, ML_SVMSGD_Params, testing::ValuesIn(params_list));
//==================================================================================================
// Trains SVMSGD on two points, (0, 0) labelled -1 and (1000, 1) labelled +1, and
// compares the normalized weights and shift it finds with the expected hyperplane.
TEST(ML_SVMSGD, twoPoints)
{
    // Training set: one sample per row.
    Mat samples = (Mat_<float>(2, 2) << 0, 0,
                                        1000, 1);
    Mat responses = (Mat_<float>(2, 1) << -1, 1);
    cv::Ptr<TrainData> trainData = TrainData::create(samples, cv::ml::ROW_SAMPLE, responses);

    // Expected hyperplane, scaled so that the weight vector has unit norm.
    Mat realWeights = (Mat_<float>(1, 2) << 1000, 1);
    float realShift = -500000.5;
    const float realNorm = static_cast<float>(cv::norm(realWeights)); // TODO cvtest
    realWeights /= realNorm;
    realShift /= realNorm;

    cv::Ptr<SVMSGD> svmsgd = SVMSGD::create();
    svmsgd->setOptimalParameters();
    svmsgd->train( trainData );

    // Found hyperplane, normalized the same way.
    Mat foundWeights = svmsgd->getWeights();
    float foundShift = svmsgd->getShift();
    const float foundNorm = static_cast<float>(cv::norm(foundWeights)); // TODO cvtest
    foundWeights /= foundNorm;
    foundShift /= foundNorm;

    // Direction must match closely; the shift may differ by up to 5 % (relative).
    EXPECT_LE(cv::norm(Mat(foundWeights - realWeights)), 0.001); // TODO cvtest
    EXPECT_LE(std::abs((foundShift - realShift) / realShift), 0.05);
}
}} // namespace

View File

@ -0,0 +1,164 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test { namespace {
using cv::ml::SVM;
using cv::ml::TrainData;
// Builds `datasize` random 2-D samples in two classes: class 0 has both coordinates
// in [0, 0.5), class 1 has both coordinates shifted up by 0.5.
static Ptr<TrainData> makeRandomData(int datasize)
{
    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
    RNG &rng = cv::theRNG();
    for (int row = 0; row < datasize; ++row)
    {
        const int cls = rng.uniform(0, 2); // Random from {0, 1}.
        const float classOffset = cls * 0.5f;
        float* point = samples.ptr<float>(row);
        // the two coordinates are drawn in this order: x first, then y
        point[0] = rng.uniform(0.f, 0.5f) + classOffset;
        point[1] = rng.uniform(0.f, 0.5f) + classOffset;
        responses.at<int>( row, 0 ) = cls;
    }
    return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
}
// Builds `datasize` 2-D samples in two classes, written in pairs: row i is a point of
// the "larger" set (response 0) and row i + 1 the same point scaled by `scale_factor`
// (response 1).
//   datasize     - number of rows; with an odd value the last row has no partner and
//                  stays all-zero with response 0
//   scale_factor - scale applied to obtain the "smaller" point of each pair
//   radius       - radius used for the "larger" point
// NOTE(review): `i/datasize` is an integer division and i < datasize, so the angle is
// always 0; in addition y uses cos() just like x. As a result every pair is
// (radius, radius) / (scale_factor * radius, scale_factor * radius) instead of points on
// two concentric circles. This is kept as is because the trainauto_sigmoid test probes
// exactly those two points.
static Ptr<TrainData> makeCircleData(int datasize, float scale_factor, float radius)
{
    // Populate samples with data that can be split into two concentric circles
    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
    // Each iteration writes rows i and i + 1, so both must exist; the former bound
    // `i < datasize` wrote one row past the end for an odd datasize.
    for (int i = 0; i + 1 < datasize; i+=2)
    {
        const float pi = 3.14159f;
        const float angle_rads = (i/datasize) * pi;
        const float x = radius * cos(angle_rads);
        const float y = radius * cos(angle_rads);
        // Larger circle
        samples.at<float>( i, 0 ) = x;
        samples.at<float>( i, 1 ) = y;
        responses.at<int>( i, 0 ) = 0;
        // Smaller circle
        samples.at<float>( i + 1, 0 ) = x * scale_factor;
        samples.at<float>( i + 1, 1 ) = y * scale_factor;
        responses.at<int>( i + 1, 0 ) = 1;
    }
    return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
}
// Builds `datasize` random samples whose first coordinate is always 0; the second
// coordinate is computed from the class (0 or 1) and a random factor in [0, 1.2).
static Ptr<TrainData> makeRandomData2(int datasize)
{
    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
    RNG &rng = cv::theRNG();
    for (int row = 0; row < datasize; ++row)
    {
        const int response = rng.uniform(0, 2); // Random from {0, 1}.
        float* point = samples.ptr<float>(row);
        point[0] = 0;
        point[1] = (0.5f - response) * rng.uniform(0.f, 1.2f) + response;
        responses.at<int>( row, 0 ) = response;
    }
    return TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
}
//==================================================================================================
// trainAuto() on two well separated random clusters: a point from the middle of each
// cluster's value range must be classified into that cluster.
TEST(ML_SVM, trainauto)
{
    const int datasize = 100;
    cv::Ptr<TrainData> data = makeRandomData(datasize);
    ASSERT_TRUE(data);

    cv::Ptr<SVM> svm = SVM::create();
    ASSERT_TRUE(svm);
    svm->trainAuto( data, 10 ); // second argument is the cross-validation fold count (10)

    // centre of the class-0 range
    float class0_point[2] = {0.25f, 0.25f};
    const cv::Mat query0( 1, 2, CV_32FC1, class0_point );
    const float result0 = svm->predict( query0 );

    // centre of the class-1 range
    float class1_point[2] = {0.75f, 0.75f};
    const cv::Mat query1( 1, 2, CV_32FC1, class1_point );
    const float result1 = svm->predict( query1 );

    EXPECT_NEAR(result0, 0, 0.001);
    EXPECT_NEAR(result1, 1, 0.001);
}
// trainAuto() with a sigmoid kernel on the data from makeCircleData(): the point
// (radius, radius) must be class 0 and its scaled-down counterpart class 1.
TEST(ML_SVM, trainauto_sigmoid)
{
    const int datasize = 100;
    const float scale_factor = 0.5;
    const float radius = 2.0;
    cv::Ptr<TrainData> data = makeCircleData(datasize, scale_factor, radius);
    ASSERT_TRUE(data);

    cv::Ptr<SVM> svm = SVM::create();
    ASSERT_TRUE(svm);
    svm->setKernel(SVM::SIGMOID);
    svm->setGamma(10.0);
    svm->setCoef0(-10.0);
    svm->trainAuto( data, 10 ); // second argument is the cross-validation fold count (10)

    // point of the larger set
    float outer_point[2] = {radius, radius};
    const cv::Mat outer_query( 1, 2, CV_32FC1, outer_point );
    EXPECT_FLOAT_EQ(svm->predict( outer_query ), 0);

    // point of the smaller set
    float inner_point[2] = {scale_factor * radius, scale_factor * radius};
    const cv::Mat inner_query( 1, 2, CV_32FC1, inner_point );
    EXPECT_FLOAT_EQ(svm->predict( inner_query ), 1);
}
// Regression test for issue 5369: trainAuto() on the data from makeRandomData2() must
// classify (0.25, 0.25) as class 0 and (0.75, 0.75) as class 1.
TEST(ML_SVM, trainAuto_regression_5369)
{
    const int datasize = 100;
    Ptr<TrainData> data = makeRandomData2(datasize);

    cv::Ptr<SVM> svm = SVM::create();
    svm->trainAuto( data, 10 ); // second argument is the cross-validation fold count (10)

    float low_point[2] = {0.25f, 0.25f};
    const cv::Mat low_query( 1, 2, CV_32FC1, low_point );
    const float result0 = svm->predict( low_query );

    float high_point[2] = {0.75f, 0.75f};
    const cv::Mat high_query( 1, 2, CV_32FC1, high_point );
    const float result1 = svm->predict( high_query );

    EXPECT_EQ(0., result0);
    EXPECT_EQ(1., result1);
}
// Checks how many rows getSupportVectors() and getUncompressedSupportVectors() return
// for a linear and for a polynomial kernel trained on the same four labelled points.
TEST(ML_SVM, getSupportVectors)
{
// Set up training data
int labels[4] = {1, -1, -1, -1};
float trainingData[4][2] = { {501, 10}, {255, 10}, {501, 255}, {10, 501} };
// Both matrices wrap the arrays above without copying them.
Mat trainingDataMat(4, 2, CV_32FC1, trainingData);
Mat labelsMat(4, 1, CV_32SC1, labels);
Ptr<SVM> svm = SVM::create();
ASSERT_TRUE(svm);
svm->setType(SVM::C_SVC);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6));
// Test retrieval of SVs and compressed SVs on linear SVM
svm->setKernel(SVM::LINEAR);
svm->train(trainingDataMat, cv::ml::ROW_SAMPLE, labelsMat);
Mat sv = svm->getSupportVectors();
EXPECT_EQ(1, sv.rows); // by default compressed SV returned
sv = svm->getUncompressedSupportVectors();
EXPECT_EQ(3, sv.rows);
// Test retrieval of SVs and compressed SVs on non-linear SVM
svm->setKernel(SVM::POLY);
svm->setDegree(2);
svm->train(trainingDataMat, cv::ml::ROW_SAMPLE, labelsMat);
sv = svm->getSupportVectors();
EXPECT_EQ(3, sv.rows);
sv = svm->getUncompressedSupportVectors();
EXPECT_EQ(0, sv.rows); // inapplicable for non-linear SVMs
}
}} // namespace

View File

@ -0,0 +1,189 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
namespace opencv_test {
// Fills `means` (3 x 2) and `covs` (three 2 x 2 matrices) with the parameters of three
// fixed 2-D distributions. The values are defined as float and converted to `type`.
void defaultDistribs( Mat& means, vector<Mat>& covs, int type)
{
    const int kDistribs = 3;
    // one row per distribution: mean (x, y)
    float meanValues[kDistribs][2] = {
        {0.0f, 0.0f},
        {5.0f, 0.0f},
        {1.0f, 5.0f}
    };
    // one row per distribution: 2 x 2 covariance matrix, row-major
    float covValues[kDistribs][4] = {
        {0.67f, 0.0f, 0.0f, 0.67f},
        {1.0f, 0.0f, 0.0f, 1.0f},
        {1.0f, 0.0f, 0.0f, 1.0f}
    };

    means.create(kDistribs, 2, type);
    means.resize(kDistribs);
    covs.resize(kDistribs);

    for( int k = 0; k < kDistribs; k++ )
    {
        Mat srcMean( 1, 2, CV_32FC1, meanValues[k] );
        Mat srcCov( 2, 2, CV_32FC1, covValues[k] );
        // the converted mean goes into row k of `means` through a row header
        Mat dstRow = means.row(k);
        srcMean.convertTo(dstRow, type);
        srcCov.convertTo(covs[k], type);
    }
}
// generate points sets by normal distributions
// generate points sets by normal distributions
//   data      - in/out: must already be allocated with sum(sizes) rows and type dataType;
//               its content is overwritten
//   labels    - output: data.rows x 1 matrix of type labelType (CV_32FC1 or CV_32SC1)
//               holding, for every row of data, the index of the set it belongs to
//   sizes     - number of points in every set; the sets occupy consecutive rows of data
//   _means    - one row per set
//   covs      - one matrix per set, data.cols x data.cols
void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int dataType, int labelType )
{
vector<int>::const_iterator sit = sizes.begin();
int total = 0;
for( ; sit != sizes.end(); ++sit )
total += *sit;
CV_Assert( _means.rows == (int)sizes.size() && covs.size() == sizes.size() );
CV_Assert( !data.empty() && data.rows == total );
CV_Assert( data.type() == dataType );
labels.create( data.rows, 1, labelType );
// start from random noise; every row is transformed per set below
randn( data, Scalar::all(-1.0), Scalar::all(1.0) );
// split _means into one row header per set so it can be iterated together with covs
vector<Mat> means(sizes.size());
for(int i = 0; i < _means.rows; i++)
means[i] = _means.row(i);
vector<Mat>::const_iterator mit = means.begin(), cit = covs.begin();
// [bi, ei) is the row range of the current set
int bi, ei = 0;
sit = sizes.begin();
// l is the index of the current set; p is the running label row and always equals i
for( int p = 0, l = 0; sit != sizes.end(); ++sit, ++mit, ++cit, l++ )
{
bi = ei;
ei = bi + *sit;
CV_Assert( mit->rows == 1 && mit->cols == data.cols );
CV_Assert( cit->rows == data.cols && cit->cols == data.cols );
for( int i = bi; i < ei; i++, p++ )
{
Mat r = data.row(i);
// NOTE(review): r is a header over row i of data; the assignment is presumably
// evaluated in place so that data itself is updated - confirm.
r = r * (*cit) + *mit;
if( labelType == CV_32FC1 )
labels.at<float>(p, 0) = (float)l;
else if( labelType == CV_32SC1 )
labels.at<int>(p, 0) = l;
else
{
// unsupported label type: only reported by the debug-only assertion
CV_DbgAssert(0);
}
}
}
}
// Returns the index of the largest element of `count`; on ties the first one wins.
// Asserts when no element is greater than -1 (which includes an empty vector).
int maxIdx( const vector<int>& count )
{
    int bestPos = -1;
    int bestVal = -1;
    const int n = (int)count.size();
    for( int pos = 0; pos < n; pos++ )
    {
        // strict comparison keeps the earliest position among equal values
        if( count[pos] > bestVal )
        {
            bestVal = count[pos];
            bestPos = pos;
        }
    }
    CV_Assert( bestPos >= 0);
    return bestPos;
}
// Maps every original cluster to the label that occurs most often inside it.
//   labels           - 1 x N or N x 1 matrix (CV_32SC1 or CV_32FC1) with N = sum(sizes);
//                      cluster k occupies the k-th consecutive block of sizes[k] entries
//   sizes            - number of entries of every original cluster
//   labelsMap        - output: labelsMap[k] is the most frequent label in block k
//   checkClusterUniq - when true, two blocks mapping to the same label trigger an
//                      assertion and an unused label makes the function return false
// Returns true on success. Every label must lie in [0, sizes.size()).
bool getLabelsMap( const Mat& labels, const vector<int>& sizes, vector<int>& labelsMap, bool checkClusterUniq)
{
    size_t total = 0, nclusters = sizes.size();
    for(size_t i = 0; i < sizes.size(); i++)
        total += sizes[i];

    CV_Assert( !labels.empty() );
    CV_Assert( labels.total() == total && (labels.cols == 1 || labels.rows == 1));
    CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );

    bool isFlt = labels.type() == CV_32FC1;

    labelsMap.resize(nclusters);

    // buzy[l] is set once label l has been assigned to some block
    vector<bool> buzy(nclusters, false);
    int startIndex = 0;
    for( size_t clusterIndex = 0; clusterIndex < sizes.size(); clusterIndex++ )
    {
        // histogram of the labels found inside the current block
        vector<int> count( nclusters, 0 );
        for( int i = startIndex; i < startIndex + sizes[clusterIndex]; i++)
        {
            int lbl = isFlt ? (int)labels.at<float>(i) : labels.at<int>(i);
            // lbl indexes count[], so negative values must be rejected as well
            CV_Assert(lbl >= 0 && lbl < (int)nclusters);
            count[lbl]++;
            CV_Assert(count[lbl] < (int)total);
        }
        startIndex += sizes[clusterIndex];

        int cls = maxIdx( count );
        CV_Assert( !checkClusterUniq || !buzy[cls] );

        labelsMap[clusterIndex] = cls;

        buzy[cls] = true;
    }

    if(checkClusterUniq)
    {
        for(size_t i = 0; i < buzy.size(); i++)
            if(!buzy[i])
                return false;
    }

    return true;
}
bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, float& err, bool labelsEquivalent, bool checkClusterUniq)
{
err = 0;
CV_Assert( !labels.empty() && !origLabels.empty() );
CV_Assert( labels.rows == 1 || labels.cols == 1 );
CV_Assert( origLabels.rows == 1 || origLabels.cols == 1 );
CV_Assert( labels.total() == origLabels.total() );
CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
CV_Assert( origLabels.type() == labels.type() );
vector<int> labelsMap;
bool isFlt = labels.type() == CV_32FC1;
if( !labelsEquivalent )
{
if( !getLabelsMap( labels, sizes, labelsMap, checkClusterUniq ) )
return false;
for( int i = 0; i < labels.rows; i++ )
if( isFlt )
err += labels.at<float>(i) != labelsMap[(int)origLabels.at<float>(i)] ? 1.f : 0.f;
else
err += labels.at<int>(i) != labelsMap[origLabels.at<int>(i)] ? 1.f : 0.f;
}
else
{
for( int i = 0; i < labels.rows; i++ )
if( isFlt )
err += labels.at<float>(i) != origLabels.at<float>(i) ? 1.f : 0.f;
else
err += labels.at<int>(i) != origLabels.at<int>(i) ? 1.f : 0.f;
}
err /= (float)labels.rows;
return true;
}
// Computes the misclassification rate between predicted and original labels.
//   _p_labels - predicted labels
//   _o_labels - original labels; must have the same number of rows and elements
//   error     - output: 1 - (matching elements / all elements)
// Both inputs are converted to CV_32S before the comparison. Always returns true.
bool calculateError( const Mat& _p_labels, const Mat& _o_labels, float& error)
{
    error = 0.0f;
    float accuracy = 0.0f;
    Mat _p_labels_temp;
    Mat _o_labels_temp;
    _p_labels.convertTo(_p_labels_temp, CV_32S);
    _o_labels.convertTo(_o_labels_temp, CV_32S);

    CV_Assert(_p_labels_temp.total() == _o_labels_temp.total());
    CV_Assert(_p_labels_temp.rows == _o_labels_temp.rows);

    // countNonZero() counts matches over the whole matrix, so the divisor has to be the
    // element count as well; dividing by rows is only right for single-column inputs.
    accuracy = (float)countNonZero(_p_labels_temp == _o_labels_temp)/(float)_p_labels_temp.total();
    error = 1 - accuracy;
    return true;
}
} // namespace