feat: 切换后端至PaddleOCR-NCNN,切换工程为CMake

1.项目后端整体迁移至PaddleOCR-NCNN算法,已通过基本的兼容性测试
2.工程改为使用CMake组织,后续为了更好地兼容第三方库,不再提供QMake工程
3.重整权利声明文件,重整代码工程,确保最小化侵权风险

Log: 切换后端至PaddleOCR-NCNN,切换工程为CMake
Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
This commit is contained in:
wangzhengyang
2022-05-10 09:54:44 +08:00
parent ecdd171c6f
commit 718c41634f
10018 changed files with 3593797 additions and 186748 deletions

76
3rdparty/ncnn/examples/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,76 @@
# ncnn_add_example(<name>)
# Declares an executable target <name> built from <name>.cpp.
# - When OpenCV_FOUND is set, the target gets the OpenCV include directories and is
#   linked against ncnn plus the OpenCV libraries.
# - Otherwise, when NCNN_SIMPLEOCV is set, the target is compiled with
#   USE_NCNN_SIMPLEOCV defined and linked against ncnn only.
# - When neither is set, no link or include settings are applied here.
macro(ncnn_add_example name)
add_executable(${name} ${name}.cpp)
if(OpenCV_FOUND)
target_include_directories(${name} PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(${name} PRIVATE ncnn ${OpenCV_LIBS})
elseif(NCNN_SIMPLEOCV)
target_compile_definitions(${name} PUBLIC USE_NCNN_SIMPLEOCV)
target_link_libraries(${name} PRIVATE ncnn)
endif()
# add test to a virtual project group
set_property(TARGET ${name} PROPERTY FOLDER "examples")
endmacro()
# Examples are only configured when NCNN_PIXEL is enabled; otherwise a warning is printed.
if(NCNN_PIXEL)
# Unless NCNN_SIMPLEOCV is set, look for OpenCV with progressively smaller
# component lists: opencv_world first, then the individual modules including
# imgcodecs/videoio, then only core/highgui/imgproc.
if(NOT NCNN_SIMPLEOCV)
find_package(OpenCV QUIET COMPONENTS opencv_world)
# for opencv 2.4 on ubuntu 16.04, there is no opencv_world but OpenCV_FOUND will be TRUE
if("${OpenCV_LIBS}" STREQUAL "")
set(OpenCV_FOUND FALSE)
endif()
if(NOT OpenCV_FOUND)
find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs videoio)
endif()
if(NOT OpenCV_FOUND)
find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
endif()
endif()
# Build the examples when either a real OpenCV or the NCNN_SIMPLEOCV option is available.
if(OpenCV_FOUND OR NCNN_SIMPLEOCV)
if(OpenCV_FOUND)
# Report the OpenCV installation that was picked up.
message(STATUS "OpenCV library: ${OpenCV_INSTALL_PATH}")
message(STATUS " version: ${OpenCV_VERSION}")
message(STATUS " libraries: ${OpenCV_LIBS}")
message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
# OpenCV major versions above 3 switch the examples to C++11.
if(${OpenCV_VERSION_MAJOR} GREATER 3)
set(CMAKE_CXX_STANDARD 11)
endif()
endif()
# Make the ncnn sources and the matching build-tree directory visible to the examples.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src)
# Examples added for both the OpenCV and the NCNN_SIMPLEOCV configuration.
ncnn_add_example(squeezenet)
ncnn_add_example(squeezenet_c_api)
ncnn_add_example(fasterrcnn)
ncnn_add_example(rfcn)
ncnn_add_example(yolov2)
ncnn_add_example(yolov3)
ncnn_add_example(yolov5)
ncnn_add_example(yolov5_pnnx)
ncnn_add_example(yolox)
ncnn_add_example(mobilenetv2ssdlite)
ncnn_add_example(mobilenetssd)
ncnn_add_example(squeezenetssd)
ncnn_add_example(shufflenetv2)
ncnn_add_example(peleenetssd_seg)
ncnn_add_example(simplepose)
ncnn_add_example(retinaface)
ncnn_add_example(yolact)
ncnn_add_example(nanodet)
ncnn_add_example(nanodetplus_pnnx)
ncnn_add_example(scrfd)
ncnn_add_example(scrfd_crowdhuman)
# These examples are only added when a real OpenCV was found.
if(OpenCV_FOUND)
ncnn_add_example(yolov4)
ncnn_add_example(rvm)
ncnn_add_example(p2pnet)
endif()
else()
message(WARNING "OpenCV not found and NCNN_SIMPLEOCV disabled, examples won't be built")
endif()
else()
message(WARNING "NCNN_PIXEL not enabled, examples won't be built")
endif()

358
3rdparty/ncnn/examples/fasterrcnn.cpp vendored Normal file
View File

@ -0,0 +1,358 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#include <math.h>
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
// One detection produced by detect_fasterrcnn and consumed by draw_objects.
struct Object
{
cv::Rect_<float> rect; // bounding box (x, y, width, height), stored as floats
int label; // class index; draw_objects uses it to index class_names
float prob; // class score; sort key for qsort_descent_inplace
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Quicksort of objects[left..right] (both ends inclusive) by descending prob.
// The range is partitioned around the prob of its middle element, then both halves
// are sorted recursively, each inside its own OpenMP section.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = objects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip entries that are already on the correct side of the pivot
        while (objects[lo].prob > pivot)
            lo++;

        while (objects[hi].prob < pivot)
            hi--;

        if (lo > hi)
            break;

        std::swap(objects[lo], objects[hi]);
        lo++;
        hi--;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(objects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(objects, lo, right);
        }
    }
}
// Sorts the whole vector by descending prob; an empty vector is left untouched.
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
    {
        qsort_descent_inplace(objects, 0, objects.size() - 1);
    }
}
// Greedy non-maximum suppression.
// objects       candidates, visited in index order (callers sort them by descending prob first)
// picked        cleared, then filled with the indices of the kept candidates in ascending order
// nms_threshold a candidate is dropped as soon as its IoU with an already kept one exceeds this
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = objects.size();

    // box areas are reused for every pair, so compute them once
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = objects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = objects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = objects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = 0;
                break; // one sufficiently overlapping kept box is enough to reject this one
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Runs the ZF Faster R-CNN model on a BGR image.
// bgr      input image
// objects  replaced with the detections, sorted by descending prob and capped at
//          max_per_image entries; coordinates are in the original image
// Returns 0 on success, -1 (with objects emptied) when the model files cannot be loaded.
static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net fasterrcnn;

    fasterrcnn.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
    // py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
    // https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
    // ZF_faster_rcnn_final.caffemodel
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (fasterrcnn.load_param("ZF_faster_rcnn_final.param") != 0 || fasterrcnn.load_model("ZF_faster_rcnn_final.bin") != 0)
    {
        // without a model there is nothing to run; report it instead of continuing silently
        fprintf(stderr, "failed to load ZF_faster_rcnn_final model\n");
        objects.clear();
        return -1;
    }

    // hyper parameters taken from
    // py-faster-rcnn/lib/fast_rcnn/config.py
    // py-faster-rcnn/lib/fast_rcnn/test.py
    const int target_size = 600; // __C.TEST.SCALES

    const int max_per_image = 100;
    const float confidence_thresh = 0.05f;

    const float nms_threshold = 0.3f; // __C.TEST.NMS

    // scale to target detect size: the shorter side becomes target_size
    int w = bgr.cols;
    int h = bgr.rows;
    float scale = 1.f;
    if (w < h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, w, h);

    const float mean_vals[3] = {102.9801f, 115.9465f, 122.7717f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Mat im_info(3);
    im_info[0] = h;
    im_info[1] = w;
    im_info[2] = scale;

    // step1, extract feature and all rois
    ncnn::Extractor ex1 = fasterrcnn.create_extractor();

    ex1.input("data", in);
    ex1.input("im_info", im_info);

    ncnn::Mat conv5_relu5; // feature
    ncnn::Mat rois;        // all rois
    ex1.extract("conv5_relu5", conv5_relu5);
    ex1.extract("rois", rois);

    // step2, extract bbox and score for each roi
    std::vector<std::vector<Object> > class_candidates;
    for (int i = 0; i < rois.c; i++)
    {
        ncnn::Extractor ex2 = fasterrcnn.create_extractor();

        ncnn::Mat roi = rois.channel(i); // get single roi
        ex2.input("conv5_relu5", conv5_relu5);
        ex2.input("rois", roi);

        ncnn::Mat bbox_pred;
        ncnn::Mat cls_prob;
        ex2.extract("bbox_pred", bbox_pred);
        ex2.extract("cls_prob", cls_prob);

        int num_class = cls_prob.w;
        class_candidates.resize(num_class);

        // find class id with highest score
        int label = 0;
        float score = 0.f;
        for (int k = 0; k < num_class; k++)
        {
            float class_score = cls_prob[k];
            if (class_score > score)
            {
                label = k;
                score = class_score;
            }
        }

        // ignore background or low score
        if (label == 0 || score <= confidence_thresh)
            continue;

        // fprintf(stderr, "%d = %f\n", label, score);

        // unscale to image size
        float x1 = roi[0] / scale;
        float y1 = roi[1] / scale;
        float x2 = roi[2] / scale;
        float y2 = roi[3] / scale;

        float pb_w = x2 - x1 + 1;
        float pb_h = y2 - y1 + 1;

        // apply bbox regression
        float dx = bbox_pred[label * 4];
        float dy = bbox_pred[label * 4 + 1];
        float dw = bbox_pred[label * 4 + 2];
        float dh = bbox_pred[label * 4 + 3];

        float cx = x1 + pb_w * 0.5f;
        float cy = y1 + pb_h * 0.5f;

        float obj_cx = cx + pb_w * dx;
        float obj_cy = cy + pb_h * dy;

        float obj_w = pb_w * exp(dw);
        float obj_h = pb_h * exp(dh);

        float obj_x1 = obj_cx - obj_w * 0.5f;
        float obj_y1 = obj_cy - obj_h * 0.5f;
        float obj_x2 = obj_cx + obj_w * 0.5f;
        float obj_y2 = obj_cy + obj_h * 0.5f;

        // clip
        obj_x1 = std::max(std::min(obj_x1, (float)(bgr.cols - 1)), 0.f);
        obj_y1 = std::max(std::min(obj_y1, (float)(bgr.rows - 1)), 0.f);
        obj_x2 = std::max(std::min(obj_x2, (float)(bgr.cols - 1)), 0.f);
        obj_y2 = std::max(std::min(obj_y2, (float)(bgr.rows - 1)), 0.f);

        // append object
        Object obj;
        obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
        obj.label = label;
        obj.prob = score;

        class_candidates[label].push_back(obj);
    }

    // post process: per-class sort + nms, then a global sort
    objects.clear();
    for (int i = 0; i < (int)class_candidates.size(); i++)
    {
        std::vector<Object>& candidates = class_candidates[i];

        qsort_descent_inplace(candidates);

        std::vector<int> picked;
        nms_sorted_bboxes(candidates, picked, nms_threshold);

        for (int j = 0; j < (int)picked.size(); j++)
        {
            int z = picked[j];
            objects.push_back(candidates[z]);
        }
    }

    qsort_descent_inplace(objects);

    // keep only the best max_per_image detections
    if (max_per_image > 0 && (size_t)max_per_image < objects.size())
    {
        objects.resize(max_per_image);
    }

    return 0;
}
// Prints every detection to stderr, draws its box and a "<class> <score>%" caption on a
// copy of bgr, shows the copy in a window named "image" and waits for a key press.
// A label outside the class_names table is captioned "unknown" instead of being used as
// an index.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label originates from the network output, so never index the table blindly
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the top/right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_fasterrcnn(m, objects);
draw_objects(m, objects);
return 0;
}

152
3rdparty/ncnn/examples/mobilenetssd.cpp vendored Normal file
View File

@ -0,0 +1,152 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// One detection produced by detect_mobilenet and consumed by draw_objects.
struct Object
{
cv::Rect_<float> rect; // bounding box (x, y, width, height), stored as floats
int label; // class index; draw_objects uses it to index class_names
float prob; // detection score; draw_objects prints it as a percentage
};
// Runs the MobileNet-SSD (VOC) model on a BGR image.
// bgr      input image; it is resized to 300x300 for the network
// objects  replaced with one entry per row of the "detection_out" blob, with the box
//          scaled to the size of bgr
// Returns 0 on success, -1 (with objects emptied) when the model files cannot be loaded.
static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net mobilenet;

    mobilenet.opt.use_vulkan_compute = true;

    // model is converted from https://github.com/chuanqi305/MobileNet-SSD
    // and can be downloaded from https://drive.google.com/open?id=0ByaKLD9QaPtucWk0Y0dha1VVY0U
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (mobilenet.load_param("mobilenet_ssd_voc_ncnn.param") != 0 || mobilenet.load_model("mobilenet_ssd_voc_ncnn.bin") != 0)
    {
        // without a model there is nothing to run; report it instead of continuing silently
        fprintf(stderr, "failed to load mobilenet_ssd_voc_ncnn model\n");
        objects.clear();
        return -1;
    }

    const int target_size = 300;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = mobilenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    objects.clear();
    for (int i = 0; i < out.h; i++)
    {
        // row layout as read below: label, prob, then two corners given relative to the image size
        const float* values = out.row(i);

        Object object;
        object.label = values[0];
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
// Prints every detection to stderr, draws its box and a "<class> <score>%" caption on a
// copy of bgr, shows the copy in a window named "image" and waits for a key press.
// A label outside the class_names table is captioned "unknown" instead of being used as
// an index.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label is read straight from the network output, so never index the table blindly
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the top/right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_mobilenet(m, objects);
draw_objects(m, objects);
return 0;
}

View File

@ -0,0 +1,159 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// Layer subclass that adds no members and overrides nothing.
// detect_mobilenetv2 registers its creator for the "Silence" layer type.
class Noop : public ncnn::Layer
{
};
// NOTE(review): presumably this ncnn macro defines Noop_layer_creator, the name passed to
// register_custom_layer in detect_mobilenetv2 — confirm against the ncnn headers.
DEFINE_LAYER_CREATOR(Noop)
// One detection produced by detect_mobilenetv2 and consumed by draw_objects.
struct Object
{
cv::Rect_<float> rect; // bounding box (x, y, width, height), stored as floats
int label; // class index; draw_objects uses it to index class_names
float prob; // detection score; draw_objects prints it as a percentage
};
// Runs the MobileNetV2-SSDLite (VOC) model on a BGR image.
// bgr      input image; it is resized to 300x300 for the network
// objects  replaced with one entry per row of the "detection_out" blob, with the box
//          scaled to the size of bgr
// Returns 0 on success, -1 (with objects emptied) when the model files cannot be loaded.
static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net mobilenetv2;

    mobilenetv2.opt.use_vulkan_compute = true;

    // "Silence" layers in the model are handled by the empty Noop layer
    mobilenetv2.register_custom_layer("Silence", Noop_layer_creator);

    // original pretrained model from https://github.com/chuanqi305/MobileNetv2-SSDLite
    // https://github.com/chuanqi305/MobileNetv2-SSDLite/blob/master/ssdlite/voc/deploy.prototxt
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (mobilenetv2.load_param("mobilenetv2_ssdlite_voc.param") != 0 || mobilenetv2.load_model("mobilenetv2_ssdlite_voc.bin") != 0)
    {
        // without a model there is nothing to run; report it instead of continuing silently
        fprintf(stderr, "failed to load mobilenetv2_ssdlite_voc model\n");
        objects.clear();
        return -1;
    }

    const int target_size = 300;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = mobilenetv2.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    objects.clear();
    for (int i = 0; i < out.h; i++)
    {
        // row layout as read below: label, prob, then two corners given relative to the image size
        const float* values = out.row(i);

        Object object;
        object.label = values[0];
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
// Prints every detection to stderr, draws its box and a "<class> <score>%" caption on a
// copy of bgr, shows the copy in a window named "image" and waits for a key press.
// A label outside the class_names table is captioned "unknown" instead of being used as
// an index.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label is read straight from the network output, so never index the table blindly
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the top/right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_mobilenetv2(m, objects);
draw_objects(m, objects);
return 0;
}

View File

@ -0,0 +1,173 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#include "platform.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
#if NCNN_VULKAN
#include "gpu.h"
#endif // NCNN_VULKAN
// Limits v to the closed range [lo, hi] and returns a reference to whichever of the
// three arguments is the result. Asserts that hi is not less than lo.
template<class T>
const T& clamp(const T& v, const T& lo, const T& hi)
{
    assert(!(hi < lo));

    if (v < lo)
        return lo;

    if (hi < v)
        return hi;

    return v;
}
// One detection produced by detect_mobilenetv3 and consumed by draw_objects.
struct Object
{
cv::Rect_<float> rect; // bounding box (x, y, width, height), stored as floats
int label; // class index; draw_objects uses it to index class_names
float prob; // detection score; draw_objects only draws entries above 0.6
};
// Runs the MobileNetV3-SSDLite (VOC) model on a BGR image.
// bgr      input image; it is converted to RGB and resized to 300x300 for the network
// objects  replaced with one entry per row of the "detection_out" blob, with the box
//          clamped to the network input and then scaled to the size of bgr
// Returns 0 on success, -1 (with objects emptied) when the model files cannot be loaded.
static int detect_mobilenetv3(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net mobilenetv3;

#if NCNN_VULKAN
    mobilenetv3.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

    // converted ncnn model from https://github.com/ujsyehao/mobilenetv3-ssd
    if (mobilenetv3.load_param("./mobilenetv3_ssdlite_voc.param") != 0 || mobilenetv3.load_model("./mobilenetv3_ssdlite_voc.bin") != 0)
    {
        // without a model there is nothing to run; report it instead of continuing silently
        fprintf(stderr, "failed to load mobilenetv3_ssdlite_voc model\n");
        objects.clear();
        return -1;
    }

    const int target_size = 300;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_size, target_size);

    const float mean_vals[3] = {123.675f, 116.28f, 103.53f};
    const float norm_vals[3] = {1.0f, 1.0f, 1.0f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = mobilenetv3.create_extractor();

    ex.input("input", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    objects.clear();
    for (int i = 0; i < out.h; i++)
    {
        // row layout as read below: label, prob, then two corners given relative to the input size
        const float* values = out.row(i);

        Object object;
        object.label = values[0];
        object.prob = values[1];

        // filter out cross-boundary: clamp in network-input pixels, then rescale to the image
        float x1 = clamp(values[2] * target_size, 0.f, float(target_size - 1)) / target_size * img_w;
        float y1 = clamp(values[3] * target_size, 0.f, float(target_size - 1)) / target_size * img_h;
        float x2 = clamp(values[4] * target_size, 0.f, float(target_size - 1)) / target_size * img_w;
        float y2 = clamp(values[5] * target_size, 0.f, float(target_size - 1)) / target_size * img_h;

        object.rect.x = x1;
        object.rect.y = y1;
        object.rect.width = x2 - x1;
        object.rect.height = y2 - y1;

        objects.push_back(object);
    }

    return 0;
}
// For every detection whose prob is above 0.6: prints it to stderr and draws its box and
// a "<class> <score>%" caption on a copy of bgr. The copy is then shown in a window named
// "image" until a key is pressed.
// A label outside the class_names table is captioned "unknown" instead of being used as
// an index.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        // low-confidence detections are neither printed nor drawn
        if (!(obj.prob > 0.6))
            continue;

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label is read straight from the network output, so never index the table blindly
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the top/right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_mobilenetv3(m, objects);
draw_objects(m, objects);
return 0;
}

420
3rdparty/ncnn/examples/nanodet.cpp vendored Normal file
View File

@ -0,0 +1,420 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdlib.h>
#include <float.h>
#include <stdio.h>
#include <vector>
// One detection produced by generate_proposals / detect_nanodet and consumed by draw_objects.
struct Object
{
cv::Rect_<float> rect; // bounding box (x, y, width, height), stored as floats
int label; // class index; draw_objects uses it to index class_names
float prob; // class score; sort key for qsort_descent_inplace
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Quicksort of faceobjects[left..right] (both ends inclusive) by descending prob.
// The range is partitioned around the prob of its middle element, then both halves
// are sorted recursively, each inside its own OpenMP section.
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip entries that are already on the correct side of the pivot
        while (faceobjects[lo].prob > pivot)
            lo++;

        while (faceobjects[hi].prob < pivot)
            hi--;

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        lo++;
        hi--;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(faceobjects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}
// Sorts the whole vector by descending prob; an empty vector is left untouched.
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
{
    if (!faceobjects.empty())
    {
        qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
    }
}
// Greedy non-maximum suppression.
// faceobjects   candidates, visited in index order (the caller sorts them by descending prob first)
// picked        cleared, then filled with the indices of the kept candidates in ascending order
// nms_threshold a candidate is dropped as soon as its IoU with an already kept one exceeds this
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // box areas are reused for every pair, so compute them once
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = 0;
                break; // one sufficiently overlapping kept box is enough to reject this one
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Turns the class scores and box-distance predictions of one stride level into candidate boxes.
// cls_pred        one row per grid cell, one column per class score
// dis_pred        one row per grid cell; each row is viewed below as 4 rows of dis_pred.w / 4 values
// stride          size of a grid cell in pixels of the padded input
// in_pad          padded network input; only its w/h are used, to derive the grid dimensions
// prob_threshold  cells whose best class score is below this are skipped
// objects         candidates are appended (the vector is not cleared here); coordinates are in
//                 the padded-input frame
static void generate_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
const int num_grid = cls_pred.h;
// Derive the grid width/height: the longer side comes from in_pad and stride,
// the other one from the total number of rows.
int num_grid_x;
int num_grid_y;
if (in_pad.w > in_pad.h)
{
num_grid_x = in_pad.w / stride;
num_grid_y = num_grid / num_grid_x;
}
else
{
num_grid_y = in_pad.h / stride;
num_grid_x = num_grid / num_grid_y;
}
const int num_class = cls_pred.w;
const int reg_max_1 = dis_pred.w / 4;
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
const int idx = i * num_grid_x + j;
const float* scores = cls_pred.row(idx);
// find label with max score
int label = -1;
float score = -FLT_MAX;
for (int k = 0; k < num_class; k++)
{
if (scores[k] > score)
{
label = k;
score = scores[k];
}
}
if (score >= prob_threshold)
{
// View this cell's dis_pred row as a reg_max_1 x 4 matrix.
// NOTE(review): this Mat constructor presumably wraps the given pointer without copying,
// so the in-place softmax below would write into dis_pred's data — confirm in ncnn's mat.h.
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
{
// Run a temporary single-threaded Softmax layer in place over bbox_pred.
ncnn::Layer* softmax = ncnn::create_layer("Softmax");
ncnn::ParamDict pd;
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
}
// Each of the 4 rows becomes one distance: the sum of index * value over the row,
// scaled by stride.
float pred_ltrb[4];
for (int k = 0; k < 4; k++)
{
float dis = 0.f;
const float* dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++)
{
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * stride;
}
// Cell centre in pixels; the four distances are subtracted/added around it to get the box.
float pb_cx = (j + 0.5f) * stride;
float pb_cy = (i + 0.5f) * stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
Object obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.prob = score;
objects.push_back(obj);
}
}
}
}
// Runs the NanoDet-m model on a BGR image.
// bgr      input image
// objects  replaced with the detections that survive NMS, in descending prob order and
//          with coordinates mapped back to (and clipped against) the original image
// Returns 0 on success, -1 (with objects emptied) when the model files cannot be loaded.
static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net nanodet;

    nanodet.opt.use_vulkan_compute = true;
    // nanodet.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/RangiLyu/nanodet
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (nanodet.load_param("nanodet_m.param") != 0 || nanodet.load_model("nanodet_m.bin") != 0)
    {
        // without a model there is nothing to run; report it instead of continuing silently
        fprintf(stderr, "failed to load nanodet_m model\n");
        objects.clear();
        return -1;
    }

    int width = bgr.cols;
    int height = bgr.rows;

    const int target_size = 320;
    const float prob_threshold = 0.4f;
    const float nms_threshold = 0.5f;

    // resize so that the longer side becomes target_size, keeping the aspect ratio
    int w = width;
    int h = height;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h);

    // pad both dimensions up to the next multiple of 32, splitting the padding between the two borders
    int wpad = (w + 31) / 32 * 32 - w;
    int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);

    const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
    const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
    in_pad.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = nanodet.create_extractor();

    ex.input("input.1", in_pad);

    std::vector<Object> proposals;

    // stride 8
    {
        ncnn::Mat cls_pred;
        ncnn::Mat dis_pred;
        ex.extract("792", cls_pred);
        ex.extract("795", dis_pred);

        std::vector<Object> objects8;
        generate_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);

        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
    }

    // stride 16
    {
        ncnn::Mat cls_pred;
        ncnn::Mat dis_pred;
        ex.extract("814", cls_pred);
        ex.extract("817", dis_pred);

        std::vector<Object> objects16;
        generate_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);

        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
    }

    // stride 32
    {
        ncnn::Mat cls_pred;
        ncnn::Mat dis_pred;
        ex.extract("836", cls_pred);
        ex.extract("839", dis_pred);

        std::vector<Object> objects32;
        generate_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);

        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Prints every detection to stderr, draws its box and a "<class> <score>%" caption on a
// copy of bgr, shows the copy in a window named "image" and waits for a key press.
// A label outside the class_names table is captioned "unknown" instead of being used as
// an index.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label depends on the class count of the loaded model, so never index the table blindly
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the top/right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_nanodet(m, objects);
draw_objects(m, objects);
return 0;
}

View File

@ -0,0 +1,426 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdlib.h>
#include <float.h>
#include <stdio.h>
#include <vector>
// One detection produced by this example.
struct Object
{
// bounding box as x, y, width, height (float pixels)
cv::Rect_<float> rect;
// class index; draw_objects uses it to index its class name table
int label;
// class score after the sigmoid; also the key used for sorting before NMS
float prob;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Quicksort of faceobjects[left..right] (both bounds inclusive) by prob, highest first.
// The pivot is the prob of the middle element; after partitioning, the two halves are
// sorted recursively, each in its own OpenMP section.
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip entries that already sit on the correct side of the pivot
        for (; faceobjects[lo].prob > pivot; ++lo)
        {
        }
        for (; faceobjects[hi].prob < pivot; --hi)
        {
        }

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        ++lo;
        --hi;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(faceobjects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
{
if (faceobjects.empty())
return;
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
}
// Greedy non-maximum suppression.
//
// faceobjects   candidate boxes, visited in index order (callers in this file sort them
//               by descending prob first, so earlier boxes win)
// picked        output: indices into faceobjects of the boxes that are kept; cleared first
// nms_threshold a box is dropped when its IoU with any already kept box exceeds this value
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // box areas are needed for every IoU, so compute them once
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = 0;
                // one overlap is enough to suppress; further comparisons cannot change that
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Logistic function 1 / (1 + e^-x); maps a raw logit to a score in (0, 1).
static inline float sigmoid(float x)
{
    return 1.0f / (exp(-x) + 1.0f);
}
// Decode one output level of the network into scored boxes and append them to objects.
//
// pred           output blob of this level: channels [0, 80) hold the class logits, the
//                remaining channels hold 4 groups of reg_max_1 values (one group per box side)
// stride         size of one grid cell of this level, in pixels of the padded input
// in_pad         padded network input; not read here, kept for call compatibility
// prob_threshold minimum sigmoid class score a cell needs to emit a box
// objects        output list, appended to; boxes are in padded-input pixel coordinates
static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    const int num_grid_x = pred.w;
    const int num_grid_y = pred.h;

    const int num_class = 80; // number of classes. 80 for COCO
    const int reg_max_1 = (pred.c - num_class) / 4;

    // The softmax applied to the side distributions is the same operator for every cell,
    // so it is created and configured once instead of once per accepted cell.
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_packing_layout = false;

    ncnn::Layer* softmax = ncnn::create_layer("Softmax");

    ncnn::ParamDict pd;
    pd.set(0, 1); // axis
    pd.set(1, 1);
    softmax->load_param(pd);

    softmax->create_pipeline(opt);

    for (int i = 0; i < num_grid_y; i++)
    {
        for (int j = 0; j < num_grid_x; j++)
        {
            // find label with max score
            int label = -1;
            float score = -FLT_MAX;
            for (int k = 0; k < num_class; k++)
            {
                float s = pred.channel(k).row(i)[j];
                if (s > score)
                {
                    label = k;
                    score = s;
                }
            }

            score = sigmoid(score);

            if (score < prob_threshold)
                continue;

            // gather the 4 x reg_max_1 distribution values of this cell, one row per side
            ncnn::Mat bbox_pred(reg_max_1, 4);
            for (int k = 0; k < reg_max_1 * 4; k++)
            {
                bbox_pred[k] = pred.channel(num_class + k).row(i)[j];
            }

            softmax->forward_inplace(bbox_pred, opt);

            // each side distance is the expectation of its distribution, scaled by the stride
            float pred_ltrb[4];
            for (int k = 0; k < 4; k++)
            {
                float dis = 0.f;
                const float* dis_after_sm = bbox_pred.row(k);
                for (int l = 0; l < reg_max_1; l++)
                {
                    dis += l * dis_after_sm[l];
                }

                pred_ltrb[k] = dis * stride;
            }

            // distances are measured from the cell position (left, top, right, bottom)
            float pb_cx = j * stride;
            float pb_cy = i * stride;

            float x0 = pb_cx - pred_ltrb[0];
            float y0 = pb_cy - pred_ltrb[1];
            float x1 = pb_cx + pred_ltrb[2];
            float y1 = pb_cy + pred_ltrb[3];

            Object obj;
            obj.rect.x = x0;
            obj.rect.y = y0;
            obj.rect.width = x1 - x0;
            obj.rect.height = y1 - y0;
            obj.label = label;
            obj.prob = score;

            objects.push_back(obj);
        }
    }

    softmax->destroy_pipeline(opt);
    delete softmax;
}
// Run the NanoDet-Plus model on a BGR image.
//
// The image is resized so that its longer side becomes target_size, padded to a multiple
// of 32, normalised and fed to the network. The four output levels are decoded, merged,
// sorted by score, filtered with NMS and mapped back to coordinates of the original image.
//
// bgr     input image
// objects output detections; any previous content is discarded
// returns 0 on success, -1 when the model files cannot be loaded (objects is then empty)
static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    objects.clear();

    ncnn::Net nanodet;

    nanodet.opt.use_vulkan_compute = true;
    // nanodet.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/RangiLyu/nanodet
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // nanodet.load_param("nanodet-plus-m_320.torchscript.ncnn.param");
    // nanodet.load_model("nanodet-plus-m_320.torchscript.ncnn.bin");
    if (nanodet.load_param("nanodet-plus-m_416.torchscript.ncnn.param"))
    {
        fprintf(stderr, "load_param nanodet-plus-m_416.torchscript.ncnn.param failed\n");
        return -1;
    }
    if (nanodet.load_model("nanodet-plus-m_416.torchscript.ncnn.bin"))
    {
        fprintf(stderr, "load_model nanodet-plus-m_416.torchscript.ncnn.bin failed\n");
        return -1;
    }

    int width = bgr.cols;
    int height = bgr.rows;

    // const int target_size = 320;
    const int target_size = 416;
    const float prob_threshold = 0.4f;
    const float nms_threshold = 0.5f;

    // scale the longer side to target_size, keeping the aspect ratio
    int w = width;
    int h = height;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h);

    // pad both sides up to the next multiple of 32, splitting the padding evenly
    int wpad = (w + 31) / 32 * 32 - w;
    int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);

    const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
    const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
    in_pad.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = nanodet.create_extractor();

    ex.input("in0", in_pad);

    // output blob name and grid stride of every level, in extraction order
    static const char* const level_blobs[] = {"231", "228", "225", "222"};
    static const int level_strides[] = {8, 16, 32, 64};
    const int num_levels = sizeof(level_strides) / sizeof(level_strides[0]);

    std::vector<Object> proposals;
    for (int lv = 0; lv < num_levels; lv++)
    {
        ncnn::Mat pred;
        ex.extract(level_blobs[lv], pred);

        // generate_proposals appends, so all levels accumulate in one list
        generate_proposals(pred, level_strides[lv], in_pad, prob_threshold, proposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Visualise detections: log each one to stderr, draw its box and a
// "<class name> <score>%" tag on a copy of bgr, then show the copy and wait for a key.
// Object::label indexes the class name table below without a range check.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    cv::Mat image = bgr.clone();

    for (std::vector<Object>::const_iterator it = objects.begin(); it != objects.end(); ++it)
    {
        const Object& hit = *it;

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", hit.label, hit.prob,
                hit.rect.x, hit.rect.y, hit.rect.width, hit.rect.height);

        cv::rectangle(image, hit.rect, cv::Scalar(255, 0, 0));

        char text[256];
        sprintf(text, "%s %.1f%%", class_names[hit.label], hit.prob * 100);

        int baseLine = 0;
        cv::Size text_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // tag sits above the top-left corner of the box, kept inside the top and right borders
        int left = hit.rect.x;
        int top = hit.rect.y - text_size.height - baseLine;
        if (top < 0)
            top = 0;
        if (left + text_size.width > image.cols)
            left = image.cols - text_size.width;

        // filled white plate first, black caption on top
        const cv::Rect plate(cv::Point(left, top), cv::Size(text_size.width, text_size.height + baseLine));
        cv::rectangle(image, plate, cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(left, top + text_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_nanodet(m, objects);
draw_objects(m, objects);
return 0;
}

240
3rdparty/ncnn/examples/p2pnet.cpp vendored Normal file
View File

@ -0,0 +1,240 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdlib.h>
#include <float.h>
#include <stdio.h>
#include <vector>
// One predicted head position of the crowd counting network.
struct CrowdPoint
{
// location in pixels of the original image (detect_crowd rescales it from the network input size)
cv::Point pt;
// score read from column 1 of the "pred_scores" output; draw_result draws points above 0.5
float prob;
};
// Replicate a set of anchor offsets over every cell of a w x h grid.
//
// w, h                  grid size in cells
// stride                cell size in pixels; the centre of cell (gx, gy) is
//                       ((gx + 0.5) * stride, (gy + 0.5) * stride)
// anchor_points         interleaved x,y offsets relative to a cell centre
// shifted_anchor_points output: for each cell in row-major order, every anchor offset
//                       added to that cell's centre, interleaved x,y. Must be a different
//                       vector than anchor_points.
static void shift(int w, int h, int stride, const std::vector<float>& anchor_points, std::vector<float>& shifted_anchor_points)
{
    const size_t num_values = anchor_points.size();
    const size_t num_anchors = num_values / 2;

    shifted_anchor_points.resize((size_t)2 * w * h * num_values / 2, 0);

    for (int gy = 0; gy < h; gy++)
    {
        const float center_y = (gy + 0.5) * stride;

        for (int gx = 0; gx < w; gx++)
        {
            const float center_x = (gx + 0.5) * stride;

            // first output slot of this cell
            const size_t cell = (size_t)gy * w + gx;
            const size_t base = cell * num_values / 2 * 2;

            for (size_t a = 0; a < num_anchors; a++)
            {
                shifted_anchor_points[base + a * 2] = anchor_points[a * 2] + center_x;
                shifted_anchor_points[base + a * 2 + 1] = anchor_points[a * 2 + 1] + center_y;
            }
        }
    }
}
// Build a row x line grid of anchor offsets inside one stride x stride cell.
//
// The cell is split into `line` columns and `row` rows; each anchor sits at the centre of
// one sub-cell and is expressed relative to the centre of the whole cell.
//
// stride        cell size in pixels
// row, line     number of anchor rows / columns per cell
// anchor_points output: interleaved x,y offsets in row-major order (row * line pairs)
static void generate_anchor_points(int stride, int row, int line, std::vector<float>& anchor_points)
{
    const float row_step = (float)stride / row;
    const float line_step = (float)stride / line;

    // half a cell as a real number; an integer division here would truncate for odd
    // strides and shift every anchor off centre
    const double half_stride = stride * 0.5;

    anchor_points.resize((size_t)row * line * 2, 0);

    for (int r = 0; r < row; r++)
    {
        const float y = (r + 0.5) * row_step - half_stride;

        for (int c = 0; c < line; c++)
        {
            const float x = (c + 0.5) * line_step - half_stride;

            const size_t k = ((size_t)r * line + c) * 2;
            anchor_points[k] = x;
            anchor_points[k + 1] = y;
        }
    }
}
static void generate_anchor_points(int img_w, int img_h, std::vector<int> pyramid_levels, int row, int line, std::vector<float>& all_anchor_points)
{
std::vector<std::pair<int, int> > image_shapes;
std::vector<int> strides;
for (int i = 0; i < pyramid_levels.size(); i++)
{
int new_h = std::floor((img_h + std::pow(2, pyramid_levels[i]) - 1) / std::pow(2, pyramid_levels[i]));
int new_w = std::floor((img_w + std::pow(2, pyramid_levels[i]) - 1) / std::pow(2, pyramid_levels[i]));
image_shapes.push_back(std::make_pair(new_w, new_h));
strides.push_back(std::pow(2, pyramid_levels[i]));
}
all_anchor_points.clear();
for (int i = 0; i < pyramid_levels.size(); i++)
{
std::vector<float> anchor_points;
generate_anchor_points(std::pow(2, pyramid_levels[i]), row, line, anchor_points);
std::vector<float> shifted_anchor_points;
shift(image_shapes[i].first, image_shapes[i].second, strides[i], anchor_points, shifted_anchor_points);
all_anchor_points.insert(all_anchor_points.end(), shifted_anchor_points.begin(), shifted_anchor_points.end());
}
}
// Run the P2PNet crowd counting model on a BGR image.
//
// The image is resized so that both sides are multiples of 128 (rounded down), converted
// to RGB, normalised and fed to the network together with the generated anchor points.
// Every predicted point is mapped back to the original image size and appended to
// crowd_points with its score; no score threshold is applied here.
//
// returns 0 on success, -1 when the model cannot be loaded, the image is smaller than
// 128 pixels on either side, or the network outputs cannot be extracted. Nothing is
// appended to crowd_points in the failure cases.
static int detect_crowd(const cv::Mat& bgr, std::vector<CrowdPoint>& crowd_points)
{
    ncnn::Option opt;
    opt.num_threads = 4;
    opt.use_vulkan_compute = false;
    opt.use_bf16_storage = false;

    ncnn::Net net;
    net.opt = opt;

    // model is converted from
    // https://github.com/TencentYoutuResearch/CrowdCounting-P2PNet
    // the ncnn model https://pan.baidu.com/s/1O1CBgvY6yJkrK8Npxx3VMg pwd: ezhx
    if (net.load_param("p2pnet.param"))
    {
        fprintf(stderr, "load_param p2pnet.param failed\n");
        return -1;
    }
    if (net.load_model("p2pnet.bin"))
    {
        fprintf(stderr, "load_model p2pnet.bin failed\n");
        return -1;
    }

    int width = bgr.cols;
    int height = bgr.rows;

    // round both sides down to a multiple of 128
    int new_width = width / 128 * 128;
    int new_height = height / 128 * 128;

    // a side below 128 pixels rounds down to 0, which would give an empty network input
    // and a division by zero when mapping the points back
    if (new_width == 0 || new_height == 0)
    {
        fprintf(stderr, "image %d x %d is too small, both sides must be at least 128 pixels\n", width, height);
        return -1;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, new_width, new_height);

    // a single pyramid level 3 (stride 8) with 2 x 2 anchors per cell
    std::vector<int> pyramid_levels(1, 3);
    std::vector<float> all_anchor_points;
    generate_anchor_points(in.w, in.h, pyramid_levels, 2, 2, all_anchor_points);

    // wraps the vector's storage without copying; all_anchor_points must outlive the extractor calls
    ncnn::Mat anchor_points = ncnn::Mat(2, all_anchor_points.size() / 2, all_anchor_points.data());

    ncnn::Extractor ex = net.create_extractor();

    const float mean_vals1[3] = {123.675f, 116.28f, 103.53f};
    const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f};
    in.substract_mean_normalize(mean_vals1, norm_vals1);

    ex.input("input", in);
    ex.input("anchor", anchor_points);

    ncnn::Mat score, points;
    if (ex.extract("pred_scores", score) || ex.extract("pred_points", points))
    {
        fprintf(stderr, "extracting pred_scores / pred_points failed\n");
        return -1;
    }

    for (int i = 0; i < points.h; i++)
    {
        float* score_data = score.row(i);
        float* points_data = points.row(i);

        CrowdPoint cp;
        // scale from network input coordinates back to the original image
        int x = points_data[0] / new_width * width;
        int y = points_data[1] / new_height * height;
        cp.pt = cv::Point(x, y);
        cp.prob = score_data[1];
        crowd_points.push_back(cp);
    }

    return 0;
}
// Mark every crowd point whose score is above 0.5 with a filled red dot on a copy of
// bgr, show the copy and wait for a key press.
static void draw_result(const cv::Mat& bgr, const std::vector<CrowdPoint>& crowd_points)
{
    const float threshold = 0.5f;

    cv::Mat image = bgr.clone();

    const size_t total = crowd_points.size();
    for (size_t k = 0; k < total; k++)
    {
        const CrowdPoint& candidate = crowd_points[k];
        if (!(candidate.prob > threshold))
            continue;

        // radius 4, colour (0, 0, 255), thickness -1 (filled), line type 8, shift 0
        cv::circle(image, candidate.pt, 4, cv::Scalar(0, 0, 255), -1, 8, 0);
    }

    cv::imshow("image", image);
    cv::waitKey();
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat bgr = cv::imread(imagepath, 1);
if (bgr.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<CrowdPoint> crowd_points;
detect_crowd(bgr, crowd_points);
draw_result(bgr, crowd_points);
return 0;
}

View File

@ -0,0 +1,196 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// One detection read from the "detection_out" blob.
struct Object
{
// bounding box in pixels of the original image (x, y, width, height)
cv::Rect_<float> rect;
// class index taken from the first value of the detection row; indexes class_names in draw_objects
int label;
// score taken from the second value of the detection row
float prob;
};
// Run the Pelee detection + segmentation model on a BGR image.
//
// bgr     input image; it is resized to 304 x 304 for the network
// objects output detections, boxes scaled to the size of bgr; previous content is discarded
// resized output segmentation map ("sigmoid" blob) resized bilinearly to the size of bgr;
//         left untouched when the model cannot be loaded
// returns 0 on success, -1 when the model files cannot be loaded
static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& objects, ncnn::Mat& resized)
{
    objects.clear();

    ncnn::Net peleenet;

    peleenet.opt.use_vulkan_compute = true;

    // model is converted from https://github.com/eric612/MobileNet-YOLO
    // and can be downloaded from https://drive.google.com/open?id=1Wt6jKv13sBRMHgrGAJYlOlRF-o80pC0g
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (peleenet.load_param("pelee.param"))
    {
        fprintf(stderr, "load_param pelee.param failed\n");
        return -1;
    }
    if (peleenet.load_model("pelee.bin"))
    {
        fprintf(stderr, "load_model pelee.bin failed\n");
        return -1;
    }

    const int target_size = 304;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    const float mean_vals[3] = {103.9f, 116.7f, 123.6f};
    const float norm_vals[3] = {0.017f, 0.017f, 0.017f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = peleenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);

    // one detection per row: label, score, then the two box corners, which are
    // multiplied by the image size below
    for (int i = 0; i < out.h; i++)
    {
        const float* values = out.row(i);

        Object object;
        object.label = values[0];
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    ncnn::Mat seg_out;
    ex.extract("sigmoid", seg_out);
    resize_bilinear(seg_out, resized, img_w, img_h);
    //resize_bicubic(seg_out,resized,img_w,img_h); // sharpness

    return 0;
}
// Draw detections and the segmentation overlay on a copy of bgr and display it.
//
// bgr     image the detections refer to
// objects detections to draw (box plus "<class name> <score>%" tag, also logged to stderr)
// map     per-class segmentation scores; its width/height must not exceed those of bgr,
//         because pixel (j, i) of the map is blended into pixel (j, i) of the image
//
// For every map pixel the channel with the highest score above 0.45 is selected; only
// channels that have an entry in the colour table (the first two) are blended 50/50
// into the image.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, ncnn::Mat map)
{
    static const char* class_names[] = {"background",
                                        "person", "rider", "car", "bus",
                                        "truck", "bike", "motor",
                                        "traffic light", "traffic sign", "train"
                                       };

    cv::Mat image = bgr.clone();
    // three values per segmentation channel, read as b, g, r
    const int color[] = {128, 255, 128, 244, 35, 232};
    const int color_count = sizeof(color) / sizeof(int);

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        char text[256];
        sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // tag goes above the box, kept inside the top and right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    int width = map.w;
    int height = map.h;
    int size = map.c;
    int img_index2 = 0; // running pixel index inside one channel (row-major)
    float threshold = 0.45;
    for (int i = 0; i < height; i++)
    {
        unsigned char* ptr1 = image.ptr<unsigned char>(i);
        int img_index1 = 0; // byte offset of the current pixel in the image row
        for (int j = 0; j < width; j++)
        {
            float maxima = threshold;
            int index = -1;
            for (int c = 0; c < size; c++)
            {
                // Channels must be addressed through channel(): ncnn pads every channel
                // to an aligned size (cstep), so "base + c * width * height" points to
                // the wrong place whenever width * height is not a multiple of the alignment.
                const float* ptr3 = map.channel(c);
                if (ptr3[img_index2] > maxima)
                {
                    maxima = ptr3[img_index2];
                    index = c;
                }
            }
            if (index > -1)
            {
                int color_index = (index)*3;
                if (color_index < color_count)
                {
                    int b = color[color_index];
                    int g = color[color_index + 1];
                    int r = color[color_index + 2];
                    // 50/50 blend of the class colour and the existing pixel
                    ptr1[img_index1] = b / 2 + ptr1[img_index1] / 2;
                    ptr1[img_index1 + 1] = g / 2 + ptr1[img_index1 + 1] / 2;
                    ptr1[img_index1 + 2] = r / 2 + ptr1[img_index1 + 2] / 2;
                }
            }
            img_index1 += 3;
            img_index2++;
        }
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
ncnn::Mat seg_out;
detect_peleenet(m, objects, seg_out);
draw_objects(m, objects, seg_out);
return 0;
}

434
3rdparty/ncnn/examples/retinaface.cpp vendored Normal file
View File

@ -0,0 +1,434 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// One detected face.
struct FaceObject
{
// face bounding box (x, y, width, height) in input image pixels
cv::Rect_<float> rect;
// five landmark points decoded from the landmark blob, in the order the network emits them
cv::Point2f landmark[5];
// face score read from the classification blob; used for thresholding and sorting
float prob;
};
static inline float intersection_area(const FaceObject& a, const FaceObject& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Sort faceobjects[left..right] (inclusive) by descending prob using quicksort with the
// middle element's prob as pivot. Both partitions are recursed into, one per OpenMP section.
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right)
{
    int head = left;
    int tail = right;
    const float pivot_prob = faceobjects[(left + right) / 2].prob;

    while (head <= tail)
    {
        // advance past elements that are already on the right side of the pivot
        while (faceobjects[head].prob > pivot_prob)
            ++head;

        while (faceobjects[tail].prob < pivot_prob)
            --tail;

        if (head > tail)
            break;

        std::swap(faceobjects[head], faceobjects[tail]);
        ++head;
        --tail;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < tail) qsort_descent_inplace(faceobjects, left, tail);
        }
        #pragma omp section
        {
            if (head < right) qsort_descent_inplace(faceobjects, head, right);
        }
    }
}
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
{
if (faceobjects.empty())
return;
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
}
// Greedy non-maximum suppression over face boxes.
//
// faceobjects   candidates, visited in index order (the caller sorts them by descending
//               prob first, so higher scored faces win)
// picked        output: indices into faceobjects of the faces that are kept; cleared first
// nms_threshold a face is dropped when its IoU with any already kept face exceeds this value
static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // every IoU needs the box areas, so compute them once up front
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const FaceObject& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const FaceObject& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = 0;
                // suppressed by one kept face; the remaining comparisons cannot undo that
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// copy from src/layer/proposal.cpp
//
// Build the base anchors for one feature level.
// Returns a Mat with one row per (ratio, scale) pair, ratios outermost, each row holding
// x0, y0, x1, y1 of a box centred on (base_size / 2, base_size / 2).
static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
    const int ratio_count = ratios.w;
    const int scale_count = scales.w;

    ncnn::Mat anchors;
    anchors.create(4, ratio_count * scale_count);

    // all anchors share the same centre
    const float cx = base_size * 0.5f;
    const float cy = base_size * 0.5f;

    for (int ri = 0; ri < ratio_count; ri++)
    {
        const float ar = ratios[ri];

        // base width/height for this aspect ratio, rounded to whole pixels
        const int r_w = round(base_size / sqrt(ar));
        const int r_h = round(r_w * ar); //round(base_size * sqrt(ar));

        for (int si = 0; si < scale_count; si++)
        {
            const float scale = scales[si];

            const float half_w = r_w * scale * 0.5f;
            const float half_h = r_h * scale * 0.5f;

            float* box = anchors.row(ri * scale_count + si);
            box[0] = cx - half_w;
            box[1] = cy - half_h;
            box[2] = cx + half_w;
            box[3] = cy + half_h;
        }
    }

    return anchors;
}
// Decode the outputs of one feature level into face proposals appended to faceobjects.
//
// anchors        base anchors of this level, one row (x0, y0, x1, y1) per anchor
// feat_stride    pixel distance between neighbouring feature map cells
// score_blob     classification output; the score of anchor q is read from channel q + num_anchors
// bbox_blob      box deltas, 4 channels per anchor (dx, dy, dw, dh)
// landmark_blob  landmark deltas, 10 channels per anchor (x, y for 5 points)
// prob_threshold minimum score for a cell to produce a proposal
static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, const ncnn::Mat& landmark_blob, float prob_threshold, std::vector<FaceObject>& faceobjects)
{
    const int grid_w = score_blob.w;
    const int grid_h = score_blob.h;

    // generate face proposal from bbox deltas and shifted anchors
    const int num_anchors = anchors.h;

    for (int q = 0; q < num_anchors; q++)
    {
        const float* anchor = anchors.row(q);

        const ncnn::Mat score = score_blob.channel(q + num_anchors);
        const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4);
        const ncnn::Mat landmark = landmark_blob.channel_range(q * 10, 10);

        const float anchor_w = anchor[2] - anchor[0];
        const float anchor_h = anchor[3] - anchor[1];

        // the anchor is moved by feat_stride per cell, row by row
        float anchor_y = anchor[1];
        for (int i = 0; i < grid_h; i++, anchor_y += feat_stride)
        {
            float anchor_x = anchor[0];
            for (int j = 0; j < grid_w; j++, anchor_x += feat_stride)
            {
                const int index = i * grid_w + j;

                const float prob = score[index];
                if (!(prob >= prob_threshold))
                    continue;

                // apply center size
                const float dx = bbox.channel(0)[index];
                const float dy = bbox.channel(1)[index];
                const float dw = bbox.channel(2)[index];
                const float dh = bbox.channel(3)[index];

                const float cx = anchor_x + anchor_w * 0.5f;
                const float cy = anchor_y + anchor_h * 0.5f;

                const float pb_cx = cx + anchor_w * dx;
                const float pb_cy = cy + anchor_h * dy;

                const float pb_w = anchor_w * exp(dw);
                const float pb_h = anchor_h * exp(dh);

                const float x0 = pb_cx - pb_w * 0.5f;
                const float y0 = pb_cy - pb_h * 0.5f;
                const float x1 = pb_cx + pb_w * 0.5f;
                const float y1 = pb_cy + pb_h * 0.5f;

                FaceObject obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0 + 1;
                obj.rect.height = y1 - y0 + 1;

                // landmark k uses channels 2k (x) and 2k + 1 (y), offset from the anchor centre
                for (int k = 0; k < 5; k++)
                {
                    obj.landmark[k].x = cx + (anchor_w + 1) * landmark.channel(2 * k)[index];
                    obj.landmark[k].y = cy + (anchor_h + 1) * landmark.channel(2 * k + 1)[index];
                }

                obj.prob = prob;

                faceobjects.push_back(obj);
            }
        }
    }
}
// Run the RetinaFace model on a BGR image.
//
// The image is converted to RGB at its original size and fed to the network. The outputs
// of the three feature levels (stride 32, 16, 8) are decoded with their own anchors,
// merged, sorted by score, filtered with NMS and clipped to the image.
//
// bgr         input image
// faceobjects output faces; previous content is discarded
// returns 0 on success, -1 when the model files cannot be loaded (faceobjects is then empty)
static int detect_retinaface(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects)
{
    faceobjects.clear();

    ncnn::Net retinaface;

    retinaface.opt.use_vulkan_compute = true;

    // model is converted from
    // https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models
    // https://github.com/deepinsight/insightface/issues/669
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // retinaface.load_param("retinaface-R50.param");
    // retinaface.load_model("retinaface-R50.bin");
    if (retinaface.load_param("mnet.25-opt.param"))
    {
        fprintf(stderr, "load_param mnet.25-opt.param failed\n");
        return -1;
    }
    if (retinaface.load_model("mnet.25-opt.bin"))
    {
        fprintf(stderr, "load_model mnet.25-opt.bin failed\n");
        return -1;
    }

    const float prob_threshold = 0.8f;
    const float nms_threshold = 0.4f;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h);

    ncnn::Extractor ex = retinaface.create_extractor();

    ex.input("data", in);

    // per-level settings, in extraction order: blob names, feature stride, two anchor scales
    struct LevelConfig
    {
        const char* score_name;
        const char* bbox_name;
        const char* landmark_name;
        int feat_stride;
        float scale0;
        float scale1;
    };
    static const LevelConfig levels[] = {
        {"face_rpn_cls_prob_reshape_stride32", "face_rpn_bbox_pred_stride32", "face_rpn_landmark_pred_stride32", 32, 32.f, 16.f},
        {"face_rpn_cls_prob_reshape_stride16", "face_rpn_bbox_pred_stride16", "face_rpn_landmark_pred_stride16", 16, 8.f, 4.f},
        {"face_rpn_cls_prob_reshape_stride8", "face_rpn_bbox_pred_stride8", "face_rpn_landmark_pred_stride8", 8, 2.f, 1.f},
    };
    const int num_levels = sizeof(levels) / sizeof(levels[0]);

    // every level uses the same base size and a single aspect ratio of 1
    const int base_size = 16;

    std::vector<FaceObject> faceproposals;
    for (int lv = 0; lv < num_levels; lv++)
    {
        const LevelConfig& cfg = levels[lv];

        ncnn::Mat score_blob, bbox_blob, landmark_blob;
        ex.extract(cfg.score_name, score_blob);
        ex.extract(cfg.bbox_name, bbox_blob);
        ex.extract(cfg.landmark_name, landmark_blob);

        ncnn::Mat ratios(1);
        ratios[0] = 1.f;

        ncnn::Mat scales(2);
        scales[0] = cfg.scale0;
        scales[1] = cfg.scale1;

        ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);

        // generate_proposals appends, so all levels accumulate in one list
        generate_proposals(anchors, cfg.feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold, faceproposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(faceproposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(faceproposals, picked, nms_threshold);

    int face_count = picked.size();

    faceobjects.resize(face_count);
    for (int i = 0; i < face_count; i++)
    {
        faceobjects[i] = faceproposals[picked[i]];

        // clip to image size
        float x0 = faceobjects[i].rect.x;
        float y0 = faceobjects[i].rect.y;
        float x1 = x0 + faceobjects[i].rect.width;
        float y1 = y0 + faceobjects[i].rect.height;

        x0 = std::max(std::min(x0, (float)img_w - 1), 0.f);
        y0 = std::max(std::min(y0, (float)img_h - 1), 0.f);
        x1 = std::max(std::min(x1, (float)img_w - 1), 0.f);
        y1 = std::max(std::min(y1, (float)img_h - 1), 0.f);

        faceobjects[i].rect.x = x0;
        faceobjects[i].rect.y = y0;
        faceobjects[i].rect.width = x1 - x0;
        faceobjects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Log every detected face to stderr, then display the image annotated with the
// face boxes, the landmark points and a confidence label per face.
// Blocks until a key is pressed in the display window.
static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects)
{
    // annotate a copy so the caller's image stays untouched
    cv::Mat image = bgr.clone();

    for (size_t n = 0; n < faceobjects.size(); n++)
    {
        const FaceObject& face = faceobjects[n];

        fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", face.prob,
                face.rect.x, face.rect.y, face.rect.width, face.rect.height);

        cv::rectangle(image, face.rect, cv::Scalar(0, 255, 0));

        // filled dots (thickness -1) on landmark[0] .. landmark[4]
        for (int k = 0; k < 5; k++)
        {
            cv::circle(image, face.landmark[k], 2, cv::Scalar(0, 255, 255), -1);
        }

        char text[256];
        sprintf(text, "%.1f%%", face.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // the label goes above the box; pull it back inside at the top and right edges
        int label_x = face.rect.x;
        int label_y = face.rect.y - label_size.height - baseLine;
        if (label_y < 0)
            label_y = 0;
        if (label_x + label_size.width > image.cols)
            label_x = image.cols - label_size.width;

        // white backing rectangle, then black text on top of it
        const cv::Size backing(label_size.width, label_size.height + baseLine);
        cv::rectangle(image, cv::Rect(cv::Point(label_x, label_y), backing),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(label_x, label_y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<FaceObject> faceobjects;
detect_retinaface(m, faceobjects);
draw_faceobjects(m, faceobjects);
return 0;
}

357
3rdparty/ncnn/examples/rfcn.cpp vendored Normal file
View File

@ -0,0 +1,357 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#include <math.h>
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
// One detection produced by detect_rfcn().
struct Object
{
cv::Rect_<float> rect; // bounding box, clipped to the source image by detect_rfcn()
int label; // index of the highest-scoring class; also used to index class_names when drawing
float prob; // score of that class
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Quicksort objects[left..right] (both bounds inclusive) in place so that
// prob is in descending order. The pivot is the prob of the middle element.
// The two halves are sorted recursively inside OpenMP sections.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = objects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip elements that are already on the correct side of the pivot
        while (objects[lo].prob > pivot)
            ++lo;

        while (objects[hi].prob < pivot)
            --hi;

        if (lo > hi)
            break;

        std::swap(objects[lo], objects[hi]);
        ++lo;
        --hi;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(objects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(objects, lo, right);
        }
    }
}
static void qsort_descent_inplace(std::vector<Object>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
// Greedy non-maximum suppression.
//   objects       candidates; callers pass them sorted by descending prob, so
//                 earlier entries win over later overlapping ones
//   picked        output: indices into `objects` of the boxes that survive
//                 (cleared first)
//   nms_threshold a candidate is dropped when its IoU with any already picked
//                 box is greater than this value
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = objects.size();

    // areas are reused across many IoU computations, so compute them once
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = objects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = objects[i];

        bool keep = true;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = objects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = false;
                // one overlapping winner is enough; further comparisons cannot change the verdict
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Run the R-FCN detector on a BGR image.
//   bgr      input image (pixel data is read as ncnn::Mat::PIXEL_BGR)
//   objects  output: detections in source-image coordinates, sorted by
//            descending score and capped at max_per_image entries
// Returns 0 on success, -1 when the model files cannot be loaded
// (objects is then left empty).
static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net rfcn;

    rfcn.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/YuwenXiong/py-R-FCN
    // https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
    // https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
    // resnet50_rfcn_final.caffemodel
    if (rfcn.load_param("rfcn_end2end.param") != 0 || rfcn.load_model("rfcn_end2end.bin") != 0)
    {
        // running the extractor on a net that failed to load is pointless
        fprintf(stderr, "failed to load rfcn_end2end.param / rfcn_end2end.bin\n");
        objects.clear();
        return -1;
    }

    const int target_size = 224;

    const int max_per_image = 100;
    const float confidence_thresh = 0.6f; // CONF_THRESH

    const float nms_threshold = 0.3f; // NMS_THRESH

    // scale to target detect size: the shorter side becomes target_size
    int w = bgr.cols;
    int h = bgr.rows;
    float scale = 1.f;
    if (w < h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, w, h);

    const float mean_vals[3] = {102.9801f, 115.9465f, 122.7717f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Mat im_info(3);
    im_info[0] = h;
    im_info[1] = w;
    im_info[2] = scale;

    // step1, extract feature and all rois
    ncnn::Extractor ex1 = rfcn.create_extractor();

    ex1.input("data", in);
    ex1.input("im_info", im_info);

    ncnn::Mat rfcn_cls;
    ncnn::Mat rfcn_bbox;
    ncnn::Mat rois; // all rois
    ex1.extract("rfcn_cls", rfcn_cls);
    ex1.extract("rfcn_bbox", rfcn_bbox);
    ex1.extract("rois", rois);

    // step2, extract bbox and score for each roi
    std::vector<std::vector<Object> > class_candidates;
    for (int i = 0; i < rois.c; i++)
    {
        ncnn::Extractor ex2 = rfcn.create_extractor();

        ncnn::Mat roi = rois.channel(i); // get single roi
        ex2.input("rfcn_cls", rfcn_cls);
        ex2.input("rfcn_bbox", rfcn_bbox);
        ex2.input("rois", roi);

        ncnn::Mat bbox_pred;
        ncnn::Mat cls_prob;
        ex2.extract("bbox_pred", bbox_pred);
        ex2.extract("cls_prob", cls_prob);

        int num_class = cls_prob.w;
        class_candidates.resize(num_class);

        // find class id with highest score
        // (index named k so it does not shadow the roi index i)
        int label = 0;
        float score = 0.f;
        for (int k = 0; k < num_class; k++)
        {
            float class_score = cls_prob[k];
            if (class_score > score)
            {
                label = k;
                score = class_score;
            }
        }

        // ignore background or low score
        if (label == 0 || score <= confidence_thresh)
            continue;

        // fprintf(stderr, "%d = %f\n", label, score);

        // unscale to image size
        float x1 = roi[0] / scale;
        float y1 = roi[1] / scale;
        float x2 = roi[2] / scale;
        float y2 = roi[3] / scale;

        float pb_w = x2 - x1 + 1;
        float pb_h = y2 - y1 + 1;

        // apply bbox regression; the deltas are read from elements 4..7 of bbox_pred
        float dx = bbox_pred[4];
        float dy = bbox_pred[4 + 1];
        float dw = bbox_pred[4 + 2];
        float dh = bbox_pred[4 + 3];

        float cx = x1 + pb_w * 0.5f;
        float cy = y1 + pb_h * 0.5f;

        float obj_cx = cx + pb_w * dx;
        float obj_cy = cy + pb_h * dy;

        float obj_w = pb_w * exp(dw);
        float obj_h = pb_h * exp(dh);

        float obj_x1 = obj_cx - obj_w * 0.5f;
        float obj_y1 = obj_cy - obj_h * 0.5f;
        float obj_x2 = obj_cx + obj_w * 0.5f;
        float obj_y2 = obj_cy + obj_h * 0.5f;

        // clip
        obj_x1 = std::max(std::min(obj_x1, (float)(bgr.cols - 1)), 0.f);
        obj_y1 = std::max(std::min(obj_y1, (float)(bgr.rows - 1)), 0.f);
        obj_x2 = std::max(std::min(obj_x2, (float)(bgr.cols - 1)), 0.f);
        obj_y2 = std::max(std::min(obj_y2, (float)(bgr.rows - 1)), 0.f);

        // append object
        Object obj;
        obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
        obj.label = label;
        obj.prob = score;

        class_candidates[label].push_back(obj);
    }

    // post process: per-class sort + nms, then merge all classes
    objects.clear();
    for (int i = 0; i < (int)class_candidates.size(); i++)
    {
        std::vector<Object>& candidates = class_candidates[i];

        qsort_descent_inplace(candidates);

        std::vector<int> picked;
        nms_sorted_bboxes(candidates, picked, nms_threshold);

        for (int j = 0; j < (int)picked.size(); j++)
        {
            int z = picked[j];
            objects.push_back(candidates[z]);
        }
    }

    qsort_descent_inplace(objects);

    // keep only the best max_per_image detections
    if (max_per_image > 0 && max_per_image < (int)objects.size())
    {
        objects.resize(max_per_image);
    }

    return 0;
}
// Log every detection to stderr, then display the image annotated with a box
// and a "<class name> <score>%" label per detection.
// Blocks until a key is pressed in the display window.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // names indexed by Object::label
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };

    // annotate a copy so the caller's image stays untouched
    cv::Mat image = bgr.clone();

    for (size_t n = 0; n < objects.size(); n++)
    {
        const Object& det = objects[n];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", det.label, det.prob,
                det.rect.x, det.rect.y, det.rect.width, det.rect.height);

        cv::rectangle(image, det.rect, cv::Scalar(255, 0, 0));

        char text[256];
        sprintf(text, "%s %.1f%%", class_names[det.label], det.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // the label goes above the box; pull it back inside at the top and right edges
        int label_x = det.rect.x;
        int label_y = det.rect.y - label_size.height - baseLine;
        if (label_y < 0)
            label_y = 0;
        if (label_x + label_size.width > image.cols)
            label_x = image.cols - label_size.width;

        // white backing rectangle, then black text on top of it
        const cv::Size backing(label_size.width, label_size.height + baseLine);
        cv::rectangle(image, cv::Rect(cv::Point(label_x, label_y), backing),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(label_x, label_y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
// Entry point: expects exactly one argument, the path of the image to analyse.
// Returns -1 on a usage error, when the image cannot be read, or when
// detection reports a failure; 0 otherwise.
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
        return -1;
    }

    const char* imagepath = argv[1];

    cv::Mat m = cv::imread(imagepath, 1);
    if (m.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;
    }

    std::vector<Object> objects;
    // do not open a result window when detection itself did not succeed
    if (detect_rfcn(m, objects) != 0)
    {
        fprintf(stderr, "detect_rfcn failed\n");
        return -1;
    }

    draw_objects(m, objects);

    return 0;
}

132
3rdparty/ncnn/examples/rvm.cpp vendored Normal file
View File

@ -0,0 +1,132 @@
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdlib.h>
#include <float.h>
#include <stdio.h>
#include <vector>
// Show the matting result in three windows: the alpha matte ("pha"), the
// predicted foreground ("fgr") and the foreground alpha-blended over a flat
// background colour ("comp"). Blocks until a key is pressed.
//   bgr  source image; only used to create the composite buffer at pha's size
//   fgr  foreground, scaled by 255 into an 8-bit 3-channel image
//   pha  alpha matte, scaled by 255 into an 8-bit 1-channel image
static void draw_objects(const cv::Mat& bgr, const cv::Mat& fgr, const cv::Mat& pha)
{
    cv::Mat fgr8U;
    fgr.convertTo(fgr8U, CV_8UC3, 255.0, 0);

    cv::Mat pha8U;
    pha.convertTo(pha8U, CV_8UC1, 255.0, 0);

    cv::Mat comp;
    cv::resize(bgr, comp, pha.size(), 0, 0, 1);

    // flat background colour, one value per channel of comp
    const int background[3] = {155, 255, 120};

    for (int y = 0; y < pha8U.rows; y++)
    {
        for (int x = 0; x < pha8U.cols; x++)
        {
            uchar matte = pha8U.at<uchar>(y, x);
            float alpha = (float)matte / 255;

            const cv::Vec3b& fg = fgr8U.at<cv::Vec3b>(y, x);
            cv::Vec3b& out = comp.at<cv::Vec3b>(y, x);

            // out = fg * alpha + background * (1 - alpha), per channel
            for (int c = 0; c < 3; c++)
            {
                out[c] = fg[c] * alpha + (1 - alpha) * background[c];
            }
        }
    }

    cv::imshow("pha", pha8U);
    cv::imshow("fgr", fgr8U);
    cv::imshow("comp", comp);
    cv::waitKey(0);
}
// Run the matting network on one BGR image.
//   bgr  input image
//   pha  output: single-channel float alpha matte (copied from blob "pha")
//   fgr  output: 3-channel float foreground; the planes of blob "fgr" are
//        stored in reverse channel order
// Returns 0 on success, -1 when the model cannot be loaded or the network
// does not deliver usable "pha"/"fgr" blobs (pha and fgr are then untouched).
static int detect_rvm(const cv::Mat& bgr, cv::Mat& pha, cv::Mat& fgr)
{
    const float downsample_ratio = 0.5f;
    const int target_width = 512;
    const int target_height = 512;

    ncnn::Net net;
    net.opt.use_vulkan_compute = false;
    // original pretrained model from https://github.com/PeterL1n/RobustVideoMatting
    // ncnn model https://pan.baidu.com/s/11iEY2RGfzWFtce8ue7T3JQ password: d9t6
    if (net.load_param("rvm_512.param") != 0 || net.load_model("rvm_512.bin") != 0)
    {
        fprintf(stderr, "failed to load rvm_512.param / rvm_512.bin\n");
        return -1;
    }

    // if you use another input size, please change these input shapes
    ncnn::Mat r1i = ncnn::Mat(128, 128, 16);
    ncnn::Mat r2i = ncnn::Mat(64, 64, 20);
    ncnn::Mat r3i = ncnn::Mat(32, 32, 40);
    ncnn::Mat r4i = ncnn::Mat(16, 16, 64);
    r1i.fill(0.0f);
    r2i.fill(0.0f);
    r3i.fill(0.0f);
    r4i.fill(0.0f);

    ncnn::Extractor ex = net.create_extractor();
    const float mean_vals1[3] = {123.675f, 116.28f, 103.53f};
    const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f};
    const float mean_vals2[3] = {0, 0, 0};
    const float norm_vals2[3] = {1 / 255.0, 1 / 255.0, 1 / 255.0};

    // src2 is the full target-size input, src1 the downsampled one
    ncnn::Mat ncnn_in2 = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_width, target_height);
    ncnn::Mat ncnn_in1 = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_width * downsample_ratio, target_height * downsample_ratio);

    ncnn_in1.substract_mean_normalize(mean_vals1, norm_vals1);
    ncnn_in2.substract_mean_normalize(mean_vals2, norm_vals2);

    ex.input("src1", ncnn_in1);
    ex.input("src2", ncnn_in2);
    ex.input("r1i", r1i);
    ex.input("r2i", r2i);
    ex.input("r3i", r3i);
    ex.input("r4i", r4i);

    // for video matting, these outputs become the inputs of the next inference
    ex.extract("r4o", r4i);
    ex.extract("r3o", r3i);
    ex.extract("r2o", r2i);
    ex.extract("r1o", r1i);

    ncnn::Mat pha_;
    ex.extract("pha", pha_);
    ncnn::Mat fgr_;
    ex.extract("fgr", fgr_);

    if (pha_.empty() || fgr_.empty() || fgr_.c < 3)
    {
        fprintf(stderr, "rvm did not produce valid pha/fgr outputs\n");
        return -1;
    }

    // cv_pha only wraps pha_'s buffer; it is deep-copied into `pha` below
    cv::Mat cv_pha = cv::Mat(pha_.h, pha_.w, CV_32FC1, (float*)pha_.data);
    cv::Mat cv_fgr = cv::Mat(fgr_.h, fgr_.w, CV_32FC3);

    // ncnn pads every channel plane (stride is cstep, not h * w), so each plane
    // is addressed through channel()/row() instead of raw pointer arithmetic
    const ncnn::Mat plane0 = fgr_.channel(0);
    const ncnn::Mat plane1 = fgr_.channel(1);
    const ncnn::Mat plane2 = fgr_.channel(2);
    for (int i = 0; i < fgr_.h; i++)
    {
        const float* row0 = plane0.row(i);
        const float* row1 = plane1.row(i);
        const float* row2 = plane2.row(i);
        for (int j = 0; j < fgr_.w; j++)
        {
            cv::Vec3f& px = cv_fgr.at<cv::Vec3f>(i, j);
            // reversed channel order: plane 0 -> element 2, plane 2 -> element 0
            px[2] = row0[j];
            px[1] = row1[j];
            px[0] = row2[j];
        }
    }

    cv_pha.copyTo(pha);
    cv_fgr.copyTo(fgr);

    return 0;
}
// Entry point: expects exactly one argument, the path of the image to matte.
// Returns -1 on a usage error, when the image cannot be read, or when
// detect_rvm reports a failure; 0 otherwise.
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
        return -1;
    }

    const char* imagepath = argv[1];

    cv::Mat m = cv::imread(imagepath, 1);
    if (m.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;
    }

    cv::Mat fgr, pha;
    // with empty outputs there is nothing to display, so stop here
    if (detect_rvm(m, pha, fgr) != 0)
    {
        fprintf(stderr, "detect_rvm failed\n");
        return -1;
    }

    draw_objects(m, fgr, pha);

    return 0;
}

434
3rdparty/ncnn/examples/scrfd.cpp vendored Normal file
View File

@ -0,0 +1,434 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// One face candidate / detection.
struct FaceObject
{
cv::Rect_<float> rect; // bounding box; generate_proposals() fills it in network-input coordinates, detect_scrfd() maps it back to the source image
float prob; // score read from the score blob (>= prob_threshold)
};
static inline float intersection_area(const FaceObject& a, const FaceObject& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// In-place quicksort of faceobjects[left..right] (inclusive bounds) by
// descending prob, pivoting on the middle element. Both partitions are then
// sorted recursively, each in its own OpenMP section.
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right)
{
    int head = left;
    int tail = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (head <= tail)
    {
        // advance over entries that already sit on the correct side
        while (faceobjects[head].prob > pivot)
            head++;

        while (faceobjects[tail].prob < pivot)
            tail--;

        if (head > tail)
            break;

        std::swap(faceobjects[head], faceobjects[tail]);
        head++;
        tail--;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < tail) qsort_descent_inplace(faceobjects, left, tail);
        }
        #pragma omp section
        {
            if (head < right) qsort_descent_inplace(faceobjects, head, right);
        }
    }
}
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
{
if (faceobjects.empty())
return;
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
}
// Greedy non-maximum suppression.
//   faceobjects   candidates; callers pass them sorted by descending prob, so
//                 earlier entries win over later overlapping ones
//   picked        output: indices into `faceobjects` of the surviving boxes
//                 (cleared first)
//   nms_threshold a candidate is dropped when its IoU with any already picked
//                 box is greater than this value
static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // areas are reused across many IoU computations, so compute them once
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const FaceObject& a = faceobjects[i];

        bool keep = true;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const FaceObject& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = false;
                // one overlapping winner is enough; further comparisons cannot change the verdict
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors()
//
// Build the base anchors for one feature level: one row per (ratio, scale)
// pair, stored ratio-major, each row holding (x0, y0, x1, y1) of a box
// centred on the origin.
static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
    const int num_ratio = ratios.w;
    const int num_scale = scales.w;

    ncnn::Mat anchors;
    anchors.create(4, num_ratio * num_scale);

    // every base anchor is centred on (0, 0)
    const float cx = 0;
    const float cy = 0;

    int row = 0; // always equals i * num_scale + j
    for (int i = 0; i < num_ratio; i++)
    {
        const float ar = ratios[i];

        // unscaled integer width/height for this aspect ratio
        const int r_w = round(base_size / sqrt(ar));
        const int r_h = round(r_w * ar); //round(base_size * sqrt(ar));

        for (int j = 0; j < num_scale; j++)
        {
            const float scale = scales[j];

            const float half_w = (r_w * scale) * 0.5f;
            const float half_h = (r_h * scale) * 0.5f;

            float* anchor = anchors.row(row++);

            anchor[0] = cx - half_w;
            anchor[1] = cy - half_h;
            anchor[2] = cx + half_w;
            anchor[3] = cy + half_h;
        }
    }

    return anchors;
}
// Decode one feature level into face candidates.
// For every base anchor q and every feature-map cell, the score is read from
// channel q of score_blob; cells scoring at least prob_threshold are decoded
// from the four distance channels (q*4 .. q*4+3) of bbox_blob and appended to
// `faceobjects`. The anchor is shifted by feat_stride per cell.
static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, float prob_threshold, std::vector<FaceObject>& faceobjects)
{
    const int feat_w = score_blob.w;
    const int feat_h = score_blob.h;

    // generate face proposal from bbox deltas and shifted anchors
    const int num_anchors = anchors.h;

    for (int q = 0; q < num_anchors; q++)
    {
        const float* anchor = anchors.row(q);

        const ncnn::Mat score = score_blob.channel(q);
        const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4);

        const float anchor_w = anchor[2] - anchor[0];
        const float anchor_h = anchor[3] - anchor[1];

        // shifted anchor: walks down one stride per row ...
        float anchor_y = anchor[1];
        for (int i = 0; i < feat_h; i++, anchor_y += feat_stride)
        {
            // ... and right one stride per column
            float anchor_x = anchor[0];
            for (int j = 0; j < feat_w; j++, anchor_x += feat_stride)
            {
                const int index = i * feat_w + j;

                const float prob = score[index];
                if (!(prob >= prob_threshold))
                    continue;

                // insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single()
                const float dx = bbox.channel(0)[index] * feat_stride;
                const float dy = bbox.channel(1)[index] * feat_stride;
                const float dw = bbox.channel(2)[index] * feat_stride;
                const float dh = bbox.channel(3)[index] * feat_stride;

                // insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox()
                const float cx = anchor_x + anchor_w * 0.5f;
                const float cy = anchor_y + anchor_h * 0.5f;

                const float x0 = cx - dx;
                const float y0 = cy - dy;
                const float x1 = cx + dw;
                const float y1 = cy + dh;

                FaceObject candidate;
                candidate.rect.x = x0;
                candidate.rect.y = y0;
                candidate.rect.width = x1 - x0 + 1;
                candidate.rect.height = y1 - y0 + 1;
                candidate.prob = prob;

                faceobjects.push_back(candidate);
            }
        }
    }
}
// Run the SCRFD face detector on a BGR image.
//   bgr          input image (converted BGR -> RGB while loading)
//   faceobjects  output: detected faces in source-image coordinates, in
//                descending score order, after non-maximum suppression
// Returns 0 on success, -1 when the model files cannot be loaded
// (faceobjects is then left empty).
static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects)
{
    ncnn::Net scrfd;

    scrfd.opt.use_vulkan_compute = true;

    // model is converted from
    // https://github.com/deepinsight/insightface/tree/master/detection/scrfd
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (scrfd.load_param("scrfd_500m-opt2.param") != 0 || scrfd.load_model("scrfd_500m-opt2.bin") != 0)
    {
        // running the extractor on a net that failed to load is pointless
        fprintf(stderr, "failed to load scrfd_500m-opt2.param / scrfd_500m-opt2.bin\n");
        faceobjects.clear();
        return -1;
    }

    int width = bgr.cols;
    int height = bgr.rows;

    // insightface/detection/scrfd/configs/scrfd/scrfd_500m.py
    const int target_size = 640;

    const float prob_threshold = 0.3f;
    const float nms_threshold = 0.45f;

    // resize so that the longer side becomes target_size, keeping the aspect ratio
    int w = width;
    int h = height;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, w, h);

    // pad both dimensions up to a multiple of 32, splitting the padding between the two sides
    int wpad = (w + 31) / 32 * 32 - w;
    int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);

    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {1 / 128.f, 1 / 128.f, 1 / 128.f};
    in_pad.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = scrfd.create_extractor();

    ex.input("input.1", in_pad);

    // One entry per detection head, processed in this order:
    // output blob names, anchor base size and feature stride.
    struct DetectionHead
    {
        const char* score_name;
        const char* bbox_name;
        int base_size;
        int feat_stride;
    };
    static const DetectionHead heads[] = {
        {"412", "415", 16, 8},   // stride 8
        {"474", "477", 64, 16},  // stride 16
        {"536", "539", 256, 32}, // stride 32
    };
    const int num_heads = sizeof(heads) / sizeof(heads[0]);

    // all heads share the same anchor ratios and scales
    ncnn::Mat ratios(1);
    ratios[0] = 1.f;
    ncnn::Mat scales(2);
    scales[0] = 1.f;
    scales[1] = 2.f;

    std::vector<FaceObject> faceproposals;

    for (int k = 0; k < num_heads; k++)
    {
        const DetectionHead& head = heads[k];

        ncnn::Mat score_blob, bbox_blob;
        ex.extract(head.score_name, score_blob);
        ex.extract(head.bbox_name, bbox_blob);

        ncnn::Mat anchors = generate_anchors(head.base_size, ratios, scales);

        // generate_proposals only appends, so the shared vector can be filled directly
        generate_proposals(anchors, head.feat_stride, score_blob, bbox_blob, prob_threshold, faceproposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(faceproposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(faceproposals, picked, nms_threshold);

    int face_count = picked.size();

    faceobjects.resize(face_count);
    for (int i = 0; i < face_count; i++)
    {
        faceobjects[i] = faceproposals[picked[i]];

        // adjust offset to original unpadded: remove the leading padding, then undo the resize
        float x0 = (faceobjects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (faceobjects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (faceobjects[i].rect.x + faceobjects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (faceobjects[i].rect.y + faceobjects[i].rect.height - (hpad / 2)) / scale;

        // clip to the source image
        x0 = std::max(std::min(x0, (float)width - 1), 0.f);
        y0 = std::max(std::min(y0, (float)height - 1), 0.f);
        x1 = std::max(std::min(x1, (float)width - 1), 0.f);
        y1 = std::max(std::min(y1, (float)height - 1), 0.f);

        faceobjects[i].rect.x = x0;
        faceobjects[i].rect.y = y0;
        faceobjects[i].rect.width = x1 - x0;
        faceobjects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Log every detected face to stderr, then display the image annotated with a
// box and a confidence label per face.
// Blocks until a key is pressed in the display window.
static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects)
{
    // annotate a copy so the caller's image stays untouched
    cv::Mat image = bgr.clone();

    for (size_t n = 0; n < faceobjects.size(); n++)
    {
        const FaceObject& face = faceobjects[n];

        fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", face.prob,
                face.rect.x, face.rect.y, face.rect.width, face.rect.height);

        cv::rectangle(image, face.rect, cv::Scalar(0, 255, 0));

        char text[256];
        sprintf(text, "%.1f%%", face.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // the label goes above the box; pull it back inside at the top and right edges
        int label_x = face.rect.x;
        int label_y = face.rect.y - label_size.height - baseLine;
        if (label_y < 0)
            label_y = 0;
        if (label_x + label_size.width > image.cols)
            label_x = image.cols - label_size.width;

        // white backing rectangle, then black text on top of it
        const cv::Size backing(label_size.width, label_size.height + baseLine);
        cv::rectangle(image, cv::Rect(cv::Point(label_x, label_y), backing),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(label_x, label_y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
// Entry point: expects exactly one argument, the path of the image to analyse.
// Returns -1 on a usage error, when the image cannot be read, or when
// detection reports a failure; 0 otherwise.
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
        return -1;
    }

    const char* imagepath = argv[1];

    cv::Mat m = cv::imread(imagepath, 1);
    if (m.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;
    }

    std::vector<FaceObject> faceobjects;
    // do not open a result window when detection itself did not succeed
    if (detect_scrfd(m, faceobjects) != 0)
    {
        fprintf(stderr, "detect_scrfd failed\n");
        return -1;
    }

    draw_faceobjects(m, faceobjects);

    return 0;
}

View File

@ -0,0 +1,471 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// One detection candidate produced by generate_proposals().
struct FaceObject
{
cv::Rect_<float> rect; // bounding box decoded from the bbox blob
float prob; // score read from the score blob (>= prob_threshold)
};
static inline float intersection_area(const FaceObject& a, const FaceObject& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Quicksort the inclusive range faceobjects[left..right] in place, highest
// prob first. The pivot value is the prob of the element in the middle of the
// range; the two partitions are handed to separate OpenMP sections.
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right)
{
    const float pivot_prob = faceobjects[(left + right) / 2].prob;

    int up = left;
    int down = right;

    while (up <= down)
    {
        // skip entries that are already on the proper side of the pivot
        while (faceobjects[up].prob > pivot_prob)
            ++up;

        while (faceobjects[down].prob < pivot_prob)
            --down;

        if (up > down)
            break;

        std::swap(faceobjects[up], faceobjects[down]);
        ++up;
        --down;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < down) qsort_descent_inplace(faceobjects, left, down);
        }
        #pragma omp section
        {
            if (up < right) qsort_descent_inplace(faceobjects, up, right);
        }
    }
}
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
{
if (faceobjects.empty())
return;
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
}
// Greedy non-maximum suppression.
//   faceobjects   candidates, expected in descending prob order so that
//                 earlier entries win over later overlapping ones
//   picked        output: indices into `faceobjects` of the surviving boxes
//                 (cleared first)
//   nms_threshold a candidate is dropped when its IoU with any already picked
//                 box is greater than this value
static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // areas are reused across many IoU computations, so compute them once
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const FaceObject& a = faceobjects[i];

        bool keep = true;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const FaceObject& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = false;
                // one overlapping winner is enough; further comparisons cannot change the verdict
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors()
//
// Produce one origin-centred base anchor per (ratio, scale) combination.
// The result has num_ratio * num_scale rows of (x0, y0, x1, y1); rows are
// grouped by ratio, with the scales of one ratio stored consecutively.
static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
    const int num_ratio = ratios.w;
    const int num_scale = scales.w;

    ncnn::Mat anchors;
    anchors.create(4, num_ratio * num_scale);

    // anchor centre
    const float cx = 0;
    const float cy = 0;

    for (int ri = 0; ri < num_ratio; ri++)
    {
        const float ar = ratios[ri];

        // integer base width/height for this aspect ratio, before scaling
        const int r_w = round(base_size / sqrt(ar));
        const int r_h = round(r_w * ar); //round(base_size * sqrt(ar));

        for (int si = 0; si < num_scale; si++)
        {
            const float scale = scales[si];

            const float rs_w = r_w * scale;
            const float rs_h = r_h * scale;
            const float half_w = rs_w * 0.5f;
            const float half_h = rs_h * 0.5f;

            float* out = anchors.row(ri * num_scale + si);

            out[0] = cx - half_w;
            out[1] = cy - half_h;
            out[2] = cx + half_w;
            out[3] = cy + half_h;
        }
    }

    return anchors;
}
// Turn the raw outputs of one feature level into candidates.
// Channel q of score_blob holds the scores for base anchor q, and channels
// q*4 .. q*4+3 of bbox_blob hold the four distances (scaled by feat_stride)
// from the anchor centre to the box edges. Every cell whose score is at least
// prob_threshold is appended to `faceobjects`.
static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, float prob_threshold, std::vector<FaceObject>& faceobjects)
{
    const int cols = score_blob.w;
    const int rows = score_blob.h;

    // generate face proposal from bbox deltas and shifted anchors
    const int num_anchors = anchors.h;

    for (int q = 0; q < num_anchors; q++)
    {
        const float* base = anchors.row(q);

        const ncnn::Mat score = score_blob.channel(q);
        const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4);

        const float anchor_w = base[2] - base[0];
        const float anchor_h = base[3] - base[1];

        // the anchor moves one stride down per row and one stride right per column
        float anchor_y = base[1];
        for (int r = 0; r < rows; r++, anchor_y += feat_stride)
        {
            float anchor_x = base[0];
            for (int c = 0; c < cols; c++, anchor_x += feat_stride)
            {
                const int index = r * cols + c;

                const float prob = score[index];
                if (!(prob >= prob_threshold))
                    continue;

                // insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single()
                const float dx = bbox.channel(0)[index] * feat_stride;
                const float dy = bbox.channel(1)[index] * feat_stride;
                const float dw = bbox.channel(2)[index] * feat_stride;
                const float dh = bbox.channel(3)[index] * feat_stride;

                // insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox()
                const float cx = anchor_x + anchor_w * 0.5f;
                const float cy = anchor_y + anchor_h * 0.5f;

                const float x0 = cx - dx;
                const float y0 = cy - dy;
                const float x1 = cx + dw;
                const float y1 = cy + dh;

                FaceObject candidate;
                candidate.rect.x = x0;
                candidate.rect.y = y0;
                candidate.rect.width = x1 - x0 + 1;
                candidate.rect.height = y1 - y0 + 1;
                candidate.prob = prob;

                faceobjects.push_back(candidate);
            }
        }
    }
}
static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects)
{
ncnn::Net scrfd;
scrfd.opt.use_vulkan_compute = true;
// Insightface does not provide a trained scrfd_crowdhuman model,
// but I have one for detecting cat faces; you can try it here:
// https://drive.google.com/file/d/1JogkKa0f_09HkENbCnXy9hRYxm35wKTn
scrfd.load_param("scrfd_crowdhuman.param");
scrfd.load_model("scrfd_crowdhuman.bin");
int width = bgr.cols;
int height = bgr.rows;
const int target_size = 640;
const float prob_threshold = 0.3f;
const float nms_threshold = 0.45f;
// pad to multiple of 32
int w = width;
int h = height;
float scale = 1.f;
if (w > h)
{
scale = (float)target_size / w;
w = target_size;
h = h * scale;
}
else
{
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, w, h);
// pad to target_size rectangle
int wpad = (w + 31) / 32 * 32 - w;
int hpad = (h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
const float norm_vals[3] = {1 / 128.f, 1 / 128.f, 1 / 128.f};
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = scrfd.create_extractor();
ex.input("input.1", in_pad);
std::vector<FaceObject> faceproposals;
// stride 8
{
ncnn::Mat score_blob, bbox_blob;
ex.extract("490", score_blob);
ex.extract("493", bbox_blob);
const int base_size = 8;
const int feat_stride = 8;
ncnn::Mat ratios(1);
ratios[0] = 2.f;
ncnn::Mat scales(1);
scales[0] = 3.f;
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
std::vector<FaceObject> faceobjects32;
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects32);
faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end());
}
// stride 16
{
ncnn::Mat score_blob, bbox_blob;
ex.extract("510", score_blob);
ex.extract("513", bbox_blob);
const int base_size = 16;
const int feat_stride = 16;
ncnn::Mat ratios(1);
ratios[0] = 2.f;
ncnn::Mat scales(1);
scales[0] = 3.f;
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
std::vector<FaceObject> faceobjects16;
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects16);
faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end());
}
// stride 32
{
ncnn::Mat score_blob, bbox_blob;
ex.extract("530", score_blob);
ex.extract("533", bbox_blob);
const int base_size = 32;
const int feat_stride = 32;
ncnn::Mat ratios(1);
ratios[0] = 2.f;
ncnn::Mat scales(1);
scales[0] = 3.f;
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
std::vector<FaceObject> faceobjects8;
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8);
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
}
// stride 64
{
ncnn::Mat score_blob, bbox_blob, kps_blob;
ex.extract("550", score_blob);
ex.extract("553", bbox_blob);
const int base_size = 64;
const int feat_stride = 64;
ncnn::Mat ratios(1);
ratios[0] = 2.f;
ncnn::Mat scales(1);
scales[0] = 3.f;
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
std::vector<FaceObject> faceobjects8;
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8);
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
}
// stride 128
{
ncnn::Mat score_blob, bbox_blob, kps_blob;
ex.extract("570", score_blob);
ex.extract("573", bbox_blob);
const int base_size = 128;
const int feat_stride = 128;
ncnn::Mat ratios(1);
ratios[0] = 2.f;
ncnn::Mat scales(1);
scales[0] = 3.f;
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
std::vector<FaceObject> faceobjects8;
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8);
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
}
// sort all proposals by score from highest to lowest
qsort_descent_inplace(faceproposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(faceproposals, picked, nms_threshold);
int face_count = picked.size();
faceobjects.resize(face_count);
for (int i = 0; i < face_count; i++)
{
faceobjects[i] = faceproposals[picked[i]];
// adjust offset to original unpadded
float x0 = (faceobjects[i].rect.x - (wpad / 2)) / scale;
float y0 = (faceobjects[i].rect.y - (hpad / 2)) / scale;
float x1 = (faceobjects[i].rect.x + faceobjects[i].rect.width - (wpad / 2)) / scale;
float y1 = (faceobjects[i].rect.y + faceobjects[i].rect.height - (hpad / 2)) / scale;
x0 = std::max(std::min(x0, (float)width - 1), 0.f);
y0 = std::max(std::min(y0, (float)height - 1), 0.f);
x1 = std::max(std::min(x1, (float)width - 1), 0.f);
y1 = std::max(std::min(y1, (float)height - 1), 0.f);
faceobjects[i].rect.x = x0;
faceobjects[i].rect.y = y0;
faceobjects[i].rect.width = x1 - x0;
faceobjects[i].rect.height = y1 - y0;
}
return 0;
}
// Log every detection to stderr, draw its box and a percentage label on a copy
// of the image, then show the annotated copy and block in cv::waitKey(0).
static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects)
{
    // work on a clone so the caller's image is left untouched
    cv::Mat canvas = bgr.clone();

    for (std::vector<FaceObject>::const_iterator it = faceobjects.begin(); it != faceobjects.end(); ++it)
    {
        const FaceObject& face = *it;

        fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", face.prob,
                face.rect.x, face.rect.y, face.rect.width, face.rect.height);

        cv::rectangle(canvas, face.rect, cv::Scalar(0, 255, 0));

        // label text is the score expressed as a percentage
        char label[256];
        sprintf(label, "%.1f%%", face.prob * 100);

        int baseline = 0;
        cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseline);

        // place the label just above the box, keeping it inside the top and right edges
        int left = face.rect.x;
        int top = face.rect.y - text_size.height - baseline;
        if (top < 0)
        {
            top = 0;
        }
        if (left + text_size.width > canvas.cols)
        {
            left = canvas.cols - text_size.width;
        }

        // white filled background, then black text on top of it
        cv::Point label_origin(left, top);
        cv::Size background_size(text_size.width, text_size.height + baseline);
        cv::rectangle(canvas, cv::Rect(label_origin, background_size),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(canvas, label, cv::Point(left, top + text_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", canvas);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<FaceObject> faceobjects;
detect_scrfd(m, faceobjects);
draw_faceobjects(m, faceobjects);
return 0;
}

123
3rdparty/ncnn/examples/shufflenetv2.cpp vendored Normal file
View File

@ -0,0 +1,123 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#include <algorithm>
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif
#include <stdio.h>
#include <vector>
// Classify an image with ShuffleNet V2 and return one softmax probability per class.
//
// bgr         input image; its pixel buffer is handed to ncnn as PIXEL_BGR data
//             and resized to 224x224
// cls_scores  output; overwritten with the flattened softmax output of the "fc" blob
//
// Returns 0 on success. Returns -1 with cls_scores left empty when the model
// cannot be loaded, the "fc" blob cannot be extracted, or the Softmax layer
// cannot be created.
static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& cls_scores)
{
    cls_scores.clear();

    ncnn::Net shufflenetv2;

    shufflenetv2.opt.use_vulkan_compute = true;

    // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe
    // models can be downloaded from https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe/releases
    if (shufflenetv2.load_param("shufflenet_v2_x0.5.param") != 0 || shufflenetv2.load_model("shufflenet_v2_x0.5.bin") != 0)
    {
        fprintf(stderr, "failed to load shufflenet_v2_x0.5.param / shufflenet_v2_x0.5.bin\n");
        return -1;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, 224, 224);

    // no mean subtraction, only scaling of the 0..255 pixel values by 1/255
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = shufflenetv2.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("fc", out) != 0)
    {
        // do not run softmax on a blob that was never produced
        fprintf(stderr, "failed to extract blob fc\n");
        return -1;
    }

    // manually call softmax on the fc output
    // convert result into probability
    // skip if your model already has softmax operation
    {
        ncnn::Layer* softmax = ncnn::create_layer("Softmax");
        if (!softmax)
        {
            fprintf(stderr, "failed to create Softmax layer\n");
            return -1;
        }

        ncnn::ParamDict pd;
        softmax->load_param(pd);

        softmax->forward_inplace(out, shufflenetv2.opt);

        delete softmax;
    }

    // flatten to one dimension so the scores can be copied with a single index
    out = out.reshape(out.w * out.h * out.c);

    cls_scores.resize(out.w);
    for (int j = 0; j < out.w; j++)
    {
        cls_scores[j] = out[j];
    }

    return 0;
}
// Print the topk highest-scoring classes to stderr as "index = score" lines,
// highest score first.
//
// cls_scores  one score per class; the position in the vector is the class index
// topk        number of entries to print; clamped to the range [0, cls_scores.size()]
//             so that an empty or short score vector is never read out of range
//
// Always returns 0.
static int print_topk(const std::vector<float>& cls_scores, int topk)
{
    const int size = (int)cls_scores.size();

    // std::partial_sort requires its middle iterator to lie inside the range
    topk = std::max(0, std::min(topk, size));

    // pair every score with its class index so the index survives the sort
    std::vector<std::pair<float, int> > vec(size);
    for (int i = 0; i < size; i++)
    {
        vec[i] = std::make_pair(cls_scores[i], i);
    }

    // only the first topk entries need to be in order
    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater<std::pair<float, int> >());

    // print topk and score
    for (int i = 0; i < topk; i++)
    {
        float score = vec[i].first;
        int index = vec[i].second;
        fprintf(stderr, "%d = %f\n", index, score);
    }

    return 0;
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<float> cls_scores;
detect_shufflenetv2(m, cls_scores);
print_topk(cls_scores, 3);
return 0;
}

165
3rdparty/ncnn/examples/simplepose.cpp vendored Normal file
View File

@ -0,0 +1,165 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#include <algorithm>
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// One pose keypoint resolved from a single heatmap channel.
struct KeyPoint
{
// position of the heatmap maximum, scaled to the coordinates of the input image
cv::Point2f p;
// heatmap value at that maximum; the drawing code skips values below 0.2
float prob;
};
// Estimate pose keypoints for an image with the simple-pose network.
//
// bgr        input image; its pixel buffer is handed to ncnn as PIXEL_BGR2RGB data
//            and resized to 192x256
// keypoints  output; overwritten with one entry per channel of the "conv3_fwd"
//            heatmap blob, each located at that channel's maximum and scaled back
//            to the coordinates of bgr
//
// Returns 0 on success, or -1 with keypoints left empty when the model cannot
// be loaded or the heatmap blob cannot be extracted.
static int detect_posenet(const cv::Mat& bgr, std::vector<KeyPoint>& keypoints)
{
    keypoints.clear();

    ncnn::Net posenet;

    posenet.opt.use_vulkan_compute = true;

    // the simple baseline human pose estimation from gluon-cv
    // https://gluon-cv.mxnet.io/build/examples_pose/demo_simple_pose.html
    // mxnet model exported via
    //      pose_net.hybridize()
    //      pose_net.export('pose')
    // then mxnet2ncnn
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (posenet.load_param("pose.param") != 0 || posenet.load_model("pose.bin") != 0)
    {
        fprintf(stderr, "failed to load pose.param / pose.bin\n");
        return -1;
    }

    int w = bgr.cols;
    int h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, w, h, 192, 256);

    // transforms.ToTensor(),
    // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    // R' = (R / 255 - 0.485) / 0.229 = (R - 0.485 * 255) / 0.229 / 255
    // G' = (G / 255 - 0.456) / 0.224 = (G - 0.456 * 255) / 0.224 / 255
    // B' = (B / 255 - 0.406) / 0.225 = (B - 0.406 * 255) / 0.225 / 255
    const float mean_vals[3] = {0.485f * 255.f, 0.456f * 255.f, 0.406f * 255.f};
    const float norm_vals[3] = {1 / 0.229f / 255.f, 1 / 0.224f / 255.f, 1 / 0.225f / 255.f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = posenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("conv3_fwd", out) != 0)
    {
        fprintf(stderr, "failed to extract blob conv3_fwd\n");
        return -1;
    }

    // resolve point from heatmap: one keypoint per channel, at the channel's maximum
    for (int p = 0; p < out.c; p++)
    {
        const ncnn::Mat m = out.channel(p);

        // starts at 0, so a channel without any positive value yields (0, 0) with prob 0
        float max_prob = 0.f;
        int max_x = 0;
        int max_y = 0;
        for (int y = 0; y < out.h; y++)
        {
            const float* ptr = m.row(y);
            for (int x = 0; x < out.w; x++)
            {
                float prob = ptr[x];
                if (prob > max_prob)
                {
                    max_prob = prob;
                    max_x = x;
                    max_y = y;
                }
            }
        }

        // scale heatmap coordinates back to the size of the input image
        KeyPoint keypoint;
        keypoint.p = cv::Point2f(max_x * w / (float)out.w, max_y * h / (float)out.h);
        keypoint.prob = max_prob;

        keypoints.push_back(keypoint);
    }

    return 0;
}
// Draw the pose (bones first, then joints) on a copy of the image, log every
// keypoint to stderr, then show the result and block in cv::waitKey(0).
//
// bgr        image the keypoints refer to; it is cloned, not modified
// keypoints  keypoints to draw; the bone table refers to indices 0..16, and a
//            bone whose endpoint index is not present in the vector is skipped
static void draw_pose(const cv::Mat& bgr, const std::vector<KeyPoint>& keypoints)
{
    cv::Mat image = bgr.clone();

    // draw bone
    static const int joint_pairs[16][2] = {
        {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16}
    };

    const size_t keypoint_count = keypoints.size();

    for (int i = 0; i < 16; i++)
    {
        const size_t first = (size_t)joint_pairs[i][0];
        const size_t second = (size_t)joint_pairs[i][1];

        // a failed detection leaves the vector empty or short; never index past its end
        if (first >= keypoint_count || second >= keypoint_count)
            continue;

        const KeyPoint& p1 = keypoints[first];
        const KeyPoint& p2 = keypoints[second];

        // skip bones with a low-confidence endpoint
        if (p1.prob < 0.2f || p2.prob < 0.2f)
            continue;

        cv::line(image, p1.p, p2.p, cv::Scalar(255, 0, 0), 2);
    }

    // draw joint
    for (size_t i = 0; i < keypoint_count; i++)
    {
        const KeyPoint& keypoint = keypoints[i];

        // every keypoint is logged, including those too weak to be drawn
        fprintf(stderr, "%.2f %.2f = %.5f\n", keypoint.p.x, keypoint.p.y, keypoint.prob);

        if (keypoint.prob < 0.2f)
            continue;

        cv::circle(image, keypoint.p, 3, cv::Scalar(0, 255, 0), -1);
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<KeyPoint> keypoints;
detect_posenet(m, keypoints);
draw_pose(m, keypoints);
return 0;
}

View File

@ -0,0 +1 @@
The squeezenet android example project has been moved to https://github.com/nihui/ncnn-android-squeezenet

106
3rdparty/ncnn/examples/squeezenet.cpp vendored Normal file
View File

@ -0,0 +1,106 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#include <algorithm>
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif
#include <stdio.h>
#include <vector>
// Classify an image with SqueezeNet v1.1 and return one score per class.
//
// bgr         input image; its pixel buffer is handed to ncnn as PIXEL_BGR data
//             and resized to 227x227
// cls_scores  output; overwritten with the contents of the "prob" blob
//
// Returns 0 on success, or -1 with cls_scores left empty when the model cannot
// be loaded or the "prob" blob cannot be extracted.
static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
{
    cls_scores.clear();

    ncnn::Net squeezenet;

    squeezenet.opt.use_vulkan_compute = true;

    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (squeezenet.load_param("squeezenet_v1.1.param") != 0 || squeezenet.load_model("squeezenet_v1.1.bin") != 0)
    {
        fprintf(stderr, "failed to load squeezenet_v1.1.param / squeezenet_v1.1.bin\n");
        return -1;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, 227, 227);

    // per-channel mean subtraction only; passing 0 skips the normalisation step
    const float mean_vals[3] = {104.f, 117.f, 123.f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Extractor ex = squeezenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("prob", out) != 0)
    {
        fprintf(stderr, "failed to extract blob prob\n");
        return -1;
    }

    cls_scores.resize(out.w);
    for (int j = 0; j < out.w; j++)
    {
        cls_scores[j] = out[j];
    }

    return 0;
}
// Print the topk highest-scoring classes to stderr as "index = score" lines,
// highest score first.
//
// cls_scores  one score per class; the position in the vector is the class index
// topk        number of entries to print; clamped to the range [0, cls_scores.size()]
//             so that an empty or short score vector is never read out of range
//
// Always returns 0.
static int print_topk(const std::vector<float>& cls_scores, int topk)
{
    const int size = (int)cls_scores.size();

    // std::partial_sort requires its middle iterator to lie inside the range
    topk = std::max(0, std::min(topk, size));

    // pair every score with its class index so the index survives the sort
    std::vector<std::pair<float, int> > vec(size);
    for (int i = 0; i < size; i++)
    {
        vec[i] = std::make_pair(cls_scores[i], i);
    }

    // only the first topk entries need to be in order
    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater<std::pair<float, int> >());

    // print topk and score
    for (int i = 0; i < topk; i++)
    {
        float score = vec[i].first;
        int index = vec[i].second;
        fprintf(stderr, "%d = %f\n", index, score);
    }

    return 0;
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<float> cls_scores;
detect_squeezenet(m, cls_scores);
print_topk(cls_scores, 3);
return 0;
}

View File

@ -0,0 +1,121 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "c_api.h"
#include <algorithm>
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif
#include <stdio.h>
#include <vector>
// Classify an image with SqueezeNet v1.1 through the ncnn C API and return one
// score per class.
//
// bgr         input image; its pixel buffer is handed to ncnn as NCNN_MAT_PIXEL_BGR
//             data with a row stride of bgr.cols * 3 bytes and resized to 227x227
// cls_scores  output; overwritten with the contents of the "prob" blob
//
// Returns 0 on success, or -1 with cls_scores left empty when the model cannot
// be loaded or the "prob" blob cannot be extracted. Every ncnn handle created
// here is destroyed before returning, on both paths.
static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
{
    cls_scores.clear();

    ncnn_net_t squeezenet = ncnn_net_create();

    ncnn_option_t opt = ncnn_option_create();
    ncnn_option_set_use_vulkan_compute(opt, 1);

    ncnn_net_set_option(squeezenet, opt);

    int ret = -1;

    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (ncnn_net_load_param(squeezenet, "squeezenet_v1.1.param") != 0 || ncnn_net_load_model(squeezenet, "squeezenet_v1.1.bin") != 0)
    {
        fprintf(stderr, "failed to load squeezenet_v1.1.param / squeezenet_v1.1.bin\n");
    }
    else
    {
        ncnn_mat_t in = ncnn_mat_from_pixels_resize(bgr.data, NCNN_MAT_PIXEL_BGR, bgr.cols, bgr.rows, bgr.cols * 3, 227, 227, NULL);

        // per-channel mean subtraction only; passing 0 skips the normalisation step
        const float mean_vals[3] = {104.f, 117.f, 123.f};
        ncnn_mat_substract_mean_normalize(in, mean_vals, 0);

        ncnn_extractor_t ex = ncnn_extractor_create(squeezenet);

        ncnn_extractor_input(ex, "data", in);

        // start from a null handle so the cleanup below is valid even if extract
        // never assigns it
        ncnn_mat_t out = 0;
        if (ncnn_extractor_extract(ex, "prob", &out) == 0 && out)
        {
            const int out_w = ncnn_mat_get_w(out);
            const float* out_data = (const float*)ncnn_mat_get_data(out);

            cls_scores.resize(out_w);
            for (int j = 0; j < out_w; j++)
            {
                cls_scores[j] = out_data[j];
            }

            ret = 0;
        }
        else
        {
            fprintf(stderr, "failed to extract blob prob\n");
        }

        ncnn_mat_destroy(in);
        if (out)
        {
            ncnn_mat_destroy(out);
        }

        ncnn_extractor_destroy(ex);
    }

    ncnn_option_destroy(opt);

    ncnn_net_destroy(squeezenet);

    return ret;
}
// Print the topk highest-scoring classes to stderr as "index = score" lines,
// highest score first.
//
// cls_scores  one score per class; the position in the vector is the class index
// topk        number of entries to print; clamped to the range [0, cls_scores.size()]
//             so that an empty or short score vector is never read out of range
//
// Always returns 0.
static int print_topk(const std::vector<float>& cls_scores, int topk)
{
    const int size = (int)cls_scores.size();

    // std::partial_sort requires its middle iterator to lie inside the range
    topk = std::max(0, std::min(topk, size));

    // pair every score with its class index so the index survives the sort
    std::vector<std::pair<float, int> > vec(size);
    for (int i = 0; i < size; i++)
    {
        vec[i] = std::make_pair(cls_scores[i], i);
    }

    // only the first topk entries need to be in order
    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater<std::pair<float, int> >());

    // print topk and score
    for (int i = 0; i < topk; i++)
    {
        float score = vec[i].first;
        int index = vec[i].second;
        fprintf(stderr, "%d = %f\n", index, score);
    }

    return 0;
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<float> cls_scores;
detect_squeezenet(m, cls_scores);
print_topk(cls_scores, 3);
return 0;
}

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,77 @@
7767517
75 83
Input data 0 1 data 0=227 1=227 2=3
Convolution conv1 1 1 data conv1 0=64 1=3 2=1 3=2 4=0 5=1 6=1728
ReLU relu_conv1 1 1 conv1 conv1_relu_conv1 0=0.000000
Pooling pool1 1 1 conv1_relu_conv1 pool1 0=0 1=3 2=2 3=0 4=0
Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1 0=16 1=1 2=1 3=1 4=0 5=1 6=1024
ReLU fire2/relu_squeeze1x1 1 1 fire2/squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1 0=0.000000
Split splitncnn_0 1 2 fire2/squeeze1x1_fire2/relu_squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1
Convolution fire2/expand1x1 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 fire2/expand1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=1024
ReLU fire2/relu_expand1x1 1 1 fire2/expand1x1 fire2/expand1x1_fire2/relu_expand1x1 0=0.000000
Convolution fire2/expand3x3 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/expand3x3 0=64 1=3 2=1 3=1 4=1 5=1 6=9216
ReLU fire2/relu_expand3x3 1 1 fire2/expand3x3 fire2/expand3x3_fire2/relu_expand3x3 0=0.000000
Concat fire2/concat 2 1 fire2/expand1x1_fire2/relu_expand1x1 fire2/expand3x3_fire2/relu_expand3x3 fire2/concat 0=0
Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1 0=16 1=1 2=1 3=1 4=0 5=1 6=2048
ReLU fire3/relu_squeeze1x1 1 1 fire3/squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1 0=0.000000
Split splitncnn_1 1 2 fire3/squeeze1x1_fire3/relu_squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1
Convolution fire3/expand1x1 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 fire3/expand1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=1024
ReLU fire3/relu_expand1x1 1 1 fire3/expand1x1 fire3/expand1x1_fire3/relu_expand1x1 0=0.000000
Convolution fire3/expand3x3 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/expand3x3 0=64 1=3 2=1 3=1 4=1 5=1 6=9216
ReLU fire3/relu_expand3x3 1 1 fire3/expand3x3 fire3/expand3x3_fire3/relu_expand3x3 0=0.000000
Concat fire3/concat 2 1 fire3/expand1x1_fire3/relu_expand1x1 fire3/expand3x3_fire3/relu_expand3x3 fire3/concat 0=0
Pooling pool3 1 1 fire3/concat pool3 0=0 1=3 2=2 3=0 4=0
Convolution fire4/squeeze1x1 1 1 pool3 fire4/squeeze1x1 0=32 1=1 2=1 3=1 4=0 5=1 6=4096
ReLU fire4/relu_squeeze1x1 1 1 fire4/squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1 0=0.000000
Split splitncnn_2 1 2 fire4/squeeze1x1_fire4/relu_squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1
Convolution fire4/expand1x1 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 fire4/expand1x1 0=128 1=1 2=1 3=1 4=0 5=1 6=4096
ReLU fire4/relu_expand1x1 1 1 fire4/expand1x1 fire4/expand1x1_fire4/relu_expand1x1 0=0.000000
Convolution fire4/expand3x3 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/expand3x3 0=128 1=3 2=1 3=1 4=1 5=1 6=36864
ReLU fire4/relu_expand3x3 1 1 fire4/expand3x3 fire4/expand3x3_fire4/relu_expand3x3 0=0.000000
Concat fire4/concat 2 1 fire4/expand1x1_fire4/relu_expand1x1 fire4/expand3x3_fire4/relu_expand3x3 fire4/concat 0=0
Convolution fire5/squeeze1x1 1 1 fire4/concat fire5/squeeze1x1 0=32 1=1 2=1 3=1 4=0 5=1 6=8192
ReLU fire5/relu_squeeze1x1 1 1 fire5/squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1 0=0.000000
Split splitncnn_3 1 2 fire5/squeeze1x1_fire5/relu_squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1
Convolution fire5/expand1x1 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 fire5/expand1x1 0=128 1=1 2=1 3=1 4=0 5=1 6=4096
ReLU fire5/relu_expand1x1 1 1 fire5/expand1x1 fire5/expand1x1_fire5/relu_expand1x1 0=0.000000
Convolution fire5/expand3x3 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/expand3x3 0=128 1=3 2=1 3=1 4=1 5=1 6=36864
ReLU fire5/relu_expand3x3 1 1 fire5/expand3x3 fire5/expand3x3_fire5/relu_expand3x3 0=0.000000
Concat fire5/concat 2 1 fire5/expand1x1_fire5/relu_expand1x1 fire5/expand3x3_fire5/relu_expand3x3 fire5/concat 0=0
Pooling pool5 1 1 fire5/concat pool5 0=0 1=3 2=2 3=0 4=0
Convolution fire6/squeeze1x1 1 1 pool5 fire6/squeeze1x1 0=48 1=1 2=1 3=1 4=0 5=1 6=12288
ReLU fire6/relu_squeeze1x1 1 1 fire6/squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1 0=0.000000
Split splitncnn_4 1 2 fire6/squeeze1x1_fire6/relu_squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1
Convolution fire6/expand1x1 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 fire6/expand1x1 0=192 1=1 2=1 3=1 4=0 5=1 6=9216
ReLU fire6/relu_expand1x1 1 1 fire6/expand1x1 fire6/expand1x1_fire6/relu_expand1x1 0=0.000000
Convolution fire6/expand3x3 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/expand3x3 0=192 1=3 2=1 3=1 4=1 5=1 6=82944
ReLU fire6/relu_expand3x3 1 1 fire6/expand3x3 fire6/expand3x3_fire6/relu_expand3x3 0=0.000000
Concat fire6/concat 2 1 fire6/expand1x1_fire6/relu_expand1x1 fire6/expand3x3_fire6/relu_expand3x3 fire6/concat 0=0
Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1 0=48 1=1 2=1 3=1 4=0 5=1 6=18432
ReLU fire7/relu_squeeze1x1 1 1 fire7/squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1 0=0.000000
Split splitncnn_5 1 2 fire7/squeeze1x1_fire7/relu_squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1
Convolution fire7/expand1x1 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 fire7/expand1x1 0=192 1=1 2=1 3=1 4=0 5=1 6=9216
ReLU fire7/relu_expand1x1 1 1 fire7/expand1x1 fire7/expand1x1_fire7/relu_expand1x1 0=0.000000
Convolution fire7/expand3x3 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/expand3x3 0=192 1=3 2=1 3=1 4=1 5=1 6=82944
ReLU fire7/relu_expand3x3 1 1 fire7/expand3x3 fire7/expand3x3_fire7/relu_expand3x3 0=0.000000
Concat fire7/concat 2 1 fire7/expand1x1_fire7/relu_expand1x1 fire7/expand3x3_fire7/relu_expand3x3 fire7/concat 0=0
Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=24576
ReLU fire8/relu_squeeze1x1 1 1 fire8/squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1 0=0.000000
Split splitncnn_6 1 2 fire8/squeeze1x1_fire8/relu_squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1
Convolution fire8/expand1x1 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 fire8/expand1x1 0=256 1=1 2=1 3=1 4=0 5=1 6=16384
ReLU fire8/relu_expand1x1 1 1 fire8/expand1x1 fire8/expand1x1_fire8/relu_expand1x1 0=0.000000
Convolution fire8/expand3x3 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/expand3x3 0=256 1=3 2=1 3=1 4=1 5=1 6=147456
ReLU fire8/relu_expand3x3 1 1 fire8/expand3x3 fire8/expand3x3_fire8/relu_expand3x3 0=0.000000
Concat fire8/concat 2 1 fire8/expand1x1_fire8/relu_expand1x1 fire8/expand3x3_fire8/relu_expand3x3 fire8/concat 0=0
Convolution fire9/squeeze1x1 1 1 fire8/concat fire9/squeeze1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=32768
ReLU fire9/relu_squeeze1x1 1 1 fire9/squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1 0=0.000000
Split splitncnn_7 1 2 fire9/squeeze1x1_fire9/relu_squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1
Convolution fire9/expand1x1 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 fire9/expand1x1 0=256 1=1 2=1 3=1 4=0 5=1 6=16384
ReLU fire9/relu_expand1x1 1 1 fire9/expand1x1 fire9/expand1x1_fire9/relu_expand1x1 0=0.000000
Convolution fire9/expand3x3 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/expand3x3 0=256 1=3 2=1 3=1 4=1 5=1 6=147456
ReLU fire9/relu_expand3x3 1 1 fire9/expand3x3 fire9/expand3x3_fire9/relu_expand3x3 0=0.000000
Concat fire9/concat 2 1 fire9/expand1x1_fire9/relu_expand1x1 fire9/expand3x3_fire9/relu_expand3x3 fire9/concat 0=0
Dropout drop9 1 1 fire9/concat fire9/concat_drop9
Convolution conv10 1 1 fire9/concat_drop9 conv10 0=1000 1=1 2=1 3=1 4=1 5=1 6=512000
ReLU relu_conv10 1 1 conv10 conv10_relu_conv10 0=0.000000
Pooling pool10 1 1 conv10_relu_conv10 pool10 0=1 1=0 2=1 3=0 4=1
Softmax prob 1 1 pool10 prob 0=0

Binary file not shown.

View File

@ -0,0 +1,548 @@
name: "squeezenet_v1.1_deploy"
layer {
name: "data"
type: "Input"
top: "data"
input_param { shape: { dim: 1 dim: 3 dim: 227 dim: 227 } }
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 64
kernel_size: 3
stride: 2
}
}
layer {
name: "relu_conv1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fire2/squeeze1x1"
type: "Convolution"
bottom: "pool1"
top: "fire2/squeeze1x1"
convolution_param {
num_output: 16
kernel_size: 1
}
}
layer {
name: "fire2/relu_squeeze1x1"
type: "ReLU"
bottom: "fire2/squeeze1x1"
top: "fire2/squeeze1x1"
}
layer {
name: "fire2/expand1x1"
type: "Convolution"
bottom: "fire2/squeeze1x1"
top: "fire2/expand1x1"
convolution_param {
num_output: 64
kernel_size: 1
}
}
layer {
name: "fire2/relu_expand1x1"
type: "ReLU"
bottom: "fire2/expand1x1"
top: "fire2/expand1x1"
}
layer {
name: "fire2/expand3x3"
type: "Convolution"
bottom: "fire2/squeeze1x1"
top: "fire2/expand3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "fire2/relu_expand3x3"
type: "ReLU"
bottom: "fire2/expand3x3"
top: "fire2/expand3x3"
}
layer {
name: "fire2/concat"
type: "Concat"
bottom: "fire2/expand1x1"
bottom: "fire2/expand3x3"
top: "fire2/concat"
}
layer {
name: "fire3/squeeze1x1"
type: "Convolution"
bottom: "fire2/concat"
top: "fire3/squeeze1x1"
convolution_param {
num_output: 16
kernel_size: 1
}
}
layer {
name: "fire3/relu_squeeze1x1"
type: "ReLU"
bottom: "fire3/squeeze1x1"
top: "fire3/squeeze1x1"
}
layer {
name: "fire3/expand1x1"
type: "Convolution"
bottom: "fire3/squeeze1x1"
top: "fire3/expand1x1"
convolution_param {
num_output: 64
kernel_size: 1
}
}
layer {
name: "fire3/relu_expand1x1"
type: "ReLU"
bottom: "fire3/expand1x1"
top: "fire3/expand1x1"
}
layer {
name: "fire3/expand3x3"
type: "Convolution"
bottom: "fire3/squeeze1x1"
top: "fire3/expand3x3"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "fire3/relu_expand3x3"
type: "ReLU"
bottom: "fire3/expand3x3"
top: "fire3/expand3x3"
}
layer {
name: "fire3/concat"
type: "Concat"
bottom: "fire3/expand1x1"
bottom: "fire3/expand3x3"
top: "fire3/concat"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "fire3/concat"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fire4/squeeze1x1"
type: "Convolution"
bottom: "pool3"
top: "fire4/squeeze1x1"
convolution_param {
num_output: 32
kernel_size: 1
}
}
layer {
name: "fire4/relu_squeeze1x1"
type: "ReLU"
bottom: "fire4/squeeze1x1"
top: "fire4/squeeze1x1"
}
layer {
name: "fire4/expand1x1"
type: "Convolution"
bottom: "fire4/squeeze1x1"
top: "fire4/expand1x1"
convolution_param {
num_output: 128
kernel_size: 1
}
}
layer {
name: "fire4/relu_expand1x1"
type: "ReLU"
bottom: "fire4/expand1x1"
top: "fire4/expand1x1"
}
layer {
name: "fire4/expand3x3"
type: "Convolution"
bottom: "fire4/squeeze1x1"
top: "fire4/expand3x3"
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "fire4/relu_expand3x3"
type: "ReLU"
bottom: "fire4/expand3x3"
top: "fire4/expand3x3"
}
layer {
name: "fire4/concat"
type: "Concat"
bottom: "fire4/expand1x1"
bottom: "fire4/expand3x3"
top: "fire4/concat"
}
layer {
name: "fire5/squeeze1x1"
type: "Convolution"
bottom: "fire4/concat"
top: "fire5/squeeze1x1"
convolution_param {
num_output: 32
kernel_size: 1
}
}
layer {
name: "fire5/relu_squeeze1x1"
type: "ReLU"
bottom: "fire5/squeeze1x1"
top: "fire5/squeeze1x1"
}
layer {
name: "fire5/expand1x1"
type: "Convolution"
bottom: "fire5/squeeze1x1"
top: "fire5/expand1x1"
convolution_param {
num_output: 128
kernel_size: 1
}
}
layer {
name: "fire5/relu_expand1x1"
type: "ReLU"
bottom: "fire5/expand1x1"
top: "fire5/expand1x1"
}
layer {
name: "fire5/expand3x3"
type: "Convolution"
bottom: "fire5/squeeze1x1"
top: "fire5/expand3x3"
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "fire5/relu_expand3x3"
type: "ReLU"
bottom: "fire5/expand3x3"
top: "fire5/expand3x3"
}
layer {
name: "fire5/concat"
type: "Concat"
bottom: "fire5/expand1x1"
bottom: "fire5/expand3x3"
top: "fire5/concat"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "fire5/concat"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fire6/squeeze1x1"
type: "Convolution"
bottom: "pool5"
top: "fire6/squeeze1x1"
convolution_param {
num_output: 48
kernel_size: 1
}
}
layer {
name: "fire6/relu_squeeze1x1"
type: "ReLU"
bottom: "fire6/squeeze1x1"
top: "fire6/squeeze1x1"
}
layer {
name: "fire6/expand1x1"
type: "Convolution"
bottom: "fire6/squeeze1x1"
top: "fire6/expand1x1"
convolution_param {
num_output: 192
kernel_size: 1
}
}
layer {
name: "fire6/relu_expand1x1"
type: "ReLU"
bottom: "fire6/expand1x1"
top: "fire6/expand1x1"
}
layer {
name: "fire6/expand3x3"
type: "Convolution"
bottom: "fire6/squeeze1x1"
top: "fire6/expand3x3"
convolution_param {
num_output: 192
pad: 1
kernel_size: 3
}
}
layer {
name: "fire6/relu_expand3x3"
type: "ReLU"
bottom: "fire6/expand3x3"
top: "fire6/expand3x3"
}
layer {
name: "fire6/concat"
type: "Concat"
bottom: "fire6/expand1x1"
bottom: "fire6/expand3x3"
top: "fire6/concat"
}
layer {
name: "fire7/squeeze1x1"
type: "Convolution"
bottom: "fire6/concat"
top: "fire7/squeeze1x1"
convolution_param {
num_output: 48
kernel_size: 1
}
}
layer {
name: "fire7/relu_squeeze1x1"
type: "ReLU"
bottom: "fire7/squeeze1x1"
top: "fire7/squeeze1x1"
}
layer {
name: "fire7/expand1x1"
type: "Convolution"
bottom: "fire7/squeeze1x1"
top: "fire7/expand1x1"
convolution_param {
num_output: 192
kernel_size: 1
}
}
layer {
name: "fire7/relu_expand1x1"
type: "ReLU"
bottom: "fire7/expand1x1"
top: "fire7/expand1x1"
}
layer {
name: "fire7/expand3x3"
type: "Convolution"
bottom: "fire7/squeeze1x1"
top: "fire7/expand3x3"
convolution_param {
num_output: 192
pad: 1
kernel_size: 3
}
}
layer {
name: "fire7/relu_expand3x3"
type: "ReLU"
bottom: "fire7/expand3x3"
top: "fire7/expand3x3"
}
layer {
name: "fire7/concat"
type: "Concat"
bottom: "fire7/expand1x1"
bottom: "fire7/expand3x3"
top: "fire7/concat"
}
layer {
name: "fire8/squeeze1x1"
type: "Convolution"
bottom: "fire7/concat"
top: "fire8/squeeze1x1"
convolution_param {
num_output: 64
kernel_size: 1
}
}
layer {
name: "fire8/relu_squeeze1x1"
type: "ReLU"
bottom: "fire8/squeeze1x1"
top: "fire8/squeeze1x1"
}
layer {
name: "fire8/expand1x1"
type: "Convolution"
bottom: "fire8/squeeze1x1"
top: "fire8/expand1x1"
convolution_param {
num_output: 256
kernel_size: 1
}
}
layer {
name: "fire8/relu_expand1x1"
type: "ReLU"
bottom: "fire8/expand1x1"
top: "fire8/expand1x1"
}
layer {
name: "fire8/expand3x3"
type: "Convolution"
bottom: "fire8/squeeze1x1"
top: "fire8/expand3x3"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "fire8/relu_expand3x3"
type: "ReLU"
bottom: "fire8/expand3x3"
top: "fire8/expand3x3"
}
layer {
name: "fire8/concat"
type: "Concat"
bottom: "fire8/expand1x1"
bottom: "fire8/expand3x3"
top: "fire8/concat"
}
layer {
name: "fire9/squeeze1x1"
type: "Convolution"
bottom: "fire8/concat"
top: "fire9/squeeze1x1"
convolution_param {
num_output: 64
kernel_size: 1
}
}
layer {
name: "fire9/relu_squeeze1x1"
type: "ReLU"
bottom: "fire9/squeeze1x1"
top: "fire9/squeeze1x1"
}
layer {
name: "fire9/expand1x1"
type: "Convolution"
bottom: "fire9/squeeze1x1"
top: "fire9/expand1x1"
convolution_param {
num_output: 256
kernel_size: 1
}
}
layer {
name: "fire9/relu_expand1x1"
type: "ReLU"
bottom: "fire9/expand1x1"
top: "fire9/expand1x1"
}
layer {
name: "fire9/expand3x3"
type: "Convolution"
bottom: "fire9/squeeze1x1"
top: "fire9/expand3x3"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "fire9/relu_expand3x3"
type: "ReLU"
bottom: "fire9/expand3x3"
top: "fire9/expand3x3"
}
layer {
name: "fire9/concat"
type: "Concat"
bottom: "fire9/expand1x1"
bottom: "fire9/expand3x3"
top: "fire9/concat"
}
layer {
name: "drop9"
type: "Dropout"
bottom: "fire9/concat"
top: "fire9/concat"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "conv10"
type: "Convolution"
bottom: "fire9/concat"
top: "conv10"
convolution_param {
num_output: 1000
pad: 1
kernel_size: 1
}
}
layer {
name: "relu_conv10"
type: "ReLU"
bottom: "conv10"
top: "conv10"
}
layer {
name: "pool10"
type: "Pooling"
bottom: "conv10"
top: "pool10"
pooling_param {
pool: AVE
global_pooling: true
}
}
layer {
name: "prob"
type: "Softmax"
bottom: "pool10"
top: "prob"
}

152
3rdparty/ncnn/examples/squeezenetssd.cpp vendored Normal file
View File

@ -0,0 +1,152 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// A single detection: bounding box, class index and score.
struct Object
{
// Bounding box; detect_squeezenet() fills it from the network output scaled by the image width/height.
cv::Rect_<float> rect;
// Class index; draw_objects() uses it to look up a name in class_names[].
int label;
// Detection score; draw_objects() prints it multiplied by 100 as a percentage.
float prob;
};
// Runs the SqueezeNet-SSD model on an image and converts every row of the
// "detection_out" blob into an Object.
//
// bgr     - input image; its pixel buffer is passed to ncnn as PIXEL_BGR data and
//           resized to 300x300 for the network.
// objects - output; cleared first, then one entry is appended per detection row.
//
// Returns 0 on success. Returns -1 when the param/bin files cannot be loaded or the
// output blob cannot be extracted; objects is left empty in that case.
static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    objects.clear();

    ncnn::Net squeezenet;

    squeezenet.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD
    // squeezenet_ssd_voc_deploy.prototxt
    // https://drive.google.com/open?id=0B3gersZ2cHIxdGpyZlZnbEQ5Snc
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // Both loaders report failure through a non-zero return value; running inference
    // on a net that failed to load would only produce a confusing empty result.
    if (squeezenet.load_param("squeezenet_ssd_voc.param") != 0)
    {
        fprintf(stderr, "load_param squeezenet_ssd_voc.param failed\n");
        return -1;
    }
    if (squeezenet.load_model("squeezenet_ssd_voc.bin") != 0)
    {
        fprintf(stderr, "load_model squeezenet_ssd_voc.bin failed\n");
        return -1;
    }

    const int target_size = 300;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    // mean subtraction only; passing 0 as the second argument skips the scaling step
    const float mean_vals[3] = {104.f, 117.f, 123.f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Extractor ex = squeezenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("detection_out", out) != 0)
    {
        fprintf(stderr, "extract detection_out failed\n");
        return -1;
    }

    // printf("%d %d %d\n", out.w, out.h, out.c);
    for (int i = 0; i < out.h; i++)
    {
        // row layout as consumed here: [0] label, [1] score, [2..5] box corners;
        // the corners are presumably normalized to [0,1] since they are scaled by the image size
        const float* values = out.row(i);

        Object object;
        object.label = static_cast<int>(values[0]);
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
// Draws every detection (box plus "<class> <score>%" caption) on a copy of the image,
// logs each detection to stderr, then shows the result in a window named "image" and
// waits for a key press.
//
// bgr     - source image; it is cloned, the original is not modified.
// objects - detections to draw.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // The label comes straight from the network output, so it must not be used
        // as an array index without a range check.
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, shifted back inside the image when it would
        // stick out at the top or at the right edge
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // filled white background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_squeezenet(m, objects);
draw_objects(m, objects);
return 0;
}

1000
3rdparty/ncnn/examples/synset_words.txt vendored Normal file

File diff suppressed because it is too large Load Diff

544
3rdparty/ncnn/examples/yolact.cpp vendored Normal file
View File

@ -0,0 +1,544 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// A single YOLACT detection: box, class, score and instance mask.
struct Object
{
// Bounding box; detect_yolact() stores it scaled to and clipped against the source image size.
cv::Rect_<float> rect;
// Class index; index 0 is treated as background by detect_yolact().
int label;
// Score of the chosen class.
float prob;
// Per-detection mask coefficients copied from one row of the network's mask output.
std::vector<float> maskdata;
// Instance mask built by detect_yolact(): image-sized CV_8UC1 holding 0 or 255.
cv::Mat mask;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Sorts objects[left..right] (inclusive indices) in place by descending prob.
// Partitions around the score of the middle element, then recurses into both halves.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
int i = left;
int j = right;
// pivot score, taken by value so the swaps below cannot change it
float p = objects[(left + right) / 2].prob;
while (i <= j)
{
// skip elements that are already on the correct side of the pivot
while (objects[i].prob > p)
i++;
while (objects[j].prob < p)
j--;
if (i <= j)
{
// swap
std::swap(objects[i], objects[j]);
i++;
j--;
}
}
// the two sub-ranges do not overlap, so each recursion is placed in its own OpenMP section
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(objects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(objects, i, right);
}
}
}
// Convenience overload: sorts the whole vector by descending prob.
// An empty vector is left untouched.
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
        qsort_descent_inplace(objects, 0, static_cast<int>(objects.size() - 1));
}
// Greedy non-maximum suppression.
//
// objects       - candidates, visited in the given order; callers in this file sort them
//                 by descending score first, so earlier entries win over later ones.
// picked        - output; cleared, then filled with the indices of the kept candidates.
// nms_threshold - a candidate is dropped when its IoU with any already kept
//                 candidate is strictly greater than this value.
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = objects.size();

    // box areas are needed for every pair, so compute them once
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = objects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = objects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = objects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                keep = 0;
                // one overlapping kept box is enough; the remaining comparisons
                // cannot change the decision
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Runs the YOLACT instance segmentation model on an image.
//
// Steps: load the net, feed the image resized to 550x550, decode boxes against
// generated prior boxes, run per-class NMS, keep the best keep_top_k detections and
// build a binary mask for each of them.
//
// bgr     - input image; its pixel buffer is passed to ncnn with BGR->RGB conversion.
// objects - output; cleared first, then filled with the final detections sorted by
//           descending score, each with rect, label, prob, maskdata and mask set.
//
// Returns 0 on success. Returns -1 when the param/bin files cannot be loaded or an
// output blob cannot be extracted; objects is left empty in that case.
static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects)
{
    objects.clear();

    ncnn::Net yolact;

    yolact.opt.use_vulkan_compute = true;

    // original model converted from https://github.com/dbolya/yolact
    // yolact_resnet50_54_800000.pth
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // Both loaders report failure through a non-zero return value; continuing with a
    // net that failed to load would decode garbage below.
    if (yolact.load_param("yolact.param") != 0)
    {
        fprintf(stderr, "load_param yolact.param failed\n");
        return -1;
    }
    if (yolact.load_model("yolact.bin") != 0)
    {
        fprintf(stderr, "load_model yolact.bin failed\n");
        return -1;
    }

    const int target_size = 550;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, target_size, target_size);

    const float mean_vals[3] = {123.68f, 116.78f, 103.94f};
    const float norm_vals[3] = {1.0 / 58.40f, 1.0 / 57.12f, 1.0 / 57.38f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = yolact.create_extractor();

    ex.input("input.1", in);

    ncnn::Mat maskmaps;
    ncnn::Mat location;
    ncnn::Mat mask;
    ncnn::Mat confidence;

    // the rows of location / mask / confidence are indexed with the same prior index below
    if (ex.extract("619", maskmaps) != 0          // 138x138 x 32
            || ex.extract("816", location) != 0   // 4 x 19248
            || ex.extract("818", mask) != 0       // maskdim 32 x 19248
            || ex.extract("820", confidence) != 0) // 81 x 19248
    {
        fprintf(stderr, "yolact extract failed\n");
        return -1;
    }

    int num_class = confidence.w;
    int num_priors = confidence.h;

    // make priorbox
    // NOTE(review): the loops below always write 3 priors per cell of the five grids;
    // this assumes num_priors reported by the model matches that count - confirm when
    // swapping in a different model.
    ncnn::Mat priorbox(4, num_priors);
    {
        const int conv_ws[5] = {69, 35, 18, 9, 5};
        const int conv_hs[5] = {69, 35, 18, 9, 5};

        const float aspect_ratios[3] = {1.f, 0.5f, 2.f};
        const float scales[5] = {24.f, 48.f, 96.f, 192.f, 384.f};

        float* pb = priorbox;

        for (int p = 0; p < 5; p++)
        {
            int conv_w = conv_ws[p];
            int conv_h = conv_hs[p];

            float scale = scales[p];

            for (int i = 0; i < conv_h; i++)
            {
                for (int j = 0; j < conv_w; j++)
                {
                    // +0.5 because priors are in center-size notation
                    float cx = (j + 0.5f) / conv_w;
                    float cy = (i + 0.5f) / conv_h;

                    for (int k = 0; k < 3; k++)
                    {
                        float ar = aspect_ratios[k];

                        ar = sqrt(ar);

                        float w = scale * ar / 550;

                        // This is for backward compatibility with a bug where I made everything square by accident
                        // cfg.backbone.use_square_anchors:
                        // the height is therefore the width, not scale / ar / 550
                        float h = w;

                        pb[0] = cx;
                        pb[1] = cy;
                        pb[2] = w;
                        pb[3] = h;

                        pb += 4;
                    }
                }
            }
        }
    }

    const float confidence_thresh = 0.05f;
    const float nms_threshold = 0.5f;
    const int keep_top_k = 200;

    // candidates are grouped by class so that NMS only compares boxes of the same class
    std::vector<std::vector<Object> > class_candidates;
    class_candidates.resize(num_class);

    for (int i = 0; i < num_priors; i++)
    {
        const float* conf = confidence.row(i);
        const float* loc = location.row(i);
        const float* pb = priorbox.row(i);
        const float* maskdata = mask.row(i);

        // find class id with highest score
        // start from 1 to skip background
        int label = 0;
        float score = 0.f;
        for (int j = 1; j < num_class; j++)
        {
            float class_score = conf[j];
            if (class_score > score)
            {
                label = j;
                score = class_score;
            }
        }

        // ignore background or low score
        if (label == 0 || score <= confidence_thresh)
            continue;

        // CENTER_SIZE
        float var[4] = {0.1f, 0.1f, 0.2f, 0.2f};

        float pb_cx = pb[0];
        float pb_cy = pb[1];
        float pb_w = pb[2];
        float pb_h = pb[3];

        // decode the predicted offsets relative to the prior box
        float bbox_cx = var[0] * loc[0] * pb_w + pb_cx;
        float bbox_cy = var[1] * loc[1] * pb_h + pb_cy;
        float bbox_w = (float)(exp(var[2] * loc[2]) * pb_w);
        float bbox_h = (float)(exp(var[3] * loc[3]) * pb_h);

        float obj_x1 = bbox_cx - bbox_w * 0.5f;
        float obj_y1 = bbox_cy - bbox_h * 0.5f;
        float obj_x2 = bbox_cx + bbox_w * 0.5f;
        float obj_y2 = bbox_cy + bbox_h * 0.5f;

        // clip
        obj_x1 = std::max(std::min(obj_x1 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
        obj_y1 = std::max(std::min(obj_y1 * bgr.rows, (float)(bgr.rows - 1)), 0.f);
        obj_x2 = std::max(std::min(obj_x2 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
        obj_y2 = std::max(std::min(obj_y2 * bgr.rows, (float)(bgr.rows - 1)), 0.f);

        // append object
        Object obj;
        obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
        obj.label = label;
        obj.prob = score;
        obj.maskdata = std::vector<float>(maskdata, maskdata + mask.w);

        class_candidates[label].push_back(obj);
    }

    for (int i = 0; i < (int)class_candidates.size(); i++)
    {
        std::vector<Object>& candidates = class_candidates[i];

        qsort_descent_inplace(candidates);

        std::vector<int> picked;
        nms_sorted_bboxes(candidates, picked, nms_threshold);

        for (int j = 0; j < (int)picked.size(); j++)
        {
            int z = picked[j];
            objects.push_back(candidates[z]);
        }
    }

    // merge the per-class survivors into one ranking
    qsort_descent_inplace(objects);

    // keep_top_k
    if (keep_top_k < (int)objects.size())
    {
        objects.resize(keep_top_k);
    }

    // generate mask
    for (int i = 0; i < (int)objects.size(); i++)
    {
        Object& obj = objects[i];

        // linear combination of the mask maps weighted by this detection's coefficients
        cv::Mat mask(maskmaps.h, maskmaps.w, CV_32FC1);
        {
            mask = cv::Scalar(0.f);

            for (int p = 0; p < maskmaps.c; p++)
            {
                const float* maskmap = maskmaps.channel(p);
                float coeff = obj.maskdata[p];
                float* mp = (float*)mask.data;

                // mask += m * coeff
                for (int j = 0; j < maskmaps.w * maskmaps.h; j++)
                {
                    mp[j] += maskmap[j] * coeff;
                }
            }
        }

        cv::Mat mask2;
        cv::resize(mask, mask2, cv::Size(img_w, img_h));

        // crop obj box and binarize
        obj.mask = cv::Mat(img_h, img_w, CV_8UC1);
        {
            obj.mask = cv::Scalar(0);

            for (int y = 0; y < img_h; y++)
            {
                // rows outside the box stay 0
                if (y < obj.rect.y || y > obj.rect.y + obj.rect.height)
                    continue;

                const float* mp2 = mask2.ptr<const float>(y);
                uchar* bmp = obj.mask.ptr<uchar>(y);

                for (int x = 0; x < img_w; x++)
                {
                    // columns outside the box stay 0
                    if (x < obj.rect.x || x > obj.rect.x + obj.rect.width)
                        continue;

                    bmp[x] = mp2[x] > 0.5f ? 255 : 0;
                }
            }
        }
    }

    return 0;
}
// Draws every detection with a score of at least 0.15 on a copy of the image: box,
// "<class> <score>%" caption and a half-transparent overlay of its mask. Each drawn
// detection is also logged to stderr. The result is written to "result.png", shown in
// a window named "image", and the function waits for a key press.
//
// bgr     - source image; it is cloned, the original is not modified.
// objects - detections to draw; obj.mask is read row by row for every image row, so it
//           is expected to have the same size as bgr (as produced by detect_yolact()).
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "person", "bicycle", "car", "motorcycle", "airplane", "bus",
                                        "train", "truck", "boat", "traffic light", "fire hydrant",
                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
                                        "scissors", "teddy bear", "hair drier", "toothbrush"
                                       };
    const int num_class_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    // one color per drawn detection, cycled in drawing order
    static const unsigned char colors[81][3] = {
        {56, 0, 255},
        {226, 255, 0},
        {0, 94, 255},
        {0, 37, 255},
        {0, 255, 94},
        {255, 226, 0},
        {0, 18, 255},
        {255, 151, 0},
        {170, 0, 255},
        {0, 255, 56},
        {255, 0, 75},
        {0, 75, 255},
        {0, 255, 169},
        {255, 0, 207},
        {75, 255, 0},
        {207, 0, 255},
        {37, 0, 255},
        {0, 207, 255},
        {94, 0, 255},
        {0, 255, 113},
        {255, 18, 0},
        {255, 0, 56},
        {18, 0, 255},
        {0, 255, 226},
        {170, 255, 0},
        {255, 0, 245},
        {151, 255, 0},
        {132, 255, 0},
        {75, 0, 255},
        {151, 0, 255},
        {0, 151, 255},
        {132, 0, 255},
        {0, 255, 245},
        {255, 132, 0},
        {226, 0, 255},
        {255, 37, 0},
        {207, 255, 0},
        {0, 255, 207},
        {94, 255, 0},
        {0, 226, 255},
        {56, 255, 0},
        {255, 94, 0},
        {255, 113, 0},
        {0, 132, 255},
        {255, 0, 132},
        {255, 170, 0},
        {255, 0, 188},
        {113, 255, 0},
        {245, 0, 255},
        {113, 0, 255},
        {255, 188, 0},
        {0, 113, 255},
        {255, 0, 0},
        {0, 56, 255},
        {255, 0, 113},
        {0, 255, 188},
        {255, 0, 94},
        {255, 0, 18},
        {18, 255, 0},
        {0, 255, 132},
        {0, 188, 255},
        {0, 245, 255},
        {0, 169, 255},
        {37, 255, 0},
        {255, 0, 151},
        {188, 0, 255},
        {0, 255, 37},
        {0, 255, 0},
        {255, 0, 170},
        {255, 0, 37},
        {255, 75, 0},
        {0, 0, 255},
        {255, 207, 0},
        {255, 0, 226},
        {255, 245, 0},
        {188, 255, 0},
        {0, 255, 18},
        {0, 255, 75},
        {0, 255, 151},
        {255, 56, 0},
        {245, 255, 0}
    };

    cv::Mat image = bgr.clone();

    int color_index = 0;

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        if (obj.prob < 0.15)
            continue;

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        const unsigned char* color = colors[color_index % 81];
        color_index++;

        cv::rectangle(image, obj.rect, cv::Scalar(color[0], color[1], color[2]));

        // The label is derived from the network output, so it must not be used as an
        // array index without a range check.
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, shifted back inside the image when it would
        // stick out at the top or at the right edge
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // filled white background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));

        // draw mask
        for (int y = 0; y < image.rows; y++)
        {
            const uchar* mp = obj.mask.ptr(y);
            uchar* p = image.ptr(y);
            for (int x = 0; x < image.cols; x++)
            {
                // blend the pixel 50/50 with the detection color where the mask is set
                if (mp[x] == 255)
                {
                    p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + color[0] * 0.5);
                    p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + color[1] * 0.5);
                    p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + color[2] * 0.5);
                }
                // advance to the next 3-byte pixel
                p += 3;
            }
        }
    }

    cv::imwrite("result.png", image);
    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_yolact(m, objects);
draw_objects(m, objects);
return 0;
}

156
3rdparty/ncnn/examples/yolov2.cpp vendored Normal file
View File

@ -0,0 +1,156 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// A single detection: bounding box, class index and score.
struct Object
{
// Bounding box; detect_yolov2() fills it from the network output scaled by the image width/height.
cv::Rect_<float> rect;
// Class index; draw_objects() uses it to look up a name in class_names[].
int label;
// Detection score; draw_objects() prints it multiplied by 100 as a percentage.
float prob;
};
// Runs the MobileNet-YOLOv2 model on an image and converts every row of the
// "detection_out" blob into an Object.
//
// bgr     - input image; its pixel buffer is passed to ncnn as PIXEL_BGR data and
//           resized to 416x416 for the network.
// objects - output; cleared first, then one entry is appended per detection row.
//
// Returns 0 on success. Returns -1 when the param/bin files cannot be loaded or the
// output blob cannot be extracted; objects is left empty in that case.
static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects)
{
    objects.clear();

    ncnn::Net yolov2;

    yolov2.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
    // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy.prototxt
    // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy_iter_80000.caffemodel
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // Both loaders report failure through a non-zero return value; running inference
    // on a net that failed to load would only produce a confusing empty result.
    if (yolov2.load_param("mobilenet_yolo.param") != 0)
    {
        fprintf(stderr, "load_param mobilenet_yolo.param failed\n");
        return -1;
    }
    if (yolov2.load_model("mobilenet_yolo.bin") != 0)
    {
        fprintf(stderr, "load_model mobilenet_yolo.bin failed\n");
        return -1;
    }

    const int target_size = 416;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    // the Caffe-YOLOv2-Windows style
    // X' = X * scale - mean
    // hence two passes: scale first (no mean), then subtract the mean (no scale)
    const float mean_vals[3] = {1.0f, 1.0f, 1.0f};
    const float norm_vals[3] = {0.007843f, 0.007843f, 0.007843f};
    in.substract_mean_normalize(0, norm_vals);
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Extractor ex = yolov2.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("detection_out", out) != 0)
    {
        fprintf(stderr, "extract detection_out failed\n");
        return -1;
    }

    // printf("%d %d %d\n", out.w, out.h, out.c);
    for (int i = 0; i < out.h; i++)
    {
        // row layout as consumed here: [0] label, [1] score, [2..5] box corners;
        // the corners are presumably normalized to [0,1] since they are scaled by the image size
        const float* values = out.row(i);

        Object object;
        object.label = static_cast<int>(values[0]);
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
// Draws every detection (box plus "<class> <score>%" caption) on a copy of the image,
// logs each detection to stderr, then shows the result in a window named "image" and
// waits for a key press.
//
// bgr     - source image; it is cloned, the original is not modified.
// objects - detections to draw.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // The label comes straight from the network output, so it must not be used
        // as an array index without a range check.
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, shifted back inside the image when it would
        // stick out at the top or at the right edge
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // filled white background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_yolov2(m, objects);
draw_objects(m, objects);
return 0;
}

153
3rdparty/ncnn/examples/yolov3.cpp vendored Normal file
View File

@ -0,0 +1,153 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <stdio.h>
#include <vector>
// A single detection: bounding box, class index and score.
struct Object
{
// Bounding box; detect_yolov3() fills it from the network output scaled by the image width/height.
cv::Rect_<float> rect;
// Class index; draw_objects() uses it to look up a name in class_names[].
int label;
// Detection score; draw_objects() prints it multiplied by 100 as a percentage.
float prob;
};
// Runs the MobileNetV2-YOLOv3 model on an image and converts every row of the
// "detection_out" blob into an Object.
//
// bgr     - input image; its pixel buffer is passed to ncnn as PIXEL_BGR data and
//           resized to 352x352 for the network.
// objects - output; cleared first, then one entry is appended per detection row.
//
// Returns 0 on success. Returns -1 when the param/bin files cannot be loaded or the
// output blob cannot be extracted; objects is left empty in that case.
static int detect_yolov3(const cv::Mat& bgr, std::vector<Object>& objects)
{
    objects.clear();

    ncnn::Net yolov3;

    yolov3.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
    // param : https://drive.google.com/open?id=1V9oKHP6G6XvXZqhZbzNKL6FI_clRWdC-
    // bin : https://drive.google.com/open?id=1DBcuFCr-856z3FRQznWL_S5h-Aj3RawA
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // Both loaders report failure through a non-zero return value; running inference
    // on a net that failed to load would only produce a confusing empty result.
    if (yolov3.load_param("mobilenetv2_yolov3.param") != 0)
    {
        fprintf(stderr, "load_param mobilenetv2_yolov3.param failed\n");
        return -1;
    }
    if (yolov3.load_model("mobilenetv2_yolov3.bin") != 0)
    {
        fprintf(stderr, "load_model mobilenetv2_yolov3.bin failed\n");
        return -1;
    }

    const int target_size = 352;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    // subtract the mean and scale in a single pass
    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {0.007843f, 0.007843f, 0.007843f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = yolov3.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("detection_out", out) != 0)
    {
        fprintf(stderr, "extract detection_out failed\n");
        return -1;
    }

    // printf("%d %d %d\n", out.w, out.h, out.c);
    for (int i = 0; i < out.h; i++)
    {
        // row layout as consumed here: [0] label, [1] score, [2..5] box corners;
        // the corners are presumably normalized to [0,1] since they are scaled by the image size
        const float* values = out.row(i);

        Object object;
        object.label = static_cast<int>(values[0]);
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
// Draws every detection (box plus "<class> <score>%" caption) on a copy of the image,
// logs each detection to stderr, then shows the result in a window named "image" and
// waits for a key press.
//
// bgr     - source image; it is cloned, the original is not modified.
// objects - detections to draw.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // The label comes straight from the network output, so it must not be used
        // as an array index without a range check.
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, shifted back inside the image when it would
        // stick out at the top or at the right edge
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // filled white background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_yolov3(m, objects);
draw_objects(m, objects);
return 0;
}

311
3rdparty/ncnn/examples/yolov4.cpp vendored Normal file
View File

@ -0,0 +1,311 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "net.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#if CV_MAJOR_VERSION >= 3
#include <opencv2/videoio/videoio.hpp>
#endif
#include <vector>
#include <stdio.h>
#include <string.h>
#define NCNN_PROFILING
#define YOLOV4_TINY //Using yolov4_tiny, if undef, using original yolov4
#ifdef NCNN_PROFILING
#include "benchmark.h"
#endif
// A single detection: bounding box, class index and score.
struct Object
{
// Bounding box; detect_yolov4() fills it from the network output scaled by the image width/height.
cv::Rect_<float> rect;
// Class index; draw_objects() uses it to look up a name in class_names[].
int label;
// Detection score; draw_objects() prints it multiplied by 100 as a percentage.
float prob;
};
// Configures the inference options of the given net and loads the YOLOv4 model
// selected at compile time (tiny variant when YOLOV4_TINY is defined).
//
// yolov4      - net to configure and load.
// target_size - receives the network input size used for the selected model
//               (416 for the tiny variant, 608 otherwise).
//
// Returns 0 on success, otherwise the non-zero code returned by load_param or
// load_model. target_size is written before loading is attempted.
static int init_yolov4(ncnn::Net* yolov4, int* target_size)
{
    /* --> Set the params you need for the ncnn inference <-- */

    // multi thread support needs a build with libgomp
    yolov4->opt.num_threads = 4;

    // gpu support needs a build with libvulkan
    yolov4->opt.use_vulkan_compute = true;

    yolov4->opt.use_winograd_convolution = true;
    yolov4->opt.use_sgemm_convolution = true;
    yolov4->opt.use_fp16_packed = true;
    yolov4->opt.use_fp16_storage = true;
    yolov4->opt.use_fp16_arithmetic = true;
    yolov4->opt.use_packing_layout = true;
    yolov4->opt.use_shader_pack8 = false;
    yolov4->opt.use_image_storage = false;

    /* --> End of setting params <-- */

    // original pretrained model from https://github.com/AlexeyAB/darknet
    // the ncnn model https://drive.google.com/drive/folders/1YzILvh0SKQPS_lrb33dmGNq7aVTKPWS0?usp=sharing
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
#ifdef YOLOV4_TINY
    const char* param_path = "yolov4-tiny-opt.param";
    const char* model_path = "yolov4-tiny-opt.bin";
    *target_size = 416;
#else
    const char* param_path = "yolov4-opt.param";
    const char* model_path = "yolov4-opt.bin";
    *target_size = 608;
#endif

    const int param_status = yolov4->load_param(param_path);
    if (param_status != 0)
        return param_status;

    // the status of the weight load is the overall result
    return yolov4->load_model(model_path);
}
// Runs an already loaded YOLOv4 net on one frame and converts every row of the
// "output" blob into an Object.
//
// bgr         - input frame; its pixel buffer is passed to ncnn with BGR->RGB conversion.
// objects     - output; cleared, then one entry is appended per detection row.
// target_size - square network input size the frame is resized to.
// yolov4      - loaded net used to create the extractor.
//
// Always returns 0.
static int detect_yolov4(const cv::Mat& bgr, std::vector<Object>& objects, int target_size, ncnn::Net* yolov4)
{
    const int src_w = bgr.cols;
    const int src_h = bgr.rows;

    ncnn::Mat input = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, src_w, src_h, target_size, target_size);

    // no mean shift, only scaling by 1/255
    const float mean_vals[3] = {0, 0, 0};
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    input.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = yolov4->create_extractor();
    ex.input("data", input);

    ncnn::Mat detections;
    ex.extract("output", detections);

    objects.clear();

    for (int row = 0; row < detections.h; ++row)
    {
        // row layout as consumed here: [0] label, [1] score, [2..5] box corners
        const float* v = detections.row(row);

        Object det;
        det.label = v[0];
        det.prob = v[1];
        det.rect.x = v[2] * src_w;
        det.rect.y = v[3] * src_h;
        det.rect.width = v[4] * src_w - det.rect.x;
        det.rect.height = v[5] * src_h - det.rect.y;

        objects.push_back(det);
    }

    return 0;
}
// Draws every detection (box plus "<class> <score>%" caption) on a copy of the frame,
// logs each detection to stderr and shows the result in a window named "image".
//
// bgr          - source frame; it is cloned, the original is not modified.
// objects      - detections to draw.
// is_streaming - non-zero: wait only 1 ms after showing the frame so a capture loop can
//                continue; zero: block until a key is pressed.
//
// Always returns 0.
static int draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, int is_streaming)
{
    static const char* class_names[] = {"background", "person", "bicycle",
                                        "car", "motorbike", "aeroplane", "bus", "train", "truck",
                                        "boat", "traffic light", "fire hydrant", "stop sign",
                                        "parking meter", "bench", "bird", "cat", "dog", "horse",
                                        "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                        "backpack", "umbrella", "handbag", "tie", "suitcase",
                                        "frisbee", "skis", "snowboard", "sports ball", "kite",
                                        "baseball bat", "baseball glove", "skateboard", "surfboard",
                                        "tennis racket", "bottle", "wine glass", "cup", "fork",
                                        "knife", "spoon", "bowl", "banana", "apple", "sandwich",
                                        "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
                                        "cake", "chair", "sofa", "pottedplant", "bed", "diningtable",
                                        "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard",
                                        "cell phone", "microwave", "oven", "toaster", "sink",
                                        "refrigerator", "book", "clock", "vase", "scissors",
                                        "teddy bear", "hair drier", "toothbrush"
                                       };
    const int num_class_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // The label comes straight from the network output, so it must not be used
        // as an array index without a range check.
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, shifted back inside the image when it would
        // stick out at the top or at the right edge
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // filled white background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);

    if (is_streaming)
    {
        cv::waitKey(1);
    }
    else
    {
        cv::waitKey(0);
    }

    return 0;
}
// Entry point. The first argument is either an image path or a capture device path:
// an argument containing "/dev/video" is opened as a video device and processed frame
// by frame in an endless loop, anything else is read as a still image and processed
// once. Timing information is printed to stdout when NCNN_PROFILING is defined.
//
// Returns 0 after a still image was processed and -1 on any failure (bad usage, model
// load failure, unreadable image, device open or capture failure).
int main(int argc, char** argv)
{
    cv::Mat frame;
    std::vector<Object> objects;

    cv::VideoCapture cap;

    ncnn::Net yolov4;

    const char* devicepath;

    int target_size = 0;
    int is_streaming = 0;

    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s [v4l input device or image]\n", argv[0]);
        return -1;
    }

    devicepath = argv[1];

#ifdef NCNN_PROFILING
    double t_load_start = ncnn::get_current_time();
#endif

    int ret = init_yolov4(&yolov4, &target_size); //We load model and param first!
    if (ret != 0)
    {
        // terminate the message with a newline so it does not run into later output
        fprintf(stderr, "Failed to load model or param, error %d\n", ret);
        return -1;
    }

#ifdef NCNN_PROFILING
    double t_load_end = ncnn::get_current_time();
    fprintf(stdout, "NCNN Init time %.02lfms\n", t_load_end - t_load_start);
#endif

    if (strstr(devicepath, "/dev/video") == NULL)
    {
        // not a capture device: treat the argument as a still image
        frame = cv::imread(argv[1], 1);
        if (frame.empty())
        {
            fprintf(stderr, "Failed to read image %s.\n", argv[1]);
            return -1;
        }
    }
    else
    {
        cap.open(devicepath);

        if (!cap.isOpened())
        {
            // terminate the message with a newline so it does not run into later output
            fprintf(stderr, "Failed to open %s\n", devicepath);
            return -1;
        }

        // probe read: verifies the device delivers frames before entering the loop
        cap >> frame;

        if (frame.empty())
        {
            fprintf(stderr, "Failed to read from device %s.\n", devicepath);
            return -1;
        }

        is_streaming = 1;
    }

    while (1)
    {
        if (is_streaming)
        {
#ifdef NCNN_PROFILING
            double t_capture_start = ncnn::get_current_time();
#endif

            cap >> frame;

#ifdef NCNN_PROFILING
            double t_capture_end = ncnn::get_current_time();
            fprintf(stdout, "NCNN OpenCV capture time %.02lfms\n", t_capture_end - t_capture_start);
#endif
            if (frame.empty())
            {
                fprintf(stderr, "OpenCV Failed to Capture from device %s\n", devicepath);
                return -1;
            }
        }

#ifdef NCNN_PROFILING
        double t_detect_start = ncnn::get_current_time();
#endif

        detect_yolov4(frame, objects, target_size, &yolov4); //Create an extractor and run detection

#ifdef NCNN_PROFILING
        double t_detect_end = ncnn::get_current_time();
        fprintf(stdout, "NCNN detection time %.02lfms\n", t_detect_end - t_detect_start);
#endif

#ifdef NCNN_PROFILING
        double t_draw_start = ncnn::get_current_time();
#endif

        draw_objects(frame, objects, is_streaming); //Draw detection results on opencv image

#ifdef NCNN_PROFILING
        double t_draw_end = ncnn::get_current_time();
        fprintf(stdout, "NCNN OpenCV draw result time %.02lfms\n", t_draw_end - t_draw_start);
#endif

        if (!is_streaming)
        { //If it is a still image, exit!
            return 0;
        }
    }

    return 0;
}

503
3rdparty/ncnn/examples/yolov5.cpp vendored Normal file
View File

@ -0,0 +1,503 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "layer.h"
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <float.h>
#include <stdio.h>
#include <vector>
#define YOLOV5_V60 1 //YOLOv5 v6.0
#if YOLOV5_V60
#define MAX_STRIDE 64
#else
#define MAX_STRIDE 32
// Custom ncnn layer registered under the name "YoloV5Focus".
//
// Produces a blob with half the width, half the height and four times the
// channels of the input. Output channel p copies every second pixel of input
// channel (p % channels), starting at row ((p / channels) % 2) and column
// ((p / channels) / 2).
class YoloV5Focus : public ncnn::Layer
{
public:
    YoloV5Focus()
    {
        one_blob_only = true;
    }

    // Returns 0 on success, -100 when the output blob could not be allocated.
    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
    {
        const int in_w = bottom_blob.w;
        const int in_c = bottom_blob.c;

        const int out_w = in_w / 2;
        const int out_h = bottom_blob.h / 2;
        const int out_c = in_c * 4;

        top_blob.create(out_w, out_h, out_c, 4u, 1, opt.blob_allocator);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for num_threads(opt.num_threads)
        for (int p = 0; p < out_c; p++)
        {
            // phase selects which of the four pixel offsets this channel samples
            const int phase = p / in_c;
            const float* src = bottom_blob.channel(p % in_c).row(phase % 2) + (phase / 2);
            float* dst = top_blob.channel(p);

            for (int y = 0; y < out_h; y++)
            {
                for (int x = 0; x < out_w; x++)
                {
                    *dst++ = *src;
                    src += 2;
                }

                // skip the input row that belongs to the other row phase
                src += in_w;
            }
        }

        return 0;
    }
};
DEFINE_LAYER_CREATOR(YoloV5Focus)
#endif //YOLOV5_V60
// One detection: a bounding box with its class and score.
struct Object
{
// bounding box (x, y, width, height) in float pixel coordinates
cv::Rect_<float> rect;
// class index; draw_objects() uses it to index its class_names table
int label;
// detection confidence; proposals are sorted by it in descending order
float prob;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Recursive in-place quicksort of faceobjects[left..right] (both inclusive)
// into descending order of prob. The pivot is the prob of the middle element.
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // advance past elements that are already on the correct side
        while (faceobjects[lo].prob > pivot)
            ++lo;

        while (faceobjects[hi].prob < pivot)
            --hi;

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        ++lo;
        --hi;
    }

    // the two partitions do not overlap, so each is handled in its own OpenMP section
    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(faceobjects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}
// Sort the whole vector by descending prob; an empty vector is left untouched.
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
{
    if (!faceobjects.empty())
    {
        qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
    }
}
// Non-maximum suppression over boxes that the callers have already sorted by
// descending prob.
//
// faceobjects   candidate boxes, visited in index order
// picked        output: indices of the boxes that are kept (cleared first)
// nms_threshold a box is dropped when its IoU with an already kept box
//               exceeds this value
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // box areas are reused for every IoU computation below
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                // one overlapping kept box is enough; no need to test the rest
                keep = 0;
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Logistic function 1 / (1 + e^-x), returned as float.
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (exp(-x) + 1.f));
}
// Decode the raw output of one detection head into candidate boxes.
//
// anchors         flat list of (width, height) pairs, one pair per anchor
// stride          size of one grid cell in input pixels
// in_pad          padded network input; only its w/h are read, to recover the grid shape
// feat_blob       head output: one channel per anchor, one row per grid cell,
//                 each row holding x, y, w, h, objectness and the class scores
// prob_threshold  minimum objectness and minimum final confidence
// objects         output; accepted boxes are appended
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int num_grid = feat_blob.h;

    // the cell count along the longer input side comes from the stride,
    // the other one from the total number of rows
    const bool wider_than_tall = in_pad.w > in_pad.h;
    const int long_side_cells = (wider_than_tall ? in_pad.w : in_pad.h) / stride;
    const int short_side_cells = num_grid / long_side_cells;
    const int num_grid_x = wider_than_tall ? long_side_cells : short_side_cells;
    const int num_grid_y = wider_than_tall ? short_side_cells : long_side_cells;

    const int num_class = feat_blob.w - 5;
    const int num_anchors = anchors.w / 2;

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];

        const ncnn::Mat feat = feat_blob.channel(q);

        for (int gy = 0; gy < num_grid_y; gy++)
        {
            for (int gx = 0; gx < num_grid_x; gx++)
            {
                const float* cell = feat.row(gy * num_grid_x + gx);

                const float box_confidence = sigmoid(cell[4]);
                if (!(box_confidence >= prob_threshold))
                    continue;

                // find class index with max class score
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int k = 0; k < num_class; k++)
                {
                    const float score = cell[5 + k];
                    if (score > class_score)
                    {
                        class_index = k;
                        class_score = score;
                    }
                }

                const float confidence = box_confidence * sigmoid(class_score);
                if (!(confidence >= prob_threshold))
                    continue;

                // yolov5/models/yolo.py Detect forward
                // y = x[i].sigmoid()
                // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                float dx = sigmoid(cell[0]);
                float dy = sigmoid(cell[1]);
                float dw = sigmoid(cell[2]);
                float dh = sigmoid(cell[3]);

                float pb_cx = (dx * 2.f - 0.5f + gx) * stride;
                float pb_cy = (dy * 2.f - 0.5f + gy) * stride;

                float pb_w = pow(dw * 2.f, 2) * anchor_w;
                float pb_h = pow(dh * 2.f, 2) * anchor_h;

                // convert centre/size to corner coordinates
                float x0 = pb_cx - pb_w * 0.5f;
                float y0 = pb_cy - pb_h * 0.5f;
                float x1 = pb_cx + pb_w * 0.5f;
                float y1 = pb_cy + pb_h * 0.5f;

                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0;
                obj.rect.height = y1 - y0;
                obj.label = class_index;
                obj.prob = confidence;

                objects.push_back(obj);
            }
        }
    }
}
// Run YOLOv5 on a BGR image and store the final detections in `objects`.
//
// Steps: load the network, resize the image so its longer side is 640,
// pad to a multiple of MAX_STRIDE, decode the three output heads
// (stride 8/16/32), sort, apply NMS and map the boxes back to the
// coordinates of the original image.
//
// Returns 0 on success. Returns -1 (with `objects` cleared) when the param or
// model file could not be loaded.
static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolov5;

    yolov5.opt.use_vulkan_compute = true;
    // yolov5.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/ultralytics/yolov5
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    int load_ret;
#if YOLOV5_V60
    load_ret = yolov5.load_param("yolov5s_6.0.param");
    if (load_ret == 0)
        load_ret = yolov5.load_model("yolov5s_6.0.bin");
#else
    yolov5.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator);

    load_ret = yolov5.load_param("yolov5s.param");
    if (load_ret == 0)
        load_ret = yolov5.load_model("yolov5s.bin");
#endif
    if (load_ret != 0)
    {
        // without a loaded network nothing below can produce a result
        fprintf(stderr, "failed to load yolov5 param/model, error %d\n", load_ret);
        objects.clear();
        return -1;
    }

    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    // letterbox pad to multiple of MAX_STRIDE
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    // pad to target_size rectangle
    // yolov5/utils/datasets.py letterbox
    int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
    int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // scale pixel values from [0, 255] to [0, 1]
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov5.create_extractor();

    ex.input("images", in_pad);

    std::vector<Object> proposals;

    // anchor setting from yolov5/models/yolov5s.yaml

    // stride 8
    {
        ncnn::Mat out;
        ex.extract("output", out);

        ncnn::Mat anchors(6);
        anchors[0] = 10.f;
        anchors[1] = 13.f;
        anchors[2] = 16.f;
        anchors[3] = 30.f;
        anchors[4] = 33.f;
        anchors[5] = 23.f;

        std::vector<Object> objects8;
        generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);

        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
    }

    // stride 16
    {
        ncnn::Mat out;

#if YOLOV5_V60
        ex.extract("376", out);
#else
        ex.extract("781", out);
#endif

        ncnn::Mat anchors(6);
        anchors[0] = 30.f;
        anchors[1] = 61.f;
        anchors[2] = 62.f;
        anchors[3] = 45.f;
        anchors[4] = 59.f;
        anchors[5] = 119.f;

        std::vector<Object> objects16;
        generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);

        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
    }

    // stride 32
    {
        ncnn::Mat out;
#if YOLOV5_V60
        ex.extract("401", out);
#else
        ex.extract("801", out);
#endif
        ncnn::Mat anchors(6);
        anchors[0] = 116.f;
        anchors[1] = 90.f;
        anchors[2] = 156.f;
        anchors[3] = 198.f;
        anchors[4] = 373.f;
        anchors[5] = 326.f;

        std::vector<Object> objects32;
        generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);

        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Print every detection to stderr, draw its box and a "<class> <percent>%"
// caption on a copy of `bgr`, then show the result in a window named "image"
// and wait for a key press.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_class_names = sizeof(class_names) / sizeof(class_names[0]);

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the class count is derived from the network output, so a label may
        // fall outside this table; never index past its end
        const char* class_name = (obj.label >= 0 && obj.label < num_class_names) ? class_names[obj.label] : "unknown";

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, shifted so it stays inside the top/right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
// Entry point of the yolov5 example.
//
// Usage: <program> [imagepath]
//
// Reads the image, runs detect_yolov5() on it and displays the result.
// Returns 0 on success, -1 on wrong usage, unreadable image or failed detection.
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
        return -1;
    }

    const char* imagepath = argv[1];

    cv::Mat m = cv::imread(imagepath, 1);
    if (m.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;
    }

    std::vector<Object> objects;
    // do not display anything when detection reports an error
    if (detect_yolov5(m, objects) != 0)
    {
        fprintf(stderr, "detect_yolov5 failed\n");
        return -1;
    }

    draw_objects(m, objects);

    return 0;
}

422
3rdparty/ncnn/examples/yolov5_pnnx.cpp vendored Normal file
View File

@ -0,0 +1,422 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "layer.h"
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <float.h>
#include <stdio.h>
#include <vector>
// One detection: a bounding box with its class and score.
struct Object
{
// bounding box (x, y, width, height) in float pixel coordinates
cv::Rect_<float> rect;
// class index; draw_objects() uses it to index its class_names table
int label;
// detection confidence; proposals are sorted by it in descending order
float prob;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Recursive in-place quicksort of faceobjects[left..right] (both inclusive)
// into descending order of prob, using the middle element's prob as pivot.
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip elements that already sit on the correct side of the pivot
        while (faceobjects[lo].prob > pivot)
            ++lo;

        while (faceobjects[hi].prob < pivot)
            --hi;

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        ++lo;
        --hi;
    }

    // both partitions are disjoint; each one is sorted in its own OpenMP section
    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(faceobjects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}
// Sort all elements by descending prob; does nothing for an empty vector.
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
{
    if (!faceobjects.empty())
    {
        qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
    }
}
// Non-maximum suppression over boxes that the callers have already sorted by
// descending prob.
//
// faceobjects   candidate boxes, visited in index order
// picked        output: indices of the boxes that are kept (cleared first)
// nms_threshold a box is dropped when its IoU with an already kept box
//               exceeds this value
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // box areas are reused for every IoU computation below
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                // already suppressed; the remaining kept boxes cannot change that
                keep = 0;
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
// Logistic function 1 / (1 + e^-x), returned as float.
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (exp(-x) + 1.f));
}
// Decode the raw output of one detection head into candidate boxes.
//
// anchors         flat list of (width, height) pairs, one pair per anchor
// stride          size of one grid cell in input pixels
// in_pad          padded network input (not read by this function)
// feat_blob       head output: w x h is the grid, and each anchor owns
//                 num_class + 5 consecutive channels holding
//                 x, y, w, h, objectness and the class scores
// prob_threshold  minimum final confidence
// objects         output; accepted boxes are appended
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int num_grid_x = feat_blob.w;
    const int num_grid_y = feat_blob.h;

    const int num_anchors = anchors.w / 2;
    const int num_class = 80;
    // channels per anchor: 4 box values + objectness + class scores (85)
    const int num_attrs = num_class + 5;

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];
        const int base = q * num_attrs;

        for (int i = 0; i < num_grid_y; i++)
        {
            for (int j = 0; j < num_grid_x; j++)
            {
                // The final confidence is objectness * class probability and the
                // class probability never exceeds 1, so a cell whose objectness is
                // below the threshold can be rejected before scanning the classes.
                float box_score = feat_blob.channel(base + 4).row(i)[j];
                float box_confidence = sigmoid(box_score);
                if (!(box_confidence >= prob_threshold))
                    continue;

                // find class index with max class score
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int k = 0; k < num_class; k++)
                {
                    float score = feat_blob.channel(base + 5 + k).row(i)[j];
                    if (score > class_score)
                    {
                        class_index = k;
                        class_score = score;
                    }
                }

                float confidence = box_confidence * sigmoid(class_score);
                if (!(confidence >= prob_threshold))
                    continue;

                // yolov5/models/yolo.py Detect forward
                // y = x[i].sigmoid()
                // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                float dx = sigmoid(feat_blob.channel(base + 0).row(i)[j]);
                float dy = sigmoid(feat_blob.channel(base + 1).row(i)[j]);
                float dw = sigmoid(feat_blob.channel(base + 2).row(i)[j]);
                float dh = sigmoid(feat_blob.channel(base + 3).row(i)[j]);

                float pb_cx = (dx * 2.f - 0.5f + j) * stride;
                float pb_cy = (dy * 2.f - 0.5f + i) * stride;

                float pb_w = pow(dw * 2.f, 2) * anchor_w;
                float pb_h = pow(dh * 2.f, 2) * anchor_h;

                // convert centre/size to corner coordinates
                float x0 = pb_cx - pb_w * 0.5f;
                float y0 = pb_cy - pb_h * 0.5f;
                float x1 = pb_cx + pb_w * 0.5f;
                float y1 = pb_cy + pb_h * 0.5f;

                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0;
                obj.rect.height = y1 - y0;
                obj.label = class_index;
                obj.prob = confidence;

                objects.push_back(obj);
            }
        }
    }
}
// Run the pnnx-converted YOLOv5 model on a BGR image and store the final
// detections in `objects`.
//
// Steps: load the network, resize the image so its longer side is 640,
// pad to a multiple of max_stride, decode the three output heads
// (stride 8/16/32), sort, apply NMS and map the boxes back to the
// coordinates of the original image.
//
// Returns 0 on success. Returns -1 (with `objects` cleared) when the param or
// model file could not be loaded.
static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolov5;

    yolov5.opt.use_vulkan_compute = true;
    // yolov5.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/ultralytics/yolov5
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    int load_ret = yolov5.load_param("yolov5s.ncnn.param");
    if (load_ret == 0)
        load_ret = yolov5.load_model("yolov5s.ncnn.bin");
    if (load_ret != 0)
    {
        // without a loaded network nothing below can produce a result
        fprintf(stderr, "failed to load yolov5 param/model, error %d\n", load_ret);
        objects.clear();
        return -1;
    }

    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    // yolov5/models/common.py DetectMultiBackend
    const int max_stride = 64;

    // letterbox pad to multiple of max_stride
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    // pad to target_size rectangle
    // yolov5/utils/datasets.py letterbox
    int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
    int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // scale pixel values from [0, 255] to [0, 1]
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov5.create_extractor();

    ex.input("in0", in_pad);

    std::vector<Object> proposals;

    // anchor setting from yolov5/models/yolov5s.yaml

    // stride 8
    {
        ncnn::Mat out;
        ex.extract("out0", out);

        ncnn::Mat anchors(6);
        anchors[0] = 10.f;
        anchors[1] = 13.f;
        anchors[2] = 16.f;
        anchors[3] = 30.f;
        anchors[4] = 33.f;
        anchors[5] = 23.f;

        std::vector<Object> objects8;
        generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);

        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
    }

    // stride 16
    {
        ncnn::Mat out;
        ex.extract("out1", out);

        ncnn::Mat anchors(6);
        anchors[0] = 30.f;
        anchors[1] = 61.f;
        anchors[2] = 62.f;
        anchors[3] = 45.f;
        anchors[4] = 59.f;
        anchors[5] = 119.f;

        std::vector<Object> objects16;
        generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);

        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
    }

    // stride 32
    {
        ncnn::Mat out;
        ex.extract("out2", out);

        ncnn::Mat anchors(6);
        anchors[0] = 116.f;
        anchors[1] = 90.f;
        anchors[2] = 156.f;
        anchors[3] = 198.f;
        anchors[4] = 373.f;
        anchors[5] = 326.f;

        std::vector<Object> objects32;
        generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);

        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Log each detection to stderr, paint its box and a "<class> <percent>%"
// caption onto a copy of `bgr`, then display the copy in a window named
// "image" until a key is pressed.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    cv::Mat image = bgr.clone();

    for (std::vector<Object>::const_iterator it = objects.begin(); it != objects.end(); ++it)
    {
        const Object& det = *it;

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", det.label, det.prob,
                det.rect.x, det.rect.y, det.rect.width, det.rect.height);

        cv::rectangle(image, det.rect, cv::Scalar(255, 0, 0));

        char caption[256];
        sprintf(caption, "%s %.1f%%", class_names[det.label], det.prob * 100);

        int baseLine = 0;
        cv::Size caption_size = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // the caption sits above the box, pushed back inside the top/right image borders
        int left = det.rect.x;
        int top = det.rect.y - caption_size.height - baseLine;
        if (top < 0)
            top = 0;
        if (left + caption_size.width > image.cols)
            left = image.cols - caption_size.width;

        // white background behind the caption text
        cv::rectangle(image, cv::Rect(cv::Point(left, top), cv::Size(caption_size.width, caption_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, caption, cv::Point(left, top + caption_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
// Entry point of the yolov5_pnnx example.
//
// Usage: <program> [imagepath]
//
// Reads the image, runs detect_yolov5() on it and displays the result.
// Returns 0 on success, -1 on wrong usage, unreadable image or failed detection.
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
        return -1;
    }

    const char* imagepath = argv[1];

    cv::Mat m = cv::imread(imagepath, 1);
    if (m.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;
    }

    std::vector<Object> objects;
    // do not display anything when detection reports an error
    if (detect_yolov5(m, objects) != 0)
    {
        fprintf(stderr, "detect_yolov5 failed\n");
        return -1;
    }

    draw_objects(m, objects);

    return 0;
}

418
3rdparty/ncnn/examples/yolox.cpp vendored Normal file
View File

@ -0,0 +1,418 @@
// This file is written based on the following file:
// https://github.com/Tencent/ncnn/blob/master/examples/yolov5.cpp
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// ------------------------------------------------------------------------------
// Copyright (C) 2020-2021, Megvii Inc. All rights reserved.
#include "layer.h"
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <float.h>
#include <stdio.h>
#include <vector>
#define YOLOX_NMS_THRESH 0.45 // nms threshold
#define YOLOX_CONF_THRESH 0.25 // threshold of bounding box prob
#define YOLOX_TARGET_SIZE 640 // target image size after resize, might use 416 for small model
// YOLOX use the same focus in yolov5
// Custom ncnn layer registered under the name "YoloV5Focus".
//
// Produces a blob with half the width, half the height and four times the
// channels of the input. Output channel p copies every second pixel of input
// channel (p % channels), starting at row ((p / channels) % 2) and column
// ((p / channels) / 2).
class YoloV5Focus : public ncnn::Layer
{
public:
    YoloV5Focus()
    {
        one_blob_only = true;
    }

    // Returns 0 on success, -100 when the output blob could not be allocated.
    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
    {
        const int in_w = bottom_blob.w;
        const int in_c = bottom_blob.c;

        const int out_w = in_w / 2;
        const int out_h = bottom_blob.h / 2;
        const int out_c = in_c * 4;

        top_blob.create(out_w, out_h, out_c, 4u, 1, opt.blob_allocator);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for num_threads(opt.num_threads)
        for (int p = 0; p < out_c; p++)
        {
            // phase selects which of the four pixel offsets this channel samples
            const int phase = p / in_c;
            const float* src = bottom_blob.channel(p % in_c).row(phase % 2) + (phase / 2);
            float* dst = top_blob.channel(p);

            for (int y = 0; y < out_h; y++)
            {
                for (int x = 0; x < out_w; x++)
                {
                    *dst++ = *src;
                    src += 2;
                }

                // skip the input row that belongs to the other row phase
                src += in_w;
            }
        }

        return 0;
    }
};
DEFINE_LAYER_CREATOR(YoloV5Focus)
// One detection: a bounding box with its class and score.
struct Object
{
// bounding box (x, y, width, height) in float pixel coordinates
cv::Rect_<float> rect;
// class index; draw_objects() uses it to index its class_names table
int label;
// detection score; proposals are sorted by it in descending order
float prob;
};
// One grid cell of one output stride, as produced by generate_grids_and_stride().
struct GridAndStride
{
// cell index from the inner loop; added to the x offset when decoding a box
int grid0;
// cell index from the outer loop; added to the y offset when decoding a box
int grid1;
// stride this cell belongs to; scales the decoded centre and size
int stride;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
// Recursive in-place quicksort of faceobjects[left..right] (both inclusive)
// into descending order of prob, using the middle element's prob as pivot.
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
    int lo = left;
    int hi = right;
    const float pivot = faceobjects[(left + right) / 2].prob;

    while (lo <= hi)
    {
        // skip elements that already sit on the correct side of the pivot
        while (faceobjects[lo].prob > pivot)
            ++lo;

        while (faceobjects[hi].prob < pivot)
            --hi;

        if (lo > hi)
            break;

        std::swap(faceobjects[lo], faceobjects[hi]);
        ++lo;
        --hi;
    }

    // both partitions are disjoint; each one is sorted in its own OpenMP section
    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(faceobjects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(faceobjects, lo, right);
        }
    }
}
// Sort all elements by descending prob; does nothing for an empty vector.
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
    {
        qsort_descent_inplace(objects, 0, objects.size() - 1);
    }
}
// Non-maximum suppression over boxes that the callers have already sorted by
// descending prob.
//
// faceobjects   candidate boxes, visited in index order
// picked        output: indices of the boxes that are kept (cleared first)
// nms_threshold a box is dropped when its IoU with an already kept box
//               exceeds this value
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // box areas are reused for every IoU computation below
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
            {
                // already suppressed; the remaining kept boxes cannot change that
                keep = 0;
                break;
            }
        }

        if (keep)
            picked.push_back(i);
    }
}
static void generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
{
for (int i = 0; i < (int)strides.size(); i++)
{
int stride = strides[i];
int num_grid = target_size / stride;
for (int g1 = 0; g1 < num_grid; g1++)
{
for (int g0 = 0; g0 < num_grid; g0++)
{
GridAndStride gs;
gs.grid0 = g0;
gs.grid1 = g1;
gs.stride = stride;
grid_strides.push_back(gs);
}
}
}
}
// Decode the YOLOX output blob into candidate boxes.
//
// grid_strides    one entry per anchor point, in the same order as the rows of
//                 feat_blob (taken by const reference to avoid copying the
//                 whole vector on every call)
// feat_blob       one row per anchor point; each row holds
//                 x, y, w, h, objectness and then the class scores
// prob_threshold  a (box, class) pair is kept when objectness * class score
//                 is greater than this value
// objects         output; accepted boxes are appended
static void generate_yolox_proposals(const std::vector<GridAndStride>& grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int num_rows = feat_blob.h;
    const int num_class = feat_blob.w - 5;

    // Never walk past the rows the blob actually has: the grid is generated
    // from the target size, so a model with a different output size would
    // otherwise be read out of bounds.
    const int num_grid_points = (int)grid_strides.size();
    const int num_anchors = num_grid_points < num_rows ? num_grid_points : num_rows;

    const float* feat_ptr = feat_blob.channel(0);
    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)
    {
        const int grid0 = grid_strides[anchor_idx].grid0;
        const int grid1 = grid_strides[anchor_idx].grid1;
        const int stride = grid_strides[anchor_idx].stride;

        // yolox/models/yolo_head.py decode logic
        //  outputs[..., :2] = (outputs[..., :2] + grids) * strides
        //  outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides
        float x_center = (feat_ptr[0] + grid0) * stride;
        float y_center = (feat_ptr[1] + grid1) * stride;
        float w = exp(feat_ptr[2]) * stride;
        float h = exp(feat_ptr[3]) * stride;
        float x0 = x_center - w * 0.5f;
        float y0 = y_center - h * 0.5f;

        float box_objectness = feat_ptr[4];
        for (int class_idx = 0; class_idx < num_class; class_idx++)
        {
            float box_cls_score = feat_ptr[5 + class_idx];
            float box_prob = box_objectness * box_cls_score;
            if (box_prob > prob_threshold)
            {
                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = w;
                obj.rect.height = h;
                obj.label = class_idx;
                obj.prob = box_prob;

                objects.push_back(obj);
            }

        } // class loop

        // advance to the row of the next anchor point
        feat_ptr += feat_blob.w;

    } // point anchor loop
}
// Run YOLOX on a BGR image and store the final detections in `objects`.
//
// Steps: load the network, resize the image so its longer side is
// YOLOX_TARGET_SIZE, pad bottom/right to a square, decode the single output
// blob, sort, apply NMS and scale the boxes back to the original image.
//
// Returns 0 on success. Returns -1 (with `objects` cleared) when the param or
// model file could not be loaded.
static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolox;

    yolox.opt.use_vulkan_compute = true;
    // yolox.opt.use_bf16_storage = true;

    // Focus in yolov5
    yolox.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator);

    // original pretrained model from https://github.com/Megvii-BaseDetection/YOLOX
    // ncnn model param: https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_ncnn.tar.gz
    // NOTE that newest version YOLOX remove normalization of model (minus mean and then div by std),
    // which might cause your model outputs becoming a total mess, plz check carefully.
    int load_ret = yolox.load_param("yolox.param");
    if (load_ret == 0)
        load_ret = yolox.load_model("yolox.bin");
    if (load_ret != 0)
    {
        // without a loaded network nothing below can produce a result
        fprintf(stderr, "failed to load yolox param/model, error %d\n", load_ret);
        objects.clear();
        return -1;
    }

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)YOLOX_TARGET_SIZE / w;
        w = YOLOX_TARGET_SIZE;
        h = h * scale;
    }
    else
    {
        scale = (float)YOLOX_TARGET_SIZE / h;
        h = YOLOX_TARGET_SIZE;
        w = w * scale;
    }
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, w, h);

    // pad to YOLOX_TARGET_SIZE rectangle
    int wpad = YOLOX_TARGET_SIZE - w;
    int hpad = YOLOX_TARGET_SIZE - h;
    ncnn::Mat in_pad;
    // different from yolov5, yolox only pad on bottom and right side,
    // which means users don't need to extra padding info to decode boxes coordinate.
    ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f);

    ncnn::Extractor ex = yolox.create_extractor();

    ex.input("images", in_pad);

    std::vector<Object> proposals;

    {
        ncnn::Mat out;
        ex.extract("output", out);

        static const int stride_arr[] = {8, 16, 32}; // might have stride=64 in YOLOX
        std::vector<int> strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0]));
        std::vector<GridAndStride> grid_strides;
        generate_grids_and_stride(YOLOX_TARGET_SIZE, strides, grid_strides);
        generate_yolox_proposals(grid_strides, out, YOLOX_CONF_THRESH, proposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, YOLOX_NMS_THRESH);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x) / scale;
        float y0 = (objects[i].rect.y) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Print every detection to stderr, draw its box and a "<class> <percent>%"
// caption on a copy of `bgr`, then show the result in a window named "image"
// and wait for a key press.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_class_names = sizeof(class_names) / sizeof(class_names[0]);

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the class count is derived from the network output, so a label may
        // fall outside this table; never index past its end
        const char* class_name = (obj.label >= 0 && obj.label < num_class_names) ? class_names[obj.label] : "unknown";

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, shifted so it stays inside the top/right image borders
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 2)
{
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
return -1;
}
const char* imagepath = argv[1];
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
std::vector<Object> objects;
detect_yolox(m, objects);
draw_objects(m, objects);
return 0;
}