feat: 切换后端至PaddleOCR-NCNN,切换工程为CMake
1.项目后端整体迁移至PaddleOCR-NCNN算法,已通过基本的兼容性测试 2.工程改为使用CMake组织,后续为了更好地兼容第三方库,不再提供QMake工程 3.重整权利声明文件,重整代码工程,确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN,切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
This commit is contained in:
76
3rdparty/ncnn/examples/CMakeLists.txt
vendored
Normal file
76
3rdparty/ncnn/examples/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
# Declare an example executable <name> built from <name>.cpp.
# With OpenCV_FOUND set, the target gets the OpenCV include dirs and links
# ncnn plus the OpenCV libraries; otherwise, with NCNN_SIMPLEOCV enabled, it
# is compiled with USE_NCNN_SIMPLEOCV and links ncnn only.
macro(ncnn_add_example name)
    add_executable(${name} ${name}.cpp)
    if(OpenCV_FOUND)
        target_include_directories(${name} PRIVATE ${OpenCV_INCLUDE_DIRS})
        target_link_libraries(${name} PRIVATE ncnn ${OpenCV_LIBS})
    elseif(NCNN_SIMPLEOCV)
        # PRIVATE, like every other usage requirement in this macro: nothing
        # links against an example executable.
        target_compile_definitions(${name} PRIVATE USE_NCNN_SIMPLEOCV)
        target_link_libraries(${name} PRIVATE ncnn)
    endif()

    # add test to a virtual project group
    set_property(TARGET ${name} PROPERTY FOLDER "examples")
endmacro()
|
||||
|
||||
# Examples are only configured when NCNN_PIXEL is on and either OpenCV is
# found or NCNN_SIMPLEOCV is enabled.
if(NCNN_PIXEL)
    if(NOT NCNN_SIMPLEOCV)
        find_package(OpenCV QUIET COMPONENTS opencv_world)
        # for opencv 2.4 on ubuntu 16.04, there is no opencv_world but OpenCV_FOUND will be TRUE
        if("${OpenCV_LIBS}" STREQUAL "")
            set(OpenCV_FOUND FALSE)
        endif()
        # fall back to progressively smaller component sets
        if(NOT OpenCV_FOUND)
            find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs videoio)
        endif()
        if(NOT OpenCV_FOUND)
            find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
        endif()
    endif()

    if(OpenCV_FOUND OR NCNN_SIMPLEOCV)
        if(OpenCV_FOUND)
            message(STATUS "OpenCV library: ${OpenCV_INSTALL_PATH}")
            message(STATUS " version: ${OpenCV_VERSION}")
            message(STATUS " libraries: ${OpenCV_LIBS}")
            message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")

            # Pass the variable name, not its expansion: if() dereferences it
            # itself, and an empty expansion would otherwise leave
            # "if( GREATER 3)", which is a configure-time error.
            if(OpenCV_VERSION_MAJOR GREATER 3)
                set(CMAKE_CXX_STANDARD 11)
            endif()
        endif()

        include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src)
        include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src)

        ncnn_add_example(squeezenet)
        ncnn_add_example(squeezenet_c_api)
        ncnn_add_example(fasterrcnn)
        ncnn_add_example(rfcn)
        ncnn_add_example(yolov2)
        ncnn_add_example(yolov3)
        ncnn_add_example(yolov5)
        ncnn_add_example(yolov5_pnnx)
        ncnn_add_example(yolox)
        ncnn_add_example(mobilenetv2ssdlite)
        ncnn_add_example(mobilenetssd)
        ncnn_add_example(squeezenetssd)
        ncnn_add_example(shufflenetv2)
        ncnn_add_example(peleenetssd_seg)
        ncnn_add_example(simplepose)
        ncnn_add_example(retinaface)
        ncnn_add_example(yolact)
        ncnn_add_example(nanodet)
        ncnn_add_example(nanodetplus_pnnx)
        ncnn_add_example(scrfd)
        ncnn_add_example(scrfd_crowdhuman)
        # these examples are only added when a real OpenCV was found
        if(OpenCV_FOUND)
            ncnn_add_example(yolov4)
            ncnn_add_example(rvm)
            ncnn_add_example(p2pnet)
        endif()
    else()
        message(WARNING "OpenCV not found and NCNN_SIMPLEOCV disabled, examples won't be built")
    endif()
else()
    message(WARNING "NCNN_PIXEL not enabled, examples won't be built")
endif()
|
358
3rdparty/ncnn/examples/fasterrcnn.cpp
vendored
Normal file
358
3rdparty/ncnn/examples/fasterrcnn.cpp
vendored
Normal file
@ -0,0 +1,358 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#include <math.h>
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = objects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (objects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (objects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(objects[i], objects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(objects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(objects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& objects)
|
||||
{
|
||||
if (objects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(objects, 0, objects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = objects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = objects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = objects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = objects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the ZF Faster R-CNN model on a BGR image.
// On success, objects holds at most max_per_image detections sorted by
// descending prob, with boxes in the coordinates of bgr, and 0 is returned.
// Returns -1 (with objects cleared) when the model files cannot be loaded.
static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net fasterrcnn;

    fasterrcnn.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
    // py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
    // https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
    // ZF_faster_rcnn_final.caffemodel
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (fasterrcnn.load_param("ZF_faster_rcnn_final.param") != 0
            || fasterrcnn.load_model("ZF_faster_rcnn_final.bin") != 0)
    {
        // running the extractors on a net that failed to load is pointless
        fprintf(stderr, "failed to load ZF_faster_rcnn_final.param / ZF_faster_rcnn_final.bin\n");
        objects.clear();
        return -1;
    }

    // hyper parameters taken from
    // py-faster-rcnn/lib/fast_rcnn/config.py
    // py-faster-rcnn/lib/fast_rcnn/test.py
    const int target_size = 600; // __C.TEST.SCALES

    const int max_per_image = 100;
    const float confidence_thresh = 0.05f;

    const float nms_threshold = 0.3f; // __C.TEST.NMS

    // scale to target detect size: the shorter side becomes target_size
    int w = bgr.cols;
    int h = bgr.rows;
    float scale = 1.f;
    if (w < h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, w, h);

    const float mean_vals[3] = {102.9801f, 115.9465f, 122.7717f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Mat im_info(3);
    im_info[0] = h;
    im_info[1] = w;
    im_info[2] = scale;

    // step1, extract feature and all rois
    ncnn::Extractor ex1 = fasterrcnn.create_extractor();

    ex1.input("data", in);
    ex1.input("im_info", im_info);

    ncnn::Mat conv5_relu5; // feature
    ncnn::Mat rois;        // all rois
    ex1.extract("conv5_relu5", conv5_relu5);
    ex1.extract("rois", rois);

    // step2, extract bbox and score for each roi
    std::vector<std::vector<Object> > class_candidates;
    for (int i = 0; i < rois.c; i++)
    {
        ncnn::Extractor ex2 = fasterrcnn.create_extractor();

        ncnn::Mat roi = rois.channel(i); // get single roi
        ex2.input("conv5_relu5", conv5_relu5);
        ex2.input("rois", roi);

        ncnn::Mat bbox_pred;
        ncnn::Mat cls_prob;
        ex2.extract("bbox_pred", bbox_pred);
        ex2.extract("cls_prob", cls_prob);

        int num_class = cls_prob.w;
        class_candidates.resize(num_class);

        // find class id with highest score
        // (index named k so that it does not shadow the roi index i)
        int label = 0;
        float score = 0.f;
        for (int k = 0; k < num_class; k++)
        {
            float class_score = cls_prob[k];
            if (class_score > score)
            {
                label = k;
                score = class_score;
            }
        }

        // ignore background or low score
        if (label == 0 || score <= confidence_thresh)
            continue;

        // fprintf(stderr, "%d = %f\n", label, score);

        // unscale to image size
        float x1 = roi[0] / scale;
        float y1 = roi[1] / scale;
        float x2 = roi[2] / scale;
        float y2 = roi[3] / scale;

        float pb_w = x2 - x1 + 1;
        float pb_h = y2 - y1 + 1;

        // apply bbox regression
        float dx = bbox_pred[label * 4];
        float dy = bbox_pred[label * 4 + 1];
        float dw = bbox_pred[label * 4 + 2];
        float dh = bbox_pred[label * 4 + 3];

        float cx = x1 + pb_w * 0.5f;
        float cy = y1 + pb_h * 0.5f;

        float obj_cx = cx + pb_w * dx;
        float obj_cy = cy + pb_h * dy;

        float obj_w = pb_w * exp(dw);
        float obj_h = pb_h * exp(dh);

        float obj_x1 = obj_cx - obj_w * 0.5f;
        float obj_y1 = obj_cy - obj_h * 0.5f;
        float obj_x2 = obj_cx + obj_w * 0.5f;
        float obj_y2 = obj_cy + obj_h * 0.5f;

        // clip
        obj_x1 = std::max(std::min(obj_x1, (float)(bgr.cols - 1)), 0.f);
        obj_y1 = std::max(std::min(obj_y1, (float)(bgr.rows - 1)), 0.f);
        obj_x2 = std::max(std::min(obj_x2, (float)(bgr.cols - 1)), 0.f);
        obj_y2 = std::max(std::min(obj_y2, (float)(bgr.rows - 1)), 0.f);

        // append object
        Object obj;
        obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
        obj.label = label;
        obj.prob = score;

        class_candidates[label].push_back(obj);
    }

    // post process: per-class NMS, then merge the survivors
    objects.clear();
    for (int i = 0; i < (int)class_candidates.size(); i++)
    {
        std::vector<Object>& candidates = class_candidates[i];

        qsort_descent_inplace(candidates);

        std::vector<int> picked;
        nms_sorted_bboxes(candidates, picked, nms_threshold);

        for (int j = 0; j < (int)picked.size(); j++)
        {
            int z = picked[j];
            objects.push_back(candidates[z]);
        }
    }

    qsort_descent_inplace(objects);

    // keep only the best max_per_image detections (compare as the same signed type)
    if (max_per_image > 0 && max_per_image < (int)objects.size())
    {
        objects.resize(max_per_image);
    }

    return 0;
}
|
||||
|
||||
// Prints every detection to stderr, draws its box and a "<class> <percent>%"
// caption on a copy of bgr, then shows the result in a window named "image"
// and waits for a key press.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label is produced by the model; never index the table out of range
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_fasterrcnn(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
152
3rdparty/ncnn/examples/mobilenetssd.cpp
vendored
Normal file
152
3rdparty/ncnn/examples/mobilenetssd.cpp
vendored
Normal file
@ -0,0 +1,152 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Runs the MobileNet-SSD (VOC) model on a BGR image.
// Every row of the "detection_out" blob becomes one Object: column 0 is the
// label, column 1 the score, columns 2..5 are scaled by the image width/height
// to form the box corners. Returns 0 on success, -1 (objects cleared) when the
// model files cannot be loaded.
static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net mobilenet;

    mobilenet.opt.use_vulkan_compute = true;

    // model is converted from https://github.com/chuanqi305/MobileNet-SSD
    // and can be downloaded from https://drive.google.com/open?id=0ByaKLD9QaPtucWk0Y0dha1VVY0U
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (mobilenet.load_param("mobilenet_ssd_voc_ncnn.param") != 0
            || mobilenet.load_model("mobilenet_ssd_voc_ncnn.bin") != 0)
    {
        // inference on a net that failed to load cannot produce detections
        fprintf(stderr, "failed to load mobilenet_ssd_voc_ncnn.param / mobilenet_ssd_voc_ncnn.bin\n");
        objects.clear();
        return -1;
    }

    const int target_size = 300;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = mobilenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    objects.clear();
    for (int i = 0; i < out.h; i++)
    {
        const float* values = out.row(i);

        Object object;
        object.label = (int)values[0];
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
|
||||
|
||||
// Prints every detection to stderr, draws its box and a "<class> <percent>%"
// caption on a copy of bgr, then shows the result in a window named "image"
// and waits for a key press.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label is read straight from the network output; never index
        // the name table out of range
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_mobilenet(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
159
3rdparty/ncnn/examples/mobilenetv2ssdlite.cpp
vendored
Normal file
159
3rdparty/ncnn/examples/mobilenetv2ssdlite.cpp
vendored
Normal file
@ -0,0 +1,159 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
class Noop : public ncnn::Layer
|
||||
{
|
||||
};
|
||||
DEFINE_LAYER_CREATOR(Noop)
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Runs the MobileNetV2-SSDLite (VOC) model on a BGR image.
// Every row of the "detection_out" blob becomes one Object: column 0 is the
// label, column 1 the score, columns 2..5 are scaled by the image width/height
// to form the box corners. Returns 0 on success, -1 (objects cleared) when the
// model files cannot be loaded.
static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net mobilenetv2;

    mobilenetv2.opt.use_vulkan_compute = true;

    // the custom layer must be registered before the param file is loaded
    mobilenetv2.register_custom_layer("Silence", Noop_layer_creator);

    // original pretrained model from https://github.com/chuanqi305/MobileNetv2-SSDLite
    // https://github.com/chuanqi305/MobileNetv2-SSDLite/blob/master/ssdlite/voc/deploy.prototxt
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (mobilenetv2.load_param("mobilenetv2_ssdlite_voc.param") != 0
            || mobilenetv2.load_model("mobilenetv2_ssdlite_voc.bin") != 0)
    {
        // inference on a net that failed to load cannot produce detections
        fprintf(stderr, "failed to load mobilenetv2_ssdlite_voc.param / mobilenetv2_ssdlite_voc.bin\n");
        objects.clear();
        return -1;
    }

    const int target_size = 300;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = mobilenetv2.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    objects.clear();
    for (int i = 0; i < out.h; i++)
    {
        const float* values = out.row(i);

        Object object;
        object.label = (int)values[0];
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
|
||||
|
||||
// Prints every detection to stderr, draws its box and a "<class> <percent>%"
// caption on a copy of bgr, then shows the result in a window named "image"
// and waits for a key press.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label is read straight from the network output; never index
        // the name table out of range
        const char* class_name = "unknown";
        if (obj.label >= 0 && obj.label < num_class_names)
            class_name = class_names[obj.label];

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_mobilenetv2(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
173
3rdparty/ncnn/examples/mobilenetv3ssdlite.cpp
vendored
Normal file
173
3rdparty/ncnn/examples/mobilenetv3ssdlite.cpp
vendored
Normal file
@ -0,0 +1,173 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
#include "platform.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#if NCNN_VULKAN
|
||||
#include "gpu.h"
|
||||
#endif // NCNN_VULKAN
|
||||
|
||||
// Limits v to the closed range [lo, hi] and returns a reference to whichever
// of the three arguments is the result. Asserts that hi is not below lo.
template<class T>
const T& clamp(const T& v, const T& lo, const T& hi)
{
    assert(!(hi < lo));

    if (v < lo)
        return lo;
    if (hi < v)
        return hi;
    return v;
}
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Runs the MobileNetV3-SSDLite (VOC) model on a BGR image.
// Every row of the "detection_out" blob becomes one Object: column 0 is the
// label, column 1 the score, columns 2..5 are the box corners, clamped to the
// network input size and then rescaled to the image size.
// Returns 0 on success, -1 (objects cleared) when the model files cannot be
// loaded.
static int detect_mobilenetv3(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net mobilenetv3;

#if NCNN_VULKAN
    mobilenetv3.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

    // converted ncnn model from https://github.com/ujsyehao/mobilenetv3-ssd
    if (mobilenetv3.load_param("./mobilenetv3_ssdlite_voc.param") != 0
            || mobilenetv3.load_model("./mobilenetv3_ssdlite_voc.bin") != 0)
    {
        // inference on a net that failed to load cannot produce detections
        fprintf(stderr, "failed to load ./mobilenetv3_ssdlite_voc.param / ./mobilenetv3_ssdlite_voc.bin\n");
        objects.clear();
        return -1;
    }

    const int target_size = 300;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_size, target_size);

    const float mean_vals[3] = {123.675f, 116.28f, 103.53f};
    const float norm_vals[3] = {1.0f, 1.0f, 1.0f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = mobilenetv3.create_extractor();

    ex.input("input", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    objects.clear();
    for (int i = 0; i < out.h; i++)
    {
        const float* values = out.row(i);

        Object object;
        object.label = (int)values[0];
        object.prob = values[1];

        // filter out cross-boundary
        float x1 = clamp(values[2] * target_size, 0.f, float(target_size - 1)) / target_size * img_w;
        float y1 = clamp(values[3] * target_size, 0.f, float(target_size - 1)) / target_size * img_h;
        float x2 = clamp(values[4] * target_size, 0.f, float(target_size - 1)) / target_size * img_w;
        float y2 = clamp(values[5] * target_size, 0.f, float(target_size - 1)) / target_size * img_h;

        object.rect.x = x1;
        object.rect.y = y1;
        object.rect.width = x2 - x1;
        object.rect.height = y2 - y1;

        objects.push_back(object);
    }

    return 0;
}
|
||||
|
||||
// For every detection with prob above 0.6: prints it to stderr and draws its
// box and a "<class> <percent>%" caption on a copy of bgr. The result is then
// shown in a window named "image" until a key is pressed.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        if (objects[i].prob > 0.6)
        {
            const Object& obj = objects[i];

            fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                    obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

            cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

            // the label is read straight from the network output; never index
            // the name table out of range
            const char* class_name = "unknown";
            if (obj.label >= 0 && obj.label < num_class_names)
                class_name = class_names[obj.label];

            char text[256];
            snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

            int baseLine = 0;
            cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

            // place the caption above the box, pulled back inside the image if needed
            int x = obj.rect.x;
            int y = obj.rect.y - label_size.height - baseLine;
            if (y < 0)
                y = 0;
            if (x + label_size.width > image.cols)
                x = image.cols - label_size.width;

            cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                          cv::Scalar(255, 255, 255), -1);

            cv::putText(image, text, cv::Point(x, y + label_size.height),
                        cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
        }
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_mobilenetv3(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
420
3rdparty/ncnn/examples/nanodet.cpp
vendored
Normal file
420
3rdparty/ncnn/examples/nanodet.cpp
vendored
Normal file
@ -0,0 +1,420 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
|
||||
{
|
||||
if (faceobjects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the raw outputs of one detection head into candidate boxes.
//
// cls_pred        class scores, one row per grid cell and one column per class
// dis_pred        one row per grid cell holding 4 side-distance distributions
//                 (left, top, right, bottom) of dis_pred.w / 4 bins each.
//                 NOTE: the rows of accepted cells are softmax-normalised in
//                 place, through a Mat that aliases this storage.
// stride          size of one grid cell in input pixels
// in_pad          padded network input; only its width and height are read
// prob_threshold  minimum best-class score for a cell to emit a candidate
// objects         candidates are appended, in padded-input pixel coordinates
static void generate_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    const int num_grid = cls_pred.h;
    if (num_grid <= 0 || stride <= 0)
        return;

    // recover the 2-D grid shape from the flattened cell count, using the longer input side
    int num_grid_x;
    int num_grid_y;
    if (in_pad.w > in_pad.h)
    {
        num_grid_x = in_pad.w / stride;
        if (num_grid_x <= 0)
            return;
        num_grid_y = num_grid / num_grid_x;
    }
    else
    {
        num_grid_y = in_pad.h / stride;
        if (num_grid_y <= 0)
            return;
        num_grid_x = num_grid / num_grid_y;
    }

    const int num_class = cls_pred.w;
    const int reg_max_1 = dis_pred.w / 4;

    // The softmax layer does not depend on the cell, so it is built once here
    // instead of once per accepted candidate.
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_packing_layout = false;

    ncnn::Layer* softmax = ncnn::create_layer("Softmax");
    if (!softmax)
        return;

    {
        ncnn::ParamDict pd;
        pd.set(0, 1); // axis
        pd.set(1, 1);
        softmax->load_param(pd);
    }
    softmax->create_pipeline(opt);

    for (int i = 0; i < num_grid_y; i++)
    {
        for (int j = 0; j < num_grid_x; j++)
        {
            const int idx = i * num_grid_x + j;

            const float* scores = cls_pred.row(idx);

            // find label with max score
            int label = -1;
            float score = -FLT_MAX;
            for (int k = 0; k < num_class; k++)
            {
                if (scores[k] > score)
                {
                    label = k;
                    score = scores[k];
                }
            }

            if (score < prob_threshold)
                continue;

            // view this cell's distance logits as a 4 x reg_max_1 matrix (no copy)
            // and turn every row into a probability distribution
            ncnn::Mat bbox_pred(reg_max_1, 4, const_cast<float*>(dis_pred.row(idx)));
            softmax->forward_inplace(bbox_pred, opt);

            // each side distance is the expected bin index, scaled to pixels
            float pred_ltrb[4];
            for (int k = 0; k < 4; k++)
            {
                float dis = 0.f;
                const float* dis_after_sm = bbox_pred.row(k);
                for (int l = 0; l < reg_max_1; l++)
                {
                    dis += l * dis_after_sm[l];
                }

                pred_ltrb[k] = dis * stride;
            }

            // centre of the grid cell in input pixels
            const float pb_cx = (j + 0.5f) * stride;
            const float pb_cy = (i + 0.5f) * stride;

            const float x0 = pb_cx - pred_ltrb[0];
            const float y0 = pb_cy - pred_ltrb[1];
            const float x1 = pb_cx + pred_ltrb[2];
            const float y1 = pb_cy + pred_ltrb[3];

            Object obj;
            obj.rect.x = x0;
            obj.rect.y = y0;
            obj.rect.width = x1 - x0;
            obj.rect.height = y1 - y0;
            obj.label = label;
            obj.prob = score;

            objects.push_back(obj);
        }
    }

    softmax->destroy_pipeline(opt);
    delete softmax;
}
|
||||
|
||||
// Run NanoDet-m on a BGR image.
//
// bgr      input image
// objects  resized and filled with the detections that survive NMS, with
//          boxes expressed in pixel coordinates of the original image
//
// Returns 0 on success, -1 when the model files cannot be loaded (objects is
// left untouched in that case).
static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net nanodet;

    nanodet.opt.use_vulkan_compute = true;
    // nanodet.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/RangiLyu/nanodet
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (nanodet.load_param("nanodet_m.param") != 0 || nanodet.load_model("nanodet_m.bin") != 0)
    {
        fprintf(stderr, "failed to load nanodet_m.param / nanodet_m.bin\n");
        return -1;
    }

    const int width = bgr.cols;
    const int height = bgr.rows;

    const int target_size = 320;
    const float prob_threshold = 0.4f;
    const float nms_threshold = 0.5f;

    // resize so that the longer side equals target_size, keeping the aspect ratio
    int w = width;
    int h = height;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h);

    // pad both sides up to the next multiple of 32, splitting the padding around the image
    const int wpad = (w + 31) / 32 * 32 - w;
    const int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);

    const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
    const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
    in_pad.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = nanodet.create_extractor();

    ex.input("input.1", in_pad);

    // one (class, distance) output pair per head; candidates of all heads are pooled
    static const int head_strides[3] = {8, 16, 32};
    static const char* const cls_blobs[3] = {"792", "814", "836"};
    static const char* const dis_blobs[3] = {"795", "817", "839"};

    std::vector<Object> proposals;
    for (int s = 0; s < 3; s++)
    {
        ncnn::Mat cls_pred;
        ncnn::Mat dis_pred;
        ex.extract(cls_blobs[s], cls_pred);
        ex.extract(dis_blobs[s], dis_pred);

        // generate_proposals appends, so the shared vector can be passed directly
        generate_proposals(cls_pred, dis_pred, head_strides[s], in_pad, prob_threshold, proposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // undo the padding offset and the resize to get back to original pixels
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip to the image
        x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
|
||||
|
||||
// Print every detection to stderr, draw its box and a "<class> <score>%" tag on
// a copy of bgr, then show the copy in a window and wait for a key press.
// A label outside the class table is drawn as "unknown" instead of reading
// past the end of the table.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_names = sizeof(class_names) / sizeof(class_names[0]);

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        const bool known = obj.label >= 0 && obj.label < num_names;
        const char* name = known ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the tag above the box, but keep it inside the image
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // white background, then black text on top of it
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_nanodet(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
426
3rdparty/ncnn/examples/nanodetplus_pnnx.cpp
vendored
Normal file
426
3rdparty/ncnn/examples/nanodetplus_pnnx.cpp
vendored
Normal file
@ -0,0 +1,426 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
|
||||
{
|
||||
if (faceobjects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Logistic function: maps any real x into the range [0, 1], with sigmoid(0) == 0.5.
static inline float sigmoid(float x)
{
    return 1.0f / (exp(-x) + 1.0f);
}
|
||||
|
||||
// Decode the raw output of one detection head into candidate boxes.
//
// pred            head output; pred.w x pred.h is the cell grid. The first 80
//                 channels are class logits, the remaining channels are 4
//                 side-distance distributions (left, top, right, bottom) of
//                 (pred.c - 80) / 4 bins each.
// stride          size of one grid cell in input pixels
// in_pad          not used by this function; kept so the signature stays the same
// prob_threshold  minimum sigmoid score of the best class for a cell to emit a candidate
// objects         candidates are appended, in padded-input pixel coordinates
static void generate_proposals(const ncnn::Mat& pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
{
    (void)in_pad;

    const int num_grid_x = pred.w;
    const int num_grid_y = pred.h;

    const int num_class = 80; // number of classes. 80 for COCO
    const int reg_max_1 = (pred.c - num_class) / 4;

    // without distance channels there is nothing to decode (also covers an empty pred)
    if (reg_max_1 <= 0)
        return;

    // The softmax layer does not depend on the cell, so it is built once here
    // instead of once per accepted candidate.
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_packing_layout = false;

    ncnn::Layer* softmax = ncnn::create_layer("Softmax");
    if (!softmax)
        return;

    {
        ncnn::ParamDict pd;
        pd.set(0, 1); // axis
        pd.set(1, 1);
        softmax->load_param(pd);
    }
    softmax->create_pipeline(opt);

    for (int i = 0; i < num_grid_y; i++)
    {
        for (int j = 0; j < num_grid_x; j++)
        {
            // find label with max score
            int label = -1;
            float score = -FLT_MAX;
            for (int k = 0; k < num_class; k++)
            {
                const float s = pred.channel(k).row(i)[j];
                if (s > score)
                {
                    label = k;
                    score = s;
                }
            }

            // the head outputs logits; only the winning one needs converting
            score = sigmoid(score);

            if (score < prob_threshold)
                continue;

            // gather this cell's distance logits into a 4 x reg_max_1 matrix
            ncnn::Mat bbox_pred(reg_max_1, 4);
            for (int k = 0; k < reg_max_1 * 4; k++)
            {
                bbox_pred[k] = pred.channel(num_class + k).row(i)[j];
            }

            // turn every row into a probability distribution
            softmax->forward_inplace(bbox_pred, opt);

            // each side distance is the expected bin index, scaled to pixels
            float pred_ltrb[4];
            for (int k = 0; k < 4; k++)
            {
                float dis = 0.f;
                const float* dis_after_sm = bbox_pred.row(k);
                for (int l = 0; l < reg_max_1; l++)
                {
                    dis += l * dis_after_sm[l];
                }

                pred_ltrb[k] = dis * stride;
            }

            // reference point of the cell (its top-left corner, no half-cell offset)
            const float pb_cx = j * stride;
            const float pb_cy = i * stride;

            const float x0 = pb_cx - pred_ltrb[0];
            const float y0 = pb_cy - pred_ltrb[1];
            const float x1 = pb_cx + pred_ltrb[2];
            const float y1 = pb_cy + pred_ltrb[3];

            Object obj;
            obj.rect.x = x0;
            obj.rect.y = y0;
            obj.rect.width = x1 - x0;
            obj.rect.height = y1 - y0;
            obj.label = label;
            obj.prob = score;

            objects.push_back(obj);
        }
    }

    softmax->destroy_pipeline(opt);
    delete softmax;
}
|
||||
|
||||
// Run NanoDet-Plus-m (416 input, pnnx-converted model) on a BGR image.
//
// bgr      input image
// objects  resized and filled with the detections that survive NMS, with
//          boxes expressed in pixel coordinates of the original image
//
// Returns 0 on success, -1 when the model files cannot be loaded (objects is
// left untouched in that case).
static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net nanodet;

    nanodet.opt.use_vulkan_compute = true;
    // nanodet.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/RangiLyu/nanodet
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // nanodet.load_param("nanodet-plus-m_320.torchscript.ncnn.param");
    // nanodet.load_model("nanodet-plus-m_320.torchscript.ncnn.bin");
    if (nanodet.load_param("nanodet-plus-m_416.torchscript.ncnn.param") != 0
            || nanodet.load_model("nanodet-plus-m_416.torchscript.ncnn.bin") != 0)
    {
        fprintf(stderr, "failed to load nanodet-plus-m_416.torchscript.ncnn.param / .bin\n");
        return -1;
    }

    const int width = bgr.cols;
    const int height = bgr.rows;

    // const int target_size = 320;
    const int target_size = 416;
    const float prob_threshold = 0.4f;
    const float nms_threshold = 0.5f;

    // resize so that the longer side equals target_size, keeping the aspect ratio
    int w = width;
    int h = height;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h);

    // pad both sides up to the next multiple of 32, splitting the padding around the image
    const int wpad = (w + 31) / 32 * 32 - w;
    const int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);

    const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
    const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
    in_pad.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = nanodet.create_extractor();

    ex.input("in0", in_pad);

    // one output blob per head; candidates of all heads are pooled
    static const int head_strides[4] = {8, 16, 32, 64};
    static const char* const head_blobs[4] = {"231", "228", "225", "222"};

    std::vector<Object> proposals;
    for (int s = 0; s < 4; s++)
    {
        ncnn::Mat pred;
        ex.extract(head_blobs[s], pred);

        // generate_proposals appends, so the shared vector can be passed directly
        generate_proposals(pred, head_strides[s], in_pad, prob_threshold, proposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // undo the padding offset and the resize to get back to original pixels
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip to the image
        x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
|
||||
|
||||
// Print every detection to stderr, draw its box and a "<class> <score>%" tag on
// a copy of bgr, then show the copy in a window and wait for a key press.
// A label outside the class table is drawn as "unknown" instead of reading
// past the end of the table.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_names = sizeof(class_names) / sizeof(class_names[0]);

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        const bool known = obj.label >= 0 && obj.label < num_names;
        const char* name = known ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the tag above the box, but keep it inside the image
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // white background, then black text on top of it
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_nanodet(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
240
3rdparty/ncnn/examples/p2pnet.cpp
vendored
Normal file
240
3rdparty/ncnn/examples/p2pnet.cpp
vendored
Normal file
@ -0,0 +1,240 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct CrowdPoint
|
||||
{
|
||||
cv::Point pt;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Replicate a set of anchor offsets over every cell of a w x h grid.
//
// w, h                    grid size in cells; a non-positive size gives an empty result
// stride                  cell size in pixels; the centre of cell (col, row) is
//                         ((col + 0.5) * stride, (row + 0.5) * stride)
// anchor_points           interleaved x,y offsets relative to a cell centre; a
//                         trailing unpaired value is ignored. Must not be the
//                         same vector as shifted_anchor_points.
// shifted_anchor_points   overwritten with interleaved absolute x,y positions:
//                         cells in row-major order and, within a cell, anchors
//                         in their input order
static void shift(int w, int h, int stride, const std::vector<float>& anchor_points, std::vector<float>& shifted_anchor_points)
{
    const size_t num_anchors = anchor_points.size() / 2;
    const size_t num_cells = (w > 0 && h > 0) ? (size_t)w * h : 0;

    shifted_anchor_points.resize(num_cells * num_anchors * 2);

    size_t out = 0;
    for (int row = 0; row < h; row++)
    {
        const float cy = (row + 0.5) * stride;
        for (int col = 0; col < w; col++)
        {
            const float cx = (col + 0.5) * stride;
            for (size_t k = 0; k < num_anchors; k++)
            {
                shifted_anchor_points[out++] = anchor_points[k * 2] + cx;
                shifted_anchor_points[out++] = anchor_points[k * 2 + 1] + cy;
            }
        }
    }
}
|
||||
// Build a row x line lattice of anchor offsets centred inside one cell.
//
// stride          cell size in pixels
// row, line       number of lattice rows and columns; a non-positive count
//                 gives an empty result
// anchor_points   overwritten with interleaved x,y offsets relative to the cell
//                 centre, in row-major order
//
// The centring offset is half the stride computed in floating point, so it is
// also correct for odd strides.
static void generate_anchor_points(int stride, int row, int line, std::vector<float>& anchor_points)
{
    if (row <= 0 || line <= 0)
    {
        anchor_points.clear();
        return;
    }

    const float row_step = (float)stride / row;
    const float line_step = (float)stride / line;
    const double half_stride = stride / 2.0;

    anchor_points.resize((size_t)row * line * 2);

    size_t out = 0;
    for (int r = 0; r < row; r++)
    {
        const float y = (r + 0.5) * row_step - half_stride;
        for (int c = 0; c < line; c++)
        {
            const float x = (c + 0.5) * line_step - half_stride;
            anchor_points[out++] = x;
            anchor_points[out++] = y;
        }
    }
}
|
||||
static void generate_anchor_points(int img_w, int img_h, std::vector<int> pyramid_levels, int row, int line, std::vector<float>& all_anchor_points)
|
||||
{
|
||||
std::vector<std::pair<int, int> > image_shapes;
|
||||
std::vector<int> strides;
|
||||
for (int i = 0; i < pyramid_levels.size(); i++)
|
||||
{
|
||||
int new_h = std::floor((img_h + std::pow(2, pyramid_levels[i]) - 1) / std::pow(2, pyramid_levels[i]));
|
||||
int new_w = std::floor((img_w + std::pow(2, pyramid_levels[i]) - 1) / std::pow(2, pyramid_levels[i]));
|
||||
image_shapes.push_back(std::make_pair(new_w, new_h));
|
||||
strides.push_back(std::pow(2, pyramid_levels[i]));
|
||||
}
|
||||
|
||||
all_anchor_points.clear();
|
||||
for (int i = 0; i < pyramid_levels.size(); i++)
|
||||
{
|
||||
std::vector<float> anchor_points;
|
||||
generate_anchor_points(std::pow(2, pyramid_levels[i]), row, line, anchor_points);
|
||||
std::vector<float> shifted_anchor_points;
|
||||
shift(image_shapes[i].first, image_shapes[i].second, strides[i], anchor_points, shifted_anchor_points);
|
||||
all_anchor_points.insert(all_anchor_points.end(), shifted_anchor_points.begin(), shifted_anchor_points.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Run P2PNet on a BGR image and collect every predicted point.
//
// bgr            input image
// crowd_points   one entry is appended per predicted point, with its position
//                mapped back to the original image and its score; no threshold
//                is applied here
//
// Returns 0 on success, -1 when the model files cannot be loaded or when the
// image is smaller than 128 pixels in either direction (the input is rounded
// down to a multiple of 128, which would leave it empty).
static int detect_crowd(const cv::Mat& bgr, std::vector<CrowdPoint>& crowd_points)
{
    ncnn::Option opt;
    opt.num_threads = 4;
    opt.use_vulkan_compute = false;
    opt.use_bf16_storage = false;

    ncnn::Net net;
    net.opt = opt;

    // model is converted from
    // https://github.com/TencentYoutuResearch/CrowdCounting-P2PNet
    // the ncnn model https://pan.baidu.com/s/1O1CBgvY6yJkrK8Npxx3VMg pwd: ezhx
    if (net.load_param("p2pnet.param") != 0 || net.load_model("p2pnet.bin") != 0)
    {
        fprintf(stderr, "failed to load p2pnet.param / p2pnet.bin\n");
        return -1;
    }

    const int width = bgr.cols;
    const int height = bgr.rows;

    // network input size: each side rounded down to a multiple of 128
    const int new_width = width / 128 * 128;
    const int new_height = height / 128 * 128;
    if (new_width <= 0 || new_height <= 0)
    {
        fprintf(stderr, "image %d x %d is too small, at least 128 x 128 is required\n", width, height);
        return -1;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, new_width, new_height);

    // a single pyramid level (3, i.e. stride 8) with a 2 x 2 anchor lattice per cell
    std::vector<int> pyramid_levels(1, 3);
    std::vector<float> all_anchor_points;
    generate_anchor_points(in.w, in.h, pyramid_levels, 2, 2, all_anchor_points);

    // wraps the vector's storage without copying; all_anchor_points must outlive the extraction
    ncnn::Mat anchor_points = ncnn::Mat(2, (int)(all_anchor_points.size() / 2), all_anchor_points.data());

    ncnn::Extractor ex = net.create_extractor();
    const float mean_vals1[3] = {123.675f, 116.28f, 103.53f};
    const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f};

    in.substract_mean_normalize(mean_vals1, norm_vals1);

    ex.input("input", in);
    ex.input("anchor", anchor_points);

    ncnn::Mat score, points;
    ex.extract("pred_scores", score);
    ex.extract("pred_points", points);

    if (points.h > 0)
        crowd_points.reserve(crowd_points.size() + points.h);

    for (int i = 0; i < points.h; i++)
    {
        const float* score_data = score.row(i);
        const float* points_data = points.row(i);

        // map from network-input coordinates back to the original image
        const int x = points_data[0] / new_width * width;
        const int y = points_data[1] / new_height * height;

        CrowdPoint cp;
        cp.pt = cv::Point(x, y);
        cp.prob = score_data[1];
        crowd_points.push_back(cp);
    }

    return 0;
}
|
||||
|
||||
// Shows a copy of the image with a filled dot (BGR 0,0,255) on every crowd
// point whose score is above 0.5, then waits for a key press.
static void draw_result(const cv::Mat& bgr, const std::vector<CrowdPoint>& crowd_points)
{
    const float threshold = 0.5f;
    cv::Mat image = bgr.clone();

    for (std::vector<CrowdPoint>::const_iterator it = crowd_points.begin(); it != crowd_points.end(); ++it)
    {
        // low-score points are not drawn
        if (!(it->prob > threshold))
            continue;

        cv::circle(image, it->pt, 4, cv::Scalar(0, 0, 255), -1, 8, 0);
    }

    cv::imshow("image", image);
    cv::waitKey();
}
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat bgr = cv::imread(imagepath, 1);
|
||||
if (bgr.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<CrowdPoint> crowd_points;
|
||||
detect_crowd(bgr, crowd_points);
|
||||
draw_result(bgr, crowd_points);
|
||||
|
||||
return 0;
|
||||
}
|
196
3rdparty/ncnn/examples/peleenetssd_seg.cpp
vendored
Normal file
196
3rdparty/ncnn/examples/peleenetssd_seg.cpp
vendored
Normal file
@ -0,0 +1,196 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Runs the PeleeNet-SSD detection + segmentation model on a BGR image.
//
// bgr      input image, read through bgr.data / bgr.cols / bgr.rows
// objects  cleared, then filled with one entry per row of "detection_out";
//          boxes are scaled to source-image pixels
// resized  receives the "sigmoid" segmentation output, bilinearly resized to
//          the source image size
//
// Returns 0 on success and -1 when the model cannot be loaded or an output
// blob cannot be extracted.
static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& objects, ncnn::Mat& resized)
{
    objects.clear();

    ncnn::Net peleenet;

    peleenet.opt.use_vulkan_compute = true;

    // model is converted from https://github.com/eric612/MobileNet-YOLO
    // and can be downloaded from https://drive.google.com/open?id=1Wt6jKv13sBRMHgrGAJYlOlRF-o80pC0g
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (peleenet.load_param("pelee.param") != 0 || peleenet.load_model("pelee.bin") != 0)
    {
        fprintf(stderr, "failed to load pelee.param / pelee.bin\n");
        return -1;
    }

    const int target_size = 304;

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, target_size, target_size);

    const float mean_vals[3] = {103.9f, 116.7f, 123.6f};
    const float norm_vals[3] = {0.017f, 0.017f, 0.017f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = peleenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("detection_out", out) != 0)
    {
        fprintf(stderr, "failed to extract detection_out\n");
        return -1;
    }

    // printf("%d %d %d\n", out.w, out.h, out.c);
    for (int i = 0; i < out.h; i++)
    {
        // row layout: label, score, x0, y0, x1, y1 with coordinates relative to the image size
        const float* values = out.row(i);

        Object object;
        object.label = values[0];
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    ncnn::Mat seg_out;
    if (ex.extract("sigmoid", seg_out) != 0)
    {
        fprintf(stderr, "failed to extract sigmoid\n");
        return -1;
    }

    resize_bilinear(seg_out, resized, img_w, img_h);
    //resize_bicubic(seg_out,resized,img_w,img_h); // sharpness

    return 0;
}
|
||||
|
||||
// Draws detection boxes with "<class> <score>%" labels on a copy of the image,
// blends the segmentation map over it and shows the result until a key is
// pressed.
//
// bgr      source image (3 bytes per pixel, as produced by cv::imread(path, 1))
// objects  detections to draw; each one is also logged to stderr
// map      per-class score map; for every pixel the channel with the highest
//          score above 0.45 selects the overlay color. Only the first two
//          channels have a color assigned.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, ncnn::Mat map)
{
    static const char* class_names[] = {"background",
                                        "person", "rider", "car", "bus",
                                        "truck", "bike", "motor",
                                        "traffic light", "traffic sign", "train"
                                       };
    const int class_count = sizeof(class_names) / sizeof(class_names[0]);

    cv::Mat image = bgr.clone();
    const int color[] = {128, 255, 128, 244, 35, 232};
    const int color_count = sizeof(color) / sizeof(int);

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label comes straight from the network output, so never index the
        // name table with it unchecked
        const char* class_name = (obj.label >= 0 && obj.label < class_count) ? class_names[obj.label] : "unknown";

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // keep the label inside the image
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    const int size = map.c;
    const float threshold = 0.45;

    // never walk past the image if the map and the image disagree in size
    const int rows = map.h < image.rows ? map.h : image.rows;
    const int cols = map.w < image.cols ? map.w : image.cols;

    // Channels are addressed through map.channel() because ncnn pads each
    // channel to an aligned stride; "base + c * w * h" is only correct when
    // w * h happens to match that stride.
    std::vector<const float*> channel_rows(size > 0 ? size : 0);

    for (int i = 0; i < rows; i++)
    {
        unsigned char* pixel = image.ptr<unsigned char>(i);

        for (int c = 0; c < size; c++)
        {
            channel_rows[c] = map.channel(c).row(i);
        }

        for (int j = 0; j < cols; j++, pixel += 3)
        {
            // pick the best-scoring channel above the threshold
            float maxima = threshold;
            int index = -1;
            for (int c = 0; c < size; c++)
            {
                if (channel_rows[c][j] > maxima)
                {
                    maxima = channel_rows[c][j];
                    index = c;
                }
            }

            if (index < 0)
                continue;

            const int color_index = index * 3;
            if (color_index >= color_count)
                continue;

            // 50/50 blend of the class color with the existing pixel
            pixel[0] = color[color_index] / 2 + pixel[0] / 2;
            pixel[1] = color[color_index + 1] / 2 + pixel[1] / 2;
            pixel[2] = color[color_index + 2] / 2 + pixel[2] / 2;
        }
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
ncnn::Mat seg_out;
|
||||
detect_peleenet(m, objects, seg_out);
|
||||
|
||||
draw_objects(m, objects, seg_out);
|
||||
|
||||
return 0;
|
||||
}
|
434
3rdparty/ncnn/examples/retinaface.cpp
vendored
Normal file
434
3rdparty/ncnn/examples/retinaface.cpp
vendored
Normal file
@ -0,0 +1,434 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct FaceObject
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
cv::Point2f landmark[5];
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const FaceObject& a, const FaceObject& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
|
||||
{
|
||||
if (faceobjects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const FaceObject& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const FaceObject& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// copy from src/layer/proposal.cpp
|
||||
// Builds the base anchor boxes for one feature level.
//
// base_size  side length of the reference box; every anchor is centered on
//            (base_size / 2, base_size / 2)
// ratios     1-D Mat of height/width aspect ratios
// scales     1-D Mat of scale factors applied to the ratio-adjusted box
//
// Returns a Mat with one row per (ratio, scale) pair, ratio-major, each row
// holding x0, y0, x1, y1.
static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
    const int ratio_count = ratios.w;
    const int scale_count = scales.w;

    ncnn::Mat anchors;
    anchors.create(4, ratio_count * scale_count);

    const float cx = base_size * 0.5f;
    const float cy = base_size * 0.5f;

    for (int ri = 0; ri < ratio_count; ri++)
    {
        const float ar = ratios[ri];

        // integer box size for this aspect ratio before scaling
        const int r_w = round(base_size / sqrt(ar));
        const int r_h = round(r_w * ar); //round(base_size * sqrt(ar));

        for (int si = 0; si < scale_count; si++)
        {
            const float scale = scales[si];

            const float rs_w = r_w * scale;
            const float rs_h = r_h * scale;
            const float half_w = rs_w * 0.5f;
            const float half_h = rs_h * 0.5f;

            float* box = anchors.row(ri * scale_count + si);
            box[0] = cx - half_w;
            box[1] = cy - half_h;
            box[2] = cx + half_w;
            box[3] = cy + half_h;
        }
    }

    return anchors;
}
|
||||
|
||||
// Decodes the raw outputs of one feature level into face candidates.
//
// anchors         base anchors, one x0,y0,x1,y1 row per anchor
// feat_stride     pixel step between neighbouring feature-map cells
// score_blob      scores; channel (q + num_anchors) is read for anchor q
// bbox_blob       box deltas, 4 channels per anchor (dx, dy, dw, dh)
// landmark_blob   landmark offsets, 10 channels per anchor (x,y for 5 points)
// prob_threshold  candidates scoring below this value are skipped
// faceobjects     output; decoded candidates are appended
static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, const ncnn::Mat& landmark_blob, float prob_threshold, std::vector<FaceObject>& faceobjects)
{
    const int w = score_blob.w;
    const int h = score_blob.h;

    // generate face proposal from bbox deltas and shifted anchors
    const int num_anchors = anchors.h;

    for (int q = 0; q < num_anchors; q++)
    {
        const float* anchor = anchors.row(q);

        const ncnn::Mat score = score_blob.channel(q + num_anchors);
        const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4);
        const ncnn::Mat landmark = landmark_blob.channel_range(q * 10, 10);

        const float anchor_w = anchor[2] - anchor[0];
        const float anchor_h = anchor[3] - anchor[1];

        // the anchor is shifted by feat_stride for every cell of the map
        float anchor_y = anchor[1];
        for (int i = 0; i < h; i++, anchor_y += feat_stride)
        {
            float anchor_x = anchor[0];
            for (int j = 0; j < w; j++, anchor_x += feat_stride)
            {
                const int index = i * w + j;

                const float prob = score[index];
                if (!(prob >= prob_threshold))
                    continue;

                // apply center size
                const float dx = bbox.channel(0)[index];
                const float dy = bbox.channel(1)[index];
                const float dw = bbox.channel(2)[index];
                const float dh = bbox.channel(3)[index];

                const float cx = anchor_x + anchor_w * 0.5f;
                const float cy = anchor_y + anchor_h * 0.5f;

                const float pb_cx = cx + anchor_w * dx;
                const float pb_cy = cy + anchor_h * dy;

                const float pb_w = anchor_w * exp(dw);
                const float pb_h = anchor_h * exp(dh);

                const float x0 = pb_cx - pb_w * 0.5f;
                const float y0 = pb_cy - pb_h * 0.5f;
                const float x1 = pb_cx + pb_w * 0.5f;
                const float y1 = pb_cy + pb_h * 0.5f;

                FaceObject obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0 + 1;
                obj.rect.height = y1 - y0 + 1;

                // landmark k takes its x from channel 2k and its y from channel 2k + 1
                for (int k = 0; k < 5; k++)
                {
                    obj.landmark[k].x = cx + (anchor_w + 1) * landmark.channel(2 * k)[index];
                    obj.landmark[k].y = cy + (anchor_h + 1) * landmark.channel(2 * k + 1)[index];
                }

                obj.prob = prob;

                faceobjects.push_back(obj);
            }
        }
    }
}
|
||||
|
||||
// Runs the RetinaFace (mnet.25) model on a BGR image.
//
// bgr          input image, read through bgr.data / bgr.cols / bgr.rows
// faceobjects  resized to the number of detected faces and overwritten with
//              the candidates that survive NMS, highest score first, with
//              boxes clipped to the image
//
// Returns 0 on success and -1 when the model cannot be loaded or an output
// blob cannot be extracted.
static int detect_retinaface(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects)
{
    // per-level settings: feature stride, the two anchor scales and the
    // names of the three output blobs
    struct StrideLevel
    {
        int feat_stride;
        float scale0;
        float scale1;
        const char* score_name;
        const char* bbox_name;
        const char* landmark_name;
    };
    static const StrideLevel levels[] = {
        {32, 32.f, 16.f, "face_rpn_cls_prob_reshape_stride32", "face_rpn_bbox_pred_stride32", "face_rpn_landmark_pred_stride32"},
        {16, 8.f, 4.f, "face_rpn_cls_prob_reshape_stride16", "face_rpn_bbox_pred_stride16", "face_rpn_landmark_pred_stride16"},
        {8, 2.f, 1.f, "face_rpn_cls_prob_reshape_stride8", "face_rpn_bbox_pred_stride8", "face_rpn_landmark_pred_stride8"}
    };
    const int level_count = sizeof(levels) / sizeof(levels[0]);

    ncnn::Net retinaface;

    retinaface.opt.use_vulkan_compute = true;

    // model is converted from
    // https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models
    // https://github.com/deepinsight/insightface/issues/669
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // retinaface.load_param("retinaface-R50.param");
    // retinaface.load_model("retinaface-R50.bin");
    if (retinaface.load_param("mnet.25-opt.param") != 0 || retinaface.load_model("mnet.25-opt.bin") != 0)
    {
        fprintf(stderr, "failed to load mnet.25-opt.param / mnet.25-opt.bin\n");
        return -1;
    }

    const float prob_threshold = 0.8f;
    const float nms_threshold = 0.4f;

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h);

    ncnn::Extractor ex = retinaface.create_extractor();

    ex.input("data", in);

    std::vector<FaceObject> faceproposals;

    // strides 32, 16 and 8 in that order; all levels share base size 16 and
    // a single aspect ratio of 1
    for (int l = 0; l < level_count; l++)
    {
        const StrideLevel& level = levels[l];

        ncnn::Mat score_blob, bbox_blob, landmark_blob;
        if (ex.extract(level.score_name, score_blob) != 0
                || ex.extract(level.bbox_name, bbox_blob) != 0
                || ex.extract(level.landmark_name, landmark_blob) != 0)
        {
            fprintf(stderr, "failed to extract stride %d outputs\n", level.feat_stride);
            return -1;
        }

        const int base_size = 16;
        ncnn::Mat ratios(1);
        ratios[0] = 1.f;
        ncnn::Mat scales(2);
        scales[0] = level.scale0;
        scales[1] = level.scale1;
        ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);

        // generate_proposals appends, so all levels accumulate in one vector
        generate_proposals(anchors, level.feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold, faceproposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(faceproposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(faceproposals, picked, nms_threshold);

    const int face_count = picked.size();

    faceobjects.resize(face_count);
    for (int i = 0; i < face_count; i++)
    {
        faceobjects[i] = faceproposals[picked[i]];

        // clip to image size
        float x0 = faceobjects[i].rect.x;
        float y0 = faceobjects[i].rect.y;
        float x1 = x0 + faceobjects[i].rect.width;
        float y1 = y0 + faceobjects[i].rect.height;

        x0 = std::max(std::min(x0, (float)img_w - 1), 0.f);
        y0 = std::max(std::min(y0, (float)img_h - 1), 0.f);
        x1 = std::max(std::min(x1, (float)img_w - 1), 0.f);
        y1 = std::max(std::min(y1, (float)img_h - 1), 0.f);

        faceobjects[i].rect.x = x0;
        faceobjects[i].rect.y = y0;
        faceobjects[i].rect.width = x1 - x0;
        faceobjects[i].rect.height = y1 - y0;
    }

    return 0;
}
|
||||
|
||||
// Draws every face (box, five landmark dots and a "<score>%" label) on a copy
// of the image, logs each face to stderr and shows the result until a key is
// pressed.
static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects)
{
    cv::Mat image = bgr.clone();

    for (std::vector<FaceObject>::const_iterator it = faceobjects.begin(); it != faceobjects.end(); ++it)
    {
        const FaceObject& face = *it;

        fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", face.prob,
                face.rect.x, face.rect.y, face.rect.width, face.rect.height);

        cv::rectangle(image, face.rect, cv::Scalar(0, 255, 0));

        for (int k = 0; k < 5; k++)
        {
            cv::circle(image, face.landmark[k], 2, cv::Scalar(0, 255, 255), -1);
        }

        char text[256];
        sprintf(text, "%.1f%%", face.prob * 100);

        int baseLine = 0;
        const cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the label above the box, pulled back inside the image if needed
        int label_x = face.rect.x;
        int label_y = face.rect.y - label_size.height - baseLine;
        if (label_y < 0)
            label_y = 0;
        if (label_x + label_size.width > image.cols)
            label_x = image.cols - label_size.width;

        const cv::Rect background(cv::Point(label_x, label_y), cv::Size(label_size.width, label_size.height + baseLine));
        cv::rectangle(image, background, cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(label_x, label_y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<FaceObject> faceobjects;
|
||||
detect_retinaface(m, faceobjects);
|
||||
|
||||
draw_faceobjects(m, faceobjects);
|
||||
|
||||
return 0;
|
||||
}
|
357
3rdparty/ncnn/examples/rfcn.cpp
vendored
Normal file
357
3rdparty/ncnn/examples/rfcn.cpp
vendored
Normal file
@ -0,0 +1,357 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#include <math.h>
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = objects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (objects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (objects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(objects[i], objects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(objects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(objects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& objects)
|
||||
{
|
||||
if (objects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(objects, 0, objects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = objects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = objects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = objects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = objects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the R-FCN detector on a BGR image.
//
// bgr      input image, read through bgr.data / bgr.cols / bgr.rows
// objects  cleared, then filled with at most 100 detections sorted by
//          descending score; boxes are in source-image pixels and clipped to
//          the image
//
// Returns 0 on success and -1 when the model cannot be loaded or the first
// stage outputs cannot be extracted.
static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects)
{
    objects.clear();

    ncnn::Net rfcn;

    rfcn.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/YuwenXiong/py-R-FCN
    // https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
    // https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
    // resnet50_rfcn_final.caffemodel
    if (rfcn.load_param("rfcn_end2end.param") != 0 || rfcn.load_model("rfcn_end2end.bin") != 0)
    {
        fprintf(stderr, "failed to load rfcn_end2end.param / rfcn_end2end.bin\n");
        return -1;
    }

    const int target_size = 224;

    const int max_per_image = 100;
    const float confidence_thresh = 0.6f; // CONF_THRESH

    const float nms_threshold = 0.3f; // NMS_THRESH

    // scale to target detect size: the shorter side becomes target_size
    int w = bgr.cols;
    int h = bgr.rows;
    float scale = 1.f;
    if (w < h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, w, h);

    const float mean_vals[3] = {102.9801f, 115.9465f, 122.7717f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Mat im_info(3);
    im_info[0] = h;
    im_info[1] = w;
    im_info[2] = scale;

    // step1, extract feature and all rois
    ncnn::Extractor ex1 = rfcn.create_extractor();

    ex1.input("data", in);
    ex1.input("im_info", im_info);

    ncnn::Mat rfcn_cls;
    ncnn::Mat rfcn_bbox;
    ncnn::Mat rois; // all rois
    if (ex1.extract("rfcn_cls", rfcn_cls) != 0
            || ex1.extract("rfcn_bbox", rfcn_bbox) != 0
            || ex1.extract("rois", rois) != 0)
    {
        fprintf(stderr, "failed to extract rfcn_cls / rfcn_bbox / rois\n");
        return -1;
    }

    // step2, extract bbox and score for each roi
    std::vector<std::vector<Object> > class_candidates;
    for (int i = 0; i < rois.c; i++)
    {
        ncnn::Extractor ex2 = rfcn.create_extractor();

        ncnn::Mat roi = rois.channel(i); // get single roi
        ex2.input("rfcn_cls", rfcn_cls);
        ex2.input("rfcn_bbox", rfcn_bbox);
        ex2.input("rois", roi);

        ncnn::Mat bbox_pred;
        ncnn::Mat cls_prob;
        // a roi whose second stage fails is skipped instead of being decoded
        // from empty blobs
        if (ex2.extract("bbox_pred", bbox_pred) != 0 || ex2.extract("cls_prob", cls_prob) != 0)
            continue;

        const int num_class = cls_prob.w;
        class_candidates.resize(num_class);

        // find class id with highest score
        int label = 0;
        float score = 0.f;
        for (int k = 0; k < num_class; k++)
        {
            const float class_score = cls_prob[k];
            if (class_score > score)
            {
                label = k;
                score = class_score;
            }
        }

        // ignore background or low score
        if (label == 0 || score <= confidence_thresh)
            continue;

        // fprintf(stderr, "%d = %f\n", label, score);

        // unscale to image size
        const float x1 = roi[0] / scale;
        const float y1 = roi[1] / scale;
        const float x2 = roi[2] / scale;
        const float y2 = roi[3] / scale;

        const float pb_w = x2 - x1 + 1;
        const float pb_h = y2 - y1 + 1;

        // apply bbox regression; the deltas are read from elements 4..7 of bbox_pred
        const float dx = bbox_pred[4];
        const float dy = bbox_pred[4 + 1];
        const float dw = bbox_pred[4 + 2];
        const float dh = bbox_pred[4 + 3];

        const float cx = x1 + pb_w * 0.5f;
        const float cy = y1 + pb_h * 0.5f;

        const float obj_cx = cx + pb_w * dx;
        const float obj_cy = cy + pb_h * dy;

        const float obj_w = pb_w * exp(dw);
        const float obj_h = pb_h * exp(dh);

        float obj_x1 = obj_cx - obj_w * 0.5f;
        float obj_y1 = obj_cy - obj_h * 0.5f;
        float obj_x2 = obj_cx + obj_w * 0.5f;
        float obj_y2 = obj_cy + obj_h * 0.5f;

        // clip
        obj_x1 = std::max(std::min(obj_x1, (float)(bgr.cols - 1)), 0.f);
        obj_y1 = std::max(std::min(obj_y1, (float)(bgr.rows - 1)), 0.f);
        obj_x2 = std::max(std::min(obj_x2, (float)(bgr.cols - 1)), 0.f);
        obj_y2 = std::max(std::min(obj_y2, (float)(bgr.rows - 1)), 0.f);

        // append object
        Object obj;
        obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
        obj.label = label;
        obj.prob = score;

        class_candidates[label].push_back(obj);
    }

    // post process: per-class NMS, then a global sort and cut-off
    for (int i = 0; i < (int)class_candidates.size(); i++)
    {
        std::vector<Object>& candidates = class_candidates[i];

        qsort_descent_inplace(candidates);

        std::vector<int> picked;
        nms_sorted_bboxes(candidates, picked, nms_threshold);

        for (int j = 0; j < (int)picked.size(); j++)
        {
            int z = picked[j];
            objects.push_back(candidates[z]);
        }
    }

    qsort_descent_inplace(objects);

    if (max_per_image > 0 && max_per_image < (int)objects.size())
    {
        objects.resize(max_per_image);
    }

    return 0;
}
|
||||
|
||||
// Draws every detection (box plus "<class> <score>%" label) on a copy of the
// image, logs each detection to stderr and shows the result until a key is
// pressed. Labels outside the class-name table are drawn as "unknown".
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    const int class_count = sizeof(class_names) / sizeof(class_names[0]);

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label comes from the network output, so never index the name
        // table with it unchecked
        const char* class_name = (obj.label >= 0 && obj.label < class_count) ? class_names[obj.label] : "unknown";

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the label above the box, pulled back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_rfcn(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
132
3rdparty/ncnn/examples/rvm.cpp
vendored
Normal file
132
3rdparty/ncnn/examples/rvm.cpp
vendored
Normal file
@ -0,0 +1,132 @@
|
||||
#include "net.h"
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
// Visualises a matting result: shows the alpha matte, the foreground and the
// foreground blended over a flat background colour, then waits for a key.
//
// bgr: original image; it is resized to the matte size to create the composite buffer.
// fgr: floating-point foreground, multiplied by 255 for display (presumably in [0, 1]).
// pha: floating-point alpha matte, multiplied by 255 for display (presumably in [0, 1]).
static void draw_objects(const cv::Mat& bgr, const cv::Mat& fgr, const cv::Mat& pha)
{
    // 8-bit versions of both network outputs
    cv::Mat foreground_u8;
    fgr.convertTo(foreground_u8, CV_8UC3, 255.0, 0);
    cv::Mat alpha_u8;
    pha.convertTo(alpha_u8, CV_8UC1, 255.0, 0);

    // every pixel of the resized input is overwritten by the blend below
    cv::Mat composite;
    cv::resize(bgr, composite, pha.size(), 0, 0, 1);

    // flat background colour, one value per channel of the composite
    const int background[3] = {155, 255, 120};

    for (int row = 0; row < alpha_u8.rows; row++)
    {
        for (int col = 0; col < alpha_u8.cols; col++)
        {
            const uchar matte = alpha_u8.at<uchar>(row, col);
            const float alpha = (float)matte / 255;

            const cv::Vec3b& src = foreground_u8.at<cv::Vec3b>(row, col);
            cv::Vec3b& dst = composite.at<cv::Vec3b>(row, col);

            // alpha blend: foreground where alpha is high, background elsewhere
            for (int ch = 0; ch < 3; ch++)
            {
                dst[ch] = src[ch] * alpha + (1 - alpha) * background[ch];
            }
        }
    }

    cv::imshow("pha", alpha_u8);
    cv::imshow("fgr", foreground_u8);
    cv::imshow("comp", composite);
    cv::waitKey(0);
}
|
||||
// Runs the RobustVideoMatting ncnn model on a single image.
//
// bgr: 8-bit, 3-channel input image in BGR order.
// pha: receives the alpha matte as CV_32FC1.
// fgr: receives the foreground as CV_32FC3, with the network's channel order reversed.
// Returns 0 on success, -1 when the model cannot be loaded or the outputs
// cannot be extracted (pha and fgr are left untouched in that case).
static int detect_rvm(const cv::Mat& bgr, cv::Mat& pha, cv::Mat& fgr)
{
    const float downsample_ratio = 0.5f;
    const int target_width = 512;
    const int target_height = 512;

    ncnn::Net net;
    net.opt.use_vulkan_compute = false;
    //original pretrained model from https://github.com/PeterL1n/RobustVideoMatting
    //ncnn model https://pan.baidu.com/s/11iEY2RGfzWFtce8ue7T3JQ password: d9t6
    if (net.load_param("rvm_512.param") != 0 || net.load_model("rvm_512.bin") != 0)
    {
        fprintf(stderr, "failed to load rvm_512.param / rvm_512.bin\n");
        return -1;
    }

    // zero-filled state inputs for the first (or only) frame
    // if you use another input size, please change these input shapes
    ncnn::Mat r1i = ncnn::Mat(128, 128, 16);
    ncnn::Mat r2i = ncnn::Mat(64, 64, 20);
    ncnn::Mat r3i = ncnn::Mat(32, 32, 40);
    ncnn::Mat r4i = ncnn::Mat(16, 16, 64);
    r1i.fill(0.0f);
    r2i.fill(0.0f);
    r3i.fill(0.0f);
    r4i.fill(0.0f);

    ncnn::Extractor ex = net.create_extractor();

    // src1 is the downsampled, mean/std-normalised input; src2 is the
    // full-size input scaled to [0, 1]
    const float mean_vals1[3] = {123.675f, 116.28f, 103.53f};
    const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f};
    const float mean_vals2[3] = {0, 0, 0};
    const float norm_vals2[3] = {1 / 255.0, 1 / 255.0, 1 / 255.0};
    ncnn::Mat ncnn_in2 = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_width, target_height);
    ncnn::Mat ncnn_in1 = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_width * downsample_ratio, target_height * downsample_ratio);

    ncnn_in1.substract_mean_normalize(mean_vals1, norm_vals1);
    ncnn_in2.substract_mean_normalize(mean_vals2, norm_vals2);

    ex.input("src1", ncnn_in1);
    ex.input("src2", ncnn_in2);
    ex.input("r1i", r1i);
    ex.input("r2i", r2i);
    ex.input("r3i", r3i);
    ex.input("r4i", r4i);

    // for video matting, these outputs become the inputs of the next inference
    ex.extract("r4o", r4i);
    ex.extract("r3o", r3i);
    ex.extract("r2o", r2i);
    ex.extract("r1o", r1i);

    ncnn::Mat pha_;
    ncnn::Mat fgr_;
    if (ex.extract("pha", pha_) != 0 || ex.extract("fgr", fgr_) != 0 || fgr_.c < 3)
    {
        fprintf(stderr, "failed to extract pha / fgr\n");
        return -1;
    }

    // wrap the matte without copying; copyTo below makes the deep copy
    cv::Mat cv_pha = cv::Mat(pha_.h, pha_.w, CV_32FC1, (float*)pha_.data);

    // Interleave the planar ncnn channels into an OpenCV image, reversing the
    // channel order. channel() is used instead of h * w offsets because ncnn
    // may pad the distance between channels (cstep).
    cv::Mat cv_fgr = cv::Mat(fgr_.h, fgr_.w, CV_32FC3);
    const ncnn::Mat plane0 = fgr_.channel(0);
    const ncnn::Mat plane1 = fgr_.channel(1);
    const ncnn::Mat plane2 = fgr_.channel(2);
    for (int i = 0; i < fgr_.h; i++)
    {
        const float* row0 = plane0.row(i);
        const float* row1 = plane1.row(i);
        const float* row2 = plane2.row(i);
        for (int j = 0; j < fgr_.w; j++)
        {
            cv::Vec3f& px = cv_fgr.at<cv::Vec3f>(i, j);
            px[2] = row0[j];
            px[1] = row1[j];
            px[0] = row2[j];
        }
    }

    cv_pha.copyTo(pha);
    cv_fgr.copyTo(fgr);

    return 0;
}
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cv::Mat fgr, pha;
|
||||
detect_rvm(m, pha, fgr);
|
||||
draw_objects(m, fgr, pha);
|
||||
|
||||
return 0;
|
||||
}
|
434
3rdparty/ncnn/examples/scrfd.cpp
vendored
Normal file
434
3rdparty/ncnn/examples/scrfd.cpp
vendored
Normal file
@ -0,0 +1,434 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct FaceObject
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const FaceObject& a, const FaceObject& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
|
||||
{
|
||||
if (faceobjects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const FaceObject& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const FaceObject& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors()
|
||||
// Builds the base anchors for one detection head.
//
// base_size: side length of the unscaled square anchor.
// ratios:    aspect ratios, one per element.
// scales:    size multipliers, one per element.
// Returns a matrix with one row of four floats (x0, y0, x1, y1) for every
// ratio/scale combination, each anchor centred on the origin.
static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
    const int ratio_count = ratios.w;
    const int scale_count = scales.w;

    ncnn::Mat anchors;
    anchors.create(4, ratio_count * scale_count);

    // anchor centre
    const float cx = 0;
    const float cy = 0;

    for (int ri = 0; ri < ratio_count; ri++)
    {
        const float ar = ratios[ri];

        // the height is derived from the already rounded width
        const int ratio_w = round(base_size / sqrt(ar));
        const int ratio_h = round(ratio_w * ar); //round(base_size * sqrt(ar));

        for (int si = 0; si < scale_count; si++)
        {
            const float scale = scales[si];

            const float scaled_w = ratio_w * scale;
            const float scaled_h = ratio_h * scale;

            float* out = anchors.row(ri * scale_count + si);

            out[0] = cx - scaled_w * 0.5f;
            out[1] = cy - scaled_h * 0.5f;
            out[2] = cx + scaled_w * 0.5f;
            out[3] = cy + scaled_h * 0.5f;
        }
    }

    return anchors;
}
|
||||
|
||||
// Decodes the outputs of one detection head into boxes and appends every box
// whose score reaches prob_threshold to faceobjects.
//
// anchors:     base anchors, one row (x0, y0, x1, y1) per anchor.
// feat_stride: step between neighbouring anchor positions, also the scale of the predicted distances.
// score_blob:  one score channel per anchor.
// bbox_blob:   four distance channels per anchor.
static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, float prob_threshold, std::vector<FaceObject>& faceobjects)
{
    const int w = score_blob.w;
    const int h = score_blob.h;

    // generate face proposal from bbox deltas and shifted anchors
    const int num_anchors = anchors.h;

    for (int q = 0; q < num_anchors; q++)
    {
        const float* anchor = anchors.row(q);

        const float* score = score_blob.channel(q);
        const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4);
        const float* dist0 = bbox.channel(0);
        const float* dist1 = bbox.channel(1);
        const float* dist2 = bbox.channel(2);
        const float* dist3 = bbox.channel(3);

        const float anchor_w = anchor[2] - anchor[0];
        const float anchor_h = anchor[3] - anchor[1];

        // the anchor is shifted by feat_stride per column and per row
        float anchor_y = anchor[1];
        for (int i = 0; i < h; i++, anchor_y += feat_stride)
        {
            float anchor_x = anchor[0];
            for (int j = 0; j < w; j++, anchor_x += feat_stride)
            {
                const int index = i * w + j;

                const float prob = score[index];
                if (!(prob >= prob_threshold))
                    continue;

                // insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single()
                const float dx = dist0[index] * feat_stride;
                const float dy = dist1[index] * feat_stride;
                const float dw = dist2[index] * feat_stride;
                const float dh = dist3[index] * feat_stride;

                // insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox()
                const float cx = anchor_x + anchor_w * 0.5f;
                const float cy = anchor_y + anchor_h * 0.5f;

                const float x0 = cx - dx;
                const float y0 = cy - dy;
                const float x1 = cx + dw;
                const float y1 = cy + dh;

                FaceObject obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0 + 1;
                obj.rect.height = y1 - y0 + 1;
                obj.prob = prob;

                faceobjects.push_back(obj);
            }
        }
    }
}
|
||||
|
||||
static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects)
|
||||
{
|
||||
ncnn::Net scrfd;
|
||||
|
||||
scrfd.opt.use_vulkan_compute = true;
|
||||
|
||||
// model is converted from
|
||||
// https://github.com/deepinsight/insightface/tree/master/detection/scrfd
|
||||
// the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
|
||||
scrfd.load_param("scrfd_500m-opt2.param");
|
||||
scrfd.load_model("scrfd_500m-opt2.bin");
|
||||
|
||||
int width = bgr.cols;
|
||||
int height = bgr.rows;
|
||||
|
||||
// insightface/detection/scrfd/configs/scrfd/scrfd_500m.py
|
||||
const int target_size = 640;
|
||||
const float prob_threshold = 0.3f;
|
||||
const float nms_threshold = 0.45f;
|
||||
|
||||
// pad to multiple of 32
|
||||
int w = width;
|
||||
int h = height;
|
||||
float scale = 1.f;
|
||||
if (w > h)
|
||||
{
|
||||
scale = (float)target_size / w;
|
||||
w = target_size;
|
||||
h = h * scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = (float)target_size / h;
|
||||
h = target_size;
|
||||
w = w * scale;
|
||||
}
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, w, h);
|
||||
|
||||
// pad to target_size rectangle
|
||||
int wpad = (w + 31) / 32 * 32 - w;
|
||||
int hpad = (h + 31) / 32 * 32 - h;
|
||||
ncnn::Mat in_pad;
|
||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||
|
||||
const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
|
||||
const float norm_vals[3] = {1 / 128.f, 1 / 128.f, 1 / 128.f};
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = scrfd.create_extractor();
|
||||
|
||||
ex.input("input.1", in_pad);
|
||||
|
||||
std::vector<FaceObject> faceproposals;
|
||||
|
||||
// stride 32
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob;
|
||||
ex.extract("412", score_blob);
|
||||
ex.extract("415", bbox_blob);
|
||||
|
||||
const int base_size = 16;
|
||||
const int feat_stride = 8;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 1.f;
|
||||
ncnn::Mat scales(2);
|
||||
scales[0] = 1.f;
|
||||
scales[1] = 2.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects32;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects32);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end());
|
||||
}
|
||||
|
||||
// stride 16
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob;
|
||||
ex.extract("474", score_blob);
|
||||
ex.extract("477", bbox_blob);
|
||||
|
||||
const int base_size = 64;
|
||||
const int feat_stride = 16;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 1.f;
|
||||
ncnn::Mat scales(2);
|
||||
scales[0] = 1.f;
|
||||
scales[1] = 2.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects16;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects16);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end());
|
||||
}
|
||||
|
||||
// stride 8
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob;
|
||||
ex.extract("536", score_blob);
|
||||
ex.extract("539", bbox_blob);
|
||||
|
||||
const int base_size = 256;
|
||||
const int feat_stride = 32;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 1.f;
|
||||
ncnn::Mat scales(2);
|
||||
scales[0] = 1.f;
|
||||
scales[1] = 2.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects8;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
|
||||
}
|
||||
|
||||
// sort all proposals by score from highest to lowest
|
||||
qsort_descent_inplace(faceproposals);
|
||||
|
||||
// apply nms with nms_threshold
|
||||
std::vector<int> picked;
|
||||
nms_sorted_bboxes(faceproposals, picked, nms_threshold);
|
||||
|
||||
int face_count = picked.size();
|
||||
|
||||
faceobjects.resize(face_count);
|
||||
for (int i = 0; i < face_count; i++)
|
||||
{
|
||||
faceobjects[i] = faceproposals[picked[i]];
|
||||
|
||||
// adjust offset to original unpadded
|
||||
float x0 = (faceobjects[i].rect.x - (wpad / 2)) / scale;
|
||||
float y0 = (faceobjects[i].rect.y - (hpad / 2)) / scale;
|
||||
float x1 = (faceobjects[i].rect.x + faceobjects[i].rect.width - (wpad / 2)) / scale;
|
||||
float y1 = (faceobjects[i].rect.y + faceobjects[i].rect.height - (hpad / 2)) / scale;
|
||||
|
||||
x0 = std::max(std::min(x0, (float)width - 1), 0.f);
|
||||
y0 = std::max(std::min(y0, (float)height - 1), 0.f);
|
||||
x1 = std::max(std::min(x1, (float)width - 1), 0.f);
|
||||
y1 = std::max(std::min(y1, (float)height - 1), 0.f);
|
||||
|
||||
faceobjects[i].rect.x = x0;
|
||||
faceobjects[i].rect.y = y0;
|
||||
faceobjects[i].rect.width = x1 - x0;
|
||||
faceobjects[i].rect.height = y1 - y0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Draws every detection with its score on a copy of the image, logs it to
// stderr and shows the result in a window until a key is pressed.
static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects)
{
    cv::Mat image = bgr.clone();

    for (size_t idx = 0; idx < faceobjects.size(); idx++)
    {
        const FaceObject& face = faceobjects[idx];
        const cv::Rect_<float>& box = face.rect;

        fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", face.prob,
                box.x, box.y, box.width, box.height);

        cv::rectangle(image, box, cv::Scalar(0, 255, 0));

        char text[256];
        sprintf(text, "%.1f%%", face.prob * 100);

        int baseLine = 0;
        const cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // caption sits just above the box, kept below the top edge and left
        // of the right edge of the image
        const int x = std::min((int)box.x, image.cols - label_size.width);
        const int y = std::max((int)(box.y - label_size.height - baseLine), 0);

        // white filled background, then black text on top of it
        const cv::Rect label_box(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine));
        cv::rectangle(image, label_box, cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<FaceObject> faceobjects;
|
||||
detect_scrfd(m, faceobjects);
|
||||
|
||||
draw_faceobjects(m, faceobjects);
|
||||
|
||||
return 0;
|
||||
}
|
471
3rdparty/ncnn/examples/scrfd_crowdhuman.cpp
vendored
Normal file
471
3rdparty/ncnn/examples/scrfd_crowdhuman.cpp
vendored
Normal file
@ -0,0 +1,471 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct FaceObject
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const FaceObject& a, const FaceObject& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
|
||||
{
|
||||
if (faceobjects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const FaceObject& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const FaceObject& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors()
|
||||
// Builds the base anchors for one detection head.
//
// base_size: side length of the unscaled square anchor.
// ratios:    aspect ratios, one per element.
// scales:    size multipliers, one per element.
// Returns a matrix with one row of four floats (x0, y0, x1, y1) for every
// ratio/scale combination, each anchor centred on the origin.
static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
    const int ratio_count = ratios.w;
    const int scale_count = scales.w;

    ncnn::Mat anchors;
    anchors.create(4, ratio_count * scale_count);

    // anchor centre
    const float cx = 0;
    const float cy = 0;

    for (int ri = 0; ri < ratio_count; ri++)
    {
        const float ar = ratios[ri];

        // the height is derived from the already rounded width
        const int ratio_w = round(base_size / sqrt(ar));
        const int ratio_h = round(ratio_w * ar); //round(base_size * sqrt(ar));

        for (int si = 0; si < scale_count; si++)
        {
            const float scale = scales[si];

            const float scaled_w = ratio_w * scale;
            const float scaled_h = ratio_h * scale;

            float* out = anchors.row(ri * scale_count + si);

            out[0] = cx - scaled_w * 0.5f;
            out[1] = cy - scaled_h * 0.5f;
            out[2] = cx + scaled_w * 0.5f;
            out[3] = cy + scaled_h * 0.5f;
        }
    }

    return anchors;
}
|
||||
|
||||
// Decodes the outputs of one detection head into boxes and appends every box
// whose score reaches prob_threshold to faceobjects.
//
// anchors:     base anchors, one row (x0, y0, x1, y1) per anchor.
// feat_stride: step between neighbouring anchor positions, also the scale of the predicted distances.
// score_blob:  one score channel per anchor.
// bbox_blob:   four distance channels per anchor.
static void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, float prob_threshold, std::vector<FaceObject>& faceobjects)
{
    const int w = score_blob.w;
    const int h = score_blob.h;

    // generate face proposal from bbox deltas and shifted anchors
    const int num_anchors = anchors.h;

    for (int q = 0; q < num_anchors; q++)
    {
        const float* anchor = anchors.row(q);

        const float* score = score_blob.channel(q);
        const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4);
        const float* dist0 = bbox.channel(0);
        const float* dist1 = bbox.channel(1);
        const float* dist2 = bbox.channel(2);
        const float* dist3 = bbox.channel(3);

        const float anchor_w = anchor[2] - anchor[0];
        const float anchor_h = anchor[3] - anchor[1];

        // the anchor is shifted by feat_stride per column and per row
        float anchor_y = anchor[1];
        for (int i = 0; i < h; i++, anchor_y += feat_stride)
        {
            float anchor_x = anchor[0];
            for (int j = 0; j < w; j++, anchor_x += feat_stride)
            {
                const int index = i * w + j;

                const float prob = score[index];
                if (!(prob >= prob_threshold))
                    continue;

                // insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single()
                const float dx = dist0[index] * feat_stride;
                const float dy = dist1[index] * feat_stride;
                const float dw = dist2[index] * feat_stride;
                const float dh = dist3[index] * feat_stride;

                // insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox()
                const float cx = anchor_x + anchor_w * 0.5f;
                const float cy = anchor_y + anchor_h * 0.5f;

                const float x0 = cx - dx;
                const float y0 = cy - dy;
                const float x1 = cx + dw;
                const float y1 = cy + dh;

                FaceObject obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0 + 1;
                obj.rect.height = y1 - y0 + 1;
                obj.prob = prob;

                faceobjects.push_back(obj);
            }
        }
    }
}
|
||||
|
||||
static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects)
|
||||
{
|
||||
ncnn::Net scrfd;
|
||||
|
||||
scrfd.opt.use_vulkan_compute = true;
|
||||
|
||||
// Insight face does not provided a trained scrfd_crowdhuman model
|
||||
// but I have one for detecing cat face, you can have a try here:
|
||||
// https://drive.google.com/file/d/1JogkKa0f_09HkENbCnXy9hRYxm35wKTn
|
||||
|
||||
scrfd.load_param("scrfd_crowdhuman.param");
|
||||
scrfd.load_model("scrfd_crowdhuman.bin");
|
||||
|
||||
int width = bgr.cols;
|
||||
int height = bgr.rows;
|
||||
|
||||
const int target_size = 640;
|
||||
const float prob_threshold = 0.3f;
|
||||
const float nms_threshold = 0.45f;
|
||||
|
||||
// pad to multiple of 32
|
||||
int w = width;
|
||||
int h = height;
|
||||
float scale = 1.f;
|
||||
if (w > h)
|
||||
{
|
||||
scale = (float)target_size / w;
|
||||
w = target_size;
|
||||
h = h * scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = (float)target_size / h;
|
||||
h = target_size;
|
||||
w = w * scale;
|
||||
}
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, w, h);
|
||||
|
||||
// pad to target_size rectangle
|
||||
int wpad = (w + 31) / 32 * 32 - w;
|
||||
int hpad = (h + 31) / 32 * 32 - h;
|
||||
ncnn::Mat in_pad;
|
||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||
|
||||
const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
|
||||
const float norm_vals[3] = {1 / 128.f, 1 / 128.f, 1 / 128.f};
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = scrfd.create_extractor();
|
||||
|
||||
ex.input("input.1", in_pad);
|
||||
|
||||
std::vector<FaceObject> faceproposals;
|
||||
|
||||
// stride 8
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob;
|
||||
ex.extract("490", score_blob);
|
||||
ex.extract("493", bbox_blob);
|
||||
|
||||
const int base_size = 8;
|
||||
const int feat_stride = 8;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 2.f;
|
||||
ncnn::Mat scales(1);
|
||||
scales[0] = 3.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects32;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects32);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end());
|
||||
}
|
||||
|
||||
// stride 16
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob;
|
||||
ex.extract("510", score_blob);
|
||||
ex.extract("513", bbox_blob);
|
||||
|
||||
const int base_size = 16;
|
||||
const int feat_stride = 16;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 2.f;
|
||||
ncnn::Mat scales(1);
|
||||
scales[0] = 3.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects16;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects16);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end());
|
||||
}
|
||||
|
||||
// stride 32
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob;
|
||||
ex.extract("530", score_blob);
|
||||
ex.extract("533", bbox_blob);
|
||||
|
||||
const int base_size = 32;
|
||||
const int feat_stride = 32;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 2.f;
|
||||
ncnn::Mat scales(1);
|
||||
scales[0] = 3.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects8;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
|
||||
}
|
||||
|
||||
// stride 64
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob, kps_blob;
|
||||
ex.extract("550", score_blob);
|
||||
ex.extract("553", bbox_blob);
|
||||
|
||||
const int base_size = 64;
|
||||
const int feat_stride = 64;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 2.f;
|
||||
ncnn::Mat scales(1);
|
||||
scales[0] = 3.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects8;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
|
||||
}
|
||||
|
||||
// stride 128
|
||||
{
|
||||
ncnn::Mat score_blob, bbox_blob, kps_blob;
|
||||
ex.extract("570", score_blob);
|
||||
ex.extract("573", bbox_blob);
|
||||
|
||||
const int base_size = 128;
|
||||
const int feat_stride = 128;
|
||||
ncnn::Mat ratios(1);
|
||||
ratios[0] = 2.f;
|
||||
ncnn::Mat scales(1);
|
||||
scales[0] = 3.f;
|
||||
ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);
|
||||
|
||||
std::vector<FaceObject> faceobjects8;
|
||||
generate_proposals(anchors, feat_stride, score_blob, bbox_blob, prob_threshold, faceobjects8);
|
||||
|
||||
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
|
||||
}
|
||||
|
||||
// sort all proposals by score from highest to lowest
|
||||
qsort_descent_inplace(faceproposals);
|
||||
|
||||
// apply nms with nms_threshold
|
||||
std::vector<int> picked;
|
||||
nms_sorted_bboxes(faceproposals, picked, nms_threshold);
|
||||
|
||||
int face_count = picked.size();
|
||||
|
||||
faceobjects.resize(face_count);
|
||||
for (int i = 0; i < face_count; i++)
|
||||
{
|
||||
faceobjects[i] = faceproposals[picked[i]];
|
||||
|
||||
// adjust offset to original unpadded
|
||||
float x0 = (faceobjects[i].rect.x - (wpad / 2)) / scale;
|
||||
float y0 = (faceobjects[i].rect.y - (hpad / 2)) / scale;
|
||||
float x1 = (faceobjects[i].rect.x + faceobjects[i].rect.width - (wpad / 2)) / scale;
|
||||
float y1 = (faceobjects[i].rect.y + faceobjects[i].rect.height - (hpad / 2)) / scale;
|
||||
|
||||
x0 = std::max(std::min(x0, (float)width - 1), 0.f);
|
||||
y0 = std::max(std::min(y0, (float)height - 1), 0.f);
|
||||
x1 = std::max(std::min(x1, (float)width - 1), 0.f);
|
||||
y1 = std::max(std::min(y1, (float)height - 1), 0.f);
|
||||
|
||||
faceobjects[i].rect.x = x0;
|
||||
faceobjects[i].rect.y = y0;
|
||||
faceobjects[i].rect.width = x1 - x0;
|
||||
faceobjects[i].rect.height = y1 - y0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Render every detected face on a copy of the input image and show it.
//
// For each face the score and rectangle are logged to stderr, the rectangle is
// outlined in green, and a "<score>%" label on a white background is placed
// above it. The window blocks until a key is pressed.
static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects)
{
    // draw on a clone so the caller's image is not modified
    cv::Mat canvas = bgr.clone();

    for (std::vector<FaceObject>::const_iterator it = faceobjects.begin(); it != faceobjects.end(); ++it)
    {
        const FaceObject& face = *it;

        fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", face.prob,
                face.rect.x, face.rect.y, face.rect.width, face.rect.height);

        cv::rectangle(canvas, face.rect, cv::Scalar(0, 255, 0));

        // score formatted as a percentage with one decimal
        char caption[256];
        sprintf(caption, "%.1f%%", face.prob * 100);

        int baseline = 0;
        cv::Size caption_size = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseline);

        // place the label just above the box, then keep it inside the image
        int label_x = face.rect.x;
        int label_y = face.rect.y - caption_size.height - baseline;
        if (label_y < 0)
        {
            label_y = 0;
        }
        if (label_x + caption_size.width > canvas.cols)
        {
            label_x = canvas.cols - caption_size.width;
        }

        // filled white background behind the text
        const cv::Point label_origin(label_x, label_y);
        const cv::Size label_extent(caption_size.width, caption_size.height + baseline);
        cv::rectangle(canvas, cv::Rect(label_origin, label_extent),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(canvas, caption, cv::Point(label_x, label_y + caption_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", canvas);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<FaceObject> faceobjects;
|
||||
detect_scrfd(m, faceobjects);
|
||||
|
||||
draw_faceobjects(m, faceobjects);
|
||||
|
||||
return 0;
|
||||
}
|
123
3rdparty/ncnn/examples/shufflenetv2.cpp
vendored
Normal file
123
3rdparty/ncnn/examples/shufflenetv2.cpp
vendored
Normal file
@ -0,0 +1,123 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#include <algorithm>
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
// Classify a BGR image with ShuffleNet v2 (x0.5) and write the per-class
// probabilities into cls_scores.
//
// Returns 0 on success and -1 when the model files cannot be loaded or the
// "fc" blob cannot be extracted; cls_scores is left untouched on failure.
static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& cls_scores)
{
    ncnn::Net shufflenetv2;

    shufflenetv2.opt.use_vulkan_compute = true;

    // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe
    // models can be downloaded from https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe/releases
    // the loaders return non-zero on failure; running inference on a net that
    // was not loaded only produces an empty result, so stop early instead
    if (shufflenetv2.load_param("shufflenet_v2_x0.5.param") != 0)
    {
        fprintf(stderr, "load shufflenet_v2_x0.5.param failed\n");
        return -1;
    }
    if (shufflenetv2.load_model("shufflenet_v2_x0.5.bin") != 0)
    {
        fprintf(stderr, "load shufflenet_v2_x0.5.bin failed\n");
        return -1;
    }

    // resize the input to the 224x224 resolution fed to the network
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, 224, 224);

    // scale pixel values by 1/255, no mean subtraction
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = shufflenetv2.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("fc", out) != 0)
    {
        fprintf(stderr, "extract fc failed\n");
        return -1;
    }

    // manually call softmax on the fc output
    // convert result into probability
    // skip if your model already has softmax operation
    {
        ncnn::Layer* softmax = ncnn::create_layer("Softmax");

        ncnn::ParamDict pd;
        softmax->load_param(pd);

        softmax->forward_inplace(out, shufflenetv2.opt);

        delete softmax;
    }

    // flatten to a 1-D blob so it can be indexed linearly
    out = out.reshape(out.w * out.h * out.c);

    cls_scores.resize(out.w);
    for (int j = 0; j < out.w; j++)
    {
        cls_scores[j] = out[j];
    }

    return 0;
}
|
||||
|
||||
// Print the topk highest scores to stderr, one "index = score" line each,
// ordered from highest to lowest score.
//
// topk is clamped to the range [0, cls_scores.size()], so an empty score list
// (for example after a failed inference) prints nothing instead of reading
// past the end of the vector. Always returns 0.
static int print_topk(const std::vector<float>& cls_scores, int topk)
{
    // partial sort topk with index
    const int size = static_cast<int>(cls_scores.size());

    // std::partial_sort requires its middle iterator to lie inside the range
    if (topk > size)
        topk = size;
    if (topk < 0)
        topk = 0;

    std::vector<std::pair<float, int> > vec;
    vec.resize(size);
    for (int i = 0; i < size; i++)
    {
        vec[i] = std::make_pair(cls_scores[i], i);
    }

    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater<std::pair<float, int> >());

    // print topk and score
    for (int i = 0; i < topk; i++)
    {
        float score = vec[i].first;
        int index = vec[i].second;
        fprintf(stderr, "%d = %f\n", index, score);
    }

    return 0;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<float> cls_scores;
|
||||
detect_shufflenetv2(m, cls_scores);
|
||||
|
||||
print_topk(cls_scores, 3);
|
||||
|
||||
return 0;
|
||||
}
|
165
3rdparty/ncnn/examples/simplepose.cpp
vendored
Normal file
165
3rdparty/ncnn/examples/simplepose.cpp
vendored
Normal file
@ -0,0 +1,165 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#include <algorithm>
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct KeyPoint
|
||||
{
|
||||
cv::Point2f p;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Run the simple-pose network on a BGR image and resolve one keypoint per
// output heatmap channel.
//
// keypoints is cleared and refilled on success. Returns 0 on success and -1
// when the model files cannot be loaded or the "conv3_fwd" blob cannot be
// extracted; keypoints is left untouched on failure.
static int detect_posenet(const cv::Mat& bgr, std::vector<KeyPoint>& keypoints)
{
    ncnn::Net posenet;

    posenet.opt.use_vulkan_compute = true;

    // the simple baseline human pose estimation from gluon-cv
    // https://gluon-cv.mxnet.io/build/examples_pose/demo_simple_pose.html
    // mxnet model exported via
    //      pose_net.hybridize()
    //      pose_net.export('pose')
    // then mxnet2ncnn
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // the loaders return non-zero on failure; stop early rather than running
    // inference on a net that was never loaded
    if (posenet.load_param("pose.param") != 0)
    {
        fprintf(stderr, "load pose.param failed\n");
        return -1;
    }
    if (posenet.load_model("pose.bin") != 0)
    {
        fprintf(stderr, "load pose.bin failed\n");
        return -1;
    }

    int w = bgr.cols;
    int h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, w, h, 192, 256);

    // transforms.ToTensor(),
    // transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    // R' = (R / 255 - 0.485) / 0.229 = (R - 0.485 * 255) / 0.229 / 255
    // G' = (G / 255 - 0.456) / 0.224 = (G - 0.456 * 255) / 0.224 / 255
    // B' = (B / 255 - 0.406) / 0.225 = (B - 0.406 * 255) / 0.225 / 255
    const float mean_vals[3] = {0.485f * 255.f, 0.456f * 255.f, 0.406f * 255.f};
    const float norm_vals[3] = {1 / 0.229f / 255.f, 1 / 0.224f / 255.f, 1 / 0.225f / 255.f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = posenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("conv3_fwd", out) != 0)
    {
        fprintf(stderr, "extract conv3_fwd failed\n");
        return -1;
    }

    // resolve point from heatmap
    keypoints.clear();
    for (int p = 0; p < out.c; p++)
    {
        const ncnn::Mat m = out.channel(p);

        // scan the channel for its strongest response; values that never
        // exceed 0 leave the keypoint at (0, 0) with prob 0
        float max_prob = 0.f;
        int max_x = 0;
        int max_y = 0;
        for (int y = 0; y < out.h; y++)
        {
            const float* ptr = m.row(y);
            for (int x = 0; x < out.w; x++)
            {
                float prob = ptr[x];
                if (prob > max_prob)
                {
                    max_prob = prob;
                    max_x = x;
                    max_y = y;
                }
            }
        }

        // map the heatmap cell back to coordinates in the original image
        KeyPoint keypoint;
        keypoint.p = cv::Point2f(max_x * w / (float)out.w, max_y * h / (float)out.h);
        keypoint.prob = max_prob;

        keypoints.push_back(keypoint);
    }

    return 0;
}
|
||||
|
||||
// Draw the skeleton (bones and joints) on a copy of the input image and show
// it. Every keypoint is logged to stderr; bones and joints whose probability
// is below 0.2 are not drawn. The window blocks until a key is pressed.
//
// The bone table refers to keypoint indices 0..16. Bones that reference an
// index not present in keypoints are skipped, so a short or empty list (for
// example after a failed detection) is handled without reading out of bounds.
static void draw_pose(const cv::Mat& bgr, const std::vector<KeyPoint>& keypoints)
{
    cv::Mat image = bgr.clone();

    // draw bone
    static const int joint_pairs[16][2] = {
        {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16}
    };

    const size_t keypoint_count = keypoints.size();

    for (int i = 0; i < 16; i++)
    {
        const size_t first = static_cast<size_t>(joint_pairs[i][0]);
        const size_t second = static_cast<size_t>(joint_pairs[i][1]);

        // skip bones whose endpoints were not produced by the detector
        if (first >= keypoint_count || second >= keypoint_count)
            continue;

        const KeyPoint& p1 = keypoints[first];
        const KeyPoint& p2 = keypoints[second];

        if (p1.prob < 0.2f || p2.prob < 0.2f)
            continue;

        cv::line(image, p1.p, p2.p, cv::Scalar(255, 0, 0), 2);
    }

    // draw joint
    for (size_t i = 0; i < keypoint_count; i++)
    {
        const KeyPoint& keypoint = keypoints[i];

        fprintf(stderr, "%.2f %.2f = %.5f\n", keypoint.p.x, keypoint.p.y, keypoint.prob);

        if (keypoint.prob < 0.2f)
            continue;

        cv::circle(image, keypoint.p, 3, cv::Scalar(0, 255, 0), -1);
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<KeyPoint> keypoints;
|
||||
detect_posenet(m, keypoints);
|
||||
|
||||
draw_pose(m, keypoints);
|
||||
|
||||
return 0;
|
||||
}
|
1
3rdparty/ncnn/examples/squeezencnn/README.md
vendored
Normal file
1
3rdparty/ncnn/examples/squeezencnn/README.md
vendored
Normal file
@ -0,0 +1 @@
|
||||
The squeezenet android example project has been moved to https://github.com/nihui/ncnn-android-squeezenet
|
106
3rdparty/ncnn/examples/squeezenet.cpp
vendored
Normal file
106
3rdparty/ncnn/examples/squeezenet.cpp
vendored
Normal file
@ -0,0 +1,106 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#include <algorithm>
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
// Classify a BGR image with SqueezeNet v1.1 and write the per-class scores of
// the "prob" blob into cls_scores.
//
// Returns 0 on success and -1 when the model files cannot be loaded or the
// output blob cannot be extracted; cls_scores is left untouched on failure.
static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
{
    ncnn::Net squeezenet;

    squeezenet.opt.use_vulkan_compute = true;

    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // the loaders return non-zero on failure; stop early rather than running
    // inference on a net that was never loaded
    if (squeezenet.load_param("squeezenet_v1.1.param") != 0)
    {
        fprintf(stderr, "load squeezenet_v1.1.param failed\n");
        return -1;
    }
    if (squeezenet.load_model("squeezenet_v1.1.bin") != 0)
    {
        fprintf(stderr, "load squeezenet_v1.1.bin failed\n");
        return -1;
    }

    // resize the input to the 227x227 resolution fed to the network
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, 227, 227);

    // subtract the per-channel mean, no scaling
    const float mean_vals[3] = {104.f, 117.f, 123.f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Extractor ex = squeezenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("prob", out) != 0)
    {
        fprintf(stderr, "extract prob failed\n");
        return -1;
    }

    cls_scores.resize(out.w);
    for (int j = 0; j < out.w; j++)
    {
        cls_scores[j] = out[j];
    }

    return 0;
}
|
||||
|
||||
// Print the topk highest scores to stderr, one "index = score" line each,
// ordered from highest to lowest score.
//
// topk is clamped to the range [0, cls_scores.size()], so an empty score list
// (for example after a failed inference) prints nothing instead of reading
// past the end of the vector. Always returns 0.
static int print_topk(const std::vector<float>& cls_scores, int topk)
{
    // partial sort topk with index
    const int size = static_cast<int>(cls_scores.size());

    // std::partial_sort requires its middle iterator to lie inside the range
    if (topk > size)
        topk = size;
    if (topk < 0)
        topk = 0;

    std::vector<std::pair<float, int> > vec;
    vec.resize(size);
    for (int i = 0; i < size; i++)
    {
        vec[i] = std::make_pair(cls_scores[i], i);
    }

    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater<std::pair<float, int> >());

    // print topk and score
    for (int i = 0; i < topk; i++)
    {
        float score = vec[i].first;
        int index = vec[i].second;
        fprintf(stderr, "%d = %f\n", index, score);
    }

    return 0;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<float> cls_scores;
|
||||
detect_squeezenet(m, cls_scores);
|
||||
|
||||
print_topk(cls_scores, 3);
|
||||
|
||||
return 0;
|
||||
}
|
121
3rdparty/ncnn/examples/squeezenet_c_api.cpp
vendored
Normal file
121
3rdparty/ncnn/examples/squeezenet_c_api.cpp
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "c_api.h"
|
||||
|
||||
#include <algorithm>
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
// Classify a BGR image with SqueezeNet v1.1 through the ncnn C API and write
// the per-class scores of the "prob" blob into cls_scores.
//
// Returns 0 on success and -1 when the model files cannot be loaded or the
// output blob cannot be extracted; cls_scores is left untouched on failure.
// Every handle created here is destroyed before returning, on both paths.
static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
{
    ncnn_net_t squeezenet = ncnn_net_create();

    ncnn_option_t opt = ncnn_option_create();
    ncnn_option_set_use_vulkan_compute(opt, 1);

    ncnn_net_set_option(squeezenet, opt);

    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // the loaders return non-zero on failure; stop early rather than running
    // inference on a net that was never loaded
    if (ncnn_net_load_param(squeezenet, "squeezenet_v1.1.param") != 0
            || ncnn_net_load_model(squeezenet, "squeezenet_v1.1.bin") != 0)
    {
        fprintf(stderr, "load squeezenet_v1.1 param/model failed\n");

        ncnn_option_destroy(opt);
        ncnn_net_destroy(squeezenet);

        return -1;
    }

    // bgr.cols * 3 is the row stride in bytes of the tightly packed BGR input
    ncnn_mat_t in = ncnn_mat_from_pixels_resize(bgr.data, NCNN_MAT_PIXEL_BGR, bgr.cols, bgr.rows, bgr.cols * 3, 227, 227, NULL);

    // subtract the per-channel mean, no scaling
    const float mean_vals[3] = {104.f, 117.f, 123.f};
    ncnn_mat_substract_mean_normalize(in, mean_vals, 0);

    ncnn_extractor_t ex = ncnn_extractor_create(squeezenet);

    ncnn_extractor_input(ex, "data", in);

    ncnn_mat_t out = 0;
    const int extract_ret = ncnn_extractor_extract(ex, "prob", &out);

    if (extract_ret == 0)
    {
        const int out_w = ncnn_mat_get_w(out);
        const float* out_data = (const float*)ncnn_mat_get_data(out);

        cls_scores.resize(out_w);
        for (int j = 0; j < out_w; j++)
        {
            cls_scores[j] = out_data[j];
        }
    }
    else
    {
        fprintf(stderr, "extract prob failed\n");
    }

    // release everything regardless of whether extraction succeeded
    ncnn_mat_destroy(in);
    if (out)
        ncnn_mat_destroy(out);

    ncnn_extractor_destroy(ex);

    ncnn_option_destroy(opt);

    ncnn_net_destroy(squeezenet);

    return extract_ret == 0 ? 0 : -1;
}
|
||||
|
||||
// Print the topk highest scores to stderr, one "index = score" line each,
// ordered from highest to lowest score.
//
// topk is clamped to the range [0, cls_scores.size()], so an empty score list
// (for example after a failed inference) prints nothing instead of reading
// past the end of the vector. Always returns 0.
static int print_topk(const std::vector<float>& cls_scores, int topk)
{
    // partial sort topk with index
    const int size = static_cast<int>(cls_scores.size());

    // std::partial_sort requires its middle iterator to lie inside the range
    if (topk > size)
        topk = size;
    if (topk < 0)
        topk = 0;

    std::vector<std::pair<float, int> > vec;
    vec.resize(size);
    for (int i = 0; i < size; i++)
    {
        vec[i] = std::make_pair(cls_scores[i], i);
    }

    std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                      std::greater<std::pair<float, int> >());

    // print topk and score
    for (int i = 0; i < topk; i++)
    {
        float score = vec[i].first;
        int index = vec[i].second;
        fprintf(stderr, "%d = %f\n", index, score);
    }

    return 0;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<float> cls_scores;
|
||||
detect_squeezenet(m, cls_scores);
|
||||
|
||||
print_topk(cls_scores, 3);
|
||||
|
||||
return 0;
|
||||
}
|
BIN
3rdparty/ncnn/examples/squeezenet_v1.1.bin
vendored
Normal file
BIN
3rdparty/ncnn/examples/squeezenet_v1.1.bin
vendored
Normal file
Binary file not shown.
BIN
3rdparty/ncnn/examples/squeezenet_v1.1.caffemodel
vendored
Normal file
BIN
3rdparty/ncnn/examples/squeezenet_v1.1.caffemodel
vendored
Normal file
Binary file not shown.
77
3rdparty/ncnn/examples/squeezenet_v1.1.param
vendored
Normal file
77
3rdparty/ncnn/examples/squeezenet_v1.1.param
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
7767517
|
||||
75 83
|
||||
Input data 0 1 data 0=227 1=227 2=3
|
||||
Convolution conv1 1 1 data conv1 0=64 1=3 2=1 3=2 4=0 5=1 6=1728
|
||||
ReLU relu_conv1 1 1 conv1 conv1_relu_conv1 0=0.000000
|
||||
Pooling pool1 1 1 conv1_relu_conv1 pool1 0=0 1=3 2=2 3=0 4=0
|
||||
Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1 0=16 1=1 2=1 3=1 4=0 5=1 6=1024
|
||||
ReLU fire2/relu_squeeze1x1 1 1 fire2/squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_0 1 2 fire2/squeeze1x1_fire2/relu_squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire2/expand1x1 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 fire2/expand1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=1024
|
||||
ReLU fire2/relu_expand1x1 1 1 fire2/expand1x1 fire2/expand1x1_fire2/relu_expand1x1 0=0.000000
|
||||
Convolution fire2/expand3x3 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/expand3x3 0=64 1=3 2=1 3=1 4=1 5=1 6=9216
|
||||
ReLU fire2/relu_expand3x3 1 1 fire2/expand3x3 fire2/expand3x3_fire2/relu_expand3x3 0=0.000000
|
||||
Concat fire2/concat 2 1 fire2/expand1x1_fire2/relu_expand1x1 fire2/expand3x3_fire2/relu_expand3x3 fire2/concat 0=0
|
||||
Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1 0=16 1=1 2=1 3=1 4=0 5=1 6=2048
|
||||
ReLU fire3/relu_squeeze1x1 1 1 fire3/squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_1 1 2 fire3/squeeze1x1_fire3/relu_squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire3/expand1x1 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 fire3/expand1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=1024
|
||||
ReLU fire3/relu_expand1x1 1 1 fire3/expand1x1 fire3/expand1x1_fire3/relu_expand1x1 0=0.000000
|
||||
Convolution fire3/expand3x3 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/expand3x3 0=64 1=3 2=1 3=1 4=1 5=1 6=9216
|
||||
ReLU fire3/relu_expand3x3 1 1 fire3/expand3x3 fire3/expand3x3_fire3/relu_expand3x3 0=0.000000
|
||||
Concat fire3/concat 2 1 fire3/expand1x1_fire3/relu_expand1x1 fire3/expand3x3_fire3/relu_expand3x3 fire3/concat 0=0
|
||||
Pooling pool3 1 1 fire3/concat pool3 0=0 1=3 2=2 3=0 4=0
|
||||
Convolution fire4/squeeze1x1 1 1 pool3 fire4/squeeze1x1 0=32 1=1 2=1 3=1 4=0 5=1 6=4096
|
||||
ReLU fire4/relu_squeeze1x1 1 1 fire4/squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_2 1 2 fire4/squeeze1x1_fire4/relu_squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire4/expand1x1 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 fire4/expand1x1 0=128 1=1 2=1 3=1 4=0 5=1 6=4096
|
||||
ReLU fire4/relu_expand1x1 1 1 fire4/expand1x1 fire4/expand1x1_fire4/relu_expand1x1 0=0.000000
|
||||
Convolution fire4/expand3x3 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/expand3x3 0=128 1=3 2=1 3=1 4=1 5=1 6=36864
|
||||
ReLU fire4/relu_expand3x3 1 1 fire4/expand3x3 fire4/expand3x3_fire4/relu_expand3x3 0=0.000000
|
||||
Concat fire4/concat 2 1 fire4/expand1x1_fire4/relu_expand1x1 fire4/expand3x3_fire4/relu_expand3x3 fire4/concat 0=0
|
||||
Convolution fire5/squeeze1x1 1 1 fire4/concat fire5/squeeze1x1 0=32 1=1 2=1 3=1 4=0 5=1 6=8192
|
||||
ReLU fire5/relu_squeeze1x1 1 1 fire5/squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_3 1 2 fire5/squeeze1x1_fire5/relu_squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire5/expand1x1 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 fire5/expand1x1 0=128 1=1 2=1 3=1 4=0 5=1 6=4096
|
||||
ReLU fire5/relu_expand1x1 1 1 fire5/expand1x1 fire5/expand1x1_fire5/relu_expand1x1 0=0.000000
|
||||
Convolution fire5/expand3x3 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/expand3x3 0=128 1=3 2=1 3=1 4=1 5=1 6=36864
|
||||
ReLU fire5/relu_expand3x3 1 1 fire5/expand3x3 fire5/expand3x3_fire5/relu_expand3x3 0=0.000000
|
||||
Concat fire5/concat 2 1 fire5/expand1x1_fire5/relu_expand1x1 fire5/expand3x3_fire5/relu_expand3x3 fire5/concat 0=0
|
||||
Pooling pool5 1 1 fire5/concat pool5 0=0 1=3 2=2 3=0 4=0
|
||||
Convolution fire6/squeeze1x1 1 1 pool5 fire6/squeeze1x1 0=48 1=1 2=1 3=1 4=0 5=1 6=12288
|
||||
ReLU fire6/relu_squeeze1x1 1 1 fire6/squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_4 1 2 fire6/squeeze1x1_fire6/relu_squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire6/expand1x1 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 fire6/expand1x1 0=192 1=1 2=1 3=1 4=0 5=1 6=9216
|
||||
ReLU fire6/relu_expand1x1 1 1 fire6/expand1x1 fire6/expand1x1_fire6/relu_expand1x1 0=0.000000
|
||||
Convolution fire6/expand3x3 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/expand3x3 0=192 1=3 2=1 3=1 4=1 5=1 6=82944
|
||||
ReLU fire6/relu_expand3x3 1 1 fire6/expand3x3 fire6/expand3x3_fire6/relu_expand3x3 0=0.000000
|
||||
Concat fire6/concat 2 1 fire6/expand1x1_fire6/relu_expand1x1 fire6/expand3x3_fire6/relu_expand3x3 fire6/concat 0=0
|
||||
Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1 0=48 1=1 2=1 3=1 4=0 5=1 6=18432
|
||||
ReLU fire7/relu_squeeze1x1 1 1 fire7/squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_5 1 2 fire7/squeeze1x1_fire7/relu_squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire7/expand1x1 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 fire7/expand1x1 0=192 1=1 2=1 3=1 4=0 5=1 6=9216
|
||||
ReLU fire7/relu_expand1x1 1 1 fire7/expand1x1 fire7/expand1x1_fire7/relu_expand1x1 0=0.000000
|
||||
Convolution fire7/expand3x3 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/expand3x3 0=192 1=3 2=1 3=1 4=1 5=1 6=82944
|
||||
ReLU fire7/relu_expand3x3 1 1 fire7/expand3x3 fire7/expand3x3_fire7/relu_expand3x3 0=0.000000
|
||||
Concat fire7/concat 2 1 fire7/expand1x1_fire7/relu_expand1x1 fire7/expand3x3_fire7/relu_expand3x3 fire7/concat 0=0
|
||||
Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=24576
|
||||
ReLU fire8/relu_squeeze1x1 1 1 fire8/squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_6 1 2 fire8/squeeze1x1_fire8/relu_squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire8/expand1x1 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 fire8/expand1x1 0=256 1=1 2=1 3=1 4=0 5=1 6=16384
|
||||
ReLU fire8/relu_expand1x1 1 1 fire8/expand1x1 fire8/expand1x1_fire8/relu_expand1x1 0=0.000000
|
||||
Convolution fire8/expand3x3 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/expand3x3 0=256 1=3 2=1 3=1 4=1 5=1 6=147456
|
||||
ReLU fire8/relu_expand3x3 1 1 fire8/expand3x3 fire8/expand3x3_fire8/relu_expand3x3 0=0.000000
|
||||
Concat fire8/concat 2 1 fire8/expand1x1_fire8/relu_expand1x1 fire8/expand3x3_fire8/relu_expand3x3 fire8/concat 0=0
|
||||
Convolution fire9/squeeze1x1 1 1 fire8/concat fire9/squeeze1x1 0=64 1=1 2=1 3=1 4=0 5=1 6=32768
|
||||
ReLU fire9/relu_squeeze1x1 1 1 fire9/squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1 0=0.000000
|
||||
Split splitncnn_7 1 2 fire9/squeeze1x1_fire9/relu_squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire9/expand1x1 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 fire9/expand1x1 0=256 1=1 2=1 3=1 4=0 5=1 6=16384
|
||||
ReLU fire9/relu_expand1x1 1 1 fire9/expand1x1 fire9/expand1x1_fire9/relu_expand1x1 0=0.000000
|
||||
Convolution fire9/expand3x3 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/expand3x3 0=256 1=3 2=1 3=1 4=1 5=1 6=147456
|
||||
ReLU fire9/relu_expand3x3 1 1 fire9/expand3x3 fire9/expand3x3_fire9/relu_expand3x3 0=0.000000
|
||||
Concat fire9/concat 2 1 fire9/expand1x1_fire9/relu_expand1x1 fire9/expand3x3_fire9/relu_expand3x3 fire9/concat 0=0
|
||||
Dropout drop9 1 1 fire9/concat fire9/concat_drop9
|
||||
Convolution conv10 1 1 fire9/concat_drop9 conv10 0=1000 1=1 2=1 3=1 4=1 5=1 6=512000
|
||||
ReLU relu_conv10 1 1 conv10 conv10_relu_conv10 0=0.000000
|
||||
Pooling pool10 1 1 conv10_relu_conv10 pool10 0=1 1=0 2=1 3=0 4=1
|
||||
Softmax prob 1 1 pool10 prob 0=0
|
BIN
3rdparty/ncnn/examples/squeezenet_v1.1.param.bin
vendored
Normal file
BIN
3rdparty/ncnn/examples/squeezenet_v1.1.param.bin
vendored
Normal file
Binary file not shown.
548
3rdparty/ncnn/examples/squeezenet_v1.1.prototxt
vendored
Normal file
548
3rdparty/ncnn/examples/squeezenet_v1.1.prototxt
vendored
Normal file
@ -0,0 +1,548 @@
|
||||
name: "squeezenet_v1.1_deploy"
|
||||
|
||||
layer {
|
||||
name: "data"
|
||||
type: "Input"
|
||||
top: "data"
|
||||
input_param { shape: { dim: 1 dim: 3 dim: 227 dim: 227 } }
|
||||
}
|
||||
layer {
|
||||
name: "conv1"
|
||||
type: "Convolution"
|
||||
bottom: "data"
|
||||
top: "conv1"
|
||||
convolution_param {
|
||||
num_output: 64
|
||||
kernel_size: 3
|
||||
stride: 2
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "relu_conv1"
|
||||
type: "ReLU"
|
||||
bottom: "conv1"
|
||||
top: "conv1"
|
||||
}
|
||||
layer {
|
||||
name: "pool1"
|
||||
type: "Pooling"
|
||||
bottom: "conv1"
|
||||
top: "pool1"
|
||||
pooling_param {
|
||||
pool: MAX
|
||||
kernel_size: 3
|
||||
stride: 2
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire2/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "pool1"
|
||||
top: "fire2/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 16
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire2/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire2/squeeze1x1"
|
||||
top: "fire2/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire2/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire2/squeeze1x1"
|
||||
top: "fire2/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 64
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire2/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire2/expand1x1"
|
||||
top: "fire2/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire2/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire2/squeeze1x1"
|
||||
top: "fire2/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 64
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire2/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire2/expand3x3"
|
||||
top: "fire2/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire2/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire2/expand1x1"
|
||||
bottom: "fire2/expand3x3"
|
||||
top: "fire2/concat"
|
||||
}
|
||||
layer {
|
||||
name: "fire3/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire2/concat"
|
||||
top: "fire3/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 16
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire3/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire3/squeeze1x1"
|
||||
top: "fire3/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire3/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire3/squeeze1x1"
|
||||
top: "fire3/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 64
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire3/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire3/expand1x1"
|
||||
top: "fire3/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire3/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire3/squeeze1x1"
|
||||
top: "fire3/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 64
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire3/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire3/expand3x3"
|
||||
top: "fire3/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire3/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire3/expand1x1"
|
||||
bottom: "fire3/expand3x3"
|
||||
top: "fire3/concat"
|
||||
}
|
||||
layer {
|
||||
name: "pool3"
|
||||
type: "Pooling"
|
||||
bottom: "fire3/concat"
|
||||
top: "pool3"
|
||||
pooling_param {
|
||||
pool: MAX
|
||||
kernel_size: 3
|
||||
stride: 2
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire4/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "pool3"
|
||||
top: "fire4/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 32
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire4/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire4/squeeze1x1"
|
||||
top: "fire4/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire4/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire4/squeeze1x1"
|
||||
top: "fire4/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 128
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire4/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire4/expand1x1"
|
||||
top: "fire4/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire4/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire4/squeeze1x1"
|
||||
top: "fire4/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 128
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire4/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire4/expand3x3"
|
||||
top: "fire4/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire4/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire4/expand1x1"
|
||||
bottom: "fire4/expand3x3"
|
||||
top: "fire4/concat"
|
||||
}
|
||||
layer {
|
||||
name: "fire5/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire4/concat"
|
||||
top: "fire5/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 32
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire5/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire5/squeeze1x1"
|
||||
top: "fire5/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire5/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire5/squeeze1x1"
|
||||
top: "fire5/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 128
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire5/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire5/expand1x1"
|
||||
top: "fire5/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire5/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire5/squeeze1x1"
|
||||
top: "fire5/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 128
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire5/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire5/expand3x3"
|
||||
top: "fire5/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire5/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire5/expand1x1"
|
||||
bottom: "fire5/expand3x3"
|
||||
top: "fire5/concat"
|
||||
}
|
||||
layer {
|
||||
name: "pool5"
|
||||
type: "Pooling"
|
||||
bottom: "fire5/concat"
|
||||
top: "pool5"
|
||||
pooling_param {
|
||||
pool: MAX
|
||||
kernel_size: 3
|
||||
stride: 2
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire6/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "pool5"
|
||||
top: "fire6/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 48
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire6/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire6/squeeze1x1"
|
||||
top: "fire6/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire6/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire6/squeeze1x1"
|
||||
top: "fire6/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 192
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire6/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire6/expand1x1"
|
||||
top: "fire6/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire6/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire6/squeeze1x1"
|
||||
top: "fire6/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 192
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire6/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire6/expand3x3"
|
||||
top: "fire6/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire6/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire6/expand1x1"
|
||||
bottom: "fire6/expand3x3"
|
||||
top: "fire6/concat"
|
||||
}
|
||||
layer {
|
||||
name: "fire7/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire6/concat"
|
||||
top: "fire7/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 48
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire7/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire7/squeeze1x1"
|
||||
top: "fire7/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire7/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire7/squeeze1x1"
|
||||
top: "fire7/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 192
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire7/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire7/expand1x1"
|
||||
top: "fire7/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire7/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire7/squeeze1x1"
|
||||
top: "fire7/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 192
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire7/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire7/expand3x3"
|
||||
top: "fire7/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire7/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire7/expand1x1"
|
||||
bottom: "fire7/expand3x3"
|
||||
top: "fire7/concat"
|
||||
}
|
||||
layer {
|
||||
name: "fire8/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire7/concat"
|
||||
top: "fire8/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 64
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire8/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire8/squeeze1x1"
|
||||
top: "fire8/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire8/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire8/squeeze1x1"
|
||||
top: "fire8/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 256
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire8/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire8/expand1x1"
|
||||
top: "fire8/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire8/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire8/squeeze1x1"
|
||||
top: "fire8/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 256
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire8/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire8/expand3x3"
|
||||
top: "fire8/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire8/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire8/expand1x1"
|
||||
bottom: "fire8/expand3x3"
|
||||
top: "fire8/concat"
|
||||
}
|
||||
layer {
|
||||
name: "fire9/squeeze1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire8/concat"
|
||||
top: "fire9/squeeze1x1"
|
||||
convolution_param {
|
||||
num_output: 64
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire9/relu_squeeze1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire9/squeeze1x1"
|
||||
top: "fire9/squeeze1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire9/expand1x1"
|
||||
type: "Convolution"
|
||||
bottom: "fire9/squeeze1x1"
|
||||
top: "fire9/expand1x1"
|
||||
convolution_param {
|
||||
num_output: 256
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire9/relu_expand1x1"
|
||||
type: "ReLU"
|
||||
bottom: "fire9/expand1x1"
|
||||
top: "fire9/expand1x1"
|
||||
}
|
||||
layer {
|
||||
name: "fire9/expand3x3"
|
||||
type: "Convolution"
|
||||
bottom: "fire9/squeeze1x1"
|
||||
top: "fire9/expand3x3"
|
||||
convolution_param {
|
||||
num_output: 256
|
||||
pad: 1
|
||||
kernel_size: 3
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "fire9/relu_expand3x3"
|
||||
type: "ReLU"
|
||||
bottom: "fire9/expand3x3"
|
||||
top: "fire9/expand3x3"
|
||||
}
|
||||
layer {
|
||||
name: "fire9/concat"
|
||||
type: "Concat"
|
||||
bottom: "fire9/expand1x1"
|
||||
bottom: "fire9/expand3x3"
|
||||
top: "fire9/concat"
|
||||
}
|
||||
layer {
|
||||
name: "drop9"
|
||||
type: "Dropout"
|
||||
bottom: "fire9/concat"
|
||||
top: "fire9/concat"
|
||||
dropout_param {
|
||||
dropout_ratio: 0.5
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "conv10"
|
||||
type: "Convolution"
|
||||
bottom: "fire9/concat"
|
||||
top: "conv10"
|
||||
convolution_param {
|
||||
num_output: 1000
|
||||
pad: 1
|
||||
kernel_size: 1
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "relu_conv10"
|
||||
type: "ReLU"
|
||||
bottom: "conv10"
|
||||
top: "conv10"
|
||||
}
|
||||
layer {
|
||||
name: "pool10"
|
||||
type: "Pooling"
|
||||
bottom: "conv10"
|
||||
top: "pool10"
|
||||
pooling_param {
|
||||
pool: AVE
|
||||
global_pooling: true
|
||||
}
|
||||
}
|
||||
layer {
|
||||
name: "prob"
|
||||
type: "Softmax"
|
||||
bottom: "pool10"
|
||||
top: "prob"
|
||||
}
|
152
3rdparty/ncnn/examples/squeezenetssd.cpp
vendored
Normal file
152
3rdparty/ncnn/examples/squeezenetssd.cpp
vendored
Normal file
@ -0,0 +1,152 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Runs the SqueezeNet-SSD (VOC) network on an image and collects its detections.
//
// bgr     - source image; its pixel buffer is handed to ncnn as PIXEL_BGR.
// objects - cleared on entry, then filled with one entry per row of the
//           "detection_out" blob, with boxes scaled to the source image size.
//
// Returns 0 on success, or -1 when the param/model files cannot be loaded
// from the current working directory.
static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& objects)
{
    // Never leave stale results in the output, even on the failure paths.
    objects.clear();

    ncnn::Net squeezenet;

    squeezenet.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD
    // squeezenet_ssd_voc_deploy.prototxt
    // https://drive.google.com/open?id=0B3gersZ2cHIxdGpyZlZnbEQ5Snc
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (squeezenet.load_param("squeezenet_ssd_voc.param") != 0)
    {
        fprintf(stderr, "load squeezenet_ssd_voc.param failed\n");
        return -1;
    }
    if (squeezenet.load_model("squeezenet_ssd_voc.bin") != 0)
    {
        fprintf(stderr, "load squeezenet_ssd_voc.bin failed\n");
        return -1;
    }

    const int target_size = 300;

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    // Resize to the fixed network input size while converting to ncnn::Mat.
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    // Per-channel mean subtraction only; no scaling (norm_vals == 0).
    const float mean_vals[3] = {104.f, 117.f, 123.f};
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Extractor ex = squeezenet.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);

    // Each row: label, score, then two box corners that are scaled by the
    // image size below (x1, y1, x2, y2).
    for (int i = 0; i < out.h; i++)
    {
        const float* values = out.row(i);

        Object object;
        object.label = static_cast<int>(values[0]);
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
|
||||
|
||||
// Draws every detection (box plus "<class> <score>%" caption) on a copy of
// the image, logs each detection to stderr, and shows the result in a window
// until a key is pressed.
//
// bgr     - source image; it is cloned, never modified.
// objects - detections to render.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    static const int num_classes = sizeof(class_names) / sizeof(class_names[0]);

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // The label comes straight from the network output, so guard the
        // table lookup instead of indexing out of bounds.
        const char* name = (obj.label >= 0 && obj.label < num_classes) ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // Place the caption above the box, clamped to the top/right edges.
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // White filled background behind the caption text.
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_squeezenet(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
1000
3rdparty/ncnn/examples/synset_words.txt
vendored
Normal file
1000
3rdparty/ncnn/examples/synset_words.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
544
3rdparty/ncnn/examples/yolact.cpp
vendored
Normal file
544
3rdparty/ncnn/examples/yolact.cpp
vendored
Normal file
@ -0,0 +1,544 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
std::vector<float> maskdata;
|
||||
cv::Mat mask;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = objects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (objects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (objects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(objects[i], objects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(objects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(objects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& objects)
|
||||
{
|
||||
if (objects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(objects, 0, objects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = objects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = objects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = objects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = objects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the YOLACT network on an image and produces boxes plus per-object
// binary masks.
//
// bgr     - source image; its pixel buffer is handed to ncnn with a
//           BGR -> RGB conversion.
// objects - cleared on entry, then filled with at most keep_top_k detections
//           sorted by descending score, each with a mask at source resolution.
//
// Returns 0 on success, or -1 when the param/model files cannot be loaded
// from the current working directory.
static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects)
{
    // Never leave stale results in the output, even on the failure paths.
    objects.clear();

    ncnn::Net yolact;

    yolact.opt.use_vulkan_compute = true;

    // original model converted from https://github.com/dbolya/yolact
    // yolact_resnet50_54_800000.pth
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (yolact.load_param("yolact.param") != 0)
    {
        fprintf(stderr, "load yolact.param failed\n");
        return -1;
    }
    if (yolact.load_model("yolact.bin") != 0)
    {
        fprintf(stderr, "load yolact.bin failed\n");
        return -1;
    }

    const int target_size = 550;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, target_size, target_size);

    const float mean_vals[3] = {123.68f, 116.78f, 103.94f};
    const float norm_vals[3] = {1.0 / 58.40f, 1.0 / 57.12f, 1.0 / 57.38f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = yolact.create_extractor();

    ex.input("input.1", in);

    ncnn::Mat maskmaps;
    ncnn::Mat location;
    ncnn::Mat mask;
    ncnn::Mat confidence;

    ex.extract("619", maskmaps); // 138x138 x 32

    ex.extract("816", location);   // 4 x 19248
    ex.extract("818", mask);       // maskdim 32 x 19248
    ex.extract("820", confidence); // 81 x 19248

    int num_class = confidence.w;
    int num_priors = confidence.h;

    // make priorbox
    ncnn::Mat priorbox(4, num_priors);
    {
        const int conv_ws[5] = {69, 35, 18, 9, 5};
        const int conv_hs[5] = {69, 35, 18, 9, 5};

        const float aspect_ratios[3] = {1.f, 0.5f, 2.f};
        const float scales[5] = {24.f, 48.f, 96.f, 192.f, 384.f};

        float* pb = priorbox;

        for (int p = 0; p < 5; p++)
        {
            int conv_w = conv_ws[p];
            int conv_h = conv_hs[p];

            float scale = scales[p];

            for (int i = 0; i < conv_h; i++)
            {
                for (int j = 0; j < conv_w; j++)
                {
                    // +0.5 because priors are in center-size notation
                    float cx = (j + 0.5f) / conv_w;
                    float cy = (i + 0.5f) / conv_h;

                    for (int k = 0; k < 3; k++)
                    {
                        float ar = aspect_ratios[k];

                        ar = sqrt(ar);

                        float w = scale * ar / 550;
                        float h = scale / ar / 550;

                        // This is for backward compatibility with a bug where I made everything square by accident
                        // cfg.backbone.use_square_anchors:
                        h = w;

                        pb[0] = cx;
                        pb[1] = cy;
                        pb[2] = w;
                        pb[3] = h;

                        pb += 4;
                    }
                }
            }
        }
    }

    const float confidence_thresh = 0.05f;
    const float nms_threshold = 0.5f;
    const int keep_top_k = 200;

    // Candidates are bucketed per class so NMS only compares like with like.
    std::vector<std::vector<Object> > class_candidates;
    class_candidates.resize(num_class);

    for (int i = 0; i < num_priors; i++)
    {
        const float* conf = confidence.row(i);
        const float* loc = location.row(i);
        const float* pb = priorbox.row(i);
        const float* maskdata = mask.row(i);

        // find class id with highest score
        // start from 1 to skip background
        int label = 0;
        float score = 0.f;
        for (int j = 1; j < num_class; j++)
        {
            float class_score = conf[j];
            if (class_score > score)
            {
                label = j;
                score = class_score;
            }
        }

        // ignore background or low score
        if (label == 0 || score <= confidence_thresh)
            continue;

        // CENTER_SIZE
        float var[4] = {0.1f, 0.1f, 0.2f, 0.2f};

        float pb_cx = pb[0];
        float pb_cy = pb[1];
        float pb_w = pb[2];
        float pb_h = pb[3];

        // Decode the regression output relative to the prior box.
        float bbox_cx = var[0] * loc[0] * pb_w + pb_cx;
        float bbox_cy = var[1] * loc[1] * pb_h + pb_cy;
        float bbox_w = (float)(exp(var[2] * loc[2]) * pb_w);
        float bbox_h = (float)(exp(var[3] * loc[3]) * pb_h);

        float obj_x1 = bbox_cx - bbox_w * 0.5f;
        float obj_y1 = bbox_cy - bbox_h * 0.5f;
        float obj_x2 = bbox_cx + bbox_w * 0.5f;
        float obj_y2 = bbox_cy + bbox_h * 0.5f;

        // clip
        obj_x1 = std::max(std::min(obj_x1 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
        obj_y1 = std::max(std::min(obj_y1 * bgr.rows, (float)(bgr.rows - 1)), 0.f);
        obj_x2 = std::max(std::min(obj_x2 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
        obj_y2 = std::max(std::min(obj_y2 * bgr.rows, (float)(bgr.rows - 1)), 0.f);

        // append object
        Object obj;
        obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
        obj.label = label;
        obj.prob = score;
        obj.maskdata = std::vector<float>(maskdata, maskdata + mask.w);

        class_candidates[label].push_back(obj);
    }

    // Per-class sort + NMS, then merge the survivors.
    for (int i = 0; i < (int)class_candidates.size(); i++)
    {
        std::vector<Object>& candidates = class_candidates[i];

        qsort_descent_inplace(candidates);

        std::vector<int> picked;
        nms_sorted_bboxes(candidates, picked, nms_threshold);

        for (int j = 0; j < (int)picked.size(); j++)
        {
            int z = picked[j];
            objects.push_back(candidates[z]);
        }
    }

    qsort_descent_inplace(objects);

    // keep_top_k
    if (keep_top_k < (int)objects.size())
    {
        objects.resize(keep_top_k);
    }

    // generate mask
    for (int i = 0; i < (int)objects.size(); i++)
    {
        Object& obj = objects[i];

        // Linear combination of the mask maps weighted by this object's
        // coefficients. Named differently from the ncnn::Mat `mask` blob
        // above to avoid shadowing it.
        cv::Mat mask_sum(maskmaps.h, maskmaps.w, CV_32FC1);
        {
            mask_sum = cv::Scalar(0.f);

            for (int p = 0; p < maskmaps.c; p++)
            {
                const float* maskmap = maskmaps.channel(p);
                float coeff = obj.maskdata[p];
                float* mp = (float*)mask_sum.data;

                // mask += m * coeff
                for (int j = 0; j < maskmaps.w * maskmaps.h; j++)
                {
                    mp[j] += maskmap[j] * coeff;
                }
            }
        }

        cv::Mat mask2;
        cv::resize(mask_sum, mask2, cv::Size(img_w, img_h));

        // crop obj box and binarize
        obj.mask = cv::Mat(img_h, img_w, CV_8UC1);
        {
            obj.mask = cv::Scalar(0);

            for (int y = 0; y < img_h; y++)
            {
                if (y < obj.rect.y || y > obj.rect.y + obj.rect.height)
                    continue;

                const float* mp2 = mask2.ptr<const float>(y);
                uchar* bmp = obj.mask.ptr<uchar>(y);

                for (int x = 0; x < img_w; x++)
                {
                    if (x < obj.rect.x || x > obj.rect.x + obj.rect.width)
                        continue;

                    bmp[x] = mp2[x] > 0.5f ? 255 : 0;
                }
            }
        }
    }

    return 0;
}
|
||||
|
||||
// Draws every detection with prob >= 0.15 (box, "<class> <score>%" caption
// and a half-transparent mask overlay) on a copy of the image, logs each one
// to stderr, writes the result to "result.png" and shows it in a window until
// a key is pressed.
//
// bgr     - source image; it is cloned, never modified.
// objects - detections to render; each mask is read with the same row/column
//           range as the image.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "person", "bicycle", "car", "motorcycle", "airplane", "bus",
                                        "train", "truck", "boat", "traffic light", "fire hydrant",
                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
                                        "scissors", "teddy bear", "hair drier", "toothbrush"
                                       };
    static const int num_classes = sizeof(class_names) / sizeof(class_names[0]);

    static const unsigned char colors[81][3] = {
        {56, 0, 255}, {226, 255, 0}, {0, 94, 255},
        {0, 37, 255}, {0, 255, 94}, {255, 226, 0},
        {0, 18, 255}, {255, 151, 0}, {170, 0, 255},
        {0, 255, 56}, {255, 0, 75}, {0, 75, 255},
        {0, 255, 169}, {255, 0, 207}, {75, 255, 0},
        {207, 0, 255}, {37, 0, 255}, {0, 207, 255},
        {94, 0, 255}, {0, 255, 113}, {255, 18, 0},
        {255, 0, 56}, {18, 0, 255}, {0, 255, 226},
        {170, 255, 0}, {255, 0, 245}, {151, 255, 0},
        {132, 255, 0}, {75, 0, 255}, {151, 0, 255},
        {0, 151, 255}, {132, 0, 255}, {0, 255, 245},
        {255, 132, 0}, {226, 0, 255}, {255, 37, 0},
        {207, 255, 0}, {0, 255, 207}, {94, 255, 0},
        {0, 226, 255}, {56, 255, 0}, {255, 94, 0},
        {255, 113, 0}, {0, 132, 255}, {255, 0, 132},
        {255, 170, 0}, {255, 0, 188}, {113, 255, 0},
        {245, 0, 255}, {113, 0, 255}, {255, 188, 0},
        {0, 113, 255}, {255, 0, 0}, {0, 56, 255},
        {255, 0, 113}, {0, 255, 188}, {255, 0, 94},
        {255, 0, 18}, {18, 255, 0}, {0, 255, 132},
        {0, 188, 255}, {0, 245, 255}, {0, 169, 255},
        {37, 255, 0}, {255, 0, 151}, {188, 0, 255},
        {0, 255, 37}, {0, 255, 0}, {255, 0, 170},
        {255, 0, 37}, {255, 75, 0}, {0, 0, 255},
        {255, 207, 0}, {255, 0, 226}, {255, 245, 0},
        {188, 255, 0}, {0, 255, 18}, {0, 255, 75},
        {0, 255, 151}, {255, 56, 0}, {245, 255, 0}
    };

    cv::Mat image = bgr.clone();

    // Colours are assigned in drawing order, not by class.
    int color_index = 0;

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        if (obj.prob < 0.15)
            continue;

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        const unsigned char* color = colors[color_index % 81];
        color_index++;

        cv::rectangle(image, obj.rect, cv::Scalar(color[0], color[1], color[2]));

        // The label comes from the network output, so guard the table lookup
        // instead of indexing out of bounds.
        const char* name = (obj.label >= 0 && obj.label < num_classes) ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // Place the caption above the box, clamped to the top/right edges.
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // White filled background behind the caption text.
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));

        // draw mask: blend the object colour 50/50 into masked pixels
        for (int row = 0; row < image.rows; row++)
        {
            const uchar* mp = obj.mask.ptr(row);
            uchar* p = image.ptr(row);
            for (int col = 0; col < image.cols; col++)
            {
                if (mp[col] == 255)
                {
                    p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + color[0] * 0.5);
                    p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + color[1] * 0.5);
                    p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + color[2] * 0.5);
                }
                p += 3;
            }
        }
    }

    cv::imwrite("result.png", image);
    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_yolact(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
156
3rdparty/ncnn/examples/yolov2.cpp
vendored
Normal file
156
3rdparty/ncnn/examples/yolov2.cpp
vendored
Normal file
@ -0,0 +1,156 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Runs the MobileNet-YOLOv2 ncnn model on an image and collects the detections.
//
// bgr     : input image; its pixel buffer is handed to ncnn as PIXEL_BGR and
//           resized to target_size x target_size.
// objects : output list; cleared first, then one entry is appended per row of
//           the "detection_out" blob, with the box scaled by bgr's width/height.
// returns : 0 on success, -1 when the param or model file could not be loaded.
static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects)
{
    // never leave stale results behind, even on the failure path
    objects.clear();

    ncnn::Net yolov2;

    yolov2.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
    // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy.prototxt
    // https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy_iter_80000.caffemodel
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (yolov2.load_param("mobilenet_yolo.param") != 0)
    {
        fprintf(stderr, "failed to load mobilenet_yolo.param\n");
        return -1;
    }
    if (yolov2.load_model("mobilenet_yolo.bin") != 0)
    {
        fprintf(stderr, "failed to load mobilenet_yolo.bin\n");
        return -1;
    }

    const int target_size = 416;

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    // the Caffe-YOLOv2-Windows style
    // X' = X * scale - mean
    // done in two passes: scale first (no mean), then subtract the mean (no scale)
    const float mean_vals[3] = {1.0f, 1.0f, 1.0f};
    const float norm_vals[3] = {0.007843f, 0.007843f, 0.007843f};
    in.substract_mean_normalize(0, norm_vals);
    in.substract_mean_normalize(mean_vals, 0);

    ncnn::Extractor ex = yolov2.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    for (int i = 0; i < out.h; i++)
    {
        // row layout as consumed below: label, prob, then four box values
        // that are multiplied by the image width / height
        const float* values = out.row(i);

        Object object;
        object.label = static_cast<int>(values[0]);
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
|
||||
|
||||
// Draws every detection onto a copy of the image and shows it in a window.
//
// bgr     : source image; it is cloned, the original is not modified.
// objects : detections to draw; each one is also logged to stderr.
//
// A label outside the class_names table is rendered as "unknown" instead of
// reading past the end of the table.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    static const int num_classes = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label comes straight from network output, so validate it before indexing
        const bool label_ok = obj.label >= 0 && obj.label < num_classes;
        const char* class_name = label_ok ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // white filled background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_yolov2(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
153
3rdparty/ncnn/examples/yolov3.cpp
vendored
Normal file
153
3rdparty/ncnn/examples/yolov3.cpp
vendored
Normal file
@ -0,0 +1,153 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Runs the MobileNetV2-YOLOv3 ncnn model on an image and collects the detections.
//
// bgr     : input image; its pixel buffer is handed to ncnn as PIXEL_BGR and
//           resized to target_size x target_size.
// objects : output list; cleared first, then one entry is appended per row of
//           the "detection_out" blob, with the box scaled by bgr's width/height.
// returns : 0 on success, -1 when the param or model file could not be loaded.
static int detect_yolov3(const cv::Mat& bgr, std::vector<Object>& objects)
{
    // never leave stale results behind, even on the failure path
    objects.clear();

    ncnn::Net yolov3;

    yolov3.opt.use_vulkan_compute = true;

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
    // param : https://drive.google.com/open?id=1V9oKHP6G6XvXZqhZbzNKL6FI_clRWdC-
    // bin : https://drive.google.com/open?id=1DBcuFCr-856z3FRQznWL_S5h-Aj3RawA
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (yolov3.load_param("mobilenetv2_yolov3.param") != 0)
    {
        fprintf(stderr, "failed to load mobilenetv2_yolov3.param\n");
        return -1;
    }
    if (yolov3.load_model("mobilenetv2_yolov3.bin") != 0)
    {
        fprintf(stderr, "failed to load mobilenetv2_yolov3.bin\n");
        return -1;
    }

    const int target_size = 352;

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, bgr.cols, bgr.rows, target_size, target_size);

    // per-channel mean and scale applied to the resized input
    const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
    const float norm_vals[3] = {0.007843f, 0.007843f, 0.007843f};
    in.substract_mean_normalize(mean_vals, norm_vals);

    ncnn::Extractor ex = yolov3.create_extractor();

    ex.input("data", in);

    ncnn::Mat out;
    ex.extract("detection_out", out);

    // printf("%d %d %d\n", out.w, out.h, out.c);
    for (int i = 0; i < out.h; i++)
    {
        // row layout as consumed below: label, prob, then four box values
        // that are multiplied by the image width / height
        const float* values = out.row(i);

        Object object;
        object.label = static_cast<int>(values[0]);
        object.prob = values[1];
        object.rect.x = values[2] * img_w;
        object.rect.y = values[3] * img_h;
        object.rect.width = values[4] * img_w - object.rect.x;
        object.rect.height = values[5] * img_h - object.rect.y;

        objects.push_back(object);
    }

    return 0;
}
|
||||
|
||||
// Draws every detection onto a copy of the image and shows it in a window.
//
// bgr     : source image; it is cloned, the original is not modified.
// objects : detections to draw; each one is also logged to stderr.
//
// A label outside the class_names table is rendered as "unknown" instead of
// reading past the end of the table.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {"background",
                                        "aeroplane", "bicycle", "bird", "boat",
                                        "bottle", "bus", "car", "cat", "chair",
                                        "cow", "diningtable", "dog", "horse",
                                        "motorbike", "person", "pottedplant",
                                        "sheep", "sofa", "train", "tvmonitor"
                                       };
    static const int num_classes = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label comes straight from network output, so validate it before indexing
        const bool label_ok = obj.label >= 0 && obj.label < num_classes;
        const char* class_name = label_ok ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // white filled background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_yolov3(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
311
3rdparty/ncnn/examples/yolov4.cpp
vendored
Normal file
311
3rdparty/ncnn/examples/yolov4.cpp
vendored
Normal file
@ -0,0 +1,311 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "net.h"
|
||||
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
|
||||
#if CV_MAJOR_VERSION >= 3
|
||||
#include <opencv2/videoio/videoio.hpp>
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define NCNN_PROFILING
|
||||
#define YOLOV4_TINY //Using yolov4_tiny, if undef, using original yolov4
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
#include "benchmark.h"
|
||||
#endif
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// Configures the network options and loads the YOLOv4 param/model files.
//
// yolov4      : network to configure and load into.
// target_size : receives the square input size matching the selected model
//               (416 when YOLOV4_TINY is defined, 608 otherwise).
// returns     : 0 on success, otherwise the non-zero code returned by
//               load_param() or load_model().
static int init_yolov4(ncnn::Net* yolov4, int* target_size)
{
    /* --> Set the params you need for the ncnn inference <-- */

    // You need to compile with libgomp for multi thread support
    yolov4->opt.num_threads = 4;

    // You need to compile with libvulkan for gpu support
    yolov4->opt.use_vulkan_compute = true;

    yolov4->opt.use_winograd_convolution = true;
    yolov4->opt.use_sgemm_convolution = true;
    yolov4->opt.use_fp16_packed = true;
    yolov4->opt.use_fp16_storage = true;
    yolov4->opt.use_fp16_arithmetic = true;
    yolov4->opt.use_packing_layout = true;
    yolov4->opt.use_shader_pack8 = false;
    yolov4->opt.use_image_storage = false;

    /* --> End of setting params <-- */

    // original pretrained model from https://github.com/AlexeyAB/darknet
    // the ncnn model https://drive.google.com/drive/folders/1YzILvh0SKQPS_lrb33dmGNq7aVTKPWS0?usp=sharing
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
#ifdef YOLOV4_TINY
    const char* param_path = "yolov4-tiny-opt.param";
    const char* model_path = "yolov4-tiny-opt.bin";
    *target_size = 416;
#else
    const char* param_path = "yolov4-opt.param";
    const char* model_path = "yolov4-opt.bin";
    *target_size = 608;
#endif

    // stop at the first failing step and hand its status code to the caller
    const int param_status = yolov4->load_param(param_path);
    if (param_status != 0)
        return param_status;

    const int model_status = yolov4->load_model(model_path);
    if (model_status != 0)
        return model_status;

    return 0;
}
|
||||
|
||||
// Runs an already loaded YOLOv4 network on one image.
//
// bgr         : input image; read as BGR and converted to RGB while resizing.
// objects     : output list; cleared, then one entry is appended per row of
//               the "output" blob with the box scaled by bgr's width/height.
// target_size : side length of the square network input.
// yolov4      : network to create the extractor from.
// returns     : always 0.
static int detect_yolov4(const cv::Mat& bgr, std::vector<Object>& objects, int target_size, ncnn::Net* yolov4)
{
    const int src_w = bgr.cols;
    const int src_h = bgr.rows;

    ncnn::Mat blob = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, src_w, src_h, target_size, target_size);

    // no mean shift, only a 1/255 scale per channel
    const float zero_mean[3] = {0, 0, 0};
    const float unit_scale[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    blob.substract_mean_normalize(zero_mean, unit_scale);

    ncnn::Extractor runner = yolov4->create_extractor();
    runner.input("data", blob);

    ncnn::Mat detections;
    runner.extract("output", detections);

    objects.clear();

    const int detection_count = detections.h;
    for (int r = 0; r < detection_count; ++r)
    {
        // row layout as consumed below: label, prob, then four box values
        const float* v = detections.row(r);

        Object det;
        det.label = v[0];
        det.prob = v[1];
        det.rect.x = v[2] * src_w;
        det.rect.y = v[3] * src_h;
        det.rect.width = v[4] * src_w - det.rect.x;
        det.rect.height = v[5] * src_h - det.rect.y;

        objects.push_back(det);
    }

    return 0;
}
|
||||
|
||||
// Draws every detection onto a copy of the image and shows it in a window.
//
// bgr          : source image; it is cloned, the original is not modified.
// objects      : detections to draw; each one is also logged to stderr.
// is_streaming : non-zero -> waitKey(1) so a capture loop keeps running;
//                zero     -> waitKey(0), i.e. wait for a key press.
// returns      : always 0.
//
// A label outside the class_names table is rendered as "unknown" instead of
// reading past the end of the table.
static int draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, int is_streaming)
{
    static const char* class_names[] = {"background", "person", "bicycle",
                                        "car", "motorbike", "aeroplane", "bus", "train", "truck",
                                        "boat", "traffic light", "fire hydrant", "stop sign",
                                        "parking meter", "bench", "bird", "cat", "dog", "horse",
                                        "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                        "backpack", "umbrella", "handbag", "tie", "suitcase",
                                        "frisbee", "skis", "snowboard", "sports ball", "kite",
                                        "baseball bat", "baseball glove", "skateboard", "surfboard",
                                        "tennis racket", "bottle", "wine glass", "cup", "fork",
                                        "knife", "spoon", "bowl", "banana", "apple", "sandwich",
                                        "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
                                        "cake", "chair", "sofa", "pottedplant", "bed", "diningtable",
                                        "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard",
                                        "cell phone", "microwave", "oven", "toaster", "sink",
                                        "refrigerator", "book", "clock", "vase", "scissors",
                                        "teddy bear", "hair drier", "toothbrush"
                                       };
    static const int num_classes = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // the label comes straight from network output, so validate it before indexing
        const bool label_ok = obj.label >= 0 && obj.label < num_classes;
        const char* class_name = label_ok ? class_names[obj.label] : "unknown";

        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // place the caption above the box, pulled back inside the image if needed
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // white filled background behind the caption
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);

    if (is_streaming)
    {
        cv::waitKey(1);
    }
    else
    {
        cv::waitKey(0);
    }

    return 0;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cv::Mat frame;
|
||||
std::vector<Object> objects;
|
||||
|
||||
cv::VideoCapture cap;
|
||||
|
||||
ncnn::Net yolov4;
|
||||
|
||||
const char* devicepath;
|
||||
|
||||
int target_size = 0;
|
||||
int is_streaming = 0;
|
||||
|
||||
if (argc < 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [v4l input device or image]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
devicepath = argv[1];
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_load_start = ncnn::get_current_time();
|
||||
#endif
|
||||
|
||||
int ret = init_yolov4(&yolov4, &target_size); //We load model and param first!
|
||||
if (ret != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to load model or param, error %d", ret);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_load_end = ncnn::get_current_time();
|
||||
fprintf(stdout, "NCNN Init time %.02lfms\n", t_load_end - t_load_start);
|
||||
#endif
|
||||
|
||||
if (strstr(devicepath, "/dev/video") == NULL)
|
||||
{
|
||||
frame = cv::imread(argv[1], 1);
|
||||
if (frame.empty())
|
||||
{
|
||||
fprintf(stderr, "Failed to read image %s.\n", argv[1]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cap.open(devicepath);
|
||||
|
||||
if (!cap.isOpened())
|
||||
{
|
||||
fprintf(stderr, "Failed to open %s", devicepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cap >> frame;
|
||||
|
||||
if (frame.empty())
|
||||
{
|
||||
fprintf(stderr, "Failed to read from device %s.\n", devicepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
is_streaming = 1;
|
||||
}
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (is_streaming)
|
||||
{
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_capture_start = ncnn::get_current_time();
|
||||
#endif
|
||||
|
||||
cap >> frame;
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_capture_end = ncnn::get_current_time();
|
||||
fprintf(stdout, "NCNN OpenCV capture time %.02lfms\n", t_capture_end - t_capture_start);
|
||||
#endif
|
||||
if (frame.empty())
|
||||
{
|
||||
fprintf(stderr, "OpenCV Failed to Capture from device %s\n", devicepath);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_detect_start = ncnn::get_current_time();
|
||||
#endif
|
||||
|
||||
detect_yolov4(frame, objects, target_size, &yolov4); //Create an extractor and run detection
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_detect_end = ncnn::get_current_time();
|
||||
fprintf(stdout, "NCNN detection time %.02lfms\n", t_detect_end - t_detect_start);
|
||||
#endif
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_draw_start = ncnn::get_current_time();
|
||||
#endif
|
||||
|
||||
draw_objects(frame, objects, is_streaming); //Draw detection results on opencv image
|
||||
|
||||
#ifdef NCNN_PROFILING
|
||||
double t_draw_end = ncnn::get_current_time();
|
||||
fprintf(stdout, "NCNN OpenCV draw result time %.02lfms\n", t_draw_end - t_draw_start);
|
||||
#endif
|
||||
|
||||
if (!is_streaming)
|
||||
{ //If it is a still image, exit!
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
503
3rdparty/ncnn/examples/yolov5.cpp
vendored
Normal file
503
3rdparty/ncnn/examples/yolov5.cpp
vendored
Normal file
@ -0,0 +1,503 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "layer.h"
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
#define YOLOV5_V60 1 //YOLOv5 v6.0
|
||||
|
||||
#if YOLOV5_V60
|
||||
#define MAX_STRIDE 64
|
||||
#else
|
||||
#define MAX_STRIDE 32
|
||||
class YoloV5Focus : public ncnn::Layer
|
||||
{
|
||||
public:
|
||||
YoloV5Focus()
|
||||
{
|
||||
one_blob_only = true;
|
||||
}
|
||||
|
||||
virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
|
||||
{
|
||||
int w = bottom_blob.w;
|
||||
int h = bottom_blob.h;
|
||||
int channels = bottom_blob.c;
|
||||
|
||||
int outw = w / 2;
|
||||
int outh = h / 2;
|
||||
int outc = channels * 4;
|
||||
|
||||
top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator);
|
||||
if (top_blob.empty())
|
||||
return -100;
|
||||
|
||||
#pragma omp parallel for num_threads(opt.num_threads)
|
||||
for (int p = 0; p < outc; p++)
|
||||
{
|
||||
const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2);
|
||||
float* outptr = top_blob.channel(p);
|
||||
|
||||
for (int i = 0; i < outh; i++)
|
||||
{
|
||||
for (int j = 0; j < outw; j++)
|
||||
{
|
||||
*outptr = *ptr;
|
||||
|
||||
outptr += 1;
|
||||
ptr += 2;
|
||||
}
|
||||
|
||||
ptr += w;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
DEFINE_LAYER_CREATOR(YoloV5Focus)
|
||||
#endif //YOLOV5_V60
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
|
||||
{
|
||||
if (faceobjects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Logistic function 1 / (1 + e^-x), returned as float.
static inline float sigmoid(float x)
{
    const float negated = -x;
    return static_cast<float>(1.f / (1.f + exp(negated)));
}
|
||||
|
||||
// Decodes one YOLOv5 output blob into candidate boxes.
//
// anchors        : flat list of anchor sizes, read as (w, h) pairs.
// stride         : size of one grid cell in padded-input pixels.
// in_pad         : padded network input; only its w / h are used, to derive
//                  the grid dimensions.
// feat_blob      : one channel per anchor, one row per grid cell; each row is
//                  read as [x, y, w, h, objectness, class scores...].
// prob_threshold : minimum objectness and minimum final confidence.
// objects        : output; accepted candidates are appended (not cleared).
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int cell_count = feat_blob.h;

    // derive the grid from the longer side, then get the other side from the cell count
    int grid_cols;
    int grid_rows;
    if (in_pad.w > in_pad.h)
    {
        grid_cols = in_pad.w / stride;
        grid_rows = cell_count / grid_cols;
    }
    else
    {
        grid_rows = in_pad.h / stride;
        grid_cols = cell_count / grid_rows;
    }

    const int class_count = feat_blob.w - 5;
    const int anchor_count = anchors.w / 2;

    for (int a = 0; a < anchor_count; a++)
    {
        const float anchor_w = anchors[a * 2];
        const float anchor_h = anchors[a * 2 + 1];

        const ncnn::Mat feat = feat_blob.channel(a);

        for (int gy = 0; gy < grid_rows; gy++)
        {
            for (int gx = 0; gx < grid_cols; gx++)
            {
                const float* cell = feat.row(gy * grid_cols + gx);

                const float box_confidence = sigmoid(cell[4]);
                if (!(box_confidence >= prob_threshold))
                    continue;

                // pick the class with the highest raw score (first one wins on ties)
                int best_class = 0;
                float best_score = -FLT_MAX;
                for (int k = 0; k < class_count; k++)
                {
                    const float score = cell[5 + k];
                    if (score > best_score)
                    {
                        best_class = k;
                        best_score = score;
                    }
                }

                const float confidence = box_confidence * sigmoid(best_score);
                if (!(confidence >= prob_threshold))
                    continue;

                // yolov5/models/yolo.py Detect forward
                // y = x[i].sigmoid()
                // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                const float dx = sigmoid(cell[0]);
                const float dy = sigmoid(cell[1]);
                const float dw = sigmoid(cell[2]);
                const float dh = sigmoid(cell[3]);

                const float center_x = (dx * 2.f - 0.5f + gx) * stride;
                const float center_y = (dy * 2.f - 0.5f + gy) * stride;

                const float box_w = pow(dw * 2.f, 2) * anchor_w;
                const float box_h = pow(dh * 2.f, 2) * anchor_h;

                // convert centre / size to corner coordinates
                const float x0 = center_x - box_w * 0.5f;
                const float y0 = center_y - box_h * 0.5f;
                const float x1 = center_x + box_w * 0.5f;
                const float y1 = center_y + box_h * 0.5f;

                Object candidate;
                candidate.rect.x = x0;
                candidate.rect.y = y0;
                candidate.rect.width = x1 - x0;
                candidate.rect.height = y1 - y0;
                candidate.label = best_class;
                candidate.prob = confidence;

                objects.push_back(candidate);
            }
        }
    }
}
|
||||
|
||||
static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
|
||||
{
|
||||
ncnn::Net yolov5;
|
||||
|
||||
yolov5.opt.use_vulkan_compute = true;
|
||||
// yolov5.opt.use_bf16_storage = true;
|
||||
|
||||
// original pretrained model from https://github.com/ultralytics/yolov5
|
||||
// the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
|
||||
#if YOLOV5_V60
|
||||
yolov5.load_param("yolov5s_6.0.param");
|
||||
yolov5.load_model("yolov5s_6.0.bin");
|
||||
#else
|
||||
yolov5.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator);
|
||||
|
||||
yolov5.load_param("yolov5s.param");
|
||||
yolov5.load_model("yolov5s.bin");
|
||||
#endif
|
||||
|
||||
const int target_size = 640;
|
||||
const float prob_threshold = 0.25f;
|
||||
const float nms_threshold = 0.45f;
|
||||
|
||||
int img_w = bgr.cols;
|
||||
int img_h = bgr.rows;
|
||||
|
||||
// letterbox pad to multiple of MAX_STRIDE
|
||||
int w = img_w;
|
||||
int h = img_h;
|
||||
float scale = 1.f;
|
||||
if (w > h)
|
||||
{
|
||||
scale = (float)target_size / w;
|
||||
w = target_size;
|
||||
h = h * scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = (float)target_size / h;
|
||||
h = target_size;
|
||||
w = w * scale;
|
||||
}
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);
|
||||
|
||||
// pad to target_size rectangle
|
||||
// yolov5/utils/datasets.py letterbox
|
||||
int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
|
||||
int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
|
||||
ncnn::Mat in_pad;
|
||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
|
||||
|
||||
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
|
||||
in_pad.substract_mean_normalize(0, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = yolov5.create_extractor();
|
||||
|
||||
ex.input("images", in_pad);
|
||||
|
||||
std::vector<Object> proposals;
|
||||
|
||||
// anchor setting from yolov5/models/yolov5s.yaml
|
||||
|
||||
// stride 8
|
||||
{
|
||||
ncnn::Mat out;
|
||||
ex.extract("output", out);
|
||||
|
||||
ncnn::Mat anchors(6);
|
||||
anchors[0] = 10.f;
|
||||
anchors[1] = 13.f;
|
||||
anchors[2] = 16.f;
|
||||
anchors[3] = 30.f;
|
||||
anchors[4] = 33.f;
|
||||
anchors[5] = 23.f;
|
||||
|
||||
std::vector<Object> objects8;
|
||||
generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);
|
||||
|
||||
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
|
||||
}
|
||||
|
||||
// stride 16
|
||||
{
|
||||
ncnn::Mat out;
|
||||
#if YOLOV5_V60
|
||||
ex.extract("376", out);
|
||||
#else
|
||||
ex.extract("781", out);
|
||||
#endif
|
||||
|
||||
ncnn::Mat anchors(6);
|
||||
anchors[0] = 30.f;
|
||||
anchors[1] = 61.f;
|
||||
anchors[2] = 62.f;
|
||||
anchors[3] = 45.f;
|
||||
anchors[4] = 59.f;
|
||||
anchors[5] = 119.f;
|
||||
|
||||
std::vector<Object> objects16;
|
||||
generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);
|
||||
|
||||
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
|
||||
}
|
||||
|
||||
// stride 32
|
||||
{
|
||||
ncnn::Mat out;
|
||||
#if YOLOV5_V60
|
||||
ex.extract("401", out);
|
||||
#else
|
||||
ex.extract("801", out);
|
||||
#endif
|
||||
ncnn::Mat anchors(6);
|
||||
anchors[0] = 116.f;
|
||||
anchors[1] = 90.f;
|
||||
anchors[2] = 156.f;
|
||||
anchors[3] = 198.f;
|
||||
anchors[4] = 373.f;
|
||||
anchors[5] = 326.f;
|
||||
|
||||
std::vector<Object> objects32;
|
||||
generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);
|
||||
|
||||
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
|
||||
}
|
||||
|
||||
// sort all proposals by score from highest to lowest
|
||||
qsort_descent_inplace(proposals);
|
||||
|
||||
// apply nms with nms_threshold
|
||||
std::vector<int> picked;
|
||||
nms_sorted_bboxes(proposals, picked, nms_threshold);
|
||||
|
||||
int count = picked.size();
|
||||
|
||||
objects.resize(count);
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
objects[i] = proposals[picked[i]];
|
||||
|
||||
// adjust offset to original unpadded
|
||||
float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
|
||||
float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
|
||||
float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
|
||||
float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
|
||||
|
||||
// clip
|
||||
x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
|
||||
y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
|
||||
x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
|
||||
y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
|
||||
|
||||
objects[i].rect.x = x0;
|
||||
objects[i].rect.y = y0;
|
||||
objects[i].rect.width = x1 - x0;
|
||||
objects[i].rect.height = y1 - y0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Log each detection to stderr and display the image with boxes and captions
// drawn on a copy of it.
//
// bgr     - source image; drawing happens on a clone, the argument is not modified
// objects - detections to render; Object::label is used as an index into class_names
//
// Shows the result in the "image" window and then waits in cv::waitKey(0).
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // 80 class names, looked up by Object::label
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    cv::Mat canvas = bgr.clone();

    const size_t total = objects.size();
    for (size_t idx = 0; idx < total; idx++)
    {
        const Object& det = objects[idx];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", det.label, det.prob,
                det.rect.x, det.rect.y, det.rect.width, det.rect.height);

        cv::rectangle(canvas, det.rect, cv::Scalar(255, 0, 0));

        char caption[256];
        sprintf(caption, "%s %.1f%%", class_names[det.label], det.prob * 100);

        int baseLine = 0;
        cv::Size caption_size = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // the caption goes right above the box; keep it inside the top and right image edges
        int left = det.rect.x;
        int top = det.rect.y - caption_size.height - baseLine;
        if (top < 0)
            top = 0;
        if (left + caption_size.width > canvas.cols)
            left = canvas.cols - caption_size.width;

        // white backdrop first, then the black caption text over it
        const cv::Point backdrop_origin(left, top);
        const cv::Size backdrop_size(caption_size.width, caption_size.height + baseLine);
        cv::rectangle(canvas, cv::Rect(backdrop_origin, backdrop_size),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(canvas, caption, cv::Point(left, top + caption_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", canvas);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_yolov5(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
422
3rdparty/ncnn/examples/yolov5_pnnx.cpp
vendored
Normal file
422
3rdparty/ncnn/examples/yolov5_pnnx.cpp
vendored
Normal file
@ -0,0 +1,422 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "layer.h"
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
|
||||
{
|
||||
if (faceobjects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Logistic function 1 / (1 + e^-x), returned as float.
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (exp(-x) + 1.f));
}
|
||||
|
||||
// Decode one yolov5 output head into box proposals.
//
// anchors        - flat list of (width, height) pairs, one pair per anchor
// stride         - scale factor applied to the grid cell coordinates of this head
// in_pad         - network input blob; not read by this function
// feat_blob      - raw head output; channel (anchor * 85 + k) holds, per grid cell,
//                  k = 0..3 box terms, k = 4 objectness, k = 5..84 class scores
// prob_threshold - proposals with sigmoid(objectness) * sigmoid(best class score)
//                  below this value are dropped
// objects        - output: decoded proposals are appended
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int num_class = 80;
    const int channels_per_anchor = num_class + 5; // 4 box terms + objectness + class scores = 85

    const int grid_w = feat_blob.w;
    const int grid_h = feat_blob.h;

    const int anchor_count = anchors.w / 2;

    for (int a = 0; a < anchor_count; a++)
    {
        const float anchor_w = anchors[a * 2];
        const float anchor_h = anchors[a * 2 + 1];

        // first channel that belongs to this anchor
        const int base = a * channels_per_anchor;

        for (int gy = 0; gy < grid_h; gy++)
        {
            for (int gx = 0; gx < grid_w; gx++)
            {
                // pick the class with the highest raw score
                int best_class = 0;
                float best_score = -FLT_MAX;
                for (int c = 0; c < num_class; c++)
                {
                    const float raw = feat_blob.channel(base + 5 + c).row(gy)[gx];
                    if (raw > best_score)
                    {
                        best_class = c;
                        best_score = raw;
                    }
                }

                const float objectness = feat_blob.channel(base + 4).row(gy)[gx];

                const float confidence = sigmoid(objectness) * sigmoid(best_score);

                if (!(confidence >= prob_threshold))
                    continue;

                // yolov5/models/yolo.py Detect forward
                // y = x[i].sigmoid()
                // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh

                const float dx = sigmoid(feat_blob.channel(base + 0).row(gy)[gx]);
                const float dy = sigmoid(feat_blob.channel(base + 1).row(gy)[gx]);
                const float dw = sigmoid(feat_blob.channel(base + 2).row(gy)[gx]);
                const float dh = sigmoid(feat_blob.channel(base + 3).row(gy)[gx]);

                // box centre and size
                const float center_x = (dx * 2.f - 0.5f + gx) * stride;
                const float center_y = (dy * 2.f - 0.5f + gy) * stride;

                const float box_w = pow(dw * 2.f, 2) * anchor_w;
                const float box_h = pow(dh * 2.f, 2) * anchor_h;

                // centre/size -> corner coordinates
                const float x0 = center_x - box_w * 0.5f;
                const float y0 = center_y - box_h * 0.5f;
                const float x1 = center_x + box_w * 0.5f;
                const float y1 = center_y + box_h * 0.5f;

                Object candidate;
                candidate.rect.x = x0;
                candidate.rect.y = y0;
                candidate.rect.width = x1 - x0;
                candidate.rect.height = y1 - y0;
                candidate.label = best_class;
                candidate.prob = confidence;

                objects.push_back(candidate);
            }
        }
    }
}
|
||||
|
||||
// Run the yolov5s model on a BGR image and return the detected objects.
//
// bgr     - input image (pixel data is read as PIXEL_BGR2RGB)
// objects - output: final detections in coordinates of the original image,
//           overwritten on every call
//
// Returns 0 on success, -1 when the model files cannot be loaded (objects is
// left empty in that case).
static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolov5;

    yolov5.opt.use_vulkan_compute = true;
    // yolov5.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/ultralytics/yolov5
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // load_param / load_model report failure through a non-zero return value;
    // without a loaded model there is nothing meaningful to run
    if (yolov5.load_param("yolov5s.ncnn.param") != 0 || yolov5.load_model("yolov5s.ncnn.bin") != 0)
    {
        fprintf(stderr, "failed to load yolov5s.ncnn.param / yolov5s.ncnn.bin\n");
        objects.clear();
        return -1;
    }

    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    // yolov5/models/common.py DetectMultiBackend
    const int max_stride = 64;

    // letterbox pad to multiple of max_stride:
    // scale the longer side to target_size and keep the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    // pad to target_size rectangle
    // yolov5/utils/datasets.py letterbox
    const int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
    const int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
    ncnn::Mat in_pad;
    // the padding is split between both sides; the second side gets the odd pixel
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // scale pixel values by 1/255, no mean subtraction
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov5.create_extractor();

    ex.input("in0", in_pad);

    std::vector<Object> proposals;

    // anchor setting from yolov5/models/yolov5s.yaml
    // one entry per output head: blob name, stride and three (w, h) anchor pairs
    static const char* const head_blobs[3] = {"out0", "out1", "out2"};
    static const int head_strides[3] = {8, 16, 32};
    static const float head_anchors[3][6] = {
        {10.f, 13.f, 16.f, 30.f, 33.f, 23.f},
        {30.f, 61.f, 62.f, 45.f, 59.f, 119.f},
        {116.f, 90.f, 156.f, 198.f, 373.f, 326.f}
    };

    for (int head = 0; head < 3; head++)
    {
        ncnn::Mat out;
        ex.extract(head_blobs[head], out);

        ncnn::Mat anchors(6);
        for (int k = 0; k < 6; k++)
        {
            anchors[k] = head_anchors[head][k];
        }

        std::vector<Object> head_objects;
        generate_proposals(anchors, head_strides[head], in_pad, out, prob_threshold, head_objects);

        proposals.insert(proposals.end(), head_objects.begin(), head_objects.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded: remove the left/top padding, undo the resize
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip to the image
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
|
||||
|
||||
// Log each detection to stderr and display the image with boxes and captions
// drawn on a copy of it.
//
// bgr     - source image; drawing happens on a clone, the argument is not modified
// objects - detections to render; Object::label is used as an index into class_names
//
// Shows the result in the "image" window and then waits in cv::waitKey(0).
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // 80 class names, looked up by Object::label
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    cv::Mat canvas = bgr.clone();

    const size_t total = objects.size();
    for (size_t idx = 0; idx < total; idx++)
    {
        const Object& det = objects[idx];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", det.label, det.prob,
                det.rect.x, det.rect.y, det.rect.width, det.rect.height);

        cv::rectangle(canvas, det.rect, cv::Scalar(255, 0, 0));

        char caption[256];
        sprintf(caption, "%s %.1f%%", class_names[det.label], det.prob * 100);

        int baseLine = 0;
        cv::Size caption_size = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // the caption goes right above the box; keep it inside the top and right image edges
        int left = det.rect.x;
        int top = det.rect.y - caption_size.height - baseLine;
        if (top < 0)
            top = 0;
        if (left + caption_size.width > canvas.cols)
            left = canvas.cols - caption_size.width;

        // white backdrop first, then the black caption text over it
        const cv::Point backdrop_origin(left, top);
        const cv::Size backdrop_size(caption_size.width, caption_size.height + baseLine);
        cv::rectangle(canvas, cv::Rect(backdrop_origin, backdrop_size),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(canvas, caption, cv::Point(left, top + caption_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", canvas);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_yolov5(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
418
3rdparty/ncnn/examples/yolox.cpp
vendored
Normal file
418
3rdparty/ncnn/examples/yolox.cpp
vendored
Normal file
@ -0,0 +1,418 @@
|
||||
// This file is written based on the following file:
|
||||
// https://github.com/Tencent/ncnn/blob/master/examples/yolov5.cpp
|
||||
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
// ------------------------------------------------------------------------------
|
||||
// Copyright (C) 2020-2021, Megvii Inc. All rights reserved.
|
||||
|
||||
#include "layer.h"
|
||||
#include "net.h"
|
||||
|
||||
#if defined(USE_NCNN_SIMPLEOCV)
|
||||
#include "simpleocv.h"
|
||||
#else
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include <opencv2/imgproc/imgproc.hpp>
|
||||
#endif
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
// nms threshold: IoU above which an overlapping lower-ranked box is suppressed
#define YOLOX_NMS_THRESH 0.45
// threshold of bounding box prob: proposals scoring at or below it are dropped
#define YOLOX_CONF_THRESH 0.25
// target image size after resize, might use 416 for small model
#define YOLOX_TARGET_SIZE 640
|
||||
|
||||
// YOLOX use the same focus in yolov5
|
||||
class YoloV5Focus : public ncnn::Layer
|
||||
{
|
||||
public:
|
||||
YoloV5Focus()
|
||||
{
|
||||
one_blob_only = true;
|
||||
}
|
||||
|
||||
virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
|
||||
{
|
||||
int w = bottom_blob.w;
|
||||
int h = bottom_blob.h;
|
||||
int channels = bottom_blob.c;
|
||||
|
||||
int outw = w / 2;
|
||||
int outh = h / 2;
|
||||
int outc = channels * 4;
|
||||
|
||||
top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator);
|
||||
if (top_blob.empty())
|
||||
return -100;
|
||||
|
||||
#pragma omp parallel for num_threads(opt.num_threads)
|
||||
for (int p = 0; p < outc; p++)
|
||||
{
|
||||
const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2);
|
||||
float* outptr = top_blob.channel(p);
|
||||
|
||||
for (int i = 0; i < outh; i++)
|
||||
{
|
||||
for (int j = 0; j < outw; j++)
|
||||
{
|
||||
*outptr = *ptr;
|
||||
|
||||
outptr += 1;
|
||||
ptr += 2;
|
||||
}
|
||||
|
||||
ptr += w;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
DEFINE_LAYER_CREATOR(YoloV5Focus)
|
||||
|
||||
struct Object
|
||||
{
|
||||
cv::Rect_<float> rect;
|
||||
int label;
|
||||
float prob;
|
||||
};
|
||||
|
||||
// One cell of a detection grid together with the stride of the grid it belongs to.
struct GridAndStride
{
    int grid0;  // cell index added to the predicted x centre
    int grid1;  // cell index added to the predicted y centre
    int stride; // factor that maps grid units to input pixels
};
|
||||
|
||||
static inline float intersection_area(const Object& a, const Object& b)
|
||||
{
|
||||
cv::Rect_<float> inter = a.rect & b.rect;
|
||||
return inter.area();
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
|
||||
{
|
||||
int i = left;
|
||||
int j = right;
|
||||
float p = faceobjects[(left + right) / 2].prob;
|
||||
|
||||
while (i <= j)
|
||||
{
|
||||
while (faceobjects[i].prob > p)
|
||||
i++;
|
||||
|
||||
while (faceobjects[j].prob < p)
|
||||
j--;
|
||||
|
||||
if (i <= j)
|
||||
{
|
||||
// swap
|
||||
std::swap(faceobjects[i], faceobjects[j]);
|
||||
|
||||
i++;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
if (left < j) qsort_descent_inplace(faceobjects, left, j);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
if (i < right) qsort_descent_inplace(faceobjects, i, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void qsort_descent_inplace(std::vector<Object>& objects)
|
||||
{
|
||||
if (objects.empty())
|
||||
return;
|
||||
|
||||
qsort_descent_inplace(objects, 0, objects.size() - 1);
|
||||
}
|
||||
|
||||
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
|
||||
{
|
||||
picked.clear();
|
||||
|
||||
const int n = faceobjects.size();
|
||||
|
||||
std::vector<float> areas(n);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
areas[i] = faceobjects[i].rect.area();
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
const Object& a = faceobjects[i];
|
||||
|
||||
int keep = 1;
|
||||
for (int j = 0; j < (int)picked.size(); j++)
|
||||
{
|
||||
const Object& b = faceobjects[picked[j]];
|
||||
|
||||
// intersection over union
|
||||
float inter_area = intersection_area(a, b);
|
||||
float union_area = areas[i] + areas[picked[j]] - inter_area;
|
||||
// float IoU = inter_area / union_area
|
||||
if (inter_area / union_area > nms_threshold)
|
||||
keep = 0;
|
||||
}
|
||||
|
||||
if (keep)
|
||||
picked.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
static void generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
|
||||
{
|
||||
for (int i = 0; i < (int)strides.size(); i++)
|
||||
{
|
||||
int stride = strides[i];
|
||||
int num_grid = target_size / stride;
|
||||
for (int g1 = 0; g1 < num_grid; g1++)
|
||||
{
|
||||
for (int g0 = 0; g0 < num_grid; g0++)
|
||||
{
|
||||
GridAndStride gs;
|
||||
gs.grid0 = g0;
|
||||
gs.grid1 = g1;
|
||||
gs.stride = stride;
|
||||
grid_strides.push_back(gs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the YOLOX output blob into box proposals.
//
// grid_strides   - one (grid0, grid1, stride) entry per output row, in row order
// feat_blob      - network output; each row holds x, y, w, h, objectness followed
//                  by (feat_blob.w - 5) class scores
// prob_threshold - a proposal is emitted for every class whose
//                  objectness * class score is greater than this value
// objects        - output: decoded proposals are appended
//
// Rows are only decoded while both a grid entry and a blob row exist, so an
// empty or short blob (for example after a failed extract) is never read out
// of bounds.
static void generate_yolox_proposals(const std::vector<GridAndStride>& grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int num_class = feat_blob.w - 5;
    if (feat_blob.empty() || num_class <= 0)
        return;

    // never walk past the rows the blob actually has
    int num_anchors = (int)grid_strides.size();
    if (num_anchors > feat_blob.h)
        num_anchors = feat_blob.h;

    const float* feat_ptr = feat_blob.channel(0);
    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)
    {
        const GridAndStride& gs = grid_strides[anchor_idx];
        const int grid0 = gs.grid0;
        const int grid1 = gs.grid1;
        const int stride = gs.stride;

        // yolox/models/yolo_head.py decode logic
        //  outputs[..., :2] = (outputs[..., :2] + grids) * strides
        //  outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides
        const float x_center = (feat_ptr[0] + grid0) * stride;
        const float y_center = (feat_ptr[1] + grid1) * stride;
        const float w = exp(feat_ptr[2]) * stride;
        const float h = exp(feat_ptr[3]) * stride;
        const float x0 = x_center - w * 0.5f;
        const float y0 = y_center - h * 0.5f;

        const float box_objectness = feat_ptr[4];
        for (int class_idx = 0; class_idx < num_class; class_idx++)
        {
            const float box_cls_score = feat_ptr[5 + class_idx];
            const float box_prob = box_objectness * box_cls_score;
            if (box_prob > prob_threshold)
            {
                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = w;
                obj.rect.height = h;
                obj.label = class_idx;
                obj.prob = box_prob;

                objects.push_back(obj);
            }
        } // class loop

        // advance to the next row of the blob
        feat_ptr += feat_blob.w;
    } // point anchor loop
}
|
||||
|
||||
// Run the YOLOX model on a BGR image and return the detected objects.
//
// bgr     - input image (pixel data is read as PIXEL_BGR)
// objects - output: final detections in coordinates of the original image,
//           overwritten on every call
//
// Returns 0 on success, -1 when the model files cannot be loaded (objects is
// left empty in that case).
static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolox;

    yolox.opt.use_vulkan_compute = true;
    // yolox.opt.use_bf16_storage = true;

    // Focus in yolov5
    yolox.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator);

    // original pretrained model from https://github.com/Megvii-BaseDetection/YOLOX
    // ncnn model param: https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_ncnn.tar.gz
    // NOTE that newest version YOLOX remove normalization of model (minus mean and then div by std),
    // which might cause your model outputs becoming a total mess, plz check carefully.
    // load_param / load_model report failure through a non-zero return value;
    // without a loaded model there is nothing meaningful to run
    if (yolox.load_param("yolox.param") != 0 || yolox.load_model("yolox.bin") != 0)
    {
        fprintf(stderr, "failed to load yolox.param / yolox.bin\n");
        objects.clear();
        return -1;
    }

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    // scale the longer side to YOLOX_TARGET_SIZE and keep the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)YOLOX_TARGET_SIZE / w;
        w = YOLOX_TARGET_SIZE;
        h = h * scale;
    }
    else
    {
        scale = (float)YOLOX_TARGET_SIZE / h;
        h = YOLOX_TARGET_SIZE;
        w = w * scale;
    }
    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, w, h);

    // pad to YOLOX_TARGET_SIZE rectangle
    const int wpad = YOLOX_TARGET_SIZE - w;
    const int hpad = YOLOX_TARGET_SIZE - h;
    ncnn::Mat in_pad;
    // different from yolov5, yolox only pad on bottom and right side,
    // which means users don't need to extra padding info to decode boxes coordinate.
    ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f);

    ncnn::Extractor ex = yolox.create_extractor();

    ex.input("images", in_pad);

    std::vector<Object> proposals;

    {
        ncnn::Mat out;
        ex.extract("output", out);

        static const int stride_arr[] = {8, 16, 32}; // might have stride=64 in YOLOX
        std::vector<int> strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0]));
        std::vector<GridAndStride> grid_strides;
        generate_grids_and_stride(YOLOX_TARGET_SIZE, strides, grid_strides);
        generate_yolox_proposals(grid_strides, out, YOLOX_CONF_THRESH, proposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, YOLOX_NMS_THRESH);

    const int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded: only the resize has to be undone,
        // because the padding was added on the bottom and right side
        float x0 = (objects[i].rect.x) / scale;
        float y0 = (objects[i].rect.y) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;

        // clip to the image
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
|
||||
|
||||
// Log each detection to stderr and display the image with boxes and captions
// drawn on a copy of it.
//
// bgr     - source image; drawing happens on a clone, the argument is not modified
// objects - detections to render; Object::label selects the caption from class_names
//
// The number of classes is taken from the model output width, so a label may
// lie outside the 80-entry name table; such labels are captioned "unknown"
// instead of reading past the end of the table.
//
// Shows the result in the "image" window and then waits in cv::waitKey(0).
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // 80 class names, looked up by Object::label
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_class_names = (int)(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // guard the table lookup against labels the table does not cover
        const bool label_known = obj.label >= 0 && obj.label < num_class_names;
        const char* class_name = label_known ? class_names[obj.label] : "unknown";

        char text[256];
        sprintf(text, "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // the caption goes right above the box; keep it inside the top and right image edges
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        // white backdrop first, then the black caption text over it
        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* imagepath = argv[1];
|
||||
|
||||
cv::Mat m = cv::imread(imagepath, 1);
|
||||
if (m.empty())
|
||||
{
|
||||
fprintf(stderr, "cv::imread %s failed\n", imagepath);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Object> objects;
|
||||
detect_yolox(m, objects);
|
||||
|
||||
draw_objects(m, objects);
|
||||
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user