718c41634f
1.项目后端整体迁移至PaddleOCR-NCNN算法,已通过基本的兼容性测试 2.工程改为使用CMake组织,后续为了更好地兼容第三方库,不再提供QMake工程 3.重整权利声明文件,重整代码工程,确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN,切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
312 lines
7.2 KiB
Markdown
312 lines
7.2 KiB
Markdown
# implement elementwise addition with/without broadcast using BinaryOp operation
|
|
|
|
* input must be fp32 storage without packing
|
|
* output is expected to be fp32 storage without packing
|
|
|
|
```cpp
|
|
// Element-wise addition c = a + b, executed through a standalone ncnn
// "BinaryOp" layer (op_type 0).
//
// a, b : the two operands handed to the layer as its bottom blobs.
// c    : receives the single top blob produced by the layer.
//
// The layer runs on 2 threads with fp16 storage and packing layout disabled.
void binary_add(const ncnn::Mat& a, const ncnn::Mat& b, ncnn::Mat& c)
{
    // Instantiate the operator by its registered name.
    ncnn::Layer* layer = ncnn::create_layer("BinaryOp");

    // Execution options shared by pipeline creation, forward and teardown.
    ncnn::Option opt;
    opt.use_packing_layout = false;
    opt.use_fp16_storage = false;
    opt.num_threads = 2;

    // Layer parameters: id 0 is op_type.
    ncnn::ParamDict params;
    params.set(0, 0);
    layer->load_param(params);

    layer->create_pipeline(opt);

    // BinaryOp takes two bottom blobs and yields one top blob.
    std::vector<ncnn::Mat> inputs;
    inputs.push_back(a);
    inputs.push_back(b);

    std::vector<ncnn::Mat> outputs(1);
    layer->forward(inputs, outputs, opt);

    c = outputs[0];

    // Tear down in reverse order of construction.
    layer->destroy_pipeline(opt);
    delete layer;
}
|
|
```
|
|
|
|
# implement 3x3 box blur on three channel image using ConvolutionDepthWise operation
|
|
|
|
* input must be fp32 storage without packing
|
|
* output is expected to be fp32 storage without packing
|
|
|
|
```cpp
|
|
// 3x3 box blur of a three channel image, implemented with a standalone ncnn
// "ConvolutionDepthWise" layer whose 27 weights are all 1/9.
//
// rgb : input blob passed to the layer.
// out : receives the layer output.
//
// The layer runs on 2 threads with fp16 storage and packing layout disabled.
void convolution_3x3_boxblur_RGB(const ncnn::Mat& rgb, ncnn::Mat& out)
{
    // 3 groups, each with one 3x3 kernel.
    const int weight_count = 3 * 3 * 3;
    const float box_weight = 1.f / 9;

    ncnn::Layer* layer = ncnn::create_layer("ConvolutionDepthWise");

    ncnn::Option opt;
    opt.use_packing_layout = false;
    opt.use_fp16_storage = false;
    opt.num_threads = 2;

    // Layer parameters, keyed by parameter id.
    ncnn::ParamDict params;
    params.set(0, 3);            // num_output
    params.set(1, 3);            // kernel_w
    params.set(5, 0);            // bias_term
    params.set(6, weight_count); // weight_data_size
    params.set(7, 3);            // group
    layer->load_param(params);

    // Only weight_data is supplied because bias_term is 0.
    ncnn::Mat weight_mats[1];
    weight_mats[0].create(weight_count);

    int idx = 0;
    while (idx < weight_count)
    {
        weight_mats[0][idx] = box_weight;
        ++idx;
    }

    ncnn::ModelBinFromMatArray model_bin(weight_mats);
    layer->load_model(model_bin);

    layer->create_pipeline(opt);

    layer->forward(rgb, out, opt);

    layer->destroy_pipeline(opt);
    delete layer;
}
|
|
```
|
|
# transpose Mat from chw to cwh using Permute operation
|
|
|
|
* input must be fp32 storage with/without packing
|
|
* output is expected to be fp32 storage packed
|
|
|
|
```cpp
|
|
// Transposes a Mat through a standalone ncnn "Permute" layer with
// order_type 1.
//
// in  : input blob; it is repacked first when its elempack differs from the
//       elempack selected below.
// out : receives the layer output.
//
// The layer runs on 2 threads with fp16 storage disabled and packing layout
// enabled.
void transpose(const ncnn::Mat& in, ncnn::Mat& out)
{
    ncnn::Layer* layer = ncnn::create_layer("Permute");

    ncnn::Option opt;
    opt.use_packing_layout = true;
    opt.use_fp16_storage = false;
    opt.num_threads = 2;

    // Layer parameters: id 0 is order_type.
    ncnn::ParamDict params;
    params.set(0, 1);
    layer->load_param(params);

    layer->create_pipeline(opt);

    // Number of scalar elements along the packed (outermost) axis.
    int elemcount = 0;
    switch (in.dims)
    {
    case 1:
        elemcount = in.elempack * in.w;
        break;
    case 2:
        elemcount = in.elempack * in.h;
        break;
    case 3:
        elemcount = in.elempack * in.c;
        break;
    default:
        break;
    }

    // Choose the target packing: 8 when divisible and AVX/AVX2 is reported,
    // otherwise 4 when divisible, otherwise unpacked. Layers without packing
    // support always get elempack 1.
    int dst_elempack = 1;
    if (layer->support_packing)
    {
        const bool fits_pack8 = (elemcount % 8 == 0)
                                && (ncnn::cpu_support_x86_avx2() || ncnn::cpu_support_x86_avx());
        if (fits_pack8)
        {
            dst_elempack = 8;
        }
        else if (elemcount % 4 == 0)
        {
            dst_elempack = 4;
        }
    }

    // Repack only when the input layout differs from the chosen one.
    ncnn::Mat in_packed = in;
    if (in.elempack != dst_elempack)
    {
        convert_packing(in, in_packed, dst_elempack, opt);
    }

    layer->forward(in_packed, out, opt);

    layer->destroy_pipeline(opt);
    delete layer;
}
|
|
```
|
|
# apply instance normalization
|
|
Formula: `x = (x - mean) / sqrt(var)` (eps is set to 0 in the example below).
|
|
|
|
* input can be fp32/fp16 storage with/without packing
|
|
* output is expected to be fp16 storage packed when supported, or fp32 storage packed otherwise
|
|
|
|
```cpp
|
|
// Applies instance normalization, x = (x - mean) / sqrt(var), through a
// standalone ncnn "InstanceNorm" layer configured with gamma = 1, beta = 0
// and eps = 0.
//
// in  : input blob in fp32 or fp16 storage, packed or unpacked.
// out : receives the layer output.
//
// The layer runs on 2 threads with fp16 storage and packing layout enabled.
// The input is cast and repacked as needed before the forward call.
void normalize(const ncnn::Mat& in, ncnn::Mat& out)
{
    ncnn::Option opt;
    opt.num_threads = 2;
    opt.use_fp16_storage = true;
    opt.use_packing_layout = true;

    ncnn::Layer* op = ncnn::create_layer("InstanceNorm");

    // For a packed 3-dim blob, in.c counts packs rather than scalar channels;
    // the element count along that axis is elempack * c (the same formula the
    // elempack resolution below uses). Sizing the layer with in.c alone would
    // under-count channels for packed input. Other ranks keep in.c.
    const int channels = (in.dims == 3) ? in.elempack * in.c : in.c;

    // set param
    ncnn::ParamDict pd;
    pd.set(0, channels); // channels
    pd.set(1, 0.f);      // eps

    op->load_param(pd);

    // set weights: identity scale and zero shift, one entry per channel
    ncnn::Mat weights[2];
    weights[0].create(channels); // gamma_data
    weights[1].create(channels); // beta_data

    weights[0].fill(1.f);
    weights[1].fill(0.f);

    op->load_model(ncnn::ModelBinFromMatArray(weights));

    op->create_pipeline(opt);

    // Match the storage type to what the layer supports: fp32 -> fp16 when
    // the layer handles fp16 storage, fp16 -> fp32 when it does not.
    ncnn::Mat in_fp16 = in;
    if (in.elembits() == 32 && op->support_fp16_storage)
    {
        cast_float32_to_float16(in, in_fp16, opt);
    }
    if (in.elembits() == 16 && !op->support_fp16_storage)
    {
        cast_float16_to_float32(in, in_fp16, opt);
    }

    ncnn::Mat in_fp16_packed = in_fp16;
    {
        // resolve dst_elempack from the scalar element count on the packed axis
        int dims = in_fp16.dims;
        int elemcount = 0;
        if (dims == 1) elemcount = in_fp16.elempack * in_fp16.w;
        if (dims == 2) elemcount = in_fp16.elempack * in_fp16.h;
        if (dims == 3) elemcount = in_fp16.elempack * in_fp16.c;

        // 8 when divisible and AVX/AVX2 is reported, else 4 when divisible,
        // else unpacked; layers without packing support stay at 1.
        int dst_elempack = 1;
        if (op->support_packing)
        {
            if (elemcount % 8 == 0 && (ncnn::cpu_support_x86_avx2() || ncnn::cpu_support_x86_avx()))
                dst_elempack = 8;
            else if (elemcount % 4 == 0)
                dst_elempack = 4;
        }

        // repack only when the current layout differs from the chosen one
        if (in_fp16.elempack != dst_elempack)
        {
            convert_packing(in_fp16, in_fp16_packed, dst_elempack, opt);
        }
    }

    // forward
    op->forward(in_fp16_packed, out, opt);

    op->destroy_pipeline(opt);

    delete op;
}
|
|
```
|
|
|
|
# cpu -> gpu -> forward -> gpu -> cpu
|
|
|
|
```cpp
|
|
ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
|
|
|
|
ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
|
|
ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
|
|
|
|
ncnn::VkWeightAllocator* weight_vkallocator = new ncnn::VkWeightAllocator(vkdev);
|
|
ncnn::VkWeightStagingAllocator* weight_staging_vkallocator = new ncnn::VkWeightStagingAllocator(vkdev);
|
|
|
|
// create layer
|
|
ncnn::Layer* convolution = ncnn::create_layer("Convolution");
|
|
convolution->vkdev = vkdev;
|
|
|
|
// set option
|
|
ncnn::Option opt;
|
|
opt.num_threads = 4;
|
|
opt.use_vulkan_compute = true;
|
|
opt.blob_vkallocator = blob_vkallocator;
|
|
opt.workspace_vkallocator = blob_vkallocator;
|
|
opt.staging_vkallocator = staging_vkallocator;
|
|
|
|
// load param
|
|
{
|
|
ncnn::ParamDict pd;
|
|
pd.set(0, outch);
|
|
pd.set(1, ksize);
|
|
pd.set(6, outch*inch*ksize*ksize);
|
|
pd.use_vulkan_compute = 1;
|
|
|
|
convolution->load_param(pd);
|
|
}
|
|
|
|
// load model
|
|
{
|
|
ncnn::Mat weights[2];
|
|
weights[0] = random_mat(outch*inch*ksize*ksize);
|
|
weights[1] = random_mat(outch);
|
|
|
|
ncnn::ModelBinFromMatArray mb(weights);
|
|
convolution->load_model(mb);
|
|
}
|
|
|
|
// create pipeline
|
|
convolution->create_pipeline(opt);
|
|
|
|
// upload model
|
|
{
|
|
ncnn::VkTransfer cmd(vkdev);
|
|
|
|
ncnn::Option opt_upload = opt;
|
|
opt_upload.blob_vkallocator = weight_vkallocator;
|
|
opt_upload.workspace_vkallocator = weight_vkallocator;
|
|
opt_upload.staging_vkallocator = weight_staging_vkallocator;
|
|
|
|
convolution->upload_model(cmd, opt_upload);
|
|
|
|
cmd.submit_and_wait();
|
|
}
|
|
|
|
ncnn::Mat bottom = random_mat(w, h, inch);
|
|
|
|
ncnn::Mat top;
|
|
|
|
// forward
|
|
{
|
|
ncnn::VkCompute cmd(vkdev);
|
|
|
|
ncnn::VkMat bottom_gpu;
|
|
cmd.record_upload(bottom, bottom_gpu, opt);
|
|
|
|
ncnn::VkMat top_gpu;
|
|
convolution->forward(bottom_gpu, top_gpu, cmd, opt);
|
|
|
|
cmd.record_download(top_gpu, top, opt);
|
|
|
|
cmd.submit_and_wait();
|
|
}
|
|
|
|
convolution->destroy_pipeline(opt);
|
|
|
|
delete convolution;
|
|
|
|
vkdev->reclaim_blob_allocator(blob_vkallocator);
|
|
vkdev->reclaim_staging_allocator(staging_vkallocator);
|
|
|
|
weight_vkallocator->clear();
|
|
weight_staging_vkallocator->clear();
|
|
delete weight_vkallocator;
|
|
delete weight_staging_vkallocator;
|
|
```
|
|
|