feat: switch the backend to PaddleOCR-NCNN and the project to CMake

1. The backend has been migrated to the PaddleOCR-NCNN algorithm and has passed basic compatibility tests.
2. The project is now organized with CMake; to integrate third-party libraries more cleanly, a QMake project will no longer be provided.
3. The copyright/notice files and the source tree have been reorganized to minimize the risk of license infringement.

Log: switch the backend to PaddleOCR-NCNN and the project to CMake
Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
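As an illustration of the CMake switch described above, a top-level CMakeLists.txt for a layout like this one might look roughly like the sketch below. The target names, source paths, and the choice to pull the vendored trees in with add_subdirectory are assumptions for illustration only, not the project's actual build scripts.

# Minimal sketch (assumed names/paths): a top-level CMakeLists.txt wiring the
# vendored OpenCV 4.5.4 sources under 3rdparty/ and an NCNN-based PaddleOCR backend.
cmake_minimum_required(VERSION 3.12)
project(paddleocr_ncnn_demo CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Vendored third-party code is kept in-tree, as in this commit.
add_subdirectory(3rdparty/opencv-4.5.4)   # assumed to expose opencv_* targets
add_subdirectory(3rdparty/ncnn)           # assumed NCNN source checkout

add_executable(ocr_demo src/main.cpp)     # assumed application sources
target_link_libraries(ocr_demo PRIVATE
    opencv_core opencv_imgproc opencv_dnn # vendored OpenCV modules used here
    ncnn)                                 # NCNN runtime for the PaddleOCR models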
3rdparty/opencv-4.5.4/modules/dnn/src/layers/accum_layer.cpp (vendored, new file, 141 lines)
@@ -0,0 +1,141 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2020, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

#include "../precomp.hpp"
#include "layers_common.hpp"


namespace cv { namespace dnn {

class AccumLayerImpl CV_FINAL : public AccumLayer
{
public:
    AccumLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        top_height = params.get<int>("top_height", 0);
        top_width = params.get<int>("top_width", 0);
        divisor = params.get<int>("size_divisible_by", 0);
        have_reference = params.get<String>("have_reference", "false") == "true";
    }

    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        std::vector<int> outShape;
        int batch = inputs[0][0];
        outShape.push_back(batch);

        if (have_reference)
        {
            CV_Assert(inputs.size() >= 2);
            int totalchannels = 0;
            for (int i = 0; i < inputs.size() - 1; i++) {
                CV_Assert(inputs[i][0] == batch);
                totalchannels += inputs[i][1];
            }
            outShape.push_back(totalchannels);

            int height = inputs.back()[2];
            int width = inputs.back()[3];

            outShape.push_back(height);
            outShape.push_back(width);
        }
        else
        {
            int maxwidth = -1;
            int maxheight = -1;
            int totalchannels = 0;

            // Find largest blob size and count total channels
            for (int i = 0; i < inputs.size(); ++i)
            {
                totalchannels += inputs[i][1];
                maxheight = std::max(maxheight, inputs[i][2]);
                maxwidth = std::max(maxwidth, inputs[i][3]);
                CV_Assert(inputs[i][0] == batch);
            }
            outShape.push_back(totalchannels);

            int out_h = divisor ? static_cast<int>(ceil(maxheight / divisor) * divisor) : top_height;
            int out_w = divisor ? static_cast<int>(ceil(maxwidth / divisor) * divisor) : top_width;

            // Layer can specify custom top size which is larger than default
            if (out_h <= maxheight || out_w <= maxwidth)
            {
                out_h = maxheight;
                out_w = maxwidth;
            }

            outShape.push_back(out_h);
            outShape.push_back(out_w);
        }

        outputs.assign(1, outShape);
        return false;
    }

    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        LayerParams resizeParams;
        resizeParams.set("interpolation", "bilinear");
        resizeParams.set("align_corners", true);
        resize = ResizeLayer::create(resizeParams);
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        const int out_h = outputs[0].size[2];
        const int out_w = outputs[0].size[3];
        float* out_data = outputs[0].ptr<float>();
        std::vector<int> sizes(&outputs[0].size[0], &outputs[0].size[0] + outputs[0].size.dims());
        for (int i = 0; i < inputs.size() - have_reference; i++)
        {
            sizes[1] = inputs[i].size[1];
            Mat outSlice(sizes, CV_32F, out_data);

            if (out_h == inputs[i].size[2] && out_w == inputs[i].size[3])
            {
                inputs[i].copyTo(outSlice);
            }
            else
            {
                std::vector<Mat> inp_slices, out_slices;
                inp_slices.push_back(inputs[i]);
                out_slices.push_back(outSlice);

                resize->finalize(inp_slices, out_slices);
                resize->forward(inp_slices, out_slices, internals_arr);
            }
            out_data += outSlice.total(1);
        }
    }

private:
    int top_height;
    int top_width;
    int divisor;
    bool have_reference;
    Ptr<ResizeLayer> resize;
};

Ptr<AccumLayer> AccumLayer::create(const LayerParams& params)
{
    return Ptr<AccumLayer>(new AccumLayerImpl(params));
}

}}  // namespace cv::dnn
3rdparty/opencv-4.5.4/modules/dnn/src/layers/batch_norm_layer.cpp (vendored, new file, 447 lines)
@@ -0,0 +1,447 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

/*
Implementation of Batch Normalization layer.
*/

#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"

#include <opencv2/dnn/shape_utils.hpp>

#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/batch_norm.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv
{
namespace dnn
{

class BatchNormLayerImpl CV_FINAL : public BatchNormLayer
{
public:
    Mat origin_weights, origin_bias;
    Mat weights_, bias_;
    UMat umat_weight, umat_bias;
    mutable int dims;


    BatchNormLayerImpl(const LayerParams& params)
        : dims(-1)
    {
        setParamsFrom(params);
        CV_Assert(blobs.size() >= 2);

        hasWeights = params.get<bool>("has_weight", false);
        hasBias = params.get<bool>("has_bias", false);
        useGlobalStats = params.get<bool>("use_global_stats", true);
        if(params.get<bool>("scale_bias", false))
            hasWeights = hasBias = true;
        epsilon = params.get<float>("eps", 1E-5);

        size_t n = blobs[0].total();
        CV_Assert(blobs[1].total() == n &&
                  blobs[0].isContinuous() && blobs[1].isContinuous() &&
                  blobs[0].type() == CV_32F && blobs[1].type() == CV_32F);

        float varMeanScale = 1.f;
        if (!hasWeights && !hasBias && blobs.size() > 2 && useGlobalStats) {
            CV_Assert(blobs.size() == 3); CV_CheckTypeEQ(blobs[2].type(), CV_32FC1, "");
            varMeanScale = blobs[2].at<float>(0);
            if (varMeanScale != 0)
                varMeanScale = 1/varMeanScale;
        }

        const int biasBlobIndex = blobs.size() - 1;
        const int weightsBlobIndex = biasBlobIndex - hasBias;

        if( hasWeights )
        {
            CV_Assert((size_t)weightsBlobIndex < blobs.size());
            const Mat& w = blobs[weightsBlobIndex];
            CV_Assert(w.isContinuous() && w.type() == CV_32F && w.total() == (size_t)n);
        }

        if( hasBias )
        {
            CV_Assert((size_t)biasBlobIndex < blobs.size());
            const Mat& b = blobs[weightsBlobIndex];
            CV_Assert(b.isContinuous() && b.type() == CV_32F && b.total() == (size_t)n);
        }

        const float* meanData = blobs[0].ptr<float>();
        const float* stdData = blobs[1].ptr<float>();
        const float* weightsData = hasWeights ? blobs[weightsBlobIndex].ptr<float>() : 0;
        const float* biasData = hasBias ? blobs[biasBlobIndex].ptr<float>() : 0;

        origin_weights.create(1, (int)n, CV_32F);
        origin_bias.create(1, (int)n, CV_32F);

        float* dstWeightsData = origin_weights.ptr<float>();
        float* dstBiasData = origin_bias.ptr<float>();

        for (size_t i = 0; i < n; ++i)
        {
            float w = (hasWeights ? weightsData[i] : 1.0f) / sqrt(stdData[i] * varMeanScale + epsilon);
            dstWeightsData[i] = w;
            dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
        }
    }

    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
    {
        origin_weights.reshape(1, 1).copyTo(weights_);
        origin_bias.reshape(1, 1).copyTo(bias_);
    }

    void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
    {
        scale = weights_;
        shift = bias_;
    }

    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
    {
        Mat w, b;
        top->getScaleShift(w, b);
        if (w.empty() && b.empty())
            return false;

        const int numChannels = weights_.total();
        const int numFusedWeights = w.total();
        const int numFusedBias = b.total();

        if ((numFusedWeights != numChannels && numFusedWeights != 1 && !w.empty()) ||
            (numFusedBias != numChannels && numFusedBias != 1 && !b.empty()))
            return false;

        if (!w.empty())
        {
            w = w.reshape(1, 1);
            if (numFusedWeights == 1)
            {
                multiply(weights_, w.at<float>(0), weights_);
                multiply(bias_, w.at<float>(0), bias_);
            }
            else
            {
                multiply(weights_, w, weights_);
                multiply(bias_, w, bias_);
            }
        }
        if (!b.empty())
        {
            b = b.reshape(1, 1);
            if (numFusedBias == 1)
                add(bias_, b.at<float>(0), bias_);
            else
                add(bias_, b.reshape(1, 1), bias_);
        }
        return true;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        dims = inputs[0].size();
        if (!useGlobalStats && inputs[0][0] != 1)
            CV_Error(Error::StsNotImplemented, "Batch normalization in training mode with batch size > 1");
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        return true;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return (backendId == DNN_BACKEND_OPENCV) ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide()) ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && (preferableTarget == DNN_TARGET_CPU || dims == 4));
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inputs_.depth() == CV_16S);
        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        CV_Assert(blobs.size() >= 2);
        CV_Assert(inputs.size() == 1);

        if (use_half && inputs[0].dims == 2)
            return false;

        if (umat_weight.empty())
        {
            weights_.copyTo(umat_weight);
            bias_.copyTo(umat_bias);
        }

        UMat &inpBlob = inputs[0];
        int groups = inpBlob.size[0];
        int channels = inpBlob.size[1];
        int planeSize = 1;
        for (size_t i = 2; i < inpBlob.dims; i++) {
            planeSize *= inpBlob.size[i];
        }

        String opts = (use_half) ? " -DDtype=half" : " -DDtype=float";
        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            if (inpBlob.dims == 2)
            {
                UMat& src = inputs[ii];
                UMat& dst = outputs[ii];
                multiply(src, weights_, dst);
                add(dst, bias_, dst);
            }
            else
            {
                MatShape s = shape(groups * channels, planeSize);
                UMat src = inputs[ii].reshape(1, s.size(), &s[0]);
                UMat dst = outputs[ii].reshape(1, s.size(), &s[0]);
                int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
                String buildopt = format("-DNUM=%d", number) + opts;
                String kname = format("batch_norm%d", number);
                if (number == 1)
                    buildopt += format(" -Dconvert_T=convert_%s", use_half ? "half" : "float");
                else
                    buildopt += format(" -Dconvert_T=convert_%s%d", use_half ? "half" : "float", number);
                ocl::Kernel kernel(kname.c_str(), ocl::dnn::batchnorm_oclsrc, buildopt);
                if (kernel.empty())
                    return false;
                size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
                kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
                kernel.set(1, (int)s[0]);
                kernel.set(2, (int)s[1]);
                kernel.set(3, (int)channels);
                kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_weight));
                kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_bias));
                kernel.set(6, ocl::KernelArg::PtrWriteOnly(dst));
                bool ret = kernel.run_(2, global, NULL, false);
                if (!ret)
                    return false;
            }
        }
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert(blobs.size() >= 2);
        CV_Assert(inputs.size() == 1);

        Mat &inpBlob = inputs[0];
        int planeSize = 1;
        for (size_t i = 2; i < inpBlob.dims; i++) {
            planeSize *= inpBlob.size[i];
        }

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            Mat &outBlob = outputs[ii];

            for(int num = 0; num < outBlob.size[0]; num++)
            {
                for (int n = 0; n < outBlob.size[1]; n++)
                {
                    float w = weights_.at<float>(n);
                    float b = bias_.at<float>(n);
                    Mat inpBlobPlane(1, planeSize, CV_32F, inpBlob.ptr<float>(num, n));
                    Mat outBlobPlane(1, planeSize, CV_32F, outBlob.ptr<float>(num, n));
                    inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b);
                }
            }
        }
    }

    void forwardSlice(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
    {
        for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
        {
            int i = 0;
            float w = weights_.at<float>(cn);
            float b = bias_.at<float>(cn);
#if CV_SIMD128
            v_float32x4 wV = v_setall_f32(w), bV = v_setall_f32(b);
            for( ; i <= len - 16; i += 16 )
            {
                v_float32x4 x0 = v_load(srcptr + i);
                v_float32x4 x1 = v_load(srcptr + i + 4);
                v_float32x4 x2 = v_load(srcptr + i + 8);
                v_float32x4 x3 = v_load(srcptr + i + 12);
                x0 = v_muladd(x0, wV, bV);
                x1 = v_muladd(x1, wV, bV);
                x2 = v_muladd(x2, wV, bV);
                x3 = v_muladd(x3, wV, bV);
                v_store(dstptr + i, x0);
                v_store(dstptr + i + 4, x1);
                v_store(dstptr + i + 8, x2);
                v_store(dstptr + i + 12, x3);
            }
#endif
            for( ; i < len; i++ )
                dstptr[i] = w * srcptr[i] + b;
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::BatchNormOp>(preferableTarget, std::move(context->stream), weights_, bias_);
    }
#endif

    virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
    {
        switch (node->backendId)
        {
            case DNN_BACKEND_HALIDE:
            {
#ifdef HAVE_HALIDE
                auto base = node.dynamicCast<HalideBackendNode>();
                Halide::Func& input = base->funcs.back();
                Halide::Var x("x"), y("y"), c("c"), n("n");
                Halide::Func top = attachHalide(input(x, y, c, n));
                return Ptr<BackendNode>(new HalideBackendNode(base, top));
#endif // HAVE_HALIDE
                break;
            }
        }
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        Halide::Buffer<float> input = halideBuffer(inputs[0]);
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = attachHalide(input(x, y, c, n));
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_HALIDE
    // attachHalide can work both with Halide::Buffer and Halide::Func. In the
    // second case it will be a fusion.
    Halide::Func attachHalide(const Halide::Expr& input)
    {
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Var x("x"), y("y"), c("c"), n("n");

        const int numChannels = weights_.total();
        auto weights = wrapToHalideBuffer(weights_, {numChannels});
        auto bias = wrapToHalideBuffer(bias_, {numChannels});
        top(x, y, c, n) = input * weights(c) + bias(c);
        return top;
    }
#endif // HAVE_HALIDE

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
        const size_t numChannels = weights_.total();
        addConstantData("weights", wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
        addConstantData("biases", wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif // HAVE_DNN_IE_NN_BUILDER_2019

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
        shape[1] = weights_.total();
        auto weight = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), weights_.data);
        auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), bias_.data);
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2)
        auto scale_node = std::make_shared<ngraph::op::v1::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
#else
        auto scale_node = std::make_shared<ngraph::op::v0::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
#endif
        auto scale_shift = std::make_shared<ngraph::op::v1::Add>(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
        return Ptr<BackendNode>(new InfEngineNgraphNode(scale_shift));
    }
#endif // HAVE_DNN_NGRAPH

    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
    {
        params.set("input_scale", scales[0][0]);
        params.set("input_zeropoint", zeropoints[0][0]);

        params.blobs.clear();
        params.blobs.push_back(origin_weights);
        params.blobs.push_back(origin_bias);
        return true;
    }

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning

        int64 flops = 0;
        for(int i = 0; i < inputs.size(); i++)
        {
            flops += 3*total(inputs[i]);
        }
        return flops;
    }

private:
    bool useGlobalStats;
};

Ptr<BatchNormLayer> BatchNormLayer::create(const LayerParams& params)
{
    return Ptr<BatchNormLayer>(new BatchNormLayerImpl(params));
}

}  // namespace dnn
}  // namespace cv
3rdparty/opencv-4.5.4/modules/dnn/src/layers/blank_layer.cpp (vendored, new file, 198 lines)
@@ -0,0 +1,198 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reshape.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv
{
namespace dnn
{
class BlankLayerImpl CV_FINAL : public BlankLayer
{
public:
    BlankLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        return true;
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        inputs_.getUMatVector(inputs);
        outputs_.getUMatVector(outputs);

        for (int i = 0, n = outputs.size(); i < n; ++i)
        {
            void *src_handle = inputs[i].handle(ACCESS_READ);
            void *dst_handle = outputs[i].handle(ACCESS_WRITE);
            if (src_handle != dst_handle)
                inputs[i].copyTo(outputs[i]);
        }

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        for (int i = 0, n = outputs.size(); i < n; ++i)
            if (outputs[i].data != inputs[i].data)
                inputs[i].copyTo(outputs[i]);
    }


#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
        std::vector<size_t> dims = input->getDims();
        CV_Assert(!dims.empty());

        InferenceEngine::Builder::Layer ieLayer(name);
        ieLayer.setName(name);
        if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
        {
            ieLayer.setType("Copy");
        }
        else
        {
            ieLayer.setType("Split");
            ieLayer.getParameters()["axis"] = dims.size() - 1;
            ieLayer.getParameters()["out_sizes"] = dims[0];
        }
        ieLayer.setInputPorts({InferenceEngine::Port(dims)});
        ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif // HAVE_DNN_IE_NN_BUILDER_2019


#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        ngraph::OutputVector inp{ieInpNode};
        auto blank = std::make_shared<ngraph::op::Concat>(inp, 0);
        return Ptr<BackendNode>(new InfEngineNgraphNode(blank));
    }
#endif // HAVE_DNN_NGRAPH


#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
    }
#endif

    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
    {
        return true;
    }
};

Ptr<Layer> BlankLayer::create(const LayerParams& params)
{
    // In case of Caffe's Dropout layer from Faster-RCNN framework,
    // https://github.com/rbgirshick/caffe-fast-rcnn/tree/faster-rcnn
    // return Power layer.
    if (!params.get<bool>("scale_train", true))
    {
        float scale = 1 - params.get<float>("dropout_ratio", 0.5f);
        CV_Assert(scale > 0);

        LayerParams powerParams;
        powerParams.name = params.name;
        powerParams.type = "Power";
        powerParams.set("scale", scale);

        return PowerLayer::create(powerParams);
    }
    else
        return Ptr<BlankLayer>(new BlankLayerImpl(params));
}

}
}
3rdparty/opencv-4.5.4/modules/dnn/src/layers/concat_layer.cpp (vendored, new file, 419 lines)
@@ -0,0 +1,419 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"

#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/concat.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv
{
namespace dnn
{

class ConcatLayerImpl CV_FINAL : public ConcatLayer
{
public:
    ConcatLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        axis = params.get<int>("axis", 1);
        padding = params.get<bool>("padding", false);
        paddingValue = params.get<int>("padding_value", 0);
    }

    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() > 0);
        outputs.resize(1, inputs[0]);
        int cAxis = normalize_axis(axis, inputs[0]);

        int axisSum = 0;
        for (size_t i = 0; i < inputs.size(); i++)
        {
            MatShape curShape = inputs[i];

            if (padding)
            {
                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
                {
                    outputs[0][curAxis] = std::max(outputs[0][curAxis], curShape[curAxis]);
                }
            }
            else
            {
                CV_Assert(curShape.size() == outputs[0].size());
                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
                {
                    if (curAxis != cAxis && outputs[0][curAxis] != curShape[curAxis])
                        CV_Error(Error::StsBadSize, "Inconsistent shape for ConcatLayer");
                }
            }

            axisSum += curShape[cAxis];
        }
        outputs[0][cAxis] = axisSum;
        return false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) || // By channels
               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !padding) ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding);
    }

    template <class T>
    class ChannelConcatInvoker : public ParallelLoopBody
    {
    public:
        std::vector<Mat>* inputs;
        Mat* output;
        int nstripes;
        std::vector<const T*> chptrs;

        static void run(std::vector<Mat>& inputs, Mat& output, int nstripes)
        {
            ChannelConcatInvoker cc;
            cc.inputs = &inputs;
            cc.output = &output;
            cc.nstripes = nstripes;

            size_t i, ninputs = inputs.size();
            int nchannels = 0, batchsz = output.size[0];
            for( i = 0; i < ninputs; i++ )
            {
                Mat& inp = inputs[i];
                CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S || inp.type() == CV_8S) &&
                           inp.dims == 4 && inp.size[0] == output.size[0] &&
                           inp.size[2] == output.size[2] &&
                           inp.size[3] == output.size[3] );
                nchannels += inp.size[1];
            }
            CV_Assert( nchannels == output.size[1] );
            CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S || output.type() == CV_8S) );

            cc.chptrs.resize(nchannels*batchsz);

            int ofs = 0;
            for( i = 0; i < ninputs; i++)
            {
                Mat& inp = inputs[i];
                for( int j = 0; j < batchsz; j++ )
                    for( int k = 0; k < inp.size[1]; k++ )
                    {
                        const T* ptr = inp.ptr<T>(j, k);
                        cc.chptrs[ofs + j*nchannels + k] = ptr;
                    }
                ofs += inp.size[1];
            }

            parallel_for_(Range(0, nstripes), cc, nstripes);
        }

        ChannelConcatInvoker() : inputs(0), output(0), nstripes(0) {}

        void operator()(const Range& r) const CV_OVERRIDE
        {
            size_t planeSize = (size_t)output->size[2]*output->size[3];
            size_t nch = chptrs.size();
            size_t total = nch*planeSize;
            size_t stripeSize = (total + nstripes - 1)/nstripes;
            size_t stripeStart = r.start*stripeSize;
            size_t stripeEnd = std::min(total, r.end*stripeSize);
            const T** ptrs = (const T**)&chptrs[0];
            T* outptr = output->ptr<T>();
            size_t blockSize0 = 1 << 16;

            for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
            {
                size_t ch = ofs0/planeSize;
                size_t ofs = ofs0 - ch*planeSize;
                size_t blockSize = std::min(blockSize0, planeSize - ofs);
                memcpy(outptr + ofs0, ptrs[ch] + ofs, blockSize*sizeof(outptr[0]));
                ofs0 += blockSize;
            }
        }
    };

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        bool use_half = (inps.depth() == CV_16S);
        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        int cAxis = normalize_axis(axis, inputs[0].dims);
        if (padding)
            return false;

        int bottom_concat_axis;
        int concat_size = total(shape(inputs[0]), cAxis + 1);
        int top_concat_axis = outputs[0].size[cAxis];
        int num_concats = total(shape(inputs[0]), 0, cAxis);
        int offset_concat_axis = 0;
        UMat& outMat = outputs[0];
        String buildopt = format(" -DDtype=%s", (use_half) ? "half" : "float");
        String kname = format("concat_%s", use_half ? "half" : "float");

        for (size_t i = 0; i < inputs.size(); i++)
        {
            ocl::Kernel kernel(kname.c_str(), ocl::dnn::concat_oclsrc, buildopt);
            if (kernel.empty())
                return false;

            UMat& inpMat = inputs[i];
            bottom_concat_axis = inputs[i].size[cAxis];
            size_t nthreads = inputs[i].total();

            kernel.set(0, (int)nthreads);
            kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
            kernel.set(2, (int)num_concats);
            kernel.set(3, (int)concat_size);
            kernel.set(4, (int)top_concat_axis);
            kernel.set(5, (int)bottom_concat_axis);
            kernel.set(6, (int)offset_concat_axis);
            kernel.set(7, ocl::KernelArg::PtrWriteOnly(outMat));

            if (!kernel.run(1, &nthreads, NULL, false))
                return false;

            offset_concat_axis += bottom_concat_axis;
        }

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
                   inputs_arr.depth() != CV_8S,
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        int cAxis = normalize_axis(axis, inputs[0].dims);
        Mat& outMat = outputs[0];

        if (padding)
            outMat.setTo(paddingValue);

        if( cAxis == 1 && outMat.dims == 4 && !padding)
        {
            int nstripes = getNumThreads();
            if (outMat.type() == CV_8S)
                ChannelConcatInvoker<int8_t>::run(inputs, outMat, nstripes);
            else
                ChannelConcatInvoker<float>::run(inputs, outMat, nstripes);
        }
        else
        {
            std::vector<Range> ranges(outputs[0].dims, Range::all());

            ranges[cAxis].start = 0;
            for (size_t i = 0; i < inputs.size(); i++)
            {
                ranges[cAxis].end = ranges[cAxis].start + inputs[i].size[cAxis];
                for (int j = 0; j < outMat.dims; ++j)
                {
                    if (j == cAxis) continue;
                    ranges[j].start = (outMat.size[j] - inputs[i].size[j]) / 2;
                    ranges[j].end = ranges[j].start + inputs[i].size[j];
                }
                inputs[i].copyTo(outMat(&ranges[0]));
                ranges[cAxis].start = ranges[cAxis].end;
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto concat_axis = normalize_axis(axis, input_wrapper->getRank());
        return make_cuda_node<cuda4dnn::ConcatOp>(preferableTarget, std::move(context->stream), concat_axis, padding);
    }
#endif

    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        vkcom::Tensor in = VkComTensor(input[0]);
        int cAxis = normalize_axis(axis, in.dimNum());
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpConcat(cAxis));
        return Ptr<BackendNode>(new VkComBackendNode(input, op));
#endif // HAVE_VULKAN
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        int offset = inputBuffers[0].channels();
        Halide::Expr topExpr = select(c < offset,
                                      inputBuffers[0](x, y, c, n),
                                      inputBuffers[1](x, y, c - offset, n));
        for (int i = 2; i < input.size(); ++i)
        {
            offset += inputBuffers[i - 1].channels();
            topExpr = select(c < offset, topExpr,
                             inputBuffers[i](x, y, c - offset, n));
        }
        top(x, y, c, n) = topExpr;
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

        InferenceEngine::Builder::ConcatLayer ieLayer(name);
        ieLayer.setAxis(normalize_axis(axis, input->getDims().size()));
        ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif // HAVE_DNN_IE_NN_BUILDER_2019


#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]);
        const int numDims = data->getDims().size();
        const int cAxis = normalize_axis(axis, numDims);
        std::vector<size_t> maxDims(numDims, 0);

        CV_Assert(inputs.size() == nodes.size());
        ngraph::OutputVector inp_nodes;
        for (int i = 0; i < nodes.size(); ++i)
        {
            inp_nodes.push_back(nodes[i].dynamicCast<InfEngineNgraphNode>()->node);

            std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
            for (int i = 0; i < numDims; ++i)
                maxDims[i] = std::max(maxDims[i], inpShape[i]);
        }
        for (int i = 0; i < inp_nodes.size(); ++i)
        {
            bool needPadding = false;
            std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
            std::vector<int64_t> begins(inpShape.size(), 0), ends(inpShape.size(), 0);
            for (int j = 0; j < inpShape.size(); ++j)
            {
                if (j != cAxis && inpShape[j] != maxDims[j])
                {
                    needPadding = true;
                    begins[j] = static_cast<int64_t>((maxDims[j] - inpShape[j]) / 2);
                    ends[j] = static_cast<int64_t>(maxDims[j] - inpShape[j] - begins[j]);
                }
            }
            if (needPadding)
            {
                inp_nodes[i] = std::make_shared<ngraph::op::v1::Pad>(
                    inp_nodes[i],
                    std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{begins.size()}, begins.data()),
                    std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ends.size()}, ends.data()),
                    ngraph::op::PadMode::CONSTANT);
            }
        }
        auto concat = std::make_shared<ngraph::op::Concat>(inp_nodes, cAxis);
        return Ptr<BackendNode>(new InfEngineNgraphNode(concat));
    }
#endif // HAVE_DNN_NGRAPH

    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
    {
        if (padding)
            params.set("padding_value", zeropoints[1][0]);
        return true;
    }
};

Ptr<ConcatLayer> ConcatLayer::create(const LayerParams& params)
{
    return Ptr<ConcatLayer>(new ConcatLayerImpl(params));
}

}
}
3rdparty/opencv-4.5.4/modules/dnn/src/layers/const_layer.cpp (vendored, new file, 131 lines)
@@ -0,0 +1,131 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

#include "../precomp.hpp"
#include "../op_inf_engine.hpp"
#include "../op_cuda.hpp"
#include "layers_common.hpp"
#include "../ie_ngraph.hpp"

#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/const.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv { namespace dnn {

class ConstLayerImpl CV_FINAL : public ConstLayer
{
public:
    ConstLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        CV_Assert(blobs.size() == 1);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
               backendId == DNN_BACKEND_CUDA;
    }

    virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
                                 const int requiredOutputs,
                                 std::vector<MatShape> &outputs,
                                 std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.empty());
        outputs.assign(1, shape(blobs[0]));
        return false;
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        std::vector<UMat> outputs;
        outs.getUMatVector(outputs);
        if (outs.depth() == CV_16S)
            convertFp16(blobs[0], outputs[0]);
        else
            blobs[0].copyTo(outputs[0]);
        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);
        blobs[0].copyTo(outputs[0]);
    }


#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
        InferenceEngine::Builder::ConstLayer ieLayer(name);
        ieLayer.setData(wrapToInfEngineBlob(blobs[0]));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif // HAVE_DNN_IE_NN_BUILDER_2019


#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                                           getShape<size_t>(blobs[0]),
                                                           blobs[0].data);
        return Ptr<BackendNode>(new InfEngineNgraphNode(node));
    }
#endif // HAVE_DNN_NGRAPH


#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        CV_Assert(blobs.size() == 1);
        return make_cuda_node<cuda4dnn::ConstOp>(preferableTarget, std::move(context->stream), blobs[0]);
    }
#endif

    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
    {
        Mat quantizedBlob;
        blobs[0].convertTo(quantizedBlob, CV_8S, 1.f/scales[1][0], zeropoints[1][0]);
        params.blobs.clear();
        params.blobs.push_back(quantizedBlob);
        return true;
    }
};

Ptr<Layer> ConstLayer::create(const LayerParams& params)
{
    return Ptr<Layer>(new ConstLayerImpl(params));
}

}}  // namespace cv::dnn
3rdparty/opencv-4.5.4/modules/dnn/src/layers/convolution_layer.cpp (vendored, new file, 3050 lines)
File diff suppressed because it is too large.
3rdparty/opencv-4.5.4/modules/dnn/src/layers/correlation_layer.cpp (vendored, new file, 207 lines)
@ -0,0 +1,207 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2020, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class CorrelationLayerImpl CV_FINAL : public CorrelationLayer
|
||||
{
|
||||
public:
|
||||
CorrelationLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
pad = params.get<int>("pad", 0);
|
||||
CV_Assert_N(params.has("kernel_size"), params.has("max_displacement"));
|
||||
max_displacement = params.get<int>("max_displacement");
|
||||
kernel = params.get<int>("kernel_size");
|
||||
if (kernel % 2 == 0)
|
||||
CV_Error(Error::StsNotImplemented, "Odd kernel size required.");
|
||||
|
||||
stride_1 = params.get<int>("stride_1", 1);
|
||||
stride_2 = params.get<int>("stride_2", 1);
|
||||
}
|
||||
|
||||
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(inputs.size() == 2, inputs[0].size() == 4, inputs[1].size() == 4);
|
||||
|
||||
int padded_height = inputs[0][2] + 2 * pad;
|
||||
int padded_width = inputs[0][3] + 2 * pad;
|
||||
|
||||
int kernel_radius = (kernel - 1) / 2;
|
||||
int border_size = max_displacement + kernel_radius;
|
||||
|
||||
int neighborhood_grid_radius = max_displacement / stride_2;
|
||||
int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
|
||||
|
||||
std::vector<int> outShape;
|
||||
|
||||
int num = inputs[0][0];
|
||||
outShape.push_back(num);
|
||||
|
||||
int out_c = neighborhood_grid_width * neighborhood_grid_width;
|
||||
outShape.push_back(out_c);
|
||||
|
||||
int out_h = ceil(static_cast<float>(padded_height - border_size * 2) / stride_1);
|
||||
int out_w = ceil(static_cast<float>(padded_width - border_size * 2) / stride_1);
|
||||
CV_Assert_N(out_h >= 1, out_w >= 1);
|
||||
|
||||
outShape.push_back(out_h);
|
||||
outShape.push_back(out_w);
|
||||
outputs.assign(1, outShape);
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
|
||||
int padded_height = inputs[0].size[2] + 2 * pad;
|
||||
int padded_width = inputs[0].size[3] + 2 * pad;
|
||||
|
||||
int size[] = {inputs[0].size[0], padded_height, padded_width, inputs[0].size[1]};
|
||||
rbot0 = Mat(4, &size[0], CV_32F, float(0));
|
||||
rbot1 = Mat(4, &size[0], CV_32F, float(0));
|
||||
}
|
||||
|
||||
void blobRearrangeKernel2(const Mat& input, Mat& output)
|
||||
{
|
||||
const int num = input.size[0];
|
||||
const int channels = input.size[1];
|
||||
const int height = input.size[2];
|
||||
const int width = input.size[3];
|
||||
const int area = height * width;
|
||||
const int pad_area = (width + 2 * pad) * (height + 2 * pad);
|
||||
|
||||
const float* in = input.ptr<float>();
|
||||
float* out = output.ptr<float>();
|
||||
for (int n = 0; n < num; n++)
|
||||
{
|
||||
for (int ch = 0; ch < channels; ch++)
|
||||
{
|
||||
for (int xy = 0; xy < area; xy++)
|
||||
{
|
||||
float value = in[(n * channels + ch) * area + xy];
|
||||
int xpad = (xy % width + pad);
|
||||
int ypad = (xy / width + pad);
|
||||
int xypad = ypad * (width + 2 * pad) + xpad;
|
||||
out[(n * pad_area + xypad) * channels + ch] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void correlationKernelSubtraction(const Mat& input0, const Mat& input1, Mat& output, int item)
|
||||
{
|
||||
const int inp_h = input0.size[1];
|
||||
const int inp_w = input0.size[2];
|
||||
const int inp_c = input0.size[3];
|
||||
|
||||
const int out_c = output.size[1];
|
||||
const int out_h = output.size[2];
|
||||
const int out_w = output.size[3];
|
||||
|
||||
int topcount = output.total(1);
|
||||
int neighborhood_grid_radius = max_displacement / stride_2;
|
||||
int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
|
||||
|
||||
const float* inp0_data = input0.ptr<float>();
|
||||
const float* inp1_data = input1.ptr<float>();
|
||||
float* out_data = output.ptr<float>();
|
||||
int sumelems = kernel * kernel * inp_c;
|
||||
std::vector<float> patch_data(sumelems, 0);
|
||||
for (int y = 0; y < out_h; y++)
|
||||
{
|
||||
for (int x = 0; x < out_w; x++)
|
||||
{
|
||||
int x1 = x * stride_1 + max_displacement;
|
||||
int y1 = y * stride_1 + max_displacement;
|
||||
|
||||
for (int j = 0; j < kernel; j++)
|
||||
{
|
||||
for (int i = 0; i < kernel; i++)
|
||||
{
|
||||
int ji_off = ((j * kernel) + i) * inp_c;
|
||||
for (int ch = 0; ch < inp_c; ch++)
|
||||
{
|
||||
int idx1 = ((item * inp_h + y1 + j) * inp_w + x1 + i) * inp_c + ch;
|
||||
int idxPatchData = ji_off + ch;
|
||||
patch_data[idxPatchData] = inp0_data[idx1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int out_ch = 0; out_ch < out_c; out_ch++)
|
||||
{
|
||||
float sum = 0;
|
||||
int s2o = (out_ch % neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
|
||||
int s2p = (out_ch / neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
|
||||
|
||||
int x2 = x1 + s2o;
|
||||
int y2 = y1 + s2p;
|
||||
for (int j = 0; j < kernel; j++)
|
||||
{
|
||||
for (int i = 0; i < kernel; i++)
|
||||
{
|
||||
int ji_off = ((j * kernel) + i) * inp_c;
|
||||
for (int ch = 0; ch < inp_c; ch++)
|
||||
{
|
||||
int idxPatchData = ji_off + ch;
|
||||
int idx2 = ((item * inp_h + y2 + j) * inp_w + x2 + i) * inp_c + ch;
|
||||
sum += patch_data[idxPatchData] * inp1_data[idx2];
|
||||
}
|
||||
}
|
||||
}
|
||||
int index = ((out_ch * out_h + y) * out_w) + x;
|
||||
out_data[index + item * topcount] = static_cast<float>(sum) / sumelems;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        blobRearrangeKernel2(inputs[0], rbot0);
        blobRearrangeKernel2(inputs[1], rbot1);
        for (int i = 0; i < inputs[0].size[0]; i++)
        {
            correlationKernelSubtraction(rbot0, rbot1, outputs[0], i);
        }
    }

private:
    int pad;
    int kernel;
    int max_displacement;
    int stride_1;
    int stride_2;
    Mat rbot0;
    Mat rbot1;
};

Ptr<CorrelationLayer> CorrelationLayer::create(const LayerParams& params)
{
    return Ptr<CorrelationLayer>(new CorrelationLayerImpl(params));
}

}} // namespace cv::dnn
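
Illustrative sketch (not part of the vendored file): building a correlation layer of this kind through LayerParams and the factory above. The parameter key names are assumptions, since the constructor that reads them lies outside this excerpt.

    // Sketch: instantiating the layer via LayerParams (key names are assumed).
    #include <opencv2/dnn.hpp>
    using namespace cv;
    using namespace cv::dnn;

    static Ptr<Layer> makeCorrelationLayer()
    {
        LayerParams lp;
        lp.type = "Correlation";
        lp.set("pad", 20);               // assumed key name
        lp.set("kernel_size", 1);        // assumed key name
        lp.set("max_displacement", 20);  // assumed key name
        lp.set("stride_1", 1);           // assumed key name
        lp.set("stride_2", 2);           // assumed key name
        return CorrelationLayer::create(lp);
    }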
186
3rdparty/opencv-4.5.4/modules/dnn/src/layers/crop_and_resize_layer.cpp
vendored
Normal file
@ -0,0 +1,186 @@
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2018, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
#include "../precomp.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include "layers_common.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/crop_and_resize.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class CropAndResizeLayerImpl CV_FINAL : public CropAndResizeLayer
|
||||
{
|
||||
public:
|
||||
CropAndResizeLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
CV_Assert_N(params.has("width"), params.has("height"));
|
||||
outWidth = params.get<float>("width");
|
||||
outHeight = params.get<float>("height");
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV
|
||||
|| backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH
|
||||
|| backendId == DNN_BACKEND_CUDA
|
||||
;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(inputs.size() == 2, inputs[0].size() == 4);
|
||||
if (inputs[0][0] != 1)
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
outputs.resize(1, MatShape(4));
|
||||
outputs[0][0] = inputs[1][2]; // Number of bounding boxes.
|
||||
outputs[0][1] = inputs[0][1]; // Number of channels.
|
||||
outputs[0][2] = outHeight;
|
||||
outputs[0][3] = outWidth;
|
||||
return false;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
Mat& inp = inputs[0];
|
||||
Mat& out = outputs[0];
|
||||
Mat boxes = inputs[1].reshape(1, inputs[1].total() / 7);
|
||||
const int numChannels = inp.size[1];
|
||||
const int inpHeight = inp.size[2];
|
||||
const int inpWidth = inp.size[3];
|
||||
const int inpSpatialSize = inpHeight * inpWidth;
|
||||
const int outSpatialSize = outHeight * outWidth;
|
||||
CV_Assert_N(inp.isContinuous(), out.isContinuous());
|
||||
|
||||
for (int b = 0; b < boxes.rows; ++b)
|
||||
{
|
||||
float* outDataBox = out.ptr<float>(b);
|
||||
float left = boxes.at<float>(b, 3);
|
||||
float top = boxes.at<float>(b, 4);
|
||||
float right = boxes.at<float>(b, 5);
|
||||
float bottom = boxes.at<float>(b, 6);
|
||||
float boxWidth = right - left;
|
||||
float boxHeight = bottom - top;
|
||||
|
||||
float heightScale = boxHeight * static_cast<float>(inpHeight - 1) / (outHeight - 1);
|
||||
float widthScale = boxWidth * static_cast<float>(inpWidth - 1) / (outWidth - 1);
|
||||
for (int y = 0; y < outHeight; ++y)
|
||||
{
|
||||
float input_y = top * (inpHeight - 1) + y * heightScale;
|
||||
int y0 = static_cast<int>(input_y);
|
||||
const float* inpData_row0 = inp.ptr<float>(0, 0, y0);
|
||||
const float* inpData_row1 = (y0 + 1 < inpHeight) ? (inpData_row0 + inpWidth) : inpData_row0;
|
||||
for (int x = 0; x < outWidth; ++x)
|
||||
{
|
||||
float input_x = left * (inpWidth - 1) + x * widthScale;
|
||||
int x0 = static_cast<int>(input_x);
|
||||
int x1 = std::min(x0 + 1, inpWidth - 1);
|
||||
|
||||
float* outData = outDataBox + y * outWidth + x;
|
||||
const float* inpData_row0_c = inpData_row0;
|
||||
const float* inpData_row1_c = inpData_row1;
|
||||
for (int c = 0; c < numChannels; ++c)
|
||||
{
|
||||
*outData = inpData_row0_c[x0] +
|
||||
(input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
|
||||
(input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
|
||||
(input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
|
||||
|
||||
inpData_row0_c += inpSpatialSize;
|
||||
inpData_row1_c += inpSpatialSize;
|
||||
outData += outSpatialSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (boxes.rows < out.size[0])
|
||||
{
|
||||
// left = top = right = bottom = 0
|
||||
std::vector<cv::Range> dstRanges(4, Range::all());
|
||||
dstRanges[0] = Range(boxes.rows, out.size[0]);
|
||||
out(dstRanges).setTo(inp.ptr<float>(0, 0, 0)[0]);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
// Slice second input: from 1x1xNx7 to 1x1xNx5
|
||||
auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto rois = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
|
||||
auto rois_shape = rois->get_shape();
|
||||
std::vector<int64_t> dims(rois_shape.begin(), rois_shape.end()), offsets(4, 0);
|
||||
offsets[3] = 2;
|
||||
dims[3] = 7;
|
||||
|
||||
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{offsets.size()}, offsets.data());
|
||||
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{dims.size()}, dims.data());
|
||||
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{dims.size()}, std::vector<int64_t>((int64_t)dims.size(), 1));
|
||||
auto slice = std::make_shared<ngraph::op::v1::StridedSlice>(rois,
|
||||
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
|
||||
|
||||
// Reshape rois from 4D to 2D
|
||||
std::vector<int64_t> shapeData = {dims[2], 5};
|
||||
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shapeData.data());
|
||||
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(slice, shape, true);
|
||||
|
||||
auto roiPooling =
|
||||
std::make_shared<ngraph::op::v0::ROIPooling>(input, reshape,
|
||||
ngraph::Shape{(size_t)outHeight, (size_t)outWidth},
|
||||
1.0f, "bilinear");
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(roiPooling));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
return make_cuda_node<cuda4dnn::CropAndResizeOp>(preferableTarget, std::move(context->stream));
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
int outWidth, outHeight;
|
||||
};
|
||||
|
||||
Ptr<Layer> CropAndResizeLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<CropAndResizeLayer>(new CropAndResizeLayerImpl(params));
|
||||
}
|
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
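
Illustrative sketch (not part of the vendored file): the bilinear blend used in the inner loop of CropAndResize above, written out for one channel given the four neighbouring samples and the fractional offsets.

    // v00 = row0[x0], v01 = row0[x1], v10 = row1[x0], v11 = row1[x1]
    // dx = input_x - x0, dy = input_y - y0
    static float bilinearSample(float v00, float v01, float v10, float v11, float dx, float dy)
    {
        return v00 + dy * (v10 - v00)
                   + dx * (v01 - v00 + dy * (v11 - v01 - v10 + v00));
    }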
131
3rdparty/opencv-4.5.4/modules/dnn/src/layers/cumsum_layer.cpp
vendored
Normal file
@ -0,0 +1,131 @@
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class CumSumLayerImpl CV_FINAL : public CumSumLayer
|
||||
{
|
||||
public:
|
||||
CumSumLayerImpl(const LayerParams ¶ms)
|
||||
{
|
||||
axis_raw = params.get<int>("axis", 0);
|
||||
exclusive_raw = params.get<int>("exclusive", 0);
|
||||
reverse_raw = params.get<int>("reverse", 0);
|
||||
setParamsFrom(params);
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
|
||||
return true;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs, internals;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
// Get x tensor.
|
||||
const auto &src_mat = inputs[0];
|
||||
const auto *src_ptr = src_mat.ptr<float>();
|
||||
|
||||
// Get axis.
|
||||
const int axis = normalize_axis(axis_raw, src_mat.dims);
|
||||
|
||||
// Get y tensor.
|
||||
auto &dst_mat = outputs[0];
|
||||
src_mat.copyTo(dst_mat);
|
||||
auto *dst_ptr = dst_mat.ptr<float>();
|
||||
|
||||
// Get flags.
|
||||
const auto exclusive = exclusive_raw == 1;
|
||||
const auto reverse = reverse_raw == 1;
|
||||
|
||||
// Get parameters to iterate outer dimension.
|
||||
const size_t outer_size = src_mat.total(0, axis);
|
||||
const size_t outer_step_length = src_mat.total(axis);
|
||||
|
||||
// Get parameters to iterate inner dimension.
|
||||
const size_t inner_size = src_mat.size[axis];
|
||||
|
||||
if (!inner_size)
|
||||
return;
|
||||
|
||||
const size_t inner_step_length = src_mat.total(axis + 1);
|
||||
const int inner_step = (reverse ? -1 : 1) * inner_step_length;
|
||||
const int inner_start = reverse ? inner_size - 1 : 0;
|
||||
const int inner_stop = reverse ? -1 : inner_size;
|
||||
const int inner_delta = reverse ? -1 : 1;
|
||||
|
||||
// Get parameters to populate channels.
|
||||
const size_t num_channels = src_mat.total(axis + 1);
|
||||
|
||||
for (size_t outer_dim = 0; outer_dim < outer_size; outer_dim++)
|
||||
{
|
||||
const size_t outer_offset = outer_dim * outer_step_length;
|
||||
size_t src_offset = outer_offset + inner_start * inner_step_length;
|
||||
|
||||
// Populate first element of inner dimension.
|
||||
for (size_t channel = 0; channel < num_channels; channel++)
|
||||
{
|
||||
if (exclusive)
|
||||
{
|
||||
dst_ptr[src_offset + channel] = 0.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_ptr[src_offset + channel] = src_ptr[src_offset + channel];
|
||||
src_offset += inner_step;
|
||||
}
|
||||
}
|
||||
|
||||
// Populate remaining elements of inner dimension.
|
||||
for (int inner_dim = inner_start + inner_delta; inner_dim != inner_stop; inner_dim += inner_delta)
|
||||
{
|
||||
const size_t dst_offset = outer_offset + inner_dim * inner_step_length;
|
||||
|
||||
for (size_t channel = 0; channel < num_channels; channel++)
|
||||
{
|
||||
const size_t previous_dst_offset = dst_offset - inner_step;
|
||||
dst_ptr[dst_offset + channel] = dst_ptr[previous_dst_offset + channel] +
|
||||
src_ptr[src_offset + channel];
|
||||
src_offset += inner_step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int axis_raw;
|
||||
int exclusive_raw;
|
||||
int reverse_raw;
|
||||
};
|
||||
|
||||
Ptr<CumSumLayer> CumSumLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<CumSumLayer>(new CumSumLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
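
Illustrative sketch (not part of the vendored file): the CumSum semantics the layer targets, reduced to a 1-D input. With exclusive=false, reverse=false, y[i] = x[0] + ... + x[i]; exclusive=true drops the current element; reverse=true runs the sum from the end of the axis.

    #include <vector>

    static std::vector<float> cumsum1D(const std::vector<float>& x, bool exclusive, bool reverse)
    {
        std::vector<float> y(x.size(), 0.0f);
        const int n = static_cast<int>(x.size());
        const int start = reverse ? n - 1 : 0;
        const int stop  = reverse ? -1 : n;
        const int step  = reverse ? -1 : 1;
        float acc = 0.0f;
        for (int i = start; i != stop; i += step)
        {
            y[i] = exclusive ? acc : acc + x[i];  // exclusive: sum of the elements before i
            acc += x[i];
        }
        return y;
    }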
1075
3rdparty/opencv-4.5.4/modules/dnn/src/layers/detection_output_layer.cpp
vendored
Normal file
File diff suppressed because it is too large
1964
3rdparty/opencv-4.5.4/modules/dnn/src/layers/elementwise_layers.cpp
vendored
Normal file
File diff suppressed because it is too large
965
3rdparty/opencv-4.5.4/modules/dnn/src/layers/eltwise_layer.cpp
vendored
Normal file
@ -0,0 +1,965 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/eltwise.hpp"
|
||||
#include "../cuda4dnn/primitives/shortcut.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class EltwiseLayerImpl CV_FINAL : public EltwiseLayer
|
||||
{
|
||||
public:
|
||||
enum EltwiseOp
|
||||
{
|
||||
PROD = 0,
|
||||
SUM = 1,
|
||||
MAX = 2,
|
||||
DIV = 3,
|
||||
MIN = 4,
|
||||
} op;
|
||||
std::vector<float> coeffs;
|
||||
|
||||
enum OutputChannelsMode
|
||||
{
|
||||
ELTWISE_CHANNNELS_SAME = 0, //!< number of channels from inputs must be the same and equal to output's number of channels
|
||||
ELTWISE_CHANNNELS_INPUT_0, //!< number of channels from inputs may be different,
|
||||
//!< output's number of channels is equal to number of channels of first input
|
||||
//!< number of channels of other inputs should not be greater than number of channels of first input
|
||||
ELTWISE_CHANNNELS_INPUT_0_TRUNCATE, //!< number of channels from inputs may be different,
|
||||
//!< output's number of channels is equal to number of channels of first input
|
||||
//!< there is restriction on number of channels of other inputs
|
||||
//!< extra channels of other inputs is ignored
|
||||
ELTWISE_CHANNNELS_USE_MAX, //!< number of channels from inputs may be different,
|
||||
//!< output's number of channels is equal to maximal number of input channels
|
||||
//!< @note supported operation: `SUM`
|
||||
} channelsModeInput;
|
||||
|
||||
|
||||
mutable OutputChannelsMode channelsMode; //!< "optimized" channels mode (switch to ELTWISE_CHANNNELS_SAME if number of input channels are equal)
|
||||
mutable /*size_t*/int outputChannels;
|
||||
|
||||
EltwiseLayerImpl(const LayerParams& params)
|
||||
: outputChannels(0)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
hasVecInput = false;
|
||||
op = SUM;
|
||||
if (params.has("operation"))
|
||||
{
|
||||
String operation = toLowerCase(params.get<String>("operation"));
|
||||
if (operation == "prod")
|
||||
op = PROD;
|
||||
else if (operation == "sum")
|
||||
op = SUM;
|
||||
else if (operation == "max")
|
||||
op = MAX;
|
||||
else if (operation == "min")
|
||||
op = MIN;
|
||||
else if (operation == "div")
|
||||
op = DIV;
|
||||
else
|
||||
CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
|
||||
}
|
||||
|
||||
if (params.has("coeff"))
|
||||
{
|
||||
DictValue paramCoeff = params.get("coeff");
|
||||
int i, n = paramCoeff.size();
|
||||
coeffs.resize(n);
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
coeffs[i] = paramCoeff.get<float>(i);
|
||||
}
|
||||
}
|
||||
|
||||
channelsModeInput = ELTWISE_CHANNNELS_SAME;
|
||||
if (params.has("output_channels_mode"))
|
||||
{
|
||||
String v = toLowerCase(params.get<String>("output_channels_mode"));
|
||||
if (v == "same")
|
||||
{
|
||||
channelsModeInput = ELTWISE_CHANNNELS_SAME;
|
||||
}
|
||||
else if (v == "input_0")
|
||||
{
|
||||
channelsModeInput = ELTWISE_CHANNNELS_INPUT_0;
|
||||
}
|
||||
else if (v == "input_0_truncate")
|
||||
{
|
||||
channelsModeInput = ELTWISE_CHANNNELS_INPUT_0_TRUNCATE;
|
||||
}
|
||||
else if (v == "max_input_channels")
|
||||
{
|
||||
channelsModeInput = ELTWISE_CHANNNELS_USE_MAX;
|
||||
if (op != SUM)
|
||||
CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") 'max' channels mode is limited to SUM operation only");
|
||||
}
|
||||
else
|
||||
CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") unknown channels mode: \"" + v + "\"");
|
||||
}
|
||||
channelsMode = channelsModeInput;
|
||||
|
||||
// TODO Must have checks for other unknown options
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
if (hasVecInput && ELTWISE_CHANNNELS_SAME)
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
|
||||
if (backendId == DNN_BACKEND_CUDA)
|
||||
{
|
||||
if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
|
||||
return op == SUM && coeffs.empty();
|
||||
return channelsModeInput == ELTWISE_CHANNNELS_SAME;
|
||||
}
|
||||
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
(backendId == DNN_BACKEND_HALIDE && op != DIV) || // TODO: not implemented, see PR #15811
|
||||
((((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()))
|
||||
|| backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && channelsMode == ELTWISE_CHANNNELS_SAME));
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() >= 2);
|
||||
CV_Assert(inputs[0].size() >= 2);
|
||||
CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
|
||||
CV_Assert(op == SUM || coeffs.size() == 0);
|
||||
|
||||
int dims = inputs[0].size();
|
||||
// Number of channels in output shape is determined by the first input tensor.
|
||||
bool variableChannels = false;
|
||||
int numChannels = inputs[0][1];
|
||||
for (size_t i = 1; i < inputs.size(); i++)
|
||||
{
|
||||
CV_Assert(inputs[0][0] == inputs[i][0]); // batch sizes are equal
|
||||
|
||||
int input_channels = inputs[i][1];
|
||||
if (numChannels != input_channels)
|
||||
variableChannels = true;
|
||||
|
||||
if (channelsModeInput == ELTWISE_CHANNNELS_SAME)
|
||||
{
|
||||
CV_Assert(numChannels == input_channels);
|
||||
}
|
||||
else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0)
|
||||
{
|
||||
CV_Assert(numChannels >= input_channels);
|
||||
}
|
||||
else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
|
||||
{
|
||||
// nothing to check
|
||||
}
|
||||
else if (channelsModeInput == ELTWISE_CHANNNELS_USE_MAX)
|
||||
{
|
||||
numChannels = std::max(numChannels, input_channels);
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(0 && "Internal error");
|
||||
}
|
||||
}
|
||||
|
||||
channelsMode = variableChannels ? channelsModeInput : ELTWISE_CHANNNELS_SAME;
|
||||
outputChannels = numChannels;
|
||||
|
||||
outputs.assign(1, inputs[0]);
|
||||
outputs[0][1] = numChannels;
|
||||
|
||||
if (dims > 2)
|
||||
{
|
||||
size_t vecIdx = 0;
|
||||
bool isVecFound = false;
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
bool allOnes = isAllOnes(inputs[i], 2, dims);
|
||||
if (!allOnes && !isVecFound)
|
||||
{
|
||||
vecIdx = i;
|
||||
isVecFound = true;
|
||||
}
|
||||
|
||||
if (!allOnes && i != vecIdx)
|
||||
{
|
||||
for (size_t j = 2; j < dims; j++)
|
||||
{
|
||||
CV_Assert(inputs[vecIdx][j] == inputs[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (channelsModeInput == ELTWISE_CHANNNELS_SAME && isVecFound)
|
||||
{
|
||||
for (size_t j = 2; j < dims; j++)
|
||||
{
|
||||
outputs[0][j] = inputs[vecIdx][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
MatShape inpShape = shape(inputs[i].size);
|
||||
if (isAllOnes(inpShape, 2, inputs[i].dims))
|
||||
{
|
||||
hasVecInput = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class EltwiseInvoker : public ParallelLoopBody
|
||||
{
|
||||
EltwiseLayerImpl& self;
|
||||
std::vector<const Mat*> srcs;
|
||||
std::vector<int> srcNumChannels;
|
||||
int nsrcs;
|
||||
Mat* dst;
|
||||
std::vector<float> coeffs;
|
||||
int nstripes;
|
||||
const ActivationLayer* activ;
|
||||
int channels;
|
||||
size_t planeSize;
|
||||
|
||||
EltwiseInvoker(EltwiseLayerImpl& self_)
|
||||
: self(self_)
|
||||
, nsrcs(0), dst(0), nstripes(0), activ(0), channels(0)
|
||||
, planeSize(0)
|
||||
{}
|
||||
|
||||
public:
|
||||
static void run(EltwiseLayerImpl& self,
|
||||
const Mat* srcs, int nsrcs, Mat& dst,
|
||||
int nstripes)
|
||||
{
|
||||
const EltwiseOp op = self.op;
|
||||
CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous());
|
||||
CV_Assert(self.coeffs.empty() || self.coeffs.size() == (size_t)nsrcs);
|
||||
CV_CheckGE(nsrcs, 2, "");
|
||||
|
||||
CV_Assert(self.outputChannels == dst.size[1]);
|
||||
|
||||
EltwiseInvoker p(self);
|
||||
p.srcs.resize(nsrcs);
|
||||
p.srcNumChannels.resize(nsrcs);
|
||||
p.coeffs = self.coeffs; // can be sorted
|
||||
|
||||
bool sortInputs = false;
|
||||
for( int i = 0; i < nsrcs; i++ )
|
||||
{
|
||||
p.srcs[i] = &srcs[i];
|
||||
CV_CheckEQ(srcs[i].dims, dst.dims, "");
|
||||
CV_Assert(srcs[i].isContinuous());
|
||||
CV_Assert(srcs[i].type() == dst.type());
|
||||
p.srcNumChannels[i] = (srcs[i].dims >= 4) ? srcs[i].size[1] : 1;
|
||||
|
||||
if (self.channelsMode == ELTWISE_CHANNNELS_SAME)
|
||||
{
|
||||
CV_Assert(srcs[i].size == dst.size);
|
||||
}
|
||||
else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0)
|
||||
{
|
||||
if (i == 0)
|
||||
CV_Assert(srcs[0].size == dst.size);
|
||||
CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
|
||||
sortInputs = true;
|
||||
}
|
||||
else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
|
||||
{
|
||||
if (i == 0)
|
||||
CV_Assert(srcs[0].size == dst.size);
|
||||
sortInputs = true;
|
||||
}
|
||||
else if (self.channelsMode == ELTWISE_CHANNNELS_USE_MAX)
|
||||
{
|
||||
CV_Assert(op == SUM);
|
||||
CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
|
||||
sortInputs = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(0 && "Internal error");
|
||||
}
|
||||
|
||||
if (sortInputs)
|
||||
{
|
||||
// Sort srcs and coefficients in the desc order by number of channels
|
||||
for (int j = i; j >= 1; j--)
|
||||
{
|
||||
if (std::min(self.outputChannels, p.srcs[j - 1]->size[1]) < std::min(self.outputChannels, p.srcs[j]->size[1]))
|
||||
{
|
||||
std::swap(p.srcs[j - 1], p.srcs[j]);
|
||||
std::swap(p.srcNumChannels[j - 1], p.srcNumChannels[j]);
|
||||
if (!p.coeffs.empty())
|
||||
std::swap(p.coeffs[j - 1], p.coeffs[j]);
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p.nsrcs = nsrcs;
|
||||
p.dst = &dst;
|
||||
p.nstripes = nstripes;
|
||||
p.channels = (dst.dims >= 4 ? dst.size[1] : 1);
|
||||
|
||||
p.planeSize = dst.total(dst.dims >= 4 ? 2 : 1);
|
||||
CV_CheckEQ(dst.total(), dst.size[0] * p.channels * p.planeSize, "");
|
||||
|
||||
bool simpleCoeffs = true;
|
||||
if (op == SUM && !p.coeffs.empty())
|
||||
{
|
||||
CV_CheckEQ(p.coeffs.size(), (size_t)nsrcs, "");
|
||||
|
||||
for (size_t i = 0; i < p.coeffs.size(); i++)
|
||||
{
|
||||
if (p.coeffs[i] != 1)
|
||||
{
|
||||
simpleCoeffs = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (simpleCoeffs)
|
||||
p.coeffs.clear();
|
||||
p.activ = self.activ.get();
|
||||
|
||||
parallel_for_(Range(0, nstripes), p, nstripes);
|
||||
}
|
||||
|
||||
void operator()(const Range& r) const CV_OVERRIDE
|
||||
{
|
||||
const EltwiseOp op = self.op;
|
||||
size_t total = dst->size[0]*planeSize;
|
||||
size_t stripeSize = (total + nstripes - 1)/nstripes;
|
||||
size_t stripeStart = r.start*stripeSize;
|
||||
size_t stripeEnd = std::min(r.end*stripeSize, total);
|
||||
const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0;
|
||||
float* dstptr0 = dst->ptr<float>();
|
||||
int blockSize0 = 1 << 12;
|
||||
|
||||
for (size_t ofs = stripeStart; ofs < stripeEnd; )
|
||||
{
|
||||
int sampleIdx = (int)(ofs / planeSize);
|
||||
int delta = (int)ofs - sampleIdx * planeSize;
|
||||
int blockSize = std::min(blockSize0, std::min((int)(stripeEnd - ofs), (int)planeSize - delta));
|
||||
if( blockSize <= 0 )
|
||||
break;
|
||||
ofs += blockSize;
|
||||
|
||||
for (int c = 0; c < channels; c++)
|
||||
{
|
||||
size_t dstIdx = delta + (sampleIdx*channels + c)*planeSize;
|
||||
float* dstptr = dstptr0 + dstIdx;
|
||||
|
||||
// process first two inputs
|
||||
{
|
||||
const float* srcptr0 = srcs[0]->ptr<float>() + dstIdx;
|
||||
|
||||
const int inputIdx = 1;
|
||||
int src1_channels = srcNumChannels[inputIdx];
|
||||
if (c >= src1_channels)
|
||||
{
|
||||
// no data from second input
|
||||
if (!coeffsptr || coeffsptr[0] == 1.0f)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = srcptr0[j];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float c0 = coeffsptr[0];
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = c0*srcptr0[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t srcIdx = delta + (sampleIdx * src1_channels + c) * planeSize;
|
||||
const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;
|
||||
|
||||
if (op == PROD)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = srcptr0[j] * srcptrI[j];
|
||||
}
|
||||
}
|
||||
else if (op == DIV)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = srcptr0[j] / srcptrI[j];
|
||||
}
|
||||
}
|
||||
else if (op == MAX)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = std::max(srcptr0[j], srcptrI[j]);
|
||||
}
|
||||
}
|
||||
else if (op == MIN)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = std::min(srcptr0[j], srcptrI[j]);
|
||||
}
|
||||
}
|
||||
else if (op == SUM)
|
||||
{
|
||||
if (!coeffsptr || (coeffsptr[0] == 1.0f && coeffsptr[1] == 1.0f))
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = srcptr0[j] + srcptrI[j];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float c0 = coeffsptr[0];
|
||||
float c1 = coeffsptr[1];
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = c0*srcptr0[j] + c1*srcptrI[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsInternal, "");
|
||||
}
|
||||
}
|
||||
|
||||
// aggregate other inputs (3+)
|
||||
for (size_t inputIdx = 2; inputIdx < nsrcs; inputIdx++)
|
||||
{
|
||||
int srcI_channels = srcNumChannels[inputIdx];
|
||||
if (c >= srcI_channels)
|
||||
continue; // no data from second input
|
||||
size_t srcIdx = delta + (sampleIdx * srcI_channels + c) * planeSize;
|
||||
const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;
|
||||
|
||||
if (op == PROD)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] *= srcptrI[j];
|
||||
}
|
||||
}
|
||||
else if (op == DIV)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] /= srcptrI[j];
|
||||
}
|
||||
}
|
||||
else if (op == MAX)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = std::max(dstptr[j], srcptrI[j]);
|
||||
}
|
||||
}
|
||||
else if (op == MIN)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] = std::min(dstptr[j], srcptrI[j]);
|
||||
}
|
||||
}
|
||||
else if (op == SUM)
|
||||
{
|
||||
if (!coeffsptr || coeffsptr[inputIdx] == 1.0f)
|
||||
{
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] += srcptrI[j];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float cI = coeffsptr[inputIdx];
|
||||
for (int j = 0; j < blockSize; j++)
|
||||
{
|
||||
dstptr[j] += cI * srcptrI[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsInternal, "");
|
||||
}
|
||||
}
|
||||
|
||||
if( activ )
|
||||
{
|
||||
float* ptr = dstptr0 + delta + sampleIdx*channels*planeSize;
|
||||
activ->forwardSlice(ptr, ptr, blockSize, planeSize, 0, channels);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
|
||||
return false;
|
||||
|
||||
if (hasVecInput)
|
||||
return false; // TODO not implemented yet: https://github.com/opencv/opencv/pull/19477
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case SUM:
|
||||
{
|
||||
int channels = total(shape(outputs[0]), 0, 2);
|
||||
int plane_size = total(shape(outputs[0]), 2);
|
||||
if (channels % 4 == 0 && plane_size % 4 == 0)
|
||||
{
|
||||
size_t localsize[] = { 128 };
|
||||
size_t globalsize[] = { (size_t)channels / 4 * localsize[0] };
|
||||
String opts;
|
||||
if (inputs_.depth() == CV_16S)
|
||||
opts = " -DDtype=half -DDtype4=half4 -DDtype8=half8";
|
||||
else
|
||||
opts = " -DDtype=float -DDtype4=float4 -DDtype8=float8";
|
||||
|
||||
for (int i = 0; i < (inputs.size() - 1); ++i)
|
||||
{
|
||||
String buildopt = format("-DLOOP=%d", i) + opts;
|
||||
ocl::Kernel kernel("op_sum4", ocl::dnn::eltwise_oclsrc, buildopt);
|
||||
int idx = 0;
|
||||
UMat inpMat = (i == 0) ? inputs[0] : UMat();
|
||||
float coeff1 = (coeffs.empty() || i > 0) ? 1.0f : coeffs[i];
|
||||
float coeff2 = coeffs.empty() ? 1.0f : coeffs[i + 1];
|
||||
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[0]));
|
||||
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[1]));
|
||||
kernel.set(idx++, (int)plane_size);
|
||||
kernel.set(idx++, (float)coeff1);
|
||||
kernel.set(idx++, (float)coeff2);
|
||||
kernel.set(idx++, ocl::KernelArg::PtrReadWrite(outputs[0]));
|
||||
bool ret = kernel.run(1, globalsize, localsize, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inputs_.depth() == CV_16S)
|
||||
return false;
|
||||
|
||||
float coeff1 = coeffs.empty() ? 1.f : coeffs[0];
|
||||
float coeff2 = coeffs.empty() ? 1.f : coeffs[1];
|
||||
UMat mul0, mul1;
|
||||
multiply(coeff1, inputs[0], mul0);
|
||||
multiply(coeff2, inputs[1], mul1);
|
||||
add(mul0, mul1, outputs[0]);
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
{
|
||||
float coeff = coeffs.empty() ? 1.f : coeffs[i];
|
||||
multiply(coeff, inputs[i], mul0);
|
||||
add(mul0, outputs[0], outputs[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PROD:
|
||||
multiply(inputs[0], inputs[1], outputs[0]);
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
multiply(inputs[i], outputs[0], outputs[0]);
|
||||
break;
|
||||
case DIV:
|
||||
divide(inputs[0], inputs[1], outputs[0]);
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
divide(outputs[0], inputs[i], outputs[0]);
|
||||
break;
|
||||
case MAX:
|
||||
max(inputs[0], inputs[1], outputs[0]);
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
max(inputs[i], outputs[0], outputs[0]);
|
||||
break;
|
||||
case MIN:
|
||||
min(inputs[0], inputs[1], outputs[0]);
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
min(inputs[i], outputs[0], outputs[0]);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert(outputs.size() == 1);
|
||||
const int nstripes = getNumThreads();
|
||||
|
||||
if (channelsModeInput == ELTWISE_CHANNNELS_SAME && inputs[0].dims > 2)
|
||||
{
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
MatShape inpShape = shape(inputs[i].size);
|
||||
bool allOnes = isAllOnes(inpShape, 2, inputs[i].dims);
|
||||
|
||||
if (allOnes)
|
||||
{
|
||||
Mat tmpInput = inputs[i];
|
||||
MatShape outShape = shape(outputs[0].size);
|
||||
size_t xSize = outShape[2];
|
||||
for (size_t j = 3; j < outShape.size(); j++)
|
||||
xSize *= outShape[j];
|
||||
|
||||
int dimVec[3] = {outShape[0], outShape[1], (int) xSize};
|
||||
std::vector<int> matSizesVec(&dimVec[0], &dimVec[0] + 3);
|
||||
inputs[i] = Mat(matSizesVec, tmpInput.type());
|
||||
|
||||
std::vector<int> idx(outShape.size(), 0);
|
||||
std::vector<int> outIdx(inpShape.size(), 0);
|
||||
|
||||
for (size_t j = 0; j < outShape[0]; j++)
|
||||
{
|
||||
outIdx[0] = idx[0] = j;
|
||||
for(size_t k = 0; k < outShape[1]; k++)
|
||||
{
|
||||
outIdx[1] = idx[1] = k;
|
||||
for (size_t x = 0; x < xSize; x++)
|
||||
{
|
||||
outIdx[2] = x;
|
||||
inputs[i].at<float>(outIdx.data()) = tmpInput.at<float>(idx.data());
|
||||
}
|
||||
}
|
||||
}
|
||||
inputs[i] = inputs[i].reshape(0, outShape);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EltwiseInvoker::run(*this,
|
||||
&inputs[0], (int)inputs.size(), outputs[0],
|
||||
nstripes);
|
||||
}
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
CV_Assert(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 ||
|
||||
channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE ||
|
||||
channelsModeInput == ELTWISE_CHANNNELS_SAME);
|
||||
|
||||
if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
|
||||
{
|
||||
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
|
||||
for (int i = 1; i < inputs.size(); i++)
|
||||
{
|
||||
auto from_wrapper = inputs[i].dynamicCast<CUDABackendWrapper>();
|
||||
if (input_wrapper->getShape()[1] != from_wrapper->getShape()[1])
|
||||
{
|
||||
CV_Assert(op == SUM);
|
||||
CV_Assert(coeffs.empty());
|
||||
return make_cuda_node<cuda4dnn::ShortcutOp>(preferableTarget, std::move(context->stream));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto op_ = [this] {
|
||||
switch (op) {
|
||||
case MAX: return cuda4dnn::EltwiseOpType::MAX;
|
||||
case MIN: return cuda4dnn::EltwiseOpType::MIN;
|
||||
case SUM: return cuda4dnn::EltwiseOpType::SUM;
|
||||
case PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
|
||||
case DIV: return cuda4dnn::EltwiseOpType::DIV;
|
||||
}
|
||||
return cuda4dnn::EltwiseOpType::SUM;
|
||||
}();
|
||||
|
||||
return make_cuda_node<cuda4dnn::EltwiseOp>(preferableTarget, std::move(context->stream), op_, coeffs);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_HALIDE
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n");
|
||||
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
|
||||
Halide::Expr topExpr;
|
||||
std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);
|
||||
switch (op)
|
||||
{
|
||||
case SUM:
|
||||
if (coeffs.empty())
|
||||
{
|
||||
topExpr = inputBuffers[0](x, y, c, n) +
|
||||
inputBuffers[1](x, y, c, n);
|
||||
for (int i = 2; i < inputBuffers.size(); ++i)
|
||||
topExpr += inputBuffers[i](x, y, c, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
topExpr = coeffs[0] * inputBuffers[0](x, y, c, n) +
|
||||
coeffs[1] * inputBuffers[1](x, y, c, n);
|
||||
for (int i = 2; i < inputBuffers.size(); ++i)
|
||||
topExpr += coeffs[i] * inputBuffers[i](x, y, c, n);
|
||||
}
|
||||
break;
|
||||
case PROD:
|
||||
topExpr = inputBuffers[0](x, y, c, n) *
|
||||
inputBuffers[1](x, y, c, n);
|
||||
for (int i = 2; i < inputBuffers.size(); ++i)
|
||||
topExpr *= inputBuffers[i](x, y, c, n);
|
||||
break;
|
||||
case DIV:
|
||||
topExpr = inputBuffers[0](x, y, c, n) /
|
||||
inputBuffers[1](x, y, c, n);
|
||||
for (int i = 2; i < inputBuffers.size(); ++i)
|
||||
topExpr /= inputBuffers[i](x, y, c, n);
|
||||
break;
|
||||
case MAX:
|
||||
topExpr = max(inputBuffers[0](x, y, c, n),
|
||||
inputBuffers[1](x, y, c, n));
|
||||
for (int i = 2; i < inputBuffers.size(); ++i)
|
||||
topExpr = max(topExpr, inputBuffers[i](x, y, c, n));
|
||||
break;
|
||||
case MIN:
|
||||
topExpr = min(inputBuffers[0](x, y, c, n),
|
||||
inputBuffers[1](x, y, c, n));
|
||||
for (int i = 2; i < inputBuffers.size(); ++i)
|
||||
topExpr = min(topExpr, inputBuffers[i](x, y, c, n));
|
||||
break;
|
||||
default:
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
top(x, y, c, n) = topExpr;
|
||||
return Ptr<BackendNode>(new HalideBackendNode(top));
|
||||
#endif // HAVE_HALIDE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::EltwiseLayer ieLayer(name);
|
||||
|
||||
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
|
||||
|
||||
if (op == SUM)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM);
|
||||
else if (op == PROD)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL);
|
||||
else if (op == DIV)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV);
|
||||
else if (op == MAX)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
|
||||
else if (op == MIN)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MIN);
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
if (!coeffs.empty())
|
||||
l.getParameters()["coeff"] = coeffs;
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto curr_node = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
if (!coeffs.empty()) {
|
||||
auto coeff = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeffs[0]);
|
||||
curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, coeff, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < nodes.size(); i++)
|
||||
{
|
||||
auto next_node = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
if (!coeffs.empty()) {
|
||||
auto coeff = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeffs[i]);
|
||||
next_node = std::make_shared<ngraph::op::v1::Multiply>(next_node, coeff, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
switch (op) {
|
||||
case SUM: curr_node = std::make_shared<ngraph::op::v1::Add>(curr_node, next_node); break;
|
||||
case PROD: curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, next_node); break;
|
||||
case DIV: curr_node = std::make_shared<ngraph::op::v1::Divide>(curr_node, next_node); break;
|
||||
case MAX: curr_node = std::make_shared<ngraph::op::v1::Maximum>(curr_node, next_node); break;
|
||||
case MIN: curr_node = std::make_shared<ngraph::op::v1::Minimum>(curr_node, next_node); break;
|
||||
default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
|
||||
}
|
||||
}
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(curr_node));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
if (op == SUM)
|
||||
{
|
||||
std::vector<float> newCoeffs;
|
||||
float offset = zeropoints[1][0];
|
||||
float out_sc = scales[1][0];
|
||||
for (int i = 0; i < scales[0].size(); i++)
|
||||
{
|
||||
float coeff = coeffs.empty() ? 1.f : coeffs[i];
|
||||
float newcoeff = (scales[0][i] * coeff) / out_sc;
|
||||
newCoeffs.push_back(newcoeff);
|
||||
offset -= (newcoeff * zeropoints[0][i]);
|
||||
}
|
||||
params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size()));
|
||||
params.set("offset", offset);
|
||||
return true;
|
||||
}
|
||||
else if (op == PROD)
|
||||
{
|
||||
std::vector<float> newCoeffs = scales[0];
|
||||
newCoeffs[0] /= scales[1][0];
|
||||
params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size()));
|
||||
params.set("offset", zeropoints[1][0]);
|
||||
params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
|
||||
return true;
|
||||
}
|
||||
return op == MAX;
|
||||
}
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(outputs); // suppress unused variable warning
|
||||
CV_Assert(inputs.size());
|
||||
|
||||
// FIXIT: handle inputs with different number of channels
|
||||
long flops = inputs.size() * total(inputs[0]);
|
||||
|
||||
return flops;
|
||||
}
|
||||
|
||||
bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
|
||||
{
|
||||
if (activ.empty() || layer.empty())
|
||||
{
|
||||
activ = layer;
|
||||
return !activ.empty();
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
Ptr<ActivationLayer> activ;
|
||||
|
||||
private:
|
||||
bool hasVecInput;
|
||||
};
|
||||
|
||||
Ptr<EltwiseLayer> EltwiseLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<EltwiseLayer>(new EltwiseLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
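
Illustrative sketch (not part of the vendored file): the per-element operation computed by the coefficient-weighted SUM path of EltwiseInvoker above, for two same-shaped CV_32F blobs.

    #include <opencv2/core.hpp>

    static void eltwiseSum(const cv::Mat& a, const cv::Mat& b, cv::Mat& dst,
                           float c0 = 1.0f, float c1 = 1.0f)
    {
        CV_Assert(a.size == b.size && a.type() == CV_32FC1 && b.type() == CV_32FC1);
        CV_Assert(a.isContinuous() && b.isContinuous());
        dst.create(a.dims, a.size.p, CV_32FC1);
        const float* pa = a.ptr<float>();
        const float* pb = b.ptr<float>();
        float* pd = dst.ptr<float>();
        for (size_t i = 0; i < a.total(); i++)
            pd[i] = c0 * pa[i] + c1 * pb[i];  // dst = c0*src0 + c1*src1
    }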
246
3rdparty/opencv-4.5.4/modules/dnn/src/layers/flatten_layer.cpp
vendored
Normal file
@ -0,0 +1,246 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
|
||||
#include <float.h>
|
||||
#include <algorithm>
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/reshape.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class FlattenLayerImpl CV_FINAL : public FlattenLayer
|
||||
{
|
||||
public:
|
||||
FlattenLayerImpl(const LayerParams ¶ms)
|
||||
{
|
||||
_startAxis = params.get<int>("axis", 1);
|
||||
_endAxis = params.get<int>("end_axis", -1);
|
||||
setParamsFrom(params);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() > 0);
|
||||
for (size_t i = 1; i < inputs.size(); i++)
|
||||
{
|
||||
CV_Assert(inputs[i] == inputs[0]);
|
||||
}
|
||||
|
||||
int numAxes = inputs[0].size();
|
||||
int startAxis = normalize_axis(_startAxis, numAxes);
|
||||
int endAxis = normalize_axis(_endAxis, numAxes);
|
||||
|
||||
CV_Assert(startAxis >= 0);
|
||||
CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);
|
||||
|
||||
size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis + 1);
|
||||
|
||||
MatShape outputShapeVec;
|
||||
for (int i = 0; i < startAxis; i++)
|
||||
{
|
||||
outputShapeVec.push_back(inputs[0][i]);
|
||||
}
|
||||
outputShapeVec.push_back(flattenedDimensionSize);
|
||||
for (size_t i = endAxis + 1; i < numAxes; i++)
|
||||
{
|
||||
outputShapeVec.push_back(inputs[0][i]);
|
||||
}
|
||||
CV_Assert(outputShapeVec.size() <= 4);
|
||||
|
||||
outputs.resize(inputs.size(), outputShapeVec);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
|
||||
int numAxes = inputs[0].dims;
|
||||
_startAxis = normalize_axis(_startAxis, numAxes);
|
||||
_endAxis = normalize_axis(_endAxis, numAxes);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||
{
|
||||
std::vector<UMat> inpvec;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inputs_arr.getUMatVector(inpvec);
|
||||
outputs_arr.getUMatVector(outputs);
|
||||
|
||||
std::vector<UMat*> inputs(inpvec.size());
|
||||
for (int i = 0; i < inpvec.size(); i++)
|
||||
inputs[i] = &inpvec[i];
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
MatShape outShape = shape(outputs[i]);
|
||||
UMat& output = outputs_arr.getUMatRef(i);
|
||||
output = inputs[i]->reshape(1, (int)outShape.size(), &outShape[0]);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
outputs_arr.isUMatVector(),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
MatShape outShape = shape(outputs[i]);
|
||||
if (inputs[i].data != outputs[i].data)
|
||||
{
|
||||
inputs[i].reshape(1, (int)outShape.size(), &outShape[0]).copyTo(outputs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::Layer ieLayer(name);
|
||||
ieLayer.setName(name);
|
||||
ieLayer.setType("Flatten");
|
||||
ieLayer.getParameters()["axis"] = (size_t)_startAxis;
|
||||
ieLayer.getParameters()["end_axis"] = _endAxis; // Do not cast to size_t because it might be negative.
|
||||
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
|
||||
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
std::vector<size_t> dims = ieInpNode->get_shape();
|
||||
|
||||
int numAxes = dims.size();
|
||||
int startAxis = normalize_axis(_startAxis, numAxes);
|
||||
int endAxis = normalize_axis(_endAxis, numAxes);
|
||||
|
||||
CV_Assert(startAxis >= 0);
|
||||
CV_Assert(endAxis >= startAxis && endAxis < numAxes);
|
||||
int64_t flattenedDimensionSize = std::accumulate(dims.begin() + startAxis,
|
||||
dims.begin() + endAxis + 1, 1, std::multiplies<size_t>());
|
||||
|
||||
std::vector<int64_t> outputShapeVec(dims.begin(), dims.begin() + startAxis);
|
||||
outputShapeVec.push_back(flattenedDimensionSize);
|
||||
outputShapeVec.insert(outputShapeVec.end(), dims.begin() + endAxis + 1, dims.end());
|
||||
|
||||
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape({outputShapeVec.size()}), outputShapeVec.data());
|
||||
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, shape, true);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(reshape));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
int _startAxis;
|
||||
int _endAxis;
|
||||
};
|
||||
|
||||
Ptr<FlattenLayer> FlattenLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<FlattenLayer>(new FlattenLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
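
Illustrative sketch (not part of the vendored file): how the flattened output shape is derived from the normalized [startAxis, endAxis] range, as in getMemoryShapes above.

    #include <functional>
    #include <numeric>
    #include <vector>

    static std::vector<int> flattenShape(const std::vector<int>& in, int startAxis, int endAxis)
    {
        std::vector<int> out(in.begin(), in.begin() + startAxis);
        out.push_back(std::accumulate(in.begin() + startAxis, in.begin() + endAxis + 1,
                                      1, std::multiplies<int>()));
        out.insert(out.end(), in.begin() + endAxis + 1, in.end());
        return out;
    }
    // e.g. flattenShape({2, 3, 4, 5}, 1, 3) -> {2, 60}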
117
3rdparty/opencv-4.5.4/modules/dnn/src/layers/flow_warp_layer.cpp
vendored
Normal file
@ -0,0 +1,117 @@
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2020, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class FlowWarpLayerImpl CV_FINAL : public FlowWarpLayer
|
||||
{
|
||||
public:
|
||||
FlowWarpLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
String fill_string = toLowerCase(params.get<String>("FillParameter", "ZERO"));
|
||||
if (fill_string != "zero")
|
||||
CV_Error(Error::StsNotImplemented, "Only zero filling supported.");
|
||||
fill_value = 0;
|
||||
}
|
||||
|
||||
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 2);
|
||||
CV_Assert_N(inputs[0][0] == inputs[1][0], inputs[1][1] == 2,
|
||||
inputs[0][2] == inputs[1][2], inputs[0][3] == inputs[1][3]);
|
||||
|
||||
outputs.assign(1, inputs[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
const int out_n = outputs[0].size[0];
|
||||
const int out_c = outputs[0].size[1];
|
||||
const int out_h = outputs[0].size[2];
|
||||
const int out_w = outputs[0].size[3];
|
||||
|
||||
const int area = out_w * out_h;
|
||||
const int total = area * out_c;
|
||||
|
||||
const float* image_data = inputs[0].ptr<float>();
|
||||
const float* flow_data = inputs[1].ptr<float>();
|
||||
float* out_data = outputs[0].ptr<float>();
|
||||
|
||||
for (int n = 0; n < out_n; n++)
|
||||
{
|
||||
int off = total * n;
|
||||
for (int x = 0; x < out_w; x++)
|
||||
{
|
||||
for (int y = 0; y < out_h; y++)
|
||||
{
|
||||
int idx = 2 * area * n + y * out_w + x;
|
||||
float fx = flow_data[idx];
|
||||
float fy = flow_data[idx + area];
|
||||
|
||||
float x2 = x + fx;
|
||||
float y2 = y + fy;
|
||||
|
||||
if (x2 >= 0 && y2 >= 0 && x2 < out_w && y2 < out_h)
|
||||
{
|
||||
int ix2_L = x2;
|
||||
float alpha = x2 - ix2_L;
|
||||
|
||||
int iy2_T = y2;
|
||||
float beta = y2 - iy2_T;
|
||||
|
||||
int ix2_R = std::min(ix2_L + 1, out_w - 1);
|
||||
int iy2_B = std::min(iy2_T + 1, out_h - 1);
|
||||
|
||||
for (int c = 0; c < out_c; c++)
|
||||
{
|
||||
float TL = image_data[off + c * area + iy2_T * out_w + ix2_L];
|
||||
float TR = image_data[off + c * area + iy2_T * out_w + ix2_R];
|
||||
float BL = image_data[off + c * area + iy2_B * out_w + ix2_L];
|
||||
float BR = image_data[off + c * area + iy2_B * out_w + ix2_R];
|
||||
|
||||
out_data[off + c * area + y * out_w + x] = (1 - alpha) * (1 - beta) * TL +
|
||||
(1 - alpha) * beta * BL +
|
||||
alpha * (1 - beta) * TR +
|
||||
alpha * beta * BR;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int c = 0; c < out_c; c++)
|
||||
out_data[off + c * area + y * out_w + x] = fill_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
float fill_value;
|
||||
};
|
||||
|
||||
Ptr<FlowWarpLayer> FlowWarpLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<FlowWarpLayer>(new FlowWarpLayerImpl(params));
|
||||
}
|
||||
|
||||
}} // namespace cv::dnn
|
||||
687
3rdparty/opencv-4.5.4/modules/dnn/src/layers/fully_connected_layer.cpp
vendored
Normal file
687
3rdparty/opencv-4.5.4/modules/dnn/src/layers/fully_connected_layer.cpp
vendored
Normal file
@ -0,0 +1,687 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
using namespace cv::dnn::ocl4dnn;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/matmul.hpp"
|
||||
#include "../cuda4dnn/primitives/inner_product.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer
|
||||
{
|
||||
public:
|
||||
enum { VEC_ALIGN = 8 };
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
Ptr<OCL4DNNInnerProduct<float> > innerProductOp;
|
||||
std::vector<UMat> umat_blobs;
|
||||
std::vector<UMat> half_blobs;
|
||||
#endif
|
||||
|
||||
FullyConnectedLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
bias = params.get<bool>("bias_term", true);
|
||||
axis = params.get<int>("axis", 1);
|
||||
if (!blobs.empty())
|
||||
{
|
||||
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
|
||||
int numOutput = params.get<int>("num_output");
|
||||
int innerSize = (int)blobs[0].total() / numOutput;
|
||||
|
||||
CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total());
|
||||
CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total()));
|
||||
|
||||
weightsMat = blobs[0] = blobs[0].reshape(1, numOutput);
|
||||
int vecsize = weightsMat.cols;
|
||||
if (vecsize % VEC_ALIGN != 0)
|
||||
{
|
||||
int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
|
||||
Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type());
|
||||
Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned);
|
||||
wpadding.setTo(Scalar::all(0.));
|
||||
weightsMat = weightsBuf.colRange(0, vecsize);
|
||||
blobs[0].copyTo(weightsMat);
|
||||
}
|
||||
|
||||
if (bias)
|
||||
biasMat = blobs[1] = blobs[1].reshape(1, 1);
|
||||
else
|
||||
biasMat = Mat::zeros(1, numOutput, weightsMat.type());
|
||||
}
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &) const CV_OVERRIDE
|
||||
{
|
||||
int numOutput, cAxis;
|
||||
if (blobs.empty())
|
||||
{
|
||||
CV_CheckEQ(inputs.size(), (size_t)2, "");
|
||||
numOutput = inputs[1].back();
|
||||
cAxis = inputs[0].size() - 1;
|
||||
int dims = inputs[0].size();
|
||||
CV_CheckEQ(inputs[1].size(), (size_t)dims, "");
|
||||
CV_CheckGE(dims, 2, "");
|
||||
for (int i = 0; i < dims - 2; i++)
|
||||
CV_CheckEQ(inputs[0][i], inputs[1][i], "");
|
||||
CV_CheckEQ(inputs[0].back(), inputs[1][dims - 2], "");
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_CheckEQ(inputs.size(), (size_t)1, "");
|
||||
CV_CheckEQ(blobs[0].dims, 2, "");
|
||||
numOutput = blobs[0].size[0];
|
||||
CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
|
||||
cAxis = normalize_axis(axis, inputs[0]);
|
||||
}
|
||||
|
||||
MatShape outShape(cAxis + 1);
|
||||
for (int i = 0; i < cAxis; ++i)
|
||||
outShape[i] = inputs[0][i];
|
||||
outShape.back() = numOutput;
|
||||
|
||||
outputs.resize(1, outShape);
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) ||
|
||||
(((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && !blobs.empty()) ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1);
|
||||
}
|
||||
|
||||
virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
|
||||
{
|
||||
if (activ.empty() || layer.empty())
|
||||
{
|
||||
activ = layer;
|
||||
return !activ.empty();
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
class FullyConnected : public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
FullyConnected() : srcMat(0), weights(0), biasMat(0), activ(0), dstMat(0), nstripes(0), useAVX(false), useAVX2(false), useAVX512(false), useRVV(false) {}
|
||||
|
||||
static void run(const Mat& srcMat, const Mat& weights, const Mat& biasMat,
|
||||
Mat& dstMat, const ActivationLayer* activ, int nstripes)
|
||||
{
|
||||
CV_Assert( srcMat.dims == 2 && srcMat.cols == weights.cols &&
|
||||
dstMat.rows == srcMat.rows && dstMat.cols == weights.rows &&
|
||||
srcMat.type() == weights.type() && weights.type() == dstMat.type() &&
|
||||
srcMat.type() == CV_32F &&
|
||||
(biasMat.empty() || (biasMat.type() == srcMat.type() &&
|
||||
biasMat.isContinuous() && (int)biasMat.total() == dstMat.cols)) );
|
||||
|
||||
FullyConnected p;
|
||||
|
||||
p.srcMat = &srcMat;
|
||||
p.weights = &weights;
|
||||
p.biasMat = &biasMat;
|
||||
p.dstMat = &dstMat;
|
||||
p.nstripes = nstripes;
|
||||
p.activ = activ;
|
||||
p.useAVX = checkHardwareSupport(CPU_AVX);
|
||||
p.useAVX2 = checkHardwareSupport(CPU_AVX2);
|
||||
p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX;
|
||||
p.useRVV = checkHardwareSupport(CPU_RVV);
|
||||
|
||||
parallel_for_(Range(0, nstripes), p, nstripes);
|
||||
}
|
||||
|
||||
void operator()(const Range& r) const CV_OVERRIDE
|
||||
{
|
||||
int valign = FullyConnectedLayerImpl::VEC_ALIGN;
|
||||
int nsamples = srcMat->rows;
|
||||
int nw0 = weights->rows;
|
||||
int k, vecsize = srcMat->cols;
|
||||
int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
|
||||
size_t total = (size_t)nsamples*nw0;
|
||||
size_t stripeSize = (total + nstripes - 1)/nstripes;
|
||||
size_t stripeStart = r.start*stripeSize;
|
||||
size_t stripeEnd = r.end == nstripes ? total : std::min(r.end*stripeSize, total);
|
||||
size_t wstep = weights->step1();
|
||||
AutoBuffer<float> srcbuf(vecsize_aligned + valign);
|
||||
float* sptr = alignPtr(srcbuf.data(), (int)(valign*sizeof(float)));
|
||||
|
||||
for( k = vecsize; k < vecsize_aligned; k++ )
|
||||
sptr[k] = 0.f;
|
||||
|
||||
for( size_t ofs = stripeStart; ofs < stripeEnd; )
|
||||
{
|
||||
int sampleIdx = (int)(ofs / nw0);
|
||||
int delta = (int)(ofs - (size_t)sampleIdx*nw0);
|
||||
const float* sptr_ = srcMat->ptr<float>(sampleIdx);
|
||||
const float* wptr = weights->ptr<float>(delta);
|
||||
float* dptr = dstMat->ptr<float>(sampleIdx) + delta;
|
||||
const float* biasptr = biasMat->ptr<float>() + delta;
|
||||
int nw = std::min(nw0 - delta, (int)(stripeEnd - ofs));
|
||||
|
||||
memcpy(sptr, sptr_, vecsize*sizeof(sptr[0]));
|
||||
|
||||
#if CV_TRY_AVX512_SKX
|
||||
if( useAVX512 )
|
||||
opt_AVX512_SKX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
|
||||
else
|
||||
#endif
|
||||
#if CV_TRY_AVX2
|
||||
if( useAVX2 )
|
||||
opt_AVX2::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
|
||||
else
|
||||
#endif
|
||||
#if CV_TRY_AVX
|
||||
if( useAVX )
|
||||
opt_AVX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
|
||||
else
|
||||
#endif
|
||||
#if CV_TRY_RVV
|
||||
if( useRVV )
|
||||
opt_RVV::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
for( ; i <= nw - 4; i += 4, wptr += 4*wstep )
|
||||
{
|
||||
v_float32x4 vs0 = v_setall_f32(0.f);
|
||||
v_float32x4 vs1 = v_setall_f32(0.f);
|
||||
v_float32x4 vs2 = v_setall_f32(0.f);
|
||||
v_float32x4 vs3 = v_setall_f32(0.f);
|
||||
|
||||
for( k = 0; k < vecsize; k += 4 )
|
||||
{
|
||||
v_float32x4 v = v_load_aligned(sptr + k);
|
||||
vs0 = v_fma(v, v_load_aligned(wptr + k), vs0);
|
||||
vs1 = v_fma(v, v_load_aligned(wptr + wstep + k), vs1);
|
||||
vs2 = v_fma(v, v_load_aligned(wptr + wstep*2 + k), vs2);
|
||||
vs3 = v_fma(v, v_load_aligned(wptr + wstep*3 + k), vs3);
|
||||
}
|
||||
|
||||
v_float32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3);
|
||||
s += v_load(biasptr + i);
|
||||
v_store(dptr + i, s);
|
||||
}
|
||||
#endif
|
||||
|
||||
for( ; i < nw; i++, wptr += wstep )
|
||||
{
|
||||
float s0=biasptr[i];
|
||||
|
||||
for( k = 0; k < vecsize; k++ )
|
||||
{
|
||||
float v = sptr[k];
|
||||
s0 += v*wptr[k];
|
||||
}
|
||||
dptr[i] = s0;
|
||||
}
|
||||
}
|
||||
|
||||
if(activ)
|
||||
activ->forwardSlice(dptr, dptr, 1, 1, delta, delta + nw);
|
||||
|
||||
ofs += nw;
|
||||
}
|
||||
}
|
||||
|
||||
const Mat *srcMat, *weights, *biasMat;
|
||||
const ActivationLayer* activ;
|
||||
Mat* dstMat;
|
||||
int nstripes;
|
||||
bool useAVX;
|
||||
bool useAVX2;
|
||||
bool useAVX512;
|
||||
bool useRVV;
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
innerProductOp.release();
|
||||
umat_blobs.clear();
|
||||
half_blobs.clear();
|
||||
}
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
if (inputs.size() == 2)
|
||||
{
|
||||
int dims = outputs[0].dims;
|
||||
int m = inputs[0].size[dims - 2];
|
||||
int n = inputs[0].size[dims - 1];
|
||||
int k = inputs[1].size[dims - 1];
|
||||
int rows = inputs[0].total() / (m * n);
|
||||
|
||||
MatShape sh_A = shape(rows, m * n);
|
||||
MatShape sh_B = shape(rows, n * k);
|
||||
MatShape sh_C = shape(rows, m * k);
|
||||
UMat inp = inputs[0].reshape(1, sh_A.size(), &sh_A[0]);
|
||||
UMat weight = inputs[1].reshape(1, sh_B.size(), &sh_B[0]);
|
||||
UMat out = outputs[0].reshape(1, sh_C.size(), &sh_C[0]);
|
||||
|
||||
UMat A, B, C, A_fp32, B_fp32, C_fp32;
|
||||
for (int i = 0; i < rows; ++i)
|
||||
{
|
||||
A = inp.row(i).reshape(1, m);
|
||||
B = weight.row(i).reshape(1, n);
|
||||
C = out.row(i).reshape(1, m);
|
||||
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(A, A_fp32);
|
||||
convertFp16(B, B_fp32);
|
||||
convertFp16(C, C_fp32);
|
||||
}
|
||||
else
|
||||
{
|
||||
A_fp32 = A;
|
||||
B_fp32 = B;
|
||||
C_fp32 = C;
|
||||
}
|
||||
cv::gemm(A_fp32, B_fp32, 1, noArray(), 0, C_fp32);
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(A_fp32, A);
|
||||
convertFp16(B_fp32, B);
|
||||
convertFp16(C_fp32, C);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int axisCan = normalize_axis(axis, inputs[0].dims);
|
||||
int numOutput = blobs[0].size[0];
|
||||
int innerSize = blobs[0].size[1];
|
||||
int outerSize = total(shape(inputs[0]), 0, axisCan);
|
||||
bool ret = true;
|
||||
|
||||
if (innerProductOp.empty())
|
||||
{
|
||||
size_t n = blobs.size();
|
||||
umat_blobs.resize(n);
|
||||
for (int i = 0; i < n; i++) blobs[i].copyTo(umat_blobs[i]);
|
||||
|
||||
OCL4DNNInnerProductConfig config;
|
||||
config.num_output = numOutput;
|
||||
config.bias_term = bias;
|
||||
config.M = outerSize;
|
||||
config.K = innerSize;
|
||||
config.use_half = use_half;
|
||||
|
||||
if (use_half)
|
||||
{
|
||||
half_blobs.resize(umat_blobs.size());
|
||||
for (int i = 0; i < umat_blobs.size(); i++)
|
||||
{
|
||||
if (!umat_blobs[i].empty())
|
||||
convertFp16(umat_blobs[i], half_blobs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
innerProductOp = Ptr<OCL4DNNInnerProduct<float> >(new OCL4DNNInnerProduct<float>(config));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
MatShape inshape, outshape;
|
||||
inshape = shape(outerSize, innerSize);
|
||||
outshape = shape(outerSize, numOutput);
|
||||
|
||||
UMat srcMat, dstMat;
|
||||
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
|
||||
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
|
||||
|
||||
if (!innerProductOp->Forward(srcMat, (use_half) ? half_blobs[0] : umat_blobs[0],
|
||||
(bias) ? (use_half ? half_blobs[1] : umat_blobs[1]) : UMat(),
|
||||
dstMat))
|
||||
{
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!use_half && bias && (outerSize > 1))
|
||||
{
|
||||
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
|
||||
UMat& biases = umat_blobs[1];
|
||||
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret) return true;
|
||||
|
||||
UMat& weights = umat_blobs[0];
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
MatShape inshape, outshape;
|
||||
inshape = shape(outerSize, innerSize);
|
||||
outshape = shape(outerSize, numOutput);
|
||||
|
||||
UMat srcMat, dstMat, srcMat_fp32, dstMat_fp32;
|
||||
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
|
||||
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
|
||||
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(srcMat, srcMat_fp32);
|
||||
convertFp16(dstMat, dstMat_fp32);
|
||||
}
|
||||
else
|
||||
{
|
||||
srcMat_fp32 = srcMat;
|
||||
dstMat_fp32 = dstMat;
|
||||
}
|
||||
|
||||
cv::gemm(srcMat_fp32, weights, 1, noArray(), 0, dstMat_fp32, GEMM_2_T);
|
||||
|
||||
if (bias)
|
||||
{
|
||||
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
|
||||
UMat& biases = umat_blobs[1];
|
||||
cv::gemm(biasOnesMat, biases, 1, dstMat_fp32, 1, dstMat_fp32, 0);
|
||||
}
|
||||
if (use_half)
|
||||
{
|
||||
convertFp16(srcMat_fp32, srcMat);
|
||||
convertFp16(dstMat_fp32, dstMat);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> input, output;
|
||||
inputs_arr.getMatVector(input);
|
||||
outputs_arr.getMatVector(output);
|
||||
|
||||
if (!blobs.empty())
|
||||
{
|
||||
int axisCan = normalize_axis(axis, input[0].dims);
|
||||
int outerSize = input[0].total(0, axisCan);
|
||||
|
||||
for (size_t i = 0; i < input.size(); i++)
|
||||
{
|
||||
Mat srcMat = input[i].reshape(1, outerSize);
|
||||
Mat dstMat = output[i].reshape(1, outerSize);
|
||||
|
||||
const int nstripes = getNumThreads();
|
||||
FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float* inpData = input[0].ptr<float>();
|
||||
float* weightData = input[1].ptr<float>();
|
||||
float* outData = output[0].ptr<float>();
|
||||
|
||||
int dims = output[0].dims;
|
||||
int numSlice = output[0].total() / output[0].total(dims - 2);
|
||||
int m = input[0].size[dims - 2];
|
||||
int n = input[0].size[dims - 1];
|
||||
int k = input[1].size[dims - 1];
|
||||
for (int i = 0; i < numSlice; i++)
|
||||
{
|
||||
Mat inpSlice(m, n, CV_32F, inpData);
|
||||
Mat weightSlice(n, k, CV_32F, weightData);
|
||||
Mat outSlice(m, k, CV_32F, outData);
|
||||
|
||||
outSlice = inpSlice * weightSlice;
|
||||
inpData += inpSlice.total();
|
||||
weightData += weightSlice.total();
|
||||
outData += outSlice.total();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
if (weightsMat.empty())
|
||||
{
|
||||
CV_Assert(!bias);
|
||||
return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle));
|
||||
}
|
||||
|
||||
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
|
||||
auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank());
|
||||
auto biasMat_ = bias ? biasMat : Mat();
|
||||
return make_cuda_node<cuda4dnn::InnerProductOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_HALIDE
|
||||
int inW, inH, inC, inN, outC = blobs[0].size[0];
|
||||
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
|
||||
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
|
||||
auto weights = wrapToHalideBuffer(blobs[0], {inW, inH, inC, outC});
|
||||
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n");
|
||||
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
|
||||
Halide::RDom r(0, inW, 0, inH, 0, inC);
|
||||
Halide::Expr topExpr = sum(inputBuffer(r.x, r.y, r.z, n) *
|
||||
weights(r.x, r.y, r.z, c));
|
||||
if (bias)
|
||||
{
|
||||
Halide::Buffer<float> bias = wrapToHalideBuffer(blobs[1], {outC});
|
||||
topExpr += bias(c);
|
||||
}
|
||||
top(x, y, c, n) = topExpr;
|
||||
return Ptr<BackendNode>(new HalideBackendNode(top));
|
||||
#endif // HAVE_HALIDE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::FullyConnectedLayer ieLayer(name);
|
||||
|
||||
const int outNum = blobs[0].size[0];
|
||||
ieLayer.setOutputNum(outNum);
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
addConstantData("weights", wrapToInfEngineBlob(blobs[0], {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW), l);
|
||||
if (bias)
|
||||
addConstantData("biases", wrapToInfEngineBlob(blobs[1], {(size_t)outNum}, InferenceEngine::Layout::C), l);
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
std::shared_ptr<ngraph::Node> matmul;
|
||||
|
||||
if (nodes.size() == 2)
|
||||
{
|
||||
auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<int64_t> data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]};
|
||||
auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
|
||||
auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
|
||||
|
||||
std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
|
||||
auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
|
||||
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
|
||||
}
|
||||
|
||||
if (bias) {
|
||||
auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
|
||||
ngraph::Shape{(size_t)blobs[1].size[1]}, blobs[1].data);
|
||||
matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(matmul));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
if (blobs.empty())
return false;

int numOutput = blobs[0].size[0];
float inputScale = scales[0][0], outputScale = scales[1][0];
int inputZp = zeropoints[0][0];

Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S);
Mat biasQuantized(1, numOutput, CV_32S);
Mat outputMultiplier(1, numOutput, CV_32F);

double realMin, realMax, weightsScale;
for( int i = 0; i < numOutput; i++ )
{
// Quantize weights
cv::minMaxIdx(weightsMat.row(i), &realMin, &realMax);
realMin = std::min(realMin, 0.0);
realMax = std::max(realMax, 0.0);
weightsScale = (realMax == realMin) ? 1.0 : std::max(-realMin, realMax)/127;
weightsMat.row(i).convertTo(weightsQuantized.row(i), CV_8S, 1.f/weightsScale);

// Quantize biases
float biasScale = inputScale * weightsScale;
biasQuantized.at<int>(i) = (int)std::round(biasMat.at<float>(i)/biasScale) - inputZp*(cv::sum(weightsQuantized.row(i))[0]);

// Store multiplier
outputMultiplier.at<float>(i) = biasScale / outputScale;
}

params.blobs.clear();
params.blobs.push_back(weightsQuantized.reshape(1, shape(blobs[0])));
params.blobs.push_back(biasQuantized);
params.blobs.push_back(outputMultiplier);
return true;
}

virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(inputs); // suppress unused variable warning
long flops = 0;

int innerSize = blobs[0].size[1];
for(int i = 0; i < outputs.size(); i++)
{
flops += CV_BIG_INT(3)*innerSize*total(outputs[i]);
}

return flops;

}

bool bias;
Mat weightsMat, biasMat;
Ptr<ActivationLayer> activ;
};

Ptr<InnerProductLayer> InnerProductLayer::create(const LayerParams& params)
{
return Ptr<InnerProductLayer>(new FullyConnectedLayerImpl(params));
}

}
}
254
3rdparty/opencv-4.5.4/modules/dnn/src/layers/layers_common.cpp
vendored
Normal file
254
3rdparty/opencv-4.5.4/modules/dnn/src/layers/layers_common.cpp
vendored
Normal file
@ -0,0 +1,254 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
namespace util
|
||||
{
|
||||
|
||||
std::string makeName(const std::string& str1, const std::string& str2)
|
||||
{
|
||||
return str1 + str2;
|
||||
}
|
||||
|
||||
bool getParameter(const LayerParams &params, const std::string& nameBase, const std::string& nameAll,
|
||||
std::vector<size_t>& parameter, bool hasDefault = false, const std::vector<size_t>& defaultValue = std::vector<size_t>(2, 0))
|
||||
{
|
||||
std::string nameH = makeName(nameBase, std::string("_h"));
|
||||
std::string nameW = makeName(nameBase, std::string("_w"));
|
||||
std::string nameAll_ = nameAll;
|
||||
if (nameAll_ == "")
|
||||
nameAll_ = nameBase;
|
||||
|
||||
if (params.has(nameH) && params.has(nameW))
|
||||
{
|
||||
CV_Assert(params.get<int>(nameH) >= 0 && params.get<int>(nameW) >= 0);
|
||||
parameter.push_back(params.get<int>(nameH));
|
||||
parameter.push_back(params.get<int>(nameW));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (params.has(nameAll_))
|
||||
{
|
||||
DictValue param = params.get(nameAll_);
|
||||
for (int i = 0; i < param.size(); i++) {
|
||||
CV_Assert(param.get<int>(i) >= 0);
|
||||
parameter.push_back(param.get<int>(i));
|
||||
}
|
||||
if (parameter.size() == 1)
|
||||
parameter.resize(2, parameter[0]);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (hasDefault)
|
||||
{
|
||||
parameter = defaultValue;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void getKernelSize(const LayerParams &params, std::vector<size_t>& kernel)
|
||||
{
|
||||
if (!util::getParameter(params, "kernel", "kernel_size", kernel))
|
||||
CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
|
||||
|
||||
for (int i = 0; i < kernel.size(); i++)
|
||||
CV_Assert(kernel[i] > 0);
|
||||
}
|
||||
|
||||
void getStrideAndPadding(const LayerParams &params, std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
|
||||
std::vector<size_t>& strides, cv::String& padMode, size_t kernel_size = 2)
|
||||
{
|
||||
if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) {
|
||||
CV_Assert(params.get<int>("pad_t") >= 0 && params.get<int>("pad_l") >= 0 &&
|
||||
params.get<int>("pad_b") >= 0 && params.get<int>("pad_r") >= 0);
|
||||
pads_begin.push_back(params.get<int>("pad_t"));
|
||||
pads_begin.push_back(params.get<int>("pad_l"));
|
||||
pads_end.push_back(params.get<int>("pad_b"));
|
||||
pads_end.push_back(params.get<int>("pad_r"));
|
||||
}
|
||||
else {
|
||||
util::getParameter(params, "pad", "pad", pads_begin, true, std::vector<size_t>(kernel_size, 0));
|
||||
if (pads_begin.size() < 4)
|
||||
pads_end = pads_begin;
|
||||
else
|
||||
{
|
||||
pads_end = std::vector<size_t>(pads_begin.begin() + pads_begin.size() / 2, pads_begin.end());
|
||||
pads_begin.resize(pads_begin.size() / 2);
|
||||
}
|
||||
CV_Assert(pads_begin.size() == pads_end.size());
|
||||
}
|
||||
util::getParameter(params, "stride", "stride", strides, true, std::vector<size_t>(kernel_size, 1));
|
||||
|
||||
padMode = "";
|
||||
if (params.has("pad_mode"))
|
||||
{
|
||||
padMode = params.get<String>("pad_mode");
|
||||
}
|
||||
|
||||
for (int i = 0; i < strides.size(); i++)
|
||||
CV_Assert(strides[i] > 0);
|
||||
}
|
||||
}
|
||||
|
||||
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
|
||||
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
|
||||
std::vector<size_t>& strides, cv::String &padMode)
|
||||
{
|
||||
bool is_global = params.get<bool>("global_pooling", false);
|
||||
globalPooling.resize(3);
|
||||
globalPooling[0] = params.get<bool>("global_pooling_d", is_global);
|
||||
globalPooling[1] = params.get<bool>("global_pooling_h", is_global);
|
||||
globalPooling[2] = params.get<bool>("global_pooling_w", is_global);
|
||||
|
||||
if (globalPooling[0] || globalPooling[1] || globalPooling[2])
|
||||
{
|
||||
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode);
|
||||
if ((globalPooling[0] && params.has("kernel_d")) ||
|
||||
(globalPooling[1] && params.has("kernel_h")) ||
|
||||
(globalPooling[2] && params.has("kernel_w")) ||
|
||||
params.has("kernel_size")) {
|
||||
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
|
||||
}
|
||||
|
||||
kernel.resize(3);
|
||||
kernel[0] = params.get<int>("kernel_d", 1);
|
||||
kernel[1] = params.get<int>("kernel_h", 1);
|
||||
kernel[2] = params.get<int>("kernel_w", 1);
|
||||
|
||||
for (int i = 0, j = globalPooling.size() - pads_begin.size(); i < pads_begin.size(); i++, j++) {
|
||||
if ((pads_begin[i] != 0 || pads_end[i] != 0) && globalPooling[j])
|
||||
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0");
|
||||
}
|
||||
for (int i = 0, j = globalPooling.size() - strides.size(); i < strides.size(); i++, j++) {
|
||||
if (strides[i] != 1 && globalPooling[j])
|
||||
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
util::getKernelSize(params, kernel);
|
||||
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
|
||||
}
|
||||
}
|
||||
|
||||
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
|
||||
std::vector<size_t>& pads_end, std::vector<size_t>& strides,
|
||||
std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
|
||||
{
|
||||
util::getKernelSize(params, kernel);
|
||||
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
|
||||
util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
|
||||
util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));
|
||||
|
||||
for (int i = 0; i < dilations.size(); i++)
|
||||
CV_Assert(dilations[i] > 0);
|
||||
}
|
||||
|
||||
// From TensorFlow code:
// Total padding on rows and cols is
// Pr = (R' - 1) * S + Kr - R
// Pc = (C' - 1) * S + Kc - C
// where (R', C') are output dimensions, (R, C) are input dimensions, S
// is stride, (Kr, Kc) are filter dimensions.
// We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
// and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means
// we pad more on the right and bottom than on the top and left.
void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& stride, const String &padMode,
const std::vector<size_t>& dilation, std::vector<int>& out)
{
if (padMode == "VALID")
{
for (int i = 0; i < inp.size(); i++)
out.push_back((inp[i] - dilation[i] * (kernel[i] - 1) - 1 + stride[i]) / stride[i]);
}
else if (padMode == "SAME")
{
for (int i = 0; i < inp.size(); i++)
out.push_back((inp[i] - 1 + stride[i]) / stride[i]);
}
else
{
CV_Error(Error::StsError, "Unsupported padding mode");
}
}

void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& strides, const String &padMode,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end)
{
if (padMode == "SAME" || padMode == "VALID")
{
pads_begin.assign(kernel.size(), 0);
pads_end.assign(kernel.size(), 0);
}
if (padMode == "SAME")
{
CV_Assert_N(kernel.size() == strides.size(), kernel.size() == inp.size());
for (int i = 0; i < pads_begin.size(); i++) {
// There are test cases with stride > kernel.
if (strides[i] <= kernel[i])
{
int pad = (kernel[i] - 1 - (inp[i] - 1 + strides[i]) % strides[i]) / 2;
pads_begin[i] = pads_end[i] = pad;
}
}
}
}

}
}
79
3rdparty/opencv-4.5.4/modules/dnn/src/layers/layers_common.hpp
vendored
Normal file
79
3rdparty/opencv-4.5.4/modules/dnn/src/layers/layers_common.hpp
vendored
Normal file
@ -0,0 +1,79 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>

#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
// dispatched AVX/AVX2 optimizations
#include "./layers_common.simd.hpp"
#include "layers/layers_common.simd_declarations.hpp"
#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY

#ifdef HAVE_OPENCL
#include "../ocl4dnn/include/ocl4dnn.hpp"
#endif

namespace cv
{
namespace dnn
{
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
cv::String &padMode, std::vector<size_t>& adjust_pads);

void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);

void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& stride, const String &padMode,
const std::vector<size_t>& dilation, std::vector<int>& out);

void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& strides, const String &padMode,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end);
}
}

#endif
1356
3rdparty/opencv-4.5.4/modules/dnn/src/layers/layers_common.simd.hpp
vendored
Normal file
1356
3rdparty/opencv-4.5.4/modules/dnn/src/layers/layers_common.simd.hpp
vendored
Normal file
File diff suppressed because it is too large
530
3rdparty/opencv-4.5.4/modules/dnn/src/layers/lrn_layer.cpp
vendored
Normal file
530
3rdparty/opencv-4.5.4/modules/dnn/src/layers/lrn_layer.cpp
vendored
Normal file
@ -0,0 +1,530 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include "../op_vkcom.hpp"
|
||||
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/dnn/shape_utils.hpp"
|
||||
#include "opencv2/core/hal/hal.hpp"
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
using namespace cv::dnn::ocl4dnn;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/lrn.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class LRNLayerImpl CV_FINAL : public LRNLayer
|
||||
{
|
||||
public:
|
||||
LRNLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
type = -1;
|
||||
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
|
||||
if (nrmType == "ACROSS_CHANNELS")
|
||||
type = CHANNEL_NRM;
|
||||
else if (nrmType == "WITHIN_CHANNEL")
|
||||
type = SPATIAL_NRM;
|
||||
else
|
||||
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
|
||||
|
||||
size = params.get<int>("local_size", 5);
|
||||
if (size % 2 != 1 || size <= 0)
|
||||
CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");
|
||||
|
||||
alpha = params.get<double>("alpha", 1);
|
||||
beta = params.get<double>("beta", 0.75);
|
||||
bias = params.get<double>("bias", 1);
|
||||
normBySize = params.get<bool>("norm_by_size", true);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
Ptr<OCL4DNNLRN<float> > lrnOp;
|
||||
#endif
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) {
|
||||
return bias == (int)bias;
|
||||
}
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||
return bias == (int)bias;
|
||||
}
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
backendId == DNN_BACKEND_HALIDE ||
|
||||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM));
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
lrnOp.release();
|
||||
}
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
if (lrnOp.empty())
|
||||
{
|
||||
OCL4DNNLRNConfig config;
|
||||
config.lrn_type = type == CHANNEL_NRM ?
|
||||
LRNParameter_NormRegion_ACROSS_CHANNELS :
|
||||
LRNParameter_NormRegion_WITHIN_CHANNEL;
|
||||
|
||||
CHECK_EQ(size % 2, 1)<< "LRN only supports odd values for local_size";
|
||||
config.local_size = size;
|
||||
config.alpha = alpha;
|
||||
config.beta = beta;
|
||||
config.k = bias;
|
||||
CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "
|
||||
<< "corresponding to (num, channels, height, width)";
|
||||
config.batch_size = inputs[0].size[0];
|
||||
config.channels = inputs[0].size[1];
|
||||
config.height = inputs[0].size[2];
|
||||
config.width = inputs[0].size[3];
|
||||
config.norm_by_size = normBySize;
|
||||
config.use_half = use_half;
|
||||
|
||||
lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
|
||||
}
|
||||
|
||||
if (!lrnOp->Forward(inputs[0], outputs[0]))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_Assert(inputs_arr.total() == outputs_arr.total());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert(inputs.size() == outputs.size());
|
||||
|
||||
for (int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
CV_Assert(inputs[i].dims == 4);
|
||||
|
||||
Mat &src = inputs[i];
|
||||
Mat &dst = outputs[i];
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case CHANNEL_NRM:
|
||||
channelNormalization(src, dst);
|
||||
break;
|
||||
case SPATIAL_NRM:
|
||||
spatialNormalization(src, dst);
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class ChannelLRN : public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
ChannelLRN(const float* src, float* dst, int channels, int ksize,
|
||||
float alpha1, float bias1, float beta1,
|
||||
size_t planeSize, int nsamples, int nstripes)
|
||||
{
|
||||
src_ = src; dst_ = dst;
|
||||
channels_ = channels;
|
||||
ksize_ = ksize;
|
||||
alpha1_ = alpha1; bias1_ = bias1; beta1_ = beta1;
|
||||
planeSize_ = planeSize; nsamples_ = nsamples; nstripes_ = nstripes;
|
||||
}
|
||||
|
||||
void operator()(const Range& r) const CV_OVERRIDE
|
||||
{
|
||||
int nsamples = nsamples_, nstripes = nstripes_;
|
||||
size_t planeSize = planeSize_, planeSize_n = planeSize * nsamples;
|
||||
size_t elemsPerStripe = (planeSize_n + nstripes - 1)/nstripes;
|
||||
size_t rstart = r.start*elemsPerStripe;
|
||||
size_t rend = r.end == nstripes ? planeSize_n : r.end*elemsPerStripe;
|
||||
rstart = std::min(rstart, planeSize_n);
|
||||
rend = std::min(rend, planeSize_n);
|
||||
float alpha1 = alpha1_, bias1 = bias1_, beta1 = beta1_;
|
||||
int k, channels = channels_, ksize = ksize_;
|
||||
|
||||
AutoBuffer<float> buf_((channels + ksize + 1)*2);
|
||||
float* acc = buf_.data();
|
||||
float* buf = acc + channels + ksize + 1;
|
||||
for( k = 0; k <= ksize; k++ )
|
||||
buf[-k-1] = buf[channels + k] = 0.f;
|
||||
|
||||
for( size_t ofs = rstart; ofs < rend; )
|
||||
{
|
||||
int sampleIdx = (int)(ofs/planeSize);
|
||||
if( sampleIdx >= nsamples )
|
||||
break;
|
||||
size_t ofs0 = ofs - sampleIdx*planeSize;
|
||||
size_t ofs1 = std::min(planeSize - ofs0, rend - ofs) + ofs;
|
||||
const float* src = src_ + sampleIdx*planeSize*channels + ofs0;
|
||||
float* dst = dst_ + sampleIdx*planeSize*channels + ofs0;
|
||||
|
||||
for( ; ofs < ofs1; ofs++, src++, dst++ )
|
||||
{
|
||||
for( k = 0; k < channels; k++ )
|
||||
buf[k] = src[k*planeSize];
|
||||
float s = 0;
|
||||
for( k = 0; k < ksize; k++ )
|
||||
s += buf[k]*buf[k];
|
||||
for( k = 0; k < channels; k++ )
|
||||
{
|
||||
float x1 = buf[k + ksize];
|
||||
float x0 = buf[k - ksize - 1];
|
||||
s = std::max(s + (x1 + x0)*(x1 - x0), 0.f);
|
||||
acc[k] = (float)(alpha1*s + bias1);
|
||||
}
|
||||
|
||||
hal::log32f(acc, acc, channels);
|
||||
for( k = 0; k < channels; k++ )
|
||||
acc[k] *= beta1;
|
||||
hal::exp32f(acc, acc, channels);
|
||||
|
||||
for( k = 0; k < channels; k++ )
|
||||
dst[k*planeSize] = buf[k]*acc[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const float* src_;
|
||||
float* dst_;
|
||||
float alpha1_, bias1_, beta1_;
|
||||
size_t planeSize_;
|
||||
int channels_, ksize_, nsamples_, nstripes_;
|
||||
};
|
||||
|
||||
void channelNormalization(Mat &srcBlob, Mat &dstBlob)
|
||||
{
|
||||
int num = srcBlob.size[0];
|
||||
int channels = srcBlob.size[1];
|
||||
int ksize = (size - 1) / 2;
|
||||
int sizeNormFactor = normBySize ? size : 1;
|
||||
size_t planeSize = srcBlob.size[2]*srcBlob.size[3];
|
||||
|
||||
int nstripes = std::max(getNumThreads(), 1);
|
||||
|
||||
ChannelLRN clrn(srcBlob.ptr<float>(), dstBlob.ptr<float>(), channels,
|
||||
ksize, alpha/sizeNormFactor, bias, -beta, planeSize, num, nstripes);
|
||||
parallel_for_(Range(0, nstripes), clrn, nstripes);
|
||||
}
|
||||
|
||||
void sqrBoxFilter_(const Mat &src, Mat &dst)
|
||||
{
|
||||
Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);
|
||||
cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
|
||||
}
|
||||
|
||||
void spatialNormalization(Mat &srcBlob, Mat &dstBlob)
|
||||
{
|
||||
int num = srcBlob.size[0];
|
||||
int channels = srcBlob.size[1];
|
||||
int sizeNormFactor = normBySize ? size*size : 1;
|
||||
|
||||
Mat srcMat = srcBlob;
|
||||
Mat dstMat = dstBlob;
|
||||
|
||||
for (int n = 0; n < num; n++)
|
||||
{
|
||||
for (int cn = 0; cn < channels; cn++)
|
||||
{
|
||||
Mat src = getPlane(srcMat, n, cn);
|
||||
Mat dst = getPlane(dstMat, n, cn);
|
||||
|
||||
sqrBoxFilter_(src, dst);
|
||||
|
||||
dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
|
||||
cv::pow(dst, beta, dst);
|
||||
cv::divide(src, dst, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
cuda4dnn::LRNType type_;
|
||||
if (type == CHANNEL_NRM)
|
||||
type_ = cuda4dnn::LRNType::ACROSS_CHANNELS;
|
||||
else if (type == SPATIAL_NRM)
|
||||
type_ = cuda4dnn::LRNType::WITHIN_CHANNEL;
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unknown normalization region");
|
||||
|
||||
float alphaSize = alpha;
|
||||
if (!normBySize) {
|
||||
switch (type) {
|
||||
case CHANNEL_NRM: alphaSize = alpha * size; break;
|
||||
case SPATIAL_NRM: alphaSize = alpha * size * size; break;
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t largestInputSize = 0;
|
||||
for(auto& wrapper : inputs) {
|
||||
auto input_wrapper = wrapper.dynamicCast<CUDABackendWrapper>();
|
||||
auto shape = input_wrapper->getShape();
|
||||
largestInputSize = std::max<std::size_t>(
|
||||
largestInputSize,
|
||||
std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<int>())
|
||||
);
|
||||
}
|
||||
|
||||
return make_cuda_node<cuda4dnn::LRNOp>(preferableTarget,
|
||||
std::move(context->cudnn_handle), type_, size, alphaSize, beta, bias, largestInputSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_VULKAN
|
||||
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpLRN(size / 2, bias, alpha, beta, normBySize));
|
||||
return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
|
||||
#endif
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_HALIDE
|
||||
float alphaSize = alpha;
|
||||
if (normBySize)
|
||||
alphaSize /= (type == CHANNEL_NRM ? size : size * size);
|
||||
int width, height, channels, numImgs;
|
||||
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
|
||||
getCanonicalSize(inputBuffer, &width, &height, &channels, &numImgs);
|
||||
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n");
|
||||
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
|
||||
Halide::Func padded_sq(name + "_padded_sq");
|
||||
Halide::Func sq("sq");
|
||||
sq(x, y, c, n) = inputBuffer(x, y, c, n) * inputBuffer(x, y, c, n);
|
||||
|
||||
Halide::Func bounded =
|
||||
Halide::BoundaryConditions::constant_exterior(sq, 0, 0, width,
|
||||
0, height,
|
||||
0, channels,
|
||||
0, numImgs);
|
||||
padded_sq(x, y, c, n) = bounded(x, y, c, n);
|
||||
|
||||
Halide::Expr base;
|
||||
if (type == CHANNEL_NRM)
|
||||
{
|
||||
Halide::RDom r((1 - size) / 2, size);
|
||||
base = alphaSize * sum(padded_sq(x, y, c + r, n));
|
||||
}
|
||||
else // SPATIAL_NRM
|
||||
{
|
||||
Halide::RDom r((1 - size) / 2, size, (1 - size) / 2, size);
|
||||
base = alphaSize * sum(padded_sq(x + r.x, y + r.y, c, n));
|
||||
}
|
||||
base += static_cast<float>(bias);
|
||||
top(x, y, c, n) = inputBuffer(x, y, c, n) / pow(base, beta);
|
||||
return Ptr<BackendNode>(new HalideBackendNode({ padded_sq, top }));
|
||||
#endif // HAVE_HALIDE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
virtual void applyHalideScheduler(Ptr<BackendNode>& node,
|
||||
const std::vector<Mat*> &inputs,
|
||||
const std::vector<Mat> &outputs,
|
||||
int targetId) const CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_HALIDE
|
||||
if (targetId != DNN_TARGET_CPU)
|
||||
{
|
||||
Layer::applyHalideScheduler(node, inputs, outputs, targetId);
|
||||
return;
|
||||
}
|
||||
int outW, outH, outC, outN;
|
||||
getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
|
||||
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n"), yo("yo"), yi("yi"), tile("tile");
|
||||
Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs[1];
|
||||
Halide::Func& padded_sq = node.dynamicCast<HalideBackendNode>()->funcs[0];
|
||||
|
||||
if (outW < 8 || outH <= 2)
|
||||
return;
|
||||
|
||||
top.reorder(x, c, y, n)
|
||||
.split(y, yo, yi, 2)
|
||||
.fuse(yo, n, tile)
|
||||
.parallel(tile)
|
||||
.unroll(yi)
|
||||
.vectorize(x, 8);
|
||||
padded_sq.store_at(top, tile)
|
||||
.compute_at(top, yi);
|
||||
#endif // HAVE_HALIDE
|
||||
}
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
float alphaSize = alpha;
|
||||
if (!normBySize)
|
||||
alphaSize *= (type == SPATIAL_NRM ? size*size : size);
|
||||
|
||||
InferenceEngine::Builder::NormLayer ieLayer(name);
|
||||
ieLayer.setSize(size);
|
||||
ieLayer.setAlpha(alphaSize);
|
||||
ieLayer.setBeta(beta);
|
||||
ieLayer.setAcrossMaps(type == CHANNEL_NRM);
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
l.getParameters()["k"] = bias;
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
float alphaSize = alpha;
|
||||
if (!normBySize)
|
||||
alphaSize *= (type == SPATIAL_NRM ? size*size : size);
|
||||
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
std::vector<int64_t> axes;
|
||||
if (type != SPATIAL_NRM) {
|
||||
axes = {1};
|
||||
} else {
|
||||
axes.resize(ieInpNode->get_shape().size() - 2);
|
||||
std::iota(axes.begin(), axes.end(), 2);
|
||||
}
|
||||
auto ngraph_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes.size()}, axes.data());
|
||||
auto lrn = std::make_shared<ngraph::op::LRN>(ieInpNode, ngraph_axes, alphaSize, beta, bias, size);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(lrn));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(outputs); // suppress unused variable warning
|
||||
CV_Assert(inputs.size() > 0);
|
||||
long flops = 0;
|
||||
|
||||
for(int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
if (type == CHANNEL_NRM)
|
||||
{
|
||||
int channels = inputs[i][1];
|
||||
int ksize = (size - 1) / 2;
|
||||
|
||||
flops += inputs[i][0]*(std::min(ksize, channels)*2*total(inputs[i], 2) + channels*4*total(inputs[i], 2));
|
||||
|
||||
if (ksize < channels)
|
||||
{
|
||||
flops += (size + 2*(channels - size))*total(inputs[i], 2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
flops += total(inputs[i])*(2*size*size + 2);
|
||||
}
|
||||
}
|
||||
return flops;
|
||||
}
|
||||
|
||||
private:
|
||||
enum Type
|
||||
{
|
||||
CHANNEL_NRM,
|
||||
SPATIAL_NRM
|
||||
};
|
||||
};
|
||||
|
||||
Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<LRNLayer>(new LRNLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
196
3rdparty/opencv-4.5.4/modules/dnn/src/layers/max_unpooling_layer.cpp
vendored
Normal file
@ -0,0 +1,196 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2016, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
/*
|
||||
Implementation of the max unpooling layer, which reverses max pooling using the stored indices.
|
||||
*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/max_unpooling.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class MaxUnpoolLayerImpl CV_FINAL : public MaxUnpoolLayer
|
||||
{
|
||||
public:
|
||||
MaxUnpoolLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
poolKernel = Size(params.get<int>("pool_k_w"), params.get<int>("pool_k_h"));
|
||||
poolPad = Size(params.get<int>("pool_pad_w"), params.get<int>("pool_pad_h"));
|
||||
poolStride = Size(params.get<int>("pool_stride_w"), params.get<int>("pool_stride_h"));
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 2 || inputs.size() == 3);
|
||||
CV_Assert(total(inputs[0]) == total(inputs[1]));
|
||||
|
||||
MatShape outShape;
|
||||
if (inputs.size() == 2)
|
||||
{
|
||||
outShape = inputs[0];
|
||||
outShape[2] = (outShape[2] - 1) * poolStride.height + poolKernel.height - 2 * poolPad.height;
|
||||
outShape[3] = (outShape[3] - 1) * poolStride.width + poolKernel.width - 2 * poolPad.width;
|
||||
}
|
||||
else
|
||||
outShape = inputs[2];
|
||||
|
||||
outputs.clear();
|
||||
outputs.push_back(outShape);
|
||||
|
||||
return false;
|
||||
}
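A quick illustrative check of the output-shape formula above (not part of the vendored source; the numbers are made up): with a 2x2 kernel, stride 2 and no padding, a pooled extent of 8 unpools to (8 - 1) * 2 + 2 - 0 = 16.

#include <iostream>

int main()
{
    // Mirrors the outShape[2]/outShape[3] computation in getMemoryShapes() above.
    int pooled = 8, stride = 2, kernel = 2, pad = 0;
    int unpooled = (pooled - 1) * stride + kernel - 2 * pad;
    std::cout << unpooled << std::endl;  // prints 16
    return 0;
}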
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert(inputs.size() == 2 || inputs.size() == 3);
|
||||
Mat& input = inputs[0];
|
||||
Mat& indices = inputs[1];
|
||||
|
||||
CV_Assert(input.total() == indices.total());
|
||||
CV_Assert(input.size[0] == 1);
|
||||
CV_Assert(input.isContinuous());
|
||||
|
||||
for(int i_n = 0; i_n < outputs.size(); i_n++)
|
||||
{
|
||||
Mat& outBlob = outputs[i_n];
|
||||
outBlob.setTo(0);
|
||||
CV_Assert(input.size[1] == outBlob.size[1]);
|
||||
int outPlaneTotal = outBlob.size[2]*outBlob.size[3];
|
||||
|
||||
for (int i_c = 0; i_c < input.size[1]; i_c++)
|
||||
{
|
||||
Mat outPlane = getPlane(outBlob, 0, i_c);
|
||||
int wh_area = input.size[2]*input.size[3];
|
||||
const float* inptr = input.ptr<float>(0, i_c);
|
||||
const float* idxptr = indices.ptr<float>(0, i_c);
|
||||
float* outptr = outPlane.ptr<float>();
|
||||
|
||||
for(int i_wh = 0; i_wh < wh_area; i_wh++)
|
||||
{
|
||||
int index = idxptr[i_wh];
|
||||
if (!(0 <= index && index < outPlaneTotal))
|
||||
{
|
||||
std::cerr
|
||||
<< "i_n=" << i_n << std::endl
|
||||
<< "i_c=" << i_c << std::endl
|
||||
<< "i_wh=" << i_wh << std::endl
|
||||
<< "index=" << index << std::endl
|
||||
<< "maxval=" << inptr[i_wh] << std::endl
|
||||
<< "outPlaneTotal=" << outPlaneTotal << std::endl
|
||||
<< "input.size=" << input.size << std::endl
|
||||
<< "indices.size=" << indices.size << std::endl
|
||||
<< "outBlob=" << outBlob.size << std::endl
|
||||
;
|
||||
CV_Assert(0 <= index && index < outPlaneTotal);
|
||||
}
|
||||
outptr[index] = inptr[i_wh];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
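In essence the loop above is a scatter: each pooled maximum is written to the flat plane offset recorded by the pooling layer, and every other output element stays zero. A minimal standalone sketch of that per-plane step (unpoolPlane is a hypothetical helper; it assumes indices are flat offsets within a single plane, as the assertions above require):

#include <algorithm>

// Hypothetical helper, illustrative only.
static void unpoolPlane(const float* in, const float* idx, float* out,
                        int inCount, int outCount)
{
    std::fill(out, out + outCount, 0.f);        // outBlob.setTo(0)
    for (int i = 0; i < inCount; ++i)
        out[static_cast<int>(idx[i])] = in[i];  // outptr[index] = inptr[i_wh]
}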
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
cuda4dnn::MaxUnpoolingConfiguration config;
|
||||
auto& window_size = config.window_size;
|
||||
window_size.resize(2);
|
||||
window_size[0] = poolKernel.height;
|
||||
window_size[1] = poolKernel.width;
|
||||
|
||||
auto& strides = config.strides;
|
||||
strides.resize(2);
|
||||
strides[0] = poolStride.height;
|
||||
strides[1] = poolStride.width;
|
||||
|
||||
auto& pads_begin = config.pads_begin;
|
||||
pads_begin.resize(2);
|
||||
pads_begin[0] = poolPad.height;
|
||||
pads_begin[1] = poolPad.width;
|
||||
|
||||
return make_cuda_node<cuda4dnn::MaxUnpoolingOp>(preferableTarget, std::move(context->stream), config);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_HALIDE
|
||||
// The operation is meaningless unless kernel == stride: if kernel > stride
|
||||
// the result is not deterministic, and if kernel < stride we simply
|
||||
// skip part of the input data (you'd better change your model).
|
||||
if (poolKernel.width != poolStride.width ||
|
||||
poolKernel.height != poolStride.height)
|
||||
CV_Error(cv::Error::StsNotImplemented,
|
||||
"Halide backend for maximum unpooling "
|
||||
"is not support cases when kernel != stride");
|
||||
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n");
|
||||
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
|
||||
Halide::Buffer<float> inputBuffer = halideBuffer(input[0]);
|
||||
Halide::Buffer<float> indices = halideBuffer(input[1]);
|
||||
|
||||
Halide::Expr pooledX = x / poolKernel.width;
|
||||
Halide::Expr pooledY = y / poolKernel.height;
|
||||
|
||||
const int outW = inputBuffer.width() * poolKernel.width;
|
||||
top(x, y, c, n) = select(y * outW + x == indices(pooledX, pooledY, c, n),
|
||||
inputBuffer(pooledX, pooledY, c, n), 0.0f);
|
||||
return Ptr<BackendNode>(new HalideBackendNode(top));
|
||||
#endif // HAVE_HALIDE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
};
|
||||
|
||||
Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<MaxUnpoolLayer>(new MaxUnpoolLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
463
3rdparty/opencv-4.5.4/modules/dnn/src/layers/mvn_layer.cpp
vendored
Normal file
@ -0,0 +1,463 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "../ocl4dnn/include/math_functions.hpp"
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/mvn.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class MVNLayerImpl CV_FINAL : public MVNLayer
|
||||
{
|
||||
public:
|
||||
MVNLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
normVariance = params.get<bool>("normalize_variance", true);
|
||||
acrossChannels = params.get<bool>("across_channels", false);
|
||||
eps = params.get<double>("eps", 1e-9);
|
||||
fuse_batch_norm = false;
|
||||
fuse_relu = false;
|
||||
relu_slope = 0.f;
|
||||
zeroDev = false;
|
||||
}
|
||||
|
||||
Mat scale, shift;
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat umat_scale, umat_shift;
|
||||
#endif
|
||||
bool fuse_batch_norm;
|
||||
|
||||
Ptr<ReLULayer> activ_relu;
|
||||
float relu_slope;
|
||||
bool fuse_relu;
|
||||
bool zeroDev; // TODO: Not taken into account in Intel's Inference Engine backend.
|
||||
bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
|
||||
{
|
||||
if (!layer.empty() && !fuse_relu && !fuse_batch_norm)
|
||||
{
|
||||
layer->getScaleShift(scale, shift);
|
||||
fuse_batch_norm = !scale.empty() || !shift.empty();
|
||||
return fuse_batch_norm;
|
||||
}
|
||||
|
||||
if (!layer.empty() && preferableTarget == DNN_TARGET_OPENCL)
|
||||
{
|
||||
activ_relu = layer.dynamicCast<ReLULayer>();
|
||||
if( !activ_relu.empty() )
|
||||
relu_slope = activ_relu->negativeSlope;
|
||||
}
|
||||
fuse_relu = !activ_relu.empty();
|
||||
return fuse_relu;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
int splitDim = (acrossChannels) ? 1 : 2;
|
||||
int i, newRows = 1;
|
||||
for( i = 0; i < splitDim; i++ )
|
||||
newRows *= inputs[0].size[i];
|
||||
zeroDev = inputs[0].total() == newRows;
|
||||
#ifdef HAVE_OPENCL
|
||||
umat_scale.release();
|
||||
umat_shift.release();
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
||||
{
|
||||
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
|
||||
return !zeroDev && (!isMyriad || eps <= 1e-7f);
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
return true;
|
||||
#endif
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool fast_forward_ocl(std::vector<UMat> &inputs, std::vector<UMat> &outputs)
|
||||
{
|
||||
if (umat_scale.empty() && !scale.empty())
|
||||
scale.copyTo(umat_scale);
|
||||
if (umat_shift.empty() && !shift.empty())
|
||||
shift.copyTo(umat_shift);
|
||||
UMat& bnorm_weight = umat_scale;
|
||||
UMat& bnorm_bias = umat_shift;
|
||||
|
||||
const unsigned LOCAL_SIZE = 128;
|
||||
bool use_half = (inputs[0].depth() == CV_16S);
|
||||
String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u", use_half ? "half" : "float",
|
||||
use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4",
|
||||
LOCAL_SIZE
|
||||
);
|
||||
|
||||
int splitDim = (acrossChannels) ? 1 : 2;
|
||||
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
|
||||
{
|
||||
UMat &inpMat = inputs[inpIdx];
|
||||
UMat &outMat = outputs[inpIdx];
|
||||
int newRows = total(shape(inpMat), 0, splitDim);
|
||||
CV_Assert(newRows != 0);
|
||||
|
||||
MatShape s = shape(newRows, inpMat.total() / newRows);
|
||||
UMat meanMat = UMat(s[0], 1, (use_half) ? CV_16S : CV_32F);
|
||||
UMat tmpMat = UMat(s[0], s[1], CV_32F);
|
||||
float alpha = 1.0f / s[1];
|
||||
|
||||
String buildopt = "-DNUM=4" + opts;
|
||||
ocl::Kernel k("mean_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN_FUSE");
|
||||
size_t localsize[] = { LOCAL_SIZE };
|
||||
size_t globalsize[] = { (size_t)s[0] / 4 * localsize[0] };
|
||||
|
||||
int argId = 0;
|
||||
k.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
k.set(argId++, (int)s[1]);
|
||||
k.set(argId++, alpha);
|
||||
k.set(argId++, ocl::KernelArg::PtrWriteOnly(meanMat));
|
||||
k.set(argId++, ocl::KernelArg::PtrWriteOnly(tmpMat));
|
||||
bool ret = k.run(1, globalsize, localsize, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
buildopt += format(" %s %s", (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "",
|
||||
(fuse_relu) ? "-DFUSE_RELU" : "");
|
||||
|
||||
ocl::Kernel k1("mvn_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MVN_FUSE");
|
||||
argId = 0;
|
||||
k1.set(argId++, ocl::KernelArg::PtrReadOnly(tmpMat));
|
||||
k1.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
k1.set(argId++, ocl::KernelArg::PtrReadOnly(meanMat));
|
||||
k1.set(argId++, (int)s[1]);
|
||||
k1.set(argId++, (float)alpha);
|
||||
k1.set(argId++, (float)eps);
|
||||
k1.set(argId++, (float)relu_slope);
|
||||
k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_weight));
|
||||
k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_bias));
|
||||
k1.set(argId++, ocl::KernelArg::PtrWriteOnly(outMat));
|
||||
ret = k1.run_(1, globalsize, localsize, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
if (umat_scale.empty() && !scale.empty())
|
||||
scale.copyTo(umat_scale);
|
||||
if (umat_shift.empty() && !shift.empty())
|
||||
shift.copyTo(umat_shift);
|
||||
UMat& bnorm_weight = umat_scale;
|
||||
UMat& bnorm_bias = umat_shift;
|
||||
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
|
||||
int splitDim = (acrossChannels) ? 1 : 2;
|
||||
int row_size = total(shape(inputs[0]), 0, splitDim);
|
||||
int plane_size = total(shape(inputs[0]), splitDim);
|
||||
if (normVariance && (row_size % 4 == 0) && (plane_size % 4 == 0))
|
||||
return fast_forward_ocl(inputs, outputs);
|
||||
|
||||
if (inputs[0].depth() == CV_16S)
|
||||
return false;
|
||||
|
||||
String opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");
|
||||
|
||||
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
|
||||
{
|
||||
UMat &inpMat = inputs[inpIdx];
|
||||
UMat &outMat = outputs[inpIdx];
|
||||
int newRows = total(shape(inpMat), 0, splitDim);
|
||||
CV_Assert(newRows != 0);
|
||||
|
||||
MatShape s = shape(newRows, inpMat.total() / newRows);
|
||||
UMat oneMat = UMat::ones(s[1], 1, CV_32F);
|
||||
UMat meanMat = UMat(s[0], 1, CV_32F);
|
||||
UMat devMat = UMat(s[0], 1, CV_32F);
|
||||
UMat tmpMat = UMat(s[0], s[1], CV_32F);
|
||||
float alpha = 1.0f / s[1];
|
||||
|
||||
bool ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
|
||||
inpMat, 0, oneMat, 0, 0.0f, meanMat, 0);
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
|
||||
size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
|
||||
String buildopt = format("-DNUM=%d", number) + opts;
|
||||
if (normVariance)
|
||||
{
|
||||
String kname = format("calc_mean%d", number);
|
||||
ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN");
|
||||
if (kernel.empty())
|
||||
return false;
|
||||
|
||||
kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
kernel.set(1, (int)s[0]);
|
||||
kernel.set(2, (int)s[1]);
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat));
|
||||
kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat));
|
||||
ret = kernel.run(2, global, NULL, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
|
||||
ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
|
||||
tmpMat, 0, oneMat, 0, 0.0f, devMat, 0);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
|
||||
String kname = format("mvn%d", number);
|
||||
buildopt += format("%s%s%s -DKERNEL_MVN", (normVariance) ? " -DNORM_VARIANCE" : "",
|
||||
(fuse_batch_norm) ? " -DFUSE_BATCH_NORM" : "",
|
||||
(fuse_relu) ? " -DFUSE_RELU" : "");
|
||||
ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
|
||||
if (kernel1.empty())
|
||||
return false;
|
||||
kernel1.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
|
||||
kernel1.set(1, (int)s[0]);
|
||||
kernel1.set(2, (int)s[1]);
|
||||
kernel1.set(3, (float)eps);
|
||||
kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
|
||||
kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
|
||||
kernel1.set(6, ocl::KernelArg::PtrReadOnly(bnorm_weight));
|
||||
kernel1.set(7, ocl::KernelArg::PtrReadOnly(bnorm_bias));
|
||||
kernel1.set(8, (int)inpMat.size[1]);
|
||||
kernel1.set(9, (float)relu_slope);
|
||||
kernel1.set(10, ocl::KernelArg::PtrWriteOnly(outMat));
|
||||
ret = kernel1.run(2, global, NULL, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs, internals;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
|
||||
{
|
||||
Mat &inpBlob = inputs[inpIdx];
|
||||
Mat &outBlob = outputs[inpIdx];
|
||||
|
||||
int splitDim = (acrossChannels) ? 1 : 2;
|
||||
int i, newRows = 1;
|
||||
for( i = 0; i < splitDim; i++ )
|
||||
newRows *= inpBlob.size[i];
|
||||
|
||||
Mat inpMat = inpBlob.reshape(1, newRows);
|
||||
Mat outMat = outBlob.reshape(1, newRows);
|
||||
|
||||
if ( inpBlob.total() == newRows )
|
||||
{
|
||||
// Each row contains a single value, so MVN reduces to the shift term (the mean-subtracted value is zero).
|
||||
if (shift.empty())
|
||||
{
|
||||
outBlob.setTo(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
for ( i = 0; i < newRows; i++ )
|
||||
{
|
||||
outMat.row(i).setTo(((float*)shift.data)[i]);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Scalar mean, dev;
|
||||
for ( i = 0; i < newRows; i++)
|
||||
{
|
||||
Mat inpRow = inpMat.row(i);
|
||||
Mat outRow = outMat.row(i);
|
||||
float weight = 1.f;
|
||||
float bias = 0.f;
|
||||
if (fuse_batch_norm)
|
||||
{
|
||||
weight = i < scale.cols ? ((float*)scale.data)[i] : weight;
|
||||
bias = i < shift.cols ? ((float*)shift.data)[i] : bias;
|
||||
}
|
||||
cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
|
||||
double alpha = 1;
|
||||
if (normVariance)
|
||||
{
|
||||
alpha = 1 / std::sqrt(eps + dev[0]*dev[0]);
|
||||
}
|
||||
double normalizationScale = 1.0;
|
||||
double normalizationShift = 0.0;
|
||||
if (fuse_batch_norm)
|
||||
{
|
||||
normalizationScale = alpha * weight;
|
||||
normalizationShift = -mean[0] * normalizationScale + bias;
|
||||
}
|
||||
else
|
||||
{
|
||||
normalizationScale = alpha;
|
||||
normalizationShift = -mean[0] * alpha;
|
||||
}
|
||||
inpRow.convertTo(outRow, outRow.type(), normalizationScale, normalizationShift);
|
||||
}
|
||||
}
|
||||
}
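Put together (a reference formula, not part of the vendored source), the per-row path above computes

\mathrm{out} = w \cdot \frac{\mathrm{in} - \mu}{\sqrt{\sigma^{2} + \varepsilon}} + b

where \mu and \sigma come from meanStdDev, the denominator is dropped (alpha = 1) when normVariance is false, and w, b default to 1 and 0 unless a batch-norm layer has been fused.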
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::MVNLayer ieLayer(name);
|
||||
ieLayer.setAcrossChannels(acrossChannels);
|
||||
ieLayer.setNormalize(normVariance);
|
||||
ieLayer.setEpsilon(eps);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2021_2)
|
||||
auto mvn = std::make_shared<ngraph::op::MVN>(ieInpNode, acrossChannels, normVariance, eps);
|
||||
#else
|
||||
int64_t start_axis = acrossChannels ? 1 : 2;
|
||||
std::vector<int64_t> axes_v(ieInpNode->get_shape().size() - start_axis);
|
||||
std::iota(axes_v.begin(), axes_v.end(), start_axis);
|
||||
auto axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes_v.size()}, axes_v.data());
|
||||
auto mvn = std::make_shared<ngraph::op::v6::MVN>(ieInpNode, axes, normVariance, eps, ngraph::op::MVNEpsMode::INSIDE_SQRT);
|
||||
#endif
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(mvn));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
cuda4dnn::MVNConfiguration config;
|
||||
config.split_axis = acrossChannels ? 1 : 2;
|
||||
config.normalize_variance = normVariance;
|
||||
config.epsilon = eps;
|
||||
config.input_shapes.resize(inputs.size());
|
||||
for (int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
auto wrapper = inputs[i].dynamicCast<CUDABackendWrapper>();
|
||||
auto shape = wrapper->getShape();
|
||||
config.input_shapes[i].assign(std::begin(shape), std::end(shape));
|
||||
}
|
||||
|
||||
return make_cuda_node<cuda4dnn::MVNOp>(preferableTarget, std::move(context->stream), config);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(outputs); // suppress unused variable warning
|
||||
long flops = 0;
|
||||
for(int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
flops += 6*total(inputs[i]) + 3*total(inputs[i], 0, normVariance ? 2 : 1);
|
||||
}
|
||||
return flops;
|
||||
}
|
||||
};
|
||||
|
||||
Ptr<MVNLayer> MVNLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<MVNLayer>(new MVNLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
402
3rdparty/opencv-4.5.4/modules/dnn/src/layers/normalize_bbox_layer.cpp
vendored
Normal file
@ -0,0 +1,402 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/normalize_bbox.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class NormalizeBBoxLayerImpl CV_FINAL : public NormalizeBBoxLayer
|
||||
{
|
||||
public:
|
||||
NormalizeBBoxLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
pnorm = params.get<float>("p", 2);
|
||||
epsilon = params.get<float>("eps", 1e-10f);
|
||||
acrossSpatial = params.get<bool>("across_spatial", true);
|
||||
startAxis = params.get<int>("start_axis", 1);
|
||||
CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
|
||||
endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
|
||||
CV_Assert(pnorm > 0);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
{
|
||||
if (pnorm != 2)
|
||||
return false;
|
||||
|
||||
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && isMyriad)
|
||||
return !acrossSpatial;
|
||||
|
||||
return startAxis == 1;
|
||||
}
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
(backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 1);
|
||||
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
|
||||
internals.resize(1, inputs[0]);
|
||||
internals[0][0] = 1; // Batch size.
|
||||
return true;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
CV_Assert(inputs.size() == 1);
|
||||
endAxis = endAxis == -1 ? (inputs[0].dims - 1) : endAxis;
|
||||
startAxis = startAxis == -1 ? (inputs[0].dims - 1) : startAxis;
|
||||
acrossSpatial = (startAxis == 1 && endAxis == inputs[0].dims - 1);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
if (inputs_.depth() == CV_16S)
|
||||
return false;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
internals_.getUMatVector(internals);
|
||||
|
||||
CV_Assert(inputs.size() == 1 && outputs.size() == 1);
|
||||
CV_Assert(inputs[0].total() == outputs[0].total());
|
||||
|
||||
const UMat& inp0 = inputs[0];
|
||||
UMat& buffer = internals[0];
|
||||
startAxis = normalize_axis(startAxis, inp0.dims);
|
||||
endAxis = normalize_axis(endAxis, inp0.dims);
|
||||
|
||||
size_t num = total(shape(inp0.size), 0, startAxis);
|
||||
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
|
||||
size_t planeSize = inp0.total() / (num * numPlanes);
|
||||
MatShape s = shape(1, inputs[0].total());
|
||||
UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
|
||||
UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
|
||||
for (size_t i = 0; i < num; ++i)
|
||||
{
|
||||
s = shape(numPlanes, planeSize);
|
||||
UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
|
||||
UMat dst = out.row(i).reshape(1, s.size(), &s[0]);
|
||||
|
||||
UMat abs_mat;
|
||||
absdiff(src, cv::Scalar::all(0), abs_mat);
|
||||
pow(abs_mat, pnorm, buffer);
|
||||
|
||||
if (planeSize == 1)
|
||||
{
|
||||
// add eps to avoid overflow
|
||||
float absSum = sum(buffer)[0] + epsilon;
|
||||
float norm = pow(absSum, 1.0f / pnorm);
|
||||
multiply(src, 1.0f / norm, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
Mat norm;
|
||||
reduce(buffer, norm, 0, REDUCE_SUM);
|
||||
norm += epsilon;
|
||||
|
||||
// compute the inverted norm so we can multiply instead of divide
|
||||
cv::pow(norm, -1.0f / pnorm, norm);
|
||||
|
||||
repeat(norm, numPlanes, 1, buffer);
|
||||
multiply(src, buffer, dst);
|
||||
}
|
||||
|
||||
if (!blobs.empty())
|
||||
{
|
||||
// scale the output
|
||||
Mat scale = blobs[0];
|
||||
if (scale.total() == 1)
|
||||
{
|
||||
// _scale: 1 x 1
|
||||
multiply(dst, scale.at<float>(0, 0), dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
// _scale: _channels x 1
|
||||
CV_Assert(scale.total() == numPlanes);
|
||||
repeat(scale, 1, dst.cols, buffer);
|
||||
multiply(dst, buffer, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs, internals;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
CV_Assert(inputs.size() == 1 && outputs.size() == 1);
|
||||
CV_Assert(inputs[0].total() == outputs[0].total());
|
||||
|
||||
const Mat& inp0 = inputs[0];
|
||||
Mat& buffer = internals[0];
|
||||
startAxis = normalize_axis(startAxis, inp0.dims);
|
||||
endAxis = normalize_axis(endAxis, inp0.dims);
|
||||
|
||||
const float* inpData = inp0.ptr<float>();
|
||||
float* outData = outputs[0].ptr<float>();
|
||||
|
||||
size_t num = total(shape(inp0.size), 0, startAxis);
|
||||
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
|
||||
CV_Assert(num * numPlanes != 0);
|
||||
size_t planeSize = inp0.total() / (num * numPlanes);
|
||||
for (size_t n = 0; n < num; ++n)
|
||||
{
|
||||
Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
|
||||
Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
|
||||
cv::pow(abs(src), pnorm, buffer);
|
||||
|
||||
if (planeSize == 1)
|
||||
{
|
||||
// add eps to avoid overflow
|
||||
float absSum = sum(buffer)[0] + epsilon;
|
||||
float norm = pow(absSum, 1.0f / pnorm);
|
||||
multiply(src, 1.0f / norm, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
Mat norm;
|
||||
reduce(buffer, norm, 0, REDUCE_SUM);
|
||||
norm += epsilon;
|
||||
|
||||
// compute the inverted norm so we can multiply instead of divide
|
||||
cv::pow(norm, -1.0f / pnorm, norm);
|
||||
|
||||
repeat(norm, numPlanes, 1, buffer);
|
||||
multiply(src, buffer, dst);
|
||||
}
|
||||
|
||||
if (!blobs.empty())
|
||||
{
|
||||
// scale the output
|
||||
Mat scale = blobs[0];
|
||||
if (scale.total() == 1)
|
||||
{
|
||||
// _scale: 1 x 1
|
||||
dst *= scale.at<float>(0, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// _scale: _channels x 1
|
||||
CV_Assert(scale.total() == numPlanes);
|
||||
repeat(scale, 1, dst.cols, buffer);
|
||||
multiply(dst, buffer, dst);
|
||||
}
|
||||
}
|
||||
inpData += numPlanes * planeSize;
|
||||
outData += numPlanes * planeSize;
|
||||
}
|
||||
}
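For reference (not part of the vendored source), both the planeSize == 1 branch and the general branch above implement the same Lp normalization,

\mathrm{out}_i = s \cdot \mathrm{in}_i \cdot \left(\varepsilon + \sum_j |\mathrm{in}_j|^{p}\right)^{-1/p}

with the sum taken over the normalized axes and s the optional per-channel (or scalar) scale stored in blobs[0].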
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
|
||||
std::vector<size_t> dims = input->getDims();
|
||||
if (dims.size() == 4)
|
||||
{
|
||||
InferenceEngine::Builder::NormalizeLayer ieLayer(name);
|
||||
|
||||
ieLayer.setChannelShared(false);
|
||||
ieLayer.setAcrossMaps(acrossSpatial);
|
||||
ieLayer.setEpsilon(epsilon);
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
const int numChannels = dims[1];
|
||||
InferenceEngine::Blob::Ptr weights;
|
||||
if (blobs.empty())
|
||||
{
|
||||
weights = InferenceEngine::make_shared_blob<float>({
|
||||
InferenceEngine::Precision::FP32,
|
||||
{(size_t)numChannels}, InferenceEngine::Layout::C
|
||||
});
|
||||
weights->allocate();
|
||||
|
||||
Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels);
|
||||
Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat);
|
||||
l.getParameters()["channel_shared"] = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(numChannels == blobs[0].total());
|
||||
weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C);
|
||||
l.getParameters()["channel_shared"] = blobs[0].total() == 1;
|
||||
}
|
||||
addConstantData("weights", weights, l);
|
||||
l.getParameters()["across_spatial"] = acrossSpatial;
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
else
|
||||
{
|
||||
InferenceEngine::Builder::GRNLayer ieLayer(name);
|
||||
ieLayer.setBeta(epsilon);
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
l.getParameters()["bias"] = epsilon;
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
const size_t batch = ieInpNode->get_shape()[0];
|
||||
const size_t numChannels = ieInpNode->get_shape()[1];
|
||||
|
||||
std::vector<int64_t> axes_data;
|
||||
if (!acrossSpatial) {
|
||||
axes_data.push_back(1);
|
||||
} else {
|
||||
axes_data.resize(ieInpNode->get_shape().size() - 1);
|
||||
std::iota(axes_data.begin(), axes_data.end(), 1);
|
||||
}
|
||||
auto axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data);
|
||||
auto norm = std::make_shared<ngraph::op::v0::NormalizeL2>(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD);
|
||||
|
||||
CV_Assert(blobs.empty() || numChannels == blobs[0].total());
|
||||
std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
|
||||
shape[0] = blobs.empty() ? 1 : batch;
|
||||
shape[1] = numChannels;
|
||||
if (!blobs.empty())
|
||||
{
|
||||
auto weight = std::make_shared<ngraph::op::Constant>(
|
||||
ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2)
|
||||
auto mul = std::make_shared<ngraph::op::v1::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
#else
|
||||
auto mul = std::make_shared<ngraph::op::v0::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
#endif
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(mul));
|
||||
}
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(norm));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
if(pnorm != 1 && pnorm != 2)
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");
|
||||
|
||||
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
|
||||
auto input_shape = input_wrapper->getShape();
|
||||
|
||||
NormalizeConfiguration<float> config;
|
||||
config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
|
||||
config.axis_start = normalize_axis(startAxis, input_shape.size());
|
||||
config.axis_end = normalize_axis(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
|
||||
config.norm = pnorm;
|
||||
config.eps = epsilon;
|
||||
|
||||
const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
|
||||
return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
private:
|
||||
int startAxis, endAxis;
|
||||
};
|
||||
|
||||
|
||||
Ptr<NormalizeBBoxLayer> NormalizeBBoxLayer::create(const LayerParams ¶ms)
|
||||
{
|
||||
return Ptr<NormalizeBBoxLayer>(new NormalizeBBoxLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
194
3rdparty/opencv-4.5.4/modules/dnn/src/layers/not_implemented_layer.cpp
vendored
Normal file
@ -0,0 +1,194 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "../dnn_common.hpp"
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
namespace detail {
|
||||
|
||||
class NotImplementedImpl CV_FINAL : public NotImplemented
|
||||
{
|
||||
public:
|
||||
NotImplementedImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
CV_Assert(params.has("type"));
|
||||
std::stringstream ss;
|
||||
ss << "Node for layer '" << params.name << "' of type '" << params.get("type") << "' wasn't initialized.";
|
||||
msg = ss.str();
|
||||
}
|
||||
|
||||
CV_DEPRECATED_EXTERNAL
|
||||
virtual void finalize(const std::vector<Mat*> &input, std::vector<Mat> &output) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual void finalize(InputArrayOfArrays inputs, OutputArrayOfArrays outputs) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
CV_DEPRECATED_EXTERNAL
|
||||
virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
CV_DEPRECATED_EXTERNAL
|
||||
void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs)
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
CV_DEPRECATED std::vector<Mat> finalize(const std::vector<Mat> &inputs)
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
CV_DEPRECATED void run(const std::vector<Mat> &inputs,
|
||||
CV_OUT std::vector<Mat> &outputs,
|
||||
CV_IN_OUT std::vector<Mat> &internals)
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual int inputNameToIndex(String inputName) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual int outputNameToIndex(const String& outputName) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initCUDA(
|
||||
void *context,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual void applyHalideScheduler(Ptr<BackendNode>& node,
|
||||
const std::vector<Mat*> &inputs,
|
||||
const std::vector<Mat> &outputs,
|
||||
int targetId) const CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual void unsetAttached() CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, msg);
|
||||
}
|
||||
|
||||
private:
|
||||
std::string msg;
|
||||
};
|
||||
|
||||
Ptr<Layer> NotImplemented::create(const LayerParams& params)
|
||||
{
|
||||
return makePtr<NotImplementedImpl>(params);
|
||||
}
|
||||
|
||||
Ptr<Layer> notImplementedRegisterer(LayerParams ¶ms)
|
||||
{
|
||||
return detail::NotImplemented::create(params);
|
||||
}
|
||||
|
||||
void NotImplemented::Register()
|
||||
{
|
||||
LayerFactory::registerLayer("NotImplemented", detail::notImplementedRegisterer);
|
||||
}
|
||||
|
||||
void NotImplemented::unRegister()
|
||||
{
|
||||
LayerFactory::unregisterLayer("NotImplemented");
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace cv::dnn
|
||||
293
3rdparty/opencv-4.5.4/modules/dnn/src/layers/padding_layer.cpp
vendored
Normal file
@ -0,0 +1,293 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
/*
|
||||
Implementation of padding layer, which adds paddings to input blob.
|
||||
*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/padding.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class PaddingLayerImpl CV_FINAL : public PaddingLayer
|
||||
{
|
||||
public:
|
||||
PaddingLayerImpl(const LayerParams ¶ms)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
paddingValue = params.get<float>("value", 0);
|
||||
inputDims = params.get<int>("input_dims", -1);
|
||||
paddingType = params.get<String>("type", "constant");
|
||||
|
||||
CV_Assert(params.has("paddings"));
|
||||
const DictValue& paddingsParam = params.get("paddings");
|
||||
CV_Assert((paddingsParam.size() & 1) == 0);
|
||||
|
||||
paddings.resize(paddingsParam.size() / 2);
|
||||
for (int i = 0; i < paddings.size(); ++i)
|
||||
{
|
||||
paddings[i].first = paddingsParam.get<int>(i * 2); // Pad before.
|
||||
paddings[i].second = paddingsParam.get<int>(i * 2 + 1); // Pad after.
|
||||
CV_Assert_N(paddings[i].first >= 0, paddings[i].second >= 0);
|
||||
}
|
||||
}
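A minimal usage sketch (illustrative only; the pad amounts and blob layout are made up) of the parameter format parsed above: "paddings" is a flat list of (before, after) pairs, one pair per dimension, so padding only H and W of an NCHW input looks like this:

#include <opencv2/dnn.hpp>
#include <vector>

int main()
{
    // One (before, after) pair per dimension: N, C, H, W.
    std::vector<int> pads = {0, 0,  0, 0,  1, 1,  1, 1};
    cv::dnn::LayerParams lp;
    lp.set("type", "constant");
    lp.set("value", 0.0);
    lp.set("paddings", cv::dnn::DictValue::arrayInt(pads.data(), (int)pads.size()));
    cv::Ptr<cv::dnn::Layer> pad = cv::dnn::PaddingLayer::create(lp);
    return 0;
}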
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 1);
|
||||
const MatShape& inpShape = inputs[0];
|
||||
CV_Assert(inpShape.size() >= paddings.size());
|
||||
CV_Assert(inputDims == -1 || inpShape.size() == inputDims || inpShape.size() > paddings.size());
|
||||
|
||||
outputs.resize(1, inpShape);
|
||||
int offset = (inputDims == -1 ? 0 : (inpShape.size() > inputDims ? 1 : 0));
|
||||
for (int i = 0; i < paddings.size(); ++i)
|
||||
{
|
||||
outputs[0][offset + i] = inpShape[offset + i] + paddings[i].first + paddings[i].second;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
|
||||
// Compute dstRanges.
|
||||
const MatSize& inpShape = inputs[0].size;
|
||||
|
||||
if (inputDims != -1 && inputs[0].dims != inputDims)
|
||||
{
|
||||
paddings.insert(paddings.begin(), std::make_pair(0, 0));
}

dstRanges.resize(paddings.size());
for (int i = 0; i < paddings.size(); ++i)
{
dstRanges[i].start = paddings[i].first;
dstRanges[i].end = paddings[i].first + inpShape[i];
}

// Add the rest of dimensions.
for (int i = dstRanges.size(); i < inputs[0].dims; ++i)
{
dstRanges.push_back(Range::all());
paddings.push_back(std::make_pair(0, 0));
}
inputDims = -1; // Next time paddings are filled for all the dimensions.
}

virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
if (INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && isMyriad)
return dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0;

return (dstRanges.size() <= 4 || !isArmComputePlugin());
}
#endif
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4);
}

void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());

std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);

if (paddingType == "constant")
{
if (inputs_arr.depth() == CV_16S)
{
std::vector<float> paddingValue_fp32(1, paddingValue);
std::vector<int16_t> paddingValue_fp16(1);
cv::convertFp16(paddingValue_fp32, paddingValue_fp16);
outputs[0].setTo(paddingValue_fp16[0]);
}
else if (inputs_arr.depth() == CV_8S)
outputs[0].setTo(saturate_cast<int8_t>(paddingValue));
else
outputs[0].setTo(paddingValue);
inputs[0].copyTo(outputs[0](dstRanges));
}
else if (paddingType == "reflect")
{
CV_Assert(inputs.size() == 1);
CV_Assert(outputs.size() == 1);
CV_Assert(inputs[0].dims == 4);
CV_Assert(outputs[0].dims == 4);

if (inputs[0].size[0] != outputs[0].size[0] || inputs[0].size[1] != outputs[0].size[1])
CV_Error(Error::StsNotImplemented, "Only spatial reflection padding is supported.");

const int inpHeight = inputs[0].size[2];
const int inpWidth = inputs[0].size[3];
const int outHeight = outputs[0].size[2];
const int outWidth = outputs[0].size[3];
const int padTop = dstRanges[2].start;
const int padBottom = outHeight - dstRanges[2].end;
const int padLeft = dstRanges[3].start;
const int padRight = outWidth - dstRanges[3].end;
CV_CheckLT(padTop, inpHeight, ""); CV_CheckLT(padBottom, inpHeight, "");
CV_CheckLT(padLeft, inpWidth, ""); CV_CheckLT(padRight, inpWidth, "");

for (size_t n = 0; n < inputs[0].size[0]; ++n)
{
for (size_t ch = 0; ch < inputs[0].size[1]; ++ch)
{
copyMakeBorder(getPlane(inputs[0], n, ch),
getPlane(outputs[0], n, ch),
padTop, padBottom, padLeft, padRight,
BORDER_REFLECT_101);
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown padding type: " + paddingType);
}

#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);

cuda4dnn::PaddingType ptype;
if (paddingType == "constant")
ptype = PaddingType::CONSTANT;
else if (paddingType == "reflect")
ptype = PaddingType::REFLECTION101;
else
CV_Error(Error::StsNotImplemented, "Unsupported padding mode");

return make_cuda_node<cuda4dnn::PaddingOp>(preferableTarget, std::move(context->stream), ptype, paddingValue, dstRanges);
}
#endif

virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
int inW, inH, inC, inN;
int minN = std::max(dstRanges[0].start, 0);
int minC = std::max(dstRanges[1].start, 0);
int minY = std::max(dstRanges[2].start, 0);
int minX = std::max(dstRanges[3].start, 0);
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);

Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Func padded =
Halide::BoundaryConditions::constant_exterior(inputBuffer, paddingValue);
top(x, y, c, n) = padded(x - minX, y - minY, c - minC, n - minN);
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Pad");

std::vector<int> begins(paddings.size(), 0), ends(paddings.size(), 0);
for (int i = 0; i < paddings.size(); ++i)
{
begins[i] = paddings[i].first;
ends[i] = paddings[i].second;
}
ieLayer.getParameters()["pads_begin"] = begins;
ieLayer.getParameters()["pads_end"] = ends;
ieLayer.getParameters()["pad_mode"] = paddingType;
if (paddingType == "constant")
ieLayer.getParameters()["pad_value"] = paddingValue;

ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif

#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<int64_t> begins(paddings.size(), 0), ends(paddings.size(), 0);
for (int i = 0; i < paddings.size(); ++i)
{
begins[i] = static_cast<int64_t>(paddings[i].first);
ends[i] = static_cast<int64_t>(paddings[i].second);
}
auto padding_below = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{begins.size()}, begins.data());
auto padding_above = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ends.size()}, ends.data());
auto pad_mode = paddingType == "constant" ? ngraph::op::PadMode::CONSTANT : ngraph::op::PadMode::REFLECT; // SYMMETRIC
auto arg_pad_value = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{}, &paddingValue);

auto pad = paddingType == "constant" ?
std::make_shared<ngraph::op::v1::Pad>(ieInpNode, padding_below, padding_above, arg_pad_value, pad_mode) :
std::make_shared<ngraph::op::v1::Pad>(ieInpNode, padding_below, padding_above, pad_mode);
return Ptr<BackendNode>(new InfEngineNgraphNode(pad));
}
#endif

virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
float outputScale = scales[1][0];
int outputZp = zeropoints[1][0];
float padValue = outputZp + std::round(params.get<float>("value", 0)/outputScale);
params.set("value", padValue);
return true;
}

private:
std::vector<std::pair<int, int> > paddings; // Pairs pad before, pad after.
std::vector<Range> dstRanges;
int inputDims;
float paddingValue;
std::string paddingType;
};

Ptr<PaddingLayer> PaddingLayer::create(const LayerParams &params)
{
return Ptr<PaddingLayer>(new PaddingLayerImpl(params));
}

}
}
494
3rdparty/opencv-4.5.4/modules/dnn/src/layers/permute_layer.cpp
vendored
Normal file
@ -0,0 +1,494 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include "../op_vkcom.hpp"
|
||||
|
||||
#include <float.h>
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/permute.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
class PermuteLayerImpl CV_FINAL : public PermuteLayer
|
||||
{
|
||||
public:
|
||||
void checkNeedForPermutation()
|
||||
{
|
||||
_needsPermute = false;
|
||||
for (size_t i = 0; i < _numAxes; ++i)
|
||||
{
|
||||
if (_order[i] != i)
|
||||
{
|
||||
_needsPermute = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PermuteLayerImpl(const LayerParams &params)
|
||||
: _count(0), _needsPermute(false), _numAxes(0)
|
||||
{
|
||||
if (!params.has("order"))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
DictValue paramOrder = params.get("order");
|
||||
_numAxes = paramOrder.size();
|
||||
|
||||
for (size_t i = 0; i < _numAxes; i++)
|
||||
{
|
||||
int currentOrder = paramOrder.get<int>(i);
|
||||
if (currentOrder < 0 || currentOrder > _numAxes - 1)
{
CV_Error(Error::StsBadArg,
format("Orders of dimensions in Permute layer parameter "
"must be in [0...%zu]", _numAxes - 1));
|
||||
}
|
||||
if (std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
|
||||
{
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Permute layer parameter contains duplicated orders.");
|
||||
}
|
||||
_order.push_back(currentOrder);
|
||||
}
|
||||
|
||||
setParamsFrom(params);
|
||||
checkNeedForPermutation();
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && preferableTarget == DNN_TARGET_CPU)
|
||||
return _order.size() <= 4 || !isArmComputePlugin();
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()) ||
|
||||
(backendId == DNN_BACKEND_VKCOM && haveVulkan());
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
if(!_needsPermute)
|
||||
{
|
||||
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
|
||||
return true;
|
||||
}
|
||||
|
||||
CV_Assert(inputs.size() > 0);
|
||||
CV_Assert((int)_numAxes == inputs[0].size());
|
||||
|
||||
MatShape shapeBefore = inputs[0], shapeAfter;
|
||||
for (size_t i = 0; i < _numAxes; i++)
|
||||
{
|
||||
shapeAfter.push_back(shapeBefore[_order[i]]);
|
||||
}
|
||||
|
||||
outputs.clear();
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
CV_Assert(total(inputs[i]) == total(shapeAfter));
|
||||
outputs.push_back(shapeAfter);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void computeStrides(const MatShape &shapeBefore, const MatShape &shapeAfter)
|
||||
{
|
||||
_oldStride.resize(_numAxes);
|
||||
_newStride.resize(_numAxes);
|
||||
|
||||
_oldStride[_numAxes - 1] = 1;
|
||||
_newStride[_numAxes - 1] = 1;
|
||||
|
||||
for(int i = _numAxes - 2; i >= 0; i--)
|
||||
{
|
||||
_oldStride[i] = _oldStride[i + 1] * shapeBefore[i + 1];
|
||||
_newStride[i] = _newStride[i + 1] * shapeAfter[i + 1];
|
||||
}
|
||||
|
||||
_count = _oldStride[0] * shapeBefore[0];
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
|
||||
{
|
||||
if(!_needsPermute)
|
||||
{
|
||||
return;
|
||||
}
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert(inputs.size() > 0);
|
||||
const Mat& inp0 = inputs[0];
|
||||
CV_Assert((int)_numAxes == inp0.dims);
|
||||
|
||||
computeStrides(shape(inputs[0]), shape(outputs[0]));
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
uorder.release();
|
||||
uold_stride.release();
|
||||
unew_stride.release();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class T>
|
||||
class PermuteInvoker : public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
const Mat* inp;
|
||||
Mat* out;
|
||||
const std::vector<size_t>* order;
|
||||
int nstripes;
|
||||
|
||||
static void run(const Mat& inp, Mat& out, const std::vector<size_t>& order, int nstripes)
|
||||
{
|
||||
PermuteInvoker p;
|
||||
p.inp = &inp;
|
||||
p.out = &out;
|
||||
p.order = ℴ
|
||||
p.nstripes = nstripes;
|
||||
|
||||
CV_Assert( out.size[0] == inp.size[order[0]] &&
|
||||
out.size[1] == inp.size[order[1]] &&
|
||||
out.size[2] == inp.size[order[2]] &&
|
||||
out.size[3] == inp.size[order[3]]);
|
||||
|
||||
parallel_for_(Range(0, nstripes), p, nstripes);
|
||||
}
|
||||
|
||||
PermuteInvoker() : inp(0), out(0), order(0), nstripes(0) {}
|
||||
|
||||
void operator()(const Range& r) const CV_OVERRIDE
|
||||
{
|
||||
int n0 = out->size[0], n1 = out->size[1], n2 = out->size[2], n3 = out->size[3];
|
||||
|
||||
size_t orows = (size_t)n0*n1*n2;
|
||||
size_t stripeSize = (orows + nstripes - 1)/nstripes;
|
||||
size_t stripeStart = r.start*stripeSize;
|
||||
size_t stripeEnd = std::min(r.end*stripeSize, orows);
|
||||
|
||||
const size_t esz = sizeof(T);
|
||||
size_t ostep0 = out->step[0]/esz, ostep1 = out->step[1]/esz, ostep2 = out->step[2]/esz;
|
||||
const size_t* ord = &order->at(0);
|
||||
size_t istep0 = inp->step[ord[0]]/esz, istep1 = inp->step[ord[1]]/esz,
|
||||
istep2 = inp->step[ord[2]]/esz, istep3 = inp->step[ord[3]]/esz;
|
||||
|
||||
size_t val = stripeStart;
|
||||
int i2 = (int)(val % n2);
|
||||
val /= n2;
|
||||
int i1 = (int)(val % n1);
|
||||
int i0 = (int)(val / n1);
|
||||
|
||||
const T* inptr_orig = inp->ptr<T>();
|
||||
T* outptr_orig = out->ptr<T>();
|
||||
|
||||
for( size_t ofs = stripeStart; ofs < stripeEnd; ofs++ )
|
||||
{
|
||||
const T* inptr = inptr_orig + i0*istep0 + i1*istep1 + i2*istep2;
|
||||
T* outptr = outptr_orig + i0*ostep0 + i1*ostep1 + i2*ostep2;
|
||||
|
||||
for( int i3 = 0; i3 < n3; i3++ )
|
||||
outptr[i3] = inptr[i3*istep3];
|
||||
|
||||
if( ++i2 >= n2 )
|
||||
{
|
||||
i2 = 0;
|
||||
if( ++i1 >= n1 )
|
||||
{
|
||||
i1 = 0;
|
||||
if( ++i0 >= n0 )
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
if (!_needsPermute)
|
||||
return false;
|
||||
|
||||
if (uorder.empty())
|
||||
{
|
||||
std::vector<int> orderVec(_order.begin(), _order.end());
|
||||
Mat morder(1, orderVec.size(), CV_32SC1, &orderVec[0]);
|
||||
|
||||
std::vector<int> oldStrideVec(_oldStride.begin(), _oldStride.end());
|
||||
Mat mold_stride(1, _oldStride.size(), CV_32SC1, &oldStrideVec[0]);
|
||||
|
||||
std::vector<int> newStrideVec(_newStride.begin(), _newStride.end());
|
||||
Mat mnew_stride(1, newStrideVec.size(), CV_32SC1, &newStrideVec[0]);
|
||||
|
||||
morder.copyTo(uorder);
|
||||
mold_stride.copyTo(uold_stride);
|
||||
mnew_stride.copyTo(unew_stride);
|
||||
}
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
String opts = format("-DDtype=%s", use_half ? "half" : "float");
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
ocl::Kernel kernel("permute", ocl::dnn::permute_oclsrc, opts);
|
||||
|
||||
kernel.set(0, (int)_count);
|
||||
kernel.set(1, ocl::KernelArg::PtrReadOnly(inputs[i]));
|
||||
kernel.set(2, ocl::KernelArg::PtrReadOnly(uorder));
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(uold_stride));
|
||||
kernel.set(4, ocl::KernelArg::PtrReadOnly(unew_stride));
|
||||
kernel.set(5, (int)_numAxes);
|
||||
kernel.set(6, ocl::KernelArg::PtrWriteOnly(outputs[i]));
|
||||
|
||||
if (!kernel.run(1, &_count, NULL, false))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
inputs_arr.depth() != CV_8S,
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
size_t k, ninputs = inputs.size();
|
||||
if(!_needsPermute)
|
||||
{
|
||||
for (k = 0; k < ninputs; k++)
|
||||
{
|
||||
CV_Assert(outputs[k].total() == inputs[k].total());
|
||||
if (outputs[k].data != inputs[k].data)
|
||||
inputs[k].copyTo(outputs[k]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t i, j, count = _count, numAxes = _numAxes;
|
||||
const size_t* newStride = &_newStride[0];
|
||||
const size_t* oldStride = &_oldStride[0];
|
||||
const size_t* order = &_order[0];
|
||||
|
||||
for (k = 0; k < ninputs; k++)
|
||||
{
|
||||
const Mat& inp = inputs[k];
|
||||
Mat& out = outputs[k];
|
||||
|
||||
CV_Assert(inp.dims == numAxes && inp.size == inputs[0].size);
|
||||
CV_Assert(out.dims == numAxes && out.size == outputs[0].size);
|
||||
|
||||
CV_Assert(inp.isContinuous() && out.isContinuous());
|
||||
// CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);
|
||||
|
||||
if( numAxes == 4 )
|
||||
{
|
||||
int nstripes = getNumThreads();
|
||||
if (inp.type() == CV_8S)
|
||||
PermuteInvoker<int8_t>::run(inp, out, _order, nstripes);
|
||||
else
|
||||
PermuteInvoker<float>::run(inp, out, _order, nstripes);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inp.type() == CV_8S)
|
||||
{
|
||||
const int8_t *srcData = inp.ptr<int8_t>();
|
||||
int8_t *dstData = out.ptr<int8_t>();
|
||||
|
||||
for (i = 0; i < count; ++i)
|
||||
{
|
||||
size_t oldPosition = 0;
|
||||
size_t newPosition = i;
|
||||
|
||||
for (j = 0; j < numAxes; ++j)
|
||||
{
|
||||
oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
|
||||
newPosition %= newStride[j];
|
||||
}
|
||||
dstData[i] = srcData[oldPosition];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const float *srcData = inp.ptr<float>();
|
||||
float *dstData = out.ptr<float>();
|
||||
|
||||
for (i = 0; i < count; ++i)
|
||||
{
|
||||
size_t oldPosition = 0;
|
||||
size_t newPosition = i;
|
||||
|
||||
for (j = 0; j < numAxes; ++j)
|
||||
{
|
||||
oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
|
||||
newPosition %= newStride[j];
|
||||
}
|
||||
dstData[i] = srcData[oldPosition];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::PermuteLayer ieLayer(name);
|
||||
ieLayer.setOrder(_order);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
std::vector<int64_t> order(_order.begin(), _order.end());
|
||||
auto tr_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape({order.size()}), order.data());
|
||||
auto transpose = std::make_shared<ngraph::op::Transpose>(ieInpNode, tr_axes);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(transpose));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
return make_cuda_node<cuda4dnn::PermuteOp>(preferableTarget, std::move(context->stream), _order);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_VULKAN
|
||||
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(!_order.empty());
|
||||
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPermute(_order));
|
||||
return Ptr<BackendNode>(new VkComBackendNode(input, op));
|
||||
}
|
||||
#endif // HAVE_VULKAN
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t _count;
|
||||
std::vector<size_t> _order;
|
||||
|
||||
std::vector<int> _oldDimensionSize;
|
||||
std::vector<int> _newDimensionSize;
|
||||
|
||||
std::vector<size_t> _oldStride;
|
||||
std::vector<size_t> _newStride;
|
||||
bool _needsPermute;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat uorder, uold_stride, unew_stride;
|
||||
#endif
|
||||
|
||||
size_t _numAxes;
|
||||
};
|
||||
|
||||
Ptr<PermuteLayer> PermuteLayer::create(const LayerParams &params)
|
||||
{
|
||||
return Ptr<PermuteLayer>(new PermuteLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
1389
3rdparty/opencv-4.5.4/modules/dnn/src/layers/pooling_layer.cpp
vendored
Normal file
File diff suppressed because it is too large
765
3rdparty/opencv-4.5.4/modules/dnn/src/layers/prior_box_layer.cpp
vendored
Normal file
@ -0,0 +1,765 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
#include "../ie_ngraph.hpp"
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
#include <ngraph/op/prior_box.hpp>
|
||||
#include <ngraph/op/prior_box_clustered.hpp>
|
||||
#else
|
||||
#include <ngraph/op/experimental/layers/prior_box.hpp>
|
||||
#include <ngraph/op/experimental/layers/prior_box_clustered.hpp>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "../op_vkcom.hpp"
|
||||
|
||||
#include <float.h>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/prior_box.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class PriorBoxLayerImpl CV_FINAL : public PriorBoxLayer
|
||||
{
|
||||
public:
|
||||
static bool getParameterDict(const LayerParams &params,
|
||||
const std::string ¶meterName,
|
||||
DictValue& result)
|
||||
{
|
||||
if (!params.has(parameterName))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
result = params.get(parameterName);
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T getParameter(const LayerParams &params,
|
||||
const std::string ¶meterName,
|
||||
const size_t &idx=0,
|
||||
const bool required=true,
|
||||
const T& defaultValue=T())
|
||||
{
|
||||
DictValue dictValue;
|
||||
bool success = getParameterDict(params, parameterName, dictValue);
|
||||
if(!success)
|
||||
{
|
||||
if(required)
|
||||
{
|
||||
std::string message = _layerName;
|
||||
message += " layer parameter does not contain ";
|
||||
message += parameterName;
|
||||
message += " parameter.";
|
||||
CV_Error(Error::StsBadArg, message);
|
||||
}
|
||||
else
|
||||
{
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
return dictValue.get<T>(idx);
|
||||
}
|
||||
|
||||
void getAspectRatios(const LayerParams &params)
|
||||
{
|
||||
DictValue aspectRatioParameter;
|
||||
bool aspectRatioRetrieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
if (!aspectRatioRetrieved)
|
||||
return;
|
||||
|
||||
for (int i = 0; i < aspectRatioParameter.size(); ++i)
|
||||
{
|
||||
float aspectRatio = aspectRatioParameter.get<float>(i);
|
||||
bool alreadyExists = fabs(aspectRatio - 1.f) < 1e-6f;
|
||||
|
||||
for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)
|
||||
{
|
||||
alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;
|
||||
}
|
||||
if (!alreadyExists)
|
||||
{
|
||||
_aspectRatios.push_back(aspectRatio);
|
||||
if (_flip)
|
||||
{
|
||||
_aspectRatios.push_back(1./aspectRatio);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void getParams(const std::string& name, const LayerParams &params,
|
||||
std::vector<float>* values)
|
||||
{
|
||||
DictValue dict;
|
||||
if (getParameterDict(params, name, dict))
|
||||
{
|
||||
values->resize(dict.size());
|
||||
for (int i = 0; i < dict.size(); ++i)
|
||||
{
|
||||
(*values)[i] = dict.get<float>(i);
|
||||
}
|
||||
}
|
||||
else
|
||||
values->clear();
|
||||
}
|
||||
|
||||
void getVariance(const LayerParams &params)
|
||||
{
|
||||
DictValue varianceParameter;
|
||||
bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
|
||||
CV_Assert(varianceParameterRetrieved);
|
||||
|
||||
int varianceSize = varianceParameter.size();
|
||||
if (varianceSize > 1)
|
||||
{
|
||||
// Must and only provide 4 variance.
|
||||
CV_Assert(varianceSize == 4);
|
||||
|
||||
for (int i = 0; i < varianceSize; ++i)
|
||||
{
|
||||
float variance = varianceParameter.get<float>(i);
|
||||
CV_Assert(variance > 0);
|
||||
_variance.push_back(variance);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (varianceSize == 1)
|
||||
{
|
||||
float variance = varianceParameter.get<float>(0);
|
||||
CV_Assert(variance > 0);
|
||||
_variance.push_back(variance);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Set default to 0.1.
|
||||
_variance.push_back(0.1f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PriorBoxLayerImpl(const LayerParams &params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
_flip = getParameter<bool>(params, "flip", 0, false, true);
|
||||
_clip = getParameter<bool>(params, "clip", 0, false, true);
|
||||
_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
|
||||
|
||||
getParams("min_size", params, &_minSize);
|
||||
getAspectRatios(params);
|
||||
getVariance(params);
|
||||
|
||||
if (params.has("max_size"))
|
||||
{
|
||||
getParams("max_size", params, &_maxSize);
|
||||
CV_Assert(_minSize.size() == _maxSize.size());
|
||||
for (int i = 0; i < _maxSize.size(); i++)
|
||||
CV_Assert(_minSize[i] < _maxSize[i]);
|
||||
}
|
||||
|
||||
std::vector<float> widths, heights;
|
||||
getParams("width", params, &widths);
|
||||
getParams("height", params, &heights);
|
||||
_explicitSizes = !widths.empty();
|
||||
CV_Assert(widths.size() == heights.size());
|
||||
|
||||
if (_explicitSizes)
|
||||
{
|
||||
CV_Assert(_aspectRatios.empty());
|
||||
CV_Assert(!params.has("min_size"));
|
||||
CV_Assert(!params.has("max_size"));
|
||||
_boxWidths = widths;
|
||||
_boxHeights = heights;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(!_minSize.empty());
|
||||
for (int i = 0; i < _minSize.size(); ++i)
|
||||
{
|
||||
float minSize = _minSize[i];
|
||||
CV_Assert(minSize > 0);
|
||||
_boxWidths.push_back(minSize);
|
||||
_boxHeights.push_back(minSize);
|
||||
|
||||
if (_maxSize.size() > 0)
|
||||
{
|
||||
float size = sqrt(minSize * _maxSize[i]);
|
||||
_boxWidths.push_back(size);
|
||||
_boxHeights.push_back(size);
|
||||
}
|
||||
|
||||
// rest of priors
|
||||
for (size_t r = 0; r < _aspectRatios.size(); ++r)
|
||||
{
|
||||
float arSqrt = sqrt(_aspectRatios[r]);
|
||||
_boxWidths.push_back(minSize * arSqrt);
|
||||
_boxHeights.push_back(minSize / arSqrt);
|
||||
}
|
||||
}
|
||||
}
|
||||
CV_Assert(_boxWidths.size() == _boxHeights.size());
|
||||
_numPriors = _boxWidths.size();
|
||||
|
||||
if (params.has("step_h") || params.has("step_w")) {
|
||||
CV_Assert(!params.has("step"));
|
||||
_stepY = getParameter<float>(params, "step_h");
|
||||
CV_Assert(_stepY > 0.);
|
||||
_stepX = getParameter<float>(params, "step_w");
|
||||
CV_Assert(_stepX > 0.);
|
||||
} else if (params.has("step")) {
|
||||
const float step = getParameter<float>(params, "step");
|
||||
CV_Assert(step > 0);
|
||||
_stepY = step;
|
||||
_stepX = step;
|
||||
} else {
|
||||
_stepY = 0;
|
||||
_stepX = 0;
|
||||
}
|
||||
if (params.has("offset_h") || params.has("offset_w"))
|
||||
{
|
||||
CV_Assert_N(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
|
||||
getParams("offset_h", params, &_offsetsY);
|
||||
getParams("offset_w", params, &_offsetsX);
|
||||
CV_Assert(_offsetsX.size() == _offsetsY.size());
|
||||
_numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
float offset = getParameter<float>(params, "offset", 0, false, 0.5);
|
||||
_offsetsX.assign(1, offset);
|
||||
_offsetsY.assign(1, offset);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
return _explicitSizes || _stepX == _stepY;
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() &&
|
||||
( _explicitSizes || (_minSize.size() == 1 && _maxSize.size() <= 1)))
|
||||
|| (backendId == DNN_BACKEND_VKCOM && haveVulkan());
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(!inputs.empty());
|
||||
|
||||
int layerHeight = inputs[0][2];
|
||||
int layerWidth = inputs[0][3];
|
||||
|
||||
// Since all images in a batch have the same height and width, we only need to
|
||||
// generate one set of priors which can be shared across all images.
|
||||
size_t outNum = 1;
|
||||
// 2 channels. First channel stores the mean of each prior coordinate.
|
||||
// Second channel stores the variance of each prior coordinate.
|
||||
size_t outChannels = 2;
|
||||
|
||||
outputs.resize(1, shape(outNum, outChannels,
|
||||
layerHeight * layerWidth * _numPriors * 4));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
|
||||
CV_CheckGT(inputs.size(), (size_t)1, "");
|
||||
CV_CheckEQ(inputs[0].dims, 4, ""); CV_CheckEQ(inputs[1].dims, 4, "");
|
||||
int layerWidth = inputs[0].size[3];
|
||||
int layerHeight = inputs[0].size[2];
|
||||
|
||||
int imageWidth = inputs[1].size[3];
|
||||
int imageHeight = inputs[1].size[2];
|
||||
|
||||
_stepY = _stepY == 0 ? (static_cast<float>(imageHeight) / layerHeight) : _stepY;
|
||||
_stepX = _stepX == 0 ? (static_cast<float>(imageWidth) / layerWidth) : _stepX;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
bool use_half = (inps.depth() == CV_16S);
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
int _layerWidth = inputs[0].size[3];
|
||||
int _layerHeight = inputs[0].size[2];
|
||||
|
||||
int _imageWidth = inputs[1].size[3];
|
||||
int _imageHeight = inputs[1].size[2];
|
||||
|
||||
if (umat_offsetsX.empty())
|
||||
{
|
||||
Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
|
||||
Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
|
||||
Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
|
||||
Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
|
||||
Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);
|
||||
|
||||
offsetsX.copyTo(umat_offsetsX);
|
||||
offsetsY.copyTo(umat_offsetsY);
|
||||
variance.copyTo(umat_variance);
|
||||
widths.copyTo(umat_widths);
|
||||
heights.copyTo(umat_heights);
|
||||
}
|
||||
|
||||
String opts;
|
||||
if (use_half)
|
||||
opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";
|
||||
else
|
||||
opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";
|
||||
|
||||
size_t nthreads = _layerHeight * _layerWidth;
|
||||
ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);
|
||||
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, (float)_stepX);
|
||||
kernel.set(2, (float)_stepY);
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
|
||||
kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
|
||||
kernel.set(5, (int)_offsetsX.size());
|
||||
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
|
||||
kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
|
||||
kernel.set(8, (int)_boxWidths.size());
|
||||
kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
kernel.set(10, (int)_layerHeight);
|
||||
kernel.set(11, (int)_layerWidth);
|
||||
kernel.set(12, (int)_imageHeight);
|
||||
kernel.set(13, (int)_imageWidth);
|
||||
kernel.run(1, &nthreads, NULL, false);
|
||||
|
||||
// clip the prior's coordinate such that it is within [0, 1]
|
||||
if (_clip)
|
||||
{
|
||||
ocl::Kernel kernel("clip", ocl::dnn::prior_box_oclsrc, opts);
|
||||
size_t nthreads = _layerHeight * _layerWidth * _numPriors * 4;
|
||||
if (!kernel.args((int)nthreads, ocl::KernelArg::PtrReadWrite(outputs[0]))
|
||||
.run(1, &nthreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
|
||||
// set the variance.
|
||||
{
|
||||
ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);
|
||||
int offset = total(shape(outputs[0]), 2);
|
||||
size_t nthreads = _layerHeight * _layerWidth * _numPriors;
|
||||
kernel.set(0, (int)nthreads);
|
||||
kernel.set(1, (int)offset);
|
||||
kernel.set(2, (int)_variance.size());
|
||||
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
|
||||
kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
|
||||
if (!kernel.run(1, &nthreads, NULL, false))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert(inputs.size() == 2);
|
||||
|
||||
int _layerWidth = inputs[0].size[3];
|
||||
int _layerHeight = inputs[0].size[2];
|
||||
|
||||
int _imageWidth = inputs[1].size[3];
|
||||
int _imageHeight = inputs[1].size[2];
|
||||
|
||||
float* outputPtr = outputs[0].ptr<float>();
|
||||
float _boxWidth, _boxHeight;
|
||||
for (size_t h = 0; h < _layerHeight; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < _layerWidth; ++w)
|
||||
{
|
||||
for (size_t i = 0; i < _boxWidths.size(); ++i)
|
||||
{
|
||||
_boxWidth = _boxWidths[i];
|
||||
_boxHeight = _boxHeights[i];
|
||||
for (int j = 0; j < _offsetsX.size(); ++j)
|
||||
{
|
||||
float center_x = (w + _offsetsX[j]) * _stepX;
|
||||
float center_y = (h + _offsetsY[j]) * _stepY;
|
||||
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
|
||||
_imageHeight, _bboxesNormalized, outputPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// clip the prior's coordinate such that it is within [0, 1]
|
||||
if (_clip)
|
||||
{
|
||||
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
|
||||
outputPtr = outputs[0].ptr<float>();
|
||||
for (size_t d = 0; d < _outChannelSize; ++d)
|
||||
{
|
||||
outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
|
||||
}
|
||||
}
|
||||
// set the variance.
|
||||
outputPtr = outputs[0].ptr<float>(0, 1);
|
||||
if(_variance.size() == 1)
|
||||
{
|
||||
Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);
|
||||
secondChannel.setTo(Scalar::all(_variance[0]));
|
||||
}
|
||||
else
|
||||
{
|
||||
int count = 0;
|
||||
for (size_t h = 0; h < _layerHeight; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < _layerWidth; ++w)
|
||||
{
|
||||
for (size_t i = 0; i < _numPriors; ++i)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
outputPtr[count] = _variance[j];
|
||||
++count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
if (_explicitSizes)
|
||||
{
|
||||
InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
|
||||
ieLayer.setSteps({_stepY, _stepX});
|
||||
|
||||
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
|
||||
ieLayer.setOffset(_offsetsX[0]);
|
||||
|
||||
ieLayer.setClip(_clip);
|
||||
ieLayer.setFlip(false); // We already flipped aspect ratios.
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
|
||||
CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
|
||||
CV_Assert(_boxWidths.size() == _boxHeights.size());
|
||||
l.getParameters()["width"] = _boxWidths;
|
||||
l.getParameters()["height"] = _boxHeights;
|
||||
l.getParameters()["variance"] = _variance;
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
else
|
||||
{
|
||||
InferenceEngine::Builder::PriorBoxLayer ieLayer(name);
|
||||
|
||||
CV_Assert(!_explicitSizes);
|
||||
ieLayer.setMinSize(_minSize[0]);
|
||||
if (!_maxSize.empty())
|
||||
ieLayer.setMaxSize(_maxSize[0]);
|
||||
|
||||
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
|
||||
ieLayer.setOffset(_offsetsX[0]);
|
||||
|
||||
ieLayer.setClip(_clip);
|
||||
ieLayer.setFlip(false); // We already flipped aspect ratios.
|
||||
|
||||
InferenceEngine::Builder::Layer l = ieLayer;
|
||||
if (_stepX == _stepY)
|
||||
{
|
||||
l.getParameters()["step"] = _stepX;
|
||||
l.getParameters()["step_h"] = 0.0f;
|
||||
l.getParameters()["step_w"] = 0.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
l.getParameters()["step"] = 0.0f;
|
||||
l.getParameters()["step_h"] = _stepY;
|
||||
l.getParameters()["step_w"] = _stepX;
|
||||
}
|
||||
if (!_aspectRatios.empty())
|
||||
{
|
||||
l.getParameters()["aspect_ratio"] = _aspectRatios;
|
||||
}
|
||||
CV_Assert(!_variance.empty());
|
||||
l.getParameters()["variance"] = _variance;
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(nodes.size() == 2);
|
||||
auto layer = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto image = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto layer_shape = std::make_shared<ngraph::op::ShapeOf>(layer);
|
||||
auto image_shape = std::make_shared<ngraph::op::ShapeOf>(image);
|
||||
|
||||
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{2});
|
||||
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{4});
|
||||
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{1});
|
||||
|
||||
auto slice_layer = std::make_shared<ngraph::op::v1::StridedSlice>(layer_shape,
|
||||
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
|
||||
auto slice_image = std::make_shared<ngraph::op::v1::StridedSlice>(image_shape,
|
||||
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
|
||||
|
||||
if (_explicitSizes)
|
||||
{
|
||||
CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
|
||||
CV_Assert(_boxWidths.size() == _boxHeights.size());
|
||||
ngraph::op::PriorBoxClusteredAttrs attrs;
|
||||
attrs.widths = _boxWidths;
|
||||
attrs.heights = _boxHeights;
|
||||
attrs.clip = _clip;
|
||||
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
|
||||
attrs.offset = _offsetsX[0];
|
||||
attrs.step_heights = _stepY;
|
||||
attrs.step_widths = _stepX;
|
||||
attrs.variances = _variance;
|
||||
|
||||
auto priorBox = std::make_shared<ngraph::op::PriorBoxClustered>(slice_layer, slice_image, attrs);
|
||||
auto axis = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{0});
|
||||
auto unsqueeze = std::make_shared<ngraph::op::v0::Unsqueeze>(priorBox, axis);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(unsqueeze));
|
||||
}
|
||||
else
|
||||
{
|
||||
ngraph::op::PriorBoxAttrs attrs;
|
||||
attrs.min_size = _minSize;
|
||||
attrs.max_size = _maxSize;
|
||||
// doesn't work with empty aspectRatio
|
||||
attrs.aspect_ratio = !_aspectRatios.empty()? _aspectRatios : std::vector<float>{1.0f};
|
||||
attrs.clip = _clip;
|
||||
attrs.flip = false;
|
||||
attrs.variance = _variance;
|
||||
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
|
||||
attrs.offset = _offsetsX[0];
|
||||
|
||||
attrs.step = _stepX;
|
||||
attrs.scale_all_sizes = !_aspectRatios.empty();
|
||||
|
||||
auto priorBox = std::make_shared<ngraph::op::PriorBox>(slice_layer, slice_image, attrs);
|
||||
auto axis = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{0});
|
||||
auto unsqueeze = std::make_shared<ngraph::op::v0::Unsqueeze>(priorBox, axis);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(unsqueeze));
|
||||
}
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
|
||||
auto feature_map_shape = feature_map_wrapper->getShape();
|
||||
|
||||
auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
|
||||
auto image_shape = image_wrapper->getShape();
|
||||
|
||||
PriorBoxConfiguration config;
|
||||
config.feature_map_width = feature_map_shape.rbegin()[0];
|
||||
config.feature_map_height = feature_map_shape.rbegin()[1];
|
||||
config.image_width = image_shape.rbegin()[0];
|
||||
config.image_height = image_shape.rbegin()[1];
|
||||
|
||||
config.num_priors = _numPriors;
|
||||
config.box_widths = _boxWidths;
|
||||
config.box_heights = _boxHeights;
|
||||
config.offsets_x = _offsetsX;
|
||||
config.offsets_y = _offsetsY;
|
||||
config.stepX = _stepX;
|
||||
config.stepY = _stepY;
|
||||
|
||||
config.variance = _variance;
|
||||
|
||||
config.clip = _clip;
|
||||
config.normalize = _bboxesNormalized;
|
||||
|
||||
return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_VULKAN
|
||||
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
|
||||
{
|
||||
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
|
||||
_clip, _numPriors,
|
||||
_variance, _offsetsX,
|
||||
_offsetsY, _boxWidths,
|
||||
_boxHeights));
|
||||
return Ptr<BackendNode>(new VkComBackendNode(input, op));
|
||||
}
|
||||
#endif // HAVE_VULKAN
|
||||
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(outputs); // suppress unused variable warning
|
||||
long flops = 0;
|
||||
|
||||
for (int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
flops += total(inputs[i], 2) * _numPriors * 4;
|
||||
}
|
||||
|
||||
return flops;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<float> _minSize;
|
||||
std::vector<float> _maxSize;
|
||||
|
||||
float _stepX, _stepY;
|
||||
|
||||
std::vector<float> _aspectRatios;
|
||||
std::vector<float> _variance;
|
||||
std::vector<float> _offsetsX;
|
||||
std::vector<float> _offsetsY;
|
||||
// Precomputed final widths and heights based on aspect ratios or explicit sizes.
|
||||
std::vector<float> _boxWidths;
|
||||
std::vector<float> _boxHeights;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat umat_offsetsX;
|
||||
UMat umat_offsetsY;
|
||||
UMat umat_widths;
|
||||
UMat umat_heights;
|
||||
UMat umat_variance;
|
||||
#endif
|
||||
|
||||
bool _flip;
|
||||
bool _clip;
|
||||
bool _explicitSizes;
|
||||
bool _bboxesNormalized;
|
||||
|
||||
size_t _numPriors;
|
||||
|
||||
static const size_t _numAxes = 4;
|
||||
static const std::string _layerName;
|
||||
|
||||
static float* addPrior(float center_x, float center_y, float width, float height,
|
||||
float imgWidth, float imgHeight, bool normalized, float* dst)
|
||||
{
|
||||
if (normalized)
|
||||
{
|
||||
dst[0] = (center_x - width * 0.5f) / imgWidth; // xmin
|
||||
dst[1] = (center_y - height * 0.5f) / imgHeight; // ymin
|
||||
dst[2] = (center_x + width * 0.5f) / imgWidth; // xmax
|
||||
dst[3] = (center_y + height * 0.5f) / imgHeight; // ymax
|
||||
}
|
||||
else
|
||||
{
|
||||
dst[0] = center_x - width * 0.5f; // xmin
|
||||
dst[1] = center_y - height * 0.5f; // ymin
|
||||
dst[2] = center_x + width * 0.5f - 1.0f; // xmax
|
||||
dst[3] = center_y + height * 0.5f - 1.0f; // ymax
|
||||
}
|
||||
return dst + 4;
|
||||
}
|
||||
};
|
||||
|
||||
const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");
|
||||
|
||||
Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
|
||||
{
|
||||
return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
451
3rdparty/opencv-4.5.4/modules/dnn/src/layers/proposal_layer.cpp
vendored
Normal file
@ -0,0 +1,451 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
#include "../ie_ngraph.hpp"
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
#include <ngraph/op/proposal.hpp>
|
||||
#else
|
||||
#include <ngraph/op/experimental/layers/proposal.hpp>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class ProposalLayerImpl CV_FINAL : public ProposalLayer
|
||||
{
|
||||
public:
|
||||
ProposalLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
|
||||
featStride = params.get<uint32_t>("feat_stride", 16);
|
||||
baseSize = params.get<uint32_t>("base_size", 16);
|
||||
// uint32_t minSize = params.get<uint32_t>("min_size", 16);
|
||||
keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
|
||||
keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
|
||||
nmsThreshold = params.get<float>("nms_thresh", 0.7);
|
||||
ratios = params.get("ratio");
|
||||
scales = params.get("scale");
|
||||
|
||||
{
|
||||
LayerParams lp;
|
||||
lp.set("step", featStride);
|
||||
lp.set("flip", false);
|
||||
lp.set("clip", false);
|
||||
lp.set("normalized_bbox", false);
|
||||
lp.set("offset", 0.5 * baseSize / featStride);
|
||||
|
||||
// Unused values.
|
||||
float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
|
||||
lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));
|
||||
|
||||
// Compute widths and heights explicitly.
|
||||
std::vector<float> widths, heights;
|
||||
widths.reserve(ratios.size() * scales.size());
|
||||
heights.reserve(ratios.size() * scales.size());
|
||||
for (int i = 0; i < ratios.size(); ++i)
|
||||
{
|
||||
float ratio = ratios.get<float>(i);
|
||||
float width = std::floor(baseSize / sqrt(ratio) + 0.5f);
|
||||
float height = std::floor(width * ratio + 0.5f);
|
||||
for (int j = 0; j < scales.size(); ++j)
|
||||
{
|
||||
float scale = scales.get<float>(j);
|
||||
widths.push_back(scale * width);
|
||||
heights.push_back(scale * height);
|
||||
}
|
||||
}
|
||||
lp.set("width", DictValue::arrayReal<float*>(&widths[0], widths.size()));
|
||||
lp.set("height", DictValue::arrayReal<float*>(&heights[0], heights.size()));
|
||||
|
||||
priorBoxLayer = PriorBoxLayer::create(lp);
|
||||
}
|
||||
{
|
||||
int order[] = {0, 2, 3, 1};
|
||||
LayerParams lp;
|
||||
lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));
|
||||
|
||||
deltasPermute = PermuteLayer::create(lp);
|
||||
scoresPermute = PermuteLayer::create(lp);
|
||||
}
|
||||
{
|
||||
LayerParams lp;
|
||||
lp.set("code_type", "CENTER_SIZE");
|
||||
lp.set("num_classes", 1);
|
||||
lp.set("share_location", true);
|
||||
lp.set("background_label_id", 1); // We won't pass background scores so set it out of range [0, num_classes)
|
||||
lp.set("variance_encoded_in_target", true);
|
||||
lp.set("keep_top_k", keepTopAfterNMS);
|
||||
lp.set("top_k", keepTopBeforeNMS);
|
||||
lp.set("nms_threshold", nmsThreshold);
|
||||
lp.set("normalized_bbox", false);
|
||||
lp.set("clip", true);
|
||||
|
||||
detectionOutputLayer = DetectionOutputLayer::create(lp);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
{
|
||||
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
|
||||
return !isMyriad;
|
||||
}
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
// We need to allocate the following blobs:
|
||||
// - output priors from PriorBoxLayer
|
||||
// - permuted priors
|
||||
// - permuted scores
|
||||
CV_Assert(inputs.size() == 3);
|
||||
|
||||
const MatShape& scores = inputs[0];
|
||||
const MatShape& bboxDeltas = inputs[1];
|
||||
|
||||
std::vector<MatShape> layerInputs, layerOutputs, layerInternals;
|
||||
|
||||
// Prior boxes layer.
|
||||
layerInputs.assign(1, scores);
|
||||
priorBoxLayer->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
|
||||
CV_Assert(layerOutputs.size() == 1);
|
||||
CV_Assert(layerInternals.empty());
|
||||
internals.push_back(layerOutputs[0]);
|
||||
|
||||
// Scores permute layer.
|
||||
CV_Assert(scores.size() == 4);
|
||||
MatShape objectScores = scores;
|
||||
CV_Assert((scores[1] & 1) == 0); // Number of channels is even.
|
||||
objectScores[1] /= 2;
|
||||
layerInputs.assign(1, objectScores);
|
||||
scoresPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
|
||||
CV_Assert(layerOutputs.size() == 1);
|
||||
CV_Assert(layerInternals.empty());
|
||||
internals.push_back(layerOutputs[0]);
|
||||
|
||||
// BBox predictions permute layer.
|
||||
layerInputs.assign(1, bboxDeltas);
|
||||
deltasPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
|
||||
CV_Assert(layerOutputs.size() == 1);
|
||||
CV_Assert(layerInternals.empty());
|
||||
internals.push_back(layerOutputs[0]);
|
||||
|
||||
// Detections layer.
|
||||
internals.push_back(shape(1, 1, keepTopAfterNMS, 7));
|
||||
|
||||
outputs.resize(2);
|
||||
outputs[0] = shape(keepTopAfterNMS, 5);
|
||||
outputs[1] = shape(keepTopAfterNMS, 1);
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
|
||||
std::vector<Mat> layerInputs;
|
||||
std::vector<Mat> layerOutputs;
|
||||
|
||||
// Scores permute layer.
|
||||
Mat scores = getObjectScores(inputs[0]);
|
||||
layerInputs.assign(1, scores);
|
||||
layerOutputs.assign(1, Mat(shape(scores.size[0], scores.size[2],
|
||||
scores.size[3], scores.size[1]), CV_32FC1));
|
||||
scoresPermute->finalize(layerInputs, layerOutputs);
|
||||
|
||||
// BBox predictions permute layer.
|
||||
const Mat& bboxDeltas = inputs[1];
|
||||
CV_Assert(bboxDeltas.dims == 4);
|
||||
layerInputs.assign(1, bboxDeltas);
|
||||
layerOutputs.assign(1, Mat(shape(bboxDeltas.size[0], bboxDeltas.size[2],
|
||||
bboxDeltas.size[3], bboxDeltas.size[1]), CV_32FC1));
|
||||
deltasPermute->finalize(layerInputs, layerOutputs);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
if (inputs_.depth() == CV_16S)
|
||||
return false;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
internals_.getUMatVector(internals);
|
||||
|
||||
CV_Assert(inputs.size() == 3);
|
||||
CV_Assert(internals.size() == 4);
|
||||
const UMat& scores = inputs[0];
|
||||
const UMat& bboxDeltas = inputs[1];
|
||||
const UMat& imInfo = inputs[2];
|
||||
UMat& priorBoxes = internals[0];
|
||||
UMat& permuttedScores = internals[1];
|
||||
UMat& permuttedDeltas = internals[2];
|
||||
UMat& detections = internals[3];
|
||||
|
||||
CV_Assert(imInfo.total() >= 2);
|
||||
// We've chosen the smallest data type because we need just a shape from it.
|
||||
Mat szMat;
|
||||
imInfo.copyTo(szMat);
|
||||
int rows = (int)szMat.at<float>(0);
|
||||
int cols = (int)szMat.at<float>(1);
|
||||
umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
|
||||
umat_fakeImageBlob.setTo(0);
|
||||
|
||||
// Generate prior boxes.
|
||||
std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
|
||||
layerInputs[0] = scores;
|
||||
layerInputs[1] = umat_fakeImageBlob;
|
||||
priorBoxLayer->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// Permute scores.
|
||||
layerInputs.assign(1, getObjectScores(scores));
|
||||
layerOutputs.assign(1, permuttedScores);
|
||||
scoresPermute->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// Permute deltas.
|
||||
layerInputs.assign(1, bboxDeltas);
|
||||
layerOutputs.assign(1, permuttedDeltas);
|
||||
deltasPermute->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
|
||||
// output internally because of different number of objects after NMS.
|
||||
layerInputs.resize(4);
|
||||
layerInputs[0] = permuttedDeltas;
|
||||
layerInputs[1] = permuttedScores;
|
||||
layerInputs[2] = priorBoxes;
|
||||
layerInputs[3] = umat_fakeImageBlob;
|
||||
|
||||
layerOutputs[0] = detections;
|
||||
detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// DetectionOutputLayer produces 1x1xNx7 output where N might be less or
|
||||
// equal to keepTopAfterNMS. We fill the rest by zeros.
|
||||
const int numDets = layerOutputs[0].total() / 7;
|
||||
CV_Assert(numDets <= keepTopAfterNMS);
|
||||
|
||||
MatShape s = shape(numDets, 7);
|
||||
layerOutputs[0] = layerOutputs[0].reshape(1, s.size(), &s[0]);
|
||||
|
||||
// The boxes.
|
||||
UMat dst = outputs[0].rowRange(0, numDets);
|
||||
layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
|
||||
dst.col(0).setTo(0); // First column are batch ids. Keep it zeros too.
|
||||
|
||||
// The scores.
|
||||
dst = outputs[1].rowRange(0, numDets);
|
||||
layerOutputs[0].col(2).copyTo(dst);
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs, internals;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
CV_Assert(inputs.size() == 3);
|
||||
CV_Assert(internals.size() == 4);
|
||||
const Mat& scores = inputs[0];
|
||||
const Mat& bboxDeltas = inputs[1];
|
||||
const Mat& imInfo = inputs[2];
|
||||
Mat& priorBoxes = internals[0];
|
||||
Mat& permuttedScores = internals[1];
|
||||
Mat& permuttedDeltas = internals[2];
|
||||
Mat& detections = internals[3];
|
||||
|
||||
CV_Assert(imInfo.total() >= 2);
|
||||
// We've chosen the smallest data type because we need just a shape from it.
|
||||
// We don't allocate memory but just need the shape is correct.
|
||||
Mat fakeImageBlob(shape(1, 1, imInfo.at<float>(0), imInfo.at<float>(1)), CV_8UC1, NULL);
|
||||
|
||||
// Generate prior boxes.
|
||||
std::vector<Mat> layerInputs(2), layerOutputs(1, priorBoxes);
|
||||
layerInputs[0] = scores;
|
||||
layerInputs[1] = fakeImageBlob;
|
||||
priorBoxLayer->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// Permute scores.
|
||||
layerInputs.assign(1, getObjectScores(scores));
|
||||
layerOutputs.assign(1, permuttedScores);
|
||||
scoresPermute->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// Permute deltas.
|
||||
layerInputs.assign(1, bboxDeltas);
|
||||
layerOutputs.assign(1, permuttedDeltas);
|
||||
deltasPermute->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
|
||||
// output internally because of different number of objects after NMS.
|
||||
layerInputs.resize(4);
|
||||
layerInputs[0] = permuttedDeltas;
|
||||
layerInputs[1] = permuttedScores;
|
||||
layerInputs[2] = priorBoxes;
|
||||
layerInputs[3] = fakeImageBlob;
|
||||
|
||||
layerOutputs[0] = detections;
|
||||
detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
|
||||
|
||||
// DetectionOutputLayer produces 1x1xNx7 output where N might be less or
|
||||
// equal to keepTopAfterNMS. We fill the rest by zeros.
|
||||
const int numDets = layerOutputs[0].total() / 7;
|
||||
CV_Assert(numDets <= keepTopAfterNMS);
|
||||
|
||||
// The boxes.
|
||||
layerOutputs[0] = layerOutputs[0].reshape(1, numDets);
|
||||
Mat dst = outputs[0].rowRange(0, numDets);
|
||||
layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
|
||||
dst.col(0).setTo(0); // First column are batch ids. Keep it zeros too.
|
||||
|
||||
// The scores.
|
||||
dst = outputs[1].rowRange(0, numDets);
|
||||
layerOutputs[0].col(2).copyTo(dst);
|
||||
}
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::ProposalLayer ieLayer(name);
|
||||
|
||||
ieLayer.setBaseSize(baseSize);
|
||||
ieLayer.setFeatStride(featStride);
|
||||
ieLayer.setMinSize(16);
|
||||
ieLayer.setNMSThresh(nmsThreshold);
|
||||
ieLayer.setPostNMSTopN(keepTopAfterNMS);
|
||||
ieLayer.setPreNMSTopN(keepTopBeforeNMS);
|
||||
|
||||
std::vector<float> scalesVec(scales.size());
|
||||
for (int i = 0; i < scales.size(); ++i)
|
||||
scalesVec[i] = scales.get<float>(i);
|
||||
ieLayer.setScale(scalesVec);
|
||||
|
||||
std::vector<float> ratiosVec(ratios.size());
|
||||
for (int i = 0; i < ratios.size(); ++i)
|
||||
ratiosVec[i] = ratios.get<float>(i);
|
||||
ieLayer.setRatio(ratiosVec);
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(nodes.size() == 3);
|
||||
ngraph::op::ProposalAttrs attr;
|
||||
attr.base_size = baseSize;
|
||||
attr.nms_thresh = nmsThreshold;
|
||||
attr.feat_stride = featStride;
|
||||
attr.min_size = 16;
|
||||
attr.pre_nms_topn = keepTopBeforeNMS;
|
||||
attr.post_nms_topn = keepTopAfterNMS;
|
||||
|
||||
std::vector<float> ratiosVec(ratios.size());
|
||||
for (int i = 0; i < ratios.size(); ++i)
|
||||
ratiosVec[i] = ratios.get<float>(i);
|
||||
attr.ratio = ratiosVec;
|
||||
|
||||
std::vector<float> scalesVec(scales.size());
|
||||
for (int i = 0; i < scales.size(); ++i)
|
||||
scalesVec[i] = scales.get<float>(i);
|
||||
attr.scale = scalesVec;
|
||||
|
||||
auto& class_probs = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto& class_logits = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto& image_shape = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
|
||||
CV_Assert_N(image_shape->get_shape().size() == 2, image_shape->get_shape().front() == 1);
|
||||
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{1},
|
||||
std::vector<int64_t>{(int64_t)image_shape->get_shape().back()});
|
||||
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(image_shape, shape, true);
|
||||
|
||||
auto proposal = std::make_shared<ngraph::op::Proposal>(class_probs, class_logits, reshape, attr);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(proposal));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
private:
|
||||
// A first half of channels are background scores. We need only a second one.
|
||||
static Mat getObjectScores(const Mat& m)
|
||||
{
|
||||
CV_Assert(m.dims == 4);
|
||||
CV_Assert(m.size[0] == 1);
|
||||
int channels = m.size[1];
|
||||
CV_Assert((channels & 1) == 0);
|
||||
return slice(m, Range::all(), Range(channels / 2, channels));
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
static UMat getObjectScores(const UMat& m)
|
||||
{
|
||||
CV_Assert(m.dims == 4);
|
||||
CV_Assert(m.size[0] == 1);
|
||||
int channels = m.size[1];
|
||||
CV_Assert((channels & 1) == 0);
|
||||
|
||||
Range r = Range(channels / 2, channels);
|
||||
Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };
|
||||
return m(&ranges[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
Ptr<PriorBoxLayer> priorBoxLayer;
|
||||
Ptr<DetectionOutputLayer> detectionOutputLayer;
|
||||
|
||||
Ptr<PermuteLayer> deltasPermute;
|
||||
Ptr<PermuteLayer> scoresPermute;
|
||||
uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;
|
||||
float nmsThreshold;
|
||||
DictValue ratios, scales;
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat umat_fakeImageBlob;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
Ptr<ProposalLayer> ProposalLayer::create(const LayerParams& params)
{
    return Ptr<ProposalLayer>(new ProposalLayerImpl(params));
}

}  // namespace dnn
}  // namespace cv
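For reference, a minimal standalone sketch (not part of the vendored file) of how this Proposal layer can be instantiated and its output shapes queried. The parameter keys ("feat_stride", "base_size", "ratio", "scale", "pre_nms_topn", "post_nms_topn", "nms_thresh") and the input sizes are assumptions based on the Caffe-style proposal layer; the authoritative key list is in the ProposalLayerImpl constructor earlier in this file.

// sketch.cpp -- illustrative only, shapes and keys are assumptions
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <iostream>

int main()
{
    cv::dnn::LayerParams lp;
    lp.name = "proposal";
    lp.type = "Proposal";
    lp.set("feat_stride", 16);       // assumed key
    lp.set("base_size", 16);         // assumed key
    lp.set("pre_nms_topn", 6000);    // keepTopBeforeNMS
    lp.set("post_nms_topn", 300);    // keepTopAfterNMS
    lp.set("nms_thresh", 0.7f);
    double ratios[] = {0.5, 1.0, 2.0}, scales[] = {8.0, 16.0, 32.0};
    lp.set("ratio", cv::dnn::DictValue::arrayReal(ratios, 3));
    lp.set("scale", cv::dnn::DictValue::arrayReal(scales, 3));

    cv::Ptr<cv::dnn::ProposalLayer> proposal = cv::dnn::ProposalLayer::create(lp);

    // Three inputs, as asserted in getMemoryShapes: objectness scores,
    // bbox deltas and the im_info blob (illustrative sizes, 9 anchors).
    std::vector<cv::dnn::MatShape> in, out, internals;
    in.push_back({1, 18, 40, 40});   // 2*anchors score channels
    in.push_back({1, 36, 40, 40});   // 4*anchors delta channels
    in.push_back({1, 3});            // im_info: height, width, scale
    proposal->getMemoryShapes(in, 1, out, internals);

    for (size_t i = 0; i < out.size(); ++i)
        std::cout << "output " << i << " has " << out[i].size() << " dims" << std::endl;
    return 0;
}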
813
3rdparty/opencv-4.5.4/modules/dnn/src/layers/recurrent_layers.cpp
vendored
Normal file
@ -0,0 +1,813 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <cmath>
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
template<typename Dtype>
|
||||
static void tanh(const Mat &src, Mat &dst)
|
||||
{
|
||||
MatConstIterator_<Dtype> itSrc = src.begin<Dtype>();
|
||||
MatIterator_<Dtype> itDst = dst.begin<Dtype>();
|
||||
|
||||
for (; itSrc != src.end<Dtype>(); itSrc++, itDst++)
|
||||
*itDst = std::tanh(*itSrc);
|
||||
}
|
||||
|
||||
//TODO: make utils method
|
||||
static void tanh(const Mat &src, Mat &dst)
|
||||
{
|
||||
dst.create(src.dims, (const int*)src.size, src.type());
|
||||
|
||||
if (src.type() == CV_32F)
|
||||
tanh<float>(src, dst);
|
||||
else if (src.type() == CV_64F)
|
||||
tanh<double>(src, dst);
|
||||
else
|
||||
CV_Error(Error::StsUnsupportedFormat, "Function supports only floating point types");
|
||||
}
|
||||
|
||||
static void sigmoid(const Mat &src, Mat &dst)
|
||||
{
|
||||
cv::exp(-src, dst);
|
||||
cv::pow(1 + dst, -1, dst);
|
||||
}
|
||||
|
||||
typedef void (*ActivationFunction)(const Mat &src, Mat &dst);
|
||||
static ActivationFunction get_activation_function(const String& activation) {
|
||||
// most used activations for PyTorch and TF : Tanh, Sigmoid
|
||||
// if you need to support more optional activations use std::map instead
|
||||
if (activation == "Tanh")
|
||||
{
|
||||
return tanh;
|
||||
}
|
||||
else if (activation == "Sigmoid")
|
||||
{
|
||||
return sigmoid;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented,
|
||||
cv::format("Activation function [%s] for layer LSTM is not supported", activation.c_str()));
|
||||
}
|
||||
}
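A standalone illustration (not part of the vendored file) of the sigmoid trick used in the helper above: 1/(1+exp(-x)) is computed with two whole-matrix operations instead of an element loop.

#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat x = (cv::Mat_<float>(1, 3) << -2.f, 0.f, 2.f);
    cv::Mat s;
    cv::exp(-x, s);               // s = exp(-x)
    cv::pow(1 + s, -1, s);        // s = 1 / (1 + exp(-x))
    std::cout << s << std::endl;  // approx [0.119, 0.5, 0.881]
    return 0;
}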
class LSTMLayerImpl CV_FINAL : public LSTMLayer
|
||||
{
|
||||
int numTimeStamps, numSamples;
|
||||
bool allocated;
|
||||
|
||||
MatShape outTailShape; //shape of single output sample
|
||||
MatShape outTsShape; //shape of N output samples
|
||||
|
||||
bool useTimestampDim;
|
||||
bool produceCellOutput;
|
||||
float forgetBias, cellClip;
|
||||
bool useCellClip, usePeephole;
|
||||
bool reverse; // If true, go in negative direction along the time axis
|
||||
bool bidirectional; // If true, produces both forward and reversed directions along time axis
|
||||
|
||||
ActivationFunction f_activation;
|
||||
ActivationFunction g_activation;
|
||||
ActivationFunction h_activation;
|
||||
|
||||
public:
|
||||
|
||||
LSTMLayerImpl(const LayerParams& params)
|
||||
: numTimeStamps(0), numSamples(0)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
|
||||
bidirectional = params.get<bool>("bidirectional", false);
|
||||
if (!blobs.empty())
|
||||
{
|
||||
CV_Assert(blobs.size() >= 3);
|
||||
|
||||
blobs[2] = blobs[2].reshape(1, 1);
|
||||
|
||||
const Mat& Wh = blobs[0];
|
||||
const Mat& Wx = blobs[1];
|
||||
const Mat& bias = blobs[2];
|
||||
const Mat& hInternal = blobs[3];
|
||||
const Mat& cInternal = blobs[4];
|
||||
CV_CheckEQ(Wh.dims, 2, "");
|
||||
CV_CheckEQ(Wx.dims, 2, "");
|
||||
CV_CheckEQ(Wh.rows, Wx.rows, "");
|
||||
CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional))*4*Wh.cols, "");
|
||||
CV_CheckEQ(Wh.rows, (int)bias.total(), "");
|
||||
CV_CheckEQ(hInternal.cols, Wh.cols, "");
|
||||
CV_CheckEQ(hInternal.cols, cInternal.cols, "");
|
||||
CV_CheckEQ(hInternal.rows, cInternal.rows, "");
|
||||
CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
|
||||
|
||||
// Peephole weights.
|
||||
if (blobs.size() > 5)
|
||||
{
|
||||
CV_Assert(blobs.size() == 8);
|
||||
const int N = Wh.cols;
|
||||
for (int i = 5; i < 8; ++i)
|
||||
{
|
||||
CV_Assert(blobs[i].rows == N && blobs[i].cols == N);
|
||||
CV_Assert(blobs[i].type() == bias.type());
|
||||
}
|
||||
}
|
||||
}
|
||||
useTimestampDim = params.get<bool>("use_timestamp_dim", true);
|
||||
produceCellOutput = params.get<bool>("produce_cell_output", false);
|
||||
forgetBias = params.get<float>("forget_bias", 0.0f);
|
||||
cellClip = params.get<float>("cell_clip", 0.0f);
|
||||
useCellClip = params.get<bool>("use_cell_clip", false);
|
||||
usePeephole = params.get<bool>("use_peephole", false);
|
||||
reverse = params.get<bool>("reverse", false);
|
||||
CV_Assert(!reverse || !bidirectional);
|
||||
|
||||
// read activations
|
||||
DictValue activations = params.get<DictValue>("activations", "");
|
||||
if (activations.size() == 1) // if activations wasn't specified use default
|
||||
{
|
||||
f_activation = sigmoid;
|
||||
g_activation = tanh;
|
||||
h_activation = tanh;
|
||||
} else {
|
||||
CV_Assert(activations.size() == 3);
|
||||
f_activation = get_activation_function(activations.getStringValue(0));
|
||||
g_activation = get_activation_function(activations.getStringValue(1));
|
||||
h_activation = get_activation_function(activations.getStringValue(2));
|
||||
}
|
||||
|
||||
allocated = false;
|
||||
outTailShape.clear();
|
||||
}
|
||||
|
||||
void setUseTimstampsDim(bool use) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(!allocated);
|
||||
useTimestampDim = use;
|
||||
}
|
||||
|
||||
void setProduceCellOutput(bool produce) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(!allocated);
|
||||
produceCellOutput = produce;
|
||||
}
|
||||
|
||||
void setOutShape(const MatShape &outTailShape_) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(!allocated || total(outTailShape) == total(outTailShape_));
|
||||
outTailShape = outTailShape_;
|
||||
}
|
||||
|
||||
void setWeights(const Mat &Wh, const Mat &Wx, const Mat &bias) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(Wh.dims == 2 && Wx.dims == 2);
|
||||
CV_Assert(Wh.rows == Wx.rows);
|
||||
CV_Assert(Wh.rows == 4*Wh.cols);
|
||||
CV_Assert(Wh.rows == (int)bias.total());
|
||||
CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
|
||||
|
||||
blobs.resize(3);
|
||||
blobs[0] = Mat(Wh.clone());
|
||||
blobs[1] = Mat(Wx.clone());
|
||||
blobs[2] = Mat(bias.clone()).reshape(1, 1);
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
|
||||
CV_Assert(inputs.size() == 1);
|
||||
const MatShape& inp0 = inputs[0];
|
||||
|
||||
const Mat &Wh = blobs[0], &Wx = blobs[1];
|
||||
int _numOut = Wh.size[1];
|
||||
int _numInp = Wx.size[1];
|
||||
MatShape outTailShape_(outTailShape), outResShape;
|
||||
|
||||
if (!outTailShape_.empty())
|
||||
CV_Assert(total(outTailShape_) == _numOut);
|
||||
else
|
||||
outTailShape_.assign(1, _numOut);
|
||||
|
||||
int _numSamples;
|
||||
if (useTimestampDim)
|
||||
{
|
||||
CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
|
||||
_numSamples = inp0[1];
|
||||
outResShape.push_back(inp0[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(inp0.size() >= 2 && total(inp0, 1) == _numInp);
|
||||
_numSamples = inp0[0];
|
||||
}
|
||||
|
||||
outResShape.push_back(_numSamples);
|
||||
outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end());
|
||||
outResShape.back() *= (1 + static_cast<int>(bidirectional));
|
||||
|
||||
size_t noutputs = produceCellOutput ? 2 : 1;
|
||||
outputs.assign(noutputs, outResShape);
|
||||
|
||||
internals.assign(1, shape(_numSamples, _numOut)); // hInternal
|
||||
internals.push_back(shape(_numSamples, _numOut)); // cInternal
|
||||
internals.push_back(shape(_numSamples, 1)); // dummyOnes
|
||||
internals.push_back(shape(_numSamples, 4*_numOut)); // gates
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> input;
|
||||
inputs_arr.getMatVector(input);
|
||||
|
||||
CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
|
||||
CV_Assert(input.size() == 1);
|
||||
const Mat& inp0 = input[0];
|
||||
|
||||
Mat &Wh = blobs[0], &Wx = blobs[1];
|
||||
int numOut = Wh.size[1];
|
||||
int numInp = Wx.size[1];
|
||||
|
||||
if (!outTailShape.empty())
|
||||
CV_Assert(total(outTailShape) == numOut);
|
||||
else
|
||||
outTailShape.assign(1, numOut);
|
||||
|
||||
if (useTimestampDim)
|
||||
{
|
||||
CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp);
|
||||
numTimeStamps = inp0.size[0];
|
||||
numSamples = inp0.size[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(inp0.dims >= 2 && (int)inp0.total(1) == numInp);
|
||||
numTimeStamps = 1;
|
||||
numSamples = inp0.size[0];
|
||||
}
|
||||
|
||||
outTsShape.clear();
|
||||
outTsShape.push_back(numSamples);
|
||||
outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end());
|
||||
outTsShape.back() *= (1 + static_cast<int>(bidirectional));
|
||||
|
||||
allocated = true;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> input, output, internals;
|
||||
inputs_arr.getMatVector(input);
|
||||
outputs_arr.getMatVector(output);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
const int numDirs = 1 + static_cast<int>(bidirectional);
|
||||
for (int i = 0; i < numDirs; ++i)
|
||||
{
|
||||
const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
|
||||
const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
|
||||
const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
|
||||
const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
|
||||
const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs);
|
||||
|
||||
int numOut = Wh.size[1];
|
||||
Mat hInternal = internals[0], cInternal = internals[1],
|
||||
dummyOnes = internals[2], gates = internals[3];
|
||||
h_0.copyTo(hInternal);
|
||||
c_0.copyTo(cInternal);
|
||||
dummyOnes.setTo(1.);
|
||||
|
||||
int numSamplesTotal = numTimeStamps*numSamples;
|
||||
Mat xTs = input[0].reshape(1, numSamplesTotal);
|
||||
|
||||
Mat hOutTs = output[0].reshape(1, numSamplesTotal);
|
||||
hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs);
|
||||
Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat();
|
||||
|
||||
int tsStart, tsEnd, tsInc;
|
||||
if (reverse || i == 1) {
|
||||
tsStart = numTimeStamps - 1;
|
||||
tsEnd = -1;
|
||||
tsInc = -1;
|
||||
}
|
||||
else {
|
||||
tsStart = 0;
|
||||
tsEnd = numTimeStamps;
|
||||
tsInc = 1;
|
||||
}
|
||||
for (int ts = tsStart; ts != tsEnd; ts += tsInc)
|
||||
{
|
||||
Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
|
||||
Mat xCurr = xTs.rowRange(curRowRange);
|
||||
|
||||
gemm(xCurr, Wx, 1, gates, 0, gates, GEMM_2_T); // Wx * x_t
|
||||
gemm(hInternal, Wh, 1, gates, 1, gates, GEMM_2_T); //+Wh * h_{t-1}
|
||||
gemm(dummyOnes, bias, 1, gates, 1, gates); //+b
|
||||
|
||||
Mat gateI = gates.colRange(0*numOut, 1*numOut);
|
||||
Mat gateF = gates.colRange(1*numOut, 2*numOut);
|
||||
Mat gateO = gates.colRange(2*numOut, 3*numOut);
|
||||
Mat gateG = gates.colRange(3*numOut, 4*numOut);
|
||||
|
||||
if (forgetBias)
|
||||
add(gateF, forgetBias, gateF);
|
||||
|
||||
if (usePeephole)
|
||||
{
|
||||
Mat gatesIF = gates.colRange(0, 2*numOut);
|
||||
gemm(cInternal, blobs[5], 1, gateI, 1, gateI);
|
||||
gemm(cInternal, blobs[6], 1, gateF, 1, gateF);
|
||||
f_activation(gatesIF, gatesIF);
|
||||
}
|
||||
else
|
||||
{
|
||||
Mat gatesIFO = gates.colRange(0, 3*numOut);
|
||||
f_activation(gatesIFO, gatesIFO);
|
||||
}
|
||||
|
||||
g_activation(gateG, gateG);
|
||||
|
||||
//compute c_t
|
||||
multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1}
|
||||
multiply(gateI, gateG, gateI); // i_t (*) g_t
|
||||
add(gateF, gateI, cInternal); // c_t = f_t (*) c_{t-1} + i_t (*) g_t
|
||||
|
||||
if (useCellClip)
|
||||
{
|
||||
min(cInternal, cellClip, cInternal);
|
||||
max(cInternal, -cellClip, cInternal);
|
||||
}
|
||||
if (usePeephole)
|
||||
{
|
||||
gemm(cInternal, blobs[7], 1, gateO, 1, gateO);
|
||||
f_activation(gateO, gateO);
|
||||
}
|
||||
|
||||
//compute h_t
|
||||
h_activation(cInternal, hInternal);
|
||||
multiply(gateO, hInternal, hInternal);
|
||||
|
||||
//save results in output blobs
|
||||
hInternal.copyTo(hOutTs.rowRange(curRowRange));
|
||||
if (produceCellOutput)
|
||||
cInternal.copyTo(cOutTs.rowRange(curRowRange));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ptr<LSTMLayer> LSTMLayer::create(const LayerParams& params)
{
    return Ptr<LSTMLayer>(new LSTMLayerImpl(params));
}

int LSTMLayer::inputNameToIndex(String inputName)
{
    if (toLowerCase(inputName) == "x")
        return 0;
    return -1;
}

int LSTMLayer::outputNameToIndex(const String& outputName)
{
    if (toLowerCase(outputName) == "h")
        return 0;
    else if (toLowerCase(outputName) == "c")
        return 1;
    return -1;
}
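As a reference point, a minimal sketch (not part of the vendored file) of the weight layout that LSTMLayer::setWeights expects, matching the asserts above: with numOut hidden units and numInp input features, the four gates are stacked row-wise in the order i, f, o, g, so every weight matrix has 4*numOut rows. The sizes below are illustrative.

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>

int main()
{
    const int numInp = 8, numOut = 4;

    cv::Mat Wh(4 * numOut, numOut, CV_32F);   // recurrent weights
    cv::Mat Wx(4 * numOut, numInp, CV_32F);   // input weights
    cv::Mat bias(1, 4 * numOut, CV_32F);      // one bias per gate row
    cv::randu(Wh, -0.1, 0.1);
    cv::randu(Wx, -0.1, 0.1);
    bias.setTo(0);

    cv::Ptr<cv::dnn::LSTMLayer> lstm = cv::dnn::LSTMLayer::create(cv::dnn::LayerParams());
    lstm->setWeights(Wh, Wx, bias);           // passes the CV_Assert checks above
    return 0;
}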
class RNNLayerImpl : public RNNLayer
|
||||
{
|
||||
int numX, numH, numO;
|
||||
int numSamples, numTimestamps, numSamplesTotal;
|
||||
int dtype;
|
||||
Mat Whh, Wxh, bh;
|
||||
Mat Who, bo;
|
||||
bool produceH;
|
||||
|
||||
public:
|
||||
|
||||
RNNLayerImpl(const LayerParams& params)
|
||||
: numX(0), numH(0), numO(0), numSamples(0), numTimestamps(0), numSamplesTotal(0), dtype(0)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
type = "RNN";
|
||||
produceH = false;
|
||||
}
|
||||
|
||||
void setProduceHiddenOutput(bool produce = false) CV_OVERRIDE
|
||||
{
|
||||
produceH = produce;
|
||||
}
|
||||
|
||||
void setWeights(const Mat &W_xh, const Mat &b_h, const Mat &W_hh, const Mat &W_ho, const Mat &b_o) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(W_hh.dims == 2 && W_xh.dims == 2);
|
||||
CV_Assert(W_hh.size[0] == W_xh.size[0] && W_hh.size[0] == W_hh.size[1] && (int)b_h.total() == W_xh.size[0]);
|
||||
CV_Assert(W_ho.size[0] == (int)b_o.total());
|
||||
CV_Assert(W_ho.size[1] == W_hh.size[1]);
|
||||
|
||||
blobs.resize(5);
|
||||
blobs[0] = Mat(W_xh.clone());
|
||||
blobs[1] = Mat(b_h.clone());
|
||||
blobs[2] = Mat(W_hh.clone());
|
||||
blobs[3] = Mat(W_ho.clone());
|
||||
blobs[4] = Mat(b_o.clone());
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() >= 1 && inputs.size() <= 2);
|
||||
|
||||
Mat Who_ = blobs[3];
|
||||
Mat Wxh_ = blobs[0];
|
||||
|
||||
int numTimestamps_ = inputs[0][0];
|
||||
int numSamples_ = inputs[0][1];
|
||||
|
||||
int numO_ = Who_.rows;
|
||||
int numH_ = Wxh_.rows;
|
||||
|
||||
outputs.clear();
|
||||
int dims[] = {numTimestamps_, numSamples_, numO_};
|
||||
outputs.push_back(shape(dims, 3));
|
||||
dims[2] = numH_;
|
||||
if (produceH)
|
||||
outputs.push_back(shape(dims, 3));
|
||||
|
||||
internals.assign(2, shape(numSamples_, numH_));
|
||||
internals.push_back(shape(numSamples_, 1));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> input, outputs;
|
||||
inputs_arr.getMatVector(input);
|
||||
|
||||
CV_Assert(input.size() >= 1 && input.size() <= 2);
|
||||
|
||||
Wxh = blobs[0];
|
||||
bh = blobs[1];
|
||||
Whh = blobs[2];
|
||||
Who = blobs[3];
|
||||
bo = blobs[4];
|
||||
|
||||
numH = Wxh.rows;
|
||||
numX = Wxh.cols;
|
||||
numO = Who.rows;
|
||||
|
||||
const Mat& inp0 = input[0];
|
||||
|
||||
CV_Assert(inp0.dims >= 2);
|
||||
CV_Assert(inp0.total(2) == numX);
|
||||
dtype = CV_32F;
|
||||
CV_Assert(inp0.type() == dtype);
|
||||
numTimestamps = inp0.size[0];
|
||||
numSamples = inp0.size[1];
|
||||
numSamplesTotal = numTimestamps * numSamples;
|
||||
|
||||
bh = bh.reshape(1, 1); //is 1 x numH Mat
|
||||
bo = bo.reshape(1, 1); //is 1 x numO Mat
|
||||
}
|
||||
|
||||
void reshapeOutput(std::vector<Mat> &output)
|
||||
{
|
||||
output.resize(produceH ? 2 : 1);
|
||||
int sz0[] = { numTimestamps, numSamples, numO };
|
||||
output[0].create(3, sz0, dtype);
|
||||
if (produceH)
|
||||
{
|
||||
int sz1[] = { numTimestamps, numSamples, numH };
|
||||
output[1].create(3, sz1, dtype);
|
||||
}
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> input, output, internals;
|
||||
inputs_arr.getMatVector(input);
|
||||
outputs_arr.getMatVector(output);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
Mat xTs = input[0].reshape(1, numSamplesTotal);
|
||||
Mat oTs = output[0].reshape(1, numSamplesTotal);
|
||||
Mat hTs = produceH ? output[1].reshape(1, numSamplesTotal) : Mat();
|
||||
Mat hCurr = internals[0];
|
||||
Mat hPrev = internals[1];
|
||||
Mat dummyBiasOnes = internals[2];
|
||||
|
||||
hPrev.setTo(0.);
|
||||
dummyBiasOnes.setTo(1.);
|
||||
|
||||
for (int ts = 0; ts < numTimestamps; ts++)
|
||||
{
|
||||
Range curRowRange = Range(ts * numSamples, (ts + 1) * numSamples);
|
||||
Mat xCurr = xTs.rowRange(curRowRange);
|
||||
|
||||
gemm(hPrev, Whh, 1, hCurr, 0, hCurr, GEMM_2_T); // W_{hh} * h_{prev}
|
||||
gemm(xCurr, Wxh, 1, hCurr, 1, hCurr, GEMM_2_T); //+W_{xh} * x_{curr}
|
||||
gemm(dummyBiasOnes, bh, 1, hCurr, 1, hCurr); //+bh
|
||||
tanh(hCurr, hPrev);
|
||||
|
||||
Mat oCurr = oTs.rowRange(curRowRange);
|
||||
gemm(hPrev, Who, 1, oCurr, 0, oCurr, GEMM_2_T); // W_{ho} * h_{prev}
|
||||
gemm(dummyBiasOnes, bo, 1, oCurr, 1, oCurr); //+b_o
|
||||
tanh(oCurr, oCurr);
|
||||
|
||||
if (produceH)
|
||||
hPrev.copyTo(hTs.rowRange(curRowRange));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
CV_EXPORTS_W Ptr<RNNLayer> RNNLayer::create(const LayerParams& params)
{
    return Ptr<RNNLayer>(new RNNLayerImpl(params));
}
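A sketch (for reference only, not part of the vendored file) of a single time step of the recurrence that RNNLayerImpl::forward implements above: h_t = tanh(W_hh*h_{t-1} + W_xh*x_t + b_h), o_t = tanh(W_ho*h_t + b_o). It uses the same cv::gemm calls with GEMM_2_T; the sizes and random values are illustrative.

#include <opencv2/core.hpp>

int main()
{
    const int numX = 5, numH = 3, numO = 2, numSamples = 1;
    cv::Mat Wxh(numH, numX, CV_32F), Whh(numH, numH, CV_32F), Who(numO, numH, CV_32F);
    cv::Mat bh(1, numH, CV_32F, cv::Scalar(0)), bo(1, numO, CV_32F, cv::Scalar(0));
    cv::randu(Wxh, -1, 1); cv::randu(Whh, -1, 1); cv::randu(Who, -1, 1);

    cv::Mat x(numSamples, numX, CV_32F), hPrev(numSamples, numH, CV_32F, cv::Scalar(0));
    cv::randu(x, -1, 1);
    cv::Mat ones(numSamples, 1, CV_32F, cv::Scalar(1));

    cv::Mat h, o;
    cv::gemm(hPrev, Whh, 1, cv::noArray(), 0, h, cv::GEMM_2_T);  // W_hh * h_{t-1}
    cv::gemm(x, Wxh, 1, h, 1, h, cv::GEMM_2_T);                  // + W_xh * x_t
    cv::gemm(ones, bh, 1, h, 1, h);                              // + b_h
    cv::exp(-2 * h, h); h = (1 - h) / (1 + h);                   // element-wise tanh

    cv::gemm(h, Who, 1, cv::noArray(), 0, o, cv::GEMM_2_T);      // W_ho * h_t
    cv::gemm(ones, bo, 1, o, 1, o);                              // + b_o
    return 0;
}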
class GRULayerImpl CV_FINAL : public GRULayer
|
||||
{
|
||||
int numTimeStamps, numSamples;
|
||||
bool allocated;
|
||||
|
||||
MatShape outTailShape; //shape of single output sample
|
||||
MatShape outTsShape; //shape of N output samples
|
||||
bool bidirectional; // If true, produces both forward and reversed directions along time axis
|
||||
|
||||
public:
|
||||
|
||||
GRULayerImpl(const LayerParams& params) : numTimeStamps(0), numSamples(0)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
|
||||
bidirectional = params.get<bool>("bidirectional", false);
|
||||
if (!blobs.empty())
|
||||
{
|
||||
CV_Assert(blobs.size() >= 3);
|
||||
|
||||
blobs[2] = blobs[2].reshape(1, 1);
|
||||
|
||||
const Mat& Wh = blobs[0];
|
||||
const Mat& Wx = blobs[1];
|
||||
const Mat& bias = blobs[2];
|
||||
const Mat& hInternal = blobs[3];
|
||||
CV_CheckEQ(Wh.dims, 2, "");
|
||||
CV_CheckEQ(Wx.dims, 2, "");
|
||||
CV_CheckEQ(Wh.rows, Wx.rows, "");
|
||||
CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional)) * 3 * Wh.cols, "");
|
||||
CV_CheckEQ(Wh.rows * 2, (int)bias.total(), "");
|
||||
CV_CheckEQ(hInternal.cols, Wh.cols, "");
|
||||
CV_CheckTypeEQ(Wh.type(), Wx.type(), "");
|
||||
CV_CheckTypeEQ(Wx.type(), bias.type(), "");
|
||||
}
|
||||
|
||||
allocated = false;
|
||||
outTailShape.clear();
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 1);
|
||||
const MatShape& inp0 = inputs[0];
|
||||
|
||||
const Mat &Wh = blobs[0], &Wx = blobs[1];
|
||||
int _numOut = Wh.size[1];
|
||||
int _numInp = Wx.size[1];
|
||||
MatShape outTailShape_(outTailShape), outResShape;
|
||||
|
||||
if (!outTailShape_.empty())
|
||||
CV_Assert(total(outTailShape_) == _numOut);
|
||||
else
|
||||
outTailShape_.assign(1, _numOut);
|
||||
|
||||
int _numSamples;
|
||||
CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
|
||||
_numSamples = inp0[1];
|
||||
outResShape.push_back(inp0[0]);
|
||||
|
||||
outResShape.push_back(_numSamples);
|
||||
outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end());
|
||||
outResShape.back() *= (1 + static_cast<int>(bidirectional));
|
||||
|
||||
outputs.assign(1, outResShape);
|
||||
|
||||
internals.assign(1, shape(_numSamples, _numOut)); // hInternal
|
||||
internals.push_back(shape(_numSamples, 1)); // dummyOnes
|
||||
internals.push_back(shape(_numSamples, 2 * _numOut)); // gates
|
||||
internals.push_back(shape(_numSamples, 2 * _numOut)); // gates_b
|
||||
internals.push_back(shape(_numSamples, 1 * _numOut)); // h_linear
|
||||
internals.push_back(shape(_numSamples, _numOut)); // ones
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> input;
|
||||
inputs_arr.getMatVector(input);
|
||||
|
||||
CV_Assert(input.size() == 1);
|
||||
const Mat& inp0 = input[0];
|
||||
|
||||
Mat &Wh = blobs[0], &Wx = blobs[1];
|
||||
int numOut = Wh.size[1];
|
||||
int numInp = Wx.size[1];
|
||||
|
||||
if (!outTailShape.empty())
|
||||
CV_Assert(total(outTailShape) == numOut);
|
||||
else
|
||||
outTailShape.assign(1, numOut);
|
||||
|
||||
CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp);
|
||||
numTimeStamps = inp0.size[0];
|
||||
numSamples = inp0.size[1];
|
||||
|
||||
outTsShape.clear();
|
||||
outTsShape.push_back(numSamples);
|
||||
outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end());
|
||||
outTsShape.back() *= (1 + static_cast<int>(bidirectional));
|
||||
|
||||
allocated = true;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> input, output, internals;
|
||||
inputs_arr.getMatVector(input);
|
||||
outputs_arr.getMatVector(output);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
const int numDirs = 1 + static_cast<int>(bidirectional);
|
||||
for (int i = 0; i < numDirs; ++i)
|
||||
{
|
||||
const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
|
||||
const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
|
||||
const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
|
||||
const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
|
||||
|
||||
const Mat &bx = bias.colRange(0, bias.cols / 2);
|
||||
const Mat &bh = bias.colRange(bias.cols / 2, bias.cols);
|
||||
|
||||
Mat hInternal = internals[0], dummyOnes = internals[1], gates = internals[2],
|
||||
b_rz = internals[3], n_t = internals[4], ones = internals[5];
|
||||
h_0.copyTo(hInternal);
|
||||
dummyOnes.setTo(1.);
|
||||
ones.setTo(1.);
|
||||
|
||||
int numOut = Wh.size[1];
|
||||
const Mat& wx_rz = Wx.rowRange(0, 2 * numOut);
|
||||
const Mat& wh_rz = Wh.rowRange(0, 2 * numOut);
|
||||
b_rz = bx.colRange(0, 2 * numOut) + bh.colRange(0, 2 * numOut);
|
||||
const Mat& wx_n = Wx.rowRange(2 * numOut, 3 * numOut);
|
||||
const Mat& wh_n = Wh.rowRange(2 * numOut, 3 * numOut);
|
||||
const Mat& b_in = bx.colRange(2 * numOut, 3 * numOut);
|
||||
const Mat& b_hn = bh.colRange(2 * numOut, 3 * numOut);
|
||||
|
||||
int numSamplesTotal = numTimeStamps * numSamples;
|
||||
Mat xTs = input[0].reshape(1, numSamplesTotal);
|
||||
|
||||
Mat hOutTs = output[0].reshape(1, numSamplesTotal);
|
||||
hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs);
|
||||
Mat cOutTs = Mat();
|
||||
|
||||
int tsStart, tsEnd, tsInc;
|
||||
if (i == 1) {
|
||||
tsStart = numTimeStamps - 1;
|
||||
tsEnd = -1;
|
||||
tsInc = -1;
|
||||
}
|
||||
else {
|
||||
tsStart = 0;
|
||||
tsEnd = numTimeStamps;
|
||||
tsInc = 1;
|
||||
}
|
||||
for (int ts = tsStart; ts != tsEnd; ts += tsInc)
|
||||
{
|
||||
Range curRowRange(ts * numSamples, (ts + 1) * numSamples);
|
||||
Mat xCurr = xTs.rowRange(curRowRange);
|
||||
|
||||
// calculate r_t = sigmoid(x * Wx_r + h_(t-1) * Wh_r + b_r)
|
||||
// calculate z_t = sigmoid(x * Wx_z + h_(t-1) * Wh_z + b_z)
|
||||
gemm(xCurr, wx_rz, 1, gates, 0, gates, GEMM_2_T); // x * Wx_rz
|
||||
gemm(hInternal, wh_rz, 1, gates, 1, gates, GEMM_2_T); // + h_(t-1) * Wh_rz
|
||||
gemm(dummyOnes, b_rz, 1, gates, 1, gates); // + b_rz
|
||||
sigmoid(gates, gates); // sigmoid()
|
||||
|
||||
Mat z = gates.colRange(0, gates.cols / 2);
|
||||
Mat r = gates.colRange(gates.cols / 2, gates.cols);
|
||||
|
||||
// calculate n_t = tanh(r (*) (h_(t-1) * Wh_n + b_hn) + x * Wx_n + b_in)
|
||||
gemm(hInternal, wh_n, 1, n_t, 0, n_t, GEMM_2_T); // h_(t-1) * Wh_n
|
||||
gemm(dummyOnes, b_hn, 1, n_t, 1, n_t); // + b_hn
|
||||
multiply(r, n_t, n_t); // r (*) (h_(t-1) * Wh_n + b_hn)
|
||||
|
||||
gemm(xCurr, wx_n, 1, n_t, 1, n_t, GEMM_2_T); // + x * Wx_n
|
||||
gemm(dummyOnes, b_in, 1, n_t, 1, n_t); // + b_in
|
||||
tanh(n_t, n_t); // tanh()
|
||||
|
||||
//compute next h_t = z (*) h_(t-1) + (1 - z) (*) n_t
|
||||
multiply(z, hInternal, hInternal); // z (*) h_{t-1}
|
||||
subtract(ones, z, z); // 1 - z
|
||||
multiply(z, n_t, z); // (1 - z) * n
|
||||
add(z, hInternal, hInternal); // z (*) h_(t-1) + (1 - z) (*) n_t
|
||||
|
||||
//save results in output blobs
|
||||
hInternal.copyTo(hOutTs.rowRange(curRowRange));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ptr<GRULayer> GRULayer::create(const LayerParams& params) {
    return Ptr<GRULayer>(new GRULayerImpl(params));
}

}
}
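For reference, a minimal sketch (not part of the vendored file) of the final GRU state update performed in GRULayerImpl::forward above: h_t = z (*) h_{t-1} + (1 - z) (*) n_t, written with the same cv::Mat primitives. The r, z and n values are random stand-ins for gates that have already passed through sigmoid/tanh.

#include <opencv2/core.hpp>

int main()
{
    const int numSamples = 2, numOut = 4;
    cv::Mat h(numSamples, numOut, CV_32F), z(numSamples, numOut, CV_32F), n(numSamples, numOut, CV_32F);
    cv::randu(h, -1, 1);
    cv::randu(z, 0, 1);   // update gate, already passed through sigmoid
    cv::randu(n, -1, 1);  // candidate state, already passed through tanh
    cv::Mat ones(numSamples, numOut, CV_32F, cv::Scalar(1));

    cv::multiply(z, h, h);        // z (*) h_{t-1}
    cv::subtract(ones, z, z);     // 1 - z
    cv::multiply(z, n, z);        // (1 - z) (*) n_t
    cv::add(z, h, h);             // h_t
    return 0;
}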
660
3rdparty/opencv-4.5.4/modules/dnn/src/layers/region_layer.cpp
vendored
Normal file
@ -0,0 +1,660 @@
/*M ///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include <opencv2/dnn/all_layers.hpp>
|
||||
#include "../nms.inl.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
#include "../ie_ngraph.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/region.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class RegionLayerImpl CV_FINAL : public RegionLayer
|
||||
{
|
||||
public:
|
||||
int coords, classes, anchors, classfix;
|
||||
float thresh, scale_x_y;
|
||||
int new_coords;
|
||||
bool useSoftmax, useLogistic;
|
||||
#ifdef HAVE_OPENCL
|
||||
UMat blob_umat;
|
||||
#endif
|
||||
|
||||
RegionLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
CV_Assert(blobs.size() == 1);
|
||||
|
||||
thresh = params.get<float>("thresh", 0.2);
|
||||
coords = params.get<int>("coords", 4);
|
||||
classes = params.get<int>("classes", 0);
|
||||
anchors = params.get<int>("anchors", 5);
|
||||
classfix = params.get<int>("classfix", 0);
|
||||
useSoftmax = params.get<bool>("softmax", false);
|
||||
useLogistic = params.get<bool>("logistic", false);
|
||||
nmsThreshold = params.get<float>("nms_threshold", 0.4);
|
||||
scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
|
||||
new_coords = params.get<int>("new_coords", 0); // Yolov4x-mish
|
||||
|
||||
CV_Assert(nmsThreshold >= 0.);
|
||||
CV_Assert(coords == 4);
|
||||
CV_Assert(classes >= 1);
|
||||
CV_Assert(anchors >= 1);
|
||||
CV_Assert(useLogistic || useSoftmax);
|
||||
if (params.get<bool>("softmax_tree", false))
|
||||
CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
|
||||
}
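An illustrative sketch (not part of the vendored file) of the LayerParams keys read by the constructor above, configured for a YOLOv3-style head. The anchor priors go into blobs[0] as (width, height) pairs, two floats per anchor; the concrete values and sizes here are assumptions for demonstration.

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>

int main()
{
    cv::dnn::LayerParams lp;
    lp.name = "yolo";
    lp.type = "Region";
    lp.set("classes", 80);
    lp.set("anchors", 3);
    lp.set("logistic", true);        // YOLOv3-style per-class sigmoid
    lp.set("thresh", 0.25f);
    lp.set("nms_threshold", 0.45f);
    lp.set("scale_x_y", 1.0f);

    // 3 anchors => 6 bias values (w0, h0, w1, h1, w2, h2), sizes illustrative
    cv::Mat biases = (cv::Mat_<float>(1, 6) << 10, 13, 16, 30, 33, 23);
    lp.blobs.push_back(biases);

    cv::Ptr<cv::dnn::RegionLayer> region = cv::dnn::RegionLayer::create(lp);
    return 0;
}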
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() > 0);
|
||||
// channels == cell_size*anchors
|
||||
CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
|
||||
int batch_size = inputs[0][0];
|
||||
if(batch_size > 1)
|
||||
outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
|
||||
else
|
||||
outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD && new_coords == 0;
|
||||
#endif
|
||||
#ifdef HAVE_CUDA
|
||||
if (backendId == DNN_BACKEND_CUDA)
|
||||
return true;
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
}
|
||||
|
||||
float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }

void softmax_activate(const float* input, const int n, const float temp, float* output)
{
    int i;
    float sum = 0;
    float largest = -FLT_MAX;
    for (i = 0; i < n; ++i) {
        if (input[i] > largest) largest = input[i];
    }
    for (i = 0; i < n; ++i) {
        float e = exp((input[i] - largest) / temp);
        sum += e;
        output[i] = e;
    }
    for (i = 0; i < n; ++i) {
        output[i] /= sum;
    }
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
if (blob_umat.empty())
|
||||
blobs[0].copyTo(blob_umat);
|
||||
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
// TODO: implement a logistic activation to classification scores.
|
||||
if (useLogistic || inps.depth() == CV_16S)
|
||||
return false;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
CV_Assert(inputs.size() >= 1);
|
||||
int const cell_size = classes + coords + 1;
|
||||
|
||||
for (size_t ii = 0; ii < outputs.size(); ii++)
|
||||
{
|
||||
UMat& inpBlob = inputs[ii];
|
||||
UMat& outBlob = outputs[ii];
|
||||
|
||||
int batch_size = inpBlob.size[0];
|
||||
int rows = inpBlob.size[1];
|
||||
int cols = inpBlob.size[2];
|
||||
|
||||
// channels == cell_size*anchors, see l. 94
|
||||
int sample_size = cell_size*rows*cols*anchors;
|
||||
|
||||
ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
|
||||
size_t nanchors = rows*cols*anchors*batch_size;
|
||||
logistic_kernel.set(0, (int)nanchors);
|
||||
logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
|
||||
logistic_kernel.set(2, (int)cell_size);
|
||||
logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
|
||||
logistic_kernel.run(1, &nanchors, NULL, false);
|
||||
|
||||
if (useSoftmax)
|
||||
{
|
||||
// Yolo v2
|
||||
// softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
|
||||
ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
|
||||
size_t nanchors = rows*cols*anchors*batch_size;
|
||||
softmax_kernel.set(0, (int)nanchors);
|
||||
softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
|
||||
softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
|
||||
softmax_kernel.set(3, (int)cell_size);
|
||||
softmax_kernel.set(4, (int)classes);
|
||||
softmax_kernel.set(5, (int)classfix);
|
||||
softmax_kernel.set(6, (int)rows);
|
||||
softmax_kernel.set(7, (int)cols);
|
||||
softmax_kernel.set(8, (int)anchors);
|
||||
softmax_kernel.set(9, (float)thresh);
|
||||
softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
|
||||
if (!softmax_kernel.run(1, &nanchors, NULL, false))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nmsThreshold > 0) {
|
||||
Mat mat = outBlob.getMat(ACCESS_WRITE);
|
||||
float *dstData = mat.ptr<float>();
|
||||
for (int b = 0; b < batch_size; ++b)
|
||||
do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs, internals;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
CV_Assert(inputs.size() >= 1);
|
||||
CV_Assert(outputs.size() == 1);
|
||||
int const cell_size = classes + coords + 1;
|
||||
|
||||
const float* biasData = blobs[0].ptr<float>();
|
||||
|
||||
for (size_t ii = 0; ii < outputs.size(); ii++)
|
||||
{
|
||||
Mat &inpBlob = inputs[ii];
|
||||
Mat &outBlob = outputs[ii];
|
||||
|
||||
int batch_size = inpBlob.size[0];
|
||||
int rows = inpBlob.size[1];
|
||||
int cols = inpBlob.size[2];
|
||||
|
||||
// address length for one image in batch, both for input and output
|
||||
int sample_size = cell_size*rows*cols*anchors;
|
||||
|
||||
// assert that the comment above is true
|
||||
CV_Assert(sample_size*batch_size == inpBlob.total());
|
||||
CV_Assert(sample_size*batch_size == outBlob.total());
|
||||
|
||||
CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
|
||||
int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
|
||||
int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;
|
||||
|
||||
const float *srcData = inpBlob.ptr<float>();
|
||||
float *dstData = outBlob.ptr<float>();
|
||||
|
||||
if (new_coords == 0) {
|
||||
// logistic activation for t0, for each grid cell (X x Y x Anchor-index)
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
|
||||
int index = cell_size*i;
|
||||
float x = srcData[index + 4];
|
||||
dstData[index + 4] = logistic_activate(x); // logistic activation
|
||||
}
|
||||
|
||||
if (useSoftmax) { // Yolo v2
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
|
||||
int index = cell_size*i;
|
||||
softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
|
||||
}
|
||||
}
|
||||
else if (useLogistic) { // Yolo v3
|
||||
for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
|
||||
int index = cell_size*i;
|
||||
const float* input = srcData + index + 5;
|
||||
float* output = dstData + index + 5;
|
||||
for (int c = 0; c < classes; ++c)
|
||||
output[c] = logistic_activate(input[c]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int b = 0; b < batch_size; ++b)
|
||||
for (int x = 0; x < cols; ++x)
|
||||
for(int y = 0; y < rows; ++y)
|
||||
for (int a = 0; a < anchors; ++a) {
|
||||
// relative start address for image b within the batch data
|
||||
int index_sample_offset = sample_size*b;
|
||||
int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
|
||||
int p_index = index_sample_offset + index * cell_size + 4;
|
||||
float scale = dstData[p_index];
|
||||
if (classfix == -1 && scale < .5)
|
||||
{
|
||||
scale = 0; // if(t0 < 0.5) t0 = 0;
|
||||
}
|
||||
int box_index = index_sample_offset + index * cell_size;
|
||||
|
||||
if (new_coords == 1) {
|
||||
float x_tmp = (srcData[box_index + 0] - 0.5f) * scale_x_y + 0.5f;
|
||||
float y_tmp = (srcData[box_index + 1] - 0.5f) * scale_x_y + 0.5f;
|
||||
dstData[box_index + 0] = (x + x_tmp) / cols;
|
||||
dstData[box_index + 1] = (y + y_tmp) / rows;
|
||||
dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm;
|
||||
dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm;
|
||||
|
||||
scale = srcData[p_index];
|
||||
if (classfix == -1 && scale < thresh)
|
||||
{
|
||||
scale = 0; // if(t0 < 0.5) t0 = 0;
|
||||
}
|
||||
|
||||
int class_index = index_sample_offset + index * cell_size + 5;
|
||||
for (int j = 0; j < classes; ++j) {
|
||||
float prob = scale*srcData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
|
||||
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
|
||||
float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
|
||||
dstData[box_index + 0] = (x + x_tmp) / cols;
|
||||
dstData[box_index + 1] = (y + y_tmp) / rows;
|
||||
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
|
||||
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
|
||||
|
||||
int class_index = index_sample_offset + index * cell_size + 5;
|
||||
for (int j = 0; j < classes; ++j) {
|
||||
float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
|
||||
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nmsThreshold > 0) {
|
||||
for (int b = 0; b < batch_size; ++b){
|
||||
do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
{
    std::vector<Rect2d> boxes(total);
    std::vector<float> scores(total);

    for (int i = 0; i < total; ++i)
    {
        Rect2d &b = boxes[i];
        int box_index = i * (classes + coords + 1);
        b.width = detections[box_index + 2];
        b.height = detections[box_index + 3];
        b.x = detections[box_index + 0] - b.width / 2;
        b.y = detections[box_index + 1] - b.height / 2;
    }

    std::vector<int> indices;
    for (int k = 0; k < classes; ++k)
    {
        for (int i = 0; i < total; ++i)
        {
            int box_index = i * (classes + coords + 1);
            int class_index = box_index + 5;
            scores[i] = detections[class_index + k];
            detections[class_index + k] = 0;
        }
        NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
        for (int i = 0, n = indices.size(); i < n; ++i)
        {
            int box_index = indices[i] * (classes + coords + 1);
            int class_index = box_index + 5;
            detections[class_index + k] = scores[indices[i]];
        }
    }
}
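A standalone sketch (for reference, not part of the vendored file) of the cv::dnn::NMSBoxes call used per class above: boxes below the score threshold are dropped, the rest are greedily kept while suppressing boxes whose IoU with an already kept box exceeds the NMS threshold. The box values are illustrative.

#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    std::vector<cv::Rect2d> boxes = {
        {10, 10, 50, 50},   // x, y, width, height
        {12, 12, 50, 50},   // heavy overlap with the first box
        {200, 200, 40, 40}
    };
    std::vector<float> scores = {0.9f, 0.8f, 0.75f};
    std::vector<int> indices;

    cv::dnn::NMSBoxes(boxes, scores, /*score_threshold=*/0.5f, /*nms_threshold=*/0.4f, indices);
    for (int i : indices)
        std::cout << "kept box " << i << " with score " << scores[i] << std::endl;
    return 0;
}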
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
if (coords != 4)
|
||||
CV_Error(Error::StsNotImplemented, "Only upright rectangular boxes are supported in RegionLayer.");
|
||||
|
||||
std::size_t height_norm, width_norm;
|
||||
if (inputs.size() == 1)
|
||||
{
|
||||
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
|
||||
auto input_shape = input_wrapper->getShape();
|
||||
height_norm = input_shape[1];
|
||||
width_norm = input_shape[2];
|
||||
}
|
||||
else
|
||||
{
|
||||
auto input_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
|
||||
auto input_shape = input_wrapper->getShape();
|
||||
CV_Assert(input_shape.size() == 4);
|
||||
height_norm = input_shape[2];
|
||||
width_norm = input_shape[3];
|
||||
}
|
||||
|
||||
cuda4dnn::SquashMethod squash_method;
|
||||
if(useLogistic)
|
||||
squash_method = cuda4dnn::SquashMethod::SIGMOID;
|
||||
else if (useSoftmax)
|
||||
squash_method = cuda4dnn::SquashMethod::SOFTMAX;
|
||||
|
||||
/* exactly one must be true */
|
||||
CV_Assert((useLogistic || useSoftmax) && !(useLogistic && useSoftmax));
|
||||
|
||||
cuda4dnn::RegionConfiguration<float> config;
|
||||
config.squash_method = squash_method;
|
||||
config.classes = classes;
|
||||
config.boxes_per_cell = anchors;
|
||||
|
||||
config.height_norm = height_norm;
|
||||
config.width_norm = width_norm;
|
||||
|
||||
config.scale_x_y = scale_x_y;
|
||||
|
||||
config.object_prob_cutoff = (classfix == -1) ? thresh : 0.f;
|
||||
config.class_prob_cutoff = thresh;
|
||||
|
||||
config.nms_iou_threshold = nmsThreshold;
|
||||
|
||||
config.new_coords = (new_coords == 1);
|
||||
return make_cuda_node<cuda4dnn::RegionOp>(preferableTarget, std::move(context->stream), blobs[0], config);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(outputs); // suppress unused variable warning
|
||||
|
||||
int64 flops = 0;
|
||||
for(int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
flops += 60*total(inputs[i]);
|
||||
}
|
||||
return flops;
|
||||
}
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto parent_shape = input->get_shape();
|
||||
int64_t b = parent_shape[0];
|
||||
int64_t h = parent_shape[1];
|
||||
int64_t w = parent_shape[2];
|
||||
int64_t c = parent_shape[3];
|
||||
|
||||
int64_t cols = b * h * w * anchors;
|
||||
int64_t rows = c / anchors;
|
||||
auto shape_node = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{cols, rows});
|
||||
auto tr_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 0});
|
||||
|
||||
std::shared_ptr<ngraph::Node> input2d;
|
||||
{
|
||||
input2d = std::make_shared<ngraph::op::v1::Reshape>(input, shape_node, true);
|
||||
input2d = std::make_shared<ngraph::op::Transpose>(input2d, tr_axes);
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> region;
|
||||
{
|
||||
auto new_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 3, 1, 2});
|
||||
auto tr_input = std::make_shared<ngraph::op::Transpose>(input, new_axes);
|
||||
|
||||
std::vector<float> anchors_vec(blobs[0].ptr<float>(), blobs[0].ptr<float>() + blobs[0].total());
|
||||
std::vector<int64_t> mask(anchors, 1);
|
||||
region = std::make_shared<ngraph::op::RegionYolo>(tr_input, coords, classes, anchors, useSoftmax, mask, 1, 3, anchors_vec);
|
||||
|
||||
auto tr_shape = tr_input->get_shape();
|
||||
auto shape_as_inp = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{tr_shape.size()},
|
||||
std::vector<int64_t>(tr_shape.begin(), tr_shape.end()));
|
||||
|
||||
region = std::make_shared<ngraph::op::v1::Reshape>(region, shape_as_inp, true);
|
||||
new_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 2, 3, 1});
|
||||
region = std::make_shared<ngraph::op::Transpose>(region, new_axes);
|
||||
|
||||
region = std::make_shared<ngraph::op::v1::Reshape>(region, shape_node, true);
|
||||
region = std::make_shared<ngraph::op::Transpose>(region, tr_axes);
|
||||
}
|
||||
|
||||
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 1});
|
||||
std::vector<int64_t> boxes_shape{b, anchors, h, w};
|
||||
auto shape_3d = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{boxes_shape.size()}, boxes_shape.data());
|
||||
|
||||
ngraph::Shape box_broad_shape{1, (size_t)anchors, (size_t)h, (size_t)w};
|
||||
auto scale_x_y_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &scale_x_y);
|
||||
auto shift_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{0.5});
|
||||
|
||||
auto axis = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{}, {0});
|
||||
auto splits = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{5}, {1, 1, 1, 1, rows - 4});
|
||||
auto split = std::make_shared<ngraph::op::v1::VariadicSplit>(input2d, axis, splits);
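// Split the (rows x cols) prediction matrix row-wise into tx, ty, tw, th and the remaining
// objectness + class rows, which are decoded separately below.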
|
||||
std::shared_ptr<ngraph::Node> box_x;
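// Decode x: sigmoid(tx) is recentred and scaled by scale_x_y, the per-cell offset grid is
// added, and the result is normalized by the grid width.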
|
||||
{
|
||||
box_x = std::make_shared<ngraph::op::Sigmoid>(split->output(0));
|
||||
box_x = std::make_shared<ngraph::op::v1::Subtract>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_x = std::make_shared<ngraph::op::v1::Multiply>(box_x, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_x = std::make_shared<ngraph::op::v1::Add>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, shape_3d, true);
|
||||
|
||||
std::vector<float> x_indices(w * h * anchors);
|
||||
auto begin = x_indices.begin();
|
||||
for (int i = 0; i < h; i++)
|
||||
{
|
||||
std::fill(begin + i * anchors, begin + (i + 1) * anchors, i);
|
||||
}
|
||||
|
||||
for (int j = 1; j < w; j++)
|
||||
{
|
||||
std::copy(begin, begin + h * anchors, begin + j * h * anchors);
|
||||
}
|
||||
auto horiz = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, x_indices.data());
|
||||
box_x = std::make_shared<ngraph::op::v1::Add>(box_x, horiz, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
|
||||
auto cols_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{float(w)});
|
||||
box_x = std::make_shared<ngraph::op::v1::Divide>(box_x, cols_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> box_y;
|
||||
{
|
||||
box_y = std::make_shared<ngraph::op::Sigmoid>(split->output(1));
|
||||
box_y = std::make_shared<ngraph::op::v1::Subtract>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_y = std::make_shared<ngraph::op::v1::Multiply>(box_y, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_y = std::make_shared<ngraph::op::v1::Add>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, shape_3d, true);
|
||||
|
||||
std::vector<float> y_indices(h * anchors);
|
||||
for (int i = 0; i < h; i++)
|
||||
{
|
||||
std::fill(y_indices.begin() + i * anchors, y_indices.begin() + (i + 1) * anchors, i);
|
||||
}
|
||||
|
||||
auto vert = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, (size_t)anchors, (size_t)h, 1}, y_indices.data());
|
||||
box_y = std::make_shared<ngraph::op::v1::Add>(box_y, vert, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
auto rows_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{float(h)});
|
||||
box_y = std::make_shared<ngraph::op::v1::Divide>(box_y, rows_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> box_w, box_h;
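// Decode w/h: exp(tw) and exp(th) are multiplied by the anchor sizes from blobs[0]
// (normalized by the reference input size), tiled over every grid cell.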
|
||||
{
|
||||
int hNorm, wNorm;
|
||||
if (nodes.size() > 1)
|
||||
{
|
||||
auto node_1_shape = nodes[1].dynamicCast<InfEngineNgraphNode>()->node->get_shape();
|
||||
hNorm = node_1_shape[2];
|
||||
wNorm = node_1_shape[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
hNorm = h;
|
||||
wNorm = w;
|
||||
}
|
||||
|
||||
std::vector<float> anchors_w(anchors), anchors_h(anchors);
|
||||
for (size_t a = 0; a < anchors; ++a)
|
||||
{
|
||||
anchors_w[a] = blobs[0].at<float>(0, 2 * a) / wNorm;
|
||||
anchors_h[a] = blobs[0].at<float>(0, 2 * a + 1) / hNorm;
|
||||
}
|
||||
|
||||
std::vector<float> bias_w(w * h * anchors), bias_h(w * h * anchors);
|
||||
for (int j = 0; j < h; j++)
|
||||
{
|
||||
std::copy(anchors_w.begin(), anchors_w.end(), bias_w.begin() + j * anchors);
|
||||
std::copy(anchors_h.begin(), anchors_h.end(), bias_h.begin() + j * anchors);
|
||||
}
|
||||
|
||||
for (int i = 1; i < w; i++)
|
||||
{
|
||||
std::copy(bias_w.begin(), bias_w.begin() + h * anchors, bias_w.begin() + i * h * anchors);
|
||||
std::copy(bias_h.begin(), bias_h.begin() + h * anchors, bias_h.begin() + i * h * anchors);
|
||||
}
|
||||
|
||||
box_w = std::make_shared<ngraph::op::v0::Exp>(split->output(2));
|
||||
box_w = std::make_shared<ngraph::op::v1::Reshape>(box_w, shape_3d, true);
|
||||
auto anchor_w_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, bias_w.data());
|
||||
box_w = std::make_shared<ngraph::op::v1::Multiply>(box_w, anchor_w_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
|
||||
box_h = std::make_shared<ngraph::op::v0::Exp>(split->output(3));
|
||||
box_h = std::make_shared<ngraph::op::v1::Reshape>(box_h, shape_3d, true);
|
||||
auto anchor_h_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, bias_h.data());
|
||||
box_h = std::make_shared<ngraph::op::v1::Multiply>(box_h, anchor_h_node, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
|
||||
auto region_splits = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{3}, {4, 1, rows - 5});
|
||||
auto region_split = std::make_shared<ngraph::op::v1::VariadicSplit>(region, axis, region_splits);
|
||||
|
||||
std::shared_ptr<ngraph::Node> scale;
|
||||
{
|
||||
float thr = classfix == -1 ? 0.5 : 0;
|
||||
auto thresh_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{thr});
|
||||
auto mask = std::make_shared<ngraph::op::v1::Less>(region_split->output(1), thresh_node);
|
||||
auto zero_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, mask->get_shape(), std::vector<float>(cols, 0));
|
||||
scale = std::make_shared<ngraph::op::v1::Select>(mask, zero_node, region_split->output(1));
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> probs;
|
||||
{
|
||||
probs = std::make_shared<ngraph::op::v1::Multiply>(region_split->output(2), scale, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
auto thresh_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &thresh);
|
||||
auto mask = std::make_shared<ngraph::op::v1::Greater>(probs, thresh_node);
|
||||
auto zero_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, mask->get_shape(), std::vector<float>((rows - 5) * cols, 0));
|
||||
probs = std::make_shared<ngraph::op::v1::Select>(mask, probs, zero_node);
|
||||
}
|
||||
|
||||
|
||||
auto concat_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, cols});
|
||||
box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, concat_shape, true);
|
||||
box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, concat_shape, true);
|
||||
box_w = std::make_shared<ngraph::op::v1::Reshape>(box_w, concat_shape, true);
|
||||
box_h = std::make_shared<ngraph::op::v1::Reshape>(box_h, concat_shape, true);
|
||||
|
||||
ngraph::NodeVector inp_nodes{box_x, box_y, box_w, box_h, scale, probs};
|
||||
std::shared_ptr<ngraph::Node> result = std::make_shared<ngraph::op::Concat>(inp_nodes, 0);
|
||||
result = std::make_shared<ngraph::op::Transpose>(result, tr_axes);
|
||||
if (b > 1)
|
||||
{
|
||||
std::vector<int64_t> sizes{b, static_cast<int64_t>(result->get_shape()[0]) / b, static_cast<int64_t>(result->get_shape()[1])};
|
||||
auto shape_node = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{sizes.size()}, sizes.data());
|
||||
result = std::make_shared<ngraph::op::v1::Reshape>(result, shape_node, true);
|
||||
}
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(result));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
};
|
||||
|
||||
Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<RegionLayer>(new RegionLayerImpl(params));
|
||||
}
|
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
264
3rdparty/opencv-4.5.4/modules/dnn/src/layers/reorg_layer.cpp
vendored
Normal file
@ -0,0 +1,264 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
#include <opencv2/dnn/all_layers.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
#include "../op_inf_engine.hpp"
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
#include "../ie_ngraph.hpp"
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
#include <ngraph/op/reorg_yolo.hpp>
|
||||
#else
|
||||
#include <ngraph/op/experimental/layers/reorg_yolo.hpp>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "../op_cuda.hpp"
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/reorg.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class ReorgLayerImpl CV_FINAL : public ReorgLayer
|
||||
{
|
||||
int reorgStride;
|
||||
public:
|
||||
|
||||
ReorgLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
|
||||
reorgStride = params.get<int>("reorg_stride", 2);
|
||||
CV_Assert(reorgStride > 0);
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() > 0);
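// Reorg with stride r maps an (N, C, H, W) input to (N, C*r*r, H/r, W/r).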
|
||||
outputs = std::vector<MatShape>(inputs.size(), shape(
|
||||
inputs[0][0],
|
||||
inputs[0][1] * reorgStride * reorgStride,
|
||||
inputs[0][2] / reorgStride,
|
||||
inputs[0][3] / reorgStride));
|
||||
|
||||
CV_Assert(outputs[0][0] > 0 && outputs[0][1] > 0 && outputs[0][2] > 0 && outputs[0][3] > 0);
|
||||
CV_Assert(total(outputs[0]) == total(inputs[0]));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
Mat inp = inputs[0];
|
||||
Mat out = outputs[0];
|
||||
int batchSize = inp.size[0];
|
||||
|
||||
LayerParams permParams;
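// The reorg operation is implemented as a reshape followed by an axis permutation;
// the batched case only adds a leading batch dimension to the shapes below.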
|
||||
if (batchSize == 1)
|
||||
{
|
||||
int order[] = {1, 3, 0, 2};
|
||||
permParams.set("order", DictValue::arrayInt(&order[0], 4));
|
||||
|
||||
permuteInpShape.resize(4);
|
||||
permuteInpShape[0] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride); // (channels*height)/(r*r)
|
||||
permuteInpShape[1] = reorgStride;
|
||||
permuteInpShape[2] = inp.size[3]; // width
|
||||
permuteInpShape[3] = reorgStride;
|
||||
|
||||
permuteOutShape.resize(4);
|
||||
for (int i = 0; i < 4; ++i)
|
||||
permuteOutShape[i] = permuteInpShape[order[i]];
|
||||
}
|
||||
else
|
||||
{
|
||||
int order[] = {0, 2, 4, 1, 3};
|
||||
permParams.set("order", DictValue::arrayInt(&order[0], 5));
|
||||
|
||||
permuteInpShape.resize(5);
|
||||
permuteInpShape[0] = batchSize;
|
||||
permuteInpShape[1] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride); // (channels*height)/(r*r)
|
||||
permuteInpShape[2] = reorgStride;
|
||||
permuteInpShape[3] = inp.size[3]; // width
|
||||
permuteInpShape[4] = reorgStride;
|
||||
|
||||
permuteOutShape.resize(5);
|
||||
for (int i = 0; i < 5; ++i)
|
||||
permuteOutShape[i] = permuteInpShape[order[i]];
|
||||
}
|
||||
permute = PermuteLayer::create(permParams);
|
||||
std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
|
||||
std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
|
||||
permute->finalize(permuteInputs, permuteOutputs);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
|
||||
outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
|
||||
permute->preferableTarget = preferableTarget;
|
||||
permute->forward(inputs, outputs, internals);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
inputs[0] = inputs[0].reshape(1, permuteInpShape);
|
||||
outputs[0] = outputs[0].reshape(1, permuteOutShape);
|
||||
permute->forward(inputs, outputs, internals_arr);
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::ReorgYoloLayer ieLayer(name);
|
||||
ieLayer.setStride(reorgStride);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto reorg = std::make_shared<ngraph::op::ReorgYolo>(ieInpNode, ngraph::Strides{(size_t)reorgStride});
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(reorg));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
return make_cuda_node<cuda4dnn::ReorgOp>(preferableTarget, std::move(context->stream), reorgStride);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(outputs); // suppress unused variable warning
|
||||
|
||||
int64 flops = 0;
|
||||
for(int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
flops += 21*total(inputs[i]);
|
||||
}
|
||||
return flops;
|
||||
}
|
||||
|
||||
private:
|
||||
Ptr<PermuteLayer> permute;
|
||||
std::vector<int> permuteInpShape, permuteOutShape;
|
||||
};
|
||||
|
||||
Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<ReorgLayer>(new ReorgLayerImpl(params));
|
||||
}
|
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
367
3rdparty/opencv-4.5.4/modules/dnn/src/layers/reshape_layer.cpp
vendored
Normal file
@ -0,0 +1,367 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/reshape.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
static void computeShapeByReshapeMask(const MatShape &srcShape,
|
||||
const MatShape &maskShape,
|
||||
Range srcRange /*= Range::all()*/,
|
||||
MatShape& dstShape)
|
||||
{
|
||||
int srcShapeSize = (int)srcShape.size();
|
||||
int maskShapeSize = (int)maskShape.size();
|
||||
|
||||
srcRange = normalize_axis_range(srcRange, srcShapeSize);
|
||||
|
||||
bool explicitMask = !maskShape.empty(); // All mask values are positive.
|
||||
for (int i = 0, n = maskShape.size(); i < n && explicitMask; ++i)
|
||||
{
|
||||
explicitMask = maskShape[i] > 0;
|
||||
}
|
||||
// Working range of source shape is a range where area(src) == area(mask).
|
||||
if (explicitMask)
|
||||
{
|
||||
int maskTotal = total(maskShape);
|
||||
// Go from the end of mask until we collect required total.
|
||||
bool matched = false;
|
||||
for (int i = srcRange.end - 1; i >= srcRange.start; --i)
|
||||
{
|
||||
if (matched)
|
||||
{
|
||||
if (total(srcShape, i, srcRange.end) != maskTotal)
|
||||
{
|
||||
srcRange.start = i + 1;
|
||||
break;
|
||||
}
|
||||
else if (i == 0)
|
||||
{
|
||||
srcRange.start = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
matched = total(srcShape, i, srcRange.end) == maskTotal;
|
||||
}
|
||||
}
|
||||
while (total(srcShape, srcRange.start, srcRange.end) != maskTotal && srcRange.start > 0)
|
||||
{
|
||||
srcRange.start -= 1;
|
||||
}
|
||||
CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal);
|
||||
}
|
||||
|
||||
CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShapeSize);
|
||||
int dstShapeSize = srcShapeSize - srcRange.size() + maskShapeSize;
|
||||
dstShape.resize(dstShapeSize);
|
||||
|
||||
std::copy(srcShape.begin(), srcShape.begin() + srcRange.start, dstShape.begin());
|
||||
std::copy(srcShape.begin() + srcRange.end, srcShape.begin() + srcShapeSize, dstShape.begin() + srcRange.start + maskShapeSize);
|
||||
|
||||
int inferDim = -1;
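// Mask semantics: a positive entry fixes that output dimension, 0 copies the matching source
// dimension, and -1 marks the single dimension inferred from the remaining total.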
|
||||
for (int i = 0; i < maskShapeSize; i++)
|
||||
{
|
||||
if (maskShape[i] > 0)
|
||||
{
|
||||
dstShape[srcRange.start + i] = maskShape[i];
|
||||
}
|
||||
else if (maskShape[i] == 0)
|
||||
{
|
||||
if (srcRange.start + i >= srcShapeSize)
|
||||
CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i));
|
||||
dstShape[srcRange.start + i] = srcShape[srcRange.start + i];
|
||||
}
|
||||
else if (maskShape[i] == -1)
|
||||
{
|
||||
if (inferDim != -1)
|
||||
CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)");
|
||||
inferDim = srcRange.start + i;
|
||||
dstShape[inferDim] = 1;
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsBadArg, "maskShape[i] >= -1");
|
||||
}
|
||||
|
||||
size_t srcTotal = total(srcShape);
|
||||
size_t dstTotal = total(dstShape);
|
||||
CV_Assert(dstTotal != 0);
|
||||
|
||||
if (inferDim != -1)
|
||||
{
|
||||
if (srcTotal % dstTotal != 0)
|
||||
CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1");
|
||||
|
||||
dstShape[inferDim] = (int)(srcTotal / dstTotal);
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(srcTotal == dstTotal);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class ReshapeLayerImpl CV_FINAL : public ReshapeLayer
|
||||
{
|
||||
public:
|
||||
ReshapeLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
int axis = params.get<int>("axis", 0);
|
||||
int numAxes = params.get<int>("num_axes", -1);
|
||||
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
|
||||
shapesInitialized = !hasDynamicShapes;
|
||||
|
||||
CV_Assert(numAxes >= -1);
|
||||
newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);
|
||||
|
||||
newShapeDesc.clear();
|
||||
if (params.has("dim"))
|
||||
{
|
||||
const DictValue &paramShape = params.get("dim");
|
||||
int i, dims = paramShape.size();
|
||||
newShapeDesc.resize(dims);
|
||||
for (i = 0; i < dims; i++)
|
||||
newShapeDesc[i] = paramShape.get<int>(i);
|
||||
}
|
||||
if (hasDynamicShapes)
|
||||
{
|
||||
dynamicShapes.clear();
|
||||
inputIndices.clear();
|
||||
if (params.has("dynamic_axes")) {
|
||||
CV_Assert(params.has("input_indices"));
|
||||
const DictValue &dynamicAxes = params.get("dynamic_axes");
|
||||
const DictValue &dynamicInputShapes = params.get("input_indices");
|
||||
int i, dims = dynamicAxes.size();
|
||||
CV_Assert(dims == dynamicInputShapes.size());
|
||||
CV_Assert(dims > 0);
|
||||
dynamicShapes.resize(dims);
|
||||
inputIndices.resize(dims);
|
||||
for (i = 0; i < dims; i++) {
|
||||
dynamicShapes[i] = dynamicAxes.get<int>(i);
|
||||
inputIndices[i] = dynamicInputShapes.get<int>(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
|
||||
if (inputs.size() == 1 || inputs.size() == requiredOutputs)
|
||||
{
|
||||
outputs.clear();
|
||||
for (size_t i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
if (hasDynamicShapes && !shapesInitialized)
|
||||
{
|
||||
outputs.push_back(newShapeDesc);
|
||||
}
|
||||
else
|
||||
{
|
||||
outputs.push_back(MatShape());
|
||||
computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert_N(inputs.size() == 2, total(inputs[0]) == total(inputs[1]));
|
||||
outputs.assign(1, inputs[1]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
|
||||
{
|
||||
if (hasDynamicShapes)
|
||||
{
|
||||
for (int i = 0; i < dynamicShapes.size(); ++i)
|
||||
{
|
||||
newShapeDesc[dynamicShapes[i]] = inputs[0][inputIndices[i]];
|
||||
}
|
||||
}
|
||||
shapesInitialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> outputs;
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert(!outputs.empty());
|
||||
outShapes.resize(outputs.size());
|
||||
for (int i = 0; i < outputs.size(); ++i)
|
||||
outShapes[i] = shape(outputs[i]);
|
||||
}
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
UMat srcBlob = inputs[i];
|
||||
void *src_handle = inputs[i].handle(ACCESS_READ);
|
||||
void *dst_handle = outputs[i].handle(ACCESS_WRITE);
|
||||
if (src_handle != dst_handle)
|
||||
{
|
||||
UMat umat = srcBlob.reshape(1, (int)outShapes[i].size(), &outShapes[i][0]);
|
||||
umat.copyTo(outputs[i]);
|
||||
}
|
||||
}
|
||||
outs.assign(outputs);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
Mat srcBlob = inputs[i];
|
||||
if (outputs[i].data != srcBlob.data)
|
||||
srcBlob.reshape(1, shape(outputs[i])).copyTo(outputs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::ReshapeLayer ieLayer(name);
|
||||
CV_Assert(outShapes.size() == 1);
|
||||
ieLayer.setDims(outShapes[0]);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(outShapes.size() == 1);
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
|
||||
std::vector<int64_t> out(outShapes[0].begin(), outShapes[0].end());
|
||||
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{out.size()}, out.data());
|
||||
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, shape, true);
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(reshape));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<MatShape> outShapes;
|
||||
std::vector<int> dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input
|
||||
std::vector<int> inputIndices; // Which axes from input are needed to compute correct output shape
|
||||
bool hasDynamicShapes;
|
||||
bool shapesInitialized;
|
||||
};
|
||||
|
||||
Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<ReshapeLayer>(new ReshapeLayerImpl(params));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
486
3rdparty/opencv-4.5.4/modules/dnn/src/layers/resize_layer.cpp
vendored
Normal file
@ -0,0 +1,486 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
#include "../ie_ngraph.hpp"
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
|
||||
#include <ngraph/op/interpolate.hpp>
|
||||
#else
|
||||
#include <ngraph/op/experimental/layers/interpolate.hpp>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/resize.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class ResizeLayerImpl : public ResizeLayer
|
||||
{
|
||||
public:
|
||||
ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(params.get<float>("zoom_factor_x", params.get<float>("zoom_factor", 0))),
|
||||
zoomFactorHeight(params.get<float>("zoom_factor_y", params.get<float>("zoom_factor", 0))),
|
||||
scaleWidth(0), scaleHeight(0)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
outWidth = params.get<float>("width", 0);
|
||||
outHeight = params.get<float>("height", 0);
|
||||
if (params.has("zoom_factor"))
|
||||
{
|
||||
CV_Assert(!params.has("zoom_factor_x") && !params.has("zoom_factor_y"));
|
||||
}
|
||||
else if (params.has("zoom_factor_x") || params.has("zoom_factor_y"))
|
||||
{
|
||||
CV_Assert(params.has("zoom_factor_x") && params.has("zoom_factor_y"));
|
||||
}
|
||||
interpolation = params.get<String>("interpolation");
|
||||
CV_Check(interpolation, interpolation == "nearest" || interpolation == "opencv_linear" || interpolation == "bilinear", "");
|
||||
|
||||
alignCorners = params.get<bool>("align_corners", false);
|
||||
halfPixelCenters = params.get<bool>("half_pixel_centers", false);
|
||||
if (interpolation == "opencv_linear")
|
||||
halfPixelCenters = true;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(inputs.size() == 1 || inputs.size() == 2, inputs[0].size() == 4);
|
||||
outputs.resize(1, inputs[0]);
|
||||
if (inputs.size() == 1) {
|
||||
outputs[0][2] = zoomFactorHeight > 0 ? (outputs[0][2] * zoomFactorHeight) : outHeight;
|
||||
outputs[0][3] = zoomFactorWidth > 0 ? (outputs[0][3] * zoomFactorWidth) : outWidth;
|
||||
} else {
|
||||
outputs[0][2] = inputs[1][2];
|
||||
outputs[0][3] = inputs[1][3];
|
||||
}
|
||||
// We can work in-place (do nothing) if input shape == output shape.
|
||||
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
if (backendId == DNN_BACKEND_CUDA)
|
||||
return interpolation == "nearest" || interpolation == "bilinear" || interpolation == "opencv_linear";
|
||||
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
{
|
||||
return (interpolation == "nearest" && scaleWidth == scaleHeight) ||
|
||||
(interpolation == "bilinear");
|
||||
}
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
}
|
||||
|
||||
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
outHeight = outputs[0].size[2];
|
||||
outWidth = outputs[0].size[3];
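// With align_corners the first and last samples of input and output coincide, so the scale
// uses (size - 1); otherwise it is the plain input/output size ratio.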
|
||||
if (alignCorners && outHeight > 1)
|
||||
scaleHeight = static_cast<float>(inputs[0].size[2] - 1) / (outHeight - 1);
|
||||
else
|
||||
scaleHeight = static_cast<float>(inputs[0].size[2]) / outHeight;
|
||||
|
||||
if (alignCorners && outWidth > 1)
|
||||
scaleWidth = static_cast<float>(inputs[0].size[3] - 1) / (outWidth - 1);
|
||||
else
|
||||
scaleWidth = static_cast<float>(inputs[0].size[3]) / outWidth;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs, internals;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
if (outHeight == inputs[0].size[2] && outWidth == inputs[0].size[3])
|
||||
{
|
||||
// outputs[0] = inputs[0] doesn't work due to BlobManager optimizations
|
||||
if (inputs[0].data != outputs[0].data)
|
||||
{
|
||||
inputs[0].copyTo(outputs[0]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Mat& inp = inputs[0];
|
||||
Mat& out = outputs[0];
|
||||
int depth = inp.depth();
|
||||
if ((interpolation == "nearest" && !alignCorners && !halfPixelCenters) || (interpolation == "opencv_linear" && depth != CV_8S) ||
|
||||
(interpolation == "bilinear" && halfPixelCenters && depth != CV_8S))
|
||||
{
|
||||
// INTER_LINEAR Resize mode does not support INT8 inputs
|
||||
InterpolationFlags mode = interpolation == "nearest" ? INTER_NEAREST : INTER_LINEAR;
|
||||
for (size_t n = 0; n < inputs[0].size[0]; ++n)
|
||||
{
|
||||
for (size_t ch = 0; ch < inputs[0].size[1]; ++ch)
|
||||
{
|
||||
resize(getPlane(inp, n, ch), getPlane(out, n, ch),
|
||||
Size(outWidth, outHeight), 0, 0, mode);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (interpolation == "nearest")
|
||||
{
|
||||
const int inpHeight = inp.size[2];
|
||||
const int inpWidth = inp.size[3];
|
||||
const int inpSpatialSize = inpHeight * inpWidth;
|
||||
const int outSpatialSize = outHeight * outWidth;
|
||||
const int numPlanes = inp.size[0] * inp.size[1];
|
||||
CV_Assert_N(inp.isContinuous(), out.isContinuous());
|
||||
|
||||
Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight);
|
||||
Mat outPlanes = out.reshape(1, numPlanes * outHeight);
|
||||
|
||||
float heightOffset = 0.0f;
|
||||
float widthOffset = 0.0f;
|
||||
|
||||
if (halfPixelCenters)
|
||||
{
|
||||
heightOffset = 0.5f * scaleHeight;
|
||||
widthOffset = 0.5f * scaleWidth;
|
||||
}
|
||||
|
||||
if (depth == CV_8S)
|
||||
{
|
||||
for (int y = 0; y < outHeight; ++y)
|
||||
{
|
||||
float input_y = y * scaleHeight + heightOffset;
|
||||
int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
|
||||
y0 = std::min(y0, inpHeight - 1);
|
||||
|
||||
const int8_t* inpData_row = inpPlanes.ptr<int8_t>(y0);
|
||||
|
||||
for (int x = 0; x < outWidth; ++x)
|
||||
{
|
||||
float input_x = x * scaleWidth + widthOffset;
|
||||
int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
|
||||
x0 = std::min(x0, inpWidth - 1);
|
||||
|
||||
int8_t* outData = outPlanes.ptr<int8_t>(y, x);
|
||||
const int8_t* inpData_row_c = inpData_row;
|
||||
|
||||
for (int c = 0; c < numPlanes; ++c)
|
||||
{
|
||||
*outData = inpData_row_c[x0];
|
||||
|
||||
inpData_row_c += inpSpatialSize;
|
||||
outData += outSpatialSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int y = 0; y < outHeight; ++y)
|
||||
{
|
||||
float input_y = y * scaleHeight + heightOffset;
|
||||
int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
|
||||
y0 = std::min(y0, inpHeight - 1);
|
||||
|
||||
const float* inpData_row = inpPlanes.ptr<float>(y0);
|
||||
|
||||
for (int x = 0; x < outWidth; ++x)
|
||||
{
|
||||
float input_x = x * scaleWidth + widthOffset;
|
||||
int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
|
||||
x0 = std::min(x0, inpWidth - 1);
|
||||
|
||||
float* outData = outPlanes.ptr<float>(y, x);
|
||||
const float* inpData_row_c = inpData_row;
|
||||
|
||||
for (int c = 0; c < numPlanes; ++c)
|
||||
{
|
||||
*outData = inpData_row_c[x0];
|
||||
|
||||
inpData_row_c += inpSpatialSize;
|
||||
outData += outSpatialSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (interpolation == "bilinear" || interpolation == "opencv_linear")
|
||||
{
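// CPU bilinear path: each output pixel blends the four neighbouring input pixels using the
// fractional parts of the mapped source coordinates, independently for every plane.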
|
||||
const int inpHeight = inp.size[2];
|
||||
const int inpWidth = inp.size[3];
|
||||
const int inpSpatialSize = inpHeight * inpWidth;
|
||||
const int outSpatialSize = outHeight * outWidth;
|
||||
const int numPlanes = inp.size[0] * inp.size[1];
|
||||
CV_Assert_N(inp.isContinuous(), out.isContinuous());
|
||||
|
||||
Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight);
|
||||
Mat outPlanes = out.reshape(1, numPlanes * outHeight);
|
||||
if (depth == CV_8S)
|
||||
{
|
||||
for (int y = 0; y < outHeight; ++y)
|
||||
{
|
||||
float input_y = halfPixelCenters ? std::max((y + 0.5f) * scaleHeight - 0.5f, 0.0f) : y * scaleHeight;
|
||||
int y0 = static_cast<int>(input_y);
|
||||
const int8_t* inpData_row0 = inpPlanes.ptr<int8_t>(y0);
|
||||
const int8_t* inpData_row1 = inpPlanes.ptr<int8_t>(std::min(y0 + 1, inpHeight - 1));
|
||||
for (int x = 0; x < outWidth; ++x)
|
||||
{
|
||||
float input_x = halfPixelCenters ? std::max((x + 0.5f) * scaleWidth - 0.5f, 0.0f) : x * scaleWidth;
|
||||
int x0 = static_cast<int>(input_x);
|
||||
int x1 = std::min(x0 + 1, inpWidth - 1);
|
||||
|
||||
int8_t* outData = outPlanes.ptr<int8_t>(y, x);
|
||||
const int8_t* inpData_row0_c = inpData_row0;
|
||||
const int8_t* inpData_row1_c = inpData_row1;
|
||||
for (int c = 0; c < numPlanes; ++c)
|
||||
{
|
||||
*outData = static_cast<int8_t>(inpData_row0_c[x0] +
|
||||
(input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
|
||||
(input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
|
||||
(input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0])));
|
||||
|
||||
inpData_row0_c += inpSpatialSize;
|
||||
inpData_row1_c += inpSpatialSize;
|
||||
outData += outSpatialSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int y = 0; y < outHeight; ++y)
|
||||
{
|
||||
float input_y = y * scaleHeight;
|
||||
int y0 = static_cast<int>(input_y);
|
||||
const float* inpData_row0 = inpPlanes.ptr<float>(y0);
|
||||
const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
|
||||
for (int x = 0; x < outWidth; ++x)
|
||||
{
|
||||
float input_x = x * scaleWidth;
|
||||
int x0 = static_cast<int>(input_x);
|
||||
int x1 = std::min(x0 + 1, inpWidth - 1);
|
||||
|
||||
float* outData = outPlanes.ptr<float>(y, x);
|
||||
const float* inpData_row0_c = inpData_row0;
|
||||
const float* inpData_row1_c = inpData_row1;
|
||||
for (int c = 0; c < numPlanes; ++c)
|
||||
{
|
||||
*outData = inpData_row0_c[x0] +
|
||||
(input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
|
||||
(input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
|
||||
(input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
|
||||
|
||||
inpData_row0_c += inpSpatialSize;
|
||||
inpData_row1_c += inpSpatialSize;
|
||||
outData += outSpatialSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation);
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::Layer ieLayer(name);
|
||||
ieLayer.setName(name);
|
||||
if (interpolation == "nearest")
|
||||
{
|
||||
ieLayer.setType("Resample");
|
||||
ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST");
|
||||
ieLayer.getParameters()["antialias"] = false;
|
||||
if (scaleWidth != scaleHeight)
|
||||
CV_Error(Error::StsNotImplemented, "resample with sw != sh");
|
||||
ieLayer.getParameters()["factor"] = 1.0f / scaleWidth;
|
||||
}
|
||||
else if (interpolation == "bilinear")
|
||||
{
|
||||
ieLayer.setType("Interp");
|
||||
ieLayer.getParameters()["pad_beg"] = 0;
|
||||
ieLayer.getParameters()["pad_end"] = 0;
|
||||
ieLayer.getParameters()["align_corners"] = alignCorners;
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
|
||||
ieLayer.getParameters()["width"] = outWidth;
|
||||
ieLayer.getParameters()["height"] = outHeight;
|
||||
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
|
||||
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
|
||||
#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2021_2)
|
||||
ngraph::op::InterpolateAttrs attrs;
|
||||
attrs.pads_begin.push_back(0);
|
||||
attrs.pads_end.push_back(0);
|
||||
attrs.axes = ngraph::AxisSet{2, 3};
|
||||
attrs.align_corners = alignCorners;
|
||||
|
||||
if (interpolation == "nearest") {
|
||||
attrs.mode = "nearest";
|
||||
attrs.antialias = false;
|
||||
} else if (interpolation == "bilinear") {
|
||||
attrs.mode = "linear";
|
||||
} else {
|
||||
CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
|
||||
}
|
||||
|
||||
std::vector<int64_t> shape = {outHeight, outWidth};
|
||||
auto out_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shape.data());
|
||||
auto interp = std::make_shared<ngraph::op::Interpolate>(ieInpNode, out_shape, attrs);
|
||||
#else
|
||||
ngraph::op::v4::Interpolate::InterpolateAttrs attrs;
|
||||
|
||||
if (interpolation == "nearest") {
|
||||
attrs.mode = ngraph::op::v4::Interpolate::InterpolateMode::nearest;
|
||||
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel;
|
||||
} else if (interpolation == "bilinear") {
|
||||
attrs.mode = ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx;
|
||||
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric;
|
||||
} else {
|
||||
CV_Error(Error::StsNotImplemented, format("Unsupported interpolation: %s", interpolation.c_str()));
|
||||
}
|
||||
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes;
|
||||
|
||||
if (alignCorners) {
|
||||
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners;
|
||||
}
|
||||
|
||||
attrs.nearest_mode = ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor;
|
||||
|
||||
std::vector<int64_t> shape = {outHeight, outWidth};
|
||||
auto out_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shape.data());
|
||||
|
||||
auto& input_shape = ieInpNode->get_shape();
|
||||
CV_Assert_N(input_shape[2] != 0, input_shape[3] != 0);
|
||||
std::vector<float> scales = {static_cast<float>(outHeight) / input_shape[2], static_cast<float>(outWidth) / input_shape[3]};
|
||||
auto scales_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{2}, scales.data());
|
||||
|
||||
auto axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{2, 3});
|
||||
auto interp = std::make_shared<ngraph::op::v4::Interpolate>(ieInpNode, out_shape, scales_shape, axes, attrs);
|
||||
#endif
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(interp));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
cuda4dnn::ResizeConfiguration config;
|
||||
if (interpolation == "nearest")
|
||||
{
|
||||
config.type = InterpolationType::NEAREST_NEIGHBOUR;
|
||||
config.align_corners = alignCorners;
|
||||
config.half_pixel_centers = halfPixelCenters;
|
||||
}
|
||||
else if (interpolation == "bilinear")
|
||||
{
|
||||
config.type = InterpolationType::BILINEAR;
|
||||
config.align_corners = alignCorners;
|
||||
config.half_pixel_centers = halfPixelCenters;
|
||||
}
|
||||
else if (interpolation == "opencv_linear")
|
||||
{
|
||||
config.type = InterpolationType::BILINEAR;
|
||||
config.align_corners = false;
|
||||
config.half_pixel_centers = true;
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer.");
|
||||
return make_cuda_node<cuda4dnn::ResizeOp>(preferableTarget, std::move(context->stream), config);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
int outWidth, outHeight;
|
||||
const float zoomFactorWidth, zoomFactorHeight;
|
||||
String interpolation;
|
||||
float scaleWidth, scaleHeight;
|
||||
bool alignCorners;
|
||||
bool halfPixelCenters;
|
||||
};
|
||||
|
||||
|
||||
Ptr<ResizeLayer> ResizeLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<ResizeLayer>(new ResizeLayerImpl(params));
|
||||
}
|
||||
|
||||
class InterpLayerImpl CV_FINAL : public ResizeLayerImpl
|
||||
{
|
||||
public:
|
||||
InterpLayerImpl(const LayerParams& params) : ResizeLayerImpl(params) {}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(inputs.size() == 1, inputs[0].size() == 4);
|
||||
outputs.resize(1, inputs[0]);
|
||||
outputs[0][2] = zoomFactorHeight > 0 ? (1 + zoomFactorHeight * (outputs[0][2] - 1)) : outHeight;
|
||||
outputs[0][3] = zoomFactorWidth > 0 ? (1 + zoomFactorWidth * (outputs[0][3] - 1)) : outWidth;
|
||||
// We can work in-place (do nothing) if input shape == output shape.
|
||||
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
|
||||
}
|
||||
};
|
||||
|
||||
Ptr<Layer> InterpLayer::create(const LayerParams& params)
|
||||
{
|
||||
LayerParams lp(params);
|
||||
lp.set("interpolation", "bilinear");
|
||||
lp.set("align_corners", true);
|
||||
return Ptr<Layer>(new InterpLayerImpl(lp));
|
||||
}
|
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
507
3rdparty/opencv-4.5.4/modules/dnn/src/layers/scale_layer.cpp
vendored
Normal file
@ -0,0 +1,507 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2016, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
/*
|
||||
Implementation of Scale layer.
|
||||
*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/scale_shift.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class ScaleLayerImpl CV_FINAL : public ScaleLayer
|
||||
{
|
||||
public:
|
||||
ScaleLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
hasBias = params.get<bool>("bias_term", false);
|
||||
axis = params.get<int>("axis", 1);
|
||||
hasWeights = false;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
outputs.assign(1, inputs[0]);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
hasWeights = blobs.size() == 2 || (blobs.size() <= 1 && !hasBias);
|
||||
CV_Assert((inputs.size() == 2 && blobs.empty()) || blobs.size() == (int)hasWeights + (int)hasBias);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
backendId == DNN_BACKEND_HALIDE ||
|
||||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1 && !blobs.empty()) ||
|
||||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0);
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert_N(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
|
||||
|
||||
Mat &inpBlob = inputs[0];
|
||||
Mat &outBlob = outputs[0];
|
||||
// There is a mode when we multiply a first blob by a second one
|
||||
// instead of trainable weights.
|
||||
Mat weights = hasWeights ? (blobs.empty() ? inputs[1] : blobs[0]).reshape(1, 1) : Mat();
|
||||
Mat bias = hasBias ? (blobs.empty() ? inputs[1] : blobs.back()).reshape(1, 1) : Mat();
|
||||
|
||||
MatShape inpShape = shape(inpBlob);
|
||||
const int numWeights = !weights.empty() ? weights.total() : bias.total();
|
||||
CV_Assert(numWeights != 0);
|
||||
if (hasWeights && hasBias)
|
||||
CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs");
|
||||
|
||||
int endAxis;
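// Locate the axis range [axis, endAxis) whose total size matches the number of weights;
// scale and bias are then broadcast over the remaining inner (spatial) axes.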
|
||||
for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
|
||||
{
|
||||
if (total(inpShape, axis, endAxis) == numWeights)
|
||||
break;
|
||||
}
|
||||
CV_Assert(total(inpShape, axis, endAxis) == numWeights);
|
||||
CV_Assert(!hasBias || numWeights == bias.total());
|
||||
CV_CheckTypeEQ(inpBlob.type(), CV_32FC1, ""); CV_CheckTypeEQ(outBlob.type(), CV_32FC1, "");
|
||||
|
||||
int numSlices = total(inpShape, 0, axis);
|
||||
float* inpData = (float*)inpBlob.data;
|
||||
float* outData = (float*)outBlob.data;
|
||||
|
||||
if (endAxis != inpBlob.dims)
|
||||
{
|
||||
float* weightsData = !weights.empty() ? (float*)weights.data : 0;
|
||||
float* biasesData = hasBias ? (float*)bias.data : 0;
|
||||
int spatialSize = total(inpShape, endAxis); // spatialSize != 1
|
||||
for (int i = 0; i < numSlices; ++i)
|
||||
{
|
||||
for (int j = 0; j < numWeights; ++j)
|
||||
{
|
||||
float w = weightsData ? weightsData[j] : 1;
|
||||
float b = biasesData ? biasesData[j] : 0;
|
||||
Mat inpSlice(1, spatialSize, CV_32F, inpData);
|
||||
Mat outSlice(1, spatialSize, CV_32F, outData);
|
||||
inpSlice.convertTo(outSlice, CV_32F, w, b);
|
||||
inpData += spatialSize;
|
||||
outData += spatialSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < numSlices; ++i)
|
||||
{
|
||||
Mat inpSlice(1, numWeights, CV_32F, inpData);
|
||||
Mat outSlice(1, numWeights, CV_32F, outData);
|
||||
if (!weights.empty())
|
||||
{
|
||||
multiply(inpSlice, weights, outSlice);
|
||||
if (hasBias)
|
||||
add(outSlice, bias, outSlice);
|
||||
}
|
||||
else if (hasBias)
|
||||
add(inpSlice, bias, outSlice);
|
||||
inpData += numWeights;
|
||||
outData += numWeights;
|
||||
}
|
||||
}
|
||||
}
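// Worked example of the endAxis search above (added note, not upstream code):
// for an input of shape [2, 3, 4, 5] with axis = 1,
//   3 weights  -> total(shape, 1, 2) == 3,  endAxis = 2: scalar path, 2 slices of
//                 3 convertTo calls over spatialSize = 4*5 = 20 elements each;
//   12 weights -> total(shape, 1, 3) == 12, endAxis = 3: same scalar path with spatialSize = 5;
//   60 weights -> total(shape, 1, 4) == 60, endAxis == dims: the vectorized
//                 multiply/add branch processes each slice of 60 values at once.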
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
CV_Assert(!blobs.empty() || inputs.size() == 2);
|
||||
|
||||
auto weightsMat = Mat(), biasMat = Mat();
|
||||
|
||||
cuda4dnn::ScaleShiftConfiguration config;
|
||||
if (hasWeights)
|
||||
{
|
||||
if (blobs.empty())
|
||||
{
|
||||
config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
|
||||
}
|
||||
else
|
||||
{
|
||||
weightsMat = blobs[0];
|
||||
config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
|
||||
}
|
||||
|
||||
if (hasBias)
|
||||
{
|
||||
if(blobs.empty())
|
||||
{
|
||||
config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
|
||||
* in either case, it is at the end of the blobs vector => bias = blobs.back()
|
||||
*/
|
||||
biasMat = blobs.back();
|
||||
config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
|
||||
}
|
||||
|
||||
config.axis = axis;
|
||||
|
||||
return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), config, weightsMat, biasMat);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
|
||||
{
|
||||
switch (node->backendId)
|
||||
{
|
||||
case DNN_BACKEND_HALIDE:
|
||||
{
|
||||
#ifdef HAVE_HALIDE
|
||||
auto base = node.dynamicCast<HalideBackendNode>();
|
||||
Halide::Func& input = base->funcs.back();
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n");
|
||||
Halide::Func top = attachHalide(input(x, y, c, n));
|
||||
return Ptr<BackendNode>(new HalideBackendNode(base, top));
|
||||
#endif // HAVE_HALIDE
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_HALIDE
|
||||
Halide::Buffer<float> input = halideBuffer(inputs[0]);
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n");
|
||||
Halide::Func top = attachHalide(input(x, y, c, n));
|
||||
return Ptr<BackendNode>(new HalideBackendNode(top));
|
||||
#endif // HAVE_HALIDE
|
||||
return Ptr<BackendNode>();
|
||||
}
|
||||
|
||||
#ifdef HAVE_HALIDE
|
||||
// attachHalide can work both with Halide::Buffer and Halide::Func. In the
|
||||
// second case it will be a fusion.
|
||||
Halide::Func attachHalide(const Halide::Expr& input)
|
||||
{
|
||||
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
|
||||
Halide::Var x("x"), y("y"), c("c"), n("n");
|
||||
|
||||
const int numChannels = blobs[0].total();
|
||||
|
||||
Halide::Expr topExpr = input;
|
||||
if (hasWeights)
|
||||
{
|
||||
auto weights = wrapToHalideBuffer(blobs[0], {numChannels});
|
||||
topExpr *= weights(c);
|
||||
}
|
||||
if (hasBias)
|
||||
{
|
||||
auto bias = wrapToHalideBuffer(blobs.back(), {numChannels});
|
||||
topExpr += bias(c);
|
||||
}
|
||||
top(x, y, c, n) = topExpr;
|
||||
return top;
|
||||
}
|
||||
#endif // HAVE_HALIDE
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||
{
|
||||
InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ScaleShiftLayer(name);
|
||||
|
||||
CV_Assert(!blobs.empty());
|
||||
const size_t numChannels = blobs[0].total();
|
||||
if (hasWeights)
|
||||
{
|
||||
addConstantData("weights", wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C), l);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto weights = InferenceEngine::make_shared_blob<float>({
|
||||
InferenceEngine::Precision::FP32, {(size_t)numChannels},
|
||||
InferenceEngine::Layout::C
|
||||
});
|
||||
weights->allocate();
|
||||
float* buf = weights->buffer().as<float*>();
|
||||
std::fill(buf, buf + numChannels, 1);
|
||||
addConstantData("weights", weights, l);
|
||||
}
|
||||
if (hasBias)
|
||||
addConstantData("biases", wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C), l);
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(l));
|
||||
}
|
||||
#endif // HAVE_DNN_IE_NN_BUILDER_2019
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
auto ieInpNode0 = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
auto ieInpNode1 = nodes.size() > 1 ? nodes[1].dynamicCast<InfEngineNgraphNode>()->node : nullptr;
|
||||
|
||||
size_t numChannels = 1;
|
||||
if (blobs.empty())
|
||||
for (const size_t& dim : ieInpNode1->get_shape())
|
||||
numChannels *= dim;
|
||||
else
|
||||
numChannels = blobs[0].total();
|
||||
|
||||
std::vector<size_t> shape(ieInpNode0->get_shape().size(), 1);
|
||||
int cAxis = normalize_axis(axis, shape.size());
|
||||
shape[cAxis] = numChannels;
|
||||
|
||||
auto node = ieInpNode0;
|
||||
if (hasWeights)
|
||||
{
|
||||
auto weight = blobs.empty() ? ieInpNode1 :
|
||||
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
|
||||
|
||||
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2)
|
||||
node = std::make_shared<ngraph::op::v1::Multiply>(node, weight, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
#else
|
||||
node = std::make_shared<ngraph::op::v0::Multiply>(node, weight, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
#endif
|
||||
}
|
||||
if (hasBias || !hasWeights)
|
||||
{
|
||||
std::shared_ptr<ngraph::Node> bias;
|
||||
if (hasBias)
|
||||
{
|
||||
bias = blobs.empty() ? ieInpNode1 :
|
||||
std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
|
||||
ngraph::Shape(shape), blobs.back().data);
|
||||
}
|
||||
else
|
||||
bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
|
||||
ngraph::Shape(shape), std::vector<float>(numChannels, 0).data());
|
||||
node = std::make_shared<ngraph::op::v1::Add>(node, bias, ngraph::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(node));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
|
||||
{
|
||||
scale = (hasWeights && !blobs.empty()) ? blobs[0] : Mat();
|
||||
shift = (hasBias && !blobs.empty()) ? blobs.back() : Mat();
|
||||
}
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
params.set("input_scales", DictValue::arrayReal(scales[0].data(), scales[0].size()));
|
||||
params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
|
||||
const std::vector<MatShape> &outputs) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(outputs); // suppress unused variable warning
|
||||
long flops = 0;
|
||||
for(int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
flops += 2*total(inputs[i]);
|
||||
}
|
||||
return flops;
|
||||
}
|
||||
|
||||
private:
|
||||
bool hasWeights;
|
||||
};
|
||||
|
||||
|
||||
Ptr<ScaleLayer> ScaleLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<ScaleLayer>(new ScaleLayerImpl(params));
|
||||
}
|
||||
|
||||
Ptr<Layer> ShiftLayer::create(const LayerParams& params)
|
||||
{
|
||||
LayerParams scaleParams;
|
||||
scaleParams.name = params.name;
|
||||
scaleParams.type = "Scale";
|
||||
scaleParams.blobs = params.blobs;
|
||||
scaleParams.set("bias_term", true);
|
||||
scaleParams.set("axis", 0);
|
||||
return Ptr<ScaleLayer>(new ScaleLayerImpl(scaleParams));
|
||||
}
|
||||
|
||||
class DataAugmentationLayerImpl CV_FINAL : public DataAugmentationLayer
|
||||
{
|
||||
public:
|
||||
DataAugmentationLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
recompute_mean = params.get<int>("recompute_mean", 1);
|
||||
CV_CheckGT(recompute_mean, 0, "");
|
||||
mean_per_pixel = params.get<bool>("mean_per_pixel", false);
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(inputs.size() == 1, blobs.size() == 3);
|
||||
CV_Assert_N(blobs[0].total() == 1,
|
||||
blobs[2].total() == inputs[0][1]);
|
||||
|
||||
outputs.assign(1, inputs[0]);
|
||||
return true;
|
||||
}
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert_N(outputs.size() == 1, blobs.size() == 3, inputs.size() == 1);
|
||||
int num_iter = 0;
|
||||
|
||||
float* inpData = inputs[0].ptr<float>();
|
||||
float* outData = outputs[0].ptr<float>();
|
||||
|
||||
Mat data_mean_cpu = blobs[1].clone();
|
||||
Mat mean_resize = Mat(inputs[0].size[3], inputs[0].size[2], CV_32FC3);
|
||||
Mat mean_3d = Mat(data_mean_cpu.size[3], data_mean_cpu.size[2], CV_32FC3, data_mean_cpu.ptr<float>(0));
|
||||
resize(mean_3d, mean_resize, Size(inputs[0].size[3], inputs[0].size[2]));
|
||||
int new_size[] = {1, mean_resize.channels(), mean_resize.cols, mean_resize.rows};
|
||||
Mat data_mean_cpu_resize = mean_resize.reshape(1, *new_size);
|
||||
Mat data_mean_per_channel_cpu = blobs[2].clone();
|
||||
|
||||
const int numWeights = data_mean_cpu_resize.total();
|
||||
CV_Assert(numWeights != 0);
|
||||
|
||||
++num_iter;
|
||||
if (num_iter <= recompute_mean)
|
||||
{
|
||||
data_mean_cpu_resize *= (num_iter - 1);
|
||||
const int batch = inputs[0].size[0];
|
||||
float alpha = 1.0 / batch;
|
||||
|
||||
for (int i = 0; i < batch; ++i)
|
||||
{
|
||||
Mat inpSlice(1, numWeights, CV_32F, inpData);
|
||||
inpSlice = alpha * inpSlice;
|
||||
|
||||
add(data_mean_cpu_resize.reshape(1, 1), inpSlice, data_mean_cpu_resize.reshape(1, 1));
|
||||
inpData += numWeights;
|
||||
}
|
||||
data_mean_cpu_resize *= (1.0 / num_iter);
|
||||
|
||||
int newsize[] = {inputs[0].size[1], (int)inputs[0].total(2)};
|
||||
reduce(data_mean_cpu_resize.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);
|
||||
|
||||
int area = inputs[0].total(2);
|
||||
data_mean_per_channel_cpu *= (1.0 / area);
|
||||
}
|
||||
|
||||
MatShape inpShape = shape(inputs[0]);
|
||||
|
||||
inpData = inputs[0].ptr<float>();
|
||||
if (mean_per_pixel)
|
||||
{
|
||||
int numSlices = inputs[0].size[0];
|
||||
for (int i = 0; i < numSlices; ++i)
|
||||
{
|
||||
Mat inpSlice(1, numWeights, CV_32F, inpData);
|
||||
Mat outSlice(1, numWeights, CV_32F, outData);
|
||||
|
||||
add(inpSlice, (-1) * data_mean_cpu_resize, outSlice);
|
||||
inpData += numWeights;
|
||||
outData += numWeights;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int numSlices = inpShape[1];
|
||||
int count = numWeights / numSlices;
|
||||
|
||||
for (int i = 0; i < numSlices; ++i)
|
||||
{
|
||||
Mat inpSlice(1, count, CV_32F, inpData);
|
||||
Mat outSlice(1, count, CV_32F, outData);
|
||||
float coeff = data_mean_per_channel_cpu.reshape(1, 1).at<float>(0, i);
|
||||
outSlice = inpSlice - coeff;
|
||||
|
||||
inpData += count;
|
||||
outData += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int recompute_mean;
|
||||
bool mean_per_pixel;
|
||||
};
|
||||
|
||||
Ptr<DataAugmentationLayer> DataAugmentationLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<DataAugmentationLayer>(new DataAugmentationLayerImpl(params));
|
||||
}
|
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
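A minimal standalone sketch (not from the vendored sources) of the per-channel affine transform that ScaleLayerImpl::forward applies on the CPU path; the helper name applyScaleShift and the flat NCHW float buffer are assumptions for illustration.

#include <cstddef>
#include <vector>

// y[n][c][i] = w[c] * x[n][c][i] + b[c] -- the per-channel case (endAxis == axis + 1).
static void applyScaleShift(const std::vector<float>& x, std::vector<float>& y,
                            const std::vector<float>& w, const std::vector<float>& b,
                            std::size_t batch, std::size_t channels, std::size_t spatial)
{
    y.resize(x.size());
    for (std::size_t n = 0; n < batch; ++n)
        for (std::size_t c = 0; c < channels; ++c)
            for (std::size_t i = 0; i < spatial; ++i)
            {
                const std::size_t idx = (n * channels + c) * spatial + i;
                y[idx] = w[c] * x[idx] + b[c];
            }
}

ShiftLayer::create above maps onto the same implementation with bias_term=true and no weight blob, i.e. w[c] == 1 for every channel.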
167
3rdparty/opencv-4.5.4/modules/dnn/src/layers/shuffle_channel_layer.cpp
vendored
Normal file
@ -0,0 +1,167 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2018, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
#include "../precomp.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/shuffle_channel.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
|
||||
class ShuffleChannelLayerImpl CV_FINAL : public ShuffleChannelLayer
|
||||
{
|
||||
public:
|
||||
ShuffleChannelLayerImpl(const LayerParams& params)
|
||||
{
|
||||
group = params.get<int>("group", 1);
|
||||
setParamsFrom(params);
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 1 && inputs[0].size() == 4);
|
||||
CV_Assert(inputs[0][1] % group == 0);
|
||||
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
|
||||
return group == 1;
|
||||
}
|
||||
|
||||
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
|
||||
{
|
||||
if (group != 1)
|
||||
{
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
LayerParams lp;
|
||||
float order[] = {0, 2, 1, 3};
|
||||
lp.set("order", DictValue::arrayInt(&order[0], 4));
|
||||
permute = PermuteLayer::create(lp);
|
||||
|
||||
const Mat& inp = inputs[0];
|
||||
const Mat& out = outputs[0];
|
||||
|
||||
permuteInpShape.resize(4);
|
||||
permuteInpShape[0] = inp.size[0];
|
||||
permuteInpShape[1] = group;
|
||||
permuteInpShape[2] = inp.size[1] / group;
|
||||
permuteInpShape[3] = inp.size[2]*inp.size[3];
|
||||
|
||||
permuteOutShape.resize(4);
|
||||
permuteOutShape[0] = permuteInpShape[0];
|
||||
permuteOutShape[1] = permuteInpShape[2];
|
||||
permuteOutShape[2] = permuteInpShape[1];
|
||||
permuteOutShape[3] = permuteInpShape[3];
|
||||
|
||||
std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
|
||||
std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
|
||||
permute->finalize(permuteInputs, permuteOutputs);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inps.getUMatVector(inputs);
|
||||
outs.getUMatVector(outputs);
|
||||
|
||||
if (inputs[0].u != outputs[0].u)
|
||||
{
|
||||
if (!permute.empty())
|
||||
{
|
||||
inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
|
||||
outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
|
||||
permute->preferableTarget = preferableTarget;
|
||||
permute->forward(inputs, outputs, internals);
|
||||
}
|
||||
else
|
||||
inputs[0].copyTo(outputs[0]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
if (inputs_arr.depth() == CV_16S)
|
||||
{
|
||||
forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Mat> inputs, outputs, internals;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
internals_arr.getMatVector(internals);
|
||||
|
||||
Mat inp = inputs[0];
|
||||
Mat out = outputs[0];
|
||||
if (inp.data != out.data)
|
||||
{
|
||||
if (!permute.empty())
|
||||
{
|
||||
inp = inp.reshape(1, permuteInpShape);
|
||||
out = out.reshape(1, permuteOutShape);
|
||||
std::vector<Mat> permuteInputs(1, inp);
|
||||
std::vector<Mat> permuteOutputs(1, out);
|
||||
permute->forward(permuteInputs, permuteOutputs, internals);
|
||||
}
|
||||
else
|
||||
inp.copyTo(out);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
return make_cuda_node<cuda4dnn::ShuffleChannelOp>(preferableTarget, std::move(context->stream), group);
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
Ptr<PermuteLayer> permute;
|
||||
std::vector<int> permuteInpShape, permuteOutShape;
|
||||
};
|
||||
|
||||
Ptr<Layer> ShuffleChannelLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<Layer>(new ShuffleChannelLayerImpl(params));
|
||||
}
|
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
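A minimal standalone sketch (not from the vendored sources) of the channel shuffle that the permute step above performs: view [N, C, H, W] as [N, group, C/group, H*W], swap the two middle axes, and flatten back. The helper name shuffleChannels and the flat buffer layout are assumptions for illustration.

#include <cstddef>
#include <vector>

// Shuffle the channels of a contiguous NCHW tensor: source channel g*cpg + k
// moves to destination channel k*group + g (order {0, 2, 1, 3} on [N, group, cpg, H*W]).
static std::vector<float> shuffleChannels(const std::vector<float>& src,
                                          std::size_t n, std::size_t c,
                                          std::size_t hw, std::size_t group)
{
    std::vector<float> dst(src.size());
    const std::size_t cpg = c / group;  // channels per group
    for (std::size_t b = 0; b < n; ++b)
        for (std::size_t g = 0; g < group; ++g)
            for (std::size_t k = 0; k < cpg; ++k)
                for (std::size_t i = 0; i < hw; ++i)
                {
                    const std::size_t srcIdx = (b * c + g * cpg + k) * hw + i;
                    const std::size_t dstIdx = (b * c + k * group + g) * hw + i;
                    dst[dstIdx] = src[srcIdx];
                }
    return dst;
}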
821
3rdparty/opencv-4.5.4/modules/dnn/src/layers/slice_layer.cpp
vendored
Normal file
@ -0,0 +1,821 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
|
||||
#include "layers_common.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
#include <opencv2/core/utils/logger.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/slice.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
void sliceRangesFromShape(const MatShape& inpShape, int& axis, std::vector<std::vector<cv::Range> >& sliceRanges)
|
||||
{
|
||||
CV_Assert(inpShape.size() > 0);
|
||||
bool axisNeg = (axis < 0);
|
||||
axis = (axis + static_cast<int>(inpShape.size())) % inpShape.size();
|
||||
int n = inpShape[axis];
|
||||
|
||||
for (size_t i = 0; i < sliceRanges.size(); ++i){
|
||||
std::vector<Range>& ranges = sliceRanges[i];
|
||||
if (axisNeg)
|
||||
{
|
||||
ranges.insert(ranges.begin(), axis, Range::all());
|
||||
}
|
||||
Range& range = ranges.back();
|
||||
|
||||
if (range.start >= 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
CV_Assert(n != 0);
|
||||
range.start = (n + range.start) % n;
|
||||
}
|
||||
}
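// Example of the normalization above (added note, not upstream code): with
// inpShape = {1, 3, 10, 10}, axis = -1 and sliceRanges = {{Range(-5, -1)}}, the axis
// becomes 3, three Range::all() entries are prepended so the range lines up with the
// last axis, and the negative start wraps to (10 - 5) % 10 = 5. A still-negative end
// is resolved later through normalize_axis_range().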
|
||||
|
||||
class SliceLayerImpl : public SliceLayer
|
||||
{
|
||||
public:
|
||||
SliceLayerImpl(const LayerParams& params)
|
||||
{
|
||||
setParamsFrom(params);
|
||||
hasSteps = false;
|
||||
axis = params.get<int>("axis", 1);
|
||||
num_split = params.get<int>("num_split", 0);
|
||||
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
|
||||
shapesInitialized = !hasDynamicShapes;
|
||||
|
||||
if (params.has("slice_point"))
|
||||
{
|
||||
CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
|
||||
const DictValue &indicesValue = params.get("slice_point");
|
||||
int size = axis > 0 ? axis + 1 : 1;
|
||||
sliceRanges.resize(indicesValue.size() + 1,
|
||||
std::vector<Range>(size, Range::all()));
|
||||
int prevSlice = 0;
|
||||
for (int i = 0; i < indicesValue.size(); ++i)
|
||||
{
|
||||
sliceRanges[i][size - 1].start = prevSlice;
|
||||
sliceRanges[i][size - 1].end = indicesValue.get<int>(i);
|
||||
prevSlice = sliceRanges[i][size - 1].end;
|
||||
}
|
||||
sliceRanges.back()[size - 1].start = prevSlice;
|
||||
}
|
||||
else if (params.has("begin"))
|
||||
{
|
||||
CV_Assert(params.has("size") ^ params.has("end"));
|
||||
const DictValue &begins = params.get("begin");
|
||||
const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");
|
||||
CV_Assert(begins.size() == sizesOrEnds.size());
|
||||
|
||||
sliceRanges.resize(1);
|
||||
sliceRanges[0].resize(begins.size(), Range::all());
|
||||
for (int i = 0; i < begins.size(); ++i)
|
||||
{
|
||||
int start = begins.get<int>(i);
|
||||
int sizeOrEnd = sizesOrEnds.get<int>(i); // It may be negative to reverse indexation.
|
||||
|
||||
sliceRanges[0][i].start = start;
|
||||
if (params.has("size"))
|
||||
{
|
||||
int size = sizeOrEnd;
|
||||
CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size).
|
||||
sliceRanges[0][i].end = size > 0 ? (start + size) : -1; // We'll finalize a negative value later.
|
||||
}
|
||||
else
|
||||
{
|
||||
int end = sizeOrEnd;
|
||||
CV_Assert(end < 0 || end > start); // End index is excluded.
|
||||
sliceRanges[0][i].end = end; // We'll finalize a negative value later.
|
||||
}
|
||||
}
|
||||
|
||||
if (params.has("steps"))
|
||||
{
|
||||
const DictValue &steps = params.get("steps");
|
||||
sliceSteps.resize(1);
|
||||
sliceSteps[0].resize(steps.size());
|
||||
|
||||
for (int i = 0; i < steps.size(); ++i)
|
||||
{
|
||||
int step = steps.get<int>(i);
|
||||
CV_Assert(step >= 1);
|
||||
if (step > 1)
|
||||
hasSteps = true;
|
||||
sliceSteps[0][i] = step;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
|
||||
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) &&
|
||||
sliceRanges.size() == 1 && sliceRanges[0].size() == 4 && !hasSteps;
|
||||
#endif
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
return sliceRanges.size() == 1 && !hasSteps;
|
||||
#endif
|
||||
#ifdef HAVE_CUDA
|
||||
if (backendId == DNN_BACKEND_CUDA)
|
||||
return !hasSteps;
|
||||
#endif
|
||||
return backendId == DNN_BACKEND_OPENCV;
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 1);
|
||||
MatShape inpShape = inputs[0];
|
||||
|
||||
int axis_rw = axis;
|
||||
std::vector<std::vector<cv::Range> > sliceRanges_rw = sliceRanges;
|
||||
sliceRangesFromShape(inpShape, axis_rw, sliceRanges_rw);
|
||||
|
||||
if (!sliceRanges_rw.empty())
|
||||
{
|
||||
outputs.resize(sliceRanges_rw.size(), inpShape);
|
||||
for (int i = 0; i < outputs.size(); ++i)
|
||||
{
|
||||
CV_Assert(sliceRanges_rw[i].size() <= inpShape.size());
|
||||
for (int j = 0; j < sliceRanges_rw[i].size(); ++j)
|
||||
{
|
||||
if (shapesInitialized || inpShape[j] > 0)
|
||||
outputs[i][j] = normalize_axis_range(sliceRanges_rw[i][j], inpShape[j]).size();
|
||||
|
||||
if (!sliceSteps.empty() && (i < sliceSteps.size()) && (j < sliceSteps[i].size()) && (sliceSteps[i][j] > 1))
|
||||
outputs[i][j] = (outputs[i][j] + sliceSteps[i][j] - 1) / sliceSteps[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
else // Divide input blob on equal parts by axis.
|
||||
{
|
||||
CV_Assert(0 <= axis_rw && axis_rw < inpShape.size());
|
||||
int splits = num_split ? num_split : requiredOutputs;
|
||||
CV_Assert(splits > 0 && inpShape[axis_rw] % splits == 0);
|
||||
inpShape[axis_rw] /= splits;
|
||||
outputs.resize(splits, inpShape);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
|
||||
{
|
||||
shapesInitialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_OPENCL
|
||||
ocl_exec_cache.clear();
|
||||
#endif
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
CV_Assert(inputs.size() == 1);
|
||||
const MatSize& inpShape = inputs[0].size;
|
||||
|
||||
sliceRangesFromShape(shape(inputs[0]), axis, sliceRanges);
|
||||
finalSliceRanges = sliceRanges;
|
||||
|
||||
if (sliceRanges.empty())
|
||||
{
|
||||
// Divide input blob on equal parts by axis.
|
||||
int outAxisSize = inpShape[axis] / outputs.size();
|
||||
finalSliceRanges.resize(outputs.size(),
|
||||
std::vector<Range>(axis + 1, Range::all()));
|
||||
int prevSlice = 0;
|
||||
for (int i = 0; i < outputs.size(); ++i)
|
||||
{
|
||||
finalSliceRanges[i][axis].start = prevSlice;
|
||||
finalSliceRanges[i][axis].end = finalSliceRanges[i][axis].start + outAxisSize;
|
||||
prevSlice = finalSliceRanges[i][axis].end;
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Assert(outputs.size() == sliceRanges.size());
|
||||
|
||||
for (int i = 0; i < outputs.size(); ++i)
|
||||
{
|
||||
CV_Assert(finalSliceRanges[i].size() <= inpShape.dims());
|
||||
// Fill the rest of ranges.
|
||||
for (int j = finalSliceRanges[i].size(); j < inpShape.dims(); ++j)
|
||||
{
|
||||
finalSliceRanges[i].push_back(Range::all());
|
||||
}
|
||||
// Clamp.
|
||||
for (int j = 0; j < finalSliceRanges[i].size(); ++j)
|
||||
{
|
||||
finalSliceRanges[i][j] = normalize_axis_range(finalSliceRanges[i][j], inpShape[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!sliceSteps.empty() && sliceSteps[0].size() != inputs[0].dims)
|
||||
sliceSteps[0].resize(inputs[0].dims, 1);
|
||||
|
||||
#if 0
|
||||
std::cout << "DEBUG: DNN/Slice: " << outputs.size() << " inpShape=" << inpShape << std::endl;
|
||||
for (int i = 0; i < outputs.size(); ++i)
|
||||
{
|
||||
for (int j = 0; j < finalSliceRanges[i].size(); ++j)
|
||||
{
|
||||
std::cout << finalSliceRanges[i][j];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
struct OpenCLExecInfo
|
||||
{
|
||||
std::string kernel_name;
|
||||
std::string build_opts;
|
||||
size_t local_size[2];
|
||||
size_t global_size[2];
|
||||
|
||||
OpenCLExecInfo()
|
||||
{
|
||||
local_size[0] = local_size[1] = 0;
|
||||
global_size[0] = global_size[1] = 0;
|
||||
}
|
||||
};
|
||||
std::vector<OpenCLExecInfo> ocl_exec_cache;
|
||||
|
||||
void ocl_prepare(const std::vector<UMat>& inputs, const std::vector<UMat>& outputs)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
CV_Assert(outputs.size() == finalSliceRanges.size());
|
||||
ocl_exec_cache.resize(outputs.size());
|
||||
|
||||
const UMat& input = inputs[0];
|
||||
const int dims = input.dims;
|
||||
|
||||
size_t WSZ = 128;
|
||||
|
||||
const int elemSize = (int)input.elemSize();
|
||||
String opts0 = cv::format(
|
||||
"-DDIMS=%d -DELEMSIZE=%d",
|
||||
dims, elemSize
|
||||
);
|
||||
for (int d = 0; d < dims; d++)
|
||||
{
|
||||
opts0 += cv::format(" -DSRC_STEP_%d=%d", d, (int)input.step[dims - 1 - d]);
|
||||
}
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
OpenCLExecInfo& ocl = ocl_exec_cache[i];
|
||||
|
||||
const UMat& output = outputs[i];
|
||||
const std::vector<Range>& range = finalSliceRanges[i];
|
||||
|
||||
String opts = opts0;
|
||||
|
||||
CV_CheckEQ(output.dims, dims, "");
|
||||
for (int d = 0; d < dims; d++)
|
||||
{
|
||||
opts += cv::format(" -DDST_STEP_%d=%d -DDST_SZ_%d=%d -DSRC_START_%d=%d",
|
||||
d, (int)output.step[dims - 1 - d],
|
||||
d, (int)output.size[dims - 1 - d],
|
||||
d, (int)range[dims - 1 - d].start
|
||||
);
|
||||
CV_CheckEQ(range[d].size(), (int)output.size[d], "");
|
||||
}
|
||||
|
||||
const size_t param_LIMIT_BLOCK_SIZE_PER_WG = WSZ * 64;
|
||||
|
||||
int block_dims = 0;
|
||||
size_t block_size = elemSize;
|
||||
for (int i = dims - 1; i >= 0; --i)
|
||||
{
|
||||
if (input.step[i] != output.step[i])
|
||||
break;
|
||||
block_size *= output.size[i];
|
||||
block_dims++;
|
||||
if (block_size >= param_LIMIT_BLOCK_SIZE_PER_WG)
|
||||
break;
|
||||
}
|
||||
|
||||
const size_t total = output.total() * elemSize;
|
||||
size_t num_blocks = total / block_size;
|
||||
|
||||
if ((num_blocks <= 8 && block_size >= WSZ * 4) || (block_size >= param_LIMIT_BLOCK_SIZE_PER_WG))
|
||||
{
|
||||
// use 1D copy mode
|
||||
opts += cv::format(" -DUSE_COPY_1D=1");
|
||||
|
||||
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
|
||||
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims);
|
||||
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
|
||||
|
||||
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// use 2D copy mode
|
||||
int block_cols = block_size;
|
||||
int block_dims_contiguous = block_dims;
|
||||
size_t input_base_step = input.step[dims - 1 - block_dims_contiguous];
|
||||
size_t output_base_step = output.step[dims - 1 - block_dims_contiguous];
|
||||
|
||||
size_t block_rows = 1;
|
||||
for (int i = dims - 1 - block_dims_contiguous; i >= 0; --i)
|
||||
{
|
||||
if (input.step[i] * output_base_step != output.step[i] * input_base_step)
|
||||
break;
|
||||
block_rows *= output.size[i];
|
||||
block_dims++;
|
||||
}
|
||||
|
||||
block_size *= block_rows;
|
||||
|
||||
num_blocks = total / block_size;
|
||||
|
||||
if (block_rows > 1)
|
||||
{
|
||||
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
|
||||
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
|
||||
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
|
||||
|
||||
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_cols);
|
||||
|
||||
opts += cv::format(" -DBLOCK_ROWS=%d", (int)block_rows);
|
||||
opts += cv::format(" -DBLOCK_SRC_STRIDE=%d", (int)input_base_step);
|
||||
}
|
||||
else
|
||||
{
|
||||
// use 1D copy mode
|
||||
opts += cv::format(" -DUSE_COPY_1D=1");
|
||||
|
||||
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims_contiguous);
|
||||
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
|
||||
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
|
||||
|
||||
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
|
||||
}
|
||||
}
|
||||
|
||||
const size_t MIN_WORK_ITEMS = 16;
|
||||
if (block_size <= 4 * MIN_WORK_ITEMS)
|
||||
WSZ = 4;
|
||||
else if (block_size <= 8 * MIN_WORK_ITEMS)
|
||||
WSZ = 8;
|
||||
else if (block_size <= 16 * MIN_WORK_ITEMS)
|
||||
WSZ = 16;
|
||||
else if (block_size <= 32 * MIN_WORK_ITEMS)
|
||||
WSZ = 32;
|
||||
else if (block_size <= 64 * MIN_WORK_ITEMS)
|
||||
WSZ = 64;
|
||||
|
||||
opts += cv::format(" -DWSZ=%d", (int)WSZ);
|
||||
|
||||
std::ostringstream kernel_suffix;
|
||||
kernel_suffix << dims << 'x' << elemSize << "_bsz" << block_size;
|
||||
kernel_suffix << "__src_";
|
||||
for (int d = 0; d < dims; d++)
|
||||
{
|
||||
kernel_suffix << input.size[dims - 1 - d] << '_';
|
||||
}
|
||||
kernel_suffix << '_';
|
||||
/*for (int d = 0; d < dims; d++)
|
||||
{
|
||||
kernel_suffix << input.step[dims - 1 - d] << '_';
|
||||
}
|
||||
kernel_suffix << '_';*/
|
||||
|
||||
kernel_suffix << "dst_";
|
||||
for (int d = 0; d < dims; d++)
|
||||
{
|
||||
kernel_suffix << output.size[dims - 1 - d] << '_';
|
||||
}
|
||||
/*kernel_suffix << '_';
|
||||
for (int d = 0; d < dims; d++)
|
||||
{
|
||||
kernel_suffix << output.step[dims - 1 - d] << '_';
|
||||
}*/
|
||||
kernel_suffix << "_slice_";
|
||||
for (int d = 0; d < dims; d++)
|
||||
{
|
||||
kernel_suffix << range[dims - 1 - d].start << '_';
|
||||
}
|
||||
for (int d = 0; d < dims; d++)
|
||||
{
|
||||
kernel_suffix << '_' << range[dims - 1 - d].end;
|
||||
}
|
||||
|
||||
std::string kernel_suffix_str = kernel_suffix.str();
|
||||
opts += cv::format(" -DSLICE_KERNEL_SUFFIX=%s", kernel_suffix_str.c_str());
|
||||
|
||||
ocl.kernel_name = cv::format("slice_%s", kernel_suffix_str.c_str());
|
||||
ocl.build_opts = opts;
|
||||
ocl.local_size[0] = WSZ;
|
||||
ocl.local_size[1] = 1;
|
||||
ocl.global_size[0] = WSZ;
|
||||
ocl.global_size[1] = num_blocks;
|
||||
} // for outputs.size()
|
||||
} // ocl_prepare
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
if (hasSteps)
|
||||
return false; // TODO not implemented yet: https://github.com/opencv/opencv/pull/19546
|
||||
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
|
||||
CV_Assert(outputs.size() == finalSliceRanges.size());
|
||||
|
||||
const UMat& input = inputs[0];
|
||||
const int dims = input.dims;
|
||||
if (dims > 5)
|
||||
{
|
||||
CV_LOG_INFO(NULL, "DNN/OpenCL/Slice: implementation doesn't support dims=" << dims << ". Fallback to CPU");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ocl_exec_cache.empty())
|
||||
{
|
||||
ocl_prepare(inputs, outputs);
|
||||
}
|
||||
CV_CheckEQ(ocl_exec_cache.size(), outputs.size(), "");
|
||||
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
const OpenCLExecInfo& ocl = ocl_exec_cache[i];
|
||||
|
||||
UMat& output = outputs[i];
|
||||
|
||||
ocl::Kernel kernel(ocl.kernel_name.c_str(), ocl::dnn::slice_oclsrc, ocl.build_opts);
|
||||
if (kernel.empty())
|
||||
return false;
|
||||
bool ret = kernel.args(
|
||||
ocl::KernelArg::PtrReadOnly(input),
|
||||
ocl::KernelArg::PtrWriteOnly(output)
|
||||
)
|
||||
.run_(2, (size_t*)ocl.global_size, (size_t*)ocl.local_size, false);
|
||||
if (!ret)
|
||||
return false;
|
||||
} // for outputs.size()
|
||||
|
||||
return true;
|
||||
} // forward_ocl
|
||||
#endif
|
||||
|
||||
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||
|
||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||
|
||||
std::vector<Mat> inputs, outputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
outputs_arr.getMatVector(outputs);
|
||||
|
||||
const Mat& inpMat = inputs[0];
|
||||
CV_Assert(outputs.size() == finalSliceRanges.size());
|
||||
|
||||
if (!hasSteps)
|
||||
{
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
inpMat(finalSliceRanges[i]).copyTo(outputs[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int dimsNum = inpMat.dims;
|
||||
|
||||
for (size_t i = 0; i < outputs.size(); i++)
|
||||
{
|
||||
std::vector<int> inpIdx(dimsNum, 0);
|
||||
std::vector<int> outIdx(dimsNum, 0);
|
||||
if (inpMat.type() == CV_16S)
|
||||
getSliceRecursive<int16_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
|
||||
else if (inpMat.type() == CV_8S)
|
||||
getSliceRecursive<int8_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
|
||||
else
|
||||
getSliceRecursive<float>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(finalSliceRanges.size() == 1, inputs.size() <= 2);
|
||||
|
||||
std::vector<size_t> axes, offsets, dims;
|
||||
int from, to, step;
|
||||
int numDims = finalSliceRanges[0].size();
|
||||
if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
|
||||
{
|
||||
from = axis;
|
||||
to = numDims;
|
||||
step = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
from = numDims - 1;
|
||||
to = axis - 1;
|
||||
step = -1;
|
||||
}
|
||||
for (int i = from; i != to; i += step)
|
||||
{
|
||||
axes.push_back(i);
|
||||
offsets.push_back(finalSliceRanges[0][i].start);
|
||||
dims.push_back(finalSliceRanges[0][i].size());
|
||||
}
|
||||
|
||||
InferenceEngine::Builder::Layer ieLayer(name);
|
||||
ieLayer.setName(name);
|
||||
ieLayer.setType("Crop");
|
||||
ieLayer.getParameters()["axis"] = axes;
|
||||
ieLayer.getParameters()["dim"] = dims;
|
||||
ieLayer.getParameters()["offset"] = offsets;
|
||||
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
|
||||
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
|
||||
|
||||
if (inputs.size() != 2)
|
||||
{
|
||||
std::vector<size_t> outShape(numDims);
|
||||
for (int i = 0; i < numDims; ++i)
|
||||
outShape[i] = finalSliceRanges[0][i].size();
|
||||
|
||||
ieLayer.getInputPorts()[1].setParameter("type", "weights");
|
||||
|
||||
auto shapeSource = InferenceEngine::make_shared_blob<float>({
|
||||
InferenceEngine::Precision::FP32, outShape,
|
||||
InferenceEngine::Layout::ANY
|
||||
});
|
||||
shapeSource->allocate();
|
||||
addConstantData("weights", shapeSource, ieLayer);
|
||||
}
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_DNN_NGRAPH
|
||||
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
|
||||
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
|
||||
{
|
||||
CV_Assert_N(nodes.size() <= 2);
|
||||
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
|
||||
CV_Assert(finalSliceRanges[0].size() == ieInpNode->get_shape().size());
|
||||
|
||||
std::vector<int64_t> offsets, dims;
|
||||
for (int i = 0; i < finalSliceRanges[0].size(); ++i)
|
||||
{
|
||||
offsets.push_back(finalSliceRanges[0][i].start);
|
||||
dims.push_back(finalSliceRanges[0][i].end);
|
||||
}
|
||||
|
||||
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{offsets.size()}, offsets.data());
|
||||
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{dims.size()}, dims.data());
|
||||
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
|
||||
ngraph::Shape{dims.size()}, std::vector<int64_t>((int64_t)dims.size(), 1));
|
||||
|
||||
auto slice = std::make_shared<ngraph::op::v1::StridedSlice>(ieInpNode,
|
||||
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
|
||||
|
||||
return Ptr<BackendNode>(new InfEngineNgraphNode(slice));
|
||||
}
|
||||
#endif // HAVE_DNN_NGRAPH
|
||||
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
Ptr<BackendNode> initCUDA(
|
||||
void *context_,
|
||||
const std::vector<Ptr<BackendWrapper>>& inputs,
|
||||
const std::vector<Ptr<BackendWrapper>>& outputs
|
||||
) override
|
||||
{
|
||||
auto context = reinterpret_cast<csl::CSLContext*>(context_);
|
||||
|
||||
std::vector<std::vector<std::size_t>> offsets;
|
||||
for (const auto& ranges : finalSliceRanges)
|
||||
{
|
||||
std::vector<std::size_t> offsets_i;
|
||||
for (const auto& range : ranges)
|
||||
offsets_i.push_back(range.start);
|
||||
offsets.push_back(std::move(offsets_i));
|
||||
}
|
||||
|
||||
return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
|
||||
}
|
||||
#endif
|
||||
|
||||
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
||||
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
||||
{
|
||||
const int numOutputs = scales[1].size();
|
||||
for (int i = 0; i < numOutputs; i++)
|
||||
{
|
||||
if (scales[1][i] != scales[0][0])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
void getSliceRecursive(const Mat &inpMat, std::vector<int> &inpIdx,
|
||||
const std::vector<Range> &sliceRanges,
|
||||
const std::vector<int> &sliceSteps, int dim, int dimsNum,
|
||||
Mat &outputs, std::vector<int> &outIdx)
|
||||
{
|
||||
int begin = sliceRanges[dim].start;
|
||||
int end = sliceRanges[dim].end;
|
||||
int step = !sliceSteps.empty() ? sliceSteps[dim] : 1;
|
||||
|
||||
// TODO optimization is required (for 2D tail case at least)
|
||||
for (int k = begin, j = 0; k < end; k += step, j++)
|
||||
{
|
||||
inpIdx[dim] = k;
|
||||
outIdx[dim] = j;
|
||||
|
||||
if (dim + 1 < dimsNum)
|
||||
getSliceRecursive<T>(inpMat, inpIdx, sliceRanges, sliceSteps, dim + 1, dimsNum, outputs, outIdx);
|
||||
else
|
||||
outputs.at<T>(outIdx.data()) = inpMat.at<T>(inpIdx.data());
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
// The actual non-negative values determined from @p sliceRanges depends on input size.
|
||||
std::vector<std::vector<Range> > finalSliceRanges;
|
||||
bool hasDynamicShapes;
|
||||
bool shapesInitialized;
|
||||
bool hasSteps;
|
||||
};
|
||||
|
||||
class CropLayerImpl CV_FINAL : public SliceLayerImpl
|
||||
{
|
||||
public:
|
||||
CropLayerImpl(const LayerParams& params) : SliceLayerImpl(LayerParams())
|
||||
{
|
||||
setParamsFrom(params);
|
||||
axis = params.get<int>("axis", 2);
|
||||
const DictValue *paramOffset = params.ptr("offset");
|
||||
|
||||
if (paramOffset)
|
||||
{
|
||||
for (int i = 0; i < paramOffset->size(); i++)
|
||||
offset.push_back(paramOffset->get<int>(i));
|
||||
}
|
||||
}
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_Assert(inputs.size() == 2);
|
||||
|
||||
MatShape dstShape = inputs[0];
|
||||
int start = normalize_axis(axis, dstShape);
|
||||
for (int i = start; i < dstShape.size(); i++)
|
||||
{
|
||||
dstShape[i] = inputs[1][i];
|
||||
}
|
||||
outputs.resize(1, dstShape);
|
||||
return false;
|
||||
}
|
||||
|
||||
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
|
||||
{
|
||||
std::vector<Mat> inputs;
|
||||
inputs_arr.getMatVector(inputs);
|
||||
CV_Assert(2 == inputs.size());
|
||||
|
||||
const Mat &inpBlob = inputs[0];
|
||||
const Mat &inpSzBlob = inputs[1];
|
||||
|
||||
int dims = inpBlob.dims;
|
||||
int start_axis = normalize_axis(axis, dims);
|
||||
|
||||
std::vector<int> offset_final(dims, 0);
|
||||
if (offset.size() == 1)
|
||||
{
|
||||
for (int i = start_axis; i < dims; i++)
|
||||
offset_final[i] = offset[0];
|
||||
}
|
||||
else if (offset.size() > 1)
|
||||
{
|
||||
if ((int)offset.size() != dims - start_axis)
|
||||
CV_Error(Error::StsBadArg, "number of offset values specified must be "
|
||||
"equal to the number of dimensions following axis.");
|
||||
|
||||
for (int i = start_axis; i < dims; i++)
|
||||
offset_final[i] = offset[i - start_axis];
|
||||
}
|
||||
|
||||
finalSliceRanges.resize(1);
|
||||
finalSliceRanges[0].resize(dims);
|
||||
for (int i = 0; i < start_axis; i++)
|
||||
{
|
||||
finalSliceRanges[0][i] = Range(0, inpBlob.size[i]);
|
||||
}
|
||||
for (int i = start_axis; i < dims; i++)
|
||||
{
|
||||
if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size[i] > inpBlob.size[i])
|
||||
CV_Error(Error::StsBadArg, "invalid crop parameters or blob sizes");
|
||||
|
||||
finalSliceRanges[0][i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<int> offset;
|
||||
};
|
||||
|
||||
Ptr<SliceLayer> SliceLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<SliceLayer>(new SliceLayerImpl(params));
|
||||
}
|
||||
|
||||
Ptr<Layer> CropLayer::create(const LayerParams& params)
|
||||
{
|
||||
return Ptr<Layer>(new CropLayerImpl(params));
|
||||
}
|
||||
|
||||
}
|
||||
}
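A small sketch (not from the vendored sources) of the output-extent arithmetic used by SliceLayerImpl::getMemoryShapes when steps are present; slicedExtent is an illustrative helper name.

#include <cassert>

// Elements kept from [begin, end) when stepping by `step`: ceil((end - begin) / step).
static int slicedExtent(int begin, int end, int step)
{
    assert(step >= 1 && end >= begin);
    return (end - begin + step - 1) / step;
}

// Example: begin = 1, end = 8, step = 3 keeps indices 1, 4, 7, so the extent is 3.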
416
3rdparty/opencv-4.5.4/modules/dnn/src/layers/softmax_layer.cpp
vendored
Normal file
@ -0,0 +1,416 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#include "layers_common.hpp"
|
||||
#include "../op_cuda.hpp"
|
||||
#include "../op_halide.hpp"
|
||||
#include "../op_inf_engine.hpp"
|
||||
#include "../ie_ngraph.hpp"
|
||||
#include "../op_vkcom.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdlib.h>
|
||||
using std::max;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "opencl_kernels_dnn.hpp"
|
||||
using namespace cv::dnn::ocl4dnn;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "../cuda4dnn/primitives/softmax.hpp"
|
||||
using namespace cv::dnn::cuda4dnn;
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
|
||||
{
|
||||
public:
|
||||
|
||||
SoftMaxLayerImpl(const LayerParams& params)
|
||||
{
|
||||
axisRaw = params.get<int>("axis", 1);
|
||||
logSoftMax = params.get<bool>("log_softmax", false);
|
||||
setParamsFrom(params);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
Ptr<OCL4DNNSoftmax<float> > softmaxOp;
|
||||
#endif
|
||||
|
||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const CV_OVERRIDE
|
||||
{
|
||||
bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
|
||||
MatShape shape = inputs[0];
|
||||
int cAxis = normalize_axis(axisRaw, shape.size());
|
||||
shape[cAxis] = 1;
|
||||
internals.assign(1, shape);
|
||||
return inplace;
|
||||
}
|
||||
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) ||
|
||||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
|
||||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !logSoftMax) ||
|
||||
(backendId == DNN_BACKEND_VKCOM && haveVulkan());
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
virtual void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) CV_OVERRIDE
|
||||
{
|
||||
softmaxOp.release();
|
||||
}
|
||||
|
||||
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
|
||||
{
|
||||
std::vector<UMat> inputs;
|
||||
std::vector<UMat> outputs;
|
||||
std::vector<UMat> internals;
|
||||
|
||||
bool use_half = (inputs_.depth() == CV_16S);
|
||||
inputs_.getUMatVector(inputs);
|
||||
outputs_.getUMatVector(outputs);
|
||||
internals_.getUMatVector(internals);
|
||||
|
||||
UMat& src = inputs[0];
|
||||
UMat& dstMat = outputs[0];
|
||||
int axis = normalize_axis(axisRaw, src.dims);
|
||||
|
||||
if (softmaxOp.empty())
|
||||
{
|
||||
OCL4DNNSoftmaxConfig config;
|
||||
config.in_shape = shape(inputs[0]);
|
||||
config.axis = axis;
|
||||
config.channels = inputs[0].size[axis];
|
||||
config.logsoftmax = logSoftMax;
|
||||
config.use_half = use_half;
|
||||
|
||||
softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
|
||||
}
|
||||
|
||||
if (softmaxOp->Forward(src, dstMat))
|
||||
return true;
|
||||
|
||||
UMat& bufMat = internals[0];
|
||||
MatShape s = shape(src);
|
||||
size_t outerSize = total(s, 0, axis);
|
||||
size_t channels = src.size[axis];
|
||||
size_t innerSize = total(s, axis + 1);
|
||||
|
||||
String buildOpts = format("-DT=%s", use_half ? "half" : "float");
|
||||
ocl::Kernel kmax, ksub, ksum, kdiv;

        if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
            return false;

        size_t bufSize = internals[0].total();
        size_t totalSize = src.total();

        size_t internal_globalSize[1] = { bufSize };
        size_t total_globalSize[1] = { totalSize };

        kmax.args((int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadWrite(bufMat));
        if (!kmax.run(1, internal_globalSize, NULL, false))
            return false;

        ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(bufMat),
                  ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(dstMat));
        if (!ksub.run(1, total_globalSize, NULL, false))
            return false;

        ksum.args((int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
        if (!ksum.run(1, internal_globalSize, NULL, false))
            return false;

        kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
        if (!kdiv.run(1, total_globalSize, NULL, false))
            return false;

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        const Mat &src = inputs[0];
        Mat &dst = outputs[0];

        int axis = normalize_axis(axisRaw, src.dims);
        size_t outerSize = src.total(0, axis), channels = src.size[axis],
               innerSize = src.total(axis + 1);

        CV_Assert(src.type() == CV_32F);
        CV_Assert(src.isContinuous() && dst.isContinuous());

        const float *srcPtr = src.ptr<float>();
        float *dstPtr = dst.ptr<float>();
        float *bufPtr = internals[0].ptr<float>();

        size_t outerStep = src.total(axis);
        size_t cnStep = src.total(axis + 1);
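        // The passes below implement the numerically stable softmax
        //   y[c] = exp(x[c] - max_k x[k]) / sum_k exp(x[k] - max_k x[k])
        // for every (outer, inner) position along the selected axis; when
        // logSoftMax is set the result is additionally passed through log().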

        //compute max along axis
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));

            for (size_t cnDim = 1; cnDim < channels; cnDim++)
            {
                for (size_t i = 0; i < innerSize; i++)
                    bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
            }
        }

        //subtract max
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const int offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
            }
        }

        cv::exp(dst, dst);

        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            size_t srcOffset = outerDim * outerStep;
            size_t bufOffset = outerDim * cnStep;

            //sum exp along axis
            for (size_t i = 0; i < innerSize; i++)
                bufPtr[bufOffset + i] = 0.f;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const int offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    bufPtr[bufOffset + i] += dstPtr[offset + i];
            }

            //divide by computed sum
            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const int offset = srcOffset + cnDim * cnStep;
                for (size_t i = 0; i < innerSize; i++)
                    dstPtr[offset + i] /= bufPtr[bufOffset + i];
            }
            if (logSoftMax)
            {
                for (size_t cnDim = 0; cnDim < channels; cnDim++)
                {
                    const int offset = srcOffset + cnDim * cnStep;
                    for (size_t i = 0; i < innerSize; i++)
                        dstPtr[offset + i] = log(dstPtr[offset + i]);
                }
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
        auto channel_axis = normalize_axis(axisRaw, input_wrapper->getRank());
        return make_cuda_node<cuda4dnn::SoftmaxOp>(preferableTarget, std::move(context->cudnn_handle), channel_axis, logSoftMax);
    }
#endif

    virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_VULKAN
        vkcom::Tensor in = VkComTensor(inputs[0]);
        int cAxis = normalize_axis(axisRaw, in.dimNum());
        std::shared_ptr<vkcom::OpBase> op(new vkcom::OpSoftmax(cAxis, logSoftMax));
        return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
#endif // HAVE_VULKAN
        return Ptr<BackendNode>();
    }


    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
        int inW, inH, inC, inN;
        getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);

        if (inW != 1 || inH != 1)
            CV_Error(cv::Error::StsNotImplemented,
                     "Halide backend for SoftMax with spatial size "
                     "more than 1x1 is not implemented");

        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));

        Halide::Func expInput("expInput");
        Halide::RDom r(0, inW, 0, inH, 0, inC);
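        // Halide path: exponentiate every input element and normalise by the sum over
        // the reduction domain; spatial size is restricted to 1x1 above, so the sum
        // effectively runs over the channel axis only.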
        expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));
        Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));
        top(x, y, c, n) = expInput(x, y, c, n) / globalSum;
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_DNN_IE_NN_BUILDER_2019
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
        InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);

        InferenceEngine::Builder::SoftMaxLayer ieLayer(name);
        ieLayer.setAxis(normalize_axis(axisRaw, input->getDims().size()));

        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    }
#endif // HAVE_DNN_IE_NN_BUILDER_2019

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        int axis = normalize_axis(axisRaw, ieInpNode->get_shape().size());
        auto softmax = std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis);
        if (logSoftMax)
            return Ptr<BackendNode>(new InfEngineNgraphNode(std::make_shared<ngraph::op::v0::Log>(softmax)));

        return Ptr<BackendNode>(new InfEngineNgraphNode(softmax));
    }
#endif // HAVE_DNN_NGRAPH

    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
    {
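        // Build a 256-entry exp() lookup table, one entry per possible int8 input
        // value, and hand it to the quantized graph through params.blobs (the int8
        // kernel that consumes it lives elsewhere in the backend; noted here only
        // to explain the table's layout).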
        float inpScale = scales[0][0];
        Mat lookUpTable(1, 256, CV_32F);
        float* table = lookUpTable.ptr<float>();
        for (int i = -128; i < 128; i++)
        {
            float x = inpScale*(i - 127); // ensures exp(x) is always between (0, 1)
            table[i+128] = std::exp(x);
        }
        params.blobs.clear();
        params.blobs.push_back(lookUpTable);
        return true;
    }

    int64 getFLOPS(const std::vector<MatShape> &inputs,
                   const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        int64 flops = 0;
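        // Rough cost model: roughly four elementary operations per input element
        // (max, subtract, exponentiate, normalize), hence the factor of 4 below.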

        for (int i = 0; i < inputs.size(); i++)
        {
            flops += 4*total(inputs[i]);
        }

        return flops;
    }

    int axisRaw;
};

Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
{
    return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(params));
}

}
}
139
3rdparty/opencv-4.5.4/modules/dnn/src/layers/split_layer.cpp
vendored
Normal file
139
3rdparty/opencv-4.5.4/modules/dnn/src/layers/split_layer.cpp
vendored
Normal file
@ -0,0 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "layers_common.hpp"

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/split.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv
{
namespace dnn
{

class SplitLayerImpl CV_FINAL : public SplitLayer
{
public:
    SplitLayerImpl(const LayerParams &params)
    {
        setParamsFrom(params);
        //TODO: maybe "top_count" param is useless because it can be determined by output connections number
        if (params.has("top_count"))
        {
            outputsCount = params.get<int>("top_count");
            CV_Assert(outputsCount >= 0);
        }
        else
        {
            outputsCount = -1;
        }
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == 1);
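
        // Every output copies the input's shape; the output count comes from the
        // "top_count" parameter when present, otherwise from requiredOutputs.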
        Layer::getMemoryShapes(inputs, max(1, outputsCount >= 0 ? outputsCount : requiredOutputs),
                               outputs, internals);
        return false;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
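        // Split replicates the single input blob verbatim into every requested output.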
        for (size_t i = 0; i < outputs.size(); i++)
        {
            CV_Assert(inputs[0].total() == outputs[i].total());
            inputs[0].copyTo(outputs[i]);
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);
        return make_cuda_node<cuda4dnn::SplitOp>(preferableTarget, std::move(context->stream));
    }
#endif

    virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
                             const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
    {
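        // Split can stay quantized only if every output keeps the input's scale;
        // a mismatched scale would need a real requantization, so report failure.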
        const int numOutputs = scales[1].size();
        for (int i = 0; i < numOutputs; i++)
        {
            if (scales[1][i] != scales[0][0])
                return false;
        }
        return true;
    }
};

Ptr<SplitLayer> SplitLayer::create(const LayerParams& params)
{
    return Ptr<SplitLayer>(new SplitLayerImpl(params));
}

}
}