feat: switch the backend to PaddleOCR-NCNN and the project to CMake

1. The project backend has been fully migrated to the PaddleOCR-NCNN algorithm and has passed basic compatibility tests.
2. The project is now organized with CMake; to better accommodate third-party libraries going forward, a QMake project is no longer provided (see the CMake sketch below).
3. Reorganized the rights/license declaration files and the code layout to minimize the risk of infringement.

Log: switch the backend to PaddleOCR-NCNN and the project to CMake
Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
wangzhengyang
2022-05-10 09:54:44 +08:00
parent ecdd171c6f
commit 718c41634f
10018 changed files with 3593797 additions and 186748 deletions
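
As a rough illustration of the QMake-to-CMake move mentioned above (a minimal sketch only, not part of this diff; the project name, source list, and dependency lookup are hypothetical assumptions):

cmake_minimum_required(VERSION 3.10)
project(ocr_demo CXX)                                  # hypothetical project name
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Dependency lookup is an assumption; ncnn ships a CMake config package.
find_package(OpenCV REQUIRED)
find_package(ncnn REQUIRED)
add_executable(ocr_demo src/main.cpp)                  # hypothetical target and sources
target_link_libraries(ocr_demo PRIVATE ${OpenCV_LIBS} ncnn)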


@@ -0,0 +1,141 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2020, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
namespace cv { namespace dnn {
class AccumLayerImpl CV_FINAL : public AccumLayer
{
public:
AccumLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
top_height = params.get<int>("top_height", 0);
top_width = params.get<int>("top_width", 0);
divisor = params.get<int>("size_divisible_by", 0);
have_reference = params.get<String>("have_reference", "false") == "true";
}
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
std::vector<int> outShape;
int batch = inputs[0][0];
outShape.push_back(batch);
if (have_reference)
{
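// The last input serves as a spatial reference: channels are summed over
// all preceding inputs, while height/width are taken from the reference blob.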
CV_Assert(inputs.size() >= 2);
int totalchannels = 0;
for (int i = 0; i < inputs.size() - 1; i++) {
CV_Assert(inputs[i][0] == batch);
totalchannels += inputs[i][1];
}
outShape.push_back(totalchannels);
int height = inputs.back()[2];
int width = inputs.back()[3];
outShape.push_back(height);
outShape.push_back(width);
}
else
{
int maxwidth = -1;
int maxheight = -1;
int totalchannels = 0;
// Find largest blob size and count total channels
for (int i = 0; i < inputs.size(); ++i)
{
totalchannels += inputs[i][1];
maxheight = std::max(maxheight, inputs[i][2]);
maxwidth = std::max(maxwidth, inputs[i][3]);
CV_Assert(inputs[i][0] == batch);
}
outShape.push_back(totalchannels);
int out_h = divisor ? static_cast<int>(ceil(maxheight / static_cast<float>(divisor)) * divisor) : top_height;
int out_w = divisor ? static_cast<int>(ceil(maxwidth / static_cast<float>(divisor)) * divisor) : top_width;
// Layer can specify custom top size which is larger than default
if (out_h <= maxheight || out_w <= maxwidth)
{
out_h = maxheight;
out_w = maxwidth;
}
outShape.push_back(out_h);
outShape.push_back(out_w);
}
outputs.assign(1, outShape);
return false;
}
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
LayerParams resizeParams;
resizeParams.set("interpolation", "bilinear");
resizeParams.set("align_corners", true);
resize = ResizeLayer::create(resizeParams);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
const int out_h = outputs[0].size[2];
const int out_w = outputs[0].size[3];
float* out_data = outputs[0].ptr<float>();
std::vector<int> sizes(&outputs[0].size[0], &outputs[0].size[0] + outputs[0].size.dims());
for (int i = 0; i < inputs.size() - have_reference; i++)
{
sizes[1] = inputs[i].size[1];
Mat outSlice(sizes, CV_32F, out_data);
if (out_h == inputs[i].size[2] && out_w == inputs[i].size[3])
{
inputs[i].copyTo(outSlice);
}
else
{
std::vector<Mat> inp_slices, out_slices;
inp_slices.push_back(inputs[i]);
out_slices.push_back(outSlice);
resize->finalize(inp_slices, out_slices);
resize->forward(inp_slices, out_slices, internals_arr);
}
out_data += outSlice.total(1);
}
}
private:
int top_height;
int top_width;
int divisor;
bool have_reference;
Ptr<ResizeLayer> resize;
};
Ptr<AccumLayer> AccumLayer::create(const LayerParams& params)
{
return Ptr<AccumLayer>(new AccumLayerImpl(params));
}
}} // namespace cv::dnn


@@ -0,0 +1,447 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Batch Normalization layer.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/batch_norm.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class BatchNormLayerImpl CV_FINAL : public BatchNormLayer
{
public:
Mat origin_weights, origin_bias;
Mat weights_, bias_;
UMat umat_weight, umat_bias;
mutable int dims;
BatchNormLayerImpl(const LayerParams& params)
: dims(-1)
{
setParamsFrom(params);
CV_Assert(blobs.size() >= 2);
hasWeights = params.get<bool>("has_weight", false);
hasBias = params.get<bool>("has_bias", false);
useGlobalStats = params.get<bool>("use_global_stats", true);
if(params.get<bool>("scale_bias", false))
hasWeights = hasBias = true;
epsilon = params.get<float>("eps", 1E-5);
size_t n = blobs[0].total();
CV_Assert(blobs[1].total() == n &&
blobs[0].isContinuous() && blobs[1].isContinuous() &&
blobs[0].type() == CV_32F && blobs[1].type() == CV_32F);
float varMeanScale = 1.f;
if (!hasWeights && !hasBias && blobs.size() > 2 && useGlobalStats) {
CV_Assert(blobs.size() == 3); CV_CheckTypeEQ(blobs[2].type(), CV_32FC1, "");
varMeanScale = blobs[2].at<float>(0);
if (varMeanScale != 0)
varMeanScale = 1/varMeanScale;
}
const int biasBlobIndex = blobs.size() - 1;
const int weightsBlobIndex = biasBlobIndex - hasBias;
if( hasWeights )
{
CV_Assert((size_t)weightsBlobIndex < blobs.size());
const Mat& w = blobs[weightsBlobIndex];
CV_Assert(w.isContinuous() && w.type() == CV_32F && w.total() == (size_t)n);
}
if( hasBias )
{
CV_Assert((size_t)biasBlobIndex < blobs.size());
const Mat& b = blobs[biasBlobIndex];
CV_Assert(b.isContinuous() && b.type() == CV_32F && b.total() == (size_t)n);
}
const float* meanData = blobs[0].ptr<float>();
const float* stdData = blobs[1].ptr<float>();
const float* weightsData = hasWeights ? blobs[weightsBlobIndex].ptr<float>() : 0;
const float* biasData = hasBias ? blobs[biasBlobIndex].ptr<float>() : 0;
origin_weights.create(1, (int)n, CV_32F);
origin_bias.create(1, (int)n, CV_32F);
float* dstWeightsData = origin_weights.ptr<float>();
float* dstBiasData = origin_bias.ptr<float>();
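// Fold the normalization into a single per-channel affine transform:
// w = gamma / sqrt(var * varMeanScale + eps), b = beta - w * mean * varMeanScale.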
for (size_t i = 0; i < n; ++i)
{
float w = (hasWeights ? weightsData[i] : 1.0f) / sqrt(stdData[i] * varMeanScale + epsilon);
dstWeightsData[i] = w;
dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
}
}
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
{
origin_weights.reshape(1, 1).copyTo(weights_);
origin_bias.reshape(1, 1).copyTo(bias_);
}
void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
{
scale = weights_;
shift = bias_;
}
virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
{
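// Try to absorb a following scale/shift layer into this one:
// weights_ *= w and bias_ = bias_ * w + b.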
Mat w, b;
top->getScaleShift(w, b);
if (w.empty() && b.empty())
return false;
const int numChannels = weights_.total();
const int numFusedWeights = w.total();
const int numFusedBias = b.total();
if ((numFusedWeights != numChannels && numFusedWeights != 1 && !w.empty()) ||
(numFusedBias != numChannels && numFusedBias != 1 && !b.empty()))
return false;
if (!w.empty())
{
w = w.reshape(1, 1);
if (numFusedWeights == 1)
{
multiply(weights_, w.at<float>(0), weights_);
multiply(bias_, w.at<float>(0), bias_);
}
else
{
multiply(weights_, w, weights_);
multiply(bias_, w, bias_);
}
}
if (!b.empty())
{
b = b.reshape(1, 1);
if (numFusedBias == 1)
add(bias_, b.at<float>(0), bias_);
else
add(bias_, b.reshape(1, 1), bias_);
}
return true;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
dims = inputs[0].size();
if (!useGlobalStats && inputs[0][0] != 1)
CV_Error(Error::StsNotImplemented, "Batch normalization in training mode with batch size > 1");
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return true;
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return (backendId == DNN_BACKEND_OPENCV) ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide()) ||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && (preferableTarget == DNN_TARGET_CPU || dims == 4));
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inputs_.depth() == CV_16S);
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
CV_Assert(blobs.size() >= 2);
CV_Assert(inputs.size() == 1);
if (use_half && inputs[0].dims == 2)
return false;
if (umat_weight.empty())
{
weights_.copyTo(umat_weight);
bias_.copyTo(umat_bias);
}
UMat &inpBlob = inputs[0];
int groups = inpBlob.size[0];
int channels = inpBlob.size[1];
int planeSize = 1;
for (size_t i = 2; i < inpBlob.dims; i++) {
planeSize *= inpBlob.size[i];
}
String opts = (use_half) ? " -DDtype=half" : " -DDtype=float";
for (size_t ii = 0; ii < outputs.size(); ii++)
{
if (inpBlob.dims == 2)
{
UMat& src = inputs[ii];
UMat& dst = outputs[ii];
multiply(src, weights_, dst);
add(dst, bias_, dst);
}
else
{
MatShape s = shape(groups * channels, planeSize);
UMat src = inputs[ii].reshape(1, s.size(), &s[0]);
UMat dst = outputs[ii].reshape(1, s.size(), &s[0]);
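// Pick the widest vectorization (8, 4 or 1) that divides the row length
// and build the matching batch_norm kernel variant.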
int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
String buildopt = format("-DNUM=%d", number) + opts;
String kname = format("batch_norm%d", number);
if (number == 1)
buildopt += format(" -Dconvert_T=convert_%s", use_half ? "half" : "float");
else
buildopt += format(" -Dconvert_T=convert_%s%d", use_half ? "half" : "float", number);
ocl::Kernel kernel(kname.c_str(), ocl::dnn::batchnorm_oclsrc, buildopt);
if (kernel.empty())
return false;
size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
kernel.set(1, (int)s[0]);
kernel.set(2, (int)s[1]);
kernel.set(3, (int)channels);
kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_weight));
kernel.set(5, ocl::KernelArg::PtrReadOnly(umat_bias));
kernel.set(6, ocl::KernelArg::PtrWriteOnly(dst));
bool ret = kernel.run_(2, global, NULL, false);
if (!ret)
return false;
}
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(blobs.size() >= 2);
CV_Assert(inputs.size() == 1);
Mat &inpBlob = inputs[0];
int planeSize = 1;
for (size_t i = 2; i < inpBlob.dims; i++) {
planeSize *= inpBlob.size[i];
}
for (size_t ii = 0; ii < outputs.size(); ii++)
{
Mat &outBlob = outputs[ii];
for(int num = 0; num < outBlob.size[0]; num++)
{
for (int n = 0; n < outBlob.size[1]; n++)
{
float w = weights_.at<float>(n);
float b = bias_.at<float>(n);
Mat inpBlobPlane(1, planeSize, CV_32F, inpBlob.ptr<float>(num, n));
Mat outBlobPlane(1, planeSize, CV_32F, outBlob.ptr<float>(num, n));
inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b);
}
}
}
}
void forwardSlice(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE
{
for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize )
{
int i = 0;
float w = weights_.at<float>(cn);
float b = bias_.at<float>(cn);
#if CV_SIMD128
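// Vectorized path: apply y = w*x + b to 16 floats per iteration.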
v_float32x4 wV = v_setall_f32(w), bV = v_setall_f32(b);
for( ; i <= len - 16; i += 16 )
{
v_float32x4 x0 = v_load(srcptr + i);
v_float32x4 x1 = v_load(srcptr + i + 4);
v_float32x4 x2 = v_load(srcptr + i + 8);
v_float32x4 x3 = v_load(srcptr + i + 12);
x0 = v_muladd(x0, wV, bV);
x1 = v_muladd(x1, wV, bV);
x2 = v_muladd(x2, wV, bV);
x3 = v_muladd(x3, wV, bV);
v_store(dstptr + i, x0);
v_store(dstptr + i + 4, x1);
v_store(dstptr + i + 8, x2);
v_store(dstptr + i + 12, x3);
}
#endif
for( ; i < len; i++ )
dstptr[i] = w * srcptr[i] + b;
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::BatchNormOp>(preferableTarget, std::move(context->stream), weights_, bias_);
}
#endif
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
{
switch (node->backendId)
{
case DNN_BACKEND_HALIDE:
{
#ifdef HAVE_HALIDE
auto base = node.dynamicCast<HalideBackendNode>();
Halide::Func& input = base->funcs.back();
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = attachHalide(input(x, y, c, n));
return Ptr<BackendNode>(new HalideBackendNode(base, top));
#endif // HAVE_HALIDE
break;
}
}
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
Halide::Buffer<float> input = halideBuffer(inputs[0]);
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = attachHalide(input(x, y, c, n));
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_HALIDE
// attachHalide can work both with Halide::Buffer and Halide::Func. In the
// second case it will be a fusion.
Halide::Func attachHalide(const Halide::Expr& input)
{
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Var x("x"), y("y"), c("c"), n("n");
const int numChannels = weights_.total();
auto weights = wrapToHalideBuffer(weights_, {numChannels});
auto bias = wrapToHalideBuffer(bias_, {numChannels});
top(x, y, c, n) = input * weights(c) + bias(c);
return top;
}
#endif // HAVE_HALIDE
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name);
const size_t numChannels = weights_.total();
addConstantData("weights", wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
addConstantData("biases", wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C), ieLayer);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
shape[1] = weights_.total();
auto weight = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), weights_.data);
auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), bias_.data);
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2)
auto scale_node = std::make_shared<ngraph::op::v1::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
#else
auto scale_node = std::make_shared<ngraph::op::v0::Multiply>(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY);
#endif
auto scale_shift = std::make_shared<ngraph::op::v1::Add>(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
return Ptr<BackendNode>(new InfEngineNgraphNode(scale_shift));
}
#endif // HAVE_DNN_NGRAPH
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
params.set("input_scale", scales[0][0]);
params.set("input_zeropoint", zeropoints[0][0]);
params.blobs.clear();
params.blobs.push_back(origin_weights);
params.blobs.push_back(origin_bias);
return true;
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
int64 flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
flops += 3*total(inputs[i]);
}
return flops;
}
private:
bool useGlobalStats;
};
Ptr<BatchNormLayer> BatchNormLayer::create(const LayerParams& params)
{
return Ptr<BatchNormLayer>(new BatchNormLayerImpl(params));
}
} // namespace dnn
} // namespace cv


@@ -0,0 +1,198 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reshape.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class BlankLayerImpl CV_FINAL : public BlankLayer
{
public:
BlankLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return true;
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
for (int i = 0, n = outputs.size(); i < n; ++i)
{
void *src_handle = inputs[i].handle(ACCESS_READ);
void *dst_handle = outputs[i].handle(ACCESS_WRITE);
if (src_handle != dst_handle)
inputs[i].copyTo(outputs[i]);
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
for (int i = 0, n = outputs.size(); i < n; ++i)
if (outputs[i].data != inputs[i].data)
inputs[i].copyTo(outputs[i]);
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
std::vector<size_t> dims = input->getDims();
CV_Assert(!dims.empty());
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
{
ieLayer.setType("Copy");
}
else
{
ieLayer.setType("Split");
ieLayer.getParameters()["axis"] = dims.size() - 1;
ieLayer.getParameters()["out_sizes"] = dims[0];
}
ieLayer.setInputPorts({InferenceEngine::Port(dims)});
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
ngraph::OutputVector inp{ieInpNode};
auto blank = std::make_shared<ngraph::op::Concat>(inp, 0);
return Ptr<BackendNode>(new InfEngineNgraphNode(blank));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
return true;
}
};
Ptr<Layer> BlankLayer::create(const LayerParams& params)
{
// In case of Caffe's Dropout layer from Faster-RCNN framework,
// https://github.com/rbgirshick/caffe-fast-rcnn/tree/faster-rcnn
// return Power layer.
if (!params.get<bool>("scale_train", true))
{
float scale = 1 - params.get<float>("dropout_ratio", 0.5f);
CV_Assert(scale > 0);
LayerParams powerParams;
powerParams.name = params.name;
powerParams.type = "Power";
powerParams.set("scale", scale);
return PowerLayer::create(powerParams);
}
else
return Ptr<BlankLayer>(new BlankLayerImpl(params));
}
}
}


@@ -0,0 +1,419 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/concat.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class ConcatLayerImpl CV_FINAL : public ConcatLayer
{
public:
ConcatLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
axis = params.get<int>("axis", 1);
padding = params.get<bool>("padding", false);
paddingValue = params.get<int>("padding_value", 0);
}
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() > 0);
outputs.resize(1, inputs[0]);
int cAxis = normalize_axis(axis, inputs[0]);
int axisSum = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape curShape = inputs[i];
if (padding)
{
for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
{
outputs[0][curAxis] = std::max(outputs[0][curAxis], curShape[curAxis]);
}
}
else
{
CV_Assert(curShape.size() == outputs[0].size());
for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
{
if (curAxis != cAxis && outputs[0][curAxis] != curShape[curAxis])
CV_Error(Error::StsBadSize, "Inconsistent shape for ConcatLayer");
}
}
axisSum += curShape[cAxis];
}
outputs[0][cAxis] = axisSum;
return false;
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) || // By channels
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !padding) ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding);
}
template <class T>
class ChannelConcatInvoker : public ParallelLoopBody
{
public:
std::vector<Mat>* inputs;
Mat* output;
int nstripes;
std::vector<const T*> chptrs;
static void run(std::vector<Mat>& inputs, Mat& output, int nstripes)
{
ChannelConcatInvoker cc;
cc.inputs = &inputs;
cc.output = &output;
cc.nstripes = nstripes;
size_t i, ninputs = inputs.size();
int nchannels = 0, batchsz = output.size[0];
for( i = 0; i < ninputs; i++ )
{
Mat& inp = inputs[i];
CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S || inp.type() == CV_8S) &&
inp.dims == 4 && inp.size[0] == output.size[0] &&
inp.size[2] == output.size[2] &&
inp.size[3] == output.size[3] );
nchannels += inp.size[1];
}
CV_Assert( nchannels == output.size[1] );
CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S || output.type() == CV_8S) );
cc.chptrs.resize(nchannels*batchsz);
int ofs = 0;
for( i = 0; i < ninputs; i++)
{
Mat& inp = inputs[i];
for( int j = 0; j < batchsz; j++ )
for( int k = 0; k < inp.size[1]; k++ )
{
const T* ptr = inp.ptr<T>(j, k);
cc.chptrs[ofs + j*nchannels + k] = ptr;
}
ofs += inp.size[1];
}
parallel_for_(Range(0, nstripes), cc, nstripes);
}
ChannelConcatInvoker() : inputs(0), output(0), nstripes(0) {}
void operator()(const Range& r) const CV_OVERRIDE
{
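// The output is a flat sequence of batch*channel planes; split the total
// element count into stripes and copy each plane slice with memcpy in
// blocks of at most 2^16 elements.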
size_t planeSize = (size_t)output->size[2]*output->size[3];
size_t nch = chptrs.size();
size_t total = nch*planeSize;
size_t stripeSize = (total + nstripes - 1)/nstripes;
size_t stripeStart = r.start*stripeSize;
size_t stripeEnd = std::min(total, r.end*stripeSize);
const T** ptrs = (const T**)&chptrs[0];
T* outptr = output->ptr<T>();
size_t blockSize0 = 1 << 16;
for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; )
{
size_t ch = ofs0/planeSize;
size_t ofs = ofs0 - ch*planeSize;
size_t blockSize = std::min(blockSize0, planeSize - ofs);
memcpy(outptr + ofs0, ptrs[ch] + ofs, blockSize*sizeof(outptr[0]));
ofs0 += blockSize;
}
}
};
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int cAxis = normalize_axis(axis, inputs[0].dims);
if (padding)
return false;
int bottom_concat_axis;
int concat_size = total(shape(inputs[0]), cAxis + 1);
int top_concat_axis = outputs[0].size[cAxis];
int num_concats = total(shape(inputs[0]), 0, cAxis);
int offset_concat_axis = 0;
UMat& outMat = outputs[0];
String buildopt = format(" -DDtype=%s", (use_half) ? "half" : "float");
String kname = format("concat_%s", use_half ? "half" : "float");
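// Launch one concat kernel per input, advancing offset_concat_axis so each
// input lands in its own slice of the output along the concat axis.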
for (size_t i = 0; i < inputs.size(); i++)
{
ocl::Kernel kernel(kname.c_str(), ocl::dnn::concat_oclsrc, buildopt);
if (kernel.empty())
return false;
UMat& inpMat = inputs[i];
bottom_concat_axis = inputs[i].size[cAxis];
size_t nthreads = inputs[i].total();
kernel.set(0, (int)nthreads);
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
kernel.set(2, (int)num_concats);
kernel.set(3, (int)concat_size);
kernel.set(4, (int)top_concat_axis);
kernel.set(5, (int)bottom_concat_axis);
kernel.set(6, (int)offset_concat_axis);
kernel.set(7, ocl::KernelArg::PtrWriteOnly(outMat));
if (!kernel.run(1, &nthreads, NULL, false))
return false;
offset_concat_axis += bottom_concat_axis;
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
inputs_arr.depth() != CV_8S,
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
int cAxis = normalize_axis(axis, inputs[0].dims);
Mat& outMat = outputs[0];
if (padding)
outMat.setTo(paddingValue);
if( cAxis == 1 && outMat.dims == 4 && !padding)
{
int nstripes = getNumThreads();
if (outMat.type() == CV_8S)
ChannelConcatInvoker<int8_t>::run(inputs, outMat, nstripes);
else
ChannelConcatInvoker<float>::run(inputs, outMat, nstripes);
}
else
{
std::vector<Range> ranges(outputs[0].dims, Range::all());
ranges[cAxis].start = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
ranges[cAxis].end = ranges[cAxis].start + inputs[i].size[cAxis];
for (int j = 0; j < outMat.dims; ++j)
{
if (j == cAxis) continue;
ranges[j].start = (outMat.size[j] - inputs[i].size[j]) / 2;
ranges[j].end = ranges[j].start + inputs[i].size[j];
}
inputs[i].copyTo(outMat(&ranges[0]));
ranges[cAxis].start = ranges[cAxis].end;
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
auto concat_axis = normalize_axis(axis, input_wrapper->getRank());
return make_cuda_node<cuda4dnn::ConcatOp>(preferableTarget, std::move(context->stream), concat_axis, padding);
}
#endif
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
{
#ifdef HAVE_VULKAN
vkcom::Tensor in = VkComTensor(input[0]);
int cAxis = normalize_axis(axis, in.dimNum());
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpConcat(cAxis));
return Ptr<BackendNode>(new VkComBackendNode(input, op));
#endif // HAVE_VULKAN
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
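// Chain select() over the channel coordinate so that each input's channel
// range maps onto its offset interval in the concatenated output.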
int offset = inputBuffers[0].channels();
Halide::Expr topExpr = select(c < offset,
inputBuffers[0](x, y, c, n),
inputBuffers[1](x, y, c - offset, n));
for (int i = 2; i < input.size(); ++i)
{
offset += inputBuffers[i - 1].channels();
topExpr = select(c < offset, topExpr,
inputBuffers[i](x, y, c - offset, n));
}
top(x, y, c, n) = topExpr;
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::Builder::ConcatLayer ieLayer(name);
ieLayer.setAxis(normalize_axis(axis, input->getDims().size()));
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]);
const int numDims = data->getDims().size();
const int cAxis = normalize_axis(axis, numDims);
std::vector<size_t> maxDims(numDims, 0);
CV_Assert(inputs.size() == nodes.size());
ngraph::OutputVector inp_nodes;
for (int i = 0; i < nodes.size(); ++i)
{
inp_nodes.push_back(nodes[i].dynamicCast<InfEngineNgraphNode>()->node);
std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
for (int i = 0; i < numDims; ++i)
maxDims[i] = std::max(maxDims[i], inpShape[i]);
}
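// Symmetrically pad any input whose non-axis dimensions are smaller than
// the per-dimension maximum before concatenation.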
for (int i = 0; i < inp_nodes.size(); ++i)
{
bool needPadding = false;
std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
std::vector<int64_t> begins(inpShape.size(), 0), ends(inpShape.size(), 0);
for (int j = 0; j < inpShape.size(); ++j)
{
if (j != cAxis && inpShape[j] != maxDims[j])
{
needPadding = true;
begins[j] = static_cast<int64_t>((maxDims[j] - inpShape[j]) / 2);
ends[j] = static_cast<int64_t>(maxDims[j] - inpShape[j] - begins[j]);
}
}
if (needPadding)
{
inp_nodes[i] = std::make_shared<ngraph::op::v1::Pad>(
inp_nodes[i],
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{begins.size()}, begins.data()),
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ends.size()}, ends.data()),
ngraph::op::PadMode::CONSTANT);
}
}
auto concat = std::make_shared<ngraph::op::Concat>(inp_nodes, cAxis);
return Ptr<BackendNode>(new InfEngineNgraphNode(concat));
}
#endif // HAVE_DNN_NGRAPH
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
if (padding)
params.set("padding_value", zeropoints[1][0]);
return true;
}
};
Ptr<ConcatLayer> ConcatLayer::create(const LayerParams& params)
{
return Ptr<ConcatLayer>(new ConcatLayerImpl(params));
}
}
}


@@ -0,0 +1,131 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "../op_inf_engine.hpp"
#include "../op_cuda.hpp"
#include "layers_common.hpp"
#include "../ie_ngraph.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/const.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
class ConstLayerImpl CV_FINAL : public ConstLayer
{
public:
ConstLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
CV_Assert(blobs.size() == 1);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
backendId == DNN_BACKEND_CUDA;
}
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.empty());
outputs.assign(1, shape(blobs[0]));
return false;
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> outputs;
outs.getUMatVector(outputs);
if (outs.depth() == CV_16S)
convertFp16(blobs[0], outputs[0]);
else
blobs[0].copyTo(outputs[0]);
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> outputs;
outputs_arr.getMatVector(outputs);
blobs[0].copyTo(outputs[0]);
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::ConstLayer ieLayer(name);
ieLayer.setData(wrapToInfEngineBlob(blobs[0]));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
getShape<size_t>(blobs[0]),
blobs[0].data);
return Ptr<BackendNode>(new InfEngineNgraphNode(node));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
CV_Assert(blobs.size() == 1);
return make_cuda_node<cuda4dnn::ConstOp>(preferableTarget, std::move(context->stream), blobs[0]);
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
Mat quantizedBlob;
blobs[0].convertTo(quantizedBlob, CV_8S, 1.f/scales[1][0], zeropoints[1][0]);
params.blobs.clear();
params.blobs.push_back(quantizedBlob);
return true;
}
};
Ptr<Layer> ConstLayer::create(const LayerParams& params)
{
return Ptr<Layer>(new ConstLayerImpl(params));
}
}} // namespace cv::dnn

File diff suppressed because it is too large.


@@ -0,0 +1,207 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2020, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
namespace cv { namespace dnn {
class CorrelationLayerImpl CV_FINAL : public CorrelationLayer
{
public:
CorrelationLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
pad = params.get<int>("pad", 0);
CV_Assert_N(params.has("kernel_size"), params.has("max_displacement"));
max_displacement = params.get<int>("max_displacement");
kernel = params.get<int>("kernel_size");
if (kernel % 2 == 0)
CV_Error(Error::StsNotImplemented, "Odd kernel size required.");
stride_1 = params.get<int>("stride_1", 1);
stride_2 = params.get<int>("stride_2", 1);
}
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert_N(inputs.size() == 2, inputs[0].size() == 4, inputs[1].size() == 4);
int padded_height = inputs[0][2] + 2 * pad;
int padded_width = inputs[0][3] + 2 * pad;
int kernel_radius = (kernel - 1) / 2;
int border_size = max_displacement + kernel_radius;
int neighborhood_grid_radius = max_displacement / stride_2;
int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
std::vector<int> outShape;
int num = inputs[0][0];
outShape.push_back(num);
int out_c = neighborhood_grid_width * neighborhood_grid_width;
outShape.push_back(out_c);
int out_h = ceil(static_cast<float>(padded_height - border_size * 2) / stride_1);
int out_w = ceil(static_cast<float>(padded_width - border_size * 2) / stride_1);
CV_Assert_N(out_h >= 1, out_w >= 1);
outShape.push_back(out_h);
outShape.push_back(out_w);
outputs.assign(1, outShape);
return false;
}
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
int padded_height = inputs[0].size[2] + 2 * pad;
int padded_width = inputs[0].size[3] + 2 * pad;
int size[] = {inputs[0].size[0], padded_height, padded_width, inputs[0].size[1]};
rbot0 = Mat(4, &size[0], CV_32F, float(0));
rbot1 = Mat(4, &size[0], CV_32F, float(0));
}
void blobRearrangeKernel2(const Mat& input, Mat& output)
{
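// Repack the NCHW input into a zero-padded NHWC buffer so that each pixel's
// channel vector is contiguous for the correlation inner product.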
const int num = input.size[0];
const int channels = input.size[1];
const int height = input.size[2];
const int width = input.size[3];
const int area = height * width;
const int pad_area = (width + 2 * pad) * (height + 2 * pad);
const float* in = input.ptr<float>();
float* out = output.ptr<float>();
for (int n = 0; n < num; n++)
{
for (int ch = 0; ch < channels; ch++)
{
for (int xy = 0; xy < area; xy++)
{
float value = in[(n * channels + ch) * area + xy];
int xpad = (xy % width + pad);
int ypad = (xy / width + pad);
int xypad = ypad * (width + 2 * pad) + xpad;
out[(n * pad_area + xypad) * channels + ch] = value;
}
}
}
}
void correlationKernelSubtraction(const Mat& input0, const Mat& input1, Mat& output, int item)
{
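// For every output pixel, correlate a kernel-sized patch of input0 with a
// patch of input1 displaced by (s2o, s2p); each output channel is one
// displacement in the neighborhood grid, normalized by the patch size.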
const int inp_h = input0.size[1];
const int inp_w = input0.size[2];
const int inp_c = input0.size[3];
const int out_c = output.size[1];
const int out_h = output.size[2];
const int out_w = output.size[3];
int topcount = output.total(1);
int neighborhood_grid_radius = max_displacement / stride_2;
int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
const float* inp0_data = input0.ptr<float>();
const float* inp1_data = input1.ptr<float>();
float* out_data = output.ptr<float>();
int sumelems = kernel * kernel * inp_c;
std::vector<float> patch_data(sumelems, 0);
for (int y = 0; y < out_h; y++)
{
for (int x = 0; x < out_w; x++)
{
int x1 = x * stride_1 + max_displacement;
int y1 = y * stride_1 + max_displacement;
for (int j = 0; j < kernel; j++)
{
for (int i = 0; i < kernel; i++)
{
int ji_off = ((j * kernel) + i) * inp_c;
for (int ch = 0; ch < inp_c; ch++)
{
int idx1 = ((item * inp_h + y1 + j) * inp_w + x1 + i) * inp_c + ch;
int idxPatchData = ji_off + ch;
patch_data[idxPatchData] = inp0_data[idx1];
}
}
}
for (int out_ch = 0; out_ch < out_c; out_ch++)
{
float sum = 0;
int s2o = (out_ch % neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
int s2p = (out_ch / neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
int x2 = x1 + s2o;
int y2 = y1 + s2p;
for (int j = 0; j < kernel; j++)
{
for (int i = 0; i < kernel; i++)
{
int ji_off = ((j * kernel) + i) * inp_c;
for (int ch = 0; ch < inp_c; ch++)
{
int idxPatchData = ji_off + ch;
int idx2 = ((item * inp_h + y2 + j) * inp_w + x2 + i) * inp_c + ch;
sum += patch_data[idxPatchData] * inp1_data[idx2];
}
}
}
int index = ((out_ch * out_h + y) * out_w) + x;
out_data[index + item * topcount] = static_cast<float>(sum) / sumelems;
}
}
}
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
blobRearrangeKernel2(inputs[0], rbot0);
blobRearrangeKernel2(inputs[1], rbot1);
for (int i = 0; i < inputs[0].size[0]; i++)
{
correlationKernelSubtraction(rbot0, rbot1, outputs[0], i);
}
}
private:
int pad;
int kernel;
int max_displacement;
int stride_1;
int stride_2;
Mat rbot0;
Mat rbot1;
};
Ptr<CorrelationLayer> CorrelationLayer::create(const LayerParams& params)
{
return Ptr<CorrelationLayer>(new CorrelationLayerImpl(params));
}
}} // namespace cv::dnn


@@ -0,0 +1,186 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "../ie_ngraph.hpp"
#include "layers_common.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/crop_and_resize.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
class CropAndResizeLayerImpl CV_FINAL : public CropAndResizeLayer
{
public:
CropAndResizeLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
CV_Assert_N(params.has("width"), params.has("height"));
outWidth = params.get<float>("width");
outHeight = params.get<float>("height");
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV
|| backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH
|| backendId == DNN_BACKEND_CUDA
;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert_N(inputs.size() == 2, inputs[0].size() == 4);
if (inputs[0][0] != 1)
CV_Error(Error::StsNotImplemented, "");
outputs.resize(1, MatShape(4));
outputs[0][0] = inputs[1][2]; // Number of bounding boxes.
outputs[0][1] = inputs[0][1]; // Number of channels.
outputs[0][2] = outHeight;
outputs[0][3] = outWidth;
return false;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
Mat& inp = inputs[0];
Mat& out = outputs[0];
Mat boxes = inputs[1].reshape(1, inputs[1].total() / 7);
const int numChannels = inp.size[1];
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
const int inpSpatialSize = inpHeight * inpWidth;
const int outSpatialSize = outHeight * outWidth;
CV_Assert_N(inp.isContinuous(), out.isContinuous());
for (int b = 0; b < boxes.rows; ++b)
{
float* outDataBox = out.ptr<float>(b);
float left = boxes.at<float>(b, 3);
float top = boxes.at<float>(b, 4);
float right = boxes.at<float>(b, 5);
float bottom = boxes.at<float>(b, 6);
float boxWidth = right - left;
float boxHeight = bottom - top;
float heightScale = boxHeight * static_cast<float>(inpHeight - 1) / (outHeight - 1);
float widthScale = boxWidth * static_cast<float>(inpWidth - 1) / (outWidth - 1);
for (int y = 0; y < outHeight; ++y)
{
float input_y = top * (inpHeight - 1) + y * heightScale;
int y0 = static_cast<int>(input_y);
const float* inpData_row0 = inp.ptr<float>(0, 0, y0);
const float* inpData_row1 = (y0 + 1 < inpHeight) ? (inpData_row0 + inpWidth) : inpData_row0;
for (int x = 0; x < outWidth; ++x)
{
float input_x = left * (inpWidth - 1) + x * widthScale;
int x0 = static_cast<int>(input_x);
int x1 = std::min(x0 + 1, inpWidth - 1);
float* outData = outDataBox + y * outWidth + x;
const float* inpData_row0_c = inpData_row0;
const float* inpData_row1_c = inpData_row1;
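// Bilinear interpolation between the four neighboring input pixels
// (x0, y0), (x1, y0), (x0, y1), (x1, y1), accumulated per channel.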
for (int c = 0; c < numChannels; ++c)
{
*outData = inpData_row0_c[x0] +
(input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
(input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
(input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
inpData_row0_c += inpSpatialSize;
inpData_row1_c += inpSpatialSize;
outData += outSpatialSize;
}
}
}
}
if (boxes.rows < out.size[0])
{
// left = top = right = bottom = 0
std::vector<cv::Range> dstRanges(4, Range::all());
dstRanges[0] = Range(boxes.rows, out.size[0]);
out(dstRanges).setTo(inp.ptr<float>(0, 0, 0)[0]);
}
}
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
// Slice second input: from 1x1xNx7 to 1x1xNx5
auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto rois = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
auto rois_shape = rois->get_shape();
std::vector<int64_t> dims(rois_shape.begin(), rois_shape.end()), offsets(4, 0);
offsets[3] = 2;
dims[3] = 7;
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{offsets.size()}, offsets.data());
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{dims.size()}, dims.data());
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{dims.size()}, std::vector<int64_t>((int64_t)dims.size(), 1));
auto slice = std::make_shared<ngraph::op::v1::StridedSlice>(rois,
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
// Reshape rois from 4D to 2D
std::vector<int64_t> shapeData = {dims[2], 5};
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shapeData.data());
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(slice, shape, true);
auto roiPooling =
std::make_shared<ngraph::op::v0::ROIPooling>(input, reshape,
ngraph::Shape{(size_t)outHeight, (size_t)outWidth},
1.0f, "bilinear");
return Ptr<BackendNode>(new InfEngineNgraphNode(roiPooling));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::CropAndResizeOp>(preferableTarget, std::move(context->stream));
}
#endif
private:
int outWidth, outHeight;
};
Ptr<Layer> CropAndResizeLayer::create(const LayerParams& params)
{
return Ptr<CropAndResizeLayer>(new CropAndResizeLayerImpl(params));
}
} // namespace dnn
} // namespace cv


@@ -0,0 +1,131 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "../precomp.hpp"
#include "layers_common.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
class CumSumLayerImpl CV_FINAL : public CumSumLayer
{
public:
CumSumLayerImpl(const LayerParams &params)
{
axis_raw = params.get<int>("axis", 0);
exclusive_raw = params.get<int>("exclusive", 0);
reverse_raw = params.get<int>("reverse", 0);
setParamsFrom(params);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return true;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
// Get x tensor.
const auto &src_mat = inputs[0];
const auto *src_ptr = src_mat.ptr<float>();
// Get axis.
const int axis = normalize_axis(axis_raw, src_mat.dims);
// Get y tensor.
auto &dst_mat = outputs[0];
src_mat.copyTo(dst_mat);
auto *dst_ptr = dst_mat.ptr<float>();
// Get flags.
const auto exclusive = exclusive_raw == 1;
const auto reverse = reverse_raw == 1;
// Get parameters to iterate outer dimension.
const size_t outer_size = src_mat.total(0, axis);
const size_t outer_step_length = src_mat.total(axis);
// Get parameters to iterate inner dimension.
const size_t inner_size = src_mat.size[axis];
if (!inner_size)
return;
const size_t inner_step_length = src_mat.total(axis + 1);
const int inner_step = (reverse ? -1 : 1) * inner_step_length;
const int inner_start = reverse ? inner_size - 1 : 0;
const int inner_stop = reverse ? -1 : inner_size;
const int inner_delta = reverse ? -1 : 1;
// Get parameters to populate channels.
const size_t num_channels = src_mat.total(axis + 1);
for (size_t outer_dim = 0; outer_dim < outer_size; outer_dim++)
{
const size_t outer_offset = outer_dim * outer_step_length;
size_t src_offset = outer_offset + inner_start * inner_step_length;
// Populate first element of inner dimension.
for (size_t channel = 0; channel < num_channels; channel++)
{
if (exclusive)
{
dst_ptr[src_offset + channel] = 0.0f;
}
else
{
dst_ptr[src_offset + channel] = src_ptr[src_offset + channel];
src_offset += inner_step;
}
}
// Populate remaining elements of inner dimension.
for (int inner_dim = inner_start + inner_delta; inner_dim != inner_stop; inner_dim += inner_delta)
{
const size_t dst_offset = outer_offset + inner_dim * inner_step_length;
for (size_t channel = 0; channel < num_channels; channel++)
{
const size_t previous_dst_offset = dst_offset - inner_step;
dst_ptr[dst_offset + channel] = dst_ptr[previous_dst_offset + channel] +
src_ptr[src_offset + channel];
src_offset += inner_step;
}
}
}
}
int axis_raw;
int exclusive_raw;
int reverse_raw;
};
Ptr<CumSumLayer> CumSumLayer::create(const LayerParams& params)
{
return Ptr<CumSumLayer>(new CumSumLayerImpl(params));
}
}
}
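
A hedged usage sketch for the layer above (not part of this commit): it assumes the public CumSumLayer factory declared in opencv2/dnn/all_layers.hpp and uses an illustrative 1x4 input.

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>

int main()
{
    cv::dnn::LayerParams lp;
    lp.set("axis", 1);        // accumulate along the last axis of a 1x4 input
    lp.set("exclusive", 0);   // include the current element in each sum
    lp.set("reverse", 0);
    cv::Ptr<cv::dnn::CumSumLayer> layer = cv::dnn::CumSumLayer::create(lp);

    cv::Mat src = (cv::Mat_<float>(1, 4) << 1, 2, 3, 4);
    std::vector<cv::Mat> inputs{src}, outputs{cv::Mat(1, 4, CV_32F)}, internals;
    layer->forward(inputs, outputs, internals);
    // outputs[0] now holds the running sums {1, 3, 6, 10}
    return 0;
}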

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,965 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/eltwise.hpp"
#include "../cuda4dnn/primitives/shortcut.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class EltwiseLayerImpl CV_FINAL : public EltwiseLayer
{
public:
enum EltwiseOp
{
PROD = 0,
SUM = 1,
MAX = 2,
DIV = 3,
MIN = 4,
} op;
std::vector<float> coeffs;
enum OutputChannelsMode
{
ELTWISE_CHANNNELS_SAME = 0, //!< number of channels from inputs must be the same and equal to output's number of channels
ELTWISE_CHANNNELS_INPUT_0, //!< number of channels from inputs may be different,
//!< output's number of channels is equal to number of channels of first input
//!< number of channels of other inputs should not be greater than number of channels of first input
ELTWISE_CHANNNELS_INPUT_0_TRUNCATE, //!< number of channels from inputs may be different,
//!< output's number of channels is equal to number of channels of first input
//!< there is no restriction on the number of channels of other inputs
//!< extra channels of other inputs are ignored
ELTWISE_CHANNNELS_USE_MAX, //!< number of channels from inputs may be different,
//!< output's number of channels is equal to maximal number of input channels
//!< @note supported operation: `SUM`
} channelsModeInput;
mutable OutputChannelsMode channelsMode; //!< "optimized" channels mode (switch to ELTWISE_CHANNNELS_SAME if number of input channels are equal)
mutable /*size_t*/int outputChannels;
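// Illustration with two inputs shaped [N,64,H,W] and [N,32,H,W]:
//   ELTWISE_CHANNNELS_SAME             - rejected: channel counts differ
//   ELTWISE_CHANNNELS_INPUT_0          - output [N,64,H,W]; for SUM with unit coefficients,
//                                        channels 32..63 carry input 0 unchanged
//   ELTWISE_CHANNNELS_INPUT_0_TRUNCATE - output [N,64,H,W]; a later input with more than
//                                        64 channels would have its extra channels dropped
//   ELTWISE_CHANNNELS_USE_MAX          - output [N,max(64,32),H,W] = [N,64,H,W] (SUM only)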
EltwiseLayerImpl(const LayerParams& params)
: outputChannels(0)
{
setParamsFrom(params);
hasVecInput = false;
op = SUM;
if (params.has("operation"))
{
String operation = toLowerCase(params.get<String>("operation"));
if (operation == "prod")
op = PROD;
else if (operation == "sum")
op = SUM;
else if (operation == "max")
op = MAX;
else if (operation == "min")
op = MIN;
else if (operation == "div")
op = DIV;
else
CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
}
if (params.has("coeff"))
{
DictValue paramCoeff = params.get("coeff");
int i, n = paramCoeff.size();
coeffs.resize(n);
for (i = 0; i < n; i++)
{
coeffs[i] = paramCoeff.get<float>(i);
}
}
channelsModeInput = ELTWISE_CHANNNELS_SAME;
if (params.has("output_channels_mode"))
{
String v = toLowerCase(params.get<String>("output_channels_mode"));
if (v == "same")
{
channelsModeInput = ELTWISE_CHANNNELS_SAME;
}
else if (v == "input_0")
{
channelsModeInput = ELTWISE_CHANNNELS_INPUT_0;
}
else if (v == "input_0_truncate")
{
channelsModeInput = ELTWISE_CHANNNELS_INPUT_0_TRUNCATE;
}
else if (v == "max_input_channels")
{
channelsModeInput = ELTWISE_CHANNNELS_USE_MAX;
if (op != SUM)
CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") 'max' channels mode is limited to SUM operation only");
}
else
CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") unknown channels mode: \"" + v + "\"");
}
channelsMode = channelsModeInput;
// TODO Must have checks for other unknown options
}
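// Minimal configuration sketch (hypothetical values, mirroring the parsing above):
//   LayerParams lp;
//   lp.type = "Eltwise";
//   lp.set("operation", "sum");                // prod | sum | max | min | div
//   lp.set("output_channels_mode", "input_0"); // optional, defaults to "same"
//   Ptr<EltwiseLayer> layer = EltwiseLayer::create(lp);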
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (hasVecInput && channelsModeInput == ELTWISE_CHANNNELS_SAME)
return backendId == DNN_BACKEND_OPENCV;
if (backendId == DNN_BACKEND_CUDA)
{
if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
return op == SUM && coeffs.empty();
return channelsModeInput == ELTWISE_CHANNNELS_SAME;
}
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_HALIDE && op != DIV) || // TODO: not implemented, see PR #15811
((((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()))
|| backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && channelsMode == ELTWISE_CHANNNELS_SAME));
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() >= 2);
CV_Assert(inputs[0].size() >= 2);
CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
CV_Assert(op == SUM || coeffs.size() == 0);
int dims = inputs[0].size();
// Number of channels in output shape is determined by the first input tensor.
bool variableChannels = false;
int numChannels = inputs[0][1];
for (size_t i = 1; i < inputs.size(); i++)
{
CV_Assert(inputs[0][0] == inputs[i][0]); // batch sizes are equal
int input_channels = inputs[i][1];
if (numChannels != input_channels)
variableChannels = true;
if (channelsModeInput == ELTWISE_CHANNNELS_SAME)
{
CV_Assert(numChannels == input_channels);
}
else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0)
{
CV_Assert(numChannels >= input_channels);
}
else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
{
// nothing to check
}
else if (channelsModeInput == ELTWISE_CHANNNELS_USE_MAX)
{
numChannels = std::max(numChannels, input_channels);
}
else
{
CV_Assert(0 && "Internal error");
}
}
channelsMode = variableChannels ? channelsModeInput : ELTWISE_CHANNNELS_SAME;
outputChannels = numChannels;
outputs.assign(1, inputs[0]);
outputs[0][1] = numChannels;
if (dims > 2)
{
size_t vecIdx = 0;
bool isVecFound = false;
for (size_t i = 0; i < inputs.size(); i++)
{
bool allOnes = isAllOnes(inputs[i], 2, dims);
if (!allOnes && !isVecFound)
{
vecIdx = i;
isVecFound = true;
}
if (!allOnes && i != vecIdx)
{
for (size_t j = 2; j < dims; j++)
{
CV_Assert(inputs[vecIdx][j] == inputs[i][j]);
}
}
}
if (channelsModeInput == ELTWISE_CHANNNELS_SAME && isVecFound)
{
for (size_t j = 2; j < dims; j++)
{
outputs[0][j] = inputs[vecIdx][j];
}
}
}
return false;
}
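// Shape example: inputs [N,32,H,W] and [N,64,H,W] under "max_input_channels"
// yield an output of [N,64,H,W]; under the default "same" mode the same pair
// fails the CV_Assert above.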
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape inpShape = shape(inputs[i].size);
if (isAllOnes(inpShape, 2, inputs[i].dims))
{
hasVecInput = true;
return;
}
}
}
class EltwiseInvoker : public ParallelLoopBody
{
EltwiseLayerImpl& self;
std::vector<const Mat*> srcs;
std::vector<int> srcNumChannels;
int nsrcs;
Mat* dst;
std::vector<float> coeffs;
int nstripes;
const ActivationLayer* activ;
int channels;
size_t planeSize;
EltwiseInvoker(EltwiseLayerImpl& self_)
: self(self_)
, nsrcs(0), dst(0), nstripes(0), activ(0), channels(0)
, planeSize(0)
{}
public:
static void run(EltwiseLayerImpl& self,
const Mat* srcs, int nsrcs, Mat& dst,
int nstripes)
{
const EltwiseOp op = self.op;
CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, "");
CV_CheckTypeEQ(dst.type(), CV_32FC1, "");
CV_Assert(dst.isContinuous());
CV_Assert(self.coeffs.empty() || self.coeffs.size() == (size_t)nsrcs);
CV_CheckGE(nsrcs, 2, "");
CV_Assert(self.outputChannels == dst.size[1]);
EltwiseInvoker p(self);
p.srcs.resize(nsrcs);
p.srcNumChannels.resize(nsrcs);
p.coeffs = self.coeffs; // can be sorted
bool sortInputs = false;
for( int i = 0; i < nsrcs; i++ )
{
p.srcs[i] = &srcs[i];
CV_CheckEQ(srcs[i].dims, dst.dims, "");
CV_Assert(srcs[i].isContinuous());
CV_Assert(srcs[i].type() == dst.type());
p.srcNumChannels[i] = (srcs[i].dims >= 4) ? srcs[i].size[1] : 1;
if (self.channelsMode == ELTWISE_CHANNNELS_SAME)
{
CV_Assert(srcs[i].size == dst.size);
}
else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0)
{
if (i == 0)
CV_Assert(srcs[0].size == dst.size);
CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
sortInputs = true;
}
else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
{
if (i == 0)
CV_Assert(srcs[0].size == dst.size);
sortInputs = true;
}
else if (self.channelsMode == ELTWISE_CHANNNELS_USE_MAX)
{
CV_Assert(op == SUM);
CV_Assert(self.outputChannels >= p.srcNumChannels[i]);
sortInputs = true;
}
else
{
CV_Assert(0 && "Internal error");
}
if (sortInputs)
{
// Sort srcs and coefficients in descending order by number of channels
for (int j = i; j >= 1; j--)
{
if (std::min(self.outputChannels, p.srcs[j - 1]->size[1]) < std::min(self.outputChannels, p.srcs[j]->size[1]))
{
std::swap(p.srcs[j - 1], p.srcs[j]);
std::swap(p.srcNumChannels[j - 1], p.srcNumChannels[j]);
if (!p.coeffs.empty())
std::swap(p.coeffs[j - 1], p.coeffs[j]);
}
else
break;
}
}
}
p.nsrcs = nsrcs;
p.dst = &dst;
p.nstripes = nstripes;
p.channels = (dst.dims >= 4 ? dst.size[1] : 1);
p.planeSize = dst.total(dst.dims >= 4 ? 2 : 1);
CV_CheckEQ(dst.total(), dst.size[0] * p.channels * p.planeSize, "");
bool simpleCoeffs = true;
if (op == SUM && !p.coeffs.empty())
{
CV_CheckEQ(p.coeffs.size(), (size_t)nsrcs, "");
for (size_t i = 0; i < p.coeffs.size(); i++)
{
if (p.coeffs[i] != 1)
{
simpleCoeffs = false;
break;
}
}
}
if (simpleCoeffs)
p.coeffs.clear();
p.activ = self.activ.get();
parallel_for_(Range(0, nstripes), p, nstripes);
}
void operator()(const Range& r) const CV_OVERRIDE
{
const EltwiseOp op = self.op;
size_t total = dst->size[0]*planeSize;
size_t stripeSize = (total + nstripes - 1)/nstripes;
size_t stripeStart = r.start*stripeSize;
size_t stripeEnd = std::min(r.end*stripeSize, total);
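// e.g. total = 10, nstripes = 4 -> stripeSize = 3, so the stripes
// cover [0,3), [3,6), [6,9) and [9,10).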
const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0;
float* dstptr0 = dst->ptr<float>();
int blockSize0 = 1 << 12;
for (size_t ofs = stripeStart; ofs < stripeEnd; )
{
int sampleIdx = (int)(ofs / planeSize);
int delta = (int)(ofs - sampleIdx * planeSize);
int blockSize = std::min(blockSize0, std::min((int)(stripeEnd - ofs), (int)planeSize - delta));
if( blockSize <= 0 )
break;
ofs += blockSize;
for (int c = 0; c < channels; c++)
{
size_t dstIdx = delta + (sampleIdx*channels + c)*planeSize;
float* dstptr = dstptr0 + dstIdx;
// process first two inputs
{
const float* srcptr0 = srcs[0]->ptr<float>() + dstIdx;
const int inputIdx = 1;
int src1_channels = srcNumChannels[inputIdx];
if (c >= src1_channels)
{
// no data from second input
if (!coeffsptr || coeffsptr[0] == 1.0f)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = srcptr0[j];
}
}
else
{
float c0 = coeffsptr[0];
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = c0*srcptr0[j];
}
}
}
else
{
size_t srcIdx = delta + (sampleIdx * src1_channels + c) * planeSize;
const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;
if (op == PROD)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = srcptr0[j] * srcptrI[j];
}
}
else if (op == DIV)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = srcptr0[j] / srcptrI[j];
}
}
else if (op == MAX)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = std::max(srcptr0[j], srcptrI[j]);
}
}
else if (op == MIN)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = std::min(srcptr0[j], srcptrI[j]);
}
}
else if (op == SUM)
{
if (!coeffsptr || (coeffsptr[0] == 1.0f && coeffsptr[1] == 1.0f))
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = srcptr0[j] + srcptrI[j];
}
}
else
{
float c0 = coeffsptr[0];
float c1 = coeffsptr[1];
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = c0*srcptr0[j] + c1*srcptrI[j];
}
}
}
else
CV_Error(Error::StsInternal, "");
}
}
// aggregate other inputs (3+)
for (size_t inputIdx = 2; inputIdx < nsrcs; inputIdx++)
{
int srcI_channels = srcNumChannels[inputIdx];
if (c >= srcI_channels)
continue; // no data from this input
size_t srcIdx = delta + (sampleIdx * srcI_channels + c) * planeSize;
const float* srcptrI = srcs[inputIdx]->ptr<float>() + srcIdx;
if (op == PROD)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] *= srcptrI[j];
}
}
else if (op == DIV)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] /= srcptrI[j];
}
}
else if (op == MAX)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = std::max(dstptr[j], srcptrI[j]);
}
}
else if (op == MIN)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] = std::min(dstptr[j], srcptrI[j]);
}
}
else if (op == SUM)
{
if (!coeffsptr || coeffsptr[inputIdx] == 1.0f)
{
for (int j = 0; j < blockSize; j++)
{
dstptr[j] += srcptrI[j];
}
}
else
{
float cI = coeffsptr[inputIdx];
for (int j = 0; j < blockSize; j++)
{
dstptr[j] += cI * srcptrI[j];
}
}
}
else
CV_Error(Error::StsInternal, "");
}
}
if( activ )
{
float* ptr = dstptr0 + delta + sampleIdx*channels*planeSize;
activ->forwardSlice(ptr, ptr, blockSize, planeSize, 0, channels);
}
}
}
};
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME))
return false;
if (hasVecInput)
return false; // TODO not implemented yet: https://github.com/opencv/opencv/pull/19477
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
switch (op)
{
case SUM:
{
int channels = total(shape(outputs[0]), 0, 2);
int plane_size = total(shape(outputs[0]), 2);
if (channels % 4 == 0 && plane_size % 4 == 0)
{
size_t localsize[] = { 128 };
size_t globalsize[] = { (size_t)channels / 4 * localsize[0] };
String opts;
if (inputs_.depth() == CV_16S)
opts = " -DDtype=half -DDtype4=half4 -DDtype8=half8";
else
opts = " -DDtype=float -DDtype4=float4 -DDtype8=float8";
for (int i = 0; i < (inputs.size() - 1); ++i)
{
String buildopt = format("-DLOOP=%d", i) + opts;
ocl::Kernel kernel("op_sum4", ocl::dnn::eltwise_oclsrc, buildopt);
int idx = 0;
UMat inpMat = (i == 0) ? inputs[0] : UMat();
float coeff1 = (coeffs.empty() || i > 0) ? 1.0f : coeffs[i];
float coeff2 = coeffs.empty() ? 1.0f : coeffs[i + 1];
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[0]));
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inputs[1]));
kernel.set(idx++, (int)plane_size);
kernel.set(idx++, (float)coeff1);
kernel.set(idx++, (float)coeff2);
kernel.set(idx++, ocl::KernelArg::PtrReadWrite(outputs[0]));
bool ret = kernel.run(1, globalsize, localsize, false);
if (!ret)
return false;
}
}
else
{
if (inputs_.depth() == CV_16S)
return false;
float coeff1 = coeffs.empty() ? 1.f : coeffs[0];
float coeff2 = coeffs.empty() ? 1.f : coeffs[1];
UMat mul0, mul1;
multiply(coeff1, inputs[0], mul0);
multiply(coeff2, inputs[1], mul1);
add(mul0, mul1, outputs[0]);
for (int i = 2; i < inputs.size(); ++i)
{
float coeff = coeffs.empty() ? 1.f : coeffs[i];
multiply(coeff, inputs[i], mul0);
add(mul0, outputs[0], outputs[0]);
}
}
}
break;
case PROD:
multiply(inputs[0], inputs[1], outputs[0]);
for (int i = 2; i < inputs.size(); ++i)
multiply(inputs[i], outputs[0], outputs[0]);
break;
case DIV:
divide(inputs[0], inputs[1], outputs[0]);
for (int i = 2; i < inputs.size(); ++i)
divide(outputs[0], inputs[i], outputs[0]);
break;
case MAX:
max(inputs[0], inputs[1], outputs[0]);
for (int i = 2; i < inputs.size(); ++i)
max(inputs[i], outputs[0], outputs[0]);
break;
case MIN:
min(inputs[0], inputs[1], outputs[0]);
for (int i = 2; i < inputs.size(); ++i)
min(inputs[i], outputs[0], outputs[0]);
break;
default:
return false;
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(outputs.size() == 1);
const int nstripes = getNumThreads();
if (channelsModeInput == ELTWISE_CHANNNELS_SAME && inputs[0].dims > 2)
{
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape inpShape = shape(inputs[i].size);
bool allOnes = isAllOnes(inpShape, 2, inputs[i].dims);
if (allOnes)
{
Mat tmpInput = inputs[i];
MatShape outShape = shape(outputs[0].size);
size_t xSize = outShape[2];
for (size_t j = 3; j < outShape.size(); j++)
xSize *= outShape[j];
int dimVec[3] = {outShape[0], outShape[1], (int) xSize};
std::vector<int> matSizesVec(&dimVec[0], &dimVec[0] + 3);
inputs[i] = Mat(matSizesVec, tmpInput.type());
std::vector<int> idx(outShape.size(), 0);
std::vector<int> outIdx(inpShape.size(), 0);
for (size_t j = 0; j < outShape[0]; j++)
{
outIdx[0] = idx[0] = j;
for(size_t k = 0; k < outShape[1]; k++)
{
outIdx[1] = idx[1] = k;
for (size_t x = 0; x < xSize; x++)
{
outIdx[2] = x;
inputs[i].at<float>(outIdx.data()) = tmpInput.at<float>(idx.data());
}
}
}
inputs[i] = inputs[i].reshape(0, outShape);
}
}
}
EltwiseInvoker::run(*this,
&inputs[0], (int)inputs.size(), outputs[0],
nstripes);
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
CV_Assert(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 ||
channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE ||
channelsModeInput == ELTWISE_CHANNNELS_SAME);
if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE)
{
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
for (int i = 1; i < inputs.size(); i++)
{
auto from_wrapper = inputs[i].dynamicCast<CUDABackendWrapper>();
if (input_wrapper->getShape()[1] != from_wrapper->getShape()[1])
{
CV_Assert(op == SUM);
CV_Assert(coeffs.empty());
return make_cuda_node<cuda4dnn::ShortcutOp>(preferableTarget, std::move(context->stream));
}
}
}
auto op_ = [this] {
switch (op) {
case MAX: return cuda4dnn::EltwiseOpType::MAX;
case MIN: return cuda4dnn::EltwiseOpType::MIN;
case SUM: return cuda4dnn::EltwiseOpType::SUM;
case PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
case DIV: return cuda4dnn::EltwiseOpType::DIV;
}
return cuda4dnn::EltwiseOpType::SUM;
}();
return make_cuda_node<cuda4dnn::EltwiseOp>(preferableTarget, std::move(context->stream), op_, coeffs);
}
#endif
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Expr topExpr;
std::vector<Halide::Buffer<> > inputBuffers = halideBuffers(input);
switch (op)
{
case SUM:
if (coeffs.empty())
{
topExpr = inputBuffers[0](x, y, c, n) +
inputBuffers[1](x, y, c, n);
for (int i = 2; i < inputBuffers.size(); ++i)
topExpr += inputBuffers[i](x, y, c, n);
}
else
{
topExpr = coeffs[0] * inputBuffers[0](x, y, c, n) +
coeffs[1] * inputBuffers[1](x, y, c, n);
for (int i = 2; i < inputBuffers.size(); ++i)
topExpr += coeffs[i] * inputBuffers[i](x, y, c, n);
}
break;
case PROD:
topExpr = inputBuffers[0](x, y, c, n) *
inputBuffers[1](x, y, c, n);
for (int i = 2; i < inputBuffers.size(); ++i)
topExpr *= inputBuffers[i](x, y, c, n);
break;
case DIV:
topExpr = inputBuffers[0](x, y, c, n) /
inputBuffers[1](x, y, c, n);
for (int i = 2; i < inputBuffers.size(); ++i)
topExpr /= inputBuffers[i](x, y, c, n);
break;
case MAX:
topExpr = max(inputBuffers[0](x, y, c, n),
inputBuffers[1](x, y, c, n));
for (int i = 2; i < inputBuffers.size(); ++i)
topExpr = max(topExpr, inputBuffers[i](x, y, c, n));
break;
case MIN:
topExpr = min(inputBuffers[0](x, y, c, n),
inputBuffers[1](x, y, c, n));
for (int i = 2; i < inputBuffers.size(); ++i)
topExpr = min(topExpr, inputBuffers[i](x, y, c, n));
break;
default:
return Ptr<BackendNode>();
}
top(x, y, c, n) = topExpr;
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
InferenceEngine::Builder::EltwiseLayer ieLayer(name);
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
if (op == SUM)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM);
else if (op == PROD)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL);
else if (op == DIV)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV);
else if (op == MAX)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
else if (op == MIN)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MIN);
else
CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
InferenceEngine::Builder::Layer l = ieLayer;
if (!coeffs.empty())
l.getParameters()["coeff"] = coeffs;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto curr_node = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
if (!coeffs.empty()) {
auto coeff = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeffs[0]);
curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, coeff, ngraph::op::AutoBroadcastType::NUMPY);
}
for (size_t i = 1; i < nodes.size(); i++)
{
auto next_node = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
if (!coeffs.empty()) {
auto coeff = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeffs[i]);
next_node = std::make_shared<ngraph::op::v1::Multiply>(next_node, coeff, ngraph::op::AutoBroadcastType::NUMPY);
}
switch (op) {
case SUM: curr_node = std::make_shared<ngraph::op::v1::Add>(curr_node, next_node); break;
case PROD: curr_node = std::make_shared<ngraph::op::v1::Multiply>(curr_node, next_node); break;
case DIV: curr_node = std::make_shared<ngraph::op::v1::Divide>(curr_node, next_node); break;
case MAX: curr_node = std::make_shared<ngraph::op::v1::Maximum>(curr_node, next_node); break;
case MIN: curr_node = std::make_shared<ngraph::op::v1::Minimum>(curr_node, next_node); break;
default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
}
}
return Ptr<BackendNode>(new InfEngineNgraphNode(curr_node));
}
#endif // HAVE_DNN_NGRAPH
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
if (op == SUM)
{
std::vector<float> newCoeffs;
float offset = zeropoints[1][0];
float out_sc = scales[1][0];
for (int i = 0; i < scales[0].size(); i++)
{
float coeff = coeffs.empty() ? 1.f : coeffs[i];
float newcoeff = (scales[0][i] * coeff) / out_sc;
newCoeffs.push_back(newcoeff);
offset -= (newcoeff * zeropoints[0][i]);
}
params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size()));
params.set("offset", offset);
return true;
}
else if (op == PROD)
{
std::vector<float> newCoeffs = scales[0];
newCoeffs[0] /= scales[1][0];
params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size()));
params.set("offset", zeropoints[1][0]);
params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
return true;
}
return op == MAX;
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
CV_Assert(inputs.size());
// FIXIT: handle inputs with different number of channels
long flops = inputs.size() * total(inputs[0]);
return flops;
}
bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
{
if (activ.empty() || layer.empty())
{
activ = layer;
return !activ.empty();
}
else
return false;
}
Ptr<ActivationLayer> activ;
private:
bool hasVecInput;
};
Ptr<EltwiseLayer> EltwiseLayer::create(const LayerParams& params)
{
return Ptr<EltwiseLayer>(new EltwiseLayerImpl(params));
}
}
}

View File

@ -0,0 +1,246 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <float.h>
#include <algorithm>
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reshape.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class FlattenLayerImpl CV_FINAL : public FlattenLayer
{
public:
FlattenLayerImpl(const LayerParams &params)
{
_startAxis = params.get<int>("axis", 1);
_endAxis = params.get<int>("end_axis", -1);
setParamsFrom(params);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() > 0);
for (size_t i = 1; i < inputs.size(); i++)
{
CV_Assert(inputs[i] == inputs[0]);
}
int numAxes = inputs[0].size();
int startAxis = normalize_axis(_startAxis, numAxes);
int endAxis = normalize_axis(_endAxis, numAxes);
CV_Assert(startAxis >= 0);
CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);
size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis + 1);
MatShape outputShapeVec;
for (int i = 0; i < startAxis; i++)
{
outputShapeVec.push_back(inputs[0][i]);
}
outputShapeVec.push_back(flattenedDimensionSize);
for (size_t i = endAxis + 1; i < numAxes; i++)
{
outputShapeVec.push_back(inputs[0][i]);
}
CV_Assert(outputShapeVec.size() <= 4);
outputs.resize(inputs.size(), outputShapeVec);
return true;
}
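// Shape example: input [2,3,4,5] with axis = 1 and end_axis = -1 flattens
// to [2,60]; with axis = 1 and end_axis = 2 it becomes [2,12,5].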
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
int numAxes = inputs[0].dims;
_startAxis = normalize_axis(_startAxis, numAxes);
_endAxis = normalize_axis(_endAxis, numAxes);
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
std::vector<UMat> inpvec;
std::vector<UMat> outputs;
inputs_arr.getUMatVector(inpvec);
outputs_arr.getUMatVector(outputs);
std::vector<UMat*> inputs(inpvec.size());
for (int i = 0; i < inpvec.size(); i++)
inputs[i] = &inpvec[i];
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape outShape = shape(outputs[i]);
UMat& output = outputs_arr.getUMatRef(i);
output = inputs[i]->reshape(1, (int)outShape.size(), &outShape[0]);
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
outputs_arr.isUMatVector(),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape outShape = shape(outputs[i]);
if (inputs[i].data != outputs[i].data)
{
inputs[i].reshape(1, (int)outShape.size(), &outShape[0]).copyTo(outputs[i]);
}
}
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Flatten");
ieLayer.getParameters()["axis"] = (size_t)_startAxis;
ieLayer.getParameters()["end_axis"] = _endAxis; // Do not cast to size_t because it might be negative.
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<size_t> dims = ieInpNode->get_shape();
int numAxes = dims.size();
int startAxis = normalize_axis(_startAxis, numAxes);
int endAxis = normalize_axis(_endAxis, numAxes);
CV_Assert(startAxis >= 0);
CV_Assert(endAxis >= startAxis && endAxis < numAxes);
int64_t flattenedDimensionSize = std::accumulate(dims.begin() + startAxis,
dims.begin() + endAxis + 1, 1, std::multiplies<size_t>());
std::vector<int64_t> outputShapeVec(dims.begin(), dims.begin() + startAxis);
outputShapeVec.push_back(flattenedDimensionSize);
outputShapeVec.insert(outputShapeVec.end(), dims.begin() + endAxis + 1, dims.end());
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape({outputShapeVec.size()}), outputShapeVec.data());
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, shape, true);
return Ptr<BackendNode>(new InfEngineNgraphNode(reshape));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
return true;
}
int _startAxis;
int _endAxis;
};
Ptr<FlattenLayer> FlattenLayer::create(const LayerParams& params)
{
return Ptr<FlattenLayer>(new FlattenLayerImpl(params));
}
}
}

View File

@ -0,0 +1,117 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2020, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
namespace cv { namespace dnn {
class FlowWarpLayerImpl CV_FINAL : public FlowWarpLayer
{
public:
FlowWarpLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
String fill_string = toLowerCase(params.get<String>("FillParameter", "ZERO"));
if (fill_string != "zero")
CV_Error(Error::StsNotImplemented, "Only zero filling supported.");
fill_value = 0;
}
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 2);
CV_Assert_N(inputs[0][0] == inputs[1][0], inputs[1][1] == 2,
inputs[0][2] == inputs[1][2], inputs[0][3] == inputs[1][3]);
outputs.assign(1, inputs[0]);
return false;
}
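// The second input is a dense flow field shaped [N,2,H,W]: for every output
// pixel (x, y) the layer samples the first input at (x + flow_x, y + flow_y)
// with bilinear interpolation, e.g. a flow of (0.5, 0) reads halfway between
// a pixel and its right-hand neighbour.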
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
const int out_n = outputs[0].size[0];
const int out_c = outputs[0].size[1];
const int out_h = outputs[0].size[2];
const int out_w = outputs[0].size[3];
const int area = out_w * out_h;
const int total = area * out_c;
const float* image_data = inputs[0].ptr<float>();
const float* flow_data = inputs[1].ptr<float>();
float* out_data = outputs[0].ptr<float>();
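// NCHW addressing: element (n, c, y, x) lives at flat offset
// n * out_c * area + c * area + y * out_w + x.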
for (int n = 0; n < out_n; n++)
{
int off = total * n;
for (int x = 0; x < out_w; x++)
{
for (int y = 0; y < out_h; y++)
{
int idx = 2 * area * n + y * out_w + x;
float fx = flow_data[idx];
float fy = flow_data[idx + area];
float x2 = x + fx;
float y2 = y + fy;
if (x2 >= 0 && y2 >= 0 && x2 < out_w && y2 < out_h)
{
int ix2_L = x2;
float alpha = x2 - ix2_L;
int iy2_T = y2;
float beta = y2 - iy2_T;
int ix2_R = std::min(ix2_L + 1, out_w - 1);
int iy2_B = std::min(iy2_T + 1, out_h - 1);
for (int c = 0; c < out_c; c++)
{
float TL = image_data[off + c * area + iy2_T * out_w + ix2_L];
float TR = image_data[off + c * area + iy2_T * out_w + ix2_R];
float BL = image_data[off + c * area + iy2_B * out_w + ix2_L];
float BR = image_data[off + c * area + iy2_B * out_w + ix2_R];
out_data[off + c * area + y * out_w + x] = (1 - alpha) * (1 - beta) * TL +
(1 - alpha) * beta * BL +
alpha * (1 - beta) * TR +
alpha * beta * BR;
}
}
else
{
for (int c = 0; c < out_c; c++)
out_data[off + c * area + y * out_w + x] = fill_value;
}
}
}
}
}
private:
float fill_value;
};
Ptr<FlowWarpLayer> FlowWarpLayer::create(const LayerParams& params)
{
return Ptr<FlowWarpLayer>(new FlowWarpLayerImpl(params));
}
}} // namespace cv::dnn

View File

@ -0,0 +1,687 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/matmul.hpp"
#include "../cuda4dnn/primitives/inner_product.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer
{
public:
enum { VEC_ALIGN = 8 };
#ifdef HAVE_OPENCL
Ptr<OCL4DNNInnerProduct<float> > innerProductOp;
std::vector<UMat> umat_blobs;
std::vector<UMat> half_blobs;
#endif
FullyConnectedLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
bias = params.get<bool>("bias_term", true);
axis = params.get<int>("axis", 1);
if (!blobs.empty())
{
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
int numOutput = params.get<int>("num_output");
int innerSize = (int)blobs[0].total() / numOutput;
CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total());
CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total()));
weightsMat = blobs[0] = blobs[0].reshape(1, numOutput);
int vecsize = weightsMat.cols;
if (vecsize % VEC_ALIGN != 0)
{
int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type());
Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned);
wpadding.setTo(Scalar::all(0.));
weightsMat = weightsBuf.colRange(0, vecsize);
blobs[0].copyTo(weightsMat);
}
if (bias)
biasMat = blobs[1] = blobs[1].reshape(1, 1);
else
biasMat = Mat::zeros(1, numOutput, weightsMat.type());
}
}
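// Padding example: with VEC_ALIGN = 8 and vecsize = 10, each weight row is
// backed by a 16-column buffer whose last 6 columns are zero, so the SIMD
// kernels below may safely load full registers past the logical row end.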
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &) const CV_OVERRIDE
{
int numOutput, cAxis;
if (blobs.empty())
{
CV_CheckEQ(inputs.size(), (size_t)2, "");
numOutput = inputs[1].back();
cAxis = inputs[0].size() - 1;
int dims = inputs[0].size();
CV_CheckEQ(inputs[1].size(), (size_t)dims, "");
CV_CheckGE(dims, 2, "");
for (int i = 0; i < dims - 2; i++)
CV_CheckEQ(inputs[0][i], inputs[1][i], "");
CV_CheckEQ(inputs[0].back(), inputs[1][dims - 2], "");
}
else
{
CV_CheckEQ(inputs.size(), (size_t)1, "");
CV_CheckEQ(blobs[0].dims, 2, "");
numOutput = blobs[0].size[0];
CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
cAxis = normalize_axis(axis, inputs[0]);
}
MatShape outShape(cAxis + 1);
for (int i = 0; i < cAxis; ++i)
outShape[i] = inputs[0][i];
outShape.back() = numOutput;
outputs.resize(1, outShape);
return false;
}
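// Shape examples: with constant weights [numOutput, innerSize] and axis = 1,
// an input [N, innerSize] maps to [N, numOutput]; in the two-input (MatMul)
// branch, inputs [B, M, N] and [B, N, K] map to [B, M, K].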
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) ||
(((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && !blobs.empty()) ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1);
}
virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
{
if (activ.empty() || layer.empty())
{
activ = layer;
return !activ.empty();
}
else
return false;
}
class FullyConnected : public ParallelLoopBody
{
public:
FullyConnected() : srcMat(0), weights(0), biasMat(0), activ(0), dstMat(0), nstripes(0), useAVX(false), useAVX2(false), useAVX512(false), useRVV(false) {}
static void run(const Mat& srcMat, const Mat& weights, const Mat& biasMat,
Mat& dstMat, const ActivationLayer* activ, int nstripes)
{
CV_Assert( srcMat.dims == 2 && srcMat.cols == weights.cols &&
dstMat.rows == srcMat.rows && dstMat.cols == weights.rows &&
srcMat.type() == weights.type() && weights.type() == dstMat.type() &&
srcMat.type() == CV_32F &&
(biasMat.empty() || (biasMat.type() == srcMat.type() &&
biasMat.isContinuous() && (int)biasMat.total() == dstMat.cols)) );
FullyConnected p;
p.srcMat = &srcMat;
p.weights = &weights;
p.biasMat = &biasMat;
p.dstMat = &dstMat;
p.nstripes = nstripes;
p.activ = activ;
p.useAVX = checkHardwareSupport(CPU_AVX);
p.useAVX2 = checkHardwareSupport(CPU_AVX2);
p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX;
p.useRVV = checkHardwareSupport(CPU_RVV);
parallel_for_(Range(0, nstripes), p, nstripes);
}
void operator()(const Range& r) const CV_OVERRIDE
{
int valign = FullyConnectedLayerImpl::VEC_ALIGN;
int nsamples = srcMat->rows;
int nw0 = weights->rows;
int k, vecsize = srcMat->cols;
int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
size_t total = (size_t)nsamples*nw0;
size_t stripeSize = (total + nstripes - 1)/nstripes;
size_t stripeStart = r.start*stripeSize;
size_t stripeEnd = r.end == nstripes ? total : std::min(r.end*stripeSize, total);
size_t wstep = weights->step1();
AutoBuffer<float> srcbuf(vecsize_aligned + valign);
float* sptr = alignPtr(srcbuf.data(), (int)(valign*sizeof(float)));
for( k = vecsize; k < vecsize_aligned; k++ )
sptr[k] = 0.f;
for( size_t ofs = stripeStart; ofs < stripeEnd; )
{
int sampleIdx = (int)(ofs / nw0);
int delta = (int)(ofs - (size_t)sampleIdx*nw0);
const float* sptr_ = srcMat->ptr<float>(sampleIdx);
const float* wptr = weights->ptr<float>(delta);
float* dptr = dstMat->ptr<float>(sampleIdx) + delta;
const float* biasptr = biasMat->ptr<float>() + delta;
int nw = std::min(nw0 - delta, (int)(stripeEnd - ofs));
memcpy(sptr, sptr_, vecsize*sizeof(sptr[0]));
#if CV_TRY_AVX512_SKX
if( useAVX512 )
opt_AVX512_SKX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
else
#endif
#if CV_TRY_AVX2
if( useAVX2 )
opt_AVX2::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
else
#endif
#if CV_TRY_AVX
if( useAVX )
opt_AVX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
else
#endif
#if CV_TRY_RVV
if( useRVV )
opt_RVV::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
else
#endif
{
int i = 0;
#if CV_SIMD128
for( ; i <= nw - 4; i += 4, wptr += 4*wstep )
{
v_float32x4 vs0 = v_setall_f32(0.f);
v_float32x4 vs1 = v_setall_f32(0.f);
v_float32x4 vs2 = v_setall_f32(0.f);
v_float32x4 vs3 = v_setall_f32(0.f);
for( k = 0; k < vecsize; k += 4 )
{
v_float32x4 v = v_load_aligned(sptr + k);
vs0 = v_fma(v, v_load_aligned(wptr + k), vs0);
vs1 = v_fma(v, v_load_aligned(wptr + wstep + k), vs1);
vs2 = v_fma(v, v_load_aligned(wptr + wstep*2 + k), vs2);
vs3 = v_fma(v, v_load_aligned(wptr + wstep*3 + k), vs3);
}
v_float32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3);
s += v_load(biasptr + i);
v_store(dptr + i, s);
}
#endif
for( ; i < nw; i++, wptr += wstep )
{
float s0=biasptr[i];
for( k = 0; k < vecsize; k++ )
{
float v = sptr[k];
s0 += v*wptr[k];
}
dptr[i] = s0;
}
}
if(activ)
activ->forwardSlice(dptr, dptr, 1, 1, delta, delta + nw);
ofs += nw;
}
}
const Mat *srcMat, *weights, *biasMat;
const ActivationLayer* activ;
Mat* dstMat;
int nstripes;
bool useAVX;
bool useAVX2;
bool useAVX512;
bool useRVV;
};
#ifdef HAVE_OPENCL
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
{
innerProductOp.release();
umat_blobs.clear();
half_blobs.clear();
}
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (inputs.size() == 2)
{
int dims = outputs[0].dims;
int m = inputs[0].size[dims - 2];
int n = inputs[0].size[dims - 1];
int k = inputs[1].size[dims - 1];
int rows = inputs[0].total() / (m * n);
MatShape sh_A = shape(rows, m * n);
MatShape sh_B = shape(rows, n * k);
MatShape sh_C = shape(rows, m * k);
UMat inp = inputs[0].reshape(1, sh_A.size(), &sh_A[0]);
UMat weight = inputs[1].reshape(1, sh_B.size(), &sh_B[0]);
UMat out = outputs[0].reshape(1, sh_C.size(), &sh_C[0]);
UMat A, B, C, A_fp32, B_fp32, C_fp32;
for (int i = 0; i < rows; ++i)
{
A = inp.row(i).reshape(1, m);
B = weight.row(i).reshape(1, n);
C = out.row(i).reshape(1, m);
if (use_half)
{
convertFp16(A, A_fp32);
convertFp16(B, B_fp32);
convertFp16(C, C_fp32);
}
else
{
A_fp32 = A;
B_fp32 = B;
C_fp32 = C;
}
cv::gemm(A_fp32, B_fp32, 1, noArray(), 0, C_fp32);
if (use_half)
{
convertFp16(A_fp32, A);
convertFp16(B_fp32, B);
convertFp16(C_fp32, C);
}
}
return true;
}
int axisCan = normalize_axis(axis, inputs[0].dims);
int numOutput = blobs[0].size[0];
int innerSize = blobs[0].size[1];
int outerSize = total(shape(inputs[0]), 0, axisCan);
bool ret = true;
if (innerProductOp.empty())
{
size_t n = blobs.size();
umat_blobs.resize(n);
for (int i = 0; i < n; i++) blobs[i].copyTo(umat_blobs[i]);
OCL4DNNInnerProductConfig config;
config.num_output = numOutput;
config.bias_term = bias;
config.M = outerSize;
config.K = innerSize;
config.use_half = use_half;
if (use_half)
{
half_blobs.resize(umat_blobs.size());
for (int i = 0; i < umat_blobs.size(); i++)
{
if (!umat_blobs[i].empty())
convertFp16(umat_blobs[i], half_blobs[i]);
}
}
innerProductOp = Ptr<OCL4DNNInnerProduct<float> >(new OCL4DNNInnerProduct<float>(config));
}
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape inshape, outshape;
inshape = shape(outerSize, innerSize);
outshape = shape(outerSize, numOutput);
UMat srcMat, dstMat;
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
if (!innerProductOp->Forward(srcMat, (use_half) ? half_blobs[0] : umat_blobs[0],
(bias) ? (use_half ? half_blobs[1] : umat_blobs[1]) : UMat(),
dstMat))
{
ret = false;
break;
}
if (!use_half && bias && (outerSize > 1))
{
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
UMat& biases = umat_blobs[1];
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
}
}
if (ret) return true;
UMat& weights = umat_blobs[0];
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape inshape, outshape;
inshape = shape(outerSize, innerSize);
outshape = shape(outerSize, numOutput);
UMat srcMat, dstMat, srcMat_fp32, dstMat_fp32;
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
if (use_half)
{
convertFp16(srcMat, srcMat_fp32);
convertFp16(dstMat, dstMat_fp32);
}
else
{
srcMat_fp32 = srcMat;
dstMat_fp32 = dstMat;
}
cv::gemm(srcMat_fp32, weights, 1, noArray(), 0, dstMat_fp32, GEMM_2_T);
if (bias)
{
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
UMat& biases = umat_blobs[1];
cv::gemm(biasOnesMat, biases, 1, dstMat_fp32, 1, dstMat_fp32, 0);
}
if (use_half)
{
convertFp16(srcMat_fp32, srcMat);
convertFp16(dstMat_fp32, dstMat);
}
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> input, output;
inputs_arr.getMatVector(input);
outputs_arr.getMatVector(output);
if (!blobs.empty())
{
int axisCan = normalize_axis(axis, input[0].dims);
int outerSize = input[0].total(0, axisCan);
for (size_t i = 0; i < input.size(); i++)
{
Mat srcMat = input[i].reshape(1, outerSize);
Mat dstMat = output[i].reshape(1, outerSize);
const int nstripes = getNumThreads();
FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes);
}
}
else
{
float* inpData = input[0].ptr<float>();
float* weightData = input[1].ptr<float>();
float* outData = output[0].ptr<float>();
int dims = output[0].dims;
int numSlice = output[0].total() / output[0].total(dims - 2);
int m = input[0].size[dims - 2];
int n = input[0].size[dims - 1];
int k = input[1].size[dims - 1];
for (int i = 0; i < numSlice; i++)
{
Mat inpSlice(m, n, CV_32F, inpData);
Mat weightSlice(n, k, CV_32F, weightData);
Mat outSlice(m, k, CV_32F, outData);
outSlice = inpSlice * weightSlice;
inpData += inpSlice.total();
weightData += weightSlice.total();
outData += outSlice.total();
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
if (weightsMat.empty())
{
CV_Assert(!bias);
return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle));
}
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank());
auto biasMat_ = bias ? biasMat : Mat();
return make_cuda_node<cuda4dnn::InnerProductOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_);
}
#endif
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
int inW, inH, inC, inN, outC = blobs[0].size[0];
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
auto weights = wrapToHalideBuffer(blobs[0], {inW, inH, inC, outC});
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::RDom r(0, inW, 0, inH, 0, inC);
Halide::Expr topExpr = sum(inputBuffer(r.x, r.y, r.z, n) *
weights(r.x, r.y, r.z, c));
if (bias)
{
Halide::Buffer<float> bias = wrapToHalideBuffer(blobs[1], {outC});
topExpr += bias(c);
}
top(x, y, c, n) = topExpr;
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::FullyConnectedLayer ieLayer(name);
const int outNum = blobs[0].size[0];
ieLayer.setOutputNum(outNum);
InferenceEngine::Builder::Layer l = ieLayer;
addConstantData("weights", wrapToInfEngineBlob(blobs[0], {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW), l);
if (bias)
addConstantData("biases", wrapToInfEngineBlob(blobs[1], {(size_t)outNum}, InferenceEngine::Layout::C), l);
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::shared_ptr<ngraph::Node> matmul;
if (nodes.size() == 2)
{
auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false);
}
else
{
std::vector<int64_t> data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]};
auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
}
if (bias) {
auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
ngraph::Shape{(size_t)blobs[1].size[1]}, blobs[1].data);
matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY);
}
return Ptr<BackendNode>(new InfEngineNgraphNode(matmul));
}
#endif // HAVE_DNN_NGRAPH
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
if (blobs.empty())
return false;
int numOutput = blobs[0].size[0];
float inputScale = scales[0][0], outputScale = scales[1][0];
int inputZp = zeropoints[0][0];
Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S);
Mat biasQuantized(1, numOutput, CV_32S);
Mat outputMultiplier(1, numOutput, CV_32F);
double realMin, realMax, weightsScale;
for( int i = 0; i < numOutput; i++ )
{
// Quantize weights
cv::minMaxIdx(weightsMat.row(i), &realMin, &realMax);
realMin = std::min(realMin, 0.0);
realMax = std::max(realMax, 0.0);
weightsScale = (realMax == realMin) ? 1.0 : std::max(-realMin, realMax)/127;
weightsMat.row(i).convertTo(weightsQuantized.row(i), CV_8S, 1.f/weightsScale);
// Quantize biases
float biasScale = inputScale * weightsScale;
biasQuantized.at<int>(i) = (int)std::round(biasMat.at<float>(i)/biasScale) - inputZp*(cv::sum(weightsQuantized.row(i))[0]);
// Store multiplier
outputMultiplier.at<float>(i) = biasScale / outputScale;
}
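// Numeric example: if row i of weightsMat spans [-1.27, 0.5], then
// weightsScale = 1.27 / 127 = 0.01 and the weight 0.5 quantizes to 50;
// biasScale = inputScale * weightsScale, and outputMultiplier stores
// biasScale / outputScale for the int8 output rescaling.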
params.blobs.clear();
params.blobs.push_back(weightsQuantized.reshape(1, shape(blobs[0])));
params.blobs.push_back(biasQuantized);
params.blobs.push_back(outputMultiplier);
return true;
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(inputs); // suppress unused variable warning
long flops = 0;
int innerSize = blobs[0].size[1];
for(int i = 0; i < outputs.size(); i++)
{
flops += CV_BIG_INT(3)*innerSize*total(outputs[i]);
}
return flops;
}
bool bias;
Mat weightsMat, biasMat;
Ptr<ActivationLayer> activ;
};
Ptr<InnerProductLayer> InnerProductLayer::create(const LayerParams& params)
{
return Ptr<InnerProductLayer>(new FullyConnectedLayerImpl(params));
}
}
}

View File

@ -0,0 +1,254 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
namespace cv
{
namespace dnn
{
namespace util
{
std::string makeName(const std::string& str1, const std::string& str2)
{
return str1 + str2;
}
bool getParameter(const LayerParams &params, const std::string& nameBase, const std::string& nameAll,
std::vector<size_t>& parameter, bool hasDefault = false, const std::vector<size_t>& defaultValue = std::vector<size_t>(2, 0))
{
std::string nameH = makeName(nameBase, std::string("_h"));
std::string nameW = makeName(nameBase, std::string("_w"));
std::string nameAll_ = nameAll;
if (nameAll_ == "")
nameAll_ = nameBase;
if (params.has(nameH) && params.has(nameW))
{
CV_Assert(params.get<int>(nameH) >= 0 && params.get<int>(nameW) >= 0);
parameter.push_back(params.get<int>(nameH));
parameter.push_back(params.get<int>(nameW));
return true;
}
else
{
if (params.has(nameAll_))
{
DictValue param = params.get(nameAll_);
for (int i = 0; i < param.size(); i++) {
CV_Assert(param.get<int>(i) >= 0);
parameter.push_back(param.get<int>(i));
}
if (parameter.size() == 1)
parameter.resize(2, parameter[0]);
return true;
}
else
{
if (hasDefault)
{
parameter = defaultValue;
return true;
}
else
{
return false;
}
}
}
}
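// Illustrative note (not in the original source): getParameter() resolves a
// value in this order: the explicit "<name>_h"/"<name>_w" pair, then the
// aggregate key (e.g. "kernel_size"), then the caller-supplied default.
// E.g. {kernel_h=3, kernel_w=5} yields {3, 5}; a scalar kernel_size=3 is
// broadcast to {3, 3}.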
void getKernelSize(const LayerParams &params, std::vector<size_t>& kernel)
{
if (!util::getParameter(params, "kernel", "kernel_size", kernel))
CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
for (int i = 0; i < kernel.size(); i++)
CV_Assert(kernel[i] > 0);
}
void getStrideAndPadding(const LayerParams &params, std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
std::vector<size_t>& strides, cv::String& padMode, size_t kernel_size = 2)
{
if (params.has("pad_l") && params.has("pad_t") && params.has("pad_r") && params.has("pad_b")) {
CV_Assert(params.get<int>("pad_t") >= 0 && params.get<int>("pad_l") >= 0 &&
params.get<int>("pad_b") >= 0 && params.get<int>("pad_r") >= 0);
pads_begin.push_back(params.get<int>("pad_t"));
pads_begin.push_back(params.get<int>("pad_l"));
pads_end.push_back(params.get<int>("pad_b"));
pads_end.push_back(params.get<int>("pad_r"));
}
else {
util::getParameter(params, "pad", "pad", pads_begin, true, std::vector<size_t>(kernel_size, 0));
if (pads_begin.size() < 4)
pads_end = pads_begin;
else
{
pads_end = std::vector<size_t>(pads_begin.begin() + pads_begin.size() / 2, pads_begin.end());
pads_begin.resize(pads_begin.size() / 2);
}
CV_Assert(pads_begin.size() == pads_end.size());
}
util::getParameter(params, "stride", "stride", strides, true, std::vector<size_t>(kernel_size, 1));
padMode = "";
if (params.has("pad_mode"))
{
padMode = params.get<String>("pad_mode");
}
for (int i = 0; i < strides.size(); i++)
CV_Assert(strides[i] > 0);
}
}
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
std::vector<size_t>& strides, cv::String &padMode)
{
bool is_global = params.get<bool>("global_pooling", false);
globalPooling.resize(3);
globalPooling[0] = params.get<bool>("global_pooling_d", is_global);
globalPooling[1] = params.get<bool>("global_pooling_h", is_global);
globalPooling[2] = params.get<bool>("global_pooling_w", is_global);
if (globalPooling[0] || globalPooling[1] || globalPooling[2])
{
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode);
if ((globalPooling[0] && params.has("kernel_d")) ||
(globalPooling[1] && params.has("kernel_h")) ||
(globalPooling[2] && params.has("kernel_w")) ||
params.has("kernel_size")) {
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
}
kernel.resize(3);
kernel[0] = params.get<int>("kernel_d", 1);
kernel[1] = params.get<int>("kernel_h", 1);
kernel[2] = params.get<int>("kernel_w", 1);
for (int i = 0, j = globalPooling.size() - pads_begin.size(); i < pads_begin.size(); i++, j++) {
if ((pads_begin[i] != 0 || pads_end[i] != 0) && globalPooling[j])
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0");
}
for (int i = 0, j = globalPooling.size() - strides.size(); i < strides.size(); i++, j++) {
if (strides[i] != 1 && globalPooling[j])
CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1");
}
}
else
{
util::getKernelSize(params, kernel);
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
}
}
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides,
std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
{
util::getKernelSize(params, kernel);
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));
for (int i = 0; i < dilations.size(); i++)
CV_Assert(dilations[i] > 0);
}
// From TensorFlow code:
// Total padding on rows and cols is
// Pr = (R' - 1) * S + Kr - R
// Pc = (C' - 1) * S + Kc - C
// where (R', C') are output dimensions, (R, C) are input dimensions, S
// is stride, (Kr, Kc) are filter dimensions.
// We pad Pr/2 on the top and Pr - Pr/2 on the bottom, Pc/2 on the left
// and Pc - Pc/2 on the right. When Pr or Pc is odd, this means
// we pad more on the bottom and right than on the top and left.
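// Worked example (illustrative): R = 5, S = 2, Kr = 3 gives R' = ceil(5/2) = 3,
// so Pr = (3 - 1)*2 + 3 - 5 = 2: pad one row on top and one on the bottom.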
void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& stride, const String &padMode,
const std::vector<size_t>& dilation, std::vector<int>& out)
{
if (padMode == "VALID")
{
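// out = floor((in - dilation*(k - 1) - 1) / stride) + 1, with "+ stride"
// folded in to stay in integer arithmetic.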
for (int i = 0; i < inp.size(); i++)
out.push_back((inp[i] - dilation[i] * (kernel[i] - 1) - 1 + stride[i]) / stride[i]);
}
else if (padMode == "SAME")
{
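// out = ceil(in / stride), computed as (in - 1 + stride) / stride.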
for (int i = 0; i < inp.size(); i++)
out.push_back((inp[i] - 1 + stride[i]) / stride[i]);
}
else
{
CV_Error(Error::StsError, "Unsupported padding mode");
}
}
void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& strides, const String &padMode,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end)
{
if (padMode == "SAME" || padMode == "VALID")
{
pads_begin.assign(kernel.size(), 0);
pads_end.assign(kernel.size(), 0);
}
if (padMode == "SAME")
{
CV_Assert_N(kernel.size() == strides.size(), kernel.size() == inp.size());
for (int i = 0; i < pads_begin.size(); i++) {
// There are test cases with stride > kernel.
if (strides[i] <= kernel[i])
{
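// Equivalent to Pr/2 from the TensorFlow note above:
// (K - 1 - (inp - 1) % S) / 2 == ((ceil(inp/S) - 1)*S + K - inp) / 2.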
int pad = (kernel[i] - 1 - (inp[i] - 1 + strides[i]) % strides[i]) / 2;
pads_begin[i] = pads_end[i] = pad;
}
}
}
}
}
}

View File

@ -0,0 +1,79 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
// dispatched AVX/AVX2 optimizations
#include "./layers_common.simd.hpp"
#include "layers/layers_common.simd_declarations.hpp"
#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
#ifdef HAVE_OPENCL
#include "../ocl4dnn/include/ocl4dnn.hpp"
#endif
namespace cv
{
namespace dnn
{
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
cv::String &padMode, std::vector<size_t>& adjust_pads);
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& stride, const String &padMode,
const std::vector<size_t>& dilation, std::vector<int>& out);
void getConvPoolPaddings(const std::vector<int>& inp, const std::vector<size_t>& kernel,
const std::vector<size_t>& strides, const String &padMode,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end);
}
}
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,530 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/dnn/shape_utils.hpp"
#include "opencv2/core/hal/hal.hpp"
#include <algorithm>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/lrn.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class LRNLayerImpl CV_FINAL : public LRNLayer
{
public:
LRNLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
type = -1;
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
if (nrmType == "ACROSS_CHANNELS")
type = CHANNEL_NRM;
else if (nrmType == "WITHIN_CHANNEL")
type = SPATIAL_NRM;
else
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
size = params.get<int>("local_size", 5);
if (size % 2 != 1 || size <= 0)
CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");
alpha = params.get<double>("alpha", 1);
beta = params.get<double>("beta", 0.75);
bias = params.get<double>("bias", 1);
normBySize = params.get<bool>("norm_by_size", true);
}
#ifdef HAVE_OPENCL
Ptr<OCL4DNNLRN<float> > lrnOp;
#endif
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
    backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
    return bias == (int)bias;
}
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
backendId == DNN_BACKEND_HALIDE ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM));
}
#ifdef HAVE_OPENCL
virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
{
lrnOp.release();
}
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (lrnOp.empty())
{
OCL4DNNLRNConfig config;
config.lrn_type = type == CHANNEL_NRM ?
LRNParameter_NormRegion_ACROSS_CHANNELS :
LRNParameter_NormRegion_WITHIN_CHANNEL;
CHECK_EQ(size % 2, 1) << "LRN only supports odd values for local_size";
config.local_size = size;
config.alpha = alpha;
config.beta = beta;
config.k = bias;
CHECK_EQ(4, inputs[0].dims) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
config.batch_size = inputs[0].size[0];
config.channels = inputs[0].size[1];
config.height = inputs[0].size[2];
config.width = inputs[0].size[3];
config.norm_by_size = normBySize;
config.use_half = use_half;
lrnOp = Ptr<OCL4DNNLRN<float> >(new OCL4DNNLRN<float>(config));
}
if (!lrnOp->Forward(inputs[0], outputs[0]))
return false;
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_Assert(inputs_arr.total() == outputs_arr.total());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(inputs.size() == outputs.size());
for (int i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i].dims == 4);
Mat &src = inputs[i];
Mat &dst = outputs[i];
switch (type)
{
case CHANNEL_NRM:
channelNormalization(src, dst);
break;
case SPATIAL_NRM:
spatialNormalization(src, dst);
break;
default:
CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
break;
}
}
}
class ChannelLRN : public ParallelLoopBody
{
public:
ChannelLRN(const float* src, float* dst, int channels, int ksize,
float alpha1, float bias1, float beta1,
size_t planeSize, int nsamples, int nstripes)
{
src_ = src; dst_ = dst;
channels_ = channels;
ksize_ = ksize;
alpha1_ = alpha1; bias1_ = bias1; beta1_ = beta1;
planeSize_ = planeSize; nsamples_ = nsamples; nstripes_ = nstripes;
}
void operator()(const Range& r) const CV_OVERRIDE
{
int nsamples = nsamples_, nstripes = nstripes_;
size_t planeSize = planeSize_, planeSize_n = planeSize * nsamples;
size_t elemsPerStripe = (planeSize_n + nstripes - 1)/nstripes;
size_t rstart = r.start*elemsPerStripe;
size_t rend = r.end == nstripes ? planeSize_n : r.end*elemsPerStripe;
rstart = std::min(rstart, planeSize_n);
rend = std::min(rend, planeSize_n);
float alpha1 = alpha1_, bias1 = bias1_, beta1 = beta1_;
int k, channels = channels_, ksize = ksize_;
AutoBuffer<float> buf_((channels + ksize + 1)*2);
float* acc = buf_.data();
float* buf = acc + channels + ksize + 1;
for( k = 0; k <= ksize; k++ )
buf[-k-1] = buf[channels + k] = 0.f;
for( size_t ofs = rstart; ofs < rend; )
{
int sampleIdx = (int)(ofs/planeSize);
if( sampleIdx >= nsamples )
break;
size_t ofs0 = ofs - sampleIdx*planeSize;
size_t ofs1 = std::min(planeSize - ofs0, rend - ofs) + ofs;
const float* src = src_ + sampleIdx*planeSize*channels + ofs0;
float* dst = dst_ + sampleIdx*planeSize*channels + ofs0;
for( ; ofs < ofs1; ofs++, src++, dst++ )
{
for( k = 0; k < channels; k++ )
buf[k] = src[k*planeSize];
float s = 0;
for( k = 0; k < ksize; k++ )
s += buf[k]*buf[k];
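// Slide the squared-sum window one channel at a time; adding x1^2 and
// removing x0^2 is fused via the identity x1^2 - x0^2 = (x1 + x0)*(x1 - x0).
// The log/mul/exp sequence below then evaluates acc^beta1 for all channels
// in one vectorized pass (beta1 = -beta, see channelNormalization).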
for( k = 0; k < channels; k++ )
{
float x1 = buf[k + ksize];
float x0 = buf[k - ksize - 1];
s = std::max(s + (x1 + x0)*(x1 - x0), 0.f);
acc[k] = (float)(alpha1*s + bias1);
}
hal::log32f(acc, acc, channels);
for( k = 0; k < channels; k++ )
acc[k] *= beta1;
hal::exp32f(acc, acc, channels);
for( k = 0; k < channels; k++ )
dst[k*planeSize] = buf[k]*acc[k];
}
}
}
const float* src_;
float* dst_;
float alpha1_, bias1_, beta1_;
size_t planeSize_;
int channels_, ksize_, nsamples_, nstripes_;
};
void channelNormalization(Mat &srcBlob, Mat &dstBlob)
{
int num = srcBlob.size[0];
int channels = srcBlob.size[1];
int ksize = (size - 1) / 2;
int sizeNormFactor = normBySize ? size : 1;
size_t planeSize = srcBlob.size[2]*srcBlob.size[3];
int nstripes = std::max(getNumThreads(), 1);
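// beta is negated so the worker computes acc^(-beta), i.e.
// dst = src / (bias + (alpha/sizeNormFactor) * sum(src^2))^beta.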
ChannelLRN clrn(srcBlob.ptr<float>(), dstBlob.ptr<float>(), channels,
ksize, alpha/sizeNormFactor, bias, -beta, planeSize, num, nstripes);
parallel_for_(Range(0, nstripes), clrn, nstripes);
}
void sqrBoxFilter_(const Mat &src, Mat &dst)
{
Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);
cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
}
void spatialNormalization(Mat &srcBlob, Mat &dstBlob)
{
int num = srcBlob.size[0];
int channels = srcBlob.size[1];
int sizeNormFactor = normBySize ? size*size : 1;
Mat srcMat = srcBlob;
Mat dstMat = dstBlob;
for (int n = 0; n < num; n++)
{
for (int cn = 0; cn < channels; cn++)
{
Mat src = getPlane(srcMat, n, cn);
Mat dst = getPlane(dstMat, n, cn);
sqrBoxFilter_(src, dst);
dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
cv::pow(dst, beta, dst);
cv::divide(src, dst, dst);
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
cuda4dnn::LRNType type_;
if (type == CHANNEL_NRM)
type_ = cuda4dnn::LRNType::ACROSS_CHANNELS;
else if (type == SPATIAL_NRM)
type_ = cuda4dnn::LRNType::WITHIN_CHANNEL;
else
CV_Error(Error::StsNotImplemented, "Unknown normalization region");
float alphaSize = alpha;
if (!normBySize) {
switch (type) {
case CHANNEL_NRM: alphaSize = alpha * size; break;
case SPATIAL_NRM: alphaSize = alpha * size * size; break;
}
}
std::size_t largestInputSize = 0;
for(auto& wrapper : inputs) {
auto input_wrapper = wrapper.dynamicCast<CUDABackendWrapper>();
auto shape = input_wrapper->getShape();
largestInputSize = std::max<std::size_t>(
largestInputSize,
std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<int>())
);
}
return make_cuda_node<cuda4dnn::LRNOp>(preferableTarget,
std::move(context->cudnn_handle), type_, size, alphaSize, beta, bias, largestInputSize);
}
#endif
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_VULKAN
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpLRN(size / 2, bias, alpha, beta, normBySize));
return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
#endif
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
float alphaSize = alpha;
if (normBySize)
alphaSize /= (type == CHANNEL_NRM ? size : size * size);
int width, height, channels, numImgs;
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
getCanonicalSize(inputBuffer, &width, &height, &channels, &numImgs);
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Func padded_sq(name + "_padded_sq");
Halide::Func sq("sq");
sq(x, y, c, n) = inputBuffer(x, y, c, n) * inputBuffer(x, y, c, n);
Halide::Func bounded =
Halide::BoundaryConditions::constant_exterior(sq, 0, 0, width,
0, height,
0, channels,
0, numImgs);
padded_sq(x, y, c, n) = bounded(x, y, c, n);
Halide::Expr base;
if (type == CHANNEL_NRM)
{
Halide::RDom r((1 - size) / 2, size);
base = alphaSize * sum(padded_sq(x, y, c + r, n));
}
else // SPATIAL_NRM
{
Halide::RDom r((1 - size) / 2, size, (1 - size) / 2, size);
base = alphaSize * sum(padded_sq(x + r.x, y + r.y, c, n));
}
base += static_cast<float>(bias);
top(x, y, c, n) = inputBuffer(x, y, c, n) / pow(base, beta);
return Ptr<BackendNode>(new HalideBackendNode({ padded_sq, top }));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
virtual void applyHalideScheduler(Ptr<BackendNode>& node,
const std::vector<Mat*> &inputs,
const std::vector<Mat> &outputs,
int targetId) const CV_OVERRIDE
{
#ifdef HAVE_HALIDE
if (targetId != DNN_TARGET_CPU)
{
Layer::applyHalideScheduler(node, inputs, outputs, targetId);
return;
}
int outW, outH, outC, outN;
getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
Halide::Var x("x"), y("y"), c("c"), n("n"), yo("yo"), yi("yi"), tile("tile");
Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs[1];
Halide::Func& padded_sq = node.dynamicCast<HalideBackendNode>()->funcs[0];
if (outW < 8 || outH <= 2)
return;
top.reorder(x, c, y, n)
.split(y, yo, yi, 2)
.fuse(yo, n, tile)
.parallel(tile)
.unroll(yi)
.vectorize(x, 8);
padded_sq.store_at(top, tile)
.compute_at(top, yi);
#endif // HAVE_HALIDE
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
float alphaSize = alpha;
if (!normBySize)
alphaSize *= (type == SPATIAL_NRM ? size*size : size);
InferenceEngine::Builder::NormLayer ieLayer(name);
ieLayer.setSize(size);
ieLayer.setAlpha(alphaSize);
ieLayer.setBeta(beta);
ieLayer.setAcrossMaps(type == CHANNEL_NRM);
InferenceEngine::Builder::Layer l = ieLayer;
l.getParameters()["k"] = bias;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
float alphaSize = alpha;
if (!normBySize)
alphaSize *= (type == SPATIAL_NRM ? size*size : size);
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<int64_t> axes;
if (type != SPATIAL_NRM) {
axes = {1};
} else {
axes.resize(ieInpNode->get_shape().size() - 2);
std::iota(axes.begin(), axes.end(), 2);
}
auto ngraph_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes.size()}, axes.data());
auto lrn = std::make_shared<ngraph::op::LRN>(ieInpNode, ngraph_axes, alphaSize, beta, bias, size);
return Ptr<BackendNode>(new InfEngineNgraphNode(lrn));
}
#endif // HAVE_DNN_NGRAPH
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
CV_Assert(inputs.size() > 0);
long flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
if (type == CHANNEL_NRM)
{
int channels = inputs[i][1];
int ksize = (size - 1) / 2;
flops += inputs[i][0]*(std::min(ksize, channels)*2*total(inputs[i], 2) + channels*4*total(inputs[i], 2));
if (ksize < channels)
{
flops += (size + 2*(channels - size))*total(inputs[i], 2);
}
}
else
{
flops += total(inputs[i])*(2*size*size + 2);
}
}
return flops;
}
private:
enum Type
{
CHANNEL_NRM,
SPATIAL_NRM
};
};
Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
{
return Ptr<LRNLayer>(new LRNLayerImpl(params));
}
}
}

View File

@ -0,0 +1,196 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Batch Normalization layer.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/max_unpooling.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class MaxUnpoolLayerImpl CV_FINAL : public MaxUnpoolLayer
{
public:
MaxUnpoolLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
poolKernel = Size(params.get<int>("pool_k_w"), params.get<int>("pool_k_h"));
poolPad = Size(params.get<int>("pool_pad_w"), params.get<int>("pool_pad_h"));
poolStride = Size(params.get<int>("pool_stride_w"), params.get<int>("pool_stride_h"));
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 2 || inputs.size() == 3);
CV_Assert(total(inputs[0]) == total(inputs[1]));
MatShape outShape;
if (inputs.size() == 2)
{
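// Invert the pooling shape formula: out = (in - 1)*stride + kernel - 2*pad.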
outShape = inputs[0];
outShape[2] = (outShape[2] - 1) * poolStride.height + poolKernel.height - 2 * poolPad.height;
outShape[3] = (outShape[3] - 1) * poolStride.width + poolKernel.width - 2 * poolPad.width;
}
else
outShape = inputs[2];
outputs.clear();
outputs.push_back(outShape);
return false;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(inputs.size() == 2 || inputs.size() == 3);
Mat& input = inputs[0];
Mat& indices = inputs[1];
CV_Assert(input.total() == indices.total());
CV_Assert(input.size[0] == 1);
CV_Assert(input.isContinuous());
for(int i_n = 0; i_n < outputs.size(); i_n++)
{
Mat& outBlob = outputs[i_n];
outBlob.setTo(0);
CV_Assert(input.size[1] == outBlob.size[1]);
int outPlaneTotal = outBlob.size[2]*outBlob.size[3];
for (int i_c = 0; i_c < input.size[1]; i_c++)
{
Mat outPlane = getPlane(outBlob, 0, i_c);
int wh_area = input.size[2]*input.size[3];
const float* inptr = input.ptr<float>(0, i_c);
const float* idxptr = indices.ptr<float>(0, i_c);
float* outptr = outPlane.ptr<float>();
for(int i_wh = 0; i_wh < wh_area; i_wh++)
{
int index = idxptr[i_wh];
if (!(0 <= index && index < outPlaneTotal))
{
std::cerr
<< "i_n=" << i_n << std::endl
<< "i_c=" << i_c << std::endl
<< "i_wh=" << i_wh << std::endl
<< "index=" << index << std::endl
<< "maxval=" << inptr[i_wh] << std::endl
<< "outPlaneTotal=" << outPlaneTotal << std::endl
<< "input.size=" << input.size << std::endl
<< "indices.size=" << indices.size << std::endl
<< "outBlob=" << outBlob.size << std::endl
;
CV_Assert(0 <= index && index < outPlaneTotal);
}
outptr[index] = inptr[i_wh];
}
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
cuda4dnn::MaxUnpoolingConfiguration config;
auto& window_size = config.window_size;
window_size.resize(2);
window_size[0] = poolKernel.height;
window_size[1] = poolKernel.width;
auto& strides = config.strides;
strides.resize(2);
strides[0] = poolStride.height;
strides[1] = poolStride.width;
auto& pads_begin = config.pads_begin;
pads_begin.resize(2);
pads_begin[0] = poolPad.height;
pads_begin[1] = poolPad.width;
return make_cuda_node<cuda4dnn::MaxUnpoolingOp>(preferableTarget, std::move(context->stream), config);
}
#endif
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
// Meaningless when kernel != stride: if kernel > stride the result is
// not deterministic, and if kernel < stride part of the input data is
// simply skipped (consider changing the model).
if (poolKernel.width != poolStride.width ||
    poolKernel.height != poolStride.height)
    CV_Error(cv::Error::StsNotImplemented,
             "Halide backend for maximum unpooling "
             "does not support cases when kernel != stride");
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Buffer<float> inputBuffer = halideBuffer(input[0]);
Halide::Buffer<float> indices = halideBuffer(input[1]);
Halide::Expr pooledX = x / poolKernel.width;
Halide::Expr pooledY = y / poolKernel.height;
const int outW = inputBuffer.width() * poolKernel.width;
top(x, y, c, n) = select(y * outW + x == indices(pooledX, pooledY, c, n),
inputBuffer(pooledX, pooledY, c, n), 0.0f);
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
};
Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
{
return Ptr<MaxUnpoolLayer>(new MaxUnpoolLayerImpl(params));
}
}
}

View File

@ -0,0 +1,463 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_cuda.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_OPENCL
#include "../ocl4dnn/include/math_functions.hpp"
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/mvn.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class MVNLayerImpl CV_FINAL : public MVNLayer
{
public:
MVNLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
normVariance = params.get<bool>("normalize_variance", true);
acrossChannels = params.get<bool>("across_channels", false);
eps = params.get<double>("eps", 1e-9);
fuse_batch_norm = false;
fuse_relu = false;
relu_slope = 0.f;
zeroDev = false;
}
Mat scale, shift;
#ifdef HAVE_OPENCL
UMat umat_scale, umat_shift;
#endif
bool fuse_batch_norm;
Ptr<ReLULayer> activ_relu;
float relu_slope;
bool fuse_relu;
bool zeroDev; // TODO: Not taken into account in Intel's Inference Engine backend.
bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
{
if (!layer.empty() && !fuse_relu && !fuse_batch_norm)
{
layer->getScaleShift(scale, shift);
fuse_batch_norm = !scale.empty() || !shift.empty();
return fuse_batch_norm;
}
if (!layer.empty() && preferableTarget == DNN_TARGET_OPENCL)
{
activ_relu = layer.dynamicCast<ReLULayer>();
if( !activ_relu.empty() )
relu_slope = activ_relu->negativeSlope;
}
fuse_relu = !activ_relu.empty();
return fuse_relu;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
int splitDim = (acrossChannels) ? 1 : 2;
int i, newRows = 1;
for( i = 0; i < splitDim; i++ )
newRows *= inputs[0].size[i];
zeroDev = inputs[0].total() == newRows;
#ifdef HAVE_OPENCL
umat_scale.release();
umat_shift.release();
#endif
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
return !zeroDev && (!isMyriad || eps <= 1e-7f);
}
#endif
#ifdef HAVE_DNN_NGRAPH
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return true;
#endif
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
}
#ifdef HAVE_OPENCL
bool fast_forward_ocl(std::vector<UMat> &inputs, std::vector<UMat> &outputs)
{
if (umat_scale.empty() && !scale.empty())
scale.copyTo(umat_scale);
if (umat_shift.empty() && !shift.empty())
shift.copyTo(umat_shift);
UMat& bnorm_weight = umat_scale;
UMat& bnorm_bias = umat_shift;
const unsigned LOCAL_SIZE = 128;
bool use_half = (inputs[0].depth() == CV_16S);
String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u", use_half ? "half" : "float",
use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4",
LOCAL_SIZE
);
int splitDim = (acrossChannels) ? 1 : 2;
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
{
UMat &inpMat = inputs[inpIdx];
UMat &outMat = outputs[inpIdx];
int newRows = total(shape(inpMat), 0, splitDim);
CV_Assert(newRows != 0);
MatShape s = shape(newRows, inpMat.total() / newRows);
UMat meanMat = UMat(s[0], 1, (use_half) ? CV_16S : CV_32F);
UMat tmpMat = UMat(s[0], s[1], CV_32F);
float alpha = 1.0f / s[1];
String buildopt = "-DNUM=4" + opts;
ocl::Kernel k("mean_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN_FUSE");
size_t localsize[] = { LOCAL_SIZE };
size_t globalsize[] = { (size_t)s[0] / 4 * localsize[0] };
int argId = 0;
k.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat));
k.set(argId++, (int)s[1]);
k.set(argId++, alpha);
k.set(argId++, ocl::KernelArg::PtrWriteOnly(meanMat));
k.set(argId++, ocl::KernelArg::PtrWriteOnly(tmpMat));
bool ret = k.run(1, globalsize, localsize, false);
if (!ret)
return false;
buildopt += format(" %s %s", (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "",
(fuse_relu) ? "-DFUSE_RELU" : "");
ocl::Kernel k1("mvn_fuse4", ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MVN_FUSE");
argId = 0;
k1.set(argId++, ocl::KernelArg::PtrReadOnly(tmpMat));
k1.set(argId++, ocl::KernelArg::PtrReadOnly(inpMat));
k1.set(argId++, ocl::KernelArg::PtrReadOnly(meanMat));
k1.set(argId++, (int)s[1]);
k1.set(argId++, (float)alpha);
k1.set(argId++, (float)eps);
k1.set(argId++, (float)relu_slope);
k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_weight));
k1.set(argId++, ocl::KernelArg::PtrReadOnly(bnorm_bias));
k1.set(argId++, ocl::KernelArg::PtrWriteOnly(outMat));
ret = k1.run_(1, globalsize, localsize, false);
if (!ret)
return false;
}
return true;
}
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
if (umat_scale.empty() && !scale.empty())
scale.copyTo(umat_scale);
if (umat_shift.empty() && !shift.empty())
shift.copyTo(umat_shift);
UMat& bnorm_weight = umat_scale;
UMat& bnorm_bias = umat_shift;
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
int splitDim = (acrossChannels) ? 1 : 2;
int row_size = total(shape(inputs[0]), 0, splitDim);
int plane_size = total(shape(inputs[0]), splitDim);
if (normVariance && (row_size % 4 == 0) && (plane_size % 4 == 0))
return fast_forward_ocl(inputs, outputs);
if (inputs[0].depth() == CV_16S)
return false;
String opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
{
UMat &inpMat = inputs[inpIdx];
UMat &outMat = outputs[inpIdx];
int newRows = total(shape(inpMat), 0, splitDim);
CV_Assert(newRows != 0);
MatShape s = shape(newRows, inpMat.total() / newRows);
UMat oneMat = UMat::ones(s[1], 1, CV_32F);
UMat meanMat = UMat(s[0], 1, CV_32F);
UMat devMat = UMat(s[0], 1, CV_32F);
UMat tmpMat = UMat(s[0], s[1], CV_32F);
float alpha = 1.0f / s[1];
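// meanMat = alpha * inpMat * ones: the per-row mean expressed as a GEMV.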
bool ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
inpMat, 0, oneMat, 0, 0.0f, meanMat, 0);
if (!ret)
return false;
int number = (s[1] % 8 == 0) ? 8 : ((s[1] % 4 == 0) ? 4 : 1);
size_t global[] = { (size_t)s[0], (size_t)(s[1] / number) };
String buildopt = format("-DNUM=%d", number) + opts;
if (normVariance)
{
String kname = format("calc_mean%d", number);
ocl::Kernel kernel(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt + " -DKERNEL_MEAN");
if (kernel.empty())
return false;
kernel.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
kernel.set(1, (int)s[0]);
kernel.set(2, (int)s[1]);
kernel.set(3, ocl::KernelArg::PtrReadOnly(meanMat));
kernel.set(4, ocl::KernelArg::PtrWriteOnly(tmpMat));
ret = kernel.run(2, global, NULL, false);
if (!ret)
return false;
ret = ocl4dnn::ocl4dnnGEMV<float>(ocl4dnn::CblasNoTrans, s[0], s[1], alpha,
tmpMat, 0, oneMat, 0, 0.0f, devMat, 0);
if (!ret)
return false;
}
String kname = format("mvn%d", number);
buildopt += format("%s%s%s -DKERNEL_MVN", (normVariance) ? " -DNORM_VARIANCE" : "",
(fuse_batch_norm) ? " -DFUSE_BATCH_NORM" : "",
(fuse_relu) ? " -DFUSE_RELU" : "");
ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
if (kernel1.empty())
return false;
kernel1.set(0, ocl::KernelArg::PtrReadOnly(inpMat));
kernel1.set(1, (int)s[0]);
kernel1.set(2, (int)s[1]);
kernel1.set(3, (float)eps);
kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
kernel1.set(6, ocl::KernelArg::PtrReadOnly(bnorm_weight));
kernel1.set(7, ocl::KernelArg::PtrReadOnly(bnorm_bias));
kernel1.set(8, (int)inpMat.size[1]);
kernel1.set(9, (float)relu_slope);
kernel1.set(10, ocl::KernelArg::PtrWriteOnly(outMat));
ret = kernel1.run(2, global, NULL, false);
if (!ret)
return false;
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
{
Mat &inpBlob = inputs[inpIdx];
Mat &outBlob = outputs[inpIdx];
int splitDim = (acrossChannels) ? 1 : 2;
int i, newRows = 1;
for( i = 0; i < splitDim; i++ )
newRows *= inpBlob.size[i];
Mat inpMat = inpBlob.reshape(1, newRows);
Mat outMat = outBlob.reshape(1, newRows);
if ( inpBlob.total() == newRows )
{
// Degenerate case: each row holds a single value, so mean subtraction
// zeroes the output (plus the fused shift, if any).
if (shift.empty())
{
outBlob.setTo(0);
}
else
{
for ( i = 0; i < newRows; i++ )
{
outMat.row(i).setTo(((float*)shift.data)[i]);
}
}
return;
}
Scalar mean, dev;
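// Per-row MVN: out = (x - mean) / sqrt(var + eps); with a fused batch-norm
// this becomes out = weight * (x - mean) / sqrt(var + eps) + bias.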
for ( i = 0; i < newRows; i++)
{
Mat inpRow = inpMat.row(i);
Mat outRow = outMat.row(i);
float weight = 1.f;
float bias = 0.f;
if (fuse_batch_norm)
{
weight = i < scale.cols ? ((float*)scale.data)[i] : weight;
bias = i < shift.cols ? ((float*)shift.data)[i] : bias;
}
cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
double alpha = 1;
if (normVariance)
{
alpha = 1 / std::sqrt(eps + dev[0]*dev[0]);
}
double normalizationScale = 1.0;
double normalizationShift = 0.0;
if (fuse_batch_norm)
{
normalizationScale = alpha * weight;
normalizationShift = -mean[0] * normalizationScale + bias;
}
else
{
normalizationScale = alpha;
normalizationShift = -mean[0] * alpha;
}
inpRow.convertTo(outRow, outRow.type(), normalizationScale, normalizationShift);
}
}
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::MVNLayer ieLayer(name);
ieLayer.setAcrossChannels(acrossChannels);
ieLayer.setNormalize(normVariance);
ieLayer.setEpsilon(eps);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2021_2)
auto mvn = std::make_shared<ngraph::op::MVN>(ieInpNode, acrossChannels, normVariance, eps);
#else
int64_t start_axis = acrossChannels ? 1 : 2;
std::vector<int64_t> axes_v(ieInpNode->get_shape().size() - start_axis);
std::iota(axes_v.begin(), axes_v.end(), start_axis);
auto axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes_v.size()}, axes_v.data());
auto mvn = std::make_shared<ngraph::op::v6::MVN>(ieInpNode, axes, normVariance, eps, ngraph::op::MVNEpsMode::INSIDE_SQRT);
#endif
return Ptr<BackendNode>(new InfEngineNgraphNode(mvn));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
cuda4dnn::MVNConfiguration config;
config.split_axis = acrossChannels ? 1 : 2;
config.normalize_variance = normVariance;
config.epsilon = eps;
config.input_shapes.resize(inputs.size());
for (int i = 0; i < inputs.size(); i++)
{
auto wrapper = inputs[i].dynamicCast<CUDABackendWrapper>();
auto shape = wrapper->getShape();
config.input_shapes[i].assign(std::begin(shape), std::end(shape));
}
return make_cuda_node<cuda4dnn::MVNOp>(preferableTarget, std::move(context->stream), config);
}
#endif
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
long flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
flops += 6*total(inputs[i]) + 3*total(inputs[i], 0, normVariance ? 2 : 1);
}
return flops;
}
};
Ptr<MVNLayer> MVNLayer::create(const LayerParams& params)
{
return Ptr<MVNLayer>(new MVNLayerImpl(params));
}
}
}

View File

@ -0,0 +1,402 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/normalize_bbox.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
class NormalizeBBoxLayerImpl CV_FINAL : public NormalizeBBoxLayer
{
public:
NormalizeBBoxLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
pnorm = params.get<float>("p", 2);
epsilon = params.get<float>("eps", 1e-10f);
acrossSpatial = params.get<bool>("across_spatial", true);
startAxis = params.get<int>("start_axis", 1);
CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
CV_Assert(pnorm > 0);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (pnorm != 2)
return false;
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && isMyriad)
return !acrossSpatial;
return startAxis == 1;
}
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
internals.resize(1, inputs[0]);
internals[0][0] = 1; // Batch size.
return true;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
CV_Assert(inputs.size() == 1);
endAxis = endAxis == -1 ? (inputs[0].dims - 1) : endAxis;
startAxis = startAxis == -1 ? (inputs[0].dims - 1) : startAxis;
acrossSpatial = (startAxis == 1 && endAxis == inputs[0].dims - 1);
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
std::vector<UMat> internals;
if (inputs_.depth() == CV_16S)
return false;
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
internals_.getUMatVector(internals);
CV_Assert(inputs.size() == 1 && outputs.size() == 1);
CV_Assert(inputs[0].total() == outputs[0].total());
const UMat& inp0 = inputs[0];
UMat& buffer = internals[0];
startAxis = normalize_axis(startAxis, inp0.dims);
endAxis = normalize_axis(endAxis, inp0.dims);
size_t num = total(shape(inp0.size), 0, startAxis);
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
size_t planeSize = inp0.total() / (num * numPlanes);
MatShape s = shape(1, inputs[0].total());
UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
for (size_t i = 0; i < num; ++i)
{
s = shape(numPlanes, planeSize);
UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
UMat dst = out.row(i).reshape(1, s.size(), &s[0]);
UMat abs_mat;
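// absdiff against zero yields |src| element-wise.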
absdiff(src, cv::Scalar::all(0), abs_mat);
pow(abs_mat, pnorm, buffer);
if (planeSize == 1)
{
// add eps to avoid division by zero
float absSum = sum(buffer)[0] + epsilon;
float norm = pow(absSum, 1.0f / pnorm);
multiply(src, 1.0f / norm, dst);
}
else
{
Mat norm;
reduce(buffer, norm, 0, REDUCE_SUM);
norm += epsilon;
// compute the inverse norm so we can multiply instead of divide
cv::pow(norm, -1.0f / pnorm, norm);
repeat(norm, numPlanes, 1, buffer);
multiply(src, buffer, dst);
}
if (!blobs.empty())
{
// scale the output
Mat scale = blobs[0];
if (scale.total() == 1)
{
// _scale: 1 x 1
multiply(dst, scale.at<float>(0, 0), dst);
}
else
{
// _scale: _channels x 1
CV_Assert(scale.total() == numPlanes);
repeat(scale, 1, dst.cols, buffer);
multiply(dst, buffer, dst);
}
}
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
CV_Assert(inputs.size() == 1 && outputs.size() == 1);
CV_Assert(inputs[0].total() == outputs[0].total());
const Mat& inp0 = inputs[0];
Mat& buffer = internals[0];
startAxis = normalize_axis(startAxis, inp0.dims);
endAxis = normalize_axis(endAxis, inp0.dims);
const float* inpData = inp0.ptr<float>();
float* outData = outputs[0].ptr<float>();
size_t num = total(shape(inp0.size), 0, startAxis);
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
CV_Assert(num * numPlanes != 0);
size_t planeSize = inp0.total() / (num * numPlanes);
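// Lp normalization over axes [startAxis, endAxis]:
// dst = src / (eps + sum |src|^p)^(1/p), applied to each outer slice.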
for (size_t n = 0; n < num; ++n)
{
Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
cv::pow(abs(src), pnorm, buffer);
if (planeSize == 1)
{
// add eps to avoid division by zero
float absSum = sum(buffer)[0] + epsilon;
float norm = pow(absSum, 1.0f / pnorm);
multiply(src, 1.0f / norm, dst);
}
else
{
Mat norm;
reduce(buffer, norm, 0, REDUCE_SUM);
norm += epsilon;
// compute the inverse norm so we can multiply instead of divide
cv::pow(norm, -1.0f / pnorm, norm);
repeat(norm, numPlanes, 1, buffer);
multiply(src, buffer, dst);
}
if (!blobs.empty())
{
// scale the output
Mat scale = blobs[0];
if (scale.total() == 1)
{
// _scale: 1 x 1
dst *= scale.at<float>(0, 0);
}
else
{
// _scale: _channels x 1
CV_Assert(scale.total() == numPlanes);
repeat(scale, 1, dst.cols, buffer);
multiply(dst, buffer, dst);
}
}
inpData += numPlanes * planeSize;
outData += numPlanes * planeSize;
}
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
std::vector<size_t> dims = input->getDims();
if (dims.size() == 4)
{
InferenceEngine::Builder::NormalizeLayer ieLayer(name);
ieLayer.setChannelShared(false);
ieLayer.setAcrossMaps(acrossSpatial);
ieLayer.setEpsilon(epsilon);
InferenceEngine::Builder::Layer l = ieLayer;
const int numChannels = dims[1];
InferenceEngine::Blob::Ptr weights;
if (blobs.empty())
{
weights = InferenceEngine::make_shared_blob<float>({
InferenceEngine::Precision::FP32,
{(size_t)numChannels}, InferenceEngine::Layout::C
});
weights->allocate();
Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels);
Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat);
l.getParameters()["channel_shared"] = false;
}
else
{
CV_Assert(numChannels == blobs[0].total());
weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C);
l.getParameters()["channel_shared"] = blobs[0].total() == 1;
}
addConstantData("weights", weights, l);
l.getParameters()["across_spatial"] = acrossSpatial;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
else
{
InferenceEngine::Builder::GRNLayer ieLayer(name);
ieLayer.setBeta(epsilon);
InferenceEngine::Builder::Layer l = ieLayer;
l.getParameters()["bias"] = epsilon;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
const size_t batch = ieInpNode->get_shape()[0];
const size_t numChannels = ieInpNode->get_shape()[1];
std::vector<int64_t> axes_data;
if (!acrossSpatial) {
axes_data.push_back(1);
} else {
axes_data.resize(ieInpNode->get_shape().size() - 1);
std::iota(axes_data.begin(), axes_data.end(), 1);
}
auto axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data);
auto norm = std::make_shared<ngraph::op::v0::NormalizeL2>(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD);
CV_Assert(blobs.empty() || numChannels == blobs[0].total());
std::vector<size_t> shape(ieInpNode->get_shape().size(), 1);
shape[0] = blobs.empty() ? 1 : batch;
shape[1] = numChannels;
if (!blobs.empty())
{
auto weight = std::make_shared<ngraph::op::Constant>(
ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2)
auto mul = std::make_shared<ngraph::op::v1::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
#else
auto mul = std::make_shared<ngraph::op::v0::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
#endif
return Ptr<BackendNode>(new InfEngineNgraphNode(mul));
}
return Ptr<BackendNode>(new InfEngineNgraphNode(norm));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
if(pnorm != 1 && pnorm != 2)
CV_Error(Error::StsNotImplemented, "Unsupported normalization mode");
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
auto input_shape = input_wrapper->getShape();
NormalizeConfiguration<float> config;
config.input_shape.assign(std::begin(input_shape), std::end(input_shape));
config.axis_start = normalize_axis(startAxis, input_shape.size());
config.axis_end = normalize_axis(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */
config.norm = pnorm;
config.eps = epsilon;
const auto& weightsMat = blobs.empty() ? Mat() : blobs[0];
return make_cuda_node<cuda4dnn::NormalizeOp>(preferableTarget, std::move(context->stream), weightsMat, config);
}
#endif
private:
int startAxis, endAxis;
};
Ptr<NormalizeBBoxLayer> NormalizeBBoxLayer::create(const LayerParams &params)
{
return Ptr<NormalizeBBoxLayer>(new NormalizeBBoxLayerImpl(params));
}
}
}

View File

@ -0,0 +1,194 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "../precomp.hpp"
#include "../dnn_common.hpp"
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
namespace detail {
class NotImplementedImpl CV_FINAL : public NotImplemented
{
public:
NotImplementedImpl(const LayerParams& params)
{
setParamsFrom(params);
CV_Assert(params.has("type"));
std::stringstream ss;
ss << "Node for layer '" << params.name << "' of type '" << params.get("type") << "' wasn't initialized.";
msg = ss.str();
}
CV_DEPRECATED_EXTERNAL
virtual void finalize(const std::vector<Mat*> &input, std::vector<Mat> &output) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual void finalize(InputArrayOfArrays inputs, OutputArrayOfArrays outputs) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
CV_DEPRECATED_EXTERNAL
virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
{
CV_Error(Error::StsNotImplemented, msg);
}
CV_DEPRECATED_EXTERNAL
void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs)
{
CV_Error(Error::StsNotImplemented, msg);
}
CV_DEPRECATED std::vector<Mat> finalize(const std::vector<Mat> &inputs)
{
CV_Error(Error::StsNotImplemented, msg);
}
CV_DEPRECATED void run(const std::vector<Mat> &inputs,
CV_OUT std::vector<Mat> &outputs,
CV_IN_OUT std::vector<Mat> &internals)
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual int inputNameToIndex(String inputName) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual int outputNameToIndex(const String& outputName) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual Ptr<BackendNode> initCUDA(
void *context,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual void applyHalideScheduler(Ptr<BackendNode>& node,
const std::vector<Mat*> &inputs,
const std::vector<Mat> &outputs,
int targetId) const CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual void unsetAttached() CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, msg);
}
private:
std::string msg;
};
Ptr<Layer> NotImplemented::create(const LayerParams& params)
{
return makePtr<NotImplementedImpl>(params);
}
Ptr<Layer> notImplementedRegisterer(LayerParams &params)
{
return detail::NotImplemented::create(params);
}
void NotImplemented::Register()
{
LayerFactory::registerLayer("NotImplemented", detail::notImplementedRegisterer);
}
void NotImplemented::unRegister()
{
LayerFactory::unregisterLayer("NotImplemented");
}
} // namespace detail
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn

View File

@ -0,0 +1,293 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of padding layer, which adds paddings to input blob.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <vector>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/padding.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class PaddingLayerImpl CV_FINAL : public PaddingLayer
{
public:
PaddingLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
paddingValue = params.get<float>("value", 0);
inputDims = params.get<int>("input_dims", -1);
paddingType = params.get<String>("type", "constant");
CV_Assert(params.has("paddings"));
const DictValue& paddingsParam = params.get("paddings");
CV_Assert((paddingsParam.size() & 1) == 0);
paddings.resize(paddingsParam.size() / 2);
for (int i = 0; i < paddings.size(); ++i)
{
paddings[i].first = paddingsParam.get<int>(i * 2); // Pad before.
paddings[i].second = paddingsParam.get<int>(i * 2 + 1); // Pad after.
CV_Assert_N(paddings[i].first >= 0, paddings[i].second >= 0);
}
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
const MatShape& inpShape = inputs[0];
CV_Assert(inpShape.size() >= paddings.size());
CV_Assert(inputDims == -1 || inpShape.size() == inputDims || inpShape.size() > paddings.size());
outputs.resize(1, inpShape);
int offset = (inputDims == -1 ? 0 : (inpShape.size() > inputDims ? 1 : 0));
for (int i = 0; i < paddings.size(); ++i)
{
outputs[0][offset + i] = inpShape[offset + i] + paddings[i].first + paddings[i].second;
}
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
// Compute dstRanges.
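// dstRanges marks where the input lands inside the padded output, e.g.
// paddings {(1,1),(2,2)} on an HxW blob give ranges [1, 1+H) x [2, 2+W).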
const MatSize& inpShape = inputs[0].size;
if (inputDims != -1 && inputs[0].dims != inputDims)
{
paddings.insert(paddings.begin(), std::make_pair(0, 0));
}
dstRanges.resize(paddings.size());
for (int i = 0; i < paddings.size(); ++i)
{
dstRanges[i].start = paddings[i].first;
dstRanges[i].end = paddings[i].first + inpShape[i];
}
// Add the rest of dimensions.
for (int i = dstRanges.size(); i < inputs[0].dims; ++i)
{
dstRanges.push_back(Range::all());
paddings.push_back(std::make_pair(0, 0));
}
inputDims = -1; // From now on, paddings are specified for all dimensions.
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
if (INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && isMyriad)
return dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0;
return (dstRanges.size() <= 4 || !isArmComputePlugin());
}
#endif
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
if (paddingType == "constant")
{
if (inputs_arr.depth() == CV_16S)
{
std::vector<float> paddingValue_fp32(1, paddingValue);
std::vector<int16_t> paddingValue_fp16(1);
cv::convertFp16(paddingValue_fp32, paddingValue_fp16);
outputs[0].setTo(paddingValue_fp16[0]);
}
else if (inputs_arr.depth() == CV_8S)
outputs[0].setTo(saturate_cast<int8_t>(paddingValue));
else
outputs[0].setTo(paddingValue);
inputs[0].copyTo(outputs[0](dstRanges));
}
else if (paddingType == "reflect")
{
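// Reflection padding mirrors the border without repeating the edge pixel
// (BORDER_REFLECT_101): a row |1 2 3| padded by 2 on the left becomes |3 2 1 2 3|.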
CV_Assert(inputs.size() == 1);
CV_Assert(outputs.size() == 1);
CV_Assert(inputs[0].dims == 4);
CV_Assert(outputs[0].dims == 4);
if (inputs[0].size[0] != outputs[0].size[0] || inputs[0].size[1] != outputs[0].size[1])
CV_Error(Error::StsNotImplemented, "Only spatial reflection padding is supported.");
const int inpHeight = inputs[0].size[2];
const int inpWidth = inputs[0].size[3];
const int outHeight = outputs[0].size[2];
const int outWidth = outputs[0].size[3];
const int padTop = dstRanges[2].start;
const int padBottom = outHeight - dstRanges[2].end;
const int padLeft = dstRanges[3].start;
const int padRight = outWidth - dstRanges[3].end;
CV_CheckLT(padTop, inpHeight, ""); CV_CheckLT(padBottom, inpHeight, "");
CV_CheckLT(padLeft, inpWidth, ""); CV_CheckLT(padRight, inpWidth, "");
for (size_t n = 0; n < inputs[0].size[0]; ++n)
{
for (size_t ch = 0; ch < inputs[0].size[1]; ++ch)
{
copyMakeBorder(getPlane(inputs[0], n, ch),
getPlane(outputs[0], n, ch),
padTop, padBottom, padLeft, padRight,
BORDER_REFLECT_101);
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown padding type: " + paddingType);
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
cuda4dnn::PaddingType ptype;
if (paddingType == "constant")
ptype = PaddingType::CONSTANT;
else if (paddingType == "reflect")
ptype = PaddingType::REFLECTION101;
else
CV_Error(Error::StsNotImplemented, "Unsupported padding mode");
return make_cuda_node<cuda4dnn::PaddingOp>(preferableTarget, std::move(context->stream), ptype, paddingValue, dstRanges);
}
#endif
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
int inW, inH, inC, inN;
int minN = std::max(dstRanges[0].start, 0);
int minC = std::max(dstRanges[1].start, 0);
int minY = std::max(dstRanges[2].start, 0);
int minX = std::max(dstRanges[3].start, 0);
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Func padded =
Halide::BoundaryConditions::constant_exterior(inputBuffer, paddingValue);
top(x, y, c, n) = padded(x - minX, y - minY, c - minC, n - minN);
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Pad");
std::vector<int> begins(paddings.size(), 0), ends(paddings.size(), 0);
for (int i = 0; i < paddings.size(); ++i)
{
begins[i] = paddings[i].first;
ends[i] = paddings[i].second;
}
ieLayer.getParameters()["pads_begin"] = begins;
ieLayer.getParameters()["pads_end"] = ends;
ieLayer.getParameters()["pad_mode"] = paddingType;
if (paddingType == "constant")
ieLayer.getParameters()["pad_value"] = paddingValue;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<int64_t> begins(paddings.size(), 0), ends(paddings.size(), 0);
for (int i = 0; i < paddings.size(); ++i)
{
begins[i] = static_cast<int64_t>(paddings[i].first);
ends[i] = static_cast<int64_t>(paddings[i].second);
}
auto padding_below = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{begins.size()}, begins.data());
auto padding_above = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ends.size()}, ends.data());
auto pad_mode = paddingType == "constant" ? ngraph::op::PadMode::CONSTANT : ngraph::op::PadMode::REFLECT; // SYMMETRIC
auto arg_pad_value = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{}, &paddingValue);
auto pad = paddingType == "constant" ?
std::make_shared<ngraph::op::v1::Pad>(ieInpNode, padding_below, padding_above, arg_pad_value, pad_mode) :
std::make_shared<ngraph::op::v1::Pad>(ieInpNode, padding_below, padding_above, pad_mode);
return Ptr<BackendNode>(new InfEngineNgraphNode(pad));
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
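// Requantize the padding value into the output domain:
// q = zero_point + round(value / scale), matching the output quantization.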
float outputScale = scales[1][0];
int outputZp = zeropoints[1][0];
float padValue = outputZp + std::round(params.get<float>("value", 0)/outputScale);
params.set("value", padValue);
return true;
}
private:
std::vector<std::pair<int, int> > paddings; // Pairs pad before, pad after.
std::vector<Range> dstRanges;
int inputDims;
float paddingValue;
std::string paddingType;
};
Ptr<PaddingLayer> PaddingLayer::create(const LayerParams &params)
{
return Ptr<PaddingLayer>(new PaddingLayerImpl(params));
}
}
}

View File

@ -0,0 +1,494 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#include <float.h>
#include <algorithm>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/permute.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class PermuteLayerImpl CV_FINAL : public PermuteLayer
{
public:
void checkNeedForPermutation()
{
_needsPermute = false;
for (size_t i = 0; i < _numAxes; ++i)
{
if (_order[i] != i)
{
_needsPermute = true;
break;
}
}
}
PermuteLayerImpl(const LayerParams &params)
: _count(0), _needsPermute(false), _numAxes(0)
{
if (!params.has("order"))
{
return;
}
DictValue paramOrder = params.get("order");
_numAxes = paramOrder.size();
for (size_t i = 0; i < _numAxes; i++)
{
int currentOrder = paramOrder.get<int>(i);
if (currentOrder < 0 || currentOrder > (int)_numAxes - 1)
{
CV_Error(Error::StsBadArg,
format("Orders of dimensions in Permute layer parameter "
"must be in [0...%zu]", _numAxes - 1));
}
if (std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
{
CV_Error(Error::StsBadArg,
"Permute layer parameter contains duplicated orders.");
}
_order.push_back(currentOrder);
}
setParamsFrom(params);
checkNeedForPermutation();
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && preferableTarget == DNN_TARGET_CPU)
return _order.size() <= 4 || !isArmComputePlugin();
#endif
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()) ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan());
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
if(!_needsPermute)
{
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return true;
}
CV_Assert(inputs.size() > 0);
CV_Assert((int)_numAxes == inputs[0].size());
MatShape shapeBefore = inputs[0], shapeAfter;
for (size_t i = 0; i < _numAxes; i++)
{
shapeAfter.push_back(shapeBefore[_order[i]]);
}
outputs.clear();
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(total(inputs[i]) == total(shapeAfter));
outputs.push_back(shapeAfter);
}
return false;
}
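// Strides are element counts, not bytes. For example, permuting a 2x3x4x5
// blob with order {0, 2, 3, 1} yields a 2x4x5x3 output with old strides
// {60, 20, 5, 1} and new strides {60, 15, 3, 1}.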
void computeStrides(const MatShape &shapeBefore, const MatShape &shapeAfter)
{
_oldStride.resize(_numAxes);
_newStride.resize(_numAxes);
_oldStride[_numAxes - 1] = 1;
_newStride[_numAxes - 1] = 1;
for(int i = _numAxes - 2; i >= 0; i--)
{
_oldStride[i] = _oldStride[i + 1] * shapeBefore[i + 1];
_newStride[i] = _newStride[i + 1] * shapeAfter[i + 1];
}
_count = _oldStride[0] * shapeBefore[0];
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
if(!_needsPermute)
{
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(inputs.size() > 0);
const Mat& inp0 = inputs[0];
CV_Assert((int)_numAxes == inp0.dims);
computeStrides(shape(inputs[0]), shape(outputs[0]));
#ifdef HAVE_OPENCL
uorder.release();
uold_stride.release();
unew_stride.release();
#endif
}
template <class T>
class PermuteInvoker : public ParallelLoopBody
{
public:
const Mat* inp;
Mat* out;
const std::vector<size_t>* order;
int nstripes;
static void run(const Mat& inp, Mat& out, const std::vector<size_t>& order, int nstripes)
{
PermuteInvoker p;
p.inp = &inp;
p.out = &out;
p.order = &order;
p.nstripes = nstripes;
CV_Assert( out.size[0] == inp.size[order[0]] &&
out.size[1] == inp.size[order[1]] &&
out.size[2] == inp.size[order[2]] &&
out.size[3] == inp.size[order[3]]);
parallel_for_(Range(0, nstripes), p, nstripes);
}
PermuteInvoker() : inp(0), out(0), order(0), nstripes(0) {}
void operator()(const Range& r) const CV_OVERRIDE
{
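// Each stripe covers a contiguous range of the n0*n1*n2 output rows; the
// innermost output dimension n3 is copied element-wise with the input stride
// taken from the permuted axis order.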
int n0 = out->size[0], n1 = out->size[1], n2 = out->size[2], n3 = out->size[3];
size_t orows = (size_t)n0*n1*n2;
size_t stripeSize = (orows + nstripes - 1)/nstripes;
size_t stripeStart = r.start*stripeSize;
size_t stripeEnd = std::min(r.end*stripeSize, orows);
const size_t esz = sizeof(T);
size_t ostep0 = out->step[0]/esz, ostep1 = out->step[1]/esz, ostep2 = out->step[2]/esz;
const size_t* ord = &order->at(0);
size_t istep0 = inp->step[ord[0]]/esz, istep1 = inp->step[ord[1]]/esz,
istep2 = inp->step[ord[2]]/esz, istep3 = inp->step[ord[3]]/esz;
size_t val = stripeStart;
int i2 = (int)(val % n2);
val /= n2;
int i1 = (int)(val % n1);
int i0 = (int)(val / n1);
const T* inptr_orig = inp->ptr<T>();
T* outptr_orig = out->ptr<T>();
for( size_t ofs = stripeStart; ofs < stripeEnd; ofs++ )
{
const T* inptr = inptr_orig + i0*istep0 + i1*istep1 + i2*istep2;
T* outptr = outptr_orig + i0*ostep0 + i1*ostep1 + i2*ostep2;
for( int i3 = 0; i3 < n3; i3++ )
outptr[i3] = inptr[i3*istep3];
if( ++i2 >= n2 )
{
i2 = 0;
if( ++i1 >= n1 )
{
i1 = 0;
if( ++i0 >= n0 )
break;
}
}
}
}
};
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (!_needsPermute)
return false;
if (uorder.empty())
{
std::vector<int> orderVec(_order.begin(), _order.end());
Mat morder(1, orderVec.size(), CV_32SC1, &orderVec[0]);
std::vector<int> oldStrideVec(_oldStride.begin(), _oldStride.end());
Mat mold_stride(1, oldStrideVec.size(), CV_32SC1, &oldStrideVec[0]);
std::vector<int> newStrideVec(_newStride.begin(), _newStride.end());
Mat mnew_stride(1, newStrideVec.size(), CV_32SC1, &newStrideVec[0]);
morder.copyTo(uorder);
mold_stride.copyTo(uold_stride);
mnew_stride.copyTo(unew_stride);
}
bool use_half = (inps.depth() == CV_16S);
String opts = format("-DDtype=%s", use_half ? "half" : "float");
for (size_t i = 0; i < inputs.size(); i++)
{
ocl::Kernel kernel("permute", ocl::dnn::permute_oclsrc, opts);
kernel.set(0, (int)_count);
kernel.set(1, ocl::KernelArg::PtrReadOnly(inputs[i]));
kernel.set(2, ocl::KernelArg::PtrReadOnly(uorder));
kernel.set(3, ocl::KernelArg::PtrReadOnly(uold_stride));
kernel.set(4, ocl::KernelArg::PtrReadOnly(unew_stride));
kernel.set(5, (int)_numAxes);
kernel.set(6, ocl::KernelArg::PtrWriteOnly(outputs[i]));
if (!kernel.run(1, &_count, NULL, false))
return false;
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
inputs_arr.depth() != CV_8S,
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
size_t k, ninputs = inputs.size();
if(!_needsPermute)
{
for (k = 0; k < ninputs; k++)
{
CV_Assert(outputs[k].total() == inputs[k].total());
if (outputs[k].data != inputs[k].data)
inputs[k].copyTo(outputs[k]);
}
}
else
{
size_t i, j, count = _count, numAxes = _numAxes;
const size_t* newStride = &_newStride[0];
const size_t* oldStride = &_oldStride[0];
const size_t* order = &_order[0];
for (k = 0; k < ninputs; k++)
{
const Mat& inp = inputs[k];
Mat& out = outputs[k];
CV_Assert(inp.dims == numAxes && inp.size == inputs[0].size);
CV_Assert(out.dims == numAxes && out.size == outputs[0].size);
CV_Assert(inp.isContinuous() && out.isContinuous());
// CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);
if( numAxes == 4 )
{
int nstripes = getNumThreads();
if (inp.type() == CV_8S)
PermuteInvoker<int8_t>::run(inp, out, _order, nstripes);
else
PermuteInvoker<float>::run(inp, out, _order, nstripes);
}
else
{
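// Generic N-D permutation: decompose the flat output index using the new
// strides into N-D coordinates, then recompose a flat input index from the
// old strides of the permuted axes.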
if (inp.type() == CV_8S)
{
const int8_t *srcData = inp.ptr<int8_t>();
int8_t *dstData = out.ptr<int8_t>();
for (i = 0; i < count; ++i)
{
size_t oldPosition = 0;
size_t newPosition = i;
for (j = 0; j < numAxes; ++j)
{
oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
newPosition %= newStride[j];
}
dstData[i] = srcData[oldPosition];
}
}
else
{
const float *srcData = inp.ptr<float>();
float *dstData = out.ptr<float>();
for (i = 0; i < count; ++i)
{
size_t oldPosition = 0;
size_t newPosition = i;
for (j = 0; j < numAxes; ++j)
{
oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
newPosition %= newStride[j];
}
dstData[i] = srcData[oldPosition];
}
}
}
}
}
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::PermuteLayer ieLayer(name);
ieLayer.setOrder(_order);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<int64_t> order(_order.begin(), _order.end());
auto tr_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape({order.size()}), order.data());
auto transpose = std::make_shared<ngraph::op::Transpose>(ieInpNode, tr_axes);
return Ptr<BackendNode>(new InfEngineNgraphNode(transpose));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::PermuteOp>(preferableTarget, std::move(context->stream), _order);
}
#endif
#ifdef HAVE_VULKAN
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
{
CV_Assert(!_order.empty());
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPermute(_order));
return Ptr<BackendNode>(new VkComBackendNode(input, op));
}
#endif // HAVE_VULKAN
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
return true;
}
size_t _count;
std::vector<size_t> _order;
std::vector<int> _oldDimensionSize;
std::vector<int> _newDimensionSize;
std::vector<size_t> _oldStride;
std::vector<size_t> _newStride;
bool _needsPermute;
#ifdef HAVE_OPENCL
UMat uorder, uold_stride, unew_stride;
#endif
size_t _numAxes;
};
Ptr<PermuteLayer> PermuteLayer::create(const LayerParams &params)
{
return Ptr<PermuteLayer>(new PermuteLayerImpl(params));
}
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,765 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
#include <ngraph/op/prior_box.hpp>
#include <ngraph/op/prior_box_clustered.hpp>
#else
#include <ngraph/op/experimental/layers/prior_box.hpp>
#include <ngraph/op/experimental/layers/prior_box_clustered.hpp>
#endif
#endif
#include "../op_vkcom.hpp"
#include <float.h>
#include <algorithm>
#include <cmath>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/prior_box.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class PriorBoxLayerImpl CV_FINAL : public PriorBoxLayer
{
public:
static bool getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result)
{
if (!params.has(parameterName))
{
return false;
}
result = params.get(parameterName);
return true;
}
template<typename T>
T getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx=0,
const bool required=true,
const T& defaultValue=T())
{
DictValue dictValue;
bool success = getParameterDict(params, parameterName, dictValue);
if(!success)
{
if(required)
{
std::string message = _layerName;
message += " layer parameter does not contain ";
message += parameterName;
message += " parameter.";
CV_Error(Error::StsBadArg, message);
}
else
{
return defaultValue;
}
}
return dictValue.get<T>(idx);
}
void getAspectRatios(const LayerParams &params)
{
DictValue aspectRatioParameter;
bool aspectRatioRetrieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
if (!aspectRatioRetrieved)
return;
for (int i = 0; i < aspectRatioParameter.size(); ++i)
{
float aspectRatio = aspectRatioParameter.get<float>(i);
bool alreadyExists = fabs(aspectRatio - 1.f) < 1e-6f;
for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)
{
alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;
}
if (!alreadyExists)
{
_aspectRatios.push_back(aspectRatio);
if (_flip)
{
_aspectRatios.push_back(1./aspectRatio);
}
}
}
}
static void getParams(const std::string& name, const LayerParams &params,
std::vector<float>* values)
{
DictValue dict;
if (getParameterDict(params, name, dict))
{
values->resize(dict.size());
for (int i = 0; i < dict.size(); ++i)
{
(*values)[i] = dict.get<float>(i);
}
}
else
values->clear();
}
void getVariance(const LayerParams &params)
{
DictValue varianceParameter;
bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
CV_Assert(varianceParameterRetrieved);
int varianceSize = varianceParameter.size();
if (varianceSize > 1)
{
// Exactly four variance values must be provided.
CV_Assert(varianceSize == 4);
for (int i = 0; i < varianceSize; ++i)
{
float variance = varianceParameter.get<float>(i);
CV_Assert(variance > 0);
_variance.push_back(variance);
}
}
else
{
if (varianceSize == 1)
{
float variance = varianceParameter.get<float>(0);
CV_Assert(variance > 0);
_variance.push_back(variance);
}
else
{
// Set default to 0.1.
_variance.push_back(0.1f);
}
}
}
PriorBoxLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
_flip = getParameter<bool>(params, "flip", 0, false, true);
_clip = getParameter<bool>(params, "clip", 0, false, true);
_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
getParams("min_size", params, &_minSize);
getAspectRatios(params);
getVariance(params);
if (params.has("max_size"))
{
getParams("max_size", params, &_maxSize);
CV_Assert(_minSize.size() == _maxSize.size());
for (int i = 0; i < _maxSize.size(); i++)
CV_Assert(_minSize[i] < _maxSize[i]);
}
std::vector<float> widths, heights;
getParams("width", params, &widths);
getParams("height", params, &heights);
_explicitSizes = !widths.empty();
CV_Assert(widths.size() == heights.size());
if (_explicitSizes)
{
CV_Assert(_aspectRatios.empty());
CV_Assert(!params.has("min_size"));
CV_Assert(!params.has("max_size"));
_boxWidths = widths;
_boxHeights = heights;
}
else
{
CV_Assert(!_minSize.empty());
for (int i = 0; i < _minSize.size(); ++i)
{
float minSize = _minSize[i];
CV_Assert(minSize > 0);
_boxWidths.push_back(minSize);
_boxHeights.push_back(minSize);
if (_maxSize.size() > 0)
{
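// Add one extra prior whose scale is the geometric mean of the min and max
// sizes, following the SSD prior-box scheme.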
float size = sqrt(minSize * _maxSize[i]);
_boxWidths.push_back(size);
_boxHeights.push_back(size);
}
// rest of priors
for (size_t r = 0; r < _aspectRatios.size(); ++r)
{
float arSqrt = sqrt(_aspectRatios[r]);
_boxWidths.push_back(minSize * arSqrt);
_boxHeights.push_back(minSize / arSqrt);
}
}
}
CV_Assert(_boxWidths.size() == _boxHeights.size());
_numPriors = _boxWidths.size();
if (params.has("step_h") || params.has("step_w")) {
CV_Assert(!params.has("step"));
_stepY = getParameter<float>(params, "step_h");
CV_Assert(_stepY > 0.);
_stepX = getParameter<float>(params, "step_w");
CV_Assert(_stepX > 0.);
} else if (params.has("step")) {
const float step = getParameter<float>(params, "step");
CV_Assert(step > 0);
_stepY = step;
_stepX = step;
} else {
_stepY = 0;
_stepX = 0;
}
if (params.has("offset_h") || params.has("offset_w"))
{
CV_Assert_N(!params.has("offset"), params.has("offset_h"), params.has("offset_w"));
getParams("offset_h", params, &_offsetsY);
getParams("offset_w", params, &_offsetsX);
CV_Assert(_offsetsX.size() == _offsetsY.size());
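// Shifted anchor grids densify the priors: with k > 1 offset pairs the
// number of priors per location is multiplied by 2 * (k - 1).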
_numPriors *= std::max((size_t)1, 2 * (_offsetsX.size() - 1));
}
else
{
float offset = getParameter<float>(params, "offset", 0, false, 0.5);
_offsetsX.assign(1, offset);
_offsetsY.assign(1, offset);
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_DNN_NGRAPH
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return _explicitSizes || _stepX == _stepY;
#endif
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() &&
( _explicitSizes || (_minSize.size() == 1 && _maxSize.size() <= 1)))
|| (backendId == DNN_BACKEND_VKCOM && haveVulkan());
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(!inputs.empty());
int layerHeight = inputs[0][2];
int layerWidth = inputs[0][3];
// Since all images in a batch have the same height and width, we only need to
// generate one set of priors which can be shared across all images.
size_t outNum = 1;
// 2 channels. First channel stores the mean of each prior coordinate.
// Second channel stores the variance of each prior coordinate.
size_t outChannels = 2;
outputs.resize(1, shape(outNum, outChannels,
layerHeight * layerWidth * _numPriors * 4));
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
CV_CheckGT(inputs.size(), (size_t)1, "");
CV_CheckEQ(inputs[0].dims, 4, ""); CV_CheckEQ(inputs[1].dims, 4, "");
int layerWidth = inputs[0].size[3];
int layerHeight = inputs[0].size[2];
int imageWidth = inputs[1].size[3];
int imageHeight = inputs[1].size[2];
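// Default step is the ratio between the image size and the feature map size,
// so priors are laid out on a grid aligned with the feature map cells.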
_stepY = _stepY == 0 ? (static_cast<float>(imageHeight) / layerHeight) : _stepY;
_stepX = _stepX == 0 ? (static_cast<float>(imageWidth) / layerWidth) : _stepX;
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
bool use_half = (inps.depth() == CV_16S);
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
int _layerWidth = inputs[0].size[3];
int _layerHeight = inputs[0].size[2];
int _imageWidth = inputs[1].size[3];
int _imageHeight = inputs[1].size[2];
if (umat_offsetsX.empty())
{
Mat offsetsX(1, _offsetsX.size(), CV_32FC1, &_offsetsX[0]);
Mat offsetsY(1, _offsetsY.size(), CV_32FC1, &_offsetsY[0]);
Mat variance(1, _variance.size(), CV_32FC1, &_variance[0]);
Mat widths(1, _boxWidths.size(), CV_32FC1, &_boxWidths[0]);
Mat heights(1, _boxHeights.size(), CV_32FC1, &_boxHeights[0]);
offsetsX.copyTo(umat_offsetsX);
offsetsY.copyTo(umat_offsetsY);
variance.copyTo(umat_variance);
widths.copyTo(umat_widths);
heights.copyTo(umat_heights);
}
String opts;
if (use_half)
opts = "-DDtype=half -DDtype4=half4 -Dconvert_T=convert_half4";
else
opts = "-DDtype=float -DDtype4=float4 -Dconvert_T=convert_float4";
size_t nthreads = _layerHeight * _layerWidth;
ocl::Kernel kernel("prior_box", ocl::dnn::prior_box_oclsrc, opts);
kernel.set(0, (int)nthreads);
kernel.set(1, (float)_stepX);
kernel.set(2, (float)_stepY);
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_offsetsX));
kernel.set(4, ocl::KernelArg::PtrReadOnly(umat_offsetsY));
kernel.set(5, (int)_offsetsX.size());
kernel.set(6, ocl::KernelArg::PtrReadOnly(umat_widths));
kernel.set(7, ocl::KernelArg::PtrReadOnly(umat_heights));
kernel.set(8, (int)_boxWidths.size());
kernel.set(9, ocl::KernelArg::PtrWriteOnly(outputs[0]));
kernel.set(10, (int)_layerHeight);
kernel.set(11, (int)_layerWidth);
kernel.set(12, (int)_imageHeight);
kernel.set(13, (int)_imageWidth);
kernel.run(1, &nthreads, NULL, false);
// clip the prior's coordinate such that it is within [0, 1]
if (_clip)
{
ocl::Kernel kernel("clip", ocl::dnn::prior_box_oclsrc, opts);
size_t nthreads = _layerHeight * _layerWidth * _numPriors * 4;
if (!kernel.args((int)nthreads, ocl::KernelArg::PtrReadWrite(outputs[0]))
.run(1, &nthreads, NULL, false))
return false;
}
// set the variance.
{
ocl::Kernel kernel("set_variance", ocl::dnn::prior_box_oclsrc, opts);
int offset = total(shape(outputs[0]), 2);
size_t nthreads = _layerHeight * _layerWidth * _numPriors;
kernel.set(0, (int)nthreads);
kernel.set(1, (int)offset);
kernel.set(2, (int)_variance.size());
kernel.set(3, ocl::KernelArg::PtrReadOnly(umat_variance));
kernel.set(4, ocl::KernelArg::PtrWriteOnly(outputs[0]));
if (!kernel.run(1, &nthreads, NULL, false))
return false;
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(inputs.size() == 2);
int _layerWidth = inputs[0].size[3];
int _layerHeight = inputs[0].size[2];
int _imageWidth = inputs[1].size[3];
int _imageHeight = inputs[1].size[2];
float* outputPtr = outputs[0].ptr<float>();
float _boxWidth, _boxHeight;
for (size_t h = 0; h < _layerHeight; ++h)
{
for (size_t w = 0; w < _layerWidth; ++w)
{
for (size_t i = 0; i < _boxWidths.size(); ++i)
{
_boxWidth = _boxWidths[i];
_boxHeight = _boxHeights[i];
for (int j = 0; j < _offsetsX.size(); ++j)
{
float center_x = (w + _offsetsX[j]) * _stepX;
float center_y = (h + _offsetsY[j]) * _stepY;
outputPtr = addPrior(center_x, center_y, _boxWidth, _boxHeight, _imageWidth,
_imageHeight, _bboxesNormalized, outputPtr);
}
}
}
}
// clip the prior's coordinate such that it is within [0, 1]
if (_clip)
{
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
outputPtr = outputs[0].ptr<float>();
for (size_t d = 0; d < _outChannelSize; ++d)
{
outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
}
}
// set the variance.
outputPtr = outputs[0].ptr<float>(0, 1);
if(_variance.size() == 1)
{
Mat secondChannel(1, outputs[0].size[2], CV_32F, outputPtr);
secondChannel.setTo(Scalar::all(_variance[0]));
}
else
{
int count = 0;
for (size_t h = 0; h < _layerHeight; ++h)
{
for (size_t w = 0; w < _layerWidth; ++w)
{
for (size_t i = 0; i < _numPriors; ++i)
{
for (int j = 0; j < 4; ++j)
{
outputPtr[count] = _variance[j];
++count;
}
}
}
}
}
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
if (_explicitSizes)
{
InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
ieLayer.setSteps({_stepY, _stepX});
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
ieLayer.setOffset(_offsetsX[0]);
ieLayer.setClip(_clip);
ieLayer.setFlip(false); // We already flipped aspect ratios.
InferenceEngine::Builder::Layer l = ieLayer;
CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
CV_Assert(_boxWidths.size() == _boxHeights.size());
l.getParameters()["width"] = _boxWidths;
l.getParameters()["height"] = _boxHeights;
l.getParameters()["variance"] = _variance;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
else
{
InferenceEngine::Builder::PriorBoxLayer ieLayer(name);
CV_Assert(!_explicitSizes);
ieLayer.setMinSize(_minSize[0]);
if (!_maxSize.empty())
ieLayer.setMaxSize(_maxSize[0]);
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
ieLayer.setOffset(_offsetsX[0]);
ieLayer.setClip(_clip);
ieLayer.setFlip(false); // We already flipped aspect ratios.
InferenceEngine::Builder::Layer l = ieLayer;
if (_stepX == _stepY)
{
l.getParameters()["step"] = _stepX;
l.getParameters()["step_h"] = 0.0f;
l.getParameters()["step_w"] = 0.0f;
}
else
{
l.getParameters()["step"] = 0.0f;
l.getParameters()["step_h"] = _stepY;
l.getParameters()["step_w"] = _stepX;
}
if (!_aspectRatios.empty())
{
l.getParameters()["aspect_ratio"] = _aspectRatios;
}
CV_Assert(!_variance.empty());
l.getParameters()["variance"] = _variance;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
CV_Assert(nodes.size() == 2);
auto layer = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto image = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
auto layer_shape = std::make_shared<ngraph::op::ShapeOf>(layer);
auto image_shape = std::make_shared<ngraph::op::ShapeOf>(image);
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{2});
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{4});
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{1});
auto slice_layer = std::make_shared<ngraph::op::v1::StridedSlice>(layer_shape,
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
auto slice_image = std::make_shared<ngraph::op::v1::StridedSlice>(image_shape,
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
if (_explicitSizes)
{
CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
CV_Assert(_boxWidths.size() == _boxHeights.size());
ngraph::op::PriorBoxClusteredAttrs attrs;
attrs.widths = _boxWidths;
attrs.heights = _boxHeights;
attrs.clip = _clip;
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
attrs.offset = _offsetsX[0];
attrs.step_heights = _stepY;
attrs.step_widths = _stepX;
attrs.variances = _variance;
auto priorBox = std::make_shared<ngraph::op::PriorBoxClustered>(slice_layer, slice_image, attrs);
auto axis = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{0});
auto unsqueeze = std::make_shared<ngraph::op::v0::Unsqueeze>(priorBox, axis);
return Ptr<BackendNode>(new InfEngineNgraphNode(unsqueeze));
}
else
{
ngraph::op::PriorBoxAttrs attrs;
attrs.min_size = _minSize;
attrs.max_size = _maxSize;
// ngraph PriorBox does not accept an empty aspect_ratio list, so default to {1.0f}
attrs.aspect_ratio = !_aspectRatios.empty()? _aspectRatios : std::vector<float>{1.0f};
attrs.clip = _clip;
attrs.flip = false;
attrs.variance = _variance;
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
attrs.offset = _offsetsX[0];
attrs.step = _stepX;
attrs.scale_all_sizes = !_aspectRatios.empty();
auto priorBox = std::make_shared<ngraph::op::PriorBox>(slice_layer, slice_image, attrs);
auto axis = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, std::vector<int64_t>{0});
auto unsqueeze = std::make_shared<ngraph::op::v0::Unsqueeze>(priorBox, axis);
return Ptr<BackendNode>(new InfEngineNgraphNode(unsqueeze));
}
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
auto feature_map_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
auto feature_map_shape = feature_map_wrapper->getShape();
auto image_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
auto image_shape = image_wrapper->getShape();
PriorBoxConfiguration config;
config.feature_map_width = feature_map_shape.rbegin()[0];
config.feature_map_height = feature_map_shape.rbegin()[1];
config.image_width = image_shape.rbegin()[0];
config.image_height = image_shape.rbegin()[1];
config.num_priors = _numPriors;
config.box_widths = _boxWidths;
config.box_heights = _boxHeights;
config.offsets_x = _offsetsX;
config.offsets_y = _offsetsY;
config.stepX = _stepX;
config.stepY = _stepY;
config.variance = _variance;
config.clip = _clip;
config.normalize = _bboxesNormalized;
return make_cuda_node<cuda4dnn::PriorBoxOp>(preferableTarget, std::move(context->stream), config);
}
#endif
#ifdef HAVE_VULKAN
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &input) CV_OVERRIDE
{
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpPriorBox(_stepX, _stepY,
_clip, _numPriors,
_variance, _offsetsX,
_offsetsY, _boxWidths,
_boxHeights));
return Ptr<BackendNode>(new VkComBackendNode(input, op));
}
#endif // HAVE_VULKAN
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
long flops = 0;
for (int i = 0; i < inputs.size(); i++)
{
flops += total(inputs[i], 2) * _numPriors * 4;
}
return flops;
}
private:
std::vector<float> _minSize;
std::vector<float> _maxSize;
float _stepX, _stepY;
std::vector<float> _aspectRatios;
std::vector<float> _variance;
std::vector<float> _offsetsX;
std::vector<float> _offsetsY;
// Precomputed final widths and heights based on aspect ratios or explicit sizes.
std::vector<float> _boxWidths;
std::vector<float> _boxHeights;
#ifdef HAVE_OPENCL
UMat umat_offsetsX;
UMat umat_offsetsY;
UMat umat_widths;
UMat umat_heights;
UMat umat_variance;
#endif
bool _flip;
bool _clip;
bool _explicitSizes;
bool _bboxesNormalized;
size_t _numPriors;
static const size_t _numAxes = 4;
static const std::string _layerName;
static float* addPrior(float center_x, float center_y, float width, float height,
float imgWidth, float imgHeight, bool normalized, float* dst)
{
if (normalized)
{
dst[0] = (center_x - width * 0.5f) / imgWidth; // xmin
dst[1] = (center_y - height * 0.5f) / imgHeight; // ymin
dst[2] = (center_x + width * 0.5f) / imgWidth; // xmax
dst[3] = (center_y + height * 0.5f) / imgHeight; // ymax
}
else
{
dst[0] = center_x - width * 0.5f; // xmin
dst[1] = center_y - height * 0.5f; // ymin
dst[2] = center_x + width * 0.5f - 1.0f; // xmax
dst[3] = center_y + height * 0.5f - 1.0f; // ymax
}
return dst + 4;
}
};
const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");
Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
{
return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));
}
}
}

View File

@ -0,0 +1,451 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_inf_engine.hpp"
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
#include <ngraph/op/proposal.hpp>
#else
#include <ngraph/op/experimental/layers/proposal.hpp>
#endif
#endif
namespace cv { namespace dnn {
class ProposalLayerImpl CV_FINAL : public ProposalLayer
{
public:
ProposalLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
featStride = params.get<uint32_t>("feat_stride", 16);
baseSize = params.get<uint32_t>("base_size", 16);
// uint32_t minSize = params.get<uint32_t>("min_size", 16);
keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
nmsThreshold = params.get<float>("nms_thresh", 0.7);
ratios = params.get("ratio");
scales = params.get("scale");
{
LayerParams lp;
lp.set("step", featStride);
lp.set("flip", false);
lp.set("clip", false);
lp.set("normalized_bbox", false);
lp.set("offset", 0.5 * baseSize / featStride);
// Unused values.
float variance[] = {0.1f, 0.1f, 0.2f, 0.2f};
lp.set("variance", DictValue::arrayReal<float*>(&variance[0], 4));
// Compute widths and heights explicitly.
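// e.g. baseSize = 16, ratio = 0.5: width = floor(16 / sqrt(0.5) + 0.5) = 23,
// height = floor(23 * 0.5 + 0.5) = 12; with scale = 8 this gives a 184x96 anchor.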
std::vector<float> widths, heights;
widths.reserve(ratios.size() * scales.size());
heights.reserve(ratios.size() * scales.size());
for (int i = 0; i < ratios.size(); ++i)
{
float ratio = ratios.get<float>(i);
float width = std::floor(baseSize / sqrt(ratio) + 0.5f);
float height = std::floor(width * ratio + 0.5f);
for (int j = 0; j < scales.size(); ++j)
{
float scale = scales.get<float>(j);
widths.push_back(scale * width);
heights.push_back(scale * height);
}
}
lp.set("width", DictValue::arrayReal<float*>(&widths[0], widths.size()));
lp.set("height", DictValue::arrayReal<float*>(&heights[0], heights.size()));
priorBoxLayer = PriorBoxLayer::create(lp);
}
{
int order[] = {0, 2, 3, 1};
LayerParams lp;
lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));
deltasPermute = PermuteLayer::create(lp);
scoresPermute = PermuteLayer::create(lp);
}
{
LayerParams lp;
lp.set("code_type", "CENTER_SIZE");
lp.set("num_classes", 1);
lp.set("share_location", true);
lp.set("background_label_id", 1); // We won't pass background scores so set it out of range [0, num_classes)
lp.set("variance_encoded_in_target", true);
lp.set("keep_top_k", keepTopAfterNMS);
lp.set("top_k", keepTopBeforeNMS);
lp.set("nms_threshold", nmsThreshold);
lp.set("normalized_bbox", false);
lp.set("clip", true);
detectionOutputLayer = DetectionOutputLayer::create(lp);
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL;
return !isMyriad;
}
#endif
return backendId == DNN_BACKEND_OPENCV;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
// We need to allocate the following blobs:
// - output priors from PriorBoxLayer
// - permuted priors
// - permuted scores
CV_Assert(inputs.size() == 3);
const MatShape& scores = inputs[0];
const MatShape& bboxDeltas = inputs[1];
std::vector<MatShape> layerInputs, layerOutputs, layerInternals;
// Prior boxes layer.
layerInputs.assign(1, scores);
priorBoxLayer->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
CV_Assert(layerOutputs.size() == 1);
CV_Assert(layerInternals.empty());
internals.push_back(layerOutputs[0]);
// Scores permute layer.
CV_Assert(scores.size() == 4);
MatShape objectScores = scores;
CV_Assert((scores[1] & 1) == 0); // Number of channels is even.
objectScores[1] /= 2;
layerInputs.assign(1, objectScores);
scoresPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
CV_Assert(layerOutputs.size() == 1);
CV_Assert(layerInternals.empty());
internals.push_back(layerOutputs[0]);
// BBox predictions permute layer.
layerInputs.assign(1, bboxDeltas);
deltasPermute->getMemoryShapes(layerInputs, 1, layerOutputs, layerInternals);
CV_Assert(layerOutputs.size() == 1);
CV_Assert(layerInternals.empty());
internals.push_back(layerOutputs[0]);
// Detections layer.
internals.push_back(shape(1, 1, keepTopAfterNMS, 7));
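// outputs[0] holds [batchId, x1, y1, x2, y2] for each kept proposal;
// outputs[1] holds the corresponding objectness scores.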
outputs.resize(2);
outputs[0] = shape(keepTopAfterNMS, 5);
outputs[1] = shape(keepTopAfterNMS, 1);
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
std::vector<Mat> layerInputs;
std::vector<Mat> layerOutputs;
// Scores permute layer.
Mat scores = getObjectScores(inputs[0]);
layerInputs.assign(1, scores);
layerOutputs.assign(1, Mat(shape(scores.size[0], scores.size[2],
scores.size[3], scores.size[1]), CV_32FC1));
scoresPermute->finalize(layerInputs, layerOutputs);
// BBox predictions permute layer.
const Mat& bboxDeltas = inputs[1];
CV_Assert(bboxDeltas.dims == 4);
layerInputs.assign(1, bboxDeltas);
layerOutputs.assign(1, Mat(shape(bboxDeltas.size[0], bboxDeltas.size[2],
bboxDeltas.size[3], bboxDeltas.size[1]), CV_32FC1));
deltasPermute->finalize(layerInputs, layerOutputs);
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
std::vector<UMat> internals;
if (inputs_.depth() == CV_16S)
return false;
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
internals_.getUMatVector(internals);
CV_Assert(inputs.size() == 3);
CV_Assert(internals.size() == 4);
const UMat& scores = inputs[0];
const UMat& bboxDeltas = inputs[1];
const UMat& imInfo = inputs[2];
UMat& priorBoxes = internals[0];
UMat& permuttedScores = internals[1];
UMat& permuttedDeltas = internals[2];
UMat& detections = internals[3];
CV_Assert(imInfo.total() >= 2);
// The fake image blob uses CV_8UC1, the smallest type, since only its shape matters.
Mat szMat;
imInfo.copyTo(szMat);
int rows = (int)szMat.at<float>(0);
int cols = (int)szMat.at<float>(1);
umat_fakeImageBlob.create(shape(1, 1, rows, cols), CV_8UC1);
umat_fakeImageBlob.setTo(0);
// Generate prior boxes.
std::vector<UMat> layerInputs(2), layerOutputs(1, priorBoxes);
layerInputs[0] = scores;
layerInputs[1] = umat_fakeImageBlob;
priorBoxLayer->forward(layerInputs, layerOutputs, internals);
// Permute scores.
layerInputs.assign(1, getObjectScores(scores));
layerOutputs.assign(1, permuttedScores);
scoresPermute->forward(layerInputs, layerOutputs, internals);
// Permute deltas.
layerInputs.assign(1, bboxDeltas);
layerOutputs.assign(1, permuttedDeltas);
deltasPermute->forward(layerInputs, layerOutputs, internals);
// Sort predictions by scores and apply NMS. DetectionOutputLayer allocates
// output internally because of different number of objects after NMS.
layerInputs.resize(4);
layerInputs[0] = permuttedDeltas;
layerInputs[1] = permuttedScores;
layerInputs[2] = priorBoxes;
layerInputs[3] = umat_fakeImageBlob;
layerOutputs[0] = detections;
detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
// DetectionOutputLayer produces a 1x1xNx7 output where N may be less than or
// equal to keepTopAfterNMS. The remaining rows are filled with zeros.
const int numDets = layerOutputs[0].total() / 7;
CV_Assert(numDets <= keepTopAfterNMS);
MatShape s = shape(numDets, 7);
layerOutputs[0] = layerOutputs[0].reshape(1, s.size(), &s[0]);
// The boxes.
UMat dst = outputs[0].rowRange(0, numDets);
layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
dst.col(0).setTo(0); // First column are batch ids. Keep it zeros too.
// The scores.
dst = outputs[1].rowRange(0, numDets);
layerOutputs[0].col(2).copyTo(dst);
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
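// Proposal pipeline: generate anchors via PriorBoxLayer, permute scores and
// deltas to channel-last order, then decode boxes and apply NMS via
// DetectionOutputLayer.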
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
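// CV_16S is the storage type OpenCV DNN uses for FP16 blobs; defer to the
// generic fallback, which runs the computation in FP32.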
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
CV_Assert(inputs.size() == 3);
CV_Assert(internals.size() == 4);
const Mat& scores = inputs[0];
const Mat& bboxDeltas = inputs[1];
const Mat& imInfo = inputs[2];
Mat& priorBoxes = internals[0];
Mat& permuttedScores = internals[1];
Mat& permuttedDeltas = internals[2];
Mat& detections = internals[3];
CV_Assert(imInfo.total() >= 2);
// Use the smallest data type because only the blob's shape matters here.
// No memory is allocated (the data pointer is NULL); just the shape must be correct.
Mat fakeImageBlob(shape(1, 1, imInfo.at<float>(0), imInfo.at<float>(1)), CV_8UC1, NULL);
// Generate prior boxes.
std::vector<Mat> layerInputs(2), layerOutputs(1, priorBoxes);
layerInputs[0] = scores;
layerInputs[1] = fakeImageBlob;
priorBoxLayer->forward(layerInputs, layerOutputs, internals);
// Permute scores.
layerInputs.assign(1, getObjectScores(scores));
layerOutputs.assign(1, permuttedScores);
scoresPermute->forward(layerInputs, layerOutputs, internals);
// Permute deltas.
layerInputs.assign(1, bboxDeltas);
layerOutputs.assign(1, permuttedDeltas);
deltasPermute->forward(layerInputs, layerOutputs, internals);
// Sort predictions by score and apply NMS. DetectionOutputLayer allocates
// its output internally because the number of objects after NMS varies.
layerInputs.resize(4);
layerInputs[0] = permuttedDeltas;
layerInputs[1] = permuttedScores;
layerInputs[2] = priorBoxes;
layerInputs[3] = fakeImageBlob;
layerOutputs[0] = detections;
detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
// DetectionOutputLayer produces a 1x1xNx7 output where N is less than or
// equal to keepTopAfterNMS. The rest is filled with zeros.
const int numDets = layerOutputs[0].total() / 7;
CV_Assert(numDets <= keepTopAfterNMS);
// The boxes.
layerOutputs[0] = layerOutputs[0].reshape(1, numDets);
Mat dst = outputs[0].rowRange(0, numDets);
layerOutputs[0].colRange(3, 7).copyTo(dst.colRange(1, 5));
dst.col(0).setTo(0); // The first column holds batch ids; keep it zeroed as well.
// The scores.
dst = outputs[1].rowRange(0, numDets);
layerOutputs[0].col(2).copyTo(dst);
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::ProposalLayer ieLayer(name);
ieLayer.setBaseSize(baseSize);
ieLayer.setFeatStride(featStride);
ieLayer.setMinSize(16);
ieLayer.setNMSThresh(nmsThreshold);
ieLayer.setPostNMSTopN(keepTopAfterNMS);
ieLayer.setPreNMSTopN(keepTopBeforeNMS);
std::vector<float> scalesVec(scales.size());
for (int i = 0; i < scales.size(); ++i)
scalesVec[i] = scales.get<float>(i);
ieLayer.setScale(scalesVec);
std::vector<float> ratiosVec(ratios.size());
for (int i = 0; i < ratios.size(); ++i)
ratiosVec[i] = ratios.get<float>(i);
ieLayer.setRatio(ratiosVec);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
CV_Assert(nodes.size() == 3);
ngraph::op::ProposalAttrs attr;
attr.base_size = baseSize;
attr.nms_thresh = nmsThreshold;
attr.feat_stride = featStride;
attr.min_size = 16;
attr.pre_nms_topn = keepTopBeforeNMS;
attr.post_nms_topn = keepTopAfterNMS;
std::vector<float> ratiosVec(ratios.size());
for (int i = 0; i < ratios.size(); ++i)
ratiosVec[i] = ratios.get<float>(i);
attr.ratio = ratiosVec;
std::vector<float> scalesVec(scales.size());
for (int i = 0; i < scales.size(); ++i)
scalesVec[i] = scales.get<float>(i);
attr.scale = scalesVec;
auto& class_probs = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto& class_logits = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
auto& image_shape = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
CV_Assert_N(image_shape->get_shape().size() == 2, image_shape->get_shape().front() == 1);
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{1},
std::vector<int64_t>{(int64_t)image_shape->get_shape().back()});
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(image_shape, shape, true);
auto proposal = std::make_shared<ngraph::op::Proposal>(class_probs, class_logits, reshape, attr);
return Ptr<BackendNode>(new InfEngineNgraphNode(proposal));
}
#endif // HAVE_DNN_NGRAPH
private:
// The first half of the channels holds background scores; we need only the second half.
static Mat getObjectScores(const Mat& m)
{
CV_Assert(m.dims == 4);
CV_Assert(m.size[0] == 1);
int channels = m.size[1];
CV_Assert((channels & 1) == 0);
return slice(m, Range::all(), Range(channels / 2, channels));
}
#ifdef HAVE_OPENCL
static UMat getObjectScores(const UMat& m)
{
CV_Assert(m.dims == 4);
CV_Assert(m.size[0] == 1);
int channels = m.size[1];
CV_Assert((channels & 1) == 0);
Range r = Range(channels / 2, channels);
Range ranges[4] = { Range::all(), r, Range::all(), Range::all() };
return m(&ranges[0]);
}
#endif
Ptr<PriorBoxLayer> priorBoxLayer;
Ptr<DetectionOutputLayer> detectionOutputLayer;
Ptr<PermuteLayer> deltasPermute;
Ptr<PermuteLayer> scoresPermute;
uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;
float nmsThreshold;
DictValue ratios, scales;
#ifdef HAVE_OPENCL
UMat umat_fakeImageBlob;
#endif
};
Ptr<ProposalLayer> ProposalLayer::create(const LayerParams& params)
{
return Ptr<ProposalLayer>(new ProposalLayerImpl(params));
}
} // namespace dnn
} // namespace cv

View File

@ -0,0 +1,813 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include <iostream>
#include <iterator>
#include <cmath>
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
template<typename Dtype>
static void tanh(const Mat &src, Mat &dst)
{
MatConstIterator_<Dtype> itSrc = src.begin<Dtype>();
MatIterator_<Dtype> itDst = dst.begin<Dtype>();
for (; itSrc != src.end<Dtype>(); itSrc++, itDst++)
*itDst = std::tanh(*itSrc);
}
// TODO: move into a shared utility method
static void tanh(const Mat &src, Mat &dst)
{
dst.create(src.dims, (const int*)src.size, src.type());
if (src.type() == CV_32F)
tanh<float>(src, dst);
else if (src.type() == CV_64F)
tanh<double>(src, dst);
else
CV_Error(Error::StsUnsupportedFormat, "Function supports only floating point types");
}
static void sigmoid(const Mat &src, Mat &dst)
{
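// sigmoid(x) = 1 / (1 + exp(-x)), computed with two vectorized ops:
// first dst = exp(-src), then dst = (1 + dst)^-1.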
cv::exp(-src, dst);
cv::pow(1 + dst, -1, dst);
}
typedef void (*ActivationFunction)(const Mat &src, Mat &dst);
static ActivationFunction get_activation_function(const String& activation) {
// The most common activations in PyTorch and TF exports are Tanh and Sigmoid.
// If more activations need to be supported, switch to a std::map lookup.
if (activation == "Tanh")
{
return tanh;
}
else if (activation == "Sigmoid")
{
return sigmoid;
}
else
{
CV_Error(Error::StsNotImplemented,
cv::format("Activation function [%s] for layer LSTM is not supported", activation.c_str()));
}
}
class LSTMLayerImpl CV_FINAL : public LSTMLayer
{
int numTimeStamps, numSamples;
bool allocated;
MatShape outTailShape; //shape of single output sample
MatShape outTsShape; //shape of N output samples
bool useTimestampDim;
bool produceCellOutput;
float forgetBias, cellClip;
bool useCellClip, usePeephole;
bool reverse; // If true, go in negative direction along the time axis
bool bidirectional; // If true, produces both forward and reversed directions along time axis
ActivationFunction f_activation;
ActivationFunction g_activation;
ActivationFunction h_activation;
public:
LSTMLayerImpl(const LayerParams& params)
: numTimeStamps(0), numSamples(0)
{
setParamsFrom(params);
bidirectional = params.get<bool>("bidirectional", false);
if (!blobs.empty())
{
CV_Assert(blobs.size() >= 3);
blobs[2] = blobs[2].reshape(1, 1);
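// Expected blob layout (per the checks below): Wh is (dirs*4*numOut x numOut),
// Wx is (dirs*4*numOut x numInp), bias has dirs*4*numOut elements, and h_0/c_0
// hold the initial hidden and cell states; dirs is 2 when bidirectional.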
const Mat& Wh = blobs[0];
const Mat& Wx = blobs[1];
const Mat& bias = blobs[2];
const Mat& hInternal = blobs[3];
const Mat& cInternal = blobs[4];
CV_CheckEQ(Wh.dims, 2, "");
CV_CheckEQ(Wx.dims, 2, "");
CV_CheckEQ(Wh.rows, Wx.rows, "");
CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional))*4*Wh.cols, "");
CV_CheckEQ(Wh.rows, (int)bias.total(), "");
CV_CheckEQ(hInternal.cols, Wh.cols, "");
CV_CheckEQ(hInternal.cols, cInternal.cols, "");
CV_CheckEQ(hInternal.rows, cInternal.rows, "");
CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
// Peephole weights.
if (blobs.size() > 5)
{
CV_Assert(blobs.size() == 8);
const int N = Wh.cols;
for (int i = 5; i < 8; ++i)
{
CV_Assert(blobs[i].rows == N && blobs[i].cols == N);
CV_Assert(blobs[i].type() == bias.type());
}
}
}
useTimestampDim = params.get<bool>("use_timestamp_dim", true);
produceCellOutput = params.get<bool>("produce_cell_output", false);
forgetBias = params.get<float>("forget_bias", 0.0f);
cellClip = params.get<float>("cell_clip", 0.0f);
useCellClip = params.get<bool>("use_cell_clip", false);
usePeephole = params.get<bool>("use_peephole", false);
reverse = params.get<bool>("reverse", false);
CV_Assert(!reverse || !bidirectional);
// read activations
DictValue activations = params.get<DictValue>("activations", "");
if (activations.size() == 1) // a single (empty) entry means "activations" wasn't specified; use defaults
{
f_activation = sigmoid;
g_activation = tanh;
h_activation = tanh;
} else {
CV_Assert(activations.size() == 3);
f_activation = get_activation_function(activations.getStringValue(0));
g_activation = get_activation_function(activations.getStringValue(1));
h_activation = get_activation_function(activations.getStringValue(2));
}
allocated = false;
outTailShape.clear();
}
void setUseTimstampsDim(bool use) CV_OVERRIDE
{
CV_Assert(!allocated);
useTimestampDim = use;
}
void setProduceCellOutput(bool produce) CV_OVERRIDE
{
CV_Assert(!allocated);
produceCellOutput = produce;
}
void setOutShape(const MatShape &outTailShape_) CV_OVERRIDE
{
CV_Assert(!allocated || total(outTailShape) == total(outTailShape_));
outTailShape = outTailShape_;
}
void setWeights(const Mat &Wh, const Mat &Wx, const Mat &bias) CV_OVERRIDE
{
CV_Assert(Wh.dims == 2 && Wx.dims == 2);
CV_Assert(Wh.rows == Wx.rows);
CV_Assert(Wh.rows == 4*Wh.cols);
CV_Assert(Wh.rows == (int)bias.total());
CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
blobs.resize(3);
blobs[0] = Mat(Wh.clone());
blobs[1] = Mat(Wx.clone());
blobs[2] = Mat(bias.clone()).reshape(1, 1);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
CV_Assert(inputs.size() == 1);
const MatShape& inp0 = inputs[0];
const Mat &Wh = blobs[0], &Wx = blobs[1];
int _numOut = Wh.size[1];
int _numInp = Wx.size[1];
MatShape outTailShape_(outTailShape), outResShape;
if (!outTailShape_.empty())
CV_Assert(total(outTailShape_) == _numOut);
else
outTailShape_.assign(1, _numOut);
int _numSamples;
if (useTimestampDim)
{
CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
_numSamples = inp0[1];
outResShape.push_back(inp0[0]);
}
else
{
CV_Assert(inp0.size() >= 2 && total(inp0, 1) == _numInp);
_numSamples = inp0[0];
}
outResShape.push_back(_numSamples);
outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end());
outResShape.back() *= (1 + static_cast<int>(bidirectional));
size_t noutputs = produceCellOutput ? 2 : 1;
outputs.assign(noutputs, outResShape);
internals.assign(1, shape(_numSamples, _numOut)); // hInternal
internals.push_back(shape(_numSamples, _numOut)); // cInternal
internals.push_back(shape(_numSamples, 1)); // dummyOnes
internals.push_back(shape(_numSamples, 4*_numOut)); // gates
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> input;
inputs_arr.getMatVector(input);
CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
CV_Assert(input.size() == 1);
const Mat& inp0 = input[0];
Mat &Wh = blobs[0], &Wx = blobs[1];
int numOut = Wh.size[1];
int numInp = Wx.size[1];
if (!outTailShape.empty())
CV_Assert(total(outTailShape) == numOut);
else
outTailShape.assign(1, numOut);
if (useTimestampDim)
{
CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp);
numTimeStamps = inp0.size[0];
numSamples = inp0.size[1];
}
else
{
CV_Assert(inp0.dims >= 2 && (int)inp0.total(1) == numInp);
numTimeStamps = 1;
numSamples = inp0.size[0];
}
outTsShape.clear();
outTsShape.push_back(numSamples);
outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end());
outTsShape.back() *= (1 + static_cast<int>(bidirectional));
allocated = true;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> input, output, internals;
inputs_arr.getMatVector(input);
outputs_arr.getMatVector(output);
internals_arr.getMatVector(internals);
const int numDirs = 1 + static_cast<int>(bidirectional);
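// The stacked weight blobs store the forward direction first and, when
// bidirectional, the reverse direction after it; each iteration slices out
// one direction's parameters.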
for (int i = 0; i < numDirs; ++i)
{
const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs);
int numOut = Wh.size[1];
Mat hInternal = internals[0], cInternal = internals[1],
dummyOnes = internals[2], gates = internals[3];
h_0.copyTo(hInternal);
c_0.copyTo(cInternal);
dummyOnes.setTo(1.);
int numSamplesTotal = numTimeStamps*numSamples;
Mat xTs = input[0].reshape(1, numSamplesTotal);
Mat hOutTs = output[0].reshape(1, numSamplesTotal);
hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs);
Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat();
int tsStart, tsEnd, tsInc;
if (reverse || i == 1) {
tsStart = numTimeStamps - 1;
tsEnd = -1;
tsInc = -1;
}
else {
tsStart = 0;
tsEnd = numTimeStamps;
tsInc = 1;
}
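// One LSTM time step, with gates laid out as [i, f, o, g] in `gates`:
//   i_t = f(Wx_i x_t + Wh_i h_{t-1} + b_i)   f_t = f(Wx_f x_t + Wh_f h_{t-1} + b_f)
//   o_t = f(Wx_o x_t + Wh_o h_{t-1} + b_o)   g_t = g(Wx_g x_t + Wh_g h_{t-1} + b_g)
//   c_t = f_t (*) c_{t-1} + i_t (*) g_t      h_t = o_t (*) h(c_t)
// where f/g/h are the configured activations, (*) is the elementwise product,
// and peephole connections add c-dependent terms when enabled.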
for (int ts = tsStart; ts != tsEnd; ts += tsInc)
{
Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
Mat xCurr = xTs.rowRange(curRowRange);
gemm(xCurr, Wx, 1, gates, 0, gates, GEMM_2_T); // Wx * x_t
gemm(hInternal, Wh, 1, gates, 1, gates, GEMM_2_T); //+Wh * h_{t-1}
gemm(dummyOnes, bias, 1, gates, 1, gates); //+b
Mat gateI = gates.colRange(0*numOut, 1*numOut);
Mat gateF = gates.colRange(1*numOut, 2*numOut);
Mat gateO = gates.colRange(2*numOut, 3*numOut);
Mat gateG = gates.colRange(3*numOut, 4*numOut);
if (forgetBias)
add(gateF, forgetBias, gateF);
if (usePeephole)
{
Mat gatesIF = gates.colRange(0, 2*numOut);
gemm(cInternal, blobs[5], 1, gateI, 1, gateI);
gemm(cInternal, blobs[6], 1, gateF, 1, gateF);
f_activation(gatesIF, gatesIF);
}
else
{
Mat gatesIFO = gates.colRange(0, 3*numOut);
f_activation(gatesIFO, gatesIFO);
}
g_activation(gateG, gateG);
//compute c_t
multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1}
multiply(gateI, gateG, gateI); // i_t (*) g_t
add(gateF, gateI, cInternal); // c_t = f_t (*) c_{t-1} + i_t (*) g_t
if (useCellClip)
{
min(cInternal, cellClip, cInternal);
max(cInternal, -cellClip, cInternal);
}
if (usePeephole)
{
gemm(cInternal, blobs[7], 1, gateO, 1, gateO);
f_activation(gateO, gateO);
}
//compute h_t
h_activation(cInternal, hInternal);
multiply(gateO, hInternal, hInternal);
//save results in output blobs
hInternal.copyTo(hOutTs.rowRange(curRowRange));
if (produceCellOutput)
cInternal.copyTo(cOutTs.rowRange(curRowRange));
}
}
}
};
Ptr<LSTMLayer> LSTMLayer::create(const LayerParams& params)
{
return Ptr<LSTMLayer>(new LSTMLayerImpl(params));
}
int LSTMLayer::inputNameToIndex(String inputName)
{
if (toLowerCase(inputName) == "x")
return 0;
return -1;
}
int LSTMLayer::outputNameToIndex(const String& outputName)
{
if (toLowerCase(outputName) == "h")
return 0;
else if (toLowerCase(outputName) == "c")
return 1;
return -1;
}
class RNNLayerImpl : public RNNLayer
{
int numX, numH, numO;
int numSamples, numTimestamps, numSamplesTotal;
int dtype;
Mat Whh, Wxh, bh;
Mat Who, bo;
bool produceH;
public:
RNNLayerImpl(const LayerParams& params)
: numX(0), numH(0), numO(0), numSamples(0), numTimestamps(0), numSamplesTotal(0), dtype(0)
{
setParamsFrom(params);
type = "RNN";
produceH = false;
}
void setProduceHiddenOutput(bool produce = false) CV_OVERRIDE
{
produceH = produce;
}
void setWeights(const Mat &W_xh, const Mat &b_h, const Mat &W_hh, const Mat &W_ho, const Mat &b_o) CV_OVERRIDE
{
CV_Assert(W_hh.dims == 2 && W_xh.dims == 2);
CV_Assert(W_hh.size[0] == W_xh.size[0] && W_hh.size[0] == W_hh.size[1] && (int)b_h.total() == W_xh.size[0]);
CV_Assert(W_ho.size[0] == (int)b_o.total());
CV_Assert(W_ho.size[1] == W_hh.size[1]);
blobs.resize(5);
blobs[0] = Mat(W_xh.clone());
blobs[1] = Mat(b_h.clone());
blobs[2] = Mat(W_hh.clone());
blobs[3] = Mat(W_ho.clone());
blobs[4] = Mat(b_o.clone());
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() >= 1 && inputs.size() <= 2);
Mat Who_ = blobs[3];
Mat Wxh_ = blobs[0];
int numTimestamps_ = inputs[0][0];
int numSamples_ = inputs[0][1];
int numO_ = Who_.rows;
int numH_ = Wxh_.rows;
outputs.clear();
int dims[] = {numTimestamps_, numSamples_, numO_};
outputs.push_back(shape(dims, 3));
dims[2] = numH_;
if (produceH)
outputs.push_back(shape(dims, 3));
internals.assign(2, shape(numSamples_, numH_));
internals.push_back(shape(numSamples_, 1));
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> input, outputs;
inputs_arr.getMatVector(input);
CV_Assert(input.size() >= 1 && input.size() <= 2);
Wxh = blobs[0];
bh = blobs[1];
Whh = blobs[2];
Who = blobs[3];
bo = blobs[4];
numH = Wxh.rows;
numX = Wxh.cols;
numO = Who.rows;
const Mat& inp0 = input[0];
CV_Assert(inp0.dims >= 2);
CV_Assert(inp0.total(2) == numX);
dtype = CV_32F;
CV_Assert(inp0.type() == dtype);
numTimestamps = inp0.size[0];
numSamples = inp0.size[1];
numSamplesTotal = numTimestamps * numSamples;
bh = bh.reshape(1, 1); // now a 1 x numH row vector
bo = bo.reshape(1, 1); // now a 1 x numO row vector
}
void reshapeOutput(std::vector<Mat> &output)
{
output.resize(produceH ? 2 : 1);
int sz0[] = { numTimestamps, numSamples, numO };
output[0].create(3, sz0, dtype);
if (produceH)
{
int sz1[] = { numTimestamps, numSamples, numH };
output[1].create(3, sz1, dtype);
}
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> input, output, internals;
inputs_arr.getMatVector(input);
outputs_arr.getMatVector(output);
internals_arr.getMatVector(internals);
Mat xTs = input[0].reshape(1, numSamplesTotal);
Mat oTs = output[0].reshape(1, numSamplesTotal);
Mat hTs = produceH ? output[1].reshape(1, numSamplesTotal) : Mat();
Mat hCurr = internals[0];
Mat hPrev = internals[1];
Mat dummyBiasOnes = internals[2];
hPrev.setTo(0.);
dummyBiasOnes.setTo(1.);
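// Elman RNN recurrence implemented below:
//   h_t = tanh(W_xh x_t + W_hh h_{t-1} + b_h)
//   o_t = tanh(W_ho h_t + b_o)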
for (int ts = 0; ts < numTimestamps; ts++)
{
Range curRowRange = Range(ts * numSamples, (ts + 1) * numSamples);
Mat xCurr = xTs.rowRange(curRowRange);
gemm(hPrev, Whh, 1, hCurr, 0, hCurr, GEMM_2_T); // W_{hh} * h_{prev}
gemm(xCurr, Wxh, 1, hCurr, 1, hCurr, GEMM_2_T); //+W_{xh} * x_{curr}
gemm(dummyBiasOnes, bh, 1, hCurr, 1, hCurr); //+bh
tanh(hCurr, hPrev);
Mat oCurr = oTs.rowRange(curRowRange);
gemm(hPrev, Who, 1, oCurr, 0, oCurr, GEMM_2_T); // W_{ho} * h_{prev}
gemm(dummyBiasOnes, bo, 1, oCurr, 1, oCurr); //+b_o
tanh(oCurr, oCurr);
if (produceH)
hPrev.copyTo(hTs.rowRange(curRowRange));
}
}
};
CV_EXPORTS_W Ptr<RNNLayer> RNNLayer::create(const LayerParams& params)
{
return Ptr<RNNLayer>(new RNNLayerImpl(params));
}
class GRULayerImpl CV_FINAL : public GRULayer
{
int numTimeStamps, numSamples;
bool allocated;
MatShape outTailShape; //shape of single output sample
MatShape outTsShape; //shape of N output samples
bool bidirectional; // If true, produces both forward and reversed directions along time axis
public:
GRULayerImpl(const LayerParams& params) : numTimeStamps(0), numSamples(0)
{
setParamsFrom(params);
bidirectional = params.get<bool>("bidirectional", false);
if (!blobs.empty())
{
CV_Assert(blobs.size() >= 3);
blobs[2] = blobs[2].reshape(1, 1);
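// Expected blob layout (per the checks below): Wh is (dirs*3*numOut x numOut),
// Wx is (dirs*3*numOut x numInp), bias concatenates the input and hidden
// biases (2 * dirs*3*numOut elements), and h_0 is the initial hidden state.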
const Mat& Wh = blobs[0];
const Mat& Wx = blobs[1];
const Mat& bias = blobs[2];
const Mat& hInternal = blobs[3];
CV_CheckEQ(Wh.dims, 2, "");
CV_CheckEQ(Wx.dims, 2, "");
CV_CheckEQ(Wh.rows, Wx.rows, "");
CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional)) * 3 * Wh.cols, "");
CV_CheckEQ(Wh.rows * 2, (int)bias.total(), "");
CV_CheckEQ(hInternal.cols, Wh.cols, "");
CV_CheckTypeEQ(Wh.type(), Wx.type(), "");
CV_CheckTypeEQ(Wx.type(), bias.type(), "");
}
allocated = false;
outTailShape.clear();
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
const MatShape& inp0 = inputs[0];
const Mat &Wh = blobs[0], &Wx = blobs[1];
int _numOut = Wh.size[1];
int _numInp = Wx.size[1];
MatShape outTailShape_(outTailShape), outResShape;
if (!outTailShape_.empty())
CV_Assert(total(outTailShape_) == _numOut);
else
outTailShape_.assign(1, _numOut);
int _numSamples;
CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
_numSamples = inp0[1];
outResShape.push_back(inp0[0]);
outResShape.push_back(_numSamples);
outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end());
outResShape.back() *= (1 + static_cast<int>(bidirectional));
outputs.assign(1, outResShape);
internals.assign(1, shape(_numSamples, _numOut)); // hInternal
internals.push_back(shape(_numSamples, 1)); // dummyOnes
internals.push_back(shape(_numSamples, 2 * _numOut)); // gates
internals.push_back(shape(_numSamples, 2 * _numOut)); // gates_b
internals.push_back(shape(_numSamples, 1 * _numOut)); // h_linear
internals.push_back(shape(_numSamples, _numOut)); // ones
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> input;
inputs_arr.getMatVector(input);
CV_Assert(input.size() == 1);
const Mat& inp0 = input[0];
Mat &Wh = blobs[0], &Wx = blobs[1];
int numOut = Wh.size[1];
int numInp = Wx.size[1];
if (!outTailShape.empty())
CV_Assert(total(outTailShape) == numOut);
else
outTailShape.assign(1, numOut);
CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp);
numTimeStamps = inp0.size[0];
numSamples = inp0.size[1];
outTsShape.clear();
outTsShape.push_back(numSamples);
outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end());
outTsShape.back() *= (1 + static_cast<int>(bidirectional));
allocated = true;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> input, output, internals;
inputs_arr.getMatVector(input);
outputs_arr.getMatVector(output);
internals_arr.getMatVector(internals);
const int numDirs = 1 + static_cast<int>(bidirectional);
for (int i = 0; i < numDirs; ++i)
{
const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
const Mat &bx = bias.colRange(0, bias.cols / 2);
const Mat &bh = bias.colRange(bias.cols / 2, bias.cols);
Mat hInternal = internals[0], dummyOnes = internals[1], gates = internals[2],
b_rz = internals[3], n_t = internals[4], ones = internals[5];
h_0.copyTo(hInternal);
dummyOnes.setTo(1.);
ones.setTo(1.);
int numOut = Wh.size[1];
const Mat& wx_rz = Wx.rowRange(0, 2 * numOut);
const Mat& wh_rz = Wh.rowRange(0, 2 * numOut);
b_rz = bx.colRange(0, 2 * numOut) + bh.colRange(0, 2 * numOut);
const Mat& wx_n = Wx.rowRange(2 * numOut, 3 * numOut);
const Mat& wh_n = Wh.rowRange(2 * numOut, 3 * numOut);
const Mat& b_in = bx.colRange(2 * numOut, 3 * numOut);
const Mat& b_hn = bh.colRange(2 * numOut, 3 * numOut);
int numSamplesTotal = numTimeStamps * numSamples;
Mat xTs = input[0].reshape(1, numSamplesTotal);
Mat hOutTs = output[0].reshape(1, numSamplesTotal);
hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs);
Mat cOutTs = Mat();
int tsStart, tsEnd, tsInc;
if (i == 1) {
tsStart = numTimeStamps - 1;
tsEnd = -1;
tsInc = -1;
}
else {
tsStart = 0;
tsEnd = numTimeStamps;
tsInc = 1;
}
for (int ts = tsStart; ts != tsEnd; ts += tsInc)
{
Range curRowRange(ts * numSamples, (ts + 1) * numSamples);
Mat xCurr = xTs.rowRange(curRowRange);
// calculate r_t = sigmoid(x * Wx_r + h_(t-1) * Wh_r + b_r)
// calculate z_t = sigmoid(x * Wx_z + h_(t-1) * Wh_z + b_z)
gemm(xCurr, wx_rz, 1, gates, 0, gates, GEMM_2_T); // x * Wx_rz
gemm(hInternal, wh_rz, 1, gates, 1, gates, GEMM_2_T); // + h_(t-1) * Wh_rz
gemm(dummyOnes, b_rz, 1, gates, 1, gates); // + b_rz
sigmoid(gates, gates); // sigmoid()
Mat z = gates.colRange(0, gates.cols / 2);
Mat r = gates.colRange(gates.cols / 2, gates.cols);
// calculate n_t = tanh(r (*) (h_(t-1) * Wh_n + b_hn) + x * Wx_n + b_in)
gemm(hInternal, wh_n, 1, n_t, 0, n_t, GEMM_2_T); // h_(t-1) * Wh_n
gemm(dummyOnes, b_hn, 1, n_t, 1, n_t); // + b_hn
multiply(r, n_t, n_t); // r (*) (h_(t-1) * Wh_n + b_hn)
gemm(xCurr, wx_n, 1, n_t, 1, n_t, GEMM_2_T); // + x * Wx_n
gemm(dummyOnes, b_in, 1, n_t, 1, n_t); // + b_in
tanh(n_t, n_t); // tanh()
//compute next h_t = z (*) h_(t-1) + (1 - z) (*) n_t
multiply(z, hInternal, hInternal); // z (*) h_{t-1}
subtract(ones, z, z); // 1 - z
multiply(z, n_t, z); // (1 - z) * n
add(z, hInternal, hInternal); // z (*) h_(t-1) + (1 - z) (*) n_t
//save results in output blobs
hInternal.copyTo(hOutTs.rowRange(curRowRange));
}
}
}
};
Ptr<GRULayer> GRULayer::create(const LayerParams &params) {
return Ptr<GRULayer>(new GRULayerImpl(params));
}
} // namespace dnn
} // namespace cv

View File

@ -0,0 +1,660 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include "../nms.inl.hpp"
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/region.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class RegionLayerImpl CV_FINAL : public RegionLayer
{
public:
int coords, classes, anchors, classfix;
float thresh, scale_x_y;
int new_coords;
bool useSoftmax, useLogistic;
#ifdef HAVE_OPENCL
UMat blob_umat;
#endif
RegionLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
CV_Assert(blobs.size() == 1);
thresh = params.get<float>("thresh", 0.2);
coords = params.get<int>("coords", 4);
classes = params.get<int>("classes", 0);
anchors = params.get<int>("anchors", 5);
classfix = params.get<int>("classfix", 0);
useSoftmax = params.get<bool>("softmax", false);
useLogistic = params.get<bool>("logistic", false);
nmsThreshold = params.get<float>("nms_threshold", 0.4);
scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
new_coords = params.get<int>("new_coords", 0); // Yolov4x-mish
CV_Assert(nmsThreshold >= 0.);
CV_Assert(coords == 4);
CV_Assert(classes >= 1);
CV_Assert(anchors >= 1);
CV_Assert(useLogistic || useSoftmax);
if (params.get<bool>("softmax_tree", false))
CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() > 0);
// channels == cell_size*anchors
CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
int batch_size = inputs[0][0];
if(batch_size > 1)
outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
else
outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
return false;
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_DNN_NGRAPH
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD && new_coords == 0;
#endif
#ifdef HAVE_CUDA
if (backendId == DNN_BACKEND_CUDA)
return true;
#endif
return backendId == DNN_BACKEND_OPENCV;
}
float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }
void softmax_activate(const float* input, const int n, const float temp, float* output)
{
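// Numerically stable softmax: subtract the running max before exponentiating
// so exp() cannot overflow, then normalize by the sum.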
int i;
float sum = 0;
float largest = -FLT_MAX;
for (i = 0; i < n; ++i) {
if (input[i] > largest) largest = input[i];
}
for (i = 0; i < n; ++i) {
float e = exp((input[i] - largest) / temp);
sum += e;
output[i] = e;
}
for (i = 0; i < n; ++i) {
output[i] /= sum;
}
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
if (blob_umat.empty())
blobs[0].copyTo(blob_umat);
std::vector<UMat> inputs;
std::vector<UMat> outputs;
// TODO: implement logistic activation for the classification scores.
if (useLogistic || inps.depth() == CV_16S)
return false;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
CV_Assert(inputs.size() >= 1);
int const cell_size = classes + coords + 1;
for (size_t ii = 0; ii < outputs.size(); ii++)
{
UMat& inpBlob = inputs[ii];
UMat& outBlob = outputs[ii];
int batch_size = inpBlob.size[0];
int rows = inpBlob.size[1];
int cols = inpBlob.size[2];
// channels == cell_size*anchors (see the assert in getMemoryShapes)
int sample_size = cell_size*rows*cols*anchors;
ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
size_t nanchors = rows*cols*anchors*batch_size;
logistic_kernel.set(0, (int)nanchors);
logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
logistic_kernel.set(2, (int)cell_size);
logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
logistic_kernel.run(1, &nanchors, NULL, false);
if (useSoftmax)
{
// Yolo v2
// softmax activation of the class probabilities, for each grid cell (X x Y x Anchor-index)
ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
size_t nanchors = rows*cols*anchors*batch_size;
softmax_kernel.set(0, (int)nanchors);
softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
softmax_kernel.set(3, (int)cell_size);
softmax_kernel.set(4, (int)classes);
softmax_kernel.set(5, (int)classfix);
softmax_kernel.set(6, (int)rows);
softmax_kernel.set(7, (int)cols);
softmax_kernel.set(8, (int)anchors);
softmax_kernel.set(9, (float)thresh);
softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
if (!softmax_kernel.run(1, &nanchors, NULL, false))
return false;
}
if (nmsThreshold > 0) {
Mat mat = outBlob.getMat(ACCESS_WRITE);
float *dstData = mat.ptr<float>();
for (int b = 0; b < batch_size; ++b)
do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
}
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
CV_Assert(inputs.size() >= 1);
CV_Assert(outputs.size() == 1);
int const cell_size = classes + coords + 1;
const float* biasData = blobs[0].ptr<float>();
for (size_t ii = 0; ii < outputs.size(); ii++)
{
Mat &inpBlob = inputs[ii];
Mat &outBlob = outputs[ii];
int batch_size = inpBlob.size[0];
int rows = inpBlob.size[1];
int cols = inpBlob.size[2];
// number of elements for one image in the batch, both for input and output
int sample_size = cell_size*rows*cols*anchors;
// assert that the comment above is true
CV_Assert(sample_size*batch_size == inpBlob.total());
CV_Assert(sample_size*batch_size == outBlob.total());
CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;
const float *srcData = inpBlob.ptr<float>();
float *dstData = outBlob.ptr<float>();
if (new_coords == 0) {
// logistic activation for t0, for each grid cell (X x Y x Anchor-index)
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
int index = cell_size*i;
float x = srcData[index + 4];
dstData[index + 4] = logistic_activate(x); // logistic activation
}
if (useSoftmax) { // Yolo v2
for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
int index = cell_size*i;
softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
}
}
else if (useLogistic) { // Yolo v3
for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
int index = cell_size*i;
const float* input = srcData + index + 5;
float* output = dstData + index + 5;
for (int c = 0; c < classes; ++c)
output[c] = logistic_activate(input[c]);
}
}
}
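// Decode every anchor box: (x, y) come from the activated offsets within the
// grid cell, (w, h) rescale the anchor priors stored in blobs[0], and each
// class score is gated by the objectness t0 and zeroed when below thresh.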
for (int b = 0; b < batch_size; ++b)
for (int x = 0; x < cols; ++x)
for(int y = 0; y < rows; ++y)
for (int a = 0; a < anchors; ++a) {
// relative start address for image b within the batch data
int index_sample_offset = sample_size*b;
int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
int p_index = index_sample_offset + index * cell_size + 4;
float scale = dstData[p_index];
if (classfix == -1 && scale < .5)
{
scale = 0; // if(t0 < 0.5) t0 = 0;
}
int box_index = index_sample_offset + index * cell_size;
if (new_coords == 1) {
float x_tmp = (srcData[box_index + 0] - 0.5f) * scale_x_y + 0.5f;
float y_tmp = (srcData[box_index + 1] - 0.5f) * scale_x_y + 0.5f;
dstData[box_index + 0] = (x + x_tmp) / cols;
dstData[box_index + 1] = (y + y_tmp) / rows;
dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm;
dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm;
scale = srcData[p_index];
if (classfix == -1 && scale < thresh)
{
scale = 0; // if(t0 < thresh) t0 = 0;
}
int class_index = index_sample_offset + index * cell_size + 5;
for (int j = 0; j < classes; ++j) {
float prob = scale*srcData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
}
}
else
{
float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
dstData[box_index + 0] = (x + x_tmp) / cols;
dstData[box_index + 1] = (y + y_tmp) / rows;
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
int class_index = index_sample_offset + index * cell_size + 5;
for (int j = 0; j < classes; ++j) {
float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
}
}
}
if (nmsThreshold > 0) {
for (int b = 0; b < batch_size; ++b){
do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
}
}
}
}
void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
{
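// Per-class NMS: boxes are converted from center/size to top-left/size form;
// for each class the scores are moved out, suppressed via NMSBoxes, and only
// the surviving indices get their scores written back.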
std::vector<Rect2d> boxes(total);
std::vector<float> scores(total);
for (int i = 0; i < total; ++i)
{
Rect2d &b = boxes[i];
int box_index = i * (classes + coords + 1);
b.width = detections[box_index + 2];
b.height = detections[box_index + 3];
b.x = detections[box_index + 0] - b.width / 2;
b.y = detections[box_index + 1] - b.height / 2;
}
std::vector<int> indices;
for (int k = 0; k < classes; ++k)
{
for (int i = 0; i < total; ++i)
{
int box_index = i * (classes + coords + 1);
int class_index = box_index + 5;
scores[i] = detections[class_index + k];
detections[class_index + k] = 0;
}
NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
for (int i = 0, n = indices.size(); i < n; ++i)
{
int box_index = indices[i] * (classes + coords + 1);
int class_index = box_index + 5;
detections[class_index + k] = scores[indices[i]];
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
if (coords != 4)
CV_Error(Error::StsNotImplemented, "Only upright rectangular boxes are supported in RegionLayer.");
std::size_t height_norm, width_norm;
if (inputs.size() == 1)
{
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
auto input_shape = input_wrapper->getShape();
height_norm = input_shape[1];
width_norm = input_shape[2];
}
else
{
auto input_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
auto input_shape = input_wrapper->getShape();
CV_Assert(input_shape.size() == 4);
height_norm = input_shape[2];
width_norm = input_shape[3];
}
cuda4dnn::SquashMethod squash_method;
if(useLogistic)
squash_method = cuda4dnn::SquashMethod::SIGMOID;
else if (useSoftmax)
squash_method = cuda4dnn::SquashMethod::SOFTMAX;
/* exactly one must be true */
CV_Assert((useLogistic || useSoftmax) && !(useLogistic && useSoftmax));
cuda4dnn::RegionConfiguration<float> config;
config.squash_method = squash_method;
config.classes = classes;
config.boxes_per_cell = anchors;
config.height_norm = height_norm;
config.width_norm = width_norm;
config.scale_x_y = scale_x_y;
config.object_prob_cutoff = (classfix == -1) ? thresh : 0.f;
config.class_prob_cutoff = thresh;
config.nms_iou_threshold = nmsThreshold;
config.new_coords = (new_coords == 1);
return make_cuda_node<cuda4dnn::RegionOp>(preferableTarget, std::move(context->stream), blobs[0], config);
}
#endif
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
int64 flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
flops += 60*total(inputs[i]);
}
return flops;
}
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto parent_shape = input->get_shape();
int64_t b = parent_shape[0];
int64_t h = parent_shape[1];
int64_t w = parent_shape[2];
int64_t c = parent_shape[3];
int64_t cols = b * h * w * anchors;
int64_t rows = c / anchors;
auto shape_node = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{cols, rows});
auto tr_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 0});
std::shared_ptr<ngraph::Node> input2d;
{
input2d = std::make_shared<ngraph::op::v1::Reshape>(input, shape_node, true);
input2d = std::make_shared<ngraph::op::Transpose>(input2d, tr_axes);
}
std::shared_ptr<ngraph::Node> region;
{
auto new_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 3, 1, 2});
auto tr_input = std::make_shared<ngraph::op::Transpose>(input, new_axes);
std::vector<float> anchors_vec(blobs[0].ptr<float>(), blobs[0].ptr<float>() + blobs[0].total());
std::vector<int64_t> mask(anchors, 1);
region = std::make_shared<ngraph::op::RegionYolo>(tr_input, coords, classes, anchors, useSoftmax, mask, 1, 3, anchors_vec);
auto tr_shape = tr_input->get_shape();
auto shape_as_inp = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{tr_shape.size()},
std::vector<int64_t>(tr_shape.begin(), tr_shape.end()));
region = std::make_shared<ngraph::op::v1::Reshape>(region, shape_as_inp, true);
new_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 2, 3, 1});
region = std::make_shared<ngraph::op::Transpose>(region, new_axes);
region = std::make_shared<ngraph::op::v1::Reshape>(region, shape_node, true);
region = std::make_shared<ngraph::op::Transpose>(region, tr_axes);
}
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 1});
std::vector<int64_t> boxes_shape{b, anchors, h, w};
auto shape_3d = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{boxes_shape.size()}, boxes_shape.data());
ngraph::Shape box_broad_shape{1, (size_t)anchors, (size_t)h, (size_t)w};
auto scale_x_y_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &scale_x_y);
auto shift_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{0.5});
auto axis = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{}, {0});
auto splits = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{5}, {1, 1, 1, 1, rows - 4});
auto split = std::make_shared<ngraph::op::v1::VariadicSplit>(input2d, axis, splits);
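// Rebuild the same box decoding as the CPU path as an ngraph subgraph:
// sigmoid with the scale_x_y shift plus grid-cell offsets for x/y, and
// exp scaled by the anchor priors for w/h.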
std::shared_ptr<ngraph::Node> box_x;
{
box_x = std::make_shared<ngraph::op::Sigmoid>(split->output(0));
box_x = std::make_shared<ngraph::op::v1::Subtract>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
box_x = std::make_shared<ngraph::op::v1::Multiply>(box_x, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
box_x = std::make_shared<ngraph::op::v1::Add>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, shape_3d, true);
std::vector<float> x_indices(w * h * anchors);
auto begin = x_indices.begin();
for (int i = 0; i < h; i++)
{
std::fill(begin + i * anchors, begin + (i + 1) * anchors, i);
}
for (int j = 1; j < w; j++)
{
std::copy(begin, begin + h * anchors, begin + j * h * anchors);
}
auto horiz = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, x_indices.data());
box_x = std::make_shared<ngraph::op::v1::Add>(box_x, horiz, ngraph::op::AutoBroadcastType::NUMPY);
auto cols_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{float(w)});
box_x = std::make_shared<ngraph::op::v1::Divide>(box_x, cols_node, ngraph::op::AutoBroadcastType::NUMPY);
}
std::shared_ptr<ngraph::Node> box_y;
{
box_y = std::make_shared<ngraph::op::Sigmoid>(split->output(1));
box_y = std::make_shared<ngraph::op::v1::Subtract>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
box_y = std::make_shared<ngraph::op::v1::Multiply>(box_y, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
box_y = std::make_shared<ngraph::op::v1::Add>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, shape_3d, true);
std::vector<float> y_indices(h * anchors);
for (int i = 0; i < h; i++)
{
std::fill(y_indices.begin() + i * anchors, y_indices.begin() + (i + 1) * anchors, i);
}
auto vert = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, (size_t)anchors, (size_t)h, 1}, y_indices.data());
box_y = std::make_shared<ngraph::op::v1::Add>(box_y, vert, ngraph::op::AutoBroadcastType::NUMPY);
auto rows_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{float(h)});
box_y = std::make_shared<ngraph::op::v1::Divide>(box_y, rows_node, ngraph::op::AutoBroadcastType::NUMPY);
}
std::shared_ptr<ngraph::Node> box_w, box_h;
{
int hNorm, wNorm;
if (nodes.size() > 1)
{
auto node_1_shape = nodes[1].dynamicCast<InfEngineNgraphNode>()->node->get_shape();
hNorm = node_1_shape[2];
wNorm = node_1_shape[3];
}
else
{
hNorm = h;
wNorm = w;
}
std::vector<float> anchors_w(anchors), anchors_h(anchors);
for (size_t a = 0; a < anchors; ++a)
{
anchors_w[a] = blobs[0].at<float>(0, 2 * a) / wNorm;
anchors_h[a] = blobs[0].at<float>(0, 2 * a + 1) / hNorm;
}
std::vector<float> bias_w(w * h * anchors), bias_h(w * h * anchors);
for (int j = 0; j < h; j++)
{
std::copy(anchors_w.begin(), anchors_w.end(), bias_w.begin() + j * anchors);
std::copy(anchors_h.begin(), anchors_h.end(), bias_h.begin() + j * anchors);
}
for (int i = 1; i < w; i++)
{
std::copy(bias_w.begin(), bias_w.begin() + h * anchors, bias_w.begin() + i * h * anchors);
std::copy(bias_h.begin(), bias_h.begin() + h * anchors, bias_h.begin() + i * h * anchors);
}
box_w = std::make_shared<ngraph::op::v0::Exp>(split->output(2));
box_w = std::make_shared<ngraph::op::v1::Reshape>(box_w, shape_3d, true);
auto anchor_w_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, bias_w.data());
box_w = std::make_shared<ngraph::op::v1::Multiply>(box_w, anchor_w_node, ngraph::op::AutoBroadcastType::NUMPY);
box_h = std::make_shared<ngraph::op::v0::Exp>(split->output(3));
box_h = std::make_shared<ngraph::op::v1::Reshape>(box_h, shape_3d, true);
auto anchor_h_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, bias_h.data());
box_h = std::make_shared<ngraph::op::v1::Multiply>(box_h, anchor_h_node, ngraph::op::AutoBroadcastType::NUMPY);
}
auto region_splits = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{3}, {4, 1, rows - 5});
auto region_split = std::make_shared<ngraph::op::v1::VariadicSplit>(region, axis, region_splits);
std::shared_ptr<ngraph::Node> scale;
{
float thr = classfix == -1 ? 0.5 : 0;
auto thresh_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{thr});
auto mask = std::make_shared<ngraph::op::v1::Less>(region_split->output(1), thresh_node);
auto zero_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, mask->get_shape(), std::vector<float>(cols, 0));
scale = std::make_shared<ngraph::op::v1::Select>(mask, zero_node, region_split->output(1));
}
std::shared_ptr<ngraph::Node> probs;
{
probs = std::make_shared<ngraph::op::v1::Multiply>(region_split->output(2), scale, ngraph::op::AutoBroadcastType::NUMPY);
auto thresh_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &thresh);
auto mask = std::make_shared<ngraph::op::v1::Greater>(probs, thresh_node);
auto zero_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, mask->get_shape(), std::vector<float>((rows - 5) * cols, 0));
probs = std::make_shared<ngraph::op::v1::Select>(mask, probs, zero_node);
}
auto concat_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, cols});
box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, concat_shape, true);
box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, concat_shape, true);
box_w = std::make_shared<ngraph::op::v1::Reshape>(box_w, concat_shape, true);
box_h = std::make_shared<ngraph::op::v1::Reshape>(box_h, concat_shape, true);
ngraph::NodeVector inp_nodes{box_x, box_y, box_w, box_h, scale, probs};
std::shared_ptr<ngraph::Node> result = std::make_shared<ngraph::op::Concat>(inp_nodes, 0);
result = std::make_shared<ngraph::op::Transpose>(result, tr_axes);
if (b > 1)
{
std::vector<int64_t> sizes{b, static_cast<int64_t>(result->get_shape()[0]) / b, static_cast<int64_t>(result->get_shape()[1])};
auto shape_node = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{sizes.size()}, sizes.data());
result = std::make_shared<ngraph::op::v1::Reshape>(result, shape_node, true);
}
return Ptr<BackendNode>(new InfEngineNgraphNode(result));
}
#endif // HAVE_DNN_NGRAPH
};
Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
{
return Ptr<RegionLayer>(new RegionLayerImpl(params));
}
} // namespace dnn
} // namespace cv

View File

@ -0,0 +1,264 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#include "../op_inf_engine.hpp"
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
#include <ngraph/op/reorg_yolo.hpp>
#else
#include <ngraph/op/experimental/layers/reorg_yolo.hpp>
#endif
#endif
#include "../op_cuda.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reorg.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class ReorgLayerImpl CV_FINAL : public ReorgLayer
{
int reorgStride;
public:
ReorgLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
reorgStride = params.get<int>("reorg_stride", 2);
CV_Assert(reorgStride > 0);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() > 0);
outputs = std::vector<MatShape>(inputs.size(), shape(
inputs[0][0],
inputs[0][1] * reorgStride * reorgStride,
inputs[0][2] / reorgStride,
inputs[0][3] / reorgStride));
CV_Assert(outputs[0][0] > 0 && outputs[0][1] > 0 && outputs[0][2] > 0 && outputs[0][3] > 0);
CV_Assert(total(outputs[0]) == total(inputs[0]));
return false;
}
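// Reorg (space-to-depth with stride r) rearranges an NxCxHxW input into
// Nx(C*r*r)x(H/r)x(W/r), e.g. 1x64x26x26 -> 1x256x13x13 for r = 2. It is
// implemented below as a reshape followed by a precomputed permutation.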
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
Mat inp = inputs[0];
Mat out = outputs[0];
int batchSize = inp.size[0];
LayerParams permParams;
if (batchSize == 1)
{
int order[] = {1, 3, 0, 2};
permParams.set("order", DictValue::arrayInt(&order[0], 4));
permuteInpShape.resize(4);
permuteInpShape[0] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride); // (channels*height)/(r*r)
permuteInpShape[1] = reorgStride;
permuteInpShape[2] = inp.size[3]; // width
permuteInpShape[3] = reorgStride;
permuteOutShape.resize(4);
for (int i = 0; i < 4; ++i)
permuteOutShape[i] = permuteInpShape[order[i]];
}
else
{
int order[] = {0, 2, 4, 1, 3};
permParams.set("order", DictValue::arrayInt(&order[0], 5));
permuteInpShape.resize(5);
permuteInpShape[0] = batchSize;
permuteInpShape[1] = inp.size[1] * inp.size[2] / (reorgStride * reorgStride); // (channels*height)/(r*r)
permuteInpShape[2] = reorgStride;
permuteInpShape[3] = inp.size[3]; // width
permuteInpShape[4] = reorgStride;
permuteOutShape.resize(5);
for (int i = 0; i < 5; ++i)
permuteOutShape[i] = permuteInpShape[order[i]];
}
permute = PermuteLayer::create(permParams);
std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
permute->finalize(permuteInputs, permuteOutputs);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
permute->preferableTarget = preferableTarget;
permute->forward(inputs, outputs, internals);
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
inputs[0] = inputs[0].reshape(1, permuteInpShape);
outputs[0] = outputs[0].reshape(1, permuteOutShape);
permute->forward(inputs, outputs, internals_arr);
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::ReorgYoloLayer ieLayer(name);
ieLayer.setStride(reorgStride);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto reorg = std::make_shared<ngraph::op::ReorgYolo>(ieInpNode, ngraph::Strides{(size_t)reorgStride});
return Ptr<BackendNode>(new InfEngineNgraphNode(reorg));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::ReorgOp>(preferableTarget, std::move(context->stream), reorgStride);
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
return true;
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
int64 flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
flops += 21*total(inputs[i]);
}
return flops;
}
private:
Ptr<PermuteLayer> permute;
std::vector<int> permuteInpShape, permuteOutShape;
};
Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)
{
return Ptr<ReorgLayer>(new ReorgLayerImpl(params));
}
} // namespace dnn
} // namespace cv

View File

@ -0,0 +1,367 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/reshape.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
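// Computes a Reshape output shape from the input shape and the "dim" mask.
// Mask entries: a positive value sets that dimension explicitly, 0 copies
// the corresponding input dimension, and a single -1 is inferred so that
// the total number of elements is preserved. E.g. (a sketch)
// srcShape = {2, 3, 4, 5} with mask {0, -1} gives dstShape = {2, 60}.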
static void computeShapeByReshapeMask(const MatShape &srcShape,
const MatShape &maskShape,
Range srcRange /*= Range::all()*/,
MatShape& dstShape)
{
int srcShapeSize = (int)srcShape.size();
int maskShapeSize = (int)maskShape.size();
srcRange = normalize_axis_range(srcRange, srcShapeSize);
bool explicitMask = !maskShape.empty(); // All mask values are positive.
for (int i = 0, n = maskShape.size(); i < n && explicitMask; ++i)
{
explicitMask = maskShape[i] > 0;
}
// The working range of the source shape is the range where total(src) == total(mask).
if (explicitMask)
{
int maskTotal = total(maskShape);
// Walk from the end of the mask until we collect the required total.
bool matched = false;
for (int i = srcRange.end - 1; i >= srcRange.start; --i)
{
if (matched)
{
if (total(srcShape, i, srcRange.end) != maskTotal)
{
srcRange.start = i + 1;
break;
}
else if (i == 0)
{
srcRange.start = 0;
break;
}
}
else
{
matched = total(srcShape, i, srcRange.end) == maskTotal;
}
}
while (total(srcShape, srcRange.start, srcRange.end) != maskTotal && srcRange.start > 0)
{
srcRange.start -= 1;
}
CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal);
}
CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShapeSize);
int dstShapeSize = srcShapeSize - srcRange.size() + maskShapeSize;
dstShape.resize(dstShapeSize);
std::copy(srcShape.begin(), srcShape.begin() + srcRange.start, dstShape.begin());
std::copy(srcShape.begin() + srcRange.end, srcShape.begin() + srcShapeSize, dstShape.begin() + srcRange.start + maskShapeSize);
int inferDim = -1;
for (int i = 0; i < maskShapeSize; i++)
{
if (maskShape[i] > 0)
{
dstShape[srcRange.start + i] = maskShape[i];
}
else if (maskShape[i] == 0)
{
if (srcRange.start + i >= srcShapeSize)
CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i));
dstShape[srcRange.start + i] = srcShape[srcRange.start + i];
}
else if (maskShape[i] == -1)
{
if (inferDim != -1)
CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)");
inferDim = srcRange.start + i;
dstShape[inferDim] = 1;
}
else
CV_Error(Error::StsBadArg, "maskShape[i] >= -1");
}
size_t srcTotal = total(srcShape);
size_t dstTotal = total(dstShape);
CV_Assert(dstTotal != 0);
if (inferDim != -1)
{
if (srcTotal % dstTotal != 0)
CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1");
dstShape[inferDim] = (int)(srcTotal / dstTotal);
}
else
{
CV_Assert(srcTotal == dstTotal);
}
}
class ReshapeLayerImpl CV_FINAL : public ReshapeLayer
{
public:
ReshapeLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
int axis = params.get<int>("axis", 0);
int numAxes = params.get<int>("num_axes", -1);
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
shapesInitialized = !hasDynamicShapes;
CV_Assert(numAxes >= -1);
newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);
newShapeDesc.clear();
if (params.has("dim"))
{
const DictValue &paramShape = params.get("dim");
int i, dims = paramShape.size();
newShapeDesc.resize(dims);
for (i = 0; i < dims; i++)
newShapeDesc[i] = paramShape.get<int>(i);
}
if (hasDynamicShapes)
{
dynamicShapes.clear();
inputIndices.clear();
if (params.has("dynamic_axes")) {
CV_Assert(params.has("input_indices"));
const DictValue &dynamicAxes = params.get("dynamic_axes");
const DictValue &dynamicInputShapes = params.get("input_indices");
int i, dims = dynamicAxes.size();
CV_Assert(dims == dynamicInputShapes.size());
CV_Assert(dims > 0);
dynamicShapes.resize(dims);
inputIndices.resize(dims);
for (i = 0; i < dims; i++) {
dynamicShapes[i] = dynamicAxes.get<int>(i);
inputIndices[i] = dynamicInputShapes.get<int>(i);
}
}
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine());
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
if (inputs.size() == 1 || inputs.size() == requiredOutputs)
{
outputs.clear();
for (size_t i = 0; i < inputs.size(); i++)
{
if (hasDynamicShapes && !shapesInitialized)
{
outputs.push_back(newShapeDesc);
}
else
{
outputs.push_back(MatShape());
computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
}
}
}
else
{
CV_Assert_N(inputs.size() == 2, total(inputs[0]) == total(inputs[1]));
outputs.assign(1, inputs[1]);
}
return true;
}
bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
{
if (hasDynamicShapes)
{
for (int i = 0; i < dynamicShapes.size(); ++i)
{
newShapeDesc[dynamicShapes[i]] = inputs[0][inputIndices[i]];
}
}
shapesInitialized = true;
return true;
}
void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> outputs;
outputs_arr.getMatVector(outputs);
CV_Assert(!outputs.empty());
outShapes.resize(outputs.size());
for (int i = 0; i < outputs.size(); ++i)
outShapes[i] = shape(outputs[i]);
}
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
for (size_t i = 0; i < outputs.size(); i++)
{
UMat srcBlob = inputs[i];
void *src_handle = inputs[i].handle(ACCESS_READ);
void *dst_handle = outputs[i].handle(ACCESS_WRITE);
if (src_handle != dst_handle)
{
UMat umat = srcBlob.reshape(1, (int)outShapes[i].size(), &outShapes[i][0]);
umat.copyTo(outputs[i]);
}
}
outs.assign(outputs);
return true;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
for (size_t i = 0; i < outputs.size(); i++)
{
Mat srcBlob = inputs[i];
if (outputs[i].data != srcBlob.data)
srcBlob.reshape(1, shape(outputs[i])).copyTo(outputs[i]);
}
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
InferenceEngine::Builder::ReshapeLayer ieLayer(name);
CV_Assert(outShapes.size() == 1);
ieLayer.setDims(outShapes[0]);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
CV_Assert(outShapes.size() == 1);
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<int64_t> out(outShapes[0].begin(), outShapes[0].end());
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{out.size()}, out.data());
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, shape, true);
return Ptr<BackendNode>(new InfEngineNgraphNode(reshape));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::ReshapeOp>(preferableTarget, std::move(context->stream));
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
return true;
}
private:
std::vector<MatShape> outShapes;
std::vector<int> dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input
std::vector<int> inputIndices; // Which axes from input are needed to compute correct output shape
bool hasDynamicShapes;
bool shapesInitialized;
};
Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)
{
return Ptr<ReshapeLayer>(new ReshapeLayerImpl(params));
}
}
}

View File

@ -0,0 +1,486 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include <opencv2/imgproc.hpp>
#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
#include <ngraph/op/interpolate.hpp>
#else
#include <ngraph/op/experimental/layers/interpolate.hpp>
#endif
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/resize.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
class ResizeLayerImpl : public ResizeLayer
{
public:
ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(params.get<float>("zoom_factor_x", params.get<float>("zoom_factor", 0))),
zoomFactorHeight(params.get<float>("zoom_factor_y", params.get<float>("zoom_factor", 0))),
scaleWidth(0), scaleHeight(0)
{
setParamsFrom(params);
outWidth = params.get<float>("width", 0);
outHeight = params.get<float>("height", 0);
if (params.has("zoom_factor"))
{
CV_Assert(!params.has("zoom_factor_x") && !params.has("zoom_factor_y"));
}
else if (params.has("zoom_factor_x") || params.has("zoom_factor_y"))
{
CV_Assert(params.has("zoom_factor_x") && params.has("zoom_factor_y"));
}
interpolation = params.get<String>("interpolation");
CV_Check(interpolation, interpolation == "nearest" || interpolation == "opencv_linear" || interpolation == "bilinear", "");
alignCorners = params.get<bool>("align_corners", false);
halfPixelCenters = params.get<bool>("half_pixel_centers", false);
if (interpolation == "opencv_linear")
halfPixelCenters = true;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert_N(inputs.size() == 1 || inputs.size() == 2, inputs[0].size() == 4);
outputs.resize(1, inputs[0]);
if (inputs.size() == 1) {
outputs[0][2] = zoomFactorHeight > 0 ? (outputs[0][2] * zoomFactorHeight) : outHeight;
outputs[0][3] = zoomFactorWidth > 0 ? (outputs[0][3] * zoomFactorWidth) : outWidth;
} else {
outputs[0][2] = inputs[1][2];
outputs[0][3] = inputs[1][3];
}
// We can work in-place (do nothing) if input shape == output shape.
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (backendId == DNN_BACKEND_CUDA)
return interpolation == "nearest" || interpolation == "bilinear" || interpolation == "opencv_linear";
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
return (interpolation == "nearest" && scaleWidth == scaleHeight) ||
(interpolation == "bilinear");
}
#endif
return backendId == DNN_BACKEND_OPENCV;
}
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
outHeight = outputs[0].size[2];
outWidth = outputs[0].size[3];
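// Precompute the output-to-input coordinate scale. With align_corners the
// corner pixels of input and output coincide, giving
// scale = (in - 1) / (out - 1); otherwise scale = in / out.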
if (alignCorners && outHeight > 1)
scaleHeight = static_cast<float>(inputs[0].size[2] - 1) / (outHeight - 1);
else
scaleHeight = static_cast<float>(inputs[0].size[2]) / outHeight;
if (alignCorners && outWidth > 1)
scaleWidth = static_cast<float>(inputs[0].size[3] - 1) / (outWidth - 1);
else
scaleWidth = static_cast<float>(inputs[0].size[3]) / outWidth;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
if (outHeight == inputs[0].size[2] && outWidth == inputs[0].size[3])
{
// outputs[0] = inputs[0] doesn't work due to BlobManager optimizations
if (inputs[0].data != outputs[0].data)
{
inputs[0].copyTo(outputs[0]);
}
return;
}
Mat& inp = inputs[0];
Mat& out = outputs[0];
int depth = inp.depth();
if ((interpolation == "nearest" && !alignCorners && !halfPixelCenters) || (interpolation == "opencv_linear" && depth != CV_8S) ||
(interpolation == "bilinear" && halfPixelCenters && depth != CV_8S))
{
// INTER_LINEAR Resize mode does not support INT8 inputs
InterpolationFlags mode = interpolation == "nearest" ? INTER_NEAREST : INTER_LINEAR;
for (size_t n = 0; n < inputs[0].size[0]; ++n)
{
for (size_t ch = 0; ch < inputs[0].size[1]; ++ch)
{
resize(getPlane(inp, n, ch), getPlane(out, n, ch),
Size(outWidth, outHeight), 0, 0, mode);
}
}
}
else if (interpolation == "nearest")
{
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
const int inpSpatialSize = inpHeight * inpWidth;
const int outSpatialSize = outHeight * outWidth;
const int numPlanes = inp.size[0] * inp.size[1];
CV_Assert_N(inp.isContinuous(), out.isContinuous());
Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight);
Mat outPlanes = out.reshape(1, numPlanes * outHeight);
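// Nearest-neighbour mapping: each output coordinate is projected into the
// input as y * scaleHeight (x * scaleWidth); with half_pixel_centers a
// half-source-pixel offset is added and the result is floored, otherwise
// it is rounded. Indices are clamped to the input border.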
float heightOffset = 0.0f;
float widthOffset = 0.0f;
if (halfPixelCenters)
{
heightOffset = 0.5f * scaleHeight;
widthOffset = 0.5f * scaleWidth;
}
if (depth == CV_8S)
{
for (int y = 0; y < outHeight; ++y)
{
float input_y = y * scaleHeight + heightOffset;
int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
y0 = std::min(y0, inpHeight - 1);
const int8_t* inpData_row = inpPlanes.ptr<int8_t>(y0);
for (int x = 0; x < outWidth; ++x)
{
float input_x = x * scaleWidth + widthOffset;
int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
x0 = std::min(x0, inpWidth - 1);
int8_t* outData = outPlanes.ptr<int8_t>(y, x);
const int8_t* inpData_row_c = inpData_row;
for (int c = 0; c < numPlanes; ++c)
{
*outData = inpData_row_c[x0];
inpData_row_c += inpSpatialSize;
outData += outSpatialSize;
}
}
}
}
else
{
for (int y = 0; y < outHeight; ++y)
{
float input_y = y * scaleHeight + heightOffset;
int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y);
y0 = std::min(y0, inpHeight - 1);
const float* inpData_row = inpPlanes.ptr<float>(y0);
for (int x = 0; x < outWidth; ++x)
{
float input_x = x * scaleWidth + widthOffset;
int x0 = halfPixelCenters ? std::floor(input_x) : lroundf(input_x);
x0 = std::min(x0, inpWidth - 1);
float* outData = outPlanes.ptr<float>(y, x);
const float* inpData_row_c = inpData_row;
for (int c = 0; c < numPlanes; ++c)
{
*outData = inpData_row_c[x0];
inpData_row_c += inpSpatialSize;
outData += outSpatialSize;
}
}
}
}
}
else if (interpolation == "bilinear" || interpolation == "opencv_linear")
{
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
const int inpSpatialSize = inpHeight * inpWidth;
const int outSpatialSize = outHeight * outWidth;
const int numPlanes = inp.size[0] * inp.size[1];
CV_Assert_N(inp.isContinuous(), out.isContinuous());
Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight);
Mat outPlanes = out.reshape(1, numPlanes * outHeight);
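// Each output pixel is a bilinear blend of the 2x2 input neighbourhood
// around (input_x, input_y). The expression below is the expanded form of
// f = (1-dy)(1-dx)*f00 + (1-dy)dx*f01 + dy(1-dx)*f10 + dy*dx*f11
// with dy = input_y - y0, dx = input_x - x0, f00 = row0[x0], f01 = row0[x1],
// f10 = row1[x0], f11 = row1[x1], factored to reuse common terms.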
if (depth == CV_8S)
{
for (int y = 0; y < outHeight; ++y)
{
float input_y = halfPixelCenters ? std::max((y + 0.5f) * scaleHeight - 0.5f, 0.0f) : y * scaleHeight;
int y0 = static_cast<int>(input_y);
const int8_t* inpData_row0 = inpPlanes.ptr<int8_t>(y0);
const int8_t* inpData_row1 = inpPlanes.ptr<int8_t>(std::min(y0 + 1, inpHeight - 1));
for (int x = 0; x < outWidth; ++x)
{
float input_x = halfPixelCenters ? std::max((x + 0.5f) * scaleWidth - 0.5f, 0.0f) : x * scaleWidth;
int x0 = static_cast<int>(input_x);
int x1 = std::min(x0 + 1, inpWidth - 1);
int8_t* outData = outPlanes.ptr<int8_t>(y, x);
const int8_t* inpData_row0_c = inpData_row0;
const int8_t* inpData_row1_c = inpData_row1;
for (int c = 0; c < numPlanes; ++c)
{
*outData = static_cast<int8_t>(inpData_row0_c[x0] +
(input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
(input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
(input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0])));
inpData_row0_c += inpSpatialSize;
inpData_row1_c += inpSpatialSize;
outData += outSpatialSize;
}
}
}
}
else
{
for (int y = 0; y < outHeight; ++y)
{
float input_y = y * scaleHeight;
int y0 = static_cast<int>(input_y);
const float* inpData_row0 = inpPlanes.ptr<float>(y0);
const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
for (int x = 0; x < outWidth; ++x)
{
float input_x = x * scaleWidth;
int x0 = static_cast<int>(input_x);
int x1 = std::min(x0 + 1, inpWidth - 1);
float* outData = outPlanes.ptr<float>(y, x);
const float* inpData_row0_c = inpData_row0;
const float* inpData_row1_c = inpData_row1;
for (int c = 0; c < numPlanes; ++c)
{
*outData = inpData_row0_c[x0] +
(input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) +
(input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] +
(input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0]));
inpData_row0_c += inpSpatialSize;
inpData_row1_c += inpSpatialSize;
outData += outSpatialSize;
}
}
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation);
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
if (interpolation == "nearest")
{
ieLayer.setType("Resample");
ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST");
ieLayer.getParameters()["antialias"] = false;
if (scaleWidth != scaleHeight)
CV_Error(Error::StsNotImplemented, "resample with sw != sh");
ieLayer.getParameters()["factor"] = 1.0f / scaleWidth;
}
else if (interpolation == "bilinear")
{
ieLayer.setType("Interp");
ieLayer.getParameters()["pad_beg"] = 0;
ieLayer.getParameters()["pad_end"] = 0;
ieLayer.getParameters()["align_corners"] = alignCorners;
}
else
CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
ieLayer.getParameters()["width"] = outWidth;
ieLayer.getParameters()["height"] = outHeight;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2021_2)
ngraph::op::InterpolateAttrs attrs;
attrs.pads_begin.push_back(0);
attrs.pads_end.push_back(0);
attrs.axes = ngraph::AxisSet{2, 3};
attrs.align_corners = alignCorners;
if (interpolation == "nearest") {
attrs.mode = "nearest";
attrs.antialias = false;
} else if (interpolation == "bilinear") {
attrs.mode = "linear";
} else {
CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
}
std::vector<int64_t> shape = {outHeight, outWidth};
auto out_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shape.data());
auto interp = std::make_shared<ngraph::op::Interpolate>(ieInpNode, out_shape, attrs);
#else
ngraph::op::v4::Interpolate::InterpolateAttrs attrs;
if (interpolation == "nearest") {
attrs.mode = ngraph::op::v4::Interpolate::InterpolateMode::nearest;
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel;
} else if (interpolation == "bilinear") {
attrs.mode = ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx;
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric;
} else {
CV_Error(Error::StsNotImplemented, format("Unsupported interpolation: %s", interpolation.c_str()));
}
attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes;
if (alignCorners) {
attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners;
}
attrs.nearest_mode = ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor;
std::vector<int64_t> shape = {outHeight, outWidth};
auto out_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shape.data());
auto& input_shape = ieInpNode->get_shape();
CV_Assert_N(input_shape[2] != 0, input_shape[3] != 0);
std::vector<float> scales = {static_cast<float>(outHeight) / input_shape[2], static_cast<float>(outWidth) / input_shape[3]};
auto scales_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{2}, scales.data());
auto axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{2, 3});
auto interp = std::make_shared<ngraph::op::v4::Interpolate>(ieInpNode, out_shape, scales_shape, axes, attrs);
#endif
return Ptr<BackendNode>(new InfEngineNgraphNode(interp));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
cuda4dnn::ResizeConfiguration config;
if (interpolation == "nearest")
{
config.type = InterpolationType::NEAREST_NEIGHBOUR;
config.align_corners = alignCorners;
config.half_pixel_centers = halfPixelCenters;
}
else if (interpolation == "bilinear")
{
config.type = InterpolationType::BILINEAR;
config.align_corners = alignCorners;
config.half_pixel_centers = halfPixelCenters;
}
else if (interpolation == "opencv_linear")
{
config.type = InterpolationType::BILINEAR;
config.align_corners = false;
config.half_pixel_centers = true;
}
else
CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer.");
return make_cuda_node<cuda4dnn::ResizeOp>(preferableTarget, std::move(context->stream), config);
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
return true;
}
protected:
int outWidth, outHeight;
const float zoomFactorWidth, zoomFactorHeight;
String interpolation;
float scaleWidth, scaleHeight;
bool alignCorners;
bool halfPixelCenters;
};
Ptr<ResizeLayer> ResizeLayer::create(const LayerParams& params)
{
return Ptr<ResizeLayer>(new ResizeLayerImpl(params));
}
class InterpLayerImpl CV_FINAL : public ResizeLayerImpl
{
public:
InterpLayerImpl(const LayerParams& params) : ResizeLayerImpl(params) {}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert_N(inputs.size() == 1, inputs[0].size() == 4);
outputs.resize(1, inputs[0]);
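// Caffe-style Interp: with a zoom factor the output extent is
// 1 + zoom * (in - 1), which keeps corner pixels aligned (create() below
// forces align_corners = true).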
outputs[0][2] = zoomFactorHeight > 0 ? (1 + zoomFactorHeight * (outputs[0][2] - 1)) : outHeight;
outputs[0][3] = zoomFactorWidth > 0 ? (1 + zoomFactorWidth * (outputs[0][3] - 1)) : outWidth;
// We can work in-place (do nothing) if input shape == output shape.
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
}
};
Ptr<Layer> InterpLayer::create(const LayerParams& params)
{
LayerParams lp(params);
lp.set("interpolation", "bilinear");
lp.set("align_corners", true);
return Ptr<Layer>(new InterpLayerImpl(lp));
}
} // namespace dnn
} // namespace cv

View File

@ -0,0 +1,507 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
/*
Implementation of Scale layer.
*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/scale_shift.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class ScaleLayerImpl CV_FINAL : public ScaleLayer
{
public:
ScaleLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
hasBias = params.get<bool>("bias_term", false);
axis = params.get<int>("axis", 1);
hasWeights = false;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
outputs.assign(1, inputs[0]);
return true;
}
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
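// Two blobs always mean scale + bias. With at most one blob, that blob (or
// the second input when blobs are empty) serves as the bias if bias_term is
// set, and as the scale otherwise.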
hasWeights = blobs.size() == 2 || (blobs.size() <= 1 && !hasBias);
CV_Assert((inputs.size() == 2 && blobs.empty()) || blobs.size() == (int)hasWeights + (int)hasBias);
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
backendId == DNN_BACKEND_HALIDE ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1 && !blobs.empty()) ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert_N(outputs.size() == 1, !blobs.empty() || inputs.size() == 2);
Mat &inpBlob = inputs[0];
Mat &outBlob = outputs[0];
// There is a mode in which the first blob is multiplied by the second one
// instead of by trainable weights.
Mat weights = hasWeights ? (blobs.empty() ? inputs[1] : blobs[0]).reshape(1, 1) : Mat();
Mat bias = hasBias ? (blobs.empty() ? inputs[1] : blobs.back()).reshape(1, 1) : Mat();
MatShape inpShape = shape(inpBlob);
const int numWeights = !weights.empty() ? weights.total() : bias.total();
CV_Assert(numWeights != 0);
if (hasWeights && hasBias)
CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs");
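// Find the axis range [axis, endAxis) whose total size equals the number of
// weights; the weights then broadcast over the remaining trailing axes.
// E.g. (a sketch) inpShape = {2, 3, 4, 5}, axis = 1 and 3 weights give
// endAxis = 2: one weight per channel, applied across each 4x5 plane.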
int endAxis;
for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis)
{
if (total(inpShape, axis, endAxis) == numWeights)
break;
}
CV_Assert(total(inpShape, axis, endAxis) == numWeights);
CV_Assert(!hasBias || numWeights == bias.total());
CV_CheckTypeEQ(inpBlob.type(), CV_32FC1, ""); CV_CheckTypeEQ(outBlob.type(), CV_32FC1, "");
int numSlices = total(inpShape, 0, axis);
float* inpData = (float*)inpBlob.data;
float* outData = (float*)outBlob.data;
if (endAxis != inpBlob.dims)
{
float* weightsData = !weights.empty() ? (float*)weights.data : 0;
float* biasesData = hasBias ? (float*)bias.data : 0;
int spatialSize = total(inpShape, endAxis); // spatialSize != 1
for (int i = 0; i < numSlices; ++i)
{
for (int j = 0; j < numWeights; ++j)
{
float w = weightsData ? weightsData[j] : 1;
float b = biasesData ? biasesData[j] : 0;
Mat inpSlice(1, spatialSize, CV_32F, inpData);
Mat outSlice(1, spatialSize, CV_32F, outData);
inpSlice.convertTo(outSlice, CV_32F, w, b);
inpData += spatialSize;
outData += spatialSize;
}
}
}
else
{
for (int i = 0; i < numSlices; ++i)
{
Mat inpSlice(1, numWeights, CV_32F, inpData);
Mat outSlice(1, numWeights, CV_32F, outData);
if (!weights.empty())
{
multiply(inpSlice, weights, outSlice);
if (hasBias)
add(outSlice, bias, outSlice);
}
else if (hasBias)
add(inpSlice, bias, outSlice);
inpData += numWeights;
outData += numWeights;
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
CV_Assert(!blobs.empty() || inputs.size() == 2);
auto weightsMat = Mat(), biasMat = Mat();
cuda4dnn::ScaleShiftConfiguration config;
if (hasWeights)
{
if (blobs.empty())
{
config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
}
else
{
weightsMat = blobs[0];
config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
}
}
else
{
config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
}
if (hasBias)
{
if(blobs.empty())
{
config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
}
else
{
/* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
* in either case, it is at the end of the blobs vector => bias = blobs.back()
*/
biasMat = blobs.back();
config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
}
}
else
{
config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
}
config.axis = axis;
return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), config, weightsMat, biasMat);
}
#endif
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
{
switch (node->backendId)
{
case DNN_BACKEND_HALIDE:
{
#ifdef HAVE_HALIDE
auto base = node.dynamicCast<HalideBackendNode>();
Halide::Func& input = base->funcs.back();
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = attachHalide(input(x, y, c, n));
return Ptr<BackendNode>(new HalideBackendNode(base, top));
#endif // HAVE_HALIDE
break;
}
}
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
Halide::Buffer<float> input = halideBuffer(inputs[0]);
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = attachHalide(input(x, y, c, n));
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_HALIDE
// attachHalide can work with both Halide::Buffer and Halide::Func. In the
// second case the scale/shift is fused into the preceding function.
Halide::Func attachHalide(const Halide::Expr& input)
{
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Var x("x"), y("y"), c("c"), n("n");
const int numChannels = blobs[0].total();
Halide::Expr topExpr = input;
if (hasWeights)
{
auto weights = wrapToHalideBuffer(blobs[0], {numChannels});
topExpr *= weights(c);
}
if (hasBias)
{
auto bias = wrapToHalideBuffer(blobs.back(), {numChannels});
topExpr += bias(c);
}
top(x, y, c, n) = topExpr;
return top;
}
#endif // HAVE_HALIDE
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ScaleShiftLayer(name);
CV_Assert(!blobs.empty());
const size_t numChannels = blobs[0].total();
if (hasWeights)
{
addConstantData("weights", wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C), l);
}
else
{
auto weights = InferenceEngine::make_shared_blob<float>({
InferenceEngine::Precision::FP32, {(size_t)numChannels},
InferenceEngine::Layout::C
});
weights->allocate();
float* buf = weights->buffer().as<float*>();
std::fill(buf, buf + numChannels, 1);
addConstantData("weights", weights, l);
}
if (hasBias)
addConstantData("biases", wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C), l);
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto ieInpNode0 = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto ieInpNode1 = nodes.size() > 1 ? nodes[1].dynamicCast<InfEngineNgraphNode>()->node : nullptr;
size_t numChannels = 1;
if (blobs.empty())
{
for (const size_t& dim : ieInpNode1->get_shape())
numChannels *= dim;
}
else
numChannels = blobs[0].total();
std::vector<size_t> shape(ieInpNode0->get_shape().size(), 1);
int cAxis = normalize_axis(axis, shape.size());
shape[cAxis] = numChannels;
auto node = ieInpNode0;
if (hasWeights)
{
auto weight = blobs.empty() ? ieInpNode1 :
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2)
node = std::make_shared<ngraph::op::v1::Multiply>(node, weight, ngraph::op::AutoBroadcastType::NUMPY);
#else
node = std::make_shared<ngraph::op::v0::Multiply>(node, weight, ngraph::op::AutoBroadcastType::NUMPY);
#endif
}
if (hasBias || !hasWeights)
{
std::shared_ptr<ngraph::Node> bias;
if (hasBias)
{
bias = blobs.empty() ? ieInpNode1 :
std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
ngraph::Shape(shape), blobs.back().data);
}
else
bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
ngraph::Shape(shape), std::vector<float>(numChannels, 0).data());
node = std::make_shared<ngraph::op::v1::Add>(node, bias, ngraph::op::AutoBroadcastType::NUMPY);
}
return Ptr<BackendNode>(new InfEngineNgraphNode(node));
}
#endif // HAVE_DNN_NGRAPH
void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
{
scale = (hasWeights && !blobs.empty()) ? blobs[0] : Mat();
shift = (hasBias && !blobs.empty()) ? blobs.back() : Mat();
}
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
params.set("input_scales", DictValue::arrayReal(scales[0].data(), scales[0].size()));
params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
return true;
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
int64 flops = 0;
for(int i = 0; i < inputs.size(); i++)
{
flops += 2*total(inputs[i]);
}
return flops;
}
private:
bool hasWeights;
};
Ptr<ScaleLayer> ScaleLayer::create(const LayerParams& params)
{
return Ptr<ScaleLayer>(new ScaleLayerImpl(params));
}
Ptr<Layer> ShiftLayer::create(const LayerParams& params)
{
LayerParams scaleParams;
scaleParams.name = params.name;
scaleParams.type = "Scale";
scaleParams.blobs = params.blobs;
scaleParams.set("bias_term", true);
scaleParams.set("axis", 0);
return Ptr<ScaleLayer>(new ScaleLayerImpl(scaleParams));
}
class DataAugmentationLayerImpl CV_FINAL : public DataAugmentationLayer
{
public:
DataAugmentationLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
recompute_mean = params.get<int>("recompute_mean", 1);
CV_CheckGT(recompute_mean, 0, "");
mean_per_pixel = params.get<bool>("mean_per_pixel", false);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert_N(inputs.size() == 1, blobs.size() == 3);
CV_Assert_N(blobs[0].total() == 1,
blobs[2].total() == inputs[0][1]);
outputs.assign(1, inputs[0]);
return true;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert_N(outputs.size() == 1, blobs.size() == 3, inputs.size() == 1);
int num_iter = 0;
float* inpData = inputs[0].ptr<float>();
float* outData = outputs[0].ptr<float>();
Mat data_mean_cpu = blobs[1].clone();
Mat mean_resize = Mat(inputs[0].size[3], inputs[0].size[2], CV_32FC3);
Mat mean_3d = Mat(data_mean_cpu.size[3], data_mean_cpu.size[2], CV_32FC3, data_mean_cpu.ptr<float>(0));
resize(mean_3d, mean_resize, Size(inputs[0].size[3], inputs[0].size[2]));
int new_size[] = {1, mean_resize.channels(), mean_resize.cols, mean_resize.rows};
Mat data_mean_cpu_resize = mean_resize.reshape(1, *new_size);
Mat data_mean_per_channel_cpu = blobs[2].clone();
const int numWeights = data_mean_cpu_resize.total();
CV_Assert(numWeights != 0);
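// For the first recompute_mean iterations, update a running mean over
// batches: mean_n = ((n - 1) * mean_{n-1} + batch_mean) / n, then collapse
// it to a per-channel mean by averaging over the spatial area.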
++num_iter;
if (num_iter <= recompute_mean)
{
data_mean_cpu_resize *= (num_iter - 1);
const int batch = inputs[0].size[0];
float alpha = 1.0f / batch;
for (int i = 0; i < batch; ++i)
{
Mat inpSlice(1, numWeights, CV_32F, inpData);
inpSlice = alpha * inpSlice;
add(data_mean_cpu_resize.reshape(1, 1), inpSlice, data_mean_cpu_resize.reshape(1, 1));
inpData += numWeights;
}
data_mean_cpu_resize *= (1.0 / num_iter);
int newsize[] = {inputs[0].size[1], (int)inputs[0].total(2)};
reduce(data_mean_cpu_resize.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);
int area = inputs[0].total(2);
data_mean_per_channel_cpu *= (1.0 / area);
}
MatShape inpShape = shape(inputs[0]);
inpData = inputs[0].ptr<float>();
if (mean_per_pixel)
{
int numSlices = inputs[0].size[0];
for (int i = 0; i < numSlices; ++i)
{
Mat inpSlice(1, numWeights, CV_32F, inpData);
Mat outSlice(1, numWeights, CV_32F, outData);
add(inpSlice, (-1) * data_mean_cpu_resize, outSlice);
inpData += numWeights;
outData += numWeights;
}
}
else
{
int numSlices = inpShape[1];
int count = numWeights / numSlices;
for (int i = 0; i < numSlices; ++i)
{
Mat inpSlice(1, count, CV_32F, inpData);
Mat outSlice(1, count, CV_32F, outData);
float coeff = data_mean_per_channel_cpu.reshape(1, 1).at<float>(0, i);
outSlice = inpSlice - coeff;
inpData += count;
outData += count;
}
}
}
private:
int recompute_mean;
bool mean_per_pixel;
};
Ptr<DataAugmentationLayer> DataAugmentationLayer::create(const LayerParams& params)
{
return Ptr<DataAugmentationLayer>(new DataAugmentationLayerImpl(params));
}
} // namespace dnn
} // namespace cv

View File

@ -0,0 +1,167 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/shuffle_channel.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv { namespace dnn {
class ShuffleChannelLayerImpl CV_FINAL : public ShuffleChannelLayer
{
public:
ShuffleChannelLayerImpl(const LayerParams& params)
{
group = params.get<int>("group", 1);
setParamsFrom(params);
}
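// Channel shuffle regroups channels across groups: view [N, C, H, W] as
// [N, group, C/group, H*W], swap the two middle axes, and flatten back.
// E.g. with group = 2 and channels [a0 a1 a2 b0 b1 b2] the result is
// [a0 b0 a1 b1 a2 b2].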
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1 && inputs[0].size() == 4);
CV_Assert(inputs[0][1] % group == 0);
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return group == 1;
}
virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
if (group != 1)
{
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
LayerParams lp;
float order[] = {0, 2, 1, 3};
lp.set("order", DictValue::arrayInt(&order[0], 4));
permute = PermuteLayer::create(lp);
const Mat& inp = inputs[0];
const Mat& out = outputs[0];
permuteInpShape.resize(4);
permuteInpShape[0] = inp.size[0];
permuteInpShape[1] = group;
permuteInpShape[2] = inp.size[1] / group;
permuteInpShape[3] = inp.size[2]*inp.size[3];
permuteOutShape.resize(4);
permuteOutShape[0] = permuteInpShape[0];
permuteOutShape[1] = permuteInpShape[2];
permuteOutShape[2] = permuteInpShape[1];
permuteOutShape[3] = permuteInpShape[3];
std::vector<Mat> permuteInputs(1, inp.reshape(1, permuteInpShape));
std::vector<Mat> permuteOutputs(1, out.reshape(1, permuteOutShape));
permute->finalize(permuteInputs, permuteOutputs);
}
}
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
if (inputs[0].u != outputs[0].u)
{
if (!permute.empty())
{
inputs[0] = inputs[0].reshape(1, permuteInpShape.size(), &permuteInpShape[0]);
outputs[0] = outputs[0].reshape(1, permuteOutShape.size(), &permuteOutShape[0]);
permute->preferableTarget = preferableTarget;
permute->forward(inputs, outputs, internals);
}
else
inputs[0].copyTo(outputs[0]);
}
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
Mat inp = inputs[0];
Mat out = outputs[0];
if (inp.data != out.data)
{
if (!permute.empty())
{
inp = inp.reshape(1, permuteInpShape);
out = out.reshape(1, permuteOutShape);
std::vector<Mat> permuteInputs(1, inp);
std::vector<Mat> permuteOutputs(1, out);
permute->forward(permuteInputs, permuteOutputs, internals);
}
else
inp.copyTo(out);
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::ShuffleChannelOp>(preferableTarget, std::move(context->stream), group);
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
return true;
}
private:
Ptr<PermuteLayer> permute;
std::vector<int> permuteInpShape, permuteOutShape;
};
Ptr<Layer> ShuffleChannelLayer::create(const LayerParams& params)
{
return Ptr<Layer>(new ShuffleChannelLayerImpl(params));
}
} // namespace dnn
} // namespace cv

View File

@ -0,0 +1,821 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "layers_common.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/core/utils/logger.hpp>
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/slice.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
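// Normalizes slice parameters against a concrete input shape: a negative
// axis is wrapped to a positive index and each range list is padded in
// front with Range::all(); a negative start in the trailing range is
// converted modulo the axis size (e.g. start = -1 on an axis of size 5
// becomes 4).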
void sliceRangesFromShape(const MatShape& inpShape, int& axis, std::vector<std::vector<cv::Range> >& sliceRanges)
{
CV_Assert(inpShape.size() > 0);
bool axisNeg = (axis < 0);
axis = (axis + static_cast<int>(inpShape.size())) % inpShape.size();
int n = inpShape[axis];
for (size_t i = 0; i < sliceRanges.size(); ++i)
{
std::vector<Range>& ranges = sliceRanges[i];
if (axisNeg)
{
ranges.insert(ranges.begin(), axis, Range::all());
}
Range& range = ranges.back();
if (range.start >= 0)
{
continue;
}
CV_Assert(n != 0);
range.start = (n + range.start) % n;
}
}
class SliceLayerImpl : public SliceLayer
{
public:
SliceLayerImpl(const LayerParams& params)
{
setParamsFrom(params);
hasSteps = false;
axis = params.get<int>("axis", 1);
num_split = params.get<int>("num_split", 0);
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
shapesInitialized = !hasDynamicShapes;
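// Two parameterizations are supported: Caffe-style "slice_point", which
// cuts the target axis at the listed indices into consecutive outputs, and
// ONNX/TensorFlow-style "begin" with either "size" or "end" (and optional
// "steps") describing a single output range per axis.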
if (params.has("slice_point"))
{
CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
const DictValue &indicesValue = params.get("slice_point");
int size = axis > 0 ? axis + 1 : 1;
sliceRanges.resize(indicesValue.size() + 1,
std::vector<Range>(size, Range::all()));
int prevSlice = 0;
for (int i = 0; i < indicesValue.size(); ++i)
{
sliceRanges[i][size - 1].start = prevSlice;
sliceRanges[i][size - 1].end = indicesValue.get<int>(i);
prevSlice = sliceRanges[i][size - 1].end;
}
sliceRanges.back()[size - 1].start = prevSlice;
}
else if (params.has("begin"))
{
CV_Assert(params.has("size") ^ params.has("end"));
const DictValue &begins = params.get("begin");
const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");
CV_Assert(begins.size() == sizesOrEnds.size());
sliceRanges.resize(1);
sliceRanges[0].resize(begins.size(), Range::all());
for (int i = 0; i < begins.size(); ++i)
{
int start = begins.get<int>(i);
int sizeOrEnd = sizesOrEnds.get<int>(i); // It may be negative for reverse indexing.
sliceRanges[0][i].start = start;
if (params.has("size"))
{
int size = sizeOrEnd;
CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size).
sliceRanges[0][i].end = size > 0 ? (start + size) : -1; // We'll finalize a negative value later.
}
else
{
int end = sizeOrEnd;
CV_Assert(end < 0 || end > start); // End index is excluded.
sliceRanges[0][i].end = end; // We'll finalize a negative value later.
}
}
if (params.has("steps"))
{
const DictValue &steps = params.get("steps");
sliceSteps.resize(1);
sliceSteps[0].resize(steps.size());
for (int i = 0; i < steps.size(); ++i)
{
int step = steps.get<int>(i);
CV_Assert(step >= 1);
if (step > 1)
hasSteps = true;
sliceSteps[0][i] = step;
}
}
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) &&
sliceRanges.size() == 1 && sliceRanges[0].size() == 4 && !hasSteps;
#endif
#ifdef HAVE_DNN_NGRAPH
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
return sliceRanges.size() == 1 && !hasSteps;
#endif
#ifdef HAVE_CUDA
if (backendId == DNN_BACKEND_CUDA)
return !hasSteps;
#endif
return backendId == DNN_BACKEND_OPENCV;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
MatShape inpShape = inputs[0];
int axis_rw = axis;
std::vector<std::vector<cv::Range> > sliceRanges_rw = sliceRanges;
sliceRangesFromShape(inpShape, axis_rw, sliceRanges_rw);
if (!sliceRanges_rw.empty())
{
outputs.resize(sliceRanges_rw.size(), inpShape);
for (int i = 0; i < outputs.size(); ++i)
{
CV_Assert(sliceRanges_rw[i].size() <= inpShape.size());
for (int j = 0; j < sliceRanges_rw[i].size(); ++j)
{
if (shapesInitialized || inpShape[j] > 0)
outputs[i][j] = normalize_axis_range(sliceRanges_rw[i][j], inpShape[j]).size();
if (!sliceSteps.empty() && (i < sliceSteps.size()) && (j < sliceSteps[i].size()) && (sliceSteps[i][j] > 1))
outputs[i][j] = (outputs[i][j] + sliceSteps[i][j] - 1) / sliceSteps[i][j];
}
}
}
else // Divide the input blob into equal parts along the axis.
{
CV_Assert(0 <= axis_rw && axis_rw < inpShape.size());
int splits = num_split ? num_split : requiredOutputs;
CV_Assert(splits > 0 && inpShape[axis_rw] % splits == 0);
inpShape[axis_rw] /= splits;
outputs.resize(splits, inpShape);
}
return false;
}
bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
{
shapesInitialized = true;
return true;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
#ifdef HAVE_OPENCL
ocl_exec_cache.clear();
#endif
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
CV_Assert(inputs.size() == 1);
const MatSize& inpShape = inputs[0].size;
sliceRangesFromShape(shape(inputs[0]), axis, sliceRanges);
finalSliceRanges = sliceRanges;
if (sliceRanges.empty())
{
// Divide the input blob into equal parts along the axis.
int outAxisSize = inpShape[axis] / outputs.size();
finalSliceRanges.resize(outputs.size(),
std::vector<Range>(axis + 1, Range::all()));
int prevSlice = 0;
for (int i = 0; i < outputs.size(); ++i)
{
finalSliceRanges[i][axis].start = prevSlice;
finalSliceRanges[i][axis].end = finalSliceRanges[i][axis].start + outAxisSize;
prevSlice = finalSliceRanges[i][axis].end;
}
}
else
CV_Assert(outputs.size() == sliceRanges.size());
for (int i = 0; i < outputs.size(); ++i)
{
CV_Assert(finalSliceRanges[i].size() <= inpShape.dims());
// Fill the rest of ranges.
for (int j = finalSliceRanges[i].size(); j < inpShape.dims(); ++j)
{
finalSliceRanges[i].push_back(Range::all());
}
// Clamp.
for (int j = 0; j < finalSliceRanges[i].size(); ++j)
{
finalSliceRanges[i][j] = normalize_axis_range(finalSliceRanges[i][j], inpShape[j]);
}
}
if (!sliceSteps.empty() && sliceSteps[0].size() != inputs[0].dims)
sliceSteps[0].resize(inputs[0].dims, 1);
#if 0
std::cout << "DEBUG: DNN/Slice: " << outputs.size() << " inpShape=" << inpShape << std::endl;
for (int i = 0; i < outputs.size(); ++i)
{
for (int j = 0; j < finalSliceRanges[i].size(); ++j)
{
std::cout << finalSliceRanges[i][j];
}
std::cout << std::endl;
}
#endif
}
#ifdef HAVE_OPENCL
struct OpenCLExecInfo
{
std::string kernel_name;
std::string build_opts;
size_t local_size[2];
size_t global_size[2];
OpenCLExecInfo()
{
local_size[0] = local_size[1] = 0;
global_size[0] = global_size[1] = 0;
}
};
std::vector<OpenCLExecInfo> ocl_exec_cache;
void ocl_prepare(const std::vector<UMat>& inputs, const std::vector<UMat>& outputs)
{
CV_TRACE_FUNCTION();
CV_Assert(outputs.size() == finalSliceRanges.size());
ocl_exec_cache.resize(outputs.size());
const UMat& input = inputs[0];
const int dims = input.dims;
size_t WSZ = 128;
const int elemSize = (int)input.elemSize();
String opts0 = cv::format(
"-DDIMS=%d -DELEMSIZE=%d",
dims, elemSize
);
for (int d = 0; d < dims; d++)
{
opts0 += cv::format(" -DSRC_STEP_%d=%d", d, (int)input.step[dims - 1 - d]);
}
for (size_t i = 0; i < outputs.size(); i++)
{
OpenCLExecInfo& ocl = ocl_exec_cache[i];
const UMat& output = outputs[i];
const std::vector<Range>& range = finalSliceRanges[i];
String opts = opts0;
CV_CheckEQ(output.dims, dims, "");
for (int d = 0; d < dims; d++)
{
opts += cv::format(" -DDST_STEP_%d=%d -DDST_SZ_%d=%d -DSRC_START_%d=%d",
d, (int)output.step[dims - 1 - d],
d, (int)output.size[dims - 1 - d],
d, (int)range[dims - 1 - d].start
);
CV_CheckEQ(range[d].size(), (int)output.size[d], "");
}
const size_t param_LIMIT_BLOCK_SIZE_PER_WG = WSZ * 64;
int block_dims = 0;
size_t block_size = elemSize;
for (int i = dims - 1; i >= 0; --i)
{
if (input.step[i] != output.step[i])
break;
block_size *= output.size[i];
block_dims++;
if (block_size >= param_LIMIT_BLOCK_SIZE_PER_WG)
break;
}
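// Worked example (hypothetical sizes): for a float output whose three
// innermost dims are 2x8x16 with matching input/output steps, block_size
// grows as 4 -> 64 -> 512 -> 1024 bytes and block_dims becomes 3.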
const size_t total = output.total() * elemSize;
size_t num_blocks = total / block_size;
if ((num_blocks <= 8 && block_size >= WSZ * 4) || (block_size >= param_LIMIT_BLOCK_SIZE_PER_WG))
{
// use 1D copy mode
opts += cv::format(" -DUSE_COPY_1D=1");
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims);
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
}
else
{
// use 2D copy mode
int block_cols = block_size;
int block_dims_contiguous = block_dims;
size_t input_base_step = input.step[dims - 1 - block_dims_contiguous];
size_t output_base_step = output.step[dims - 1 - block_dims_contiguous];
size_t block_rows = 1;
for (int i = dims - 1 - block_dims_contiguous; i >= 0; --i)
{
if (input.step[i] * output_base_step != output.step[i] * input_base_step)
break;
block_rows *= output.size[i];
block_dims++;
}
block_size *= block_rows;
num_blocks = total / block_size;
if (block_rows > 1)
{
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_cols);
opts += cv::format(" -DBLOCK_ROWS=%d", (int)block_rows);
opts += cv::format(" -DBLOCK_SRC_STRIDE=%d", (int)input_base_step);
}
else
{
// use 1D copy mode
opts += cv::format(" -DUSE_COPY_1D=1");
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims_contiguous);
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
}
}
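// At this point one of two copy modes is configured: USE_COPY_1D moves
// num_blocks contiguous runs of block_size bytes, while the 2D mode moves
// block_rows strided rows of block_cols bytes per block.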
const size_t MIN_WORK_ITEMS = 16;
if (block_size <= 4 * MIN_WORK_ITEMS)
WSZ = 4;
else if (block_size <= 8 * MIN_WORK_ITEMS)
WSZ = 8;
else if (block_size <= 16 * MIN_WORK_ITEMS)
WSZ = 16;
else if (block_size <= 32 * MIN_WORK_ITEMS)
WSZ = 32;
else if (block_size <= 64 * MIN_WORK_ITEMS)
WSZ = 64;
opts += cv::format(" -DWSZ=%d", (int)WSZ);
std::ostringstream kernel_suffix;
kernel_suffix << dims << 'x' << elemSize << "_bsz" << block_size;
kernel_suffix << "__src_";
for (int d = 0; d < dims; d++)
{
kernel_suffix << input.size[dims - 1 - d] << '_';
}
kernel_suffix << '_';
/*for (int d = 0; d < dims; d++)
{
kernel_suffix << input.step[dims - 1 - d] << '_';
}
kernel_suffix << '_';*/
kernel_suffix << "dst_";
for (int d = 0; d < dims; d++)
{
kernel_suffix << output.size[dims - 1 - d] << '_';
}
/*kernel_suffix << '_';
for (int d = 0; d < dims; d++)
{
kernel_suffix << output.step[dims - 1 - d] << '_';
}*/
kernel_suffix << "_slice_";
for (int d = 0; d < dims; d++)
{
kernel_suffix << range[dims - 1 - d].start << '_';
}
for (int d = 0; d < dims; d++)
{
kernel_suffix << '_' << range[dims - 1 - d].end;
}
std::string kernel_suffix_str = kernel_suffix.str();
opts += cv::format(" -DSLICE_KERNEL_SUFFIX=%s", kernel_suffix_str.c_str());
ocl.kernel_name = cv::format("slice_%s", kernel_suffix_str.c_str());
ocl.build_opts = opts;
ocl.local_size[0] = WSZ;
ocl.local_size[1] = 1;
ocl.global_size[0] = WSZ;
ocl.global_size[1] = num_blocks;
} // for outputs.size()
} // ocl_prepare
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
CV_TRACE_FUNCTION();
if (hasSteps)
return false; // TODO not implemented yet: https://github.com/opencv/opencv/pull/19546
std::vector<UMat> inputs;
std::vector<UMat> outputs;
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
CV_Assert(outputs.size() == finalSliceRanges.size());
const UMat& input = inputs[0];
const int dims = input.dims;
if (dims > 5)
{
CV_LOG_INFO(NULL, "DNN/OpenCL/Slice: implementation doesn't support dims=" << dims << ". Fallback to CPU");
return false;
}
if (ocl_exec_cache.empty())
{
ocl_prepare(inputs, outputs);
}
CV_CheckEQ(ocl_exec_cache.size(), outputs.size(), "");
for (size_t i = 0; i < outputs.size(); i++)
{
const OpenCLExecInfo& ocl = ocl_exec_cache[i];
UMat& output = outputs[i];
ocl::Kernel kernel(ocl.kernel_name.c_str(), ocl::dnn::slice_oclsrc, ocl.build_opts);
if (kernel.empty())
return false;
bool ret = kernel.args(
ocl::KernelArg::PtrReadOnly(input),
ocl::KernelArg::PtrWriteOnly(output)
)
.run_(2, (size_t*)ocl.global_size, (size_t*)ocl.local_size, false);
if (!ret)
return false;
} // for outputs.size()
return true;
} // forward_ocl
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
const Mat& inpMat = inputs[0];
CV_Assert(outputs.size() == finalSliceRanges.size());
if (!hasSteps)
{
for (size_t i = 0; i < outputs.size(); i++)
{
inpMat(finalSliceRanges[i]).copyTo(outputs[i]);
}
}
else
{
int dimsNum = inpMat.dims;
for (size_t i = 0; i < outputs.size(); i++)
{
std::vector<int> inpIdx(dimsNum, 0);
std::vector<int> outIdx(dimsNum, 0);
if (inpMat.type() == CV_16S)
getSliceRecursive<int16_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
else if (inpMat.type() == CV_8S)
getSliceRecursive<int8_t>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
else
getSliceRecursive<float>(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx);
}
}
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
CV_Assert_N(finalSliceRanges.size() == 1, inputs.size() <= 2);
std::vector<size_t> axes, offsets, dims;
int from, to, step;
int numDims = finalSliceRanges[0].size();
if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
{
from = axis;
to = numDims;
step = 1;
}
else
{
from = numDims - 1;
to = axis - 1;
step = -1;
}
for (int i = from; i != to; i += step)
{
axes.push_back(i);
offsets.push_back(finalSliceRanges[0][i].start);
dims.push_back(finalSliceRanges[0][i].size());
}
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Crop");
ieLayer.getParameters()["axis"] = axes;
ieLayer.getParameters()["dim"] = dims;
ieLayer.getParameters()["offset"] = offsets;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
if (inputs.size() != 2)
{
std::vector<size_t> outShape(numDims);
for (int i = 0; i < numDims; ++i)
outShape[i] = finalSliceRanges[0][i].size();
ieLayer.getInputPorts()[1].setParameter("type", "weights");
auto shapeSource = InferenceEngine::make_shared_blob<float>({
InferenceEngine::Precision::FP32, outShape,
InferenceEngine::Layout::ANY
});
shapeSource->allocate();
addConstantData("weights", shapeSource, ieLayer);
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif
#endif
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
CV_Assert_N(nodes.size() <= 2);
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
CV_Assert(finalSliceRanges[0].size() == ieInpNode->get_shape().size());
std::vector<int64_t> offsets, dims;
for (int i = 0; i < finalSliceRanges[0].size(); ++i)
{
offsets.push_back(finalSliceRanges[0][i].start);
dims.push_back(finalSliceRanges[0][i].end);
}
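// The end values in finalSliceRanges are already exclusive upper bounds,
// so they map directly onto StridedSlice's upper_bounds; strides are fixed
// to 1 because supportBackend() rejects stepped slices for this backend.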
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{offsets.size()}, offsets.data());
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{dims.size()}, dims.data());
auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
ngraph::Shape{dims.size()}, std::vector<int64_t>((int64_t)dims.size(), 1));
auto slice = std::make_shared<ngraph::op::v1::StridedSlice>(ieInpNode,
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
return Ptr<BackendNode>(new InfEngineNgraphNode(slice));
}
#endif // HAVE_DNN_NGRAPH
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
std::vector<std::vector<std::size_t>> offsets;
for (const auto& ranges : finalSliceRanges)
{
std::vector<std::size_t> offsets_i;
for (const auto& range : ranges)
offsets_i.push_back(range.start);
offsets.push_back(std::move(offsets_i));
}
return make_cuda_node<cuda4dnn::SliceOp>(preferableTarget, std::move(context->stream), std::move(offsets));
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
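// Slicing is quantization-transparent only if every output keeps the
// input's scale; returning false below rejects int8 execution when the
// scales disagree.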
const int numOutputs = scales[1].size();
for (int i = 0; i < numOutputs; i++)
{
if (scales[1][i] != scales[0][0])
return false;
}
return true;
}
private:
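// Copies inpMat restricted to sliceRanges (with the given per-axis steps)
// into outputs by recursing over one dimension per call. For example
// (hypothetical), range [0, 5) with step 2 maps input indices {0, 2, 4}
// to output indices {0, 1, 2}.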
template <typename T>
void getSliceRecursive(const Mat &inpMat, std::vector<int> &inpIdx,
const std::vector<Range> &sliceRanges,
const std::vector<int> &sliceSteps, int dim, int dimsNum,
Mat &outputs, std::vector<int> &outIdx)
{
int begin = sliceRanges[dim].start;
int end = sliceRanges[dim].end;
int step = !sliceSteps.empty() ? sliceSteps[dim] : 1;
// TODO: optimize (at least the 2D tail case)
for (int k = begin, j = 0; k < end; k += step, j++)
{
inpIdx[dim] = k;
outIdx[dim] = j;
if (dim + 1 < dimsNum)
getSliceRecursive<T>(inpMat, inpIdx, sliceRanges, sliceSteps, dim + 1, dimsNum, outputs, outIdx);
else
outputs.at<T>(outIdx.data()) = inpMat.at<T>(inpIdx.data());
}
}
protected:
// The actual non-negative values determined from @p sliceRanges depend on the input size.
std::vector<std::vector<Range> > finalSliceRanges;
bool hasDynamicShapes;
bool shapesInitialized;
bool hasSteps;
};
class CropLayerImpl CV_FINAL : public SliceLayerImpl
{
public:
CropLayerImpl(const LayerParams& params) : SliceLayerImpl(LayerParams())
{
setParamsFrom(params);
axis = params.get<int>("axis", 2);
const DictValue *paramOffset = params.ptr("offset");
if (paramOffset)
{
for (int i = 0; i < paramOffset->size(); i++)
offset.push_back(paramOffset->get<int>(i));
}
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 2);
MatShape dstShape = inputs[0];
int start = normalize_axis(axis, dstShape);
for (int i = start; i < dstShape.size(); i++)
{
dstShape[i] = inputs[1][i];
}
outputs.resize(1, dstShape);
return false;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
CV_Assert(2 == inputs.size());
const Mat &inpBlob = inputs[0];
const Mat &inpSzBlob = inputs[1];
int dims = inpBlob.dims;
int start_axis = normalize_axis(axis, dims);
std::vector<int> offset_final(dims, 0);
if (offset.size() == 1)
{
for (int i = start_axis; i < dims; i++)
offset_final[i] = offset[0];
}
else if (offset.size() > 1)
{
if ((int)offset.size() != dims - start_axis)
CV_Error(Error::StsBadArg, "number of offset values specified must be "
"equal to the number of dimensions following axis.");
for (int i = start_axis; i < dims; i++)
offset_final[i] = offset[i - start_axis];
}
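// Illustrative example (hypothetical values): with axis = 2 and
// offset = [1, 2] on a 4-D blob, cropping starts at row 1 and column 2;
// a single offset value would be broadcast to every cropped dimension.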
finalSliceRanges.resize(1);
finalSliceRanges[0].resize(dims);
for (int i = 0; i < start_axis; i++)
{
finalSliceRanges[0][i] = Range(0, inpBlob.size[i]);
}
for (int i = start_axis; i < dims; i++)
{
if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size[i] > inpBlob.size[i])
CV_Error(Error::StsBadArg, "invalid crop parameters or blob sizes");
finalSliceRanges[0][i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size[i]);
}
}
private:
std::vector<int> offset;
};
Ptr<SliceLayer> SliceLayer::create(const LayerParams& params)
{
return Ptr<SliceLayer>(new SliceLayerImpl(params));
}
Ptr<Layer> CropLayer::create(const LayerParams& params)
{
return Ptr<Layer>(new CropLayerImpl(params));
}
}
}


@@ -0,0 +1,416 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_cuda.hpp"
#include "../op_halide.hpp"
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include "../op_vkcom.hpp"
#include <algorithm>
#include <stdlib.h>
using std::max;
#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
using namespace cv::dnn::ocl4dnn;
#endif
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/softmax.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer
{
public:
SoftMaxLayerImpl(const LayerParams& params)
{
axisRaw = params.get<int>("axis", 1);
logSoftMax = params.get<bool>("log_softmax", false);
setParamsFrom(params);
}
#ifdef HAVE_OPENCL
Ptr<OCL4DNNSoftmax<float> > softmaxOp;
#endif
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
MatShape shape = inputs[0];
int cAxis = normalize_axis(axisRaw, shape.size());
shape[cAxis] = 1;
internals.assign(1, shape);
return inplace;
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA ||
(backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) ||
backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !logSoftMax) ||
(backendId == DNN_BACKEND_VKCOM && haveVulkan());
}
#ifdef HAVE_OPENCL
virtual void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) CV_OVERRIDE
{
softmaxOp.release();
}
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
std::vector<UMat> internals;
bool use_half = (inputs_.depth() == CV_16S);
inputs_.getUMatVector(inputs);
outputs_.getUMatVector(outputs);
internals_.getUMatVector(internals);
UMat& src = inputs[0];
UMat& dstMat = outputs[0];
int axis = normalize_axis(axisRaw, src.dims);
if (softmaxOp.empty())
{
OCL4DNNSoftmaxConfig config;
config.in_shape = shape(inputs[0]);
config.axis = axis;
config.channels = inputs[0].size[axis];
config.logsoftmax = logSoftMax;
config.use_half = use_half;
softmaxOp = Ptr<OCL4DNNSoftmax<float> >(new OCL4DNNSoftmax<float>(config));
}
if (softmaxOp->Forward(src, dstMat))
return true;
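// Fallback path: compute softmax with the generic OpenCL kernels below,
// one kernel each for the per-channel max, max subtraction, summation and
// the final normalization.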
UMat& bufMat = internals[0];
MatShape s = shape(src);
size_t outerSize = total(s, 0, axis);
size_t channels = src.size[axis];
size_t innerSize = total(s, axis + 1);
String buildOpts = format("-DT=%s", use_half ? "half" : "float");
ocl::Kernel kmax, ksub, ksum, kdiv;
if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
if (logSoftMax) buildOpts += " -DLOG_SOFTMAX ";
if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
size_t bufSize = internals[0].total();
size_t totalSize = src.total();
size_t internal_globalSize[1] = { bufSize };
size_t total_globalSize[1] = { totalSize };
kmax.args((int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadWrite(bufMat));
if (!kmax.run(1, internal_globalSize, NULL, false))
return false;
ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(bufMat),
ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(dstMat));
if (!ksub.run(1, total_globalSize, NULL, false))
return false;
ksum.args((int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
if (!ksum.run(1, internal_globalSize, NULL, false))
return false;
kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
if (!kdiv.run(1, total_globalSize, NULL, false))
return false;
return true;
}
#endif
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
forward_ocl(inputs_arr, outputs_arr, internals_arr))
if (inputs_arr.depth() == CV_16S)
{
forward_fallback(inputs_arr, outputs_arr, internals_arr);
return;
}
std::vector<Mat> inputs, outputs, internals;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
const Mat &src = inputs[0];
Mat &dst = outputs[0];
int axis = normalize_axis(axisRaw, src.dims);
size_t outerSize = src.total(0, axis), channels = src.size[axis],
innerSize = src.total(axis + 1);
CV_Assert(src.type() == CV_32F);
CV_Assert(src.isContinuous() && dst.isContinuous());
const float *srcPtr = src.ptr<float>();
float *dstPtr = dst.ptr<float>();
float *bufPtr = internals[0].ptr<float>();
size_t outerStep = src.total(axis);
size_t cnStep = src.total(axis + 1);
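// Computes softmax(x)_c = exp(x_c - m) / sum_k exp(x_k - m) with
// m = max_k x_k, independently for every (outer, inner) position;
// subtracting the maximum first keeps exp() from overflowing. When
// logSoftMax is set, log() is applied to the result afterwards.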
// compute max along axis
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
for (size_t cnDim = 1; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
}
}
// subtract max
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
const int offset = srcOffset + cnDim * cnStep;
for (size_t i = 0; i < innerSize; i++)
dstPtr[offset + i] = srcPtr[offset + i] - bufPtr[bufOffset + i];
}
}
cv::exp(dst, dst);
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
// sum exp along axis
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] = 0.f;
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
const int offset = srcOffset + cnDim * cnStep;
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] += dstPtr[offset + i];
}
// divide by computed sum
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
const int offset = srcOffset + cnDim * cnStep;
for (size_t i = 0; i < innerSize; i++)
dstPtr[offset + i] /= bufPtr[bufOffset + i];
}
if (logSoftMax)
{
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
const int offset = srcOffset + cnDim * cnStep;
for (size_t i = 0; i < innerSize; i++)
dstPtr[offset + i] = log(dstPtr[offset + i]);
}
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
auto channel_axis = normalize_axis(axisRaw, input_wrapper->getRank());
return make_cuda_node<cuda4dnn::SoftmaxOp>(preferableTarget, std::move(context->cudnn_handle), channel_axis, logSoftMax);
}
#endif
virtual Ptr<BackendNode> initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_VULKAN
vkcom::Tensor in = VkComTensor(inputs[0]);
int cAxis = normalize_axis(axisRaw, in.dimNum());
std::shared_ptr<vkcom::OpBase> op(new vkcom::OpSoftmax(cAxis, logSoftMax));
return Ptr<BackendNode>(new VkComBackendNode(inputs, op));
#endif // HAVE_VULKAN
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs) CV_OVERRIDE
{
#ifdef HAVE_HALIDE
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
int inW, inH, inC, inN;
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
if (inW != 1 || inH != 1)
CV_Error(cv::Error::StsNotImplemented,
"Halide backend for SoftMax with spatial size "
"more than 1x1 is not implemented");
Halide::Var x("x"), y("y"), c("c"), n("n");
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Func expInput("expInput");
Halide::RDom r(0, inW, 0, inH, 0, inC);
expInput(x, y, c, n) = exp(inputBuffer(x, y, c, n));
Halide::Expr globalSum = sum(expInput(r.x, r.y, r.z, n));
top(x, y, c, n) = expInput(x, y, c, n) / globalSum;
return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
return Ptr<BackendNode>();
}
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::Builder::SoftMaxLayer ieLayer(name);
ieLayer.setAxis(normalize_axis(axisRaw, input->getDims().size()));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif // HAVE_DNN_IE_NN_BUILDER_2019
#ifdef HAVE_DNN_NGRAPH
virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
{
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
int axis = normalize_axis(axisRaw, ieInpNode->get_shape().size());
auto softmax = std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis);
if (logSoftMax)
return Ptr<BackendNode>(new InfEngineNgraphNode(std::make_shared<ngraph::op::v0::Log>(softmax)));
return Ptr<BackendNode>(new InfEngineNgraphNode(softmax));
}
#endif // HAVE_DNN_NGRAPH
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
float inpScale = scales[0][0];
Mat lookUpTable(1, 256, CV_32F);
float* table = lookUpTable.ptr<float>();
for (int i = -128; i < 128; i++)
{
float x = inpScale*(i - 127); // ensures exp(x) is always in (0, 1]
table[i+128] = std::exp(x);
}
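// The table maps a quantized input value v in [-128, 127] (stored at
// index v + 128) to exp(inpScale * (v - 127)); the stored blob presumably
// lets the int8 softmax kernel evaluate exp() by lookup.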
params.blobs.clear();
params.blobs.push_back(lookUpTable);
return true;
}
int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
CV_UNUSED(outputs); // suppress unused variable warning
int64 flops = 0;
for (int i = 0; i < inputs.size(); i++)
{
flops += 4*total(inputs[i]);
}
return flops;
}
int axisRaw;
};
Ptr<SoftmaxLayer> SoftmaxLayer::create(const LayerParams& params)
{
return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(params));
}
}
}


@@ -0,0 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include "layers_common.hpp"
#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/split.hpp"
using namespace cv::dnn::cuda4dnn;
#endif
namespace cv
{
namespace dnn
{
class SplitLayerImpl CV_FINAL : public SplitLayer
{
public:
SplitLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
// TODO: the "top_count" param may be redundant, since it can be derived from the number of output connections
if (params.has("top_count"))
{
outputsCount = params.get<int>("top_count");
CV_Assert(outputsCount >= 0);
}
else
{
outputsCount = -1;
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
Layer::getMemoryShapes(inputs, max(1, outputsCount >= 0 ? outputsCount : requiredOutputs),
outputs, internals);
return false;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
for (size_t i = 0; i < outputs.size(); i++)
{
CV_Assert(inputs[0].total() == outputs[i].total());
inputs[0].copyTo(outputs[i]);
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(
void *context_,
const std::vector<Ptr<BackendWrapper>>& inputs,
const std::vector<Ptr<BackendWrapper>>& outputs
) override
{
auto context = reinterpret_cast<csl::CSLContext*>(context_);
return make_cuda_node<cuda4dnn::SplitOp>(preferableTarget, std::move(context->stream));
}
#endif
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
{
const int numOutputs = scales[1].size();
for (int i = 0; i < numOutputs; i++)
{
if (scales[1][i] != scales[0][0])
return false;
}
return true;
}
};
Ptr<SplitLayer> SplitLayer::create(const LayerParams& params)
{
return Ptr<SplitLayer>(new SplitLayerImpl(params));
}
}
}