feat: 切换后端至PaddleOCR-NCNN,切换工程为CMake
1.项目后端整体迁移至PaddleOCR-NCNN算法,已通过基本的兼容性测试 2.工程改为使用CMake组织,后续为了更好地兼容第三方库,不再提供QMake工程 3.重整权利声明文件,重整代码工程,确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN,切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
This commit is contained in:
200
3rdparty/opencv-4.5.4/modules/dnn/CMakeLists.txt
vendored
Normal file
200
3rdparty/opencv-4.5.4/modules/dnn/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,200 @@
|
||||
# Module declaration for opencv_dnn.
# Note: this listing was recovered from a diff view; separator residue has been
# dropped and conventional indentation restored.

# Disable the module on WinRT builds.
if(WINRT)
  ocv_module_disable(dnn)
endif()

# Disable the module when protobuf is not available (HAVE_PROTOBUF is false).
if(NOT HAVE_PROTOBUF)
  ocv_module_disable(opencv_dnn)
endif()

set(the_description
    "Deep neural network module. It allows to load models from different frameworks and to make forward pass")

# Register the layers_common sources for dispatch over the listed CPU feature sets.
ocv_add_dispatched_file_force_all("layers/layers_common"
    AVX
    AVX2
    AVX512_SKX
    RVV
)
ocv_add_dispatched_file_force_all("int8layers/layers_common"
    AVX2
    AVX512_SKX
)

# Declare the module: depends on core and imgproc, wrapped for python/java/objc/js.
ocv_add_module(dnn
    opencv_core
    opencv_imgproc
    WRAP python java objc js
)
|
||||
|
||||
# --- Optional backends: OpenCL, Tengine, CUDA --------------------------------

# OpenCL backend option; default is ON only when OpenCL is found and the host is not Apple.
ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE)

if(HAVE_TENGINE)
  add_definitions(-DHAVE_TENGINE=1)
endif()

# The OpenCL code path is compiled in only when it is both requested and available.
if(OPENCV_DNN_OPENCL AND HAVE_OPENCL)
  add_definitions(-DCV_OCL4DNN=1)
endif()

# CUDA backend option; default is ON only when CUDA, cuBLAS and cuDNN are all found.
ocv_option(OPENCV_DNN_CUDA "Build with CUDA support"
    HAVE_CUDA
    AND HAVE_CUBLAS
    AND HAVE_CUDNN
)

# When the CUDA backend is requested, report the first missing dependency;
# the backend define is added only when nothing is missing.
if(OPENCV_DNN_CUDA)
  if(NOT HAVE_CUDA)
    message(SEND_ERROR "DNN: CUDA backend requires CUDA Toolkit. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
  elseif(NOT HAVE_CUBLAS)
    message(SEND_ERROR "DNN: CUDA backend requires cuBLAS. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
  elseif(NOT HAVE_CUDNN)
    message(SEND_ERROR "DNN: CUDA backend requires cuDNN. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
  else()
    add_definitions(-DCV_CUDA4DNN=1)
  endif()
endif()
|
||||
|
||||
# Register the hook script that filters module sources (see cmake/hooks/).
ocv_cmake_hook_append(INIT_MODULE_SOURCES_opencv_dnn
    "${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake")

# --- Compiler warning suppression --------------------------------------------
if(NOT MSVC)
  ocv_warnings_disable(CMAKE_CXX_FLAGS
      -Wno-deprecated
      -Wmissing-prototypes
      -Wmissing-declarations
      -Wshadow
      -Wunused-parameter
      -Wsign-compare
  )
else()
  add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 )
  ocv_warnings_disable(CMAKE_CXX_FLAGS
      /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146
      /wd4305 /wd4127 /wd4100 /wd4512 /wd4125 /wd4389 /wd4510 /wd4610
      /wd4702 /wd4456 /wd4457 /wd4065 /wd4310 /wd4661 /wd4506
  )
endif()

if(HAVE_CUDA)
  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
endif()

if(NOT HAVE_CXX11)
  # LANG_CXX11 from protobuf files
  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-undef)
endif()

if(APPLE_FRAMEWORK)
  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshorten-64-to-32)
endif()
|
||||
|
||||
# --- Platform and protobuf related compile definitions -----------------------
if(ANDROID)
  add_definitions(
      -DDISABLE_POSIX_MEMALIGN
      -DTH_DISABLE_HEAP_TRACKING
  )
endif()

# Mark protobuf as external when the bundled copy is not being built.
if(NOT BUILD_PROTOBUF)
  add_definitions(-DOPENCV_DNN_EXTERNAL_PROTOBUF=1)
endif()

add_definitions(-DHAVE_PROTOBUF=1)

# Suppress warnings in autogenerated caffe.pb.* files
ocv_warnings_disable(CMAKE_CXX_FLAGS
    /wd4125
    /wd4267
    /wd4127
    /wd4244
    /wd4512
    /wd4702
    /wd4456
    /wd4510
    /wd4610
    /wd4800
    /wd4701 /wd4703    # potentially uninitialized local/pointer variable 'value' used
    /wd4505            # unreferenced local function has been removed
    /wd4458            # declaration of 'x' hides class member. GCC still works, MSVC bug is here: https://developercommunity.visualstudio.com/content/problem/219311/c-c4458-declaration-hides-class-member-warning-iss.html
    -wd858 -wd2196
    -Winvalid-offsetof # Apple Clang (attr_value.pb.cc)
)
|
||||
|
||||
# --- Protobuf-based framework sources ----------------------------------------
# include_dirs / libs accumulate what is later passed to the module include
# directories and to the module link step.
set(include_dirs "")
set(libs "")

if(NOT PROTOBUF_UPDATE_FILES)
  # Use the pre-generated protobuf sources shipped under misc/.
  file(GLOB fw_srcs
      "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.cc"
      "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.cc"
      "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.cc"
  )
  file(GLOB fw_hdrs
      "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.h"
      "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.h"
      "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.h"
  )
  set(fw_inc
      "${CMAKE_CURRENT_LIST_DIR}/misc/caffe"
      "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow"
      "${CMAKE_CURRENT_LIST_DIR}/misc/onnx"
  )
else()
  # Regenerate the protobuf sources from the .proto definitions.
  file(GLOB proto_files
      "${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto"
      "${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto"
      "${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto"
  )
  set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON) # required for tensorflow
  protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})
endif()

list(APPEND include_dirs ${fw_inc})
list(APPEND libs ${Protobuf_LIBRARIES})

# External protobuf headers are added only when the bundled copy is not built.
if(NOT BUILD_PROTOBUF)
  list(APPEND include_dirs ${Protobuf_INCLUDE_DIRS})
endif()
|
||||
|
||||
# --- Source selection options and backend include paths ----------------------
# sources_options collects EXCLUDE_* keywords that are later forwarded to
# ocv_glob_module_sources().
set(sources_options "")

list(APPEND libs ${LAPACK_LIBRARIES})

if(OPENCV_DNN_OPENCL AND HAVE_OPENCL)
  list(APPEND include_dirs ${OPENCL_INCLUDE_DIRS})
else()
  set(sources_options EXCLUDE_OPENCL)
endif()

if(OPENCV_DNN_CUDA AND HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
  list(APPEND include_dirs
      ${CUDA_TOOLKIT_INCLUDE}
      ${CUDNN_INCLUDE_DIRS}
  )

  # Reject any requested compute capability below 3.0.
  set(CC_LIST ${CUDA_ARCH_BIN})
  separate_arguments(CC_LIST)
  foreach(arch IN LISTS CC_LIST)
    if(arch VERSION_LESS 3.0)
      message(FATAL_ERROR "CUDA backend for DNN module requires CC 3.0 or higher. Please remove unsupported architectures from CUDA_ARCH_BIN option or disable OPENCV_DNN_CUDA=OFF.")
    endif()
  endforeach()
  unset(CC_LIST)
else()
  list(APPEND sources_options EXCLUDE_CUDA)
endif()
|
||||
|
||||
# Tengine: add its headers and link its libraries wrapped in whole-archive flags.
if(HAVE_TENGINE)
  list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
  list(APPEND libs
      -Wl,--whole-archive
      ${TENGINE_LIBRARIES}
      -Wl,--no-whole-archive
  )
endif()

ocv_module_include_directories(${include_dirs})

# Extra per-file warning switches for the protobuf-generated framework sources.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override") # Clang
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC
  ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-array-bounds") # GCC 9.3.0 (Ubuntu 20.04)
endif()
|
||||
|
||||
# --- Inference Engine / nGraph runtime libraries ------------------------------
# dnn_runtime_libs is later passed to the module and accuracy-test link steps.
set(dnn_runtime_libs "")

if(INF_ENGINE_TARGET)
  # Default for the NN Builder option: ON when an nn_builder target exists or
  # the Inference Engine release predates 2020000000, OFF otherwise.
  if(TARGET inference_engine_nn_builder               # custom imported target
      OR TARGET IE::inference_engine_nn_builder       # default imported target via InferenceEngineConfig.cmake
      OR INF_ENGINE_RELEASE VERSION_LESS "2020000000" # compatibility with older versions on IE
  )
    set(use_nn_builder ON)
  else()
    set(use_nn_builder OFF)
  endif()

  ocv_option(OPENCV_DNN_IE_NN_BUILDER_2019
      "Build with Inference Engine NN Builder API support"
      ${use_nn_builder}) # future: NOT HAVE_NGRAPH

  if(OPENCV_DNN_IE_NN_BUILDER_2019)
    message(STATUS "DNN: Enabling Inference Engine NN Builder API support")
    add_definitions(-DHAVE_DNN_IE_NN_BUILDER_2019=1)
  endif()

  list(APPEND dnn_runtime_libs ${INF_ENGINE_TARGET})
endif()

if(HAVE_NGRAPH)
  message(STATUS "DNN: Enabling Inference Engine nGraph API support")
  add_definitions(-DHAVE_DNN_NGRAPH)
  list(APPEND dnn_runtime_libs ngraph::ngraph)
endif()
|
||||
|
||||
# --- Module target, samples and tests -----------------------------------------
# Collect module sources (honouring the EXCLUDE_* options) together with the
# protobuf framework sources, then create the module and link its libraries.
ocv_glob_module_sources(
    ${sources_options}
    SOURCES ${fw_srcs}
)
ocv_create_module(
    ${libs}
    ${dnn_runtime_libs}
)

ocv_add_samples()
ocv_add_accuracy_tests(${dnn_runtime_libs})

# Performance tests: every .cpp/.hpp/.h under perf/ plus the shared test_common
# headers taken from the accuracy-test directory.
set(perf_path "${CMAKE_CURRENT_LIST_DIR}/perf")
file(GLOB_RECURSE perf_srcs
    "${perf_path}/*.cpp"
)
file(GLOB_RECURSE perf_hdrs
    "${perf_path}/*.hpp"
    "${perf_path}/*.h"
)
ocv_add_perf_tests(${INF_ENGINE_TARGET}
    FILES test_common
        "${CMAKE_CURRENT_LIST_DIR}/test/test_common.hpp"
        "${CMAKE_CURRENT_LIST_DIR}/test/test_common.impl.hpp"
    FILES Src ${perf_srcs}
    FILES Include ${perf_hdrs}
)
|
||||
|
||||
# --- Optional Caffe / clCaffe reference performance tests ---------------------
ocv_option(OPENCV_DNN_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
ocv_option(OPENCV_DNN_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)

# The two modes are mutually exclusive; the Caffe mode takes precedence.
# Each mode also accepts its deprecated "${the_module}_PERF_*" spelling.
if(BUILD_PERF_TESTS)
  if(OPENCV_DNN_PERF_CAFFE
      OR ${the_module}_PERF_CAFFE  # compatibility for deprecated option
  )
    find_package(Caffe QUIET)
    if(Caffe_FOUND)
      add_definitions(-DHAVE_CAFFE=1)
      ocv_target_link_libraries(opencv_perf_dnn caffe)
    endif()
  elseif(OPENCV_DNN_PERF_CLCAFFE
      # Fixed: this used to repeat ${the_module}_PERF_CAFFE, which can never be
      # true here because the branch above already handles it.
      OR ${the_module}_PERF_CLCAFFE  # compatibility for deprecated option
  )
    find_package(Caffe QUIET)
    if(Caffe_FOUND)
      add_definitions(-DHAVE_CLCAFFE=1)
      ocv_target_link_libraries(opencv_perf_dnn caffe)
    endif()
  endif()
endif()
|
||||
11
3rdparty/opencv-4.5.4/modules/dnn/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake
vendored
Normal file
11
3rdparty/opencv-4.5.4/modules/dnn/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
# Hook script: removes backend-specific files from the module's source and
# header lists when the corresponding backend is not enabled.

# OpenCL backend: drop every path containing "/ocl4dnn/" unless the backend is
# both requested and available.
if(NOT OPENCV_DNN_OPENCL OR NOT HAVE_OPENCL)
  message(STATUS "opencv_dnn: filter out ocl4dnn source code")
  foreach(file_kind SOURCES HEADERS)
    ocv_list_filterout(OPENCV_MODULE_${the_module}_${file_kind} "/ocl4dnn/")
  endforeach()
endif()

# CUDA backend: drop every path containing "/cuda4dnn/" unless the backend is
# requested and CUDA, cuBLAS and cuDNN are all available.
if(NOT OPENCV_DNN_CUDA OR NOT HAVE_CUDA OR NOT HAVE_CUBLAS OR NOT HAVE_CUDNN)
  message(STATUS "opencv_dnn: filter out cuda4dnn source code")
  foreach(file_kind SOURCES HEADERS)
    ocv_list_filterout(OPENCV_MODULE_${the_module}_${file_kind} "/cuda4dnn/")
  endforeach()
endif()
|
||||
78
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn.hpp
vendored
Normal file
78
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn.hpp
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_DNN_HPP
|
||||
#define OPENCV_DNN_HPP
|
||||
|
||||
// This is an umbrella header to include into your project.
|
||||
// We are free to change headers layout in dnn subfolder, so please include
|
||||
// this header for future compatibility
|
||||
|
||||
|
||||
/** @defgroup dnn Deep Neural Network module
|
||||
@{
|
||||
This module contains:
|
||||
- API for new layers creation, layers are building bricks of neural networks;
|
||||
- set of built-in most-useful Layers;
|
||||
- API to construct and modify comprehensive neural networks from layers;
|
||||
- functionality for loading serialized networks models from different frameworks.
|
||||
|
||||
Functionality of this module is designed only for forward pass computations (i.e. network testing).
|
||||
A network training is in principle not supported.
|
||||
@}
|
||||
*/
|
||||
/** @example samples/dnn/classification.cpp
|
||||
Check @ref tutorial_dnn_googlenet "the corresponding tutorial" for more details
|
||||
*/
|
||||
/** @example samples/dnn/colorization.cpp
|
||||
*/
|
||||
/** @example samples/dnn/object_detection.cpp
|
||||
Check @ref tutorial_dnn_yolo "the corresponding tutorial" for more details
|
||||
*/
|
||||
/** @example samples/dnn/openpose.cpp
|
||||
*/
|
||||
/** @example samples/dnn/segmentation.cpp
|
||||
*/
|
||||
/** @example samples/dnn/text_detection.cpp
|
||||
*/
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
|
||||
#endif /* OPENCV_DNN_HPP */
|
||||
832
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/all_layers.hpp
vendored
Normal file
832
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/all_layers.hpp
vendored
Normal file
@@ -0,0 +1,832 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
|
||||
#define OPENCV_DNN_DNN_ALL_LAYERS_HPP
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
//! @addtogroup dnn
|
||||
//! @{
|
||||
|
||||
/** @defgroup dnnLayerList Partial List of Implemented Layers
|
||||
@{
|
||||
This subsection of dnn module contains information about built-in layers and their descriptions.
|
||||
|
||||
Classes listed here, in fact, provide a C++ API for creating instances of built-in layers.
|
||||
In addition to this way of layers instantiation, there is a more common factory API (see @ref dnnLayerFactory), it allows to create layers dynamically (by name) and register new ones.
|
||||
You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
|
||||
|
||||
Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
|
||||
In particular, the following layers and Caffe importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
|
||||
- Convolution
|
||||
- Deconvolution
|
||||
- Pooling
|
||||
- InnerProduct
|
||||
- TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
|
||||
- Softmax
|
||||
- Reshape, Flatten, Slice, Split
|
||||
- LRN
|
||||
- MVN
|
||||
- Dropout (since it does nothing on forward pass -))
|
||||
*/
|
||||
|
||||
/** @brief Declaration of the built-in Blank layer.
 *
 * The class adds no members of its own; instances are obtained through create().
 */
class CV_EXPORTS BlankLayer : public Layer
{
public:
    /** Creates an instance of the layer from @p params. */
    static Ptr<Layer> create(const LayerParams &params);
};
|
||||
|
||||
/**
|
||||
* Constant layer produces the same data blob at an every forward pass.
|
||||
*/
|
||||
class CV_EXPORTS ConstLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<Layer> create(const LayerParams ¶ms);
|
||||
};
|
||||
|
||||
//! LSTM recurrent layer
|
||||
class CV_EXPORTS LSTMLayer : public Layer
|
||||
{
|
||||
public:
|
||||
/** Creates instance of LSTM layer */
|
||||
static Ptr<LSTMLayer> create(const LayerParams& params);
|
||||
|
||||
/** @deprecated Use LayerParams::blobs instead.
|
||||
@brief Set trained weights for LSTM layer.
|
||||
|
||||
LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
|
||||
|
||||
Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
|
||||
Than current output and current cell state is computed as follows:
|
||||
@f{eqnarray*}{
|
||||
h_t &= o_t \odot tanh(c_t), \\
|
||||
c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
|
||||
@f}
|
||||
where @f$\odot@f$ is per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ is internal gates that are computed using learned weights.
|
||||
|
||||
Gates are computed as follows:
|
||||
@f{eqnarray*}{
|
||||
i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
|
||||
f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
|
||||
o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
|
||||
g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
|
||||
@f}
|
||||
where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
|
||||
@f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
|
||||
|
||||
For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}, W_{xg}] @f$
|
||||
(i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
|
||||
The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_h \times N_h} @f$
|
||||
and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_h} @f$.
|
||||
|
||||
@param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_h @f$)
|
||||
@param Wx is matrix defining how current input is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_x @f$)
|
||||
@param b is bias vector (i.e. according to above mentioned notation is @f$ b @f$)
|
||||
*/
|
||||
CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
|
||||
|
||||
/** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
|
||||
* @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
|
||||
* where `Wh` is parameter from setWeights().
|
||||
*/
|
||||
virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
|
||||
|
||||
/** @deprecated Use flag `produce_cell_output` in LayerParams.
|
||||
* @brief Specifies either interpret first dimension of input blob as timestamp dimension either as sample.
|
||||
*
|
||||
* If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams.
|
||||
* In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
|
||||
*
|
||||
* If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`].
|
||||
* In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
|
||||
*/
|
||||
CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
|
||||
|
||||
/** @deprecated Use flag `use_timestamp_dim` in LayerParams.
|
||||
* @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
|
||||
* @details Shape of the second output is the same as first output.
|
||||
*/
|
||||
CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
|
||||
|
||||
/* In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
|
||||
* @param input should contain packed values @f$x_t@f$
|
||||
* @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
|
||||
*
|
||||
* If setUseTimstampsDim() is set to true then @p input[0] should has at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
|
||||
* where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
|
||||
*
|
||||
* If setUseTimstampsDim() is set to false then @p input[0] should contain single timestamp, its shape should has form [`N`, `[data dims]`] with at least one dimension.
|
||||
* (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
|
||||
*/
|
||||
|
||||
int inputNameToIndex(String inputName) CV_OVERRIDE;
|
||||
int outputNameToIndex(const String& outputName) CV_OVERRIDE;
|
||||
};
|
||||
|
||||
/** @brief GRU recurrent one-layer
|
||||
*
|
||||
* Accepts input sequence and computes the final hidden state for each element in the batch.
|
||||
*
|
||||
* - input[0] containing the features of the input sequence.
|
||||
* input[0] should have shape [`T`, `N`, `data_dims`] where `T` is sequence length, `N` is batch size, `data_dims` is input size
|
||||
* - output would have shape [`T`, `N`, `D` * `hidden_size`] where `D = 2` if layer is bidirectional otherwise `D = 1`
|
||||
*
|
||||
* Depends on the following attributes:
|
||||
* - hidden_size - Number of neurons in the hidden layer
|
||||
* - direction - RNN could be bidirectional or forward
|
||||
*
|
||||
* The final hidden state @f$ h_t @f$ computes by the following formulas:
|
||||
*
|
||||
@f{eqnarray*}{
|
||||
r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
|
||||
z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\
|
||||
n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) \\
|
||||
h_t = (1 - z_t) \odot n_t + z_t \odot h_{(t-1)} \\
|
||||
@f}
|
||||
* Where @f$x_t@f$ is current input, @f$h_{(t-1)}@f$ is previous or initial hidden state.
|
||||
*
|
||||
* @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
|
||||
* @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
|
||||
*
|
||||
* @f$\odot@f$ is per-element multiply operation.
|
||||
*/
|
||||
class CV_EXPORTS GRULayer : public Layer
|
||||
{
|
||||
public:
|
||||
/** Creates instance of GRU layer */
|
||||
static Ptr<GRULayer> create(const LayerParams& params);
|
||||
};
|
||||
|
||||
/** @brief Classical recurrent layer
|
||||
|
||||
Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
|
||||
|
||||
- input: should contain packed input @f$x_t@f$.
|
||||
- output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
|
||||
|
||||
input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively.
|
||||
|
||||
output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix.
|
||||
|
||||
If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
|
||||
*/
|
||||
class CV_EXPORTS RNNLayer : public Layer
|
||||
{
|
||||
public:
|
||||
/** Creates instance of RNNLayer */
|
||||
static Ptr<RNNLayer> create(const LayerParams& params);
|
||||
|
||||
/** Setups learned weights.
|
||||
|
||||
Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows:
|
||||
@f{eqnarray*}{
|
||||
h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \\
|
||||
o_t &= tanh&(W_{ho} h_t + b_o),
|
||||
@f}
|
||||
|
||||
@param Wxh is @f$ W_{xh} @f$ matrix
|
||||
@param bh is @f$ b_{h} @f$ vector
|
||||
@param Whh is @f$ W_{hh} @f$ matrix
|
||||
@param Who is @f$ W_{xo} @f$ matrix
|
||||
@param bo is @f$ b_{o} @f$ vector
|
||||
*/
|
||||
virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
|
||||
|
||||
/** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
|
||||
* @details Shape of the second output is the same as first output.
|
||||
*/
|
||||
virtual void setProduceHiddenOutput(bool produce = false) = 0;
|
||||
|
||||
};
|
||||
|
||||
/** @brief Common data members shared by the convolution-style layers declared in this header.
 *
 * The `Size`-typed members are marked deprecated for external use; the
 * `std::vector<size_t>` members hold the corresponding per-dimension values.
 */
class CV_EXPORTS BaseConvolutionLayer : public Layer
{
public:
    CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad;

    std::vector<size_t> adjust_pads;

    std::vector<size_t> kernel_size;
    std::vector<size_t> strides;
    std::vector<size_t> dilations;

    std::vector<size_t> pads_begin;
    std::vector<size_t> pads_end;

    String padMode;
    int numOutput;
};
|
||||
|
||||
/** @brief Convolution layer; adds only a factory method on top of BaseConvolutionLayer. */
class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
{
public:
    /** Creates an instance of the layer from @p params. */
    static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Int8 variant of the convolution layer. */
class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer
{
public:
    // NOTE(review): names suggest input/output zero points and output scale
    // of the quantization scheme; confirm against the implementation.
    int input_zp;
    int output_zp;
    float output_sc;

    /** Creates an instance of the layer from @p params. */
    static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Deconvolution layer; adds only a factory method on top of BaseConvolutionLayer. */
class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
{
public:
    /** Creates an instance of the layer from @p params. */
    static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief LRN layer declaration: public parameters plus a factory method. */
class CV_EXPORTS LRNLayer : public Layer
{
public:
    int type;

    int size;
    float alpha;
    float beta;
    float bias;
    bool normBySize;

    /** Creates an instance of the layer from @p params. */
    static Ptr<LRNLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Pooling layer declaration: public parameters plus a factory method.
 *
 * Besides the general pooling parameters, the class also carries the
 * ROIPooling and PSROIPooling parameters.
 */
class CV_EXPORTS PoolingLayer : public Layer
{
public:
    int type;

    std::vector<size_t> kernel_size;
    std::vector<size_t> strides;
    std::vector<size_t> pads_begin;
    std::vector<size_t> pads_end;

    bool globalPooling; //!< Flag is true if at least one of the axes is global pooled.
    std::vector<bool> isGlobalPooling;
    bool computeMaxIdx;
    String padMode;
    bool ceilMode;

    // If true for average pooling with padding, divide every output region
    // by the whole kernel area. Otherwise exclude zero padded values and divide
    // by the number of real values.
    bool avePoolPaddedArea;

    // ROIPooling parameters.
    Size pooledSize;
    float spatialScale;

    // PSROIPooling parameters.
    int psRoiOutChannels;

    /** Creates an instance of the layer from @p params. */
    static Ptr<PoolingLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Int8 variant of the pooling layer. */
class CV_EXPORTS PoolingLayerInt8 : public PoolingLayer
{
public:
    // NOTE(review): names suggest input/output quantization zero points; confirm.
    int input_zp;
    int output_zp;

    /** Creates an instance of the layer from @p params. */
    static Ptr<PoolingLayerInt8> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Softmax layer declaration: one public flag plus a factory method. */
class CV_EXPORTS SoftmaxLayer : public Layer
{
public:
    bool logSoftMax;

    /** Creates an instance of the layer from @p params. */
    static Ptr<SoftmaxLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Int8 variant of the softmax layer. */
class CV_EXPORTS SoftmaxLayerInt8 : public SoftmaxLayer
{
public:
    // NOTE(review): names suggest output quantization scale and zero point; confirm.
    float output_sc;
    int output_zp;

    /** Creates an instance of the layer from @p params. */
    static Ptr<SoftmaxLayerInt8> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief InnerProduct layer declaration: public `axis` member plus a factory method. */
class CV_EXPORTS InnerProductLayer : public Layer
{
public:
    int axis;

    /** Creates an instance of the layer from @p params. */
    static Ptr<InnerProductLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Int8 variant of the InnerProduct layer. */
class CV_EXPORTS InnerProductLayerInt8 : public InnerProductLayer
{
public:
    // NOTE(review): name suggests the output quantization zero point; confirm.
    int output_zp;

    /** Creates an instance of the layer from @p params. */
    static Ptr<InnerProductLayerInt8> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief MVN layer declaration: public parameters plus a factory method. */
class CV_EXPORTS MVNLayer : public Layer
{
public:
    float eps;
    bool normVariance;
    bool acrossChannels;

    /** Creates an instance of the layer from @p params. */
    static Ptr<MVNLayer> create(const LayerParams& params);
};
|
||||
|
||||
/* Reshaping */
|
||||
|
||||
/** @brief Reshape layer declaration: new-shape description and range plus a factory method. */
class CV_EXPORTS ReshapeLayer : public Layer
{
public:
    MatShape newShapeDesc;
    Range newShapeRange;

    /** Creates an instance of the layer from @p params. */
    static Ptr<ReshapeLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Flatten layer declaration; exposes only the factory method. */
class CV_EXPORTS FlattenLayer : public Layer
{
public:
    /** Creates an instance of the layer from @p params. */
    static Ptr<FlattenLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Quantize layer declaration: public `scale` / `zeropoint` members plus a factory method. */
class CV_EXPORTS QuantizeLayer : public Layer
{
public:
    float scale;
    int zeropoint;

    /** Creates an instance of the layer from @p params. */
    static Ptr<QuantizeLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Dequantize layer declaration: public `scale` / `zeropoint` members plus a factory method. */
class CV_EXPORTS DequantizeLayer : public Layer
{
public:
    float scale;
    int zeropoint;

    /** Creates an instance of the layer from @p params. */
    static Ptr<DequantizeLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Requantize layer declaration: public `scale` / `shift` members plus a factory method. */
class CV_EXPORTS RequantizeLayer : public Layer
{
public:
    float scale, shift;

    /** Creates an instance of the layer from @p params. */
    static Ptr<RequantizeLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Concat layer declaration: concatenation axis, padding controls and a factory method. */
class CV_EXPORTS ConcatLayer : public Layer
{
public:
    int axis;
    /**
     * @brief Add zero padding in case of concatenation of blobs with different
     * spatial sizes.
     *
     * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
     */
    bool padding;
    int paddingValue;

    /** Creates an instance of the layer from @p params. */
    static Ptr<ConcatLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Split layer declaration: number of output copies plus a factory method. */
class CV_EXPORTS SplitLayer : public Layer
{
public:
    int outputsCount; //!< Number of copies that will be produced (is ignored when negative).

    /** Creates an instance of the layer from @p params. */
    static Ptr<SplitLayer> create(const LayerParams &params);
};
|
||||
|
||||
/**
|
||||
* Slice layer has several modes:
|
||||
* 1. Caffe mode
|
||||
* @param[in] axis Axis of split operation
|
||||
* @param[in] slice_point Array of split points
|
||||
*
|
||||
* Number of output blobs equals to number of split points plus one. The
|
||||
* first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis,
|
||||
* the second output blob is a slice of input from @p slice_point[0] to
|
||||
* @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
|
||||
* input from @p slice_point[-1] up to the end of @p axis size.
|
||||
*
|
||||
* 2. TensorFlow mode
|
||||
* @param begin Vector of start indices
|
||||
* @param size Vector of sizes
|
||||
*
|
||||
* More convenient numpy-like slice. One and only output blob
|
||||
* is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
|
||||
*
|
||||
* 3. Torch mode
|
||||
* @param axis Axis of split operation
|
||||
*
|
||||
* Split input blob on the equal parts by @p axis.
|
||||
*/
|
||||
class CV_EXPORTS SliceLayer : public Layer
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* @brief Vector of slice ranges.
|
||||
*
|
||||
* The first dimension equals number of output blobs.
|
||||
* Inner vector has slice ranges for the first number of input dimensions.
|
||||
*/
|
||||
std::vector<std::vector<Range> > sliceRanges;
|
||||
std::vector<std::vector<int> > sliceSteps;
|
||||
int axis;
|
||||
int num_split;
|
||||
|
||||
static Ptr<SliceLayer> create(const LayerParams ¶ms);
|
||||
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS PermuteLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<PermuteLayer>.
    static Ptr<PermuteLayer> create(const LayerParams& params);
};
|
||||
|
||||
/**
|
||||
* Permute channels of 4-dimensional input blob.
|
||||
* @param group Number of groups to split input channels and pick in turns
|
||||
* into output blob.
|
||||
*
|
||||
* \f[ groupSize = \frac{number\ of\ channels}{group} \f]
|
||||
* \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
|
||||
* Read more at https://arxiv.org/pdf/1707.01083.pdf
|
||||
*/
|
||||
class CV_EXPORTS ShuffleChannelLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<Layer> create(const LayerParams& params);
|
||||
|
||||
int group;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Adds extra values for specific axes.
|
||||
* @param paddings Vector of paddings in format
|
||||
* @code
|
||||
* [ pad_before, pad_after, // [0]th dimension
|
||||
* pad_before, pad_after, // [1]st dimension
|
||||
* ...
|
||||
* pad_before, pad_after ] // [n]th dimension
|
||||
* @endcode
|
||||
* that represents number of padded values at every dimension
|
||||
* starting from the first one. The rest of dimensions won't
|
||||
* be padded.
|
||||
* @param value Value to be padded. Defaults to zero.
|
||||
* @param type Padding type: 'constant', 'reflect'
|
||||
* @param input_dims Torch's parameter. If @p input_dims is not equal to the
|
||||
* actual input dimensionality then the `[0]th` dimension
|
||||
* is considered as a batch dimension and @p paddings are shifted
|
||||
* to a one dimension. Defaults to `-1` that means padding
|
||||
* corresponding to @p paddings.
|
||||
*/
|
||||
class CV_EXPORTS PaddingLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<PaddingLayer> create(const LayerParams& params);
|
||||
};
|
||||
|
||||
/* Activations */
|
||||
/** @brief Base class of the activation layers declared in this header.
 *
 * Declares three virtual forwardSlice() overloads (float, int with a lookup
 * table, int8_t with a lookup table). The defaults provided here are empty.
 */
class CV_EXPORTS ActivationLayer : public Layer
{
public:
    //! Float overload; the default implementation does nothing.
    virtual void forwardSlice(const float* src, float* dst, int len,
                              size_t outPlaneSize, int cn0, int cn1) const {}
    //! int overload taking an additional @p lut pointer; the default implementation does nothing.
    virtual void forwardSlice(const int* src, const int* lut, int* dst, int len,
                              size_t outPlaneSize, int cn0, int cn1) const {}
    //! int8_t overload taking an additional @p lut pointer; the default implementation does nothing.
    virtual void forwardSlice(const int8_t* src, const int8_t* lut, int8_t* dst, int len,
                              size_t outPlaneSize, int cn0, int cn1) const {}
};
|
||||
|
||||
/** @brief Activation layer declaration exposing a @c negativeSlope member. */
class CV_EXPORTS ReLULayer : public ActivationLayer
{
public:
    float negativeSlope;

    //! Factory declaration: takes the layer parameters, yields a Ptr<ReLULayer>.
    static Ptr<ReLULayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration exposing @c minValue and @c maxValue members. */
class CV_EXPORTS ReLU6Layer : public ActivationLayer
{
public:
    float minValue, maxValue;

    //! Factory declaration: takes the layer parameters, yields a Ptr<ReLU6Layer>.
    static Ptr<ReLU6Layer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<Layer>.
    static Ptr<Layer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS ELULayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<ELULayer>.
    static Ptr<ELULayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS TanHLayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<TanHLayer>.
    static Ptr<TanHLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS SwishLayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<SwishLayer>.
    static Ptr<SwishLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS MishLayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<MishLayer>.
    static Ptr<MishLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS SigmoidLayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<SigmoidLayer>.
    static Ptr<SigmoidLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS BNLLLayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<BNLLLayer>.
    static Ptr<BNLLLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration; the only public member is the static factory. */
class CV_EXPORTS AbsLayer : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<AbsLayer>.
    static Ptr<AbsLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration exposing @c power, @c scale and @c shift members. */
class CV_EXPORTS PowerLayer : public ActivationLayer
{
public:
    float power, scale, shift;

    //! Factory declaration: takes the layer parameters, yields a Ptr<PowerLayer>.
    static Ptr<PowerLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Activation layer declaration exposing @c base, @c scale and @c shift members. */
class CV_EXPORTS ExpLayer : public ActivationLayer
{
public:
    float base, scale, shift;

    //! Factory declaration: takes the layer parameters, yields a Ptr<ExpLayer>.
    static Ptr<ExpLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief ActivationLayer subclass declaration; the only public member is the static factory. */
class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<ActivationLayerInt8>.
    static Ptr<ActivationLayerInt8> create(const LayerParams &params);
};
|
||||
|
||||
/* Layers used in semantic segmentation */
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS CropLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<Layer>.
    static Ptr<Layer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Element wise operation on inputs
|
||||
|
||||
Extra optional parameters:
|
||||
- "operation" as string. Values are "sum" (default), "prod", "max", "div", "min"
|
||||
- "coeff" as float array. Specify weights of inputs for SUM operation
|
||||
- "output_channels_mode" as string. Values are "same" (default, all input must have the same layout), "input_0", "input_0_truncate", "max_input_channels"
|
||||
*/
|
||||
class CV_EXPORTS EltwiseLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<EltwiseLayer> create(const LayerParams ¶ms);
|
||||
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS EltwiseLayerInt8 : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<EltwiseLayerInt8>.
    static Ptr<EltwiseLayerInt8> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief ActivationLayer subclass exposing @c hasWeights, @c hasBias and @c epsilon members. */
class CV_EXPORTS BatchNormLayer : public ActivationLayer
{
public:
    bool hasWeights, hasBias;
    float epsilon;

    //! Factory declaration: takes the layer parameters, yields a Ptr<BatchNormLayer>.
    static Ptr<BatchNormLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief BatchNormLayer subclass adding input/output @c *_sc and @c *_zp members. */
class CV_EXPORTS BatchNormLayerInt8 : public BatchNormLayer
{
public:
    // NOTE(review): presumably quantization scales and zero points -- confirm
    float input_sc, output_sc;
    int input_zp, output_zp;

    //! Factory declaration: takes the layer parameters, yields a Ptr<BatchNormLayerInt8>.
    static Ptr<BatchNormLayerInt8> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Layer declaration exposing kernel, pad and stride sizes. */
class CV_EXPORTS MaxUnpoolLayer : public Layer
{
public:
    Size poolKernel;
    Size poolPad;
    Size poolStride;

    //! Factory declaration: takes the layer parameters, yields a Ptr<MaxUnpoolLayer>.
    static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Layer declaration exposing @c hasBias and @c axis members. */
class CV_EXPORTS ScaleLayer : public Layer
{
public:
    bool hasBias;
    int axis;

    //! Factory declaration: takes the layer parameters, yields a Ptr<ScaleLayer>.
    static Ptr<ScaleLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief ScaleLayer subclass adding @c output_sc and @c output_zp members. */
class CV_EXPORTS ScaleLayerInt8 : public ScaleLayer
{
public:
    // NOTE(review): presumably output quantization scale and zero point -- confirm
    float output_sc;
    int output_zp;

    //! Factory declaration: takes the layer parameters, yields a Ptr<ScaleLayerInt8>.
    static Ptr<ScaleLayerInt8> create(const LayerParams &params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS ShiftLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<Layer>.
    static Ptr<Layer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS ShiftLayerInt8 : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<Layer>.
    static Ptr<Layer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS DataAugmentationLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<DataAugmentationLayer>.
    static Ptr<DataAugmentationLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS CorrelationLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<CorrelationLayer>.
    static Ptr<CorrelationLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS AccumLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<AccumLayer>.
    static Ptr<AccumLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS FlowWarpLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<FlowWarpLayer>.
    static Ptr<FlowWarpLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS PriorBoxLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<PriorBoxLayer>.
    static Ptr<PriorBoxLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS ReorgLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<ReorgLayer>.
    static Ptr<ReorgLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration exposing an @c nmsThreshold member. */
class CV_EXPORTS RegionLayer : public Layer
{
public:
    float nmsThreshold;

    //! Factory declaration: takes the layer parameters, yields a Ptr<RegionLayer>.
    static Ptr<RegionLayer> create(const LayerParams& params);
};
|
||||
|
||||
/**
|
||||
* @brief Detection output layer.
|
||||
*
|
||||
* The layer size is: @f$ (1 \times 1 \times N \times 7) @f$
|
||||
* where N is [keep_top_k] parameter multiplied by batch size. Each row is:
|
||||
* [image_id, label, confidence, xmin, ymin, xmax, ymax]
|
||||
* where image_id is the index of image input in the batch.
|
||||
*/
|
||||
class CV_EXPORTS DetectionOutputLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<DetectionOutputLayer> create(const LayerParams& params);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief \f$ L_p \f$ - normalization layer.
|
||||
* @param p Normalization factor. The most common `p = 1` for \f$ L_1 \f$ -
|
||||
* normalization or `p = 2` for \f$ L_2 \f$ - normalization or a custom one.
|
||||
* @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero.
|
||||
* @param across_spatial If true, normalize an input across all non-batch dimensions.
|
||||
* Otherwise normalize an every channel separately.
|
||||
*
|
||||
* Across spatial:
|
||||
* @f[
|
||||
* norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\
|
||||
* dst(x, y, c) = \frac{ src(x, y, c) }{norm}
|
||||
* @f]
|
||||
*
|
||||
* Channel wise normalization:
|
||||
* @f[
|
||||
* norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\
|
||||
* dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)}
|
||||
* @f]
|
||||
*
|
||||
* Where `x, y` - spatial coordinates, `c` - channel.
|
||||
*
|
||||
* An every sample in the batch is normalized separately. Optionally,
|
||||
* output is scaled by the trained parameters.
|
||||
*/
|
||||
class CV_EXPORTS NormalizeBBoxLayer : public Layer
|
||||
{
|
||||
public:
|
||||
float pnorm, epsilon;
|
||||
CV_DEPRECATED_EXTERNAL bool acrossSpatial;
|
||||
|
||||
static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Resize input 4-dimensional blob by nearest neighbor or bilinear strategy.
|
||||
*
|
||||
* Layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops.
|
||||
*/
|
||||
class CV_EXPORTS ResizeLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<ResizeLayer> create(const LayerParams& params);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public-ver2
|
||||
*
|
||||
* It differs from @ref ResizeLayer in output shape and resize scales computations.
|
||||
*/
|
||||
class CV_EXPORTS InterpLayer : public Layer
|
||||
{
|
||||
public:
|
||||
static Ptr<Layer> create(const LayerParams& params);
|
||||
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS ProposalLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<ProposalLayer>.
    static Ptr<ProposalLayer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration; the only public member is the static factory. */
class CV_EXPORTS CropAndResizeLayer : public Layer
{
public:
    //! Factory declaration: takes the layer parameters, yields a Ptr<Layer>.
    static Ptr<Layer> create(const LayerParams& params);
};
|
||||
|
||||
/** @brief Layer declaration exposing @c exclusive and @c reverse integer members. */
class CV_EXPORTS CumSumLayer : public Layer
{
public:
    int exclusive;
    int reverse;

    //! Factory declaration: takes the layer parameters, yields a Ptr<CumSumLayer>.
    static Ptr<CumSumLayer> create(const LayerParams& params);
};
|
||||
|
||||
//! @}
|
||||
//! @}
|
||||
CV__DNN_INLINE_NS_END
|
||||
}
|
||||
}
|
||||
#endif
|
||||
160
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/dict.hpp
vendored
Normal file
160
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/dict.hpp
vendored
Normal file
@@ -0,0 +1,160 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
|
||||
#ifndef OPENCV_DNN_DNN_DICT_HPP
|
||||
#define OPENCV_DNN_DNN_DICT_HPP
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
//! @addtogroup dnn
|
||||
//! @{
|
||||
|
||||
/** @brief This struct stores the scalar value (or array) of one of the following type: double, cv::String or int64.
|
||||
* @todo Maybe int64 is useless because double type exactly stores at least 2^52 integers.
|
||||
*/
|
||||
struct CV_EXPORTS_W DictValue
|
||||
{
|
||||
DictValue(const DictValue &r);
|
||||
DictValue(bool i) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i ? 1 : 0; } //!< Constructs integer scalar
|
||||
DictValue(int64 i = 0) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; } //!< Constructs integer scalar
|
||||
CV_WRAP DictValue(int i) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; } //!< Constructs integer scalar
|
||||
DictValue(unsigned p) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; } //!< Constructs integer scalar
|
||||
CV_WRAP DictValue(double p) : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; } //!< Constructs floating point scalar
|
||||
CV_WRAP DictValue(const String &s) : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; } //!< Constructs string scalar
|
||||
DictValue(const char *s) : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; } //!< @overload
|
||||
|
||||
template<typename TypeIter>
|
||||
static DictValue arrayInt(TypeIter begin, int size); //!< Constructs integer array
|
||||
template<typename TypeIter>
|
||||
static DictValue arrayReal(TypeIter begin, int size); //!< Constructs floating point array
|
||||
template<typename TypeIter>
|
||||
static DictValue arrayString(TypeIter begin, int size); //!< Constructs array of strings
|
||||
|
||||
template<typename T>
|
||||
T get(int idx = -1) const; //!< Tries to convert array element with specified index to requested type and returns its.
|
||||
|
||||
int size() const;
|
||||
|
||||
CV_WRAP bool isInt() const;
|
||||
CV_WRAP bool isString() const;
|
||||
CV_WRAP bool isReal() const;
|
||||
|
||||
CV_WRAP int getIntValue(int idx = -1) const;
|
||||
CV_WRAP double getRealValue(int idx = -1) const;
|
||||
CV_WRAP String getStringValue(int idx = -1) const;
|
||||
|
||||
DictValue &operator=(const DictValue &r);
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &stream, const DictValue &dictv);
|
||||
|
||||
~DictValue();
|
||||
|
||||
private:
|
||||
|
||||
Param type;
|
||||
|
||||
union
|
||||
{
|
||||
AutoBuffer<int64, 1> *pi;
|
||||
AutoBuffer<double, 1> *pd;
|
||||
AutoBuffer<String, 1> *ps;
|
||||
void *pv;
|
||||
};
|
||||
|
||||
DictValue(Param _type, void *_p) : type(_type), pv(_p) {}
|
||||
void release();
|
||||
};
|
||||
|
||||
/** @brief This class implements name-value dictionary, values are instances of DictValue. */
|
||||
class CV_EXPORTS Dict
|
||||
{
|
||||
typedef std::map<String, DictValue> _Dict;
|
||||
_Dict dict;
|
||||
|
||||
public:
|
||||
|
||||
//! Checks a presence of the @p key in the dictionary.
|
||||
bool has(const String &key) const;
|
||||
|
||||
//! If the @p key in the dictionary then returns pointer to its value, else returns NULL.
|
||||
DictValue *ptr(const String &key);
|
||||
|
||||
/** @overload */
|
||||
const DictValue *ptr(const String &key) const;
|
||||
|
||||
//! If the @p key in the dictionary then returns its value, else an error will be generated.
|
||||
const DictValue &get(const String &key) const;
|
||||
|
||||
/** @overload */
|
||||
template <typename T>
|
||||
T get(const String &key) const;
|
||||
|
||||
//! If the @p key in the dictionary then returns its value, else returns @p defaultValue.
|
||||
template <typename T>
|
||||
T get(const String &key, const T &defaultValue) const;
|
||||
|
||||
//! Sets new @p value for the @p key, or adds new key-value pair into the dictionary.
|
||||
template<typename T>
|
||||
const T &set(const String &key, const T &value);
|
||||
|
||||
//! Erase @p key from the dictionary.
|
||||
void erase(const String &key);
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &stream, const Dict &dict);
|
||||
|
||||
std::map<String, DictValue>::const_iterator begin() const;
|
||||
|
||||
std::map<String, DictValue>::const_iterator end() const;
|
||||
};
|
||||
|
||||
//! @}
|
||||
CV__DNN_INLINE_NS_END
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
1693
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/dnn.hpp
vendored
Normal file
1693
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/dnn.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
412
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
vendored
Normal file
412
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
vendored
Normal file
@@ -0,0 +1,412 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_DNN_DNN_INL_HPP
|
||||
#define OPENCV_DNN_DNN_INL_HPP
|
||||
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
template<typename TypeIter>
|
||||
DictValue DictValue::arrayInt(TypeIter begin, int size)
|
||||
{
|
||||
DictValue res(Param::INT, new AutoBuffer<int64, 1>(size));
|
||||
for (int j = 0; j < size; begin++, j++)
|
||||
(*res.pi)[j] = *begin;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename TypeIter>
|
||||
DictValue DictValue::arrayReal(TypeIter begin, int size)
|
||||
{
|
||||
DictValue res(Param::REAL, new AutoBuffer<double, 1>(size));
|
||||
for (int j = 0; j < size; begin++, j++)
|
||||
(*res.pd)[j] = *begin;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename TypeIter>
|
||||
DictValue DictValue::arrayString(TypeIter begin, int size)
|
||||
{
|
||||
DictValue res(Param::STRING, new AutoBuffer<String, 1>(size));
|
||||
for (int j = 0; j < size; begin++, j++)
|
||||
(*res.ps)[j] = *begin;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline DictValue DictValue::get<DictValue>(int idx) const
|
||||
{
|
||||
CV_Assert(idx == -1);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline int64 DictValue::get<int64>(int idx) const
|
||||
{
|
||||
CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
|
||||
idx = (idx == -1) ? 0 : idx;
|
||||
|
||||
if (type == Param::INT)
|
||||
{
|
||||
return (*pi)[idx];
|
||||
}
|
||||
else if (type == Param::REAL)
|
||||
{
|
||||
double doubleValue = (*pd)[idx];
|
||||
|
||||
double fracpart, intpart;
|
||||
fracpart = std::modf(doubleValue, &intpart);
|
||||
CV_Assert(fracpart == 0.0);
|
||||
|
||||
return (int64)doubleValue;
|
||||
}
|
||||
else if (type == Param::STRING)
|
||||
{
|
||||
return std::atoi((*ps)[idx].c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(isInt() || isReal() || isString());
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
inline int DictValue::get<int>(int idx) const
|
||||
{
|
||||
return (int)get<int64>(idx);
|
||||
}
|
||||
|
||||
inline int DictValue::getIntValue(int idx) const
|
||||
{
|
||||
return (int)get<int64>(idx);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline unsigned DictValue::get<unsigned>(int idx) const
|
||||
{
|
||||
return (unsigned)get<int64>(idx);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool DictValue::get<bool>(int idx) const
|
||||
{
|
||||
return (get<int64>(idx) != 0);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline double DictValue::get<double>(int idx) const
|
||||
{
|
||||
CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
|
||||
idx = (idx == -1) ? 0 : idx;
|
||||
|
||||
if (type == Param::REAL)
|
||||
{
|
||||
return (*pd)[idx];
|
||||
}
|
||||
else if (type == Param::INT)
|
||||
{
|
||||
return (double)(*pi)[idx];
|
||||
}
|
||||
else if (type == Param::STRING)
|
||||
{
|
||||
return std::atof((*ps)[idx].c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(isReal() || isInt() || isString());
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline double DictValue::getRealValue(int idx) const
|
||||
{
|
||||
return get<double>(idx);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline float DictValue::get<float>(int idx) const
|
||||
{
|
||||
return (float)get<double>(idx);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline String DictValue::get<String>(int idx) const
|
||||
{
|
||||
CV_Assert(isString());
|
||||
CV_Assert((idx == -1 && ps->size() == 1) || (idx >= 0 && idx < (int)ps->size()));
|
||||
return (*ps)[(idx == -1) ? 0 : idx];
|
||||
}
|
||||
|
||||
|
||||
/** Non-template accessor forwarding to get<String>(). */
inline String DictValue::getStringValue(int idx) const
{
    return get<String>(idx);
}
|
||||
|
||||
inline void DictValue::release()
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case Param::INT:
|
||||
delete pi;
|
||||
break;
|
||||
case Param::STRING:
|
||||
delete ps;
|
||||
break;
|
||||
case Param::REAL:
|
||||
delete pd;
|
||||
break;
|
||||
case Param::BOOLEAN:
|
||||
case Param::MAT:
|
||||
case Param::MAT_VECTOR:
|
||||
case Param::ALGORITHM:
|
||||
case Param::FLOAT:
|
||||
case Param::UNSIGNED_INT:
|
||||
case Param::UINT64:
|
||||
case Param::UCHAR:
|
||||
case Param::SCALAR:
|
||||
break; // unhandled
|
||||
}
|
||||
}
|
||||
|
||||
/** Destructor: frees the owned buffer through release(). */
inline DictValue::~DictValue()
{
    release();
}
|
||||
|
||||
/** Copy assignment: deep-copies the buffer of @p r.
 *
 * The new buffer is allocated before the old one is released, so the old
 * contents are still intact if the allocation throws. Self-assignment is a no-op.
 */
inline DictValue & DictValue::operator=(const DictValue &r)
{
    if (&r == this)
        return *this;

    if (r.type == Param::INT)
    {
        AutoBuffer<int64, 1> *tmp = new AutoBuffer<int64, 1>(*r.pi);
        release();
        pi = tmp;
    }
    else if (r.type == Param::STRING)
    {
        AutoBuffer<String, 1> *tmp = new AutoBuffer<String, 1>(*r.ps);
        release();
        ps = tmp;
    }
    else if (r.type == Param::REAL)
    {
        AutoBuffer<double, 1> *tmp = new AutoBuffer<double, 1>(*r.pd);
        release();
        pd = tmp;
    }

    // Must come after release(): release() dispatches on the previous type tag.
    type = r.type;

    return *this;
}
|
||||
|
||||
/** Copy constructor: deep-copies the buffer of @p r; the pointer stays NULL for any other type tag. */
inline DictValue::DictValue(const DictValue &r)
    : pv(NULL)
{
    type = r.type;

    if (r.type == Param::INT)
        pi = new AutoBuffer<int64, 1>(*r.pi);
    else if (r.type == Param::STRING)
        ps = new AutoBuffer<String, 1>(*r.ps);
    else if (r.type == Param::REAL)
        pd = new AutoBuffer<double, 1>(*r.pd);
}
|
||||
|
||||
inline bool DictValue::isString() const
|
||||
{
|
||||
return (type == Param::STRING);
|
||||
}
|
||||
|
||||
inline bool DictValue::isInt() const
|
||||
{
|
||||
return (type == Param::INT);
|
||||
}
|
||||
|
||||
inline bool DictValue::isReal() const
|
||||
{
|
||||
return (type == Param::REAL || type == Param::INT);
|
||||
}
|
||||
|
||||
inline int DictValue::size() const
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case Param::INT:
|
||||
return (int)pi->size();
|
||||
case Param::STRING:
|
||||
return (int)ps->size();
|
||||
case Param::REAL:
|
||||
return (int)pd->size();
|
||||
case Param::BOOLEAN:
|
||||
case Param::MAT:
|
||||
case Param::MAT_VECTOR:
|
||||
case Param::ALGORITHM:
|
||||
case Param::FLOAT:
|
||||
case Param::UNSIGNED_INT:
|
||||
case Param::UINT64:
|
||||
case Param::UCHAR:
|
||||
case Param::SCALAR:
|
||||
break; // unhandled
|
||||
}
|
||||
CV_Error_(Error::StsInternal, ("Unhandled type (%d)", static_cast<int>(type)));
|
||||
}
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (dictv.isInt())
|
||||
{
|
||||
for (i = 0; i < dictv.size() - 1; i++)
|
||||
stream << dictv.get<int64>(i) << ", ";
|
||||
stream << dictv.get<int64>(i);
|
||||
}
|
||||
else if (dictv.isReal())
|
||||
{
|
||||
for (i = 0; i < dictv.size() - 1; i++)
|
||||
stream << dictv.get<double>(i) << ", ";
|
||||
stream << dictv.get<double>(i);
|
||||
}
|
||||
else if (dictv.isString())
|
||||
{
|
||||
for (i = 0; i < dictv.size() - 1; i++)
|
||||
stream << "\"" << dictv.get<String>(i) << "\", ";
|
||||
stream << dictv.get<String>(i);
|
||||
}
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
inline bool Dict::has(const String &key) const
|
||||
{
|
||||
return dict.count(key) != 0;
|
||||
}
|
||||
|
||||
/** Returns a pointer to the value stored under @p key, or NULL when the key is absent. */
inline DictValue *Dict::ptr(const String &key)
{
    _Dict::iterator i = dict.find(key);
    return (i == dict.end()) ? NULL : &i->second;
}
|
||||
|
||||
/** Const overload: returns a pointer to the value stored under @p key, or NULL when the key is absent. */
inline const DictValue *Dict::ptr(const String &key) const
{
    _Dict::const_iterator i = dict.find(key);
    return (i == dict.end()) ? NULL : &i->second;
}
|
||||
|
||||
/** Returns the value stored under @p key; raises Error::StsObjectNotFound when the key is absent. */
inline const DictValue &Dict::get(const String &key) const
{
    _Dict::const_iterator i = dict.find(key);
    if (i == dict.end())
        CV_Error(Error::StsObjectNotFound, "Required argument \"" + key + "\" not found into dictionary");
    return i->second;
}
|
||||
|
||||
template <typename T>
|
||||
inline T Dict::get(const String &key) const
|
||||
{
|
||||
return this->get(key).get<T>();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T Dict::get(const String &key, const T &defaultValue) const
|
||||
{
|
||||
_Dict::const_iterator i = dict.find(key);
|
||||
|
||||
if (i != dict.end())
|
||||
return i->second.get<T>();
|
||||
else
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline const T &Dict::set(const String &key, const T &value)
|
||||
{
|
||||
_Dict::iterator i = dict.find(key);
|
||||
|
||||
if (i != dict.end())
|
||||
i->second = DictValue(value);
|
||||
else
|
||||
dict.insert(std::make_pair(key, DictValue(value)));
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
inline void Dict::erase(const String &key)
|
||||
{
|
||||
dict.erase(key);
|
||||
}
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const Dict &dict)
|
||||
{
|
||||
Dict::_Dict::const_iterator it;
|
||||
for (it = dict.dict.begin(); it != dict.dict.end(); it++)
|
||||
stream << it->first << " : " << it->second << "\n";
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
/** Const iterator to the first entry of the underlying map. */
inline std::map<String, DictValue>::const_iterator Dict::begin() const
{
    return dict.begin();
}
|
||||
|
||||
/** Const past-the-end iterator of the underlying map. */
inline std::map<String, DictValue>::const_iterator Dict::end() const
{
    return dict.end();
}
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
78
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/layer.details.hpp
vendored
Normal file
78
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/layer.details.hpp
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
#ifndef OPENCV_DNN_LAYER_DETAILS_HPP
|
||||
#define OPENCV_DNN_LAYER_DETAILS_HPP
|
||||
|
||||
#include <opencv2/dnn/layer.hpp>
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
/** @brief Registers layer constructor in runtime.
*   @param type name of the layer type; it is stringified by the macro.
*   @param constructorFunc pointer to the function of type LayerFactory::Constructor, which creates the layer.
*   @details This macro must be placed inside the function code.
*/
#define CV_DNN_REGISTER_LAYER_FUNC(type, constructorFunc) \
    cv::dnn::LayerFactory::registerLayer(#type, constructorFunc);
|
||||
|
||||
/** @brief Registers layer class in runtime.
 *  @param type name of the layer type; it is stringified by the macro.
 *  @param class C++ class, derived from Layer; it is instantiated through its static create(params).
 *  @details This macro must be placed inside the function code.
 */
#define CV_DNN_REGISTER_LAYER_CLASS(type, class) \
    cv::dnn::LayerFactory::registerLayer(#type, cv::dnn::details::_layerDynamicRegisterer<class>);
|
||||
|
||||
/** @brief Registers layer constructor on module load time.
*   @param type name of the layer type; it is stringified and also pasted into the registerer's identifier.
*   @param constructorFunc pointer to the function of type LayerFactory::Constructor, which creates the layer.
*   @details This macro must be placed outside the function code.
*/
#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constructorFunc) \
static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constructorFunc);
|
||||
|
||||
/** @brief Registers layer class on module load time.
 *  @param type name of the layer type; it is stringified and also pasted into the generated identifiers.
 *  @param class C++ class, derived from Layer; it is instantiated with `new class(params)`.
 *  @details This macro must be placed outside the function code.
 */
#define CV_DNN_REGISTER_LAYER_CLASS_STATIC(type, class)                         \
Ptr<Layer> __LayerStaticRegisterer_func_##type(LayerParams &params) \
    { return Ptr<Layer>(new class(params)); }                       \
static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStaticRegisterer_func_##type);
|
||||
|
||||
namespace details {
|
||||
|
||||
template<typename LayerClass>
|
||||
Ptr<Layer> _layerDynamicRegisterer(LayerParams ¶ms)
|
||||
{
|
||||
return Ptr<Layer>(LayerClass::create(params));
|
||||
}
|
||||
|
||||
//allows automatically register created layer on module load time
|
||||
class _LayerStaticRegisterer
|
||||
{
|
||||
String type;
|
||||
public:
|
||||
|
||||
_LayerStaticRegisterer(const String &layerType, LayerFactory::Constructor layerConstructor)
|
||||
{
|
||||
this->type = layerType;
|
||||
LayerFactory::registerLayer(layerType, layerConstructor);
|
||||
}
|
||||
|
||||
~_LayerStaticRegisterer()
|
||||
{
|
||||
LayerFactory::unregisterLayer(type);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
|
||||
#endif
|
||||
85
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/layer.hpp
vendored
Normal file
85
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/layer.hpp
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_DNN_LAYER_HPP
|
||||
#define OPENCV_DNN_LAYER_HPP
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
//! @addtogroup dnn
|
||||
//! @{
|
||||
//!
|
||||
//! @defgroup dnnLayerFactory Utilities for New Layers Registration
|
||||
//! @{
|
||||
|
||||
/** @brief %Layer factory allows to create instances of registered layers. */
|
||||
class CV_EXPORTS LayerFactory
|
||||
{
|
||||
public:
|
||||
|
||||
//! Each Layer class must provide this function to the factory
|
||||
typedef Ptr<Layer>(*Constructor)(LayerParams ¶ms);
|
||||
|
||||
//! Registers the layer class with typename @p type and specified @p constructor. Thread-safe.
|
||||
static void registerLayer(const String &type, Constructor constructor);
|
||||
|
||||
//! Unregisters registered layer with specified type name. Thread-safe.
|
||||
static void unregisterLayer(const String &type);
|
||||
|
||||
/** @brief Creates instance of registered layer.
|
||||
* @param type type name of creating layer.
|
||||
* @param params parameters which will be used for layer initialization.
|
||||
* @note Thread-safe.
|
||||
*/
|
||||
static Ptr<Layer> createLayerInstance(const String &type, LayerParams& params);
|
||||
|
||||
private:
|
||||
LayerFactory();
|
||||
};
|
||||
|
||||
//! @}
|
||||
//! @}
|
||||
CV__DNN_INLINE_NS_END
|
||||
}
|
||||
}
|
||||
#endif
|
||||
29
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp
vendored
Normal file
29
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_LAYER_REG_HPP
|
||||
#define OPENCV_DNN_LAYER_REG_HPP
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
//! @addtogroup dnn
|
||||
//! @{
|
||||
|
||||
typedef std::map<std::string, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
|
||||
|
||||
//! Register layer types of DNN model.
|
||||
//!
|
||||
//! @note In order to thread-safely access the factory, see getLayerFactoryMutex() function.
|
||||
LayerFactory_Impl& getLayerFactoryImpl();
|
||||
|
||||
//! Get the mutex guarding @ref LayerFactory_Impl, see getLayerFactoryImpl() function.
|
||||
Mutex& getLayerFactoryMutex();
|
||||
|
||||
//! @}
|
||||
CV__DNN_INLINE_NS_END
|
||||
}
|
||||
}
|
||||
#endif
|
||||
259
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/shape_utils.hpp
vendored
Normal file
259
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/shape_utils.hpp
vendored
Normal file
@@ -0,0 +1,259 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_DNN_DNN_SHAPE_UTILS_HPP
|
||||
#define OPENCV_DNN_DNN_SHAPE_UTILS_HPP
|
||||
|
||||
#include <opencv2/dnn/dnn.hpp>
|
||||
#include <opencv2/core/types_c.h> // CV_MAX_DIM
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
//Slicing
|
||||
|
||||
struct _Range : public cv::Range
|
||||
{
|
||||
_Range(const Range &r) : cv::Range(r) {}
|
||||
_Range(int start_, int size_ = 1) : cv::Range(start_, start_ + size_) {}
|
||||
};
|
||||
|
||||
//! Selects range @p r0 along dimension 0 of @p m and everything along the remaining dimensions.
static inline Mat slice(const Mat &m, const _Range &r0)
{
    Range ranges[CV_MAX_DIM];
    for (int i = 1; i < m.dims; i++)
        ranges[i] = Range::all();
    ranges[0] = r0;
    return m(&ranges[0]);
}
|
||||
|
||||
//! Selects @p r0 and @p r1 along dimensions 0 and 1 of @p m and everything along the rest.
//! Asserts that @p m has at least 2 dimensions.
static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
{
    CV_Assert(m.dims >= 2);
    Range ranges[CV_MAX_DIM];
    for (int i = 2; i < m.dims; i++)
        ranges[i] = Range::all();
    ranges[0] = r0;
    ranges[1] = r1;
    return m(&ranges[0]);
}
|
||||
|
||||
//! Selects @p r0, @p r1 and @p r2 along dimensions 0..2 of @p m and everything along the rest.
//! Asserts that @p m has at least 3 dimensions.
static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
{
    CV_Assert(m.dims >= 3);
    Range ranges[CV_MAX_DIM];
    for (int i = 3; i < m.dims; i++)
        ranges[i] = Range::all();
    ranges[0] = r0;
    ranges[1] = r1;
    ranges[2] = r2;
    return m(&ranges[0]);
}
|
||||
|
||||
//! Selects @p r0 .. @p r3 along dimensions 0..3 of @p m and everything along the rest.
//! Asserts that @p m has at least 4 dimensions.
static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
{
    CV_Assert(m.dims >= 4);
    Range ranges[CV_MAX_DIM];
    for (int i = 4; i < m.dims; i++)
        ranges[i] = Range::all();
    ranges[0] = r0;
    ranges[1] = r1;
    ranges[2] = r2;
    ranges[3] = r3;
    return m(&ranges[0]);
}
|
||||
|
||||
//! Builds a Mat with the sizes of dimensions 2.. of @p m and the type of @p m,
//! constructed over the pointer m.ptr(n, cn). Asserts that @p m has more than 2 dimensions.
static inline Mat getPlane(const Mat &m, int n, int cn)
{
    CV_Assert(m.dims > 2);
    int sz[CV_MAX_DIM];
    for(int i = 2; i < m.dims; i++)
    {
        sz[i-2] = m.size.p[i];
    }
    // ptr<float> is only used to obtain the address; the result keeps m.type().
    return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr<float>(n, cn));
}
|
||||
|
||||
//! Copies the first @p n entries of @p dims into a MatShape.
static inline MatShape shape(const int* dims, const int n)
{
    MatShape shape;
    shape.assign(dims, dims + n);
    return shape;
}
|
||||
|
||||
//! Returns the per-dimension sizes of @p mat as a MatShape.
static inline MatShape shape(const Mat& mat)
{
    return shape(mat.size.p, mat.dims);
}
|
||||
|
||||
//! Returns the sizes held by @p sz as a MatShape.
static inline MatShape shape(const MatSize& sz)
{
    return shape(sz.p, sz.dims());
}
|
||||
|
||||
//! Returns the per-dimension sizes of @p mat as a MatShape.
static inline MatShape shape(const UMat& mat)
{
    return shape(mat.size.p, mat.dims);
}
|
||||
|
||||
#if 0 // issues with MatExpr wrapped into InputArray
|
||||
static inline
|
||||
MatShape shape(InputArray input)
|
||||
{
|
||||
int sz[CV_MAX_DIM];
|
||||
int ndims = input.sizend(sz);
|
||||
return shape(sz, ndims);
|
||||
}
|
||||
#endif
|
||||
|
||||
//! Predicate used by shape(a0, a1, a2, a3) to drop unspecified (negative) dimensions.
namespace {inline bool is_neg(int i) { return i < 0; }}
|
||||
|
||||
//! Builds a MatShape from up to four dimension sizes; every negative argument
//! (including the -1 defaults) is removed from the result.
static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
{
    int dims[] = {a0, a1, a2, a3};
    MatShape s = shape(dims, 4);
    s.erase(std::remove_if(s.begin(), s.end(), is_neg), s.end());
    return s;
}
|
||||
|
||||
static inline int total(const MatShape& shape, int start = -1, int end = -1)
|
||||
{
|
||||
if (start == -1) start = 0;
|
||||
if (end == -1) end = (int)shape.size();
|
||||
|
||||
if (shape.empty())
|
||||
return 0;
|
||||
|
||||
int elems = 1;
|
||||
CV_Assert(start <= (int)shape.size() && end <= (int)shape.size() &&
|
||||
start <= end);
|
||||
for(int i = start; i < end; i++)
|
||||
{
|
||||
elems *= shape[i];
|
||||
}
|
||||
return elems;
|
||||
}
|
||||
|
||||
//! Returns a new shape made of the entries of @p a followed by the entries of @p b.
static inline MatShape concat(const MatShape& a, const MatShape& b)
{
    MatShape c = a;
    c.insert(c.end(), b.begin(), b.end());

    return c;
}
|
||||
|
||||
static inline std::string toString(const MatShape& shape, const String& name = "")
|
||||
{
|
||||
std::ostringstream ss;
|
||||
if (!name.empty())
|
||||
ss << name << ' ';
|
||||
ss << '[';
|
||||
for(size_t i = 0, n = shape.size(); i < n; ++i)
|
||||
ss << ' ' << shape[i];
|
||||
ss << " ]";
|
||||
return ss.str();
|
||||
}
|
||||
static inline void print(const MatShape& shape, const String& name = "")
|
||||
{
|
||||
std::cout << toString(shape, name) << std::endl;
|
||||
}
|
||||
static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape)
|
||||
{
|
||||
out << toString(shape);
|
||||
return out;
|
||||
}
|
||||
|
||||
/// @brief Converts axis from `[-dims; dims)` (similar to Python's slice notation) to `[0; dims)` range.
|
||||
static inline
|
||||
int normalize_axis(int axis, int dims)
|
||||
{
|
||||
CV_Check(axis, axis >= -dims && axis < dims, "");
|
||||
axis = (axis < 0) ? (dims + axis) : axis;
|
||||
CV_DbgCheck(axis, axis >= 0 && axis < dims, "");
|
||||
return axis;
|
||||
}
|
||||
|
||||
static inline
|
||||
int normalize_axis(int axis, const MatShape& shape)
|
||||
{
|
||||
return normalize_axis(axis, (int)shape.size());
|
||||
}
|
||||
|
||||
static inline
|
||||
Range normalize_axis_range(const Range& r, int axisSize)
|
||||
{
|
||||
if (r == Range::all())
|
||||
return Range(0, axisSize);
|
||||
CV_CheckGE(r.start, 0, "");
|
||||
Range clamped(r.start,
|
||||
r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1);
|
||||
CV_DbgCheckGE(clamped.start, 0, "");
|
||||
CV_CheckLT(clamped.start, clamped.end, "");
|
||||
CV_CheckLE(clamped.end, axisSize, "");
|
||||
return clamped;
|
||||
}
|
||||
|
||||
static inline
|
||||
bool isAllOnes(const MatShape &inputShape, int startPos, int endPos)
|
||||
{
|
||||
CV_Assert(!inputShape.empty());
|
||||
|
||||
CV_CheckGE((int) inputShape.size(), startPos, "");
|
||||
CV_CheckGE(startPos, 0, "");
|
||||
CV_CheckLE(startPos, endPos, "");
|
||||
CV_CheckLE((size_t)endPos, inputShape.size(), "");
|
||||
|
||||
for (size_t i = startPos; i < endPos; i++)
|
||||
{
|
||||
if (inputShape[i] != 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}
|
||||
}
|
||||
#endif
|
||||
24
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/utils/debug_utils.hpp
vendored
Normal file
24
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/utils/debug_utils.hpp
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_UTILS_DEBUG_UTILS_HPP
|
||||
#define OPENCV_DNN_UTILS_DEBUG_UTILS_HPP
|
||||
|
||||
#include "../dnn.hpp"
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
/**
|
||||
* @brief Skip model import after diagnostic run in readNet() functions.
|
||||
* @param[in] skip Indicates whether to skip the import.
|
||||
*
|
||||
* This is an internal OpenCV function not intended for users.
|
||||
*/
|
||||
CV_EXPORTS void skipModelImport(bool skip);
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
|
||||
#endif // OPENCV_DNN_UTILS_DEBUG_UTILS_HPP
|
||||
76
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp
vendored
Normal file
76
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2018-2019, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#ifndef OPENCV_DNN_UTILS_INF_ENGINE_HPP
|
||||
#define OPENCV_DNN_UTILS_INF_ENGINE_HPP
|
||||
|
||||
#include "../dnn.hpp"
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
|
||||
/* Values for 'OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE' parameter */
|
||||
#define CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API "NN_BUILDER"
|
||||
#define CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH "NGRAPH"
|
||||
|
||||
/** @brief Returns Inference Engine internal backend API.
|
||||
*
|
||||
* See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros.
|
||||
*
|
||||
* Default value is controlled through `OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE` runtime parameter (environment variable).
|
||||
*/
|
||||
CV_EXPORTS_W cv::String getInferenceEngineBackendType();
|
||||
|
||||
/** @brief Specify Inference Engine internal backend API.
|
||||
*
|
||||
* See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros.
|
||||
*
|
||||
* @returns previous value of internal backend API
|
||||
*/
|
||||
CV_EXPORTS_W cv::String setInferenceEngineBackendType(const cv::String& newBackendType);
|
||||
|
||||
|
||||
/** @brief Release a Myriad device (binded by OpenCV).
|
||||
*
|
||||
* Single Myriad device cannot be shared across multiple processes which uses
|
||||
* Inference Engine's Myriad plugin.
|
||||
*/
|
||||
CV_EXPORTS_W void resetMyriadDevice();
|
||||
|
||||
|
||||
/* Values for 'OPENCV_DNN_IE_VPU_TYPE' parameter */
|
||||
#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_UNSPECIFIED ""
|
||||
/// Intel(R) Movidius(TM) Neural Compute Stick, NCS (USB 03e7:2150), Myriad2 (https://software.intel.com/en-us/movidius-ncs)
|
||||
#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2 "Myriad2"
|
||||
/// Intel(R) Neural Compute Stick 2, NCS2 (USB 03e7:2485), MyriadX (https://software.intel.com/ru-ru/neural-compute-stick)
|
||||
#define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X "MyriadX"
|
||||
#define CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE "ARM_COMPUTE"
|
||||
#define CV_DNN_INFERENCE_ENGINE_CPU_TYPE_X86 "X86"
|
||||
|
||||
|
||||
/** @brief Returns Inference Engine VPU type.
|
||||
*
|
||||
* See values of `CV_DNN_INFERENCE_ENGINE_VPU_TYPE_*` macros.
|
||||
*/
|
||||
CV_EXPORTS_W cv::String getInferenceEngineVPUType();
|
||||
|
||||
/** @brief Returns Inference Engine CPU type.
|
||||
*
|
||||
* Specify OpenVINO plugin: CPU or ARM.
|
||||
*/
|
||||
CV_EXPORTS_W cv::String getInferenceEngineCPUType();
|
||||
|
||||
/** @brief Release a HDDL plugin.
|
||||
*/
|
||||
CV_EXPORTS_W void releaseHDDLPlugin();
|
||||
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
|
||||
#endif // OPENCV_DNN_UTILS_INF_ENGINE_HPP
|
||||
21
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/version.hpp
vendored
Normal file
21
3rdparty/opencv-4.5.4/modules/dnn/include/opencv2/dnn/version.hpp
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_VERSION_HPP
|
||||
#define OPENCV_DNN_VERSION_HPP
|
||||
|
||||
/// Use with major OpenCV version only.
|
||||
#define OPENCV_DNN_API_VERSION 20211004
|
||||
|
||||
#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
|
||||
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
|
||||
#define CV__DNN_INLINE_NS_BEGIN namespace CV__DNN_INLINE_NS {
|
||||
#define CV__DNN_INLINE_NS_END }
|
||||
namespace cv { namespace dnn { namespace CV__DNN_INLINE_NS { } using namespace CV__DNN_INLINE_NS; }}
|
||||
#else
|
||||
#define CV__DNN_INLINE_NS_BEGIN
|
||||
#define CV__DNN_INLINE_NS_END
|
||||
#endif
|
||||
|
||||
#endif // OPENCV_DNN_VERSION_HPP
|
||||
41126
3rdparty/opencv-4.5.4/modules/dnn/misc/caffe/opencv-caffe.pb.cc
vendored
Normal file
41126
3rdparty/opencv-4.5.4/modules/dnn/misc/caffe/opencv-caffe.pb.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
32721
3rdparty/opencv-4.5.4/modules/dnn/misc/caffe/opencv-caffe.pb.h
vendored
Normal file
32721
3rdparty/opencv-4.5.4/modules/dnn/misc/caffe/opencv-caffe.pb.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
196
3rdparty/opencv-4.5.4/modules/dnn/misc/face_detector_accuracy.py
vendored
Normal file
196
3rdparty/opencv-4.5.4/modules/dnn/misc/face_detector_accuracy.py
vendored
Normal file
@@ -0,0 +1,196 @@
|
||||
# This script is used to estimate an accuracy of different face detection models.
|
||||
# COCO evaluation tool is used to compute an accuracy metrics (Average Precision).
|
||||
# Script works with different face detection datasets.
|
||||
import os
|
||||
import json
|
||||
from fnmatch import fnmatch
|
||||
from math import pi
|
||||
import cv2 as cv
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pycocotools.coco import COCO
|
||||
from pycocotools.cocoeval import COCOeval
|
||||
|
||||
# Command-line interface: model files (DNN or Haar cascade), dataset location and dataset kind.
parser = argparse.ArgumentParser(
        description='Evaluate OpenCV face detection algorithms '
                    'using COCO evaluation tool, http://cocodataset.org/#detections-eval')
parser.add_argument('--proto', help='Path to .prototxt of Caffe model or .pbtxt of TensorFlow graph')
parser.add_argument('--model', help='Path to .caffemodel trained in Caffe or .pb from TensorFlow')
parser.add_argument('--cascade', help='Optional path to trained Haar cascade as '
                                      'an additional model for evaluation')
parser.add_argument('--ann', help='Path to text file with ground truth annotations')
parser.add_argument('--pics', help='Path to images root directory')
parser.add_argument('--fddb', help='Evaluate FDDB dataset, http://vis-www.cs.umass.edu/fddb/', action='store_true')
parser.add_argument('--wider', help='Evaluate WIDER FACE dataset, http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/', action='store_true')
args = parser.parse_args()

# Ground truth accumulated by addImage()/addBBox(); dumped to annotations.json later on.
dataset = {}
dataset['images'] = []
dataset['categories'] = [{ 'id': 0, 'name': 'face' }]
dataset['annotations'] = []
|
||||
|
||||
def ellipse2Rect(params):
    """Return the bounding box of an ellipse as (left, top, right, bottom).

    `params` is indexed as [rad_x, rad_y, angle, center_x, center_y]; the angle is
    converted from radians to degrees before it is passed to cv.ellipse2Poly.
    The ellipse is approximated by a polygon (step 10) and the box is taken from
    cv.boundingRect, with right/bottom computed as origin + size.
    """
    rad_x = params[0]
    rad_y = params[1]
    angle = params[2] * 180.0 / pi
    center_x = params[3]
    center_y = params[4]
    pts = cv.ellipse2Poly((int(center_x), int(center_y)), (int(rad_x), int(rad_y)),
                          int(angle), 0, 360, 10)
    rect = cv.boundingRect(pts)
    left = rect[0]
    top = rect[1]
    right = rect[0] + rect[2]
    bottom = rect[1] + rect[3]
    return left, top, right, bottom
|
||||
|
||||
def addImage(imagePath):
    """Append an image record to the global `dataset` and return its id.

    The id is the number of images registered so far, so ids are consecutive from 0.
    """
    assert('images' in dataset)
    imageId = len(dataset['images'])
    dataset['images'].append({
        'id': int(imageId),
        'file_name': imagePath
    })
    return imageId
|
||||
|
||||
def addBBox(imageId, left, top, width, height):
    """Append a ground-truth face box for `imageId` to the global `dataset`.

    The box is stored as integer [left, top, width, height]; the area is computed
    from the unrounded width and height.
    """
    assert('annotations' in dataset)
    dataset['annotations'].append({
        'id': len(dataset['annotations']),
        'image_id': int(imageId),
        'category_id': 0,  # Face
        'bbox': [int(left), int(top), int(width), int(height)],
        'iscrowd': 0,
        'area': float(width * height)
    })
|
||||
|
||||
def addDetection(detections, imageId, left, top, width, height, score):
    """Append one detected face to the `detections` list.

    The box is stored as integer [left, top, width, height] and the score as a float.
    """
    detections.append({
      'image_id': int(imageId),
      'category_id': 0,  # Face
      'bbox': [int(left), int(top), int(width), int(height)],
      'score': float(score)
    })
|
||||
|
||||
|
||||
def fddb_dataset(annotations, images):
    """Load FDDB ground truth into the global `dataset`.

    Every file in `annotations` matching 'FDDB-fold-*-ellipseList.txt' is read as
    repeated records: an image path (relative to `images`, without the '.jpg'
    suffix), a face count, then one line of ellipse parameters per face. Each
    ellipse is converted to a box with ellipse2Rect() and registered via addBBox().
    """
    for d in os.listdir(annotations):
        if fnmatch(d, 'FDDB-fold-*-ellipseList.txt'):
            with open(os.path.join(annotations, d), 'rt') as f:
                lines = [line.rstrip('\n') for line in f]
                lineId = 0
                while lineId < len(lines):
                    # Image
                    imgPath = lines[lineId]
                    lineId += 1
                    # The image itself is not needed here; it is read later by the detection loop.
                    imageId = addImage(os.path.join(images, imgPath) + '.jpg')

                    # Faces
                    numFaces = int(lines[lineId])
                    lineId += 1
                    for i in range(numFaces):
                        params = [float(v) for v in lines[lineId].split()]
                        lineId += 1
                        left, top, right, bottom = ellipse2Rect(params)
                        addBBox(imageId, left, top, width=right - left + 1,
                                height=bottom - top + 1)
|
||||
|
||||
|
||||
def wider_dataset(annotations, images):
    """Load WIDER FACE ground truth into the global `dataset`.

    `annotations` is a text file of repeated records: an image path (relative to
    `images`), a face count, then one line per face whose first four integers are
    left, top, width and height.
    """
    with open(annotations, 'rt') as f:
        lines = [line.rstrip('\n') for line in f]
        lineId = 0
        while lineId < len(lines):
            # Image
            imgPath = lines[lineId]
            lineId += 1
            imageId = addImage(os.path.join(images, imgPath))

            # Faces
            numFaces = int(lines[lineId])
            lineId += 1
            for i in range(numFaces):
                params = [int(v) for v in lines[lineId].split()]
                lineId += 1
                left, top, width, height = params[0], params[1], params[2], params[3]
                addBBox(imageId, left, top, width, height)
|
||||
|
||||
def evaluate():
    """Run the COCO bounding-box evaluation and print its summary.

    Reads ground truth from 'annotations.json' and detections from
    'detections.json' in the current working directory.
    """
    cocoGt = COCO('annotations.json')
    cocoDt = cocoGt.loadRes('detections.json')
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
|
||||
|
||||
|
||||
### Convert to COCO annotations format #########################################
assert(args.fddb or args.wider)
if args.fddb:
    fddb_dataset(args.ann, args.pics)
elif args.wider:
    wider_dataset(args.ann, args.pics)

with open('annotations.json', 'wt') as f:
    json.dump(dataset, f)

### Obtain detections ##########################################################
detections = []
if args.proto and args.model:
    net = cv.dnn.readNet(args.proto, args.model)

    def detect(img, imageId):
        """Run the DNN detector on `img` and record every output row as a detection."""
        imgWidth = img.shape[1]
        imgHeight = img.shape[0]
        net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False))
        out = net.forward()

        for i in range(out.shape[2]):
            confidence = out[0, 0, i, 2]
            # Output coordinates are scaled by the image size to get pixels.
            left = int(out[0, 0, i, 3] * imgWidth)
            top = int(out[0, 0, i, 4] * imgHeight)
            right = int(out[0, 0, i, 5] * imgWidth)
            bottom = int(out[0, 0, i, 6] * imgHeight)

            # Clip the box to the image.
            x = max(0, min(left, imgWidth - 1))
            y = max(0, min(top, imgHeight - 1))
            w = max(0, min(right - x + 1, imgWidth - x))
            h = max(0, min(bottom - y + 1, imgHeight - y))

            addDetection(detections, imageId, x, y, w, h, score=confidence)

elif args.cascade:
    cascade = cv.CascadeClassifier(args.cascade)

    def detect(img, imageId):
        """Run the Haar cascade on `img`; every hit is recorded with score 1.0."""
        srcImgGray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        faces = cascade.detectMultiScale(srcImgGray)

        for rect in faces:
            left, top, width, height = rect[0], rect[1], rect[2], rect[3]
            addDetection(detections, imageId, left, top, width, height, score=1.0)

else:
    # Without a detector `detect` would be undefined and the loop below would
    # fail with a NameError; report the missing arguments instead.
    parser.error('no detector specified: pass --proto together with --model, or --cascade')

for i in range(len(dataset['images'])):
    sys.stdout.write('\r%d / %d' % (i + 1, len(dataset['images'])))
    sys.stdout.flush()

    img = cv.imread(dataset['images'][i]['file_name'])
    imageId = int(dataset['images'][i]['id'])

    detect(img, imageId)

with open('detections.json', 'wt') as f:
    json.dump(detections, f)

evaluate()


def rm(f):
    """Remove file `f` if it exists."""
    if os.path.exists(f):
        os.remove(f)

# Clean up the intermediate files written above.
rm('annotations.json')
rm('detections.json')
|
||||
1
3rdparty/opencv-4.5.4/modules/dnn/misc/java/filelist_common
vendored
Normal file
1
3rdparty/opencv-4.5.4/modules/dnn/misc/java/filelist_common
vendored
Normal file
@@ -0,0 +1 @@
|
||||
misc/java/src/cpp/dnn_converters.hpp
|
||||
63
3rdparty/opencv-4.5.4/modules/dnn/misc/java/gen_dict.json
vendored
Normal file
63
3rdparty/opencv-4.5.4/modules/dnn/misc/java/gen_dict.json
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"type_dict": {
|
||||
"MatShape": {
|
||||
"j_type": "MatOfInt",
|
||||
"jn_type": "long",
|
||||
"jni_type": "jlong",
|
||||
"jni_var": "MatShape %(n)s",
|
||||
"suffix": "J",
|
||||
"v_type": "Mat",
|
||||
"j_import": "org.opencv.core.MatOfInt"
|
||||
},
|
||||
"vector_MatShape": {
|
||||
"j_type": "List<MatOfInt>",
|
||||
"jn_type": "List<MatOfInt>",
|
||||
"jni_type": "jobject",
|
||||
"jni_var": "std::vector< MatShape > %(n)s",
|
||||
"suffix": "Ljava_util_List",
|
||||
"v_type": "vector_MatShape",
|
||||
"j_import": "org.opencv.core.MatOfInt"
|
||||
},
|
||||
"vector_size_t": {
|
||||
"j_type": "MatOfDouble",
|
||||
"jn_type": "long",
|
||||
"jni_type": "jlong",
|
||||
"jni_var": "std::vector<size_t> %(n)s",
|
||||
"suffix": "J",
|
||||
"v_type": "Mat",
|
||||
"j_import": "org.opencv.core.MatOfDouble"
|
||||
},
|
||||
"vector_Ptr_Layer": {
|
||||
"j_type": "List<Layer>",
|
||||
"jn_type": "List<Layer>",
|
||||
"jni_type": "jobject",
|
||||
"jni_var": "std::vector< Ptr<cv::dnn::Layer> > %(n)s",
|
||||
"suffix": "Ljava_util_List",
|
||||
"v_type": "vector_Layer",
|
||||
"j_import": "org.opencv.dnn.Layer"
|
||||
},
|
||||
"vector_Target": {
|
||||
"j_type": "List<Integer>",
|
||||
"jn_type": "List<Integer>",
|
||||
"jni_type": "jobject",
|
||||
"jni_var": "std::vector< cv::dnn::Target > %(n)s",
|
||||
"suffix": "Ljava_util_List",
|
||||
"v_type": "vector_Target"
|
||||
},
|
||||
"LayerId": {
|
||||
"j_type": "DictValue",
|
||||
"jn_type": "long",
|
||||
"jn_args": [
|
||||
[
|
||||
"__int64",
|
||||
".getNativeObjAddr()"
|
||||
]
|
||||
|
||||
],
|
||||
"jni_name": "(*(*(Ptr<cv::dnn::DictValue>*)%(n)s_nativeObj))",
|
||||
"jni_type": "jlong",
|
||||
"suffix": "J",
|
||||
"j_import": "org.opencv.dnn.DictValue"
|
||||
}
|
||||
}
|
||||
}
|
||||
102
3rdparty/opencv-4.5.4/modules/dnn/misc/java/src/cpp/dnn_converters.cpp
vendored
Normal file
102
3rdparty/opencv-4.5.4/modules/dnn/misc/java/src/cpp/dnn_converters.cpp
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
// Author: abratchik
|
||||
|
||||
#include "dnn_converters.hpp"
|
||||
|
||||
#define LOG_TAG "org.opencv.dnn"
|
||||
|
||||
// Fills `matshape` from `mat`, which must be a single-column CV_32SC1 matrix.
// `matshape` is emptied before the check, so it is left empty if CHECK_MAT
// bails out (CHECK_MAT is defined outside this file — presumably it returns
// early on failure; confirm).
void Mat_to_MatShape(cv::Mat& mat, MatShape& matshape)
{
    matshape.clear();
    CHECK_MAT(mat.type()==CV_32SC1 && mat.cols==1);
    matshape = static_cast<MatShape>(mat);
}
|
||||
|
||||
// Stores the values of `matshape` in `mat`. The data is copied, so `mat`
// does not alias the storage of `matshape`.
void MatShape_to_Mat(MatShape& matshape, cv::Mat& mat)
{
    const bool copyData = true;
    mat = cv::Mat(matshape, copyData);
}
|
||||
|
||||
// Converts a Java list of MatOfInt objects into a vector of MatShape.
// Each element's native cv::Mat is read through its nativeObj handle and
// converted to a MatShape; the local reference to the element is released
// right away so long lists do not exhaust the JNI local reference table.
std::vector<MatShape> List_to_vector_MatShape(JNIEnv* env, jobject list)
{
    static jclass juArrayList = ARRAYLIST(env);
    jmethodID m_size = LIST_SIZE(env, juArrayList);
    jmethodID m_get = LIST_GET(env, juArrayList);

    static jclass jMatOfInt = MATOFINT(env);

    const jint count = env->CallIntMethod(list, m_size);
    std::vector<MatShape> shapes;
    shapes.reserve(count);

    jint idx = 0;
    while (idx < count)
    {
        jobject item = env->CallObjectMethod(list, m_get, idx);
        cv::Mat* native = (cv::Mat*) GETNATIVEOBJ(env, jMatOfInt, item);
        shapes.push_back(static_cast<MatShape>(*native));
        env->DeleteLocalRef(item);
        ++idx;
    }
    return shapes;
}
|
||||
|
||||
// Builds a java.util.ArrayList of org.opencv.dnn.Layer objects from `vs`.
//
// Every Java Layer receives its own heap-allocated cv::Ptr<Layer> handle.
// This matches List_to_vector_Ptr_Layer(), which reads nativeObj back as a
// cv::Ptr<cv::dnn::Layer>*; the previous code stored the raw Layer* instead.
// NOTE(review): assumes the Java Layer wrapper deletes this handle when it is
// finalized, as it would for any natively returned Layer — confirm.
jobject vector_Ptr_Layer_to_List(JNIEnv* env, std::vector<cv::Ptr<cv::dnn::Layer> >& vs)
{
    static jclass juArrayList   = ARRAYLIST(env);
    static jmethodID m_create   = CONSTRUCTOR(env, juArrayList);
    jmethodID m_add             = LIST_ADD(env, juArrayList);

    static jclass jLayerClass   = LAYER(env);
    static jmethodID m_create_layer = LAYER_CONSTRUCTOR(env, jLayerClass);

    // Variadic JNI calls perform no conversions: the argument must already
    // have the exact JNI type (presumably jint for the ArrayList capacity).
    jobject result = env->NewObject(juArrayList, m_create, static_cast<jint>(vs.size()));
    for (std::vector< cv::Ptr<cv::dnn::Layer> >::iterator it = vs.begin(); it != vs.end(); ++it) {
        cv::Ptr<cv::dnn::Layer>* handle = new cv::Ptr<cv::dnn::Layer>(*it);
        // LAYER_CONSTRUCTOR has signature "(J)V": pass a jlong, not a pointer,
        // so the call is also well-formed where pointers are 32 bits wide.
        jobject element = env->NewObject(jLayerClass, m_create_layer, (jlong) handle);
        if (!element) {
            // Construction failed (a Java exception is pending): nothing owns the handle.
            delete handle;
            break;
        }
        env->CallBooleanMethod(result, m_add, element);
        env->DeleteLocalRef(element);
    }
    return result;
}
|
||||
|
||||
// Builds a java.util.ArrayList<Integer> holding the numeric value of every
// cv::dnn::Target in `vs`.
jobject vector_Target_to_List(JNIEnv* env, std::vector<cv::dnn::Target>& vs)
{
    static jclass juArrayList   = ARRAYLIST(env);
    static jmethodID m_create   = CONSTRUCTOR(env, juArrayList);
    jmethodID m_add             = LIST_ADD(env, juArrayList);

    // FindClass returns a *local* reference that dies when the current native
    // call returns. Because the class is cached in a static, it has to be
    // promoted to a global reference (same pattern as the LAYER macro).
    static jclass jInteger = static_cast<jclass>(env->NewGlobalRef(env->FindClass("java/lang/Integer")));
    static jmethodID m_create_Integer = env->GetMethodID(jInteger, "<init>", "(I)V");

    // Variadic JNI calls perform no conversions: pass exact JNI types.
    jobject result = env->NewObject(juArrayList, m_create, static_cast<jint>(vs.size()));
    for (size_t i = 0; i < vs.size(); ++i)
    {
        jobject element = env->NewObject(jInteger, m_create_Integer, static_cast<jint>(vs[i]));
        env->CallBooleanMethod(result, m_add, element);
        env->DeleteLocalRef(element);
    }
    return result;
}
|
||||
|
||||
// Converts a Java list of org.opencv.dnn.Layer objects into a vector of
// cv::Ptr<Layer>. The nativeObj of every element is read as a
// cv::Ptr<cv::dnn::Layer>* and copied, so the returned vector shares
// ownership of the layers with the Java objects.
std::vector<cv::Ptr<cv::dnn::Layer> > List_to_vector_Ptr_Layer(JNIEnv* env, jobject list)
{
    static jclass juArrayList = ARRAYLIST(env);
    jmethodID m_size = LIST_SIZE(env, juArrayList);
    jmethodID m_get = LIST_GET(env, juArrayList);

    static jclass jLayerClass = LAYER(env);

    const jint count = env->CallIntMethod(list, m_size);
    std::vector< cv::Ptr<cv::dnn::Layer> > layers;
    layers.reserve(count);

    jint idx = 0;
    while (idx < count)
    {
        jobject item = env->CallObjectMethod(list, m_get, idx);
        cv::Ptr<cv::dnn::Layer>* handle = (cv::Ptr<cv::dnn::Layer>*) GETNATIVEOBJ(env, jLayerClass, item);
        layers.push_back(*handle);
        env->DeleteLocalRef(item);
        ++idx;
    }
    return layers;
}
|
||||
33
3rdparty/opencv-4.5.4/modules/dnn/misc/java/src/cpp/dnn_converters.hpp
vendored
Normal file
33
3rdparty/opencv-4.5.4/modules/dnn/misc/java/src/cpp/dnn_converters.hpp
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
// Author: abratchik
|
||||
|
||||
#ifndef DNN_CONVERTERS_HPP
|
||||
#define DNN_CONVERTERS_HPP
|
||||
|
||||
#include <jni.h>
|
||||
#include "opencv_java.hpp"
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/dnn/dnn.hpp"
|
||||
|
||||
#define LAYER(ENV) static_cast<jclass>(ENV->NewGlobalRef(ENV->FindClass("org/opencv/dnn/Layer")))
|
||||
#define LAYER_CONSTRUCTOR(ENV, CLS) ENV->GetMethodID(CLS, "<init>", "(J)V")
|
||||
|
||||
|
||||
using namespace cv::dnn;
|
||||
|
||||
void Mat_to_MatShape(cv::Mat& mat, MatShape& matshape);
|
||||
|
||||
void MatShape_to_Mat(MatShape& matshape, cv::Mat& mat);
|
||||
|
||||
std::vector<MatShape> List_to_vector_MatShape(JNIEnv* env, jobject list);
|
||||
|
||||
jobject vector_Ptr_Layer_to_List(JNIEnv* env, std::vector<cv::Ptr<cv::dnn::Layer> >& vs);
|
||||
|
||||
std::vector<cv::Ptr<cv::dnn::Layer> > List_to_vector_Ptr_Layer(JNIEnv* env, jobject list);
|
||||
|
||||
jobject vector_Target_to_List(JNIEnv* env, std::vector<cv::dnn::Target>& vs);
|
||||
|
||||
#endif /* DNN_CONVERTERS_HPP */
|
||||
119
3rdparty/opencv-4.5.4/modules/dnn/misc/java/test/DnnListRegressionTest.java
vendored
Normal file
119
3rdparty/opencv-4.5.4/modules/dnn/misc/java/test/DnnListRegressionTest.java
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
package org.opencv.test.dnn;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.opencv.core.Core;
|
||||
import org.opencv.core.Mat;
|
||||
import org.opencv.core.MatOfInt;
|
||||
import org.opencv.core.MatOfFloat;
|
||||
import org.opencv.core.MatOfByte;
|
||||
import org.opencv.core.Scalar;
|
||||
import org.opencv.core.Size;
|
||||
import org.opencv.dnn.DictValue;
|
||||
import org.opencv.dnn.Dnn;
|
||||
import org.opencv.dnn.Layer;
|
||||
import org.opencv.dnn.Net;
|
||||
import org.opencv.imgcodecs.Imgcodecs;
|
||||
import org.opencv.imgproc.Imgproc;
|
||||
import org.opencv.test.OpenCVTestCase;
|
||||
|
||||
/*
|
||||
* regression test for #12324,
|
||||
* testing various java.util.List invocations,
|
||||
* which use the LIST_GET macro
|
||||
*/
|
||||
|
||||
public class DnnListRegressionTest extends OpenCVTestCase {
|
||||
|
||||
private final static String ENV_OPENCV_DNN_TEST_DATA_PATH = "OPENCV_DNN_TEST_DATA_PATH";
|
||||
|
||||
private final static String ENV_OPENCV_TEST_DATA_PATH = "OPENCV_TEST_DATA_PATH";
|
||||
|
||||
String modelFileName = "";
|
||||
String sourceImageFile = "";
|
||||
|
||||
Net net;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
String envDnnTestDataPath = System.getenv(ENV_OPENCV_DNN_TEST_DATA_PATH);
|
||||
|
||||
if(envDnnTestDataPath == null){
|
||||
isTestCaseEnabled = false;
|
||||
return;
|
||||
}
|
||||
|
||||
File dnnTestDataPath = new File(envDnnTestDataPath);
|
||||
modelFileName = new File(dnnTestDataPath, "dnn/tensorflow_inception_graph.pb").toString();
|
||||
|
||||
String envTestDataPath = System.getenv(ENV_OPENCV_TEST_DATA_PATH);
|
||||
|
||||
if(envTestDataPath == null) throw new Exception(ENV_OPENCV_TEST_DATA_PATH + " has to be defined!");
|
||||
|
||||
File testDataPath = new File(envTestDataPath);
|
||||
|
||||
File f = new File(testDataPath, "dnn/grace_hopper_227.png");
|
||||
sourceImageFile = f.toString();
|
||||
if(!f.exists()) throw new Exception("Test image is missing: " + sourceImageFile);
|
||||
|
||||
net = Dnn.readNetFromTensorflow(modelFileName);
|
||||
|
||||
Mat image = Imgcodecs.imread(sourceImageFile);
|
||||
assertNotNull("Loading image from file failed!", image);
|
||||
|
||||
Mat inputBlob = Dnn.blobFromImage(image, 1.0, new Size(224, 224), new Scalar(0), true, true);
|
||||
assertNotNull("Converting image to blob failed!", inputBlob);
|
||||
|
||||
net.setInput(inputBlob, "input");
|
||||
}
|
||||
|
||||
public void testSetInputsNames() {
|
||||
List<String> inputs = new ArrayList();
|
||||
inputs.add("input");
|
||||
try {
|
||||
net.setInputsNames(inputs);
|
||||
} catch(Exception e) {
|
||||
fail("Net setInputsNames failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testForward() {
|
||||
List<Mat> outs = new ArrayList();
|
||||
List<String> outNames = new ArrayList();
|
||||
outNames.add("softmax2");
|
||||
try {
|
||||
net.forward(outs,outNames);
|
||||
} catch(Exception e) {
|
||||
fail("Net forward failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetMemoryConsumption() {
|
||||
int layerId = 1;
|
||||
List<MatOfInt> netInputShapes = new ArrayList();
|
||||
netInputShapes.add(new MatOfInt(1, 3, 224, 224));
|
||||
long[] weights=null;
|
||||
long[] blobs=null;
|
||||
try {
|
||||
net.getMemoryConsumption(layerId, netInputShapes, weights, blobs);
|
||||
} catch(Exception e) {
|
||||
fail("Net getMemoryConsumption failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetFLOPS() {
|
||||
int layerId = 1;
|
||||
List<MatOfInt> netInputShapes = new ArrayList();
|
||||
netInputShapes.add(new MatOfInt(1, 3, 224, 224));
|
||||
try {
|
||||
net.getFLOPS(layerId, netInputShapes);
|
||||
} catch(Exception e) {
|
||||
fail("Net getFLOPS failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
149
3rdparty/opencv-4.5.4/modules/dnn/misc/java/test/DnnTensorFlowTest.java
vendored
Normal file
149
3rdparty/opencv-4.5.4/modules/dnn/misc/java/test/DnnTensorFlowTest.java
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
package org.opencv.test.dnn;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.opencv.core.Core;
|
||||
import org.opencv.core.Mat;
|
||||
import org.opencv.core.MatOfFloat;
|
||||
import org.opencv.core.MatOfByte;
|
||||
import org.opencv.core.Scalar;
|
||||
import org.opencv.core.Size;
|
||||
import org.opencv.dnn.DictValue;
|
||||
import org.opencv.dnn.Dnn;
|
||||
import org.opencv.dnn.Layer;
|
||||
import org.opencv.dnn.Net;
|
||||
import org.opencv.imgcodecs.Imgcodecs;
|
||||
import org.opencv.imgproc.Imgproc;
|
||||
import org.opencv.test.OpenCVTestCase;
|
||||
|
||||
public class DnnTensorFlowTest extends OpenCVTestCase {
|
||||
|
||||
private final static String ENV_OPENCV_DNN_TEST_DATA_PATH = "OPENCV_DNN_TEST_DATA_PATH";
|
||||
|
||||
private final static String ENV_OPENCV_TEST_DATA_PATH = "OPENCV_TEST_DATA_PATH";
|
||||
|
||||
String modelFileName = "";
|
||||
String sourceImageFile = "";
|
||||
|
||||
Net net;
|
||||
|
||||
private static void normAssert(Mat ref, Mat test) {
|
||||
final double l1 = 1e-5;
|
||||
final double lInf = 1e-4;
|
||||
double normL1 = Core.norm(ref, test, Core.NORM_L1) / ref.total();
|
||||
double normLInf = Core.norm(ref, test, Core.NORM_INF) / ref.total();
|
||||
assertTrue(normL1 < l1);
|
||||
assertTrue(normLInf < lInf);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
String envDnnTestDataPath = System.getenv(ENV_OPENCV_DNN_TEST_DATA_PATH);
|
||||
|
||||
if(envDnnTestDataPath == null){
|
||||
isTestCaseEnabled = false;
|
||||
return;
|
||||
}
|
||||
|
||||
File dnnTestDataPath = new File(envDnnTestDataPath);
|
||||
modelFileName = new File(dnnTestDataPath, "dnn/tensorflow_inception_graph.pb").toString();
|
||||
|
||||
String envTestDataPath = System.getenv(ENV_OPENCV_TEST_DATA_PATH);
|
||||
|
||||
if(envTestDataPath == null) throw new Exception(ENV_OPENCV_TEST_DATA_PATH + " has to be defined!");
|
||||
|
||||
File testDataPath = new File(envTestDataPath);
|
||||
|
||||
File f = new File(testDataPath, "dnn/grace_hopper_227.png");
|
||||
sourceImageFile = f.toString();
|
||||
if(!f.exists()) throw new Exception("Test image is missing: " + sourceImageFile);
|
||||
|
||||
net = Dnn.readNetFromTensorflow(modelFileName);
|
||||
}
|
||||
|
||||
public void testGetLayerTypes() {
|
||||
List<String> layertypes = new ArrayList();
|
||||
net.getLayerTypes(layertypes);
|
||||
|
||||
assertFalse("No layer types returned!", layertypes.isEmpty());
|
||||
}
|
||||
|
||||
public void testGetLayer() {
|
||||
List<String> layernames = net.getLayerNames();
|
||||
|
||||
assertFalse("Test net returned no layers!", layernames.isEmpty());
|
||||
|
||||
String testLayerName = layernames.get(0);
|
||||
|
||||
DictValue layerId = new DictValue(testLayerName);
|
||||
|
||||
assertEquals("DictValue did not return the string, which was used in constructor!", testLayerName, layerId.getStringValue());
|
||||
|
||||
Layer layer = net.getLayer(layerId);
|
||||
|
||||
assertEquals("Layer name does not match the expected value!", testLayerName, layer.get_name());
|
||||
|
||||
}
|
||||
|
||||
public void checkInceptionNet(Net net)
|
||||
{
|
||||
Mat image = Imgcodecs.imread(sourceImageFile);
|
||||
assertNotNull("Loading image from file failed!", image);
|
||||
|
||||
Mat inputBlob = Dnn.blobFromImage(image, 1.0, new Size(224, 224), new Scalar(0), true, true);
|
||||
assertNotNull("Converting image to blob failed!", inputBlob);
|
||||
|
||||
net.setInput(inputBlob, "input");
|
||||
|
||||
Mat result = new Mat();
|
||||
try {
|
||||
net.setPreferableBackend(Dnn.DNN_BACKEND_OPENCV);
|
||||
result = net.forward("softmax2");
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("DNN forward failed: " + e.getMessage());
|
||||
}
|
||||
assertNotNull("Net returned no result!", result);
|
||||
|
||||
result = result.reshape(1, 1);
|
||||
Core.MinMaxLocResult minmax = Core.minMaxLoc(result);
|
||||
assertEquals("Wrong prediction", (int)minmax.maxLoc.x, 866);
|
||||
|
||||
Mat top5RefScores = new MatOfFloat(new float[] {
|
||||
0.63032645f, 0.2561979f, 0.032181446f, 0.015721032f, 0.014785315f
|
||||
}).reshape(1, 1);
|
||||
|
||||
Core.sort(result, result, Core.SORT_DESCENDING);
|
||||
|
||||
normAssert(result.colRange(0, 5), top5RefScores);
|
||||
}
|
||||
|
||||
public void testTestNetForward() {
|
||||
checkInceptionNet(net);
|
||||
}
|
||||
|
||||
public void testReadFromBuffer() {
|
||||
File modelFile = new File(modelFileName);
|
||||
byte[] modelBuffer = new byte[ (int)modelFile.length() ];
|
||||
|
||||
try {
|
||||
FileInputStream fis = new FileInputStream(modelFile);
|
||||
fis.read(modelBuffer);
|
||||
fis.close();
|
||||
} catch (IOException e) {
|
||||
fail("Failed to read a model: " + e.getMessage());
|
||||
}
|
||||
net = Dnn.readNetFromTensorflow(new MatOfByte(modelBuffer));
|
||||
checkInceptionNet(net);
|
||||
}
|
||||
|
||||
public void testGetAvailableTargets() {
|
||||
List<Integer> targets = Dnn.getAvailableTargets(Dnn.DNN_BACKEND_OPENCV);
|
||||
assertTrue(targets.contains(Dnn.DNN_TARGET_CPU));
|
||||
}
|
||||
}
|
||||
46
3rdparty/opencv-4.5.4/modules/dnn/misc/objc/gen_dict.json
vendored
Normal file
46
3rdparty/opencv-4.5.4/modules/dnn/misc/objc/gen_dict.json
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"func_arg_fix" : {
|
||||
"Dnn": {
|
||||
"(Net*)readNetFromCaffe:(NSString*)prototxt caffeModel:(NSString*)caffeModel" : { "readNetFromCaffe" : {"name" : "readNetFromCaffeFile"} },
|
||||
"(Net*)readNetFromCaffe:(ByteVector*)bufferProto bufferModel:(ByteVector*)bufferModel" : { "readNetFromCaffe" : {"name" : "readNetFromCaffeBuffer"} },
|
||||
"(Net*)readNetFromDarknet:(NSString*)cfgFile darknetModel:(NSString*)darknetModel" : { "readNetFromDarknet" : {"name" : "readNetFromDarknetFile"} },
|
||||
"(Net*)readNetFromDarknet:(ByteVector*)bufferCfg bufferModel:(ByteVector*)bufferModel" : { "readNetFromDarknet" : {"name" : "readNetFromDarknetBuffer"} },
|
||||
"(Net*)readNetFromONNX:(NSString*)onnxFile" : { "readNetFromONNX" : {"name" : "readNetFromONNXFile"} },
|
||||
"(Net*)readNetFromONNX:(ByteVector*)buffer" : { "readNetFromONNX" : {"name" : "readNetFromONNXBuffer"} },
|
||||
"(Net*)readNetFromTensorflow:(NSString*)model config:(NSString*)config" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowFile"} },
|
||||
"(Net*)readNetFromTensorflow:(ByteVector*)bufferModel bufferConfig:(ByteVector*)bufferConfig" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowBuffer"} }
|
||||
},
|
||||
"Net": {
|
||||
"(void)forward:(NSMutableArray<Mat*>*)outputBlobs outputName:(NSString*)outputName" : { "forward" : {"name" : "forwardOutputBlobs"} },
|
||||
"(void)forward:(NSMutableArray<Mat*>*)outputBlobs outBlobNames:(NSArray<NSString*>*)outBlobNames" : { "forward" : {"name" : "forwardOutputBlobs"} },
|
||||
"(void)forwardAndRetrieve:(NSMutableArray<NSMutableArray<Mat*>*>*)outputBlobs outBlobNames:(NSArray<NSString*>*)outBlobNames" : { "forward" : {"swift_name" : "forwardAndRetrieve"} },
|
||||
"(long)getFLOPS:(IntVector*)netInputShape" : { "getFLOPS" : {"name" : "getFLOPSWithNetInputShape"} },
|
||||
"(long)getFLOPS:(NSArray<IntVector*>*)netInputShapes" : { "getFLOPS" : {"name" : "getFLOPSWithNetInputShapes"} },
|
||||
"(long)getFLOPS:(int)layerId netInputShape:(IntVector*)netInputShape" : { "getFLOPS" : {"name" : "getFLOPSWithLayerId"} },
|
||||
"(long)getFLOPS:(int)layerId netInputShapes:(NSArray<IntVector*>*)netInputShapes" : { "getFLOPS" : {"name" : "getFLOPSWithLayerId"} },
|
||||
"(void)getLayersShapes:(IntVector*)netInputShape layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)inLayersShapes outLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)outLayersShapes" : { "getLayersShapes" : {"name" : "getLayersShapesWithNetInputShape"} },
|
||||
"(void)getLayersShapes:(NSArray<IntVector*>*)netInputShapes layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)inLayersShapes outLayersShapes:(NSMutableArray<NSMutableArray<IntVector*>*>*)outLayersShapes" : { "getLayersShapes" : {"name" : "getLayersShapesWithNetInputShapes"} }
|
||||
}
|
||||
},
|
||||
"type_dict": {
|
||||
"MatShape": {
|
||||
"objc_type": "IntVector*",
|
||||
"to_cpp": "%(n)s.nativeRef",
|
||||
"from_cpp": "[IntVector fromNative:%(n)s]",
|
||||
"cast_to": "std::vector<int>"
|
||||
},
|
||||
"vector_MatShape": {
|
||||
"objc_type": "IntVector*",
|
||||
"v_type": "IntVector"
|
||||
},
|
||||
"vector_vector_MatShape": {
|
||||
"objc_type": "IntVector*",
|
||||
"v_v_type": "IntVector"
|
||||
},
|
||||
"LayerId": {
|
||||
"objc_type": "DictValue*",
|
||||
"to_cpp": "*(cv::dnn::DictValue*)(%(n)s.nativePtr)",
|
||||
"from_cpp": "[DictValue fromNative:%(n)s]"
|
||||
}
|
||||
}
|
||||
}
|
||||
6977
3rdparty/opencv-4.5.4/modules/dnn/misc/onnx/opencv-onnx.pb.cc
vendored
Normal file
6977
3rdparty/opencv-4.5.4/modules/dnn/misc/onnx/opencv-onnx.pb.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5849
3rdparty/opencv-4.5.4/modules/dnn/misc/onnx/opencv-onnx.pb.h
vendored
Normal file
5849
3rdparty/opencv-4.5.4/modules/dnn/misc/onnx/opencv-onnx.pb.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
219
3rdparty/opencv-4.5.4/modules/dnn/misc/python/pyopencv_dnn.hpp
vendored
Normal file
219
3rdparty/opencv-4.5.4/modules/dnn/misc/python/pyopencv_dnn.hpp
vendored
Normal file
@@ -0,0 +1,219 @@
|
||||
#ifdef HAVE_OPENCV_DNN
|
||||
typedef dnn::DictValue LayerId;
|
||||
typedef std::vector<dnn::MatShape> vector_MatShape;
|
||||
typedef std::vector<std::vector<dnn::MatShape> > vector_vector_MatShape;
|
||||
|
||||
template<>
|
||||
bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const ArgInfo& info)
|
||||
{
|
||||
CV_UNUSED(info);
|
||||
if (!o || o == Py_None)
|
||||
return true; //Current state will be used
|
||||
else if (PyLong_Check(o))
|
||||
{
|
||||
dv = dnn::DictValue((int64)PyLong_AsLongLong(o));
|
||||
return true;
|
||||
}
|
||||
else if (PyInt_Check(o))
|
||||
{
|
||||
dv = dnn::DictValue((int64)PyInt_AS_LONG(o));
|
||||
return true;
|
||||
}
|
||||
else if (PyFloat_Check(o))
|
||||
{
|
||||
dv = dnn::DictValue(PyFloat_AsDouble(o));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string str;
|
||||
if (getUnicodeString(o, str))
|
||||
{
|
||||
dv = dnn::DictValue(str);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
PyObject* pyopencv_from(const dnn::DictValue &dv)
|
||||
{
|
||||
if (dv.size() > 1)
|
||||
{
|
||||
std::vector<T> vec(dv.size());
|
||||
for (int i = 0; i < dv.size(); ++i)
|
||||
vec[i] = dv.get<T>(i);
|
||||
return pyopencv_from_generic_vec(vec);
|
||||
}
|
||||
else
|
||||
return pyopencv_from(dv.get<T>());
|
||||
}
|
||||
|
||||
template<>
|
||||
PyObject* pyopencv_from(const dnn::DictValue &dv)
|
||||
{
|
||||
if (dv.isInt()) return pyopencv_from<int>(dv);
|
||||
if (dv.isReal()) return pyopencv_from<float>(dv);
|
||||
if (dv.isString()) return pyopencv_from<String>(dv);
|
||||
CV_Error(Error::StsNotImplemented, "Unknown value type");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
template<>
|
||||
PyObject* pyopencv_from(const dnn::LayerParams& lp)
|
||||
{
|
||||
PyObject* dict = PyDict_New();
|
||||
for (std::map<String, dnn::DictValue>::const_iterator it = lp.begin(); it != lp.end(); ++it)
|
||||
{
|
||||
CV_Assert(!PyDict_SetItemString(dict, it->first.c_str(), pyopencv_from(it->second)));
|
||||
}
|
||||
return dict;
|
||||
}
|
||||
|
||||
template<>
|
||||
PyObject* pyopencv_from(const std::vector<dnn::Target> &t)
|
||||
{
|
||||
return pyopencv_from(std::vector<int>(t.begin(), t.end()));
|
||||
}
|
||||
|
||||
// dnn::Layer implementation that forwards its work to a Python object.
//
// Python classes are registered per layer type; create() instantiates the most
// recently registered class for the requested type. Python is only touched
// while the GIL is held, and the GIL is always released before an OpenCV
// exception is thrown, so a failing layer cannot leave the GIL locked.
class pycvLayer CV_FINAL : public dnn::Layer
{
public:
    // Calls pyLayer(params, blobs) and keeps the returned instance.
    pycvLayer(const dnn::LayerParams &params, PyObject* pyLayer) : Layer(params)
    {
        PyGILState_STATE gstate;
        gstate = PyGILState_Ensure();

        PyObject* args = PyTuple_New(2);
        bool argsReady = args != NULL;
        if (argsReady)
        {
            // PyTuple_SetItem steals the reference to the item.
            argsReady = PyTuple_SetItem(args, 0, pyopencv_from(params)) == 0 &&
                        PyTuple_SetItem(args, 1, pyopencv_from(params.blobs)) == 0;
        }
        o = argsReady ? PyObject_CallObject(pyLayer, args) : NULL;

        Py_XDECREF(args);
        PyGILState_Release(gstate);
        CV_Assert(argsReady);
        if (!o)
            CV_Error(Error::StsError, "Failed to create an instance of custom layer");
    }

    // Adds `o` as the newest implementation of layer `type`.
    // Must be called with the GIL held (it is, when invoked from the binding).
    static void registerLayer(const std::string& type, PyObject* o)
    {
        // Keep the class alive for as long as it is registered.
        Py_INCREF(o);
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
        if (it != pyLayers.end())
            it->second.push_back(o);
        else
            pyLayers[type] = std::vector<PyObject*>(1, o);
    }

    // Removes the newest implementation of layer `type`, if any.
    // Must be called with the GIL held (it is, when invoked from the binding).
    static void unregisterLayer(const std::string& type)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
        if (it != pyLayers.end())
        {
            PyObject* released = it->second.empty() ? NULL : it->second.back();
            if (it->second.size() > 1)
                it->second.pop_back();
            else
                pyLayers.erase(it);
            // Drop the reference taken by registerLayer().
            Py_XDECREF(released);
        }
    }

    // Layer factory callback: instantiates the newest Python class registered
    // for params.type.
    static Ptr<dnn::Layer> create(dnn::LayerParams &params)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(params.type);
        if (it == pyLayers.end())
            CV_Error(Error::StsNotImplemented, "Layer with a type \"" + params.type +
                                               "\" is not implemented");
        CV_Assert(!it->second.empty());
        return Ptr<dnn::Layer>(new pycvLayer(params, it->second.back()));
    }

    // Asks the Python object for the output shapes via getMemoryShapes(inputs).
    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &) const CV_OVERRIDE
    {
        PyGILState_STATE gstate;
        gstate = PyGILState_Ensure();

        PyObject* args = PyList_New(inputs.size());
        for(size_t i = 0; i < inputs.size(); ++i)
            PyList_SetItem(args, i, pyopencv_from_generic_vec(inputs[i]));

        PyObject* name = PyString_FromString("getMemoryShapes");
        PyObject* res = PyObject_CallMethodObjArgs(o, name, args, NULL);
        Py_XDECREF(name);   // the method name is a new reference owned by us
        Py_DECREF(args);

        // The result must be converted while the GIL is still held.
        const bool called = res != NULL;
        bool converted = false;
        if (called)
        {
            converted = pyopencv_to_generic_vec(res, outputs, ArgInfo("", 0));
            Py_DECREF(res);
        }
        PyGILState_Release(gstate);

        if (!called)
            CV_Error(Error::StsNotImplemented, "Failed to call \"getMemoryShapes\" method");
        CV_Assert(converted);
        return false;
    }

    // Calls forward(inputs) on the Python object and copies the returned arrays
    // into the preallocated outputs; count, sizes and types must match.
    virtual void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        // Pure OpenCV work: done before the GIL is taken.
        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        PyGILState_STATE gstate;
        gstate = PyGILState_Ensure();

        PyObject* args = pyopencv_from(inputs);
        PyObject* name = PyString_FromString("forward");
        PyObject* res = PyObject_CallMethodObjArgs(o, name, args, NULL);
        Py_XDECREF(name);   // the method name is a new reference owned by us
        Py_DECREF(args);

        std::vector<Mat> pyOutputs;
        const bool called = res != NULL;
        bool converted = false;
        if (called)
        {
            converted = pyopencv_to(res, pyOutputs, ArgInfo("", 0));
            Py_DECREF(res);
        }
        PyGILState_Release(gstate);

        if (!called)
            CV_Error(Error::StsNotImplemented, "Failed to call \"forward\" method");
        CV_Assert(converted);

        CV_Assert(pyOutputs.size() == outputs.size());
        for (size_t i = 0; i < outputs.size(); ++i)
        {
            CV_Assert(pyOutputs[i].size == outputs[i].size);
            CV_Assert(pyOutputs[i].type() == outputs[i].type());
            pyOutputs[i].copyTo(outputs[i]);
        }
    }

private:
    // Map layers types to python classes.
    static std::map<std::string, std::vector<PyObject*> > pyLayers;
    PyObject* o;  // Instance of implemented python layer.
};
|
||||
|
||||
std::map<std::string, std::vector<PyObject*> > pycvLayer::pyLayers;
|
||||
|
||||
// Python binding registerLayer(type, class): registers a callable that
// creates custom layers of the given type.
static PyObject *pyopencv_cv_dnn_registerLayer(PyObject*, PyObject *args, PyObject *kw)
{
    const char *keywords[] = { "type", "class", NULL };
    char* layerType;
    PyObject *classInstance;

    const bool parsed =
        PyArg_ParseTupleAndKeywords(args, kw, "sO", (char**)keywords, &layerType, &classInstance) != 0;
    if (!parsed)
        return NULL;

    if (PyCallable_Check(classInstance) == 0)
    {
        PyErr_SetString(PyExc_TypeError, "class must be callable");
        return NULL;
    }

    // Remember the Python class first, then expose the factory to OpenCV.
    pycvLayer::registerLayer(layerType, classInstance);
    dnn::LayerFactory::registerLayer(layerType, pycvLayer::create);
    Py_RETURN_NONE;
}
|
||||
|
||||
// Python binding unregisterLayer(type): drops the newest Python
// implementation of the given layer type and unregisters it from the factory.
static PyObject *pyopencv_cv_dnn_unregisterLayer(PyObject*, PyObject *args, PyObject *kw)
{
    const char *keywords[] = { "type", NULL };
    char* layerType;

    const bool parsed =
        PyArg_ParseTupleAndKeywords(args, kw, "s", (char**)keywords, &layerType) != 0;
    if (!parsed)
        return NULL;

    pycvLayer::unregisterLayer(layerType);
    dnn::LayerFactory::unregisterLayer(layerType);
    Py_RETURN_NONE;
}
|
||||
|
||||
#endif // HAVE_OPENCV_DNN
|
||||
415
3rdparty/opencv-4.5.4/modules/dnn/misc/python/test/test_dnn.py
vendored
Normal file
415
3rdparty/opencv-4.5.4/modules/dnn/misc/python/test/test_dnn.py
vendored
Normal file
@@ -0,0 +1,415 @@
|
||||
#!/usr/bin/env python
|
||||
import os
|
||||
import cv2 as cv
|
||||
import numpy as np
|
||||
|
||||
from tests_common import NewOpenCVTests, unittest
|
||||
|
||||
def normAssert(test, a, b, msg=None, lInf=1e-5):
    """Fail `test` unless the largest absolute element-wise difference
    between `a` and `b` is below `lInf`."""
    max_abs_diff = np.abs(a - b).max()
    test.assertLess(max_abs_diff, lInf, msg)
|
||||
|
||||
def inter_area(box1, box2):
    """Return the intersection area of two boxes indexed as
    (x_min, y_min, x_max, y_max); boxes that do not overlap yield 0.
    """
    width = min(box1[2], box2[2]) - max(box1[0], box2[0])
    height = min(box1[3], box2[3]) - max(box1[1], box2[1])
    # A non-positive extent means there is no overlap. Multiplying anyway
    # would produce a negative area, or a bogus positive one when both
    # extents are negative.
    if width <= 0 or height <= 0:
        return 0
    return width * height
|
||||
|
||||
def area(box):
    """Return the area of a box indexed as (left, top, right, bottom)."""
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    return (right - left) * (bottom - top)
|
||||
|
||||
def box2str(box):
    """Format a (left, top, right, bottom) box as '[W x H from (left, top)]'."""
    left = box[0]
    top = box[1]
    return '[%f x %f from (%f, %f)]' % (box[2] - left, box[3] - top, left, top)
|
||||
|
||||
def normAssertDetections(test, refClassIds, refScores, refBoxes, testClassIds, testScores, testBoxes,
                         confThreshold=0.0, scores_diff=1e-5, boxes_iou_diff=1e-4):
    """Check that predicted detections match the reference detections.

    A prediction matches a not-yet-matched reference when the class ids are
    equal, the scores differ by less than `scores_diff` and the IoU of the
    boxes differs from 1 by less than `boxes_iou_diff`. Predictions scoring
    below `confThreshold` are ignored, as are unmatched references that do not
    score above it. Every unmatched item is listed in the failure message
    passed to `test.fail`.
    """
    matchedRefBoxes = [False] * len(refBoxes)
    errMsg = ''
    for i in range(len(testBoxes)):
        testScore = testScores[i]
        if testScore < confThreshold:
            continue

        testClassId, testBox = testClassIds[i], testBoxes[i]
        matched = False
        for j in range(len(refBoxes)):
            if (not matchedRefBoxes[j]) and testClassId == refClassIds[j] and \
               abs(testScore - refScores[j]) < scores_diff:
                interArea = inter_area(testBox, refBoxes[j])
                iou = interArea / (area(testBox) + area(refBoxes[j]) - interArea)
                if abs(iou - 1.0) < boxes_iou_diff:
                    matched = True
                    matchedRefBoxes[j] = True
                    # One prediction may account for one reference only;
                    # without this a single prediction could mark several
                    # identical references as matched and hide a miss.
                    break
        if not matched:
            errMsg += '\nUnmatched prediction: class %d score %f box %s' % (testClassId, testScore, box2str(testBox))

    for i in range(len(refBoxes)):
        if (not matchedRefBoxes[i]) and refScores[i] > confThreshold:
            errMsg += '\nUnmatched reference: class %d score %f box %s' % (refClassIds[i], refScores[i], box2str(refBoxes[i]))
    if errMsg:
        test.fail(errMsg)
|
||||
|
||||
def printParams(backend, target):
    """Print a short 'BACKEND/TARGET' label for the given backend and target ids."""
    backend_labels = {
        cv.dnn.DNN_BACKEND_OPENCV: 'OCV',
        cv.dnn.DNN_BACKEND_INFERENCE_ENGINE: 'DLIE'
    }
    target_labels = {
        cv.dnn.DNN_TARGET_CPU: 'CPU',
        cv.dnn.DNN_TARGET_OPENCL: 'OCL',
        cv.dnn.DNN_TARGET_OPENCL_FP16: 'OCL_FP16',
        cv.dnn.DNN_TARGET_MYRIAD: 'MYRIAD'
    }
    label = '%s/%s' % (backend_labels[backend], target_labels[target])
    print(label)
|
||||
|
||||
def getDefaultThreshold(target):
    """Return the comparison tolerance for `target`: 4e-3 for the
    OPENCL_FP16 and MYRIAD targets, 1e-5 for every other target."""
    relaxed_targets = (cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)
    return 4e-3 if target in relaxed_targets else 1e-5
|
||||
|
||||
testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False))
|
||||
|
||||
g_dnnBackendsAndTargets = None
|
||||
|
||||
class dnn_test(NewOpenCVTests):
    """Tests for the Python bindings of the cv.dnn module.

    The list of usable [backend, target] pairs is probed once (see setUp) and
    shared by all tests through the module-level g_dnnBackendsAndTargets cache.
    """

    def setUp(self):
        """Populate self.dnnBackendsAndTargets, probing backends only on first use."""
        super(dnn_test, self).setUp()

        global g_dnnBackendsAndTargets
        if g_dnnBackendsAndTargets is None:
            g_dnnBackendsAndTargets = self.initBackendsAndTargets()
        self.dnnBackendsAndTargets = g_dnnBackendsAndTargets

    def initBackendsAndTargets(self):
        """Build and return the list of [backend, target] pairs to test.

        OpenCV/CPU is always present. Inference Engine pairs are added only
        when checkIETarget() succeeds; OpenCL pairs only when OpenCL is
        available and enabled.
        """
        self.dnnBackendsAndTargets = [
            [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
        ]

        if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU):
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_CPU])
        if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD):
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_MYRIAD])

        if cv.ocl.haveOpenCL() and cv.ocl.useOpenCL():
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL])
            self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_OPENCL_FP16])
            if cv.ocl_Device.getDefault().isIntel():
                if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL):
                    self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL])
                if self.checkIETarget(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16):
                    self.dnnBackendsAndTargets.append([cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_TARGET_OPENCL_FP16])
        return self.dnnBackendsAndTargets

    def find_dnn_file(self, filename, required=True):
        """Locate a DNN test data file via self.find_file().

        Search roots are OPENCV_DNN_TEST_DATA_PATH (defaulting to the current
        directory) followed by OPENCV_TEST_DATA_PATH, which must be set
        (KeyError otherwise). With required=False the module-level
        testdata_required flag decides whether the file is mandatory.
        """
        if not required:
            required = testdata_required
        return self.find_file(filename, [os.environ.get('OPENCV_DNN_TEST_DATA_PATH', os.getcwd()),
                                         os.environ['OPENCV_TEST_DATA_PATH']],
                              required=required)

    def checkIETarget(self, backend, target):
        """Return True when a small convolution network runs on (backend, target)."""
        proto = self.find_dnn_file('dnn/layers/layer_convolution.prototxt')
        model = self.find_dnn_file('dnn/layers/layer_convolution.caffemodel')
        net = cv.dnn.readNet(proto, model)
        net.setPreferableBackend(backend)
        net.setPreferableTarget(target)
        inp = np.random.standard_normal([1, 2, 10, 11]).astype(np.float32)
        try:
            net.setInput(inp)
            net.forward()
        except Exception:
            # Any runtime failure means the pair is unusable. Exception (not
            # BaseException) so Ctrl-C and SystemExit still propagate.
            return False
        return True

    def test_getAvailableTargets(self):
        """The OpenCV backend must report the CPU target as available."""
        targets = cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_OPENCV)
        self.assertTrue(cv.dnn.DNN_TARGET_CPU in targets)

    def test_blobFromImage(self):
        """blobFromImage: positional and keyword calls agree and match a manual computation."""
        np.random.seed(324)

        width = 6
        height = 7
        scale = 1.0/127.5
        mean = (10, 20, 30)

        # Test arguments names.
        img = np.random.randint(0, 255, [4, 5, 3]).astype(np.uint8)
        blob = cv.dnn.blobFromImage(img, scale, (width, height), mean, True, False)
        blob_args = cv.dnn.blobFromImage(img, scalefactor=scale, size=(width, height),
                                         mean=mean, swapRB=True, crop=False)
        normAssert(self, blob, blob_args)

        # Test values.
        target = cv.resize(img, (width, height), interpolation=cv.INTER_LINEAR)
        target = target.astype(np.float32)
        target = target[:,:,[2, 1, 0]]  # BGR2RGB
        target[:,:,0] -= mean[0]
        target[:,:,1] -= mean[1]
        target[:,:,2] -= mean[2]
        target *= scale
        target = target.transpose(2, 0, 1).reshape(1, 3, height, width)  # to NCHW
        normAssert(self, blob, target)

    def test_model(self):
        """DetectionModel on MobileNet-SSD reproduces the reference detections."""
        img_path = self.find_dnn_file("dnn/street.png")
        weights = self.find_dnn_file("dnn/MobileNetSSD_deploy.caffemodel", required=False)
        config = self.find_dnn_file("dnn/MobileNetSSD_deploy.prototxt", required=False)
        if weights is None or config is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")

        frame = cv.imread(img_path)
        model = cv.dnn_DetectionModel(weights, config)
        model.setInputParams(size=(300, 300), mean=(127.5, 127.5, 127.5), scale=1.0/127.5)

        iouDiff = 0.05
        confThreshold = 0.0001
        nmsThreshold = 0
        scoreDiff = 1e-3

        classIds, confidences, boxes = model.detect(frame, confThreshold, nmsThreshold)

        refClassIds = (7, 15)
        refConfidences = (0.9998, 0.8793)
        refBoxes = ((328, 238, 85, 102), (101, 188, 34, 138))

        normAssertDetections(self, refClassIds, refConfidences, refBoxes,
                             classIds, confidences, boxes,confThreshold, scoreDiff, iouDiff)

        # cv.rectangle must accept the box in each of these container types.
        for box in boxes:
            cv.rectangle(frame, box, (0, 255, 0))
            cv.rectangle(frame, np.array(box), (0, 255, 0))
            cv.rectangle(frame, tuple(box), (0, 255, 0))
            cv.rectangle(frame, list(box), (0, 255, 0))

    def test_classification_model(self):
        """ClassificationModel on SqueezeNet v1.1 reproduces the stored probabilities."""
        img_path = self.find_dnn_file("dnn/googlenet_0.png")
        weights = self.find_dnn_file("dnn/squeezenet_v1.1.caffemodel", required=False)
        config = self.find_dnn_file("dnn/squeezenet_v1.1.prototxt")
        if weights is None or config is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/squeezenet_v1.1.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")

        # Loaded only after the skip check, so a missing optional model is
        # handled by the skip above before the reference file is looked up.
        ref = np.load(self.find_dnn_file("dnn/squeezenet_v1.1_prob.npy"))

        frame = cv.imread(img_path)
        model = cv.dnn_ClassificationModel(config, weights)
        model.setInputSize(227, 227)
        model.setInputCrop(True)

        out = model.predict(frame)
        normAssert(self, out, ref)

    def test_textdetection_model(self):
        """TextDetectionModel_DB returns a tuple of detections of shape (2, 4, 2)."""
        img_path = self.find_dnn_file("dnn/text_det_test1.png")
        weights = self.find_dnn_file("dnn/onnx/models/DB_TD500_resnet50.onnx", required=False)
        if weights is None:
            raise unittest.SkipTest("Missing DNN test files (onnx/models/DB_TD500_resnet50.onnx). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")

        frame = cv.imread(img_path)
        scale = 1.0 / 255.0
        size = (736, 736)
        mean = (122.67891434, 116.66876762, 104.00698793)

        model = cv.dnn_TextDetectionModel_DB(weights)
        model.setInputParams(scale, size, mean)
        out, _ = model.detect(frame)

        self.assertTrue(type(out) == tuple, msg='actual type {}'.format(str(type(out))))
        self.assertTrue(np.array(out).shape == (2, 4, 2))

    def test_face_detection(self):
        """The Caffe face detector reproduces reference detections on every backend/target."""
        proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt')
        model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=False)
        if proto is None or model is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/opencv_face_detector.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")

        img = self.get_sample('gpu/lbpcascade/er.png')
        blob = cv.dnn.blobFromImage(img, mean=(104, 177, 123), swapRB=False, crop=False)

        # Rows are indexed below as: [1] class id, [2] score, [3:] box values.
        # Converted once here instead of on every loop iteration.
        ref = np.array([[0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631],
                        [0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168],
                        [0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290],
                        [0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477],
                        [0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494],
                        [0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801]],
                       np.float32)
        refClassIds, refScores, refBoxes = ref[:, 1], ref[:, 2], ref[:, 3:]

        print('\n')
        for backend, target in self.dnnBackendsAndTargets:
            printParams(backend, target)

            net = cv.dnn.readNet(proto, model)
            net.setPreferableBackend(backend)
            net.setPreferableTarget(target)
            net.setInput(blob)
            out = net.forward().reshape(-1, 7)

            # Same 4e-3 / 1e-5 split that was previously spelled out inline.
            scoresDiff = getDefaultThreshold(target)
            iouDiff = 2e-2 if target in [cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD] else 1e-4

            testClassIds, testScores, testBoxes = out[:, 1], out[:, 2], out[:, 3:]

            normAssertDetections(self, refClassIds, refScores, refBoxes, testClassIds,
                                 testScores, testBoxes, 0.5, scoresDiff, iouDiff)

    def test_async(self):
        """forwardAsync results equal synchronous forward results (Inference Engine only)."""
        timeout = 10*1000*10**6  # in nanoseconds (10 sec)
        proto = self.find_dnn_file('dnn/layers/layer_convolution.prototxt')
        model = self.find_dnn_file('dnn/layers/layer_convolution.caffemodel')
        if proto is None or model is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/layers/layer_convolution.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")

        print('\n')
        for backend, target in self.dnnBackendsAndTargets:
            if backend != cv.dnn.DNN_BACKEND_INFERENCE_ENGINE:
                continue

            printParams(backend, target)

            netSync = cv.dnn.readNet(proto, model)
            netSync.setPreferableBackend(backend)
            netSync.setPreferableTarget(target)

            netAsync = cv.dnn.readNet(proto, model)
            netAsync.setPreferableBackend(backend)
            netAsync.setPreferableTarget(target)

            # Generate inputs
            numInputs = 10
            inputs = []
            for _ in range(numInputs):
                inputs.append(np.random.standard_normal([2, 6, 75, 113]).astype(np.float32))

            # Run synchronously
            refs = []
            for i in range(numInputs):
                netSync.setInput(inputs[i])
                refs.append(netSync.forward())

            # Run asynchronously. To make test more robust, process inputs in the reversed order.
            outs = []
            for i in reversed(range(numInputs)):
                netAsync.setInput(inputs[i])
                outs.insert(0, netAsync.forwardAsync())

            for i in reversed(range(numInputs)):
                ret, result = outs[i].get(timeoutNs=float(timeout))
                self.assertTrue(ret)
                normAssert(self, refs[i], result, 'Index: %d' % i, 1e-10)

    def test_nms(self):
        """NMSBoxes keeps both boxes when their overlap is below the NMS threshold."""
        confs = (1, 1)
        rects = ((0, 0, 0.4, 0.4), (0, 0, 0.2, 0.4))  # 0.5 overlap

        self.assertTrue(all(cv.dnn.NMSBoxes(rects, confs, 0, 0.6).ravel() == (0, 1)))

    def test_custom_layer(self):
        """A Python-defined layer registered with dnn_registerLayer is used by the net."""
        class CropLayer(object):
            def __init__(self, params, blobs):
                self.xstart = 0
                self.xend = 0
                self.ystart = 0
                self.yend = 0

            # Our layer receives two inputs. We need to crop the first input blob
            # to match a shape of the second one (keeping batch size and number of channels)
            def getMemoryShapes(self, inputs):
                inputShape, targetShape = inputs[0], inputs[1]
                batchSize, numChannels = inputShape[0], inputShape[1]
                height, width = targetShape[2], targetShape[3]
                self.ystart = (inputShape[2] - targetShape[2]) // 2
                self.xstart = (inputShape[3] - targetShape[3]) // 2
                self.yend = self.ystart + height
                self.xend = self.xstart + width
                return [[batchSize, numChannels, height, width]]

            def forward(self, inputs):
                return [inputs[0][:,:,self.ystart:self.yend,self.xstart:self.xend]]

        cv.dnn_registerLayer('CropCaffe', CropLayer)
        try:
            proto = '''
            name: "TestCrop"
            input: "input"
            input_shape
            {
                dim: 1
                dim: 2
                dim: 5
                dim: 5
            }
            input: "roi"
            input_shape
            {
                dim: 1
                dim: 2
                dim: 3
                dim: 3
            }
            layer {
              name: "Crop"
              type: "CropCaffe"
              bottom: "input"
              bottom: "roi"
              top: "Crop"
            }'''

            net = cv.dnn.readNetFromCaffe(bytearray(proto.encode()))
            for backend, target in self.dnnBackendsAndTargets:
                if backend != cv.dnn.DNN_BACKEND_OPENCV:
                    continue

                printParams(backend, target)

                net.setPreferableBackend(backend)
                net.setPreferableTarget(target)
                src_shape = [1, 2, 5, 5]
                dst_shape = [1, 2, 3, 3]
                inp = np.arange(0, np.prod(src_shape), dtype=np.float32).reshape(src_shape)
                roi = np.empty(dst_shape, dtype=np.float32)
                net.setInput(inp, "input")
                net.setInput(roi, "roi")
                out = net.forward()
                ref = inp[:, :, 1:4, 1:4]
                normAssert(self, out, ref)
        finally:
            # Always unregister, even when an assertion above fails, so the
            # custom layer does not stay registered for later tests.
            cv.dnn_unregisterLayer('CropCaffe')

    def test_input_3d(self):
        """Check that dnn module can work with 3D tensor as input for network."""
        model = self.find_dnn_file('dnn/onnx/models/hidden_lstm.onnx')
        input_file = self.find_dnn_file('dnn/onnx/data/input_hidden_lstm.npy')
        output_file = self.find_dnn_file('dnn/onnx/data/output_hidden_lstm.npy')
        if model is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/onnx/models/hidden_lstm.onnx). "
                                    "Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")
        if input_file is None or output_file is None:
            raise unittest.SkipTest("Missing DNN test files (dnn/onnx/data/{input/output}_hidden_lstm.npy). "
                                    "Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.")

        # Named input_blob so the builtin input() is not shadowed.
        input_blob = np.load(input_file)
        # we have to expand the shape of input tensor because Python bindings cut 3D tensors to 2D
        # it should be fixed in future. see : https://github.com/opencv/opencv/issues/19091
        # please remove `expand_dims` after that
        input_blob = np.expand_dims(input_blob, axis=3)
        gold_output = np.load(output_file)

        for backend, target in self.dnnBackendsAndTargets:
            printParams(backend, target)

            net = cv.dnn.readNet(model)

            net.setPreferableBackend(backend)
            net.setPreferableTarget(target)

            net.setInput(input_blob)
            real_output = net.forward()

            normAssert(self, real_output, gold_output, "", getDefaultThreshold(target))
|
||||
|
||||
if __name__ == '__main__':
|
||||
NewOpenCVTests.bootstrap()
|
||||
365
3rdparty/opencv-4.5.4/modules/dnn/misc/quantize_face_detector.py
vendored
Normal file
365
3rdparty/opencv-4.5.4/modules/dnn/misc/quantize_face_detector.py
vendored
Normal file
@@ -0,0 +1,365 @@
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import argparse
|
||||
import cv2 as cv
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
import struct
|
||||
|
||||
if sys.version_info > (3,):
|
||||
long = int
|
||||
|
||||
from tensorflow.python.tools import optimize_for_inference_lib
|
||||
from tensorflow.tools.graph_transforms import TransformGraph
|
||||
from tensorflow.core.framework.node_def_pb2 import NodeDef
|
||||
from google.protobuf import text_format
|
||||
|
||||
# Command-line interface: Caffe model/prototxt in, TensorFlow .pb/.pbtxt out.
parser = argparse.ArgumentParser(description="Use this script to create TensorFlow graph "
                                             "with weights from OpenCV's face detection network. "
                                             "Only backbone part of SSD model is converted this way. "
                                             "Look for .pbtxt configuration file at "
                                             "https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/opencv_face_detector.pbtxt")
parser.add_argument('--model', help='Path to .caffemodel weights', required=True)
parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True)
parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True)
parser.add_argument('--pbtxt', help='Path to output .pbtxt TensorFlow graph', required=True)
parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true')
parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true')
args = parser.parse_args()

# The two weight formats are mutually exclusive. Report it as a usage error
# rather than an assert, which is stripped under `python -O` and otherwise
# produces a bare AssertionError traceback.
if args.quantize and args.fp16:
    parser.error('--quantize and --fp16 are mutually exclusive')
|
||||
|
||||
dtype = tf.float16 if args.fp16 else tf.float32
|
||||
|
||||
################################################################################
|
||||
cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model)
|
||||
|
||||
def dnnLayer(name):
    """Return the layer of the loaded Caffe network (cvNet) called `name`."""
    layer_id = cvNet.getLayerId(name)
    return cvNet.getLayer(long(layer_id))
|
||||
|
||||
def scale(x, name):
    """Multiply `x` by the weights of the Caffe layer `name`, adding its bias if present.

    The first blob of the layer is the multiplier; a second blob, when the
    layer has one, is added as a bias.
    """
    with tf.variable_scope(name):
        blobs = dnnLayer(name).blobs
        multiplier = tf.Variable(blobs[0].flatten(), dtype=dtype, name='mul')
        if len(blobs) <= 1:
            return tf.multiply(x, multiplier, name)
        offset = tf.Variable(blobs[1].flatten(), dtype=dtype, name='add')
        return tf.nn.bias_add(tf.multiply(x, multiplier), offset)
|
||||
|
||||
def conv(x, name, stride=1, pad='SAME', dilation=1, activ=None):
    """Build a 2-D convolution from the weights of the Caffe layer `name`.

    Args:
        x: input tensor.
        name: Caffe layer name; also used as the TensorFlow variable scope.
        stride: spatial stride; must be 1 when `dilation` is not 1.
        pad: TensorFlow padding mode passed to the convolution op.
        dilation: 1 selects tf.nn.conv2d, anything else tf.nn.atrous_conv2d.
        activ: optional callable applied to the result.
    """
    with tf.variable_scope(name):
        layer = dnnLayer(name)
        # Blob axes are permuted with (2, 3, 1, 0) before use as the TF filter.
        kernel = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights')
        if dilation != 1:
            assert(stride == 1)
            out = tf.nn.atrous_conv2d(x, kernel, rate=dilation, padding=pad)
        else:
            out = tf.nn.conv2d(x, filter=kernel, strides=(1, stride, stride, 1), padding=pad)

        if len(layer.blobs) > 1:
            bias = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias')
            out = tf.nn.bias_add(out, bias)

        if activ:
            return activ(out)
        return out
|
||||
|
||||
def batch_norm(x, name):
    """Build an inference-mode fused batch normalization from the Caffe layer `name`.

    The layer must have at least three blobs, read here as mean, std and
    scale; a fourth blob, when present, is used as beta. The result is cast
    back to the script-wide `dtype` when it differs.
    """
    with tf.variable_scope(name):
        # Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input.
        # Here we do a cast to fp32 but remove it in the frozen graph.
        if x.dtype != tf.float32:
            x = tf.cast(x, tf.float32)

        layer = dnnLayer(name)
        assert(len(layer.blobs) >= 3)

        mean_values = layer.blobs[0].flatten()
        std_values = layer.blobs[1].flatten()
        scale_values = layer.blobs[2].flatten()

        epsilon = 1e-5
        has_beta = len(layer.blobs) > 3
        has_gamma = scale_values.shape != (1,)

        # With a single scale value and no beta blob, that value divides the
        # stored mean and std.
        if not (has_gamma or has_beta):
            mean_values /= scale_values[0]
            std_values /= scale_values[0]

        mean_var = tf.Variable(mean_values, dtype=tf.float32, name='mean')
        std_var = tf.Variable(std_values, dtype=tf.float32, name='std')

        if has_gamma:
            gamma_init = scale_values
        else:
            gamma_init = np.ones(mean_var.shape)
        gamma = tf.Variable(gamma_init, dtype=tf.float32, name='gamma')

        if has_beta:
            beta_init = layer.blobs[3].flatten()
        else:
            beta_init = np.zeros(mean_var.shape)
        beta = tf.Variable(beta_init, dtype=tf.float32, name='beta')

        normalized = tf.nn.fused_batch_norm(x, gamma, beta, mean_var, std_var, epsilon,
                                            is_training=False)[0]
        if normalized.dtype != dtype:
            normalized = tf.cast(normalized, dtype)
        return normalized
|
||||
|
||||
def l2norm(x, name):
    """L2-normalize `x` along axis 3 and multiply by the weights of the Caffe layer `name`."""
    with tf.variable_scope(name):
        weights = tf.Variable(dnnLayer(name).blobs[0].flatten(), dtype=dtype, name='mul')
        normalized = tf.nn.l2_normalize(x, 3, epsilon=1e-10)
        return normalized * weights
|
||||
|
||||
### Graph definition ###########################################################
|
||||
inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data')
|
||||
data_bn = batch_norm(inp, 'data_bn')
|
||||
data_scale = scale(data_bn, 'data_scale')
|
||||
|
||||
# Instead of tf.pad we use tf.space_to_batch_nd layers which override convolution's padding strategy to explicit numbers
|
||||
# data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]])
|
||||
data_scale = tf.space_to_batch_nd(data_scale, [1, 1], [[3, 3], [3, 3]], name='Pad')
|
||||
conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h')
|
||||
|
||||
conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h')
|
||||
conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h')
|
||||
conv1_relu = tf.nn.relu(conv1_scale_h)
|
||||
conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2),
|
||||
padding='SAME', name='conv1_pool')
|
||||
|
||||
layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h')
|
||||
layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h')
|
||||
layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h')
|
||||
layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h)
|
||||
layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h')
|
||||
layer_64_1_sum = layer_64_1_conv2_h + conv1_pool
|
||||
|
||||
layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h')
|
||||
layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h')
|
||||
layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h)
|
||||
layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h')
|
||||
layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2')
|
||||
layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2')
|
||||
layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2)
|
||||
layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2')
|
||||
layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h')
|
||||
layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h
|
||||
|
||||
layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1')
|
||||
layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1')
|
||||
layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1)
|
||||
|
||||
# layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 0]])
|
||||
layer_256_1_conv1 = tf.space_to_batch_nd(layer_256_1_relu1, [1, 1], [[1, 1], [1, 1]], name='Pad_1')
|
||||
layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1')
|
||||
|
||||
layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2')
|
||||
layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2')
|
||||
layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2)
|
||||
layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2')
|
||||
layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand')
|
||||
layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand
|
||||
|
||||
layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1')
|
||||
layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1')
|
||||
layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1)
|
||||
layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h')
|
||||
layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h')
|
||||
layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h')
|
||||
layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h)
|
||||
layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h')
|
||||
layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h')
|
||||
layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h
|
||||
|
||||
last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h')
|
||||
last_scale_h = scale(last_bn_h, 'last_scale_h')
|
||||
fc7 = tf.nn.relu(last_scale_h, name='last_relu')
|
||||
|
||||
conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu)
|
||||
conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu)
|
||||
conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu)
|
||||
|
||||
# conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]])
|
||||
conv7_2_h = tf.space_to_batch_nd(conv7_1_h, [1, 1], [[1, 1], [1, 1]], name='Pad_2')
|
||||
conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu)
|
||||
|
||||
conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', activ=tf.nn.relu)
|
||||
conv8_2_h = conv(conv8_1_h, pad='VALID', name='conv8_2_h', activ=tf.nn.relu)
|
||||
conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu)
|
||||
conv9_2_h = conv(conv9_1_h, pad='VALID', name='conv9_2_h', activ=tf.nn.relu)
|
||||
|
||||
conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm')
|
||||
|
||||
### Locations and confidences ##################################################
|
||||
locations = []
|
||||
confidences = []
|
||||
flattenLayersNames = [] # Collect all reshape layers names that should be replaced to flattens.
|
||||
for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']):
|
||||
for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h],
|
||||
['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']):
|
||||
name += suffix
|
||||
flat = tf.layers.flatten(conv(bottom, name))
|
||||
flattenLayersNames.append(flat.name[:flat.name.find(':')])
|
||||
top.append(flat)
|
||||
|
||||
mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc')
|
||||
mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf')
|
||||
|
||||
total = int(np.prod(mbox_conf.shape[1:]))
|
||||
mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape')
|
||||
mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax')
|
||||
mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten')
|
||||
flattenLayersNames.append('mbox_conf_flatten')
|
||||
|
||||
with tf.Session() as sess:
|
||||
sess.run(tf.global_variables_initializer())
|
||||
|
||||
### Check correctness ######################################################
|
||||
out_nodes = ['mbox_loc', 'mbox_conf_flatten']
|
||||
inp_nodes = [inp.name[:inp.name.find(':')]]
|
||||
|
||||
np.random.seed(2701)
|
||||
inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32)
|
||||
|
||||
cvNet.setInput(inputData)
|
||||
cvNet.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
|
||||
outDNN = cvNet.forward(out_nodes)
|
||||
|
||||
outTF = sess.run([mbox_loc, mbox_conf_flatten], feed_dict={inp: inputData.transpose(0, 2, 3, 1)})
|
||||
print('Max diff @ locations: %e' % np.max(np.abs(outDNN[0] - outTF[0])))
|
||||
print('Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1])))
|
||||
|
||||
# Save a graph
|
||||
graph_def = sess.graph.as_graph_def()
|
||||
|
||||
# Freeze graph. Replaces variables to constants.
|
||||
graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes)
|
||||
# Optimize graph. Removes training-only ops, unused nodes.
|
||||
graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes, out_nodes, dtype.as_datatype_enum)
|
||||
# Fuse constant operations.
|
||||
transforms = ["fold_constants(ignore_errors=True)"]
|
||||
if args.quantize:
|
||||
transforms += ["quantize_weights(minimum_size=0)"]
|
||||
transforms += ["sort_by_execution_order"]
|
||||
graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms)
|
||||
|
||||
# By default, float16 weights are stored in repeated tensor's field called
|
||||
# `half_val`. It has type int32 with leading zeros for unused bytes.
|
||||
# This type is encoded by Variant that means only 7 bits are used for value
|
||||
# representation but the last one is indicated the end of encoding. This way
|
||||
# float16 might takes 1 or 2 or 3 bytes depends on value. To improve compression,
|
||||
# we replace all `half_val` values to `tensor_content` using only 2 bytes for everyone.
|
||||
for node in graph_def.node:
|
||||
if 'value' in node.attr:
|
||||
halfs = node.attr["value"].tensor.half_val
|
||||
if not node.attr["value"].tensor.tensor_content and halfs:
|
||||
node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)
|
||||
node.attr["value"].tensor.ClearField('half_val')
|
||||
|
||||
# Serialize
|
||||
with tf.gfile.FastGFile(args.pb, 'wb') as f:
|
||||
f.write(graph_def.SerializeToString())
|
||||
|
||||
|
||||
################################################################################
|
||||
# Write a text graph representation
|
||||
################################################################################
|
||||
def tensorMsg(values):
    """Return a protobuf text-format DT_FLOAT tensor message holding `values`.

    The message declares a single dimension of len(values) followed by one
    `float_val` entry per value, each formatted with %f.
    """
    header = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values)
    entries = ''.join('float_val: %f ' % v for v in values)
    return header + entries + '}'
|
||||
|
||||
# Remove Const nodes and unused attributes.
# Iterating in reverse keeps the remaining indices valid while deleting.
for i in reversed(range(len(graph_def.node))):
    if graph_def.node[i].op in ['Const', 'Dequantize']:
        del graph_def.node[i]
        # After the deletion, index i refers to a node that was already
        # processed, or is out of range when the deleted node was the last
        # one, so there is nothing left to strip for this index.
        continue
    for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',
                 'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training',
                 'Tpaddings', 'Tblock_shape', 'Tcrops']:
        if attr in graph_def.node[i].attr:
            del graph_def.node[i].attr[attr]
|
||||
|
||||
# Append prior box generators
|
||||
min_sizes = [30, 60, 111, 162, 213, 264]
|
||||
max_sizes = [60, 111, 162, 213, 264, 315]
|
||||
steps = [8, 16, 32, 64, 100, 300]
|
||||
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
|
||||
layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h]
|
||||
for i in range(6):
|
||||
priorBox = NodeDef()
|
||||
priorBox.name = 'PriorBox_%d' % i
|
||||
priorBox.op = 'PriorBox'
|
||||
priorBox.input.append(layers[i].name[:layers[i].name.find(':')])
|
||||
priorBox.input.append(inp_nodes[0]) # data
|
||||
|
||||
text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"])
|
||||
text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"])
|
||||
text_format.Merge('b: true', priorBox.attr["flip"])
|
||||
text_format.Merge('b: false', priorBox.attr["clip"])
|
||||
text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"])
|
||||
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])
|
||||
text_format.Merge('f: %f' % steps[i], priorBox.attr["step"])
|
||||
text_format.Merge('f: 0.5', priorBox.attr["offset"])
|
||||
graph_def.node.extend([priorBox])
|
||||
|
||||
# Concatenate prior boxes
|
||||
concat = NodeDef()
|
||||
concat.name = 'mbox_priorbox'
|
||||
concat.op = 'ConcatV2'
|
||||
for i in range(6):
|
||||
concat.input.append('PriorBox_%d' % i)
|
||||
concat.input.append('mbox_loc/axis')
|
||||
graph_def.node.extend([concat])
|
||||
|
||||
# DetectionOutput layer
|
||||
detectionOut = NodeDef()
|
||||
detectionOut.name = 'detection_out'
|
||||
detectionOut.op = 'DetectionOutput'
|
||||
|
||||
detectionOut.input.append('mbox_loc')
|
||||
detectionOut.input.append('mbox_conf_flatten')
|
||||
detectionOut.input.append('mbox_priorbox')
|
||||
|
||||
text_format.Merge('i: 2', detectionOut.attr['num_classes'])
|
||||
text_format.Merge('b: true', detectionOut.attr['share_location'])
|
||||
text_format.Merge('i: 0', detectionOut.attr['background_label_id'])
|
||||
text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold'])
|
||||
text_format.Merge('i: 400', detectionOut.attr['top_k'])
|
||||
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
|
||||
text_format.Merge('i: 200', detectionOut.attr['keep_top_k'])
|
||||
text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])
|
||||
|
||||
graph_def.node.extend([detectionOut])
|
||||
|
||||
# Replace L2Normalization subgraph onto a single node.
|
||||
for i in reversed(range(len(graph_def.node))):
|
||||
if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square',
|
||||
'conv4_3_norm/l2_normalize/Sum',
|
||||
'conv4_3_norm/l2_normalize/Maximum',
|
||||
'conv4_3_norm/l2_normalize/Rsqrt']:
|
||||
del graph_def.node[i]
|
||||
for node in graph_def.node:
|
||||
if node.name == 'conv4_3_norm/l2_normalize':
|
||||
node.op = 'L2Normalize'
|
||||
node.input.pop()
|
||||
node.input.pop()
|
||||
node.input.append(layer_256_1_relu1.name)
|
||||
node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')
|
||||
break
|
||||
|
||||
softmaxShape = NodeDef()
|
||||
softmaxShape.name = 'reshape_before_softmax'
|
||||
softmaxShape.op = 'Const'
|
||||
text_format.Merge(
|
||||
'tensor {'
|
||||
' dtype: DT_INT32'
|
||||
' tensor_shape { dim { size: 3 } }'
|
||||
' int_val: 0'
|
||||
' int_val: -1'
|
||||
' int_val: 2'
|
||||
'}', softmaxShape.attr["value"])
|
||||
graph_def.node.extend([softmaxShape])
|
||||
|
||||
for node in graph_def.node:
|
||||
if node.name == 'mbox_conf_reshape':
|
||||
node.input[1] = softmaxShape.name
|
||||
elif node.name == 'mbox_conf_softmax':
|
||||
text_format.Merge('i: 2', node.attr['axis'])
|
||||
elif node.name in flattenLayersNames:
|
||||
node.op = 'Flatten'
|
||||
inpName = node.input[0]
|
||||
node.input.pop()
|
||||
node.input.pop()
|
||||
node.input.append(inpName)
|
||||
|
||||
tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True)
|
||||
2153
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/attr_value.pb.cc
vendored
Normal file
2153
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/attr_value.pb.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1749
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/attr_value.pb.h
vendored
Normal file
1749
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/attr_value.pb.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1892
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/function.pb.cc
vendored
Normal file
1892
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/function.pb.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1385
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/function.pb.h
vendored
Normal file
1385
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/function.pb.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1295
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/graph.pb.cc
vendored
Normal file
1295
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/graph.pb.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
968
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/graph.pb.h
vendored
Normal file
968
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/graph.pb.h
vendored
Normal file
@@ -0,0 +1,968 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: graph.proto
|
||||
|
||||
#ifndef PROTOBUF_graph_2eproto__INCLUDED
|
||||
#define PROTOBUF_graph_2eproto__INCLUDED
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3005000
|
||||
#error This file was generated by a newer version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please update
|
||||
#error your headers.
|
||||
#endif
|
||||
#if 3005001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION
|
||||
#error This file was generated by an older version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please
|
||||
#error regenerate this file with a newer version of protoc.
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
#include <google/protobuf/generated_message_table_driven.h>
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
#include <google/protobuf/metadata.h>
|
||||
#include <google/protobuf/message.h>
|
||||
#include <google/protobuf/repeated_field.h> // IWYU pragma: export
|
||||
#include <google/protobuf/extension_set.h> // IWYU pragma: export
|
||||
#include <google/protobuf/map.h> // IWYU pragma: export
|
||||
#include <google/protobuf/map_entry.h>
|
||||
#include <google/protobuf/map_field_inl.h>
|
||||
#include <google/protobuf/unknown_field_set.h>
|
||||
#include "attr_value.pb.h"
|
||||
#include "function.pb.h"
|
||||
#include "versions.pb.h"
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace protobuf_graph_2eproto {
|
||||
// Internal implementation detail -- do not use these members.
|
||||
struct TableStruct {
|
||||
static const ::google::protobuf::internal::ParseTableField entries[];
|
||||
static const ::google::protobuf::internal::AuxillaryParseTableField aux[];
|
||||
static const ::google::protobuf::internal::ParseTable schema[3];
|
||||
static const ::google::protobuf::internal::FieldMetadata field_metadata[];
|
||||
static const ::google::protobuf::internal::SerializationTable serialization_table[];
|
||||
static const ::google::protobuf::uint32 offsets[];
|
||||
};
|
||||
void AddDescriptors();
|
||||
void InitDefaultsGraphDefImpl();
|
||||
void InitDefaultsGraphDef();
|
||||
void InitDefaultsNodeDef_AttrEntry_DoNotUseImpl();
|
||||
void InitDefaultsNodeDef_AttrEntry_DoNotUse();
|
||||
void InitDefaultsNodeDefImpl();
|
||||
void InitDefaultsNodeDef();
|
||||
inline void InitDefaults() {
|
||||
InitDefaultsGraphDef();
|
||||
InitDefaultsNodeDef_AttrEntry_DoNotUse();
|
||||
InitDefaultsNodeDef();
|
||||
}
|
||||
} // namespace protobuf_graph_2eproto
|
||||
namespace opencv_tensorflow {
|
||||
class GraphDef;
|
||||
class GraphDefDefaultTypeInternal;
|
||||
extern GraphDefDefaultTypeInternal _GraphDef_default_instance_;
|
||||
class NodeDef;
|
||||
class NodeDefDefaultTypeInternal;
|
||||
extern NodeDefDefaultTypeInternal _NodeDef_default_instance_;
|
||||
class NodeDef_AttrEntry_DoNotUse;
|
||||
class NodeDef_AttrEntry_DoNotUseDefaultTypeInternal;
|
||||
extern NodeDef_AttrEntry_DoNotUseDefaultTypeInternal _NodeDef_AttrEntry_DoNotUse_default_instance_;
|
||||
} // namespace opencv_tensorflow
|
||||
namespace opencv_tensorflow {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
class GraphDef : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_tensorflow.GraphDef) */ {
|
||||
public:
|
||||
GraphDef();
|
||||
virtual ~GraphDef();
|
||||
|
||||
GraphDef(const GraphDef& from);
|
||||
|
||||
inline GraphDef& operator=(const GraphDef& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
GraphDef(GraphDef&& from) noexcept
|
||||
: GraphDef() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline GraphDef& operator=(GraphDef&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline ::google::protobuf::Arena* GetArena() const PROTOBUF_FINAL {
|
||||
return GetArenaNoVirtual();
|
||||
}
|
||||
inline void* GetMaybeArenaPointer() const PROTOBUF_FINAL {
|
||||
return MaybeArenaPtr();
|
||||
}
|
||||
static const ::google::protobuf::Descriptor* descriptor();
|
||||
static const GraphDef& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const GraphDef* internal_default_instance() {
|
||||
return reinterpret_cast<const GraphDef*>(
|
||||
&_GraphDef_default_instance_);
|
||||
}
|
||||
static PROTOBUF_CONSTEXPR int const kIndexInFileMessages =
|
||||
0;
|
||||
|
||||
void UnsafeArenaSwap(GraphDef* other);
|
||||
void Swap(GraphDef* other);
|
||||
friend void swap(GraphDef& a, GraphDef& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline GraphDef* New() const PROTOBUF_FINAL { return New(NULL); }
|
||||
|
||||
GraphDef* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL;
|
||||
void CopyFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void MergeFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void CopyFrom(const GraphDef& from);
|
||||
void MergeFrom(const GraphDef& from);
|
||||
void Clear() PROTOBUF_FINAL;
|
||||
bool IsInitialized() const PROTOBUF_FINAL;
|
||||
|
||||
size_t ByteSizeLong() const PROTOBUF_FINAL;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL;
|
||||
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
|
||||
bool deterministic, ::google::protobuf::uint8* target) const PROTOBUF_FINAL;
|
||||
int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; }
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const PROTOBUF_FINAL;
|
||||
void InternalSwap(GraphDef* other);
|
||||
protected:
|
||||
explicit GraphDef(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
static void ArenaDtor(void* object);
|
||||
inline void RegisterArenaDtor(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return _internal_metadata_.arena();
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return _internal_metadata_.raw_arena_ptr();
|
||||
}
|
||||
public:
|
||||
|
||||
::google::protobuf::Metadata GetMetadata() const PROTOBUF_FINAL;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// repeated .opencv_tensorflow.NodeDef node = 1;
|
||||
int node_size() const;
|
||||
void clear_node();
|
||||
static const int kNodeFieldNumber = 1;
|
||||
const ::opencv_tensorflow::NodeDef& node(int index) const;
|
||||
::opencv_tensorflow::NodeDef* mutable_node(int index);
|
||||
::opencv_tensorflow::NodeDef* add_node();
|
||||
::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::NodeDef >*
|
||||
mutable_node();
|
||||
const ::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::NodeDef >&
|
||||
node() const;
|
||||
|
||||
// .opencv_tensorflow.FunctionDefLibrary library = 2;
|
||||
bool has_library() const;
|
||||
void clear_library();
|
||||
static const int kLibraryFieldNumber = 2;
|
||||
private:
|
||||
void _slow_mutable_library();
|
||||
public:
|
||||
const ::opencv_tensorflow::FunctionDefLibrary& library() const;
|
||||
::opencv_tensorflow::FunctionDefLibrary* release_library();
|
||||
::opencv_tensorflow::FunctionDefLibrary* mutable_library();
|
||||
void set_allocated_library(::opencv_tensorflow::FunctionDefLibrary* library);
|
||||
void unsafe_arena_set_allocated_library(
|
||||
::opencv_tensorflow::FunctionDefLibrary* library);
|
||||
::opencv_tensorflow::FunctionDefLibrary* unsafe_arena_release_library();
|
||||
|
||||
// .opencv_tensorflow.VersionDef versions = 4;
|
||||
bool has_versions() const;
|
||||
void clear_versions();
|
||||
static const int kVersionsFieldNumber = 4;
|
||||
private:
|
||||
void _slow_mutable_versions();
|
||||
public:
|
||||
const ::opencv_tensorflow::VersionDef& versions() const;
|
||||
::opencv_tensorflow::VersionDef* release_versions();
|
||||
::opencv_tensorflow::VersionDef* mutable_versions();
|
||||
void set_allocated_versions(::opencv_tensorflow::VersionDef* versions);
|
||||
void unsafe_arena_set_allocated_versions(
|
||||
::opencv_tensorflow::VersionDef* versions);
|
||||
::opencv_tensorflow::VersionDef* unsafe_arena_release_versions();
|
||||
|
||||
// int32 version = 3 [deprecated = true];
|
||||
GOOGLE_PROTOBUF_DEPRECATED_ATTR void clear_version();
|
||||
GOOGLE_PROTOBUF_DEPRECATED_ATTR static const int kVersionFieldNumber = 3;
|
||||
GOOGLE_PROTOBUF_DEPRECATED_ATTR ::google::protobuf::int32 version() const;
|
||||
GOOGLE_PROTOBUF_DEPRECATED_ATTR void set_version(::google::protobuf::int32 value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:opencv_tensorflow.GraphDef)
|
||||
private:
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
|
||||
template <typename T> friend class ::google::protobuf::Arena::InternalHelper;
|
||||
typedef void InternalArenaConstructable_;
|
||||
typedef void DestructorSkippable_;
|
||||
::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::NodeDef > node_;
|
||||
::opencv_tensorflow::FunctionDefLibrary* library_;
|
||||
::opencv_tensorflow::VersionDef* versions_;
|
||||
::google::protobuf::int32 version_;
|
||||
mutable int _cached_size_;
|
||||
friend struct ::protobuf_graph_2eproto::TableStruct;
|
||||
friend void ::protobuf_graph_2eproto::InitDefaultsGraphDefImpl();
|
||||
};
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// Map-entry helper type for NodeDef's `attr` map field: a MapEntry keyed by
// ::std::string (TYPE_STRING) with ::opencv_tensorflow::AttrValue values
// (TYPE_MESSAGE).
class NodeDef_AttrEntry_DoNotUse
    : public ::google::protobuf::internal::MapEntry<
          NodeDef_AttrEntry_DoNotUse,
          ::std::string, ::opencv_tensorflow::AttrValue,
          ::google::protobuf::internal::WireFormatLite::TYPE_STRING,
          ::google::protobuf::internal::WireFormatLite::TYPE_MESSAGE,
          0 > {
 public:
  // Alias for the exact base-class instantiation above.
  typedef ::google::protobuf::internal::MapEntry<
      NodeDef_AttrEntry_DoNotUse,
      ::std::string, ::opencv_tensorflow::AttrValue,
      ::google::protobuf::internal::WireFormatLite::TYPE_STRING,
      ::google::protobuf::internal::WireFormatLite::TYPE_MESSAGE,
      0 > SuperType;
  NodeDef_AttrEntry_DoNotUse();
  NodeDef_AttrEntry_DoNotUse(::google::protobuf::Arena* arena);
  void MergeFrom(const NodeDef_AttrEntry_DoNotUse& other);
  // Reinterprets the address of the global default-instance object as a
  // pointer to this entry type.
  static const NodeDef_AttrEntry_DoNotUse* internal_default_instance() {
    const void* const storage = &_NodeDef_AttrEntry_DoNotUse_default_instance_;
    return reinterpret_cast<const NodeDef_AttrEntry_DoNotUse*>(storage);
  }
  void MergeFrom(const ::google::protobuf::Message& other) PROTOBUF_FINAL;
  ::google::protobuf::Metadata GetMetadata() const;
};
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
class NodeDef : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_tensorflow.NodeDef) */ {
|
||||
public:
|
||||
NodeDef();
|
||||
virtual ~NodeDef();
|
||||
|
||||
NodeDef(const NodeDef& from);
|
||||
|
||||
inline NodeDef& operator=(const NodeDef& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
NodeDef(NodeDef&& from) noexcept
|
||||
: NodeDef() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline NodeDef& operator=(NodeDef&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline ::google::protobuf::Arena* GetArena() const PROTOBUF_FINAL {
|
||||
return GetArenaNoVirtual();
|
||||
}
|
||||
inline void* GetMaybeArenaPointer() const PROTOBUF_FINAL {
|
||||
return MaybeArenaPtr();
|
||||
}
|
||||
static const ::google::protobuf::Descriptor* descriptor();
|
||||
static const NodeDef& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const NodeDef* internal_default_instance() {
|
||||
return reinterpret_cast<const NodeDef*>(
|
||||
&_NodeDef_default_instance_);
|
||||
}
|
||||
static PROTOBUF_CONSTEXPR int const kIndexInFileMessages =
|
||||
2;
|
||||
|
||||
void UnsafeArenaSwap(NodeDef* other);
|
||||
void Swap(NodeDef* other);
|
||||
friend void swap(NodeDef& a, NodeDef& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline NodeDef* New() const PROTOBUF_FINAL { return New(NULL); }
|
||||
|
||||
NodeDef* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL;
|
||||
void CopyFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void MergeFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void CopyFrom(const NodeDef& from);
|
||||
void MergeFrom(const NodeDef& from);
|
||||
void Clear() PROTOBUF_FINAL;
|
||||
bool IsInitialized() const PROTOBUF_FINAL;
|
||||
|
||||
size_t ByteSizeLong() const PROTOBUF_FINAL;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL;
|
||||
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
|
||||
bool deterministic, ::google::protobuf::uint8* target) const PROTOBUF_FINAL;
|
||||
int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; }
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const PROTOBUF_FINAL;
|
||||
void InternalSwap(NodeDef* other);
|
||||
protected:
|
||||
explicit NodeDef(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
static void ArenaDtor(void* object);
|
||||
inline void RegisterArenaDtor(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return _internal_metadata_.arena();
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return _internal_metadata_.raw_arena_ptr();
|
||||
}
|
||||
public:
|
||||
|
||||
::google::protobuf::Metadata GetMetadata() const PROTOBUF_FINAL;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// repeated string input = 3;
|
||||
int input_size() const;
|
||||
void clear_input();
|
||||
static const int kInputFieldNumber = 3;
|
||||
const ::std::string& input(int index) const;
|
||||
::std::string* mutable_input(int index);
|
||||
void set_input(int index, const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_input(int index, ::std::string&& value);
|
||||
#endif
|
||||
void set_input(int index, const char* value);
|
||||
void set_input(int index, const char* value, size_t size);
|
||||
::std::string* add_input();
|
||||
void add_input(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void add_input(::std::string&& value);
|
||||
#endif
|
||||
void add_input(const char* value);
|
||||
void add_input(const char* value, size_t size);
|
||||
const ::google::protobuf::RepeatedPtrField< ::std::string>& input() const;
|
||||
::google::protobuf::RepeatedPtrField< ::std::string>* mutable_input();
|
||||
|
||||
// map<string, .opencv_tensorflow.AttrValue> attr = 5;
|
||||
int attr_size() const;
|
||||
void clear_attr();
|
||||
static const int kAttrFieldNumber = 5;
|
||||
const ::google::protobuf::Map< ::std::string, ::opencv_tensorflow::AttrValue >&
|
||||
attr() const;
|
||||
::google::protobuf::Map< ::std::string, ::opencv_tensorflow::AttrValue >*
|
||||
mutable_attr();
|
||||
|
||||
// string name = 1;
|
||||
void clear_name();
|
||||
static const int kNameFieldNumber = 1;
|
||||
const ::std::string& name() const;
|
||||
void set_name(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_name(::std::string&& value);
|
||||
#endif
|
||||
void set_name(const char* value);
|
||||
void set_name(const char* value, size_t size);
|
||||
::std::string* mutable_name();
|
||||
::std::string* release_name();
|
||||
void set_allocated_name(::std::string* name);
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
::std::string* unsafe_arena_release_name();
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
void unsafe_arena_set_allocated_name(
|
||||
::std::string* name);
|
||||
|
||||
// string op = 2;
|
||||
void clear_op();
|
||||
static const int kOpFieldNumber = 2;
|
||||
const ::std::string& op() const;
|
||||
void set_op(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_op(::std::string&& value);
|
||||
#endif
|
||||
void set_op(const char* value);
|
||||
void set_op(const char* value, size_t size);
|
||||
::std::string* mutable_op();
|
||||
::std::string* release_op();
|
||||
void set_allocated_op(::std::string* op);
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
::std::string* unsafe_arena_release_op();
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
void unsafe_arena_set_allocated_op(
|
||||
::std::string* op);
|
||||
|
||||
// string device = 4;
|
||||
void clear_device();
|
||||
static const int kDeviceFieldNumber = 4;
|
||||
const ::std::string& device() const;
|
||||
void set_device(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_device(::std::string&& value);
|
||||
#endif
|
||||
void set_device(const char* value);
|
||||
void set_device(const char* value, size_t size);
|
||||
::std::string* mutable_device();
|
||||
::std::string* release_device();
|
||||
void set_allocated_device(::std::string* device);
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
::std::string* unsafe_arena_release_device();
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
void unsafe_arena_set_allocated_device(
|
||||
::std::string* device);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:opencv_tensorflow.NodeDef)
|
||||
private:
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
|
||||
template <typename T> friend class ::google::protobuf::Arena::InternalHelper;
|
||||
typedef void InternalArenaConstructable_;
|
||||
typedef void DestructorSkippable_;
|
||||
::google::protobuf::RepeatedPtrField< ::std::string> input_;
|
||||
::google::protobuf::internal::MapField<
|
||||
NodeDef_AttrEntry_DoNotUse,
|
||||
::std::string, ::opencv_tensorflow::AttrValue,
|
||||
::google::protobuf::internal::WireFormatLite::TYPE_STRING,
|
||||
::google::protobuf::internal::WireFormatLite::TYPE_MESSAGE,
|
||||
0 > attr_;
|
||||
::google::protobuf::internal::ArenaStringPtr name_;
|
||||
::google::protobuf::internal::ArenaStringPtr op_;
|
||||
::google::protobuf::internal::ArenaStringPtr device_;
|
||||
mutable int _cached_size_;
|
||||
friend struct ::protobuf_graph_2eproto::TableStruct;
|
||||
friend void ::protobuf_graph_2eproto::InitDefaultsNodeDefImpl();
|
||||
};
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif // __GNUC__
|
||||
// GraphDef
|
||||
|
||||
// repeated .opencv_tensorflow.NodeDef node = 1;
|
||||
inline int GraphDef::node_size() const {
|
||||
return node_.size();
|
||||
}
|
||||
inline void GraphDef::clear_node() {
|
||||
node_.Clear();
|
||||
}
|
||||
// Read-only access to the `node` element at position `index`.
inline const ::opencv_tensorflow::NodeDef& GraphDef::node(int index) const {
  // @@protoc_insertion_point(field_get:opencv_tensorflow.GraphDef.node)
  const ::opencv_tensorflow::NodeDef& entry = node_.Get(index);
  return entry;
}
|
||||
// Writable pointer to the `node` element at position `index`.
inline ::opencv_tensorflow::NodeDef* GraphDef::mutable_node(int index) {
  // @@protoc_insertion_point(field_mutable:opencv_tensorflow.GraphDef.node)
  ::opencv_tensorflow::NodeDef* const entry = node_.Mutable(index);
  return entry;
}
|
||||
// Adds an element to the repeated `node` field and returns the pointer
// handed back by the container's Add().
inline ::opencv_tensorflow::NodeDef* GraphDef::add_node() {
  // @@protoc_insertion_point(field_add:opencv_tensorflow.GraphDef.node)
  ::opencv_tensorflow::NodeDef* const appended = node_.Add();
  return appended;
}
|
||||
// Writable pointer to the whole container behind the repeated `node` field.
inline ::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::NodeDef >*
GraphDef::mutable_node() {
  // @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.GraphDef.node)
  ::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::NodeDef >* const
      all_nodes = &node_;
  return all_nodes;
}
|
||||
// Read-only reference to the whole container behind the repeated `node` field.
inline const ::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::NodeDef >&
GraphDef::node() const {
  // @@protoc_insertion_point(field_list:opencv_tensorflow.GraphDef.node)
  return this->node_;
}
|
||||
|
||||
// .opencv_tensorflow.VersionDef versions = 4;
|
||||
inline bool GraphDef::has_versions() const {
|
||||
return this != internal_default_instance() && versions_ != NULL;
|
||||
}
|
||||
// Returns the `versions` submessage, or the global
// _VersionDef_default_instance_ (reinterpreted as VersionDef) when the field
// pointer is null.
inline const ::opencv_tensorflow::VersionDef& GraphDef::versions() const {
  const ::opencv_tensorflow::VersionDef* const current = versions_;
  // @@protoc_insertion_point(field_get:opencv_tensorflow.GraphDef.versions)
  if (current != NULL) {
    return *current;
  }
  return *reinterpret_cast<const ::opencv_tensorflow::VersionDef*>(
      &::opencv_tensorflow::_VersionDef_default_instance_);
}
|
||||
// Detaches the `versions` submessage from this message and returns it; the
// field pointer is reset to NULL. When this message reports an arena, the
// pointer is first passed through DuplicateIfNonNull(ptr, NULL).
inline ::opencv_tensorflow::VersionDef* GraphDef::release_versions() {
  // @@protoc_insertion_point(field_release:opencv_tensorflow.GraphDef.versions)

  ::opencv_tensorflow::VersionDef* released = versions_;
  const bool on_arena = (GetArenaNoVirtual() != NULL);
  if (on_arena) {
    released = ::google::protobuf::internal::DuplicateIfNonNull(released, NULL);
  }
  versions_ = NULL;
  return released;
}
|
||||
// Returns the raw `versions_` pointer as-is and resets the field to NULL;
// no arena check or duplication is performed here.
inline ::opencv_tensorflow::VersionDef* GraphDef::unsafe_arena_release_versions() {
  // @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.GraphDef.versions)

  ::opencv_tensorflow::VersionDef* const released = versions_;
  versions_ = NULL;
  return released;
}
|
||||
// Returns the writable `versions` submessage pointer; when the pointer is
// still NULL, _slow_mutable_versions() is called first.
inline ::opencv_tensorflow::VersionDef* GraphDef::mutable_versions() {

  const bool missing = (versions_ == NULL);
  if (missing) {
    _slow_mutable_versions();
  }
  // @@protoc_insertion_point(field_mutable:opencv_tensorflow.GraphDef.versions)
  return versions_;
}
|
||||
// Replaces the `versions` submessage with `versions` (may be NULL).
// - The previous submessage is deleted only when this message reports no arena.
// - When the incoming object's arena differs from this message's arena, the
//   pointer is replaced by the result of GetOwnedMessage() before being stored.
inline void GraphDef::set_allocated_versions(::opencv_tensorflow::VersionDef* versions) {
  ::google::protobuf::Arena* const own_arena = GetArenaNoVirtual();
  if (own_arena == NULL) {
    delete reinterpret_cast< ::google::protobuf::MessageLite*>(versions_);
  }
  if (versions != NULL) {
    ::google::protobuf::Arena* const incoming_arena =
        reinterpret_cast< ::google::protobuf::MessageLite*>(versions)->GetArena();
    if (own_arena != incoming_arena) {
      versions = ::google::protobuf::internal::GetOwnedMessage(
          own_arena, versions, incoming_arena);
    }
  }
  versions_ = versions;
  // @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.GraphDef.versions)
}
|
||||
|
||||
// int32 version = 3 [deprecated = true];
|
||||
inline void GraphDef::clear_version() {
|
||||
version_ = 0;
|
||||
}
|
||||
// Current value of the deprecated int32 `version` field.
inline ::google::protobuf::int32 GraphDef::version() const {
  // @@protoc_insertion_point(field_get:opencv_tensorflow.GraphDef.version)
  return this->version_;
}
|
||||
// Stores `value` in the deprecated int32 `version` field.
inline void GraphDef::set_version(::google::protobuf::int32 value) {

  this->version_ = value;
  // @@protoc_insertion_point(field_set:opencv_tensorflow.GraphDef.version)
}
|
||||
|
||||
// .opencv_tensorflow.FunctionDefLibrary library = 2;
|
||||
inline bool GraphDef::has_library() const {
|
||||
return this != internal_default_instance() && library_ != NULL;
|
||||
}
|
||||
// Returns the `library` submessage, or the global
// _FunctionDefLibrary_default_instance_ (reinterpreted as FunctionDefLibrary)
// when the field pointer is null.
inline const ::opencv_tensorflow::FunctionDefLibrary& GraphDef::library() const {
  const ::opencv_tensorflow::FunctionDefLibrary* const current = library_;
  // @@protoc_insertion_point(field_get:opencv_tensorflow.GraphDef.library)
  if (current != NULL) {
    return *current;
  }
  return *reinterpret_cast<const ::opencv_tensorflow::FunctionDefLibrary*>(
      &::opencv_tensorflow::_FunctionDefLibrary_default_instance_);
}
|
||||
// Detaches the `library` submessage from this message and returns it; the
// field pointer is reset to NULL. When this message reports an arena, the
// pointer is first passed through DuplicateIfNonNull(ptr, NULL).
inline ::opencv_tensorflow::FunctionDefLibrary* GraphDef::release_library() {
  // @@protoc_insertion_point(field_release:opencv_tensorflow.GraphDef.library)

  ::opencv_tensorflow::FunctionDefLibrary* released = library_;
  const bool on_arena = (GetArenaNoVirtual() != NULL);
  if (on_arena) {
    released = ::google::protobuf::internal::DuplicateIfNonNull(released, NULL);
  }
  library_ = NULL;
  return released;
}
|
||||
// Returns the raw `library_` pointer as-is and resets the field to NULL;
// no arena check or duplication is performed here.
inline ::opencv_tensorflow::FunctionDefLibrary* GraphDef::unsafe_arena_release_library() {
  // @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.GraphDef.library)

  ::opencv_tensorflow::FunctionDefLibrary* const released = library_;
  library_ = NULL;
  return released;
}
|
||||
// Returns the writable `library` submessage pointer; when the pointer is
// still NULL, _slow_mutable_library() is called first.
inline ::opencv_tensorflow::FunctionDefLibrary* GraphDef::mutable_library() {

  const bool missing = (library_ == NULL);
  if (missing) {
    _slow_mutable_library();
  }
  // @@protoc_insertion_point(field_mutable:opencv_tensorflow.GraphDef.library)
  return library_;
}
|
||||
// Replaces the `library` submessage with `library` (may be NULL).
// - The previous submessage is deleted only when this message reports no arena.
// - When the incoming object's arena differs from this message's arena, the
//   pointer is replaced by the result of GetOwnedMessage() before being stored.
inline void GraphDef::set_allocated_library(::opencv_tensorflow::FunctionDefLibrary* library) {
  ::google::protobuf::Arena* const own_arena = GetArenaNoVirtual();
  if (own_arena == NULL) {
    delete reinterpret_cast< ::google::protobuf::MessageLite*>(library_);
  }
  if (library != NULL) {
    ::google::protobuf::Arena* const incoming_arena =
        reinterpret_cast< ::google::protobuf::MessageLite*>(library)->GetArena();
    if (own_arena != incoming_arena) {
      library = ::google::protobuf::internal::GetOwnedMessage(
          own_arena, library, incoming_arena);
    }
  }
  library_ = library;
  // @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.GraphDef.library)
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// NodeDef
|
||||
|
||||
// string name = 1;
|
||||
inline void NodeDef::clear_name() {
|
||||
name_.ClearToEmpty(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline const ::std::string& NodeDef::name() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.NodeDef.name)
|
||||
return name_.Get();
|
||||
}
|
||||
inline void NodeDef::set_name(const ::std::string& value) {
|
||||
|
||||
name_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.NodeDef.name)
|
||||
}
|
||||
#if LANG_CXX11
|
||||
inline void NodeDef::set_name(::std::string&& value) {
|
||||
|
||||
name_.Set(
|
||||
&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_rvalue:opencv_tensorflow.NodeDef.name)
|
||||
}
|
||||
#endif
|
||||
inline void NodeDef::set_name(const char* value) {
|
||||
GOOGLE_DCHECK(value != NULL);
|
||||
|
||||
name_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value),
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_char:opencv_tensorflow.NodeDef.name)
|
||||
}
|
||||
inline void NodeDef::set_name(const char* value,
|
||||
size_t size) {
|
||||
|
||||
name_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(
|
||||
reinterpret_cast<const char*>(value), size), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_pointer:opencv_tensorflow.NodeDef.name)
|
||||
}
|
||||
inline ::std::string* NodeDef::mutable_name() {
|
||||
|
||||
// @@protoc_insertion_point(field_mutable:opencv_tensorflow.NodeDef.name)
|
||||
return name_.Mutable(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline ::std::string* NodeDef::release_name() {
|
||||
// @@protoc_insertion_point(field_release:opencv_tensorflow.NodeDef.name)
|
||||
|
||||
return name_.Release(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline void NodeDef::set_allocated_name(::std::string* name) {
|
||||
if (name != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
name_.SetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), name,
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.NodeDef.name)
|
||||
}
|
||||
inline ::std::string* NodeDef::unsafe_arena_release_name() {
|
||||
// @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.NodeDef.name)
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
|
||||
return name_.UnsafeArenaRelease(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
GetArenaNoVirtual());
|
||||
}
|
||||
inline void NodeDef::unsafe_arena_set_allocated_name(
|
||||
::std::string* name) {
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
if (name != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
name_.UnsafeArenaSetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
name, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_unsafe_arena_set_allocated:opencv_tensorflow.NodeDef.name)
|
||||
}
|
||||
|
||||
// string op = 2;
|
||||
inline void NodeDef::clear_op() {
|
||||
op_.ClearToEmpty(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline const ::std::string& NodeDef::op() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.NodeDef.op)
|
||||
return op_.Get();
|
||||
}
|
||||
inline void NodeDef::set_op(const ::std::string& value) {
|
||||
|
||||
op_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.NodeDef.op)
|
||||
}
|
||||
#if LANG_CXX11
|
||||
inline void NodeDef::set_op(::std::string&& value) {
|
||||
|
||||
op_.Set(
|
||||
&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_rvalue:opencv_tensorflow.NodeDef.op)
|
||||
}
|
||||
#endif
|
||||
inline void NodeDef::set_op(const char* value) {
|
||||
GOOGLE_DCHECK(value != NULL);
|
||||
|
||||
op_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value),
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_char:opencv_tensorflow.NodeDef.op)
|
||||
}
|
||||
inline void NodeDef::set_op(const char* value,
|
||||
size_t size) {
|
||||
|
||||
op_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(
|
||||
reinterpret_cast<const char*>(value), size), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_pointer:opencv_tensorflow.NodeDef.op)
|
||||
}
|
||||
inline ::std::string* NodeDef::mutable_op() {
|
||||
|
||||
// @@protoc_insertion_point(field_mutable:opencv_tensorflow.NodeDef.op)
|
||||
return op_.Mutable(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline ::std::string* NodeDef::release_op() {
|
||||
// @@protoc_insertion_point(field_release:opencv_tensorflow.NodeDef.op)
|
||||
|
||||
return op_.Release(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline void NodeDef::set_allocated_op(::std::string* op) {
|
||||
if (op != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
op_.SetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), op,
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.NodeDef.op)
|
||||
}
|
||||
inline ::std::string* NodeDef::unsafe_arena_release_op() {
|
||||
// @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.NodeDef.op)
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
|
||||
return op_.UnsafeArenaRelease(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
GetArenaNoVirtual());
|
||||
}
|
||||
inline void NodeDef::unsafe_arena_set_allocated_op(
|
||||
::std::string* op) {
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
if (op != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
op_.UnsafeArenaSetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
op, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_unsafe_arena_set_allocated:opencv_tensorflow.NodeDef.op)
|
||||
}
|
||||
|
||||
// repeated string input = 3;
|
||||
inline int NodeDef::input_size() const {
|
||||
return input_.size();
|
||||
}
|
||||
// Empties the repeated `input` field by calling input_.Clear().
inline void NodeDef::clear_input() { input_.Clear(); }
|
||||
inline const ::std::string& NodeDef::input(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.NodeDef.input)
|
||||
return input_.Get(index);
|
||||
}
|
||||
inline ::std::string* NodeDef::mutable_input(int index) {
|
||||
// @@protoc_insertion_point(field_mutable:opencv_tensorflow.NodeDef.input)
|
||||
return input_.Mutable(index);
|
||||
}
|
||||
// Overwrites element `index` of `input` by assigning `value` to it.
inline void NodeDef::set_input(int index, const ::std::string& value) {
  // @@protoc_insertion_point(field_set:opencv_tensorflow.NodeDef.input)
  ::std::string* const element = input_.Mutable(index);
  element->assign(value);
}
|
||||
#if LANG_CXX11
|
||||
// Rvalue overload: move-assigns `value` into element `index` of `input`.
inline void NodeDef::set_input(int index, ::std::string&& value) {
  // @@protoc_insertion_point(field_set:opencv_tensorflow.NodeDef.input)
  ::std::string* const element = input_.Mutable(index);
  element->assign(std::move(value));
}
|
||||
#endif
|
||||
// C-string overload: assigns `value` to element `index` of `input`.
// Debug builds assert that `value` is non-NULL.
inline void NodeDef::set_input(int index, const char* value) {
  GOOGLE_DCHECK(value != NULL);
  ::std::string* const element = input_.Mutable(index);
  element->assign(value);
  // @@protoc_insertion_point(field_set_char:opencv_tensorflow.NodeDef.input)
}
|
||||
// Pointer-and-length overload: assigns the first `size` chars at `value` to
// element `index` of `input`.
inline void NodeDef::set_input(int index, const char* value, size_t size) {
  ::std::string* const element = input_.Mutable(index);
  // `value` is already a const char*, so it is passed to assign() directly.
  element->assign(value, size);
  // @@protoc_insertion_point(field_set_pointer:opencv_tensorflow.NodeDef.input)
}
|
||||
inline ::std::string* NodeDef::add_input() {
|
||||
// @@protoc_insertion_point(field_add_mutable:opencv_tensorflow.NodeDef.input)
|
||||
return input_.Add();
|
||||
}
|
||||
// Appends a new element to `input` and assigns `value` to it.
inline void NodeDef::add_input(const ::std::string& value) {
  ::std::string* const appended = input_.Add();
  appended->assign(value);
  // @@protoc_insertion_point(field_add:opencv_tensorflow.NodeDef.input)
}
|
||||
#if LANG_CXX11
|
||||
// Rvalue overload: passes `value` to input_.Add() through std::move.
inline void NodeDef::add_input(::std::string&& value) {
  input_.Add(
      std::move(value));
  // @@protoc_insertion_point(field_add:opencv_tensorflow.NodeDef.input)
}
|
||||
#endif
|
||||
// C-string overload: appends a new element to `input` and assigns `value`.
// Debug builds assert that `value` is non-NULL.
inline void NodeDef::add_input(const char* value) {
  GOOGLE_DCHECK(value != NULL);
  ::std::string* const appended = input_.Add();
  appended->assign(value);
  // @@protoc_insertion_point(field_add_char:opencv_tensorflow.NodeDef.input)
}
|
||||
// Pointer-and-length overload: appends a new element to `input` and assigns
// the first `size` chars at `value` to it.
inline void NodeDef::add_input(const char* value, size_t size) {
  ::std::string* const appended = input_.Add();
  // `value` is already a const char*, so it is passed to assign() directly.
  appended->assign(value, size);
  // @@protoc_insertion_point(field_add_pointer:opencv_tensorflow.NodeDef.input)
}
|
||||
// Read-only view of the whole repeated `input` field.
inline const ::google::protobuf::RepeatedPtrField< ::std::string>& NodeDef::input() const {
  // @@protoc_insertion_point(field_list:opencv_tensorflow.NodeDef.input)
  const ::google::protobuf::RepeatedPtrField< ::std::string>& all_inputs = input_;
  return all_inputs;
}
|
||||
// Writable pointer to the whole repeated `input` field.
inline ::google::protobuf::RepeatedPtrField< ::std::string>* NodeDef::mutable_input() {
  // @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.NodeDef.input)
  ::google::protobuf::RepeatedPtrField< ::std::string>* const all_inputs = &input_;
  return all_inputs;
}
|
||||
|
||||
// string device = 4;
|
||||
inline void NodeDef::clear_device() {
|
||||
device_.ClearToEmpty(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline const ::std::string& NodeDef::device() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.NodeDef.device)
|
||||
return device_.Get();
|
||||
}
|
||||
inline void NodeDef::set_device(const ::std::string& value) {
|
||||
|
||||
device_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.NodeDef.device)
|
||||
}
|
||||
#if LANG_CXX11
|
||||
inline void NodeDef::set_device(::std::string&& value) {
|
||||
|
||||
device_.Set(
|
||||
&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_rvalue:opencv_tensorflow.NodeDef.device)
|
||||
}
|
||||
#endif
|
||||
inline void NodeDef::set_device(const char* value) {
|
||||
GOOGLE_DCHECK(value != NULL);
|
||||
|
||||
device_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value),
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_char:opencv_tensorflow.NodeDef.device)
|
||||
}
|
||||
inline void NodeDef::set_device(const char* value,
|
||||
size_t size) {
|
||||
|
||||
device_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(
|
||||
reinterpret_cast<const char*>(value), size), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_pointer:opencv_tensorflow.NodeDef.device)
|
||||
}
|
||||
inline ::std::string* NodeDef::mutable_device() {
|
||||
|
||||
// @@protoc_insertion_point(field_mutable:opencv_tensorflow.NodeDef.device)
|
||||
return device_.Mutable(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline ::std::string* NodeDef::release_device() {
|
||||
// @@protoc_insertion_point(field_release:opencv_tensorflow.NodeDef.device)
|
||||
|
||||
return device_.Release(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline void NodeDef::set_allocated_device(::std::string* device) {
|
||||
if (device != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
device_.SetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), device,
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.NodeDef.device)
|
||||
}
|
||||
inline ::std::string* NodeDef::unsafe_arena_release_device() {
|
||||
// @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.NodeDef.device)
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
|
||||
return device_.UnsafeArenaRelease(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
GetArenaNoVirtual());
|
||||
}
|
||||
inline void NodeDef::unsafe_arena_set_allocated_device(
|
||||
::std::string* device) {
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
if (device != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
device_.UnsafeArenaSetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
device, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_unsafe_arena_set_allocated:opencv_tensorflow.NodeDef.device)
|
||||
}
|
||||
|
||||
// map<string, .opencv_tensorflow.AttrValue> attr = 5;
|
||||
inline int NodeDef::attr_size() const {
|
||||
return attr_.size();
|
||||
}
|
||||
// Read-only view of the `attr` map, obtained from attr_.GetMap().
inline const ::google::protobuf::Map< ::std::string, ::opencv_tensorflow::AttrValue >& NodeDef::attr() const {
  // @@protoc_insertion_point(field_map:opencv_tensorflow.NodeDef.attr)
  const ::google::protobuf::Map< ::std::string, ::opencv_tensorflow::AttrValue >& entries =
      attr_.GetMap();
  return entries;
}
|
||||
// Writable pointer to the `attr` map, obtained from attr_.MutableMap().
inline ::google::protobuf::Map< ::std::string, ::opencv_tensorflow::AttrValue >* NodeDef::mutable_attr() {
  // @@protoc_insertion_point(field_mutable_map:opencv_tensorflow.NodeDef.attr)
  ::google::protobuf::Map< ::std::string, ::opencv_tensorflow::AttrValue >* const entries =
      attr_.MutableMap();
  return entries;
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
||||
#endif // PROTOBUF_graph_2eproto__INCLUDED
|
||||
2840
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/op_def.pb.cc
vendored
Normal file
2840
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/op_def.pb.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2435
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/op_def.pb.h
vendored
Normal file
2435
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/op_def.pb.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1115
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor.pb.cc
vendored
Normal file
1115
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor.pb.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
844
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor.pb.h
vendored
Normal file
844
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor.pb.h
vendored
Normal file
@@ -0,0 +1,844 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: tensor.proto
|
||||
|
||||
#ifndef PROTOBUF_tensor_2eproto__INCLUDED
|
||||
#define PROTOBUF_tensor_2eproto__INCLUDED
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3005000
|
||||
#error This file was generated by a newer version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please update
|
||||
#error your headers.
|
||||
#endif
|
||||
#if 3005001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION
|
||||
#error This file was generated by an older version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please
|
||||
#error regenerate this file with a newer version of protoc.
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
#include <google/protobuf/generated_message_table_driven.h>
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
#include <google/protobuf/metadata.h>
|
||||
#include <google/protobuf/message.h>
|
||||
#include <google/protobuf/repeated_field.h> // IWYU pragma: export
|
||||
#include <google/protobuf/extension_set.h> // IWYU pragma: export
|
||||
#include <google/protobuf/unknown_field_set.h>
|
||||
#include "tensor_shape.pb.h"
|
||||
#include "types.pb.h"
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace protobuf_tensor_2eproto {
|
||||
// Internal implementation detail -- do not use these members.
|
||||
struct TableStruct {
|
||||
static const ::google::protobuf::internal::ParseTableField entries[];
|
||||
static const ::google::protobuf::internal::AuxillaryParseTableField aux[];
|
||||
static const ::google::protobuf::internal::ParseTable schema[1];
|
||||
static const ::google::protobuf::internal::FieldMetadata field_metadata[];
|
||||
static const ::google::protobuf::internal::SerializationTable serialization_table[];
|
||||
static const ::google::protobuf::uint32 offsets[];
|
||||
};
|
||||
void AddDescriptors();
|
||||
void InitDefaultsTensorProtoImpl();
|
||||
void InitDefaultsTensorProto();
|
||||
inline void InitDefaults() {
|
||||
InitDefaultsTensorProto();
|
||||
}
|
||||
} // namespace protobuf_tensor_2eproto
|
||||
namespace opencv_tensorflow {

// Forward declarations for the message type declared by this header and the
// object that backs its default instance.
class TensorProto;
class TensorProtoDefaultTypeInternal;
extern TensorProtoDefaultTypeInternal _TensorProto_default_instance_;

}  // namespace opencv_tensorflow
|
||||
namespace opencv_tensorflow {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
class TensorProto : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_tensorflow.TensorProto) */ {
|
||||
public:
|
||||
TensorProto();
|
||||
virtual ~TensorProto();
|
||||
|
||||
TensorProto(const TensorProto& from);
|
||||
|
||||
inline TensorProto& operator=(const TensorProto& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
TensorProto(TensorProto&& from) noexcept
|
||||
: TensorProto() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline TensorProto& operator=(TensorProto&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline ::google::protobuf::Arena* GetArena() const PROTOBUF_FINAL {
|
||||
return GetArenaNoVirtual();
|
||||
}
|
||||
inline void* GetMaybeArenaPointer() const PROTOBUF_FINAL {
|
||||
return MaybeArenaPtr();
|
||||
}
|
||||
static const ::google::protobuf::Descriptor* descriptor();
|
||||
static const TensorProto& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const TensorProto* internal_default_instance() {
|
||||
return reinterpret_cast<const TensorProto*>(
|
||||
&_TensorProto_default_instance_);
|
||||
}
|
||||
static PROTOBUF_CONSTEXPR int const kIndexInFileMessages =
|
||||
0;
|
||||
|
||||
void UnsafeArenaSwap(TensorProto* other);
|
||||
void Swap(TensorProto* other);
|
||||
friend void swap(TensorProto& a, TensorProto& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline TensorProto* New() const PROTOBUF_FINAL { return New(NULL); }
|
||||
|
||||
TensorProto* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL;
|
||||
void CopyFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void MergeFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void CopyFrom(const TensorProto& from);
|
||||
void MergeFrom(const TensorProto& from);
|
||||
void Clear() PROTOBUF_FINAL;
|
||||
bool IsInitialized() const PROTOBUF_FINAL;
|
||||
|
||||
size_t ByteSizeLong() const PROTOBUF_FINAL;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL;
|
||||
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
|
||||
bool deterministic, ::google::protobuf::uint8* target) const PROTOBUF_FINAL;
|
||||
int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; }
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const PROTOBUF_FINAL;
|
||||
void InternalSwap(TensorProto* other);
|
||||
protected:
|
||||
explicit TensorProto(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
static void ArenaDtor(void* object);
|
||||
inline void RegisterArenaDtor(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return _internal_metadata_.arena();
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return _internal_metadata_.raw_arena_ptr();
|
||||
}
|
||||
public:
|
||||
|
||||
::google::protobuf::Metadata GetMetadata() const PROTOBUF_FINAL;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// repeated float float_val = 5 [packed = true];
|
||||
int float_val_size() const;
|
||||
void clear_float_val();
|
||||
static const int kFloatValFieldNumber = 5;
|
||||
float float_val(int index) const;
|
||||
void set_float_val(int index, float value);
|
||||
void add_float_val(float value);
|
||||
const ::google::protobuf::RepeatedField< float >&
|
||||
float_val() const;
|
||||
::google::protobuf::RepeatedField< float >*
|
||||
mutable_float_val();
|
||||
|
||||
// repeated double double_val = 6 [packed = true];
|
||||
int double_val_size() const;
|
||||
void clear_double_val();
|
||||
static const int kDoubleValFieldNumber = 6;
|
||||
double double_val(int index) const;
|
||||
void set_double_val(int index, double value);
|
||||
void add_double_val(double value);
|
||||
const ::google::protobuf::RepeatedField< double >&
|
||||
double_val() const;
|
||||
::google::protobuf::RepeatedField< double >*
|
||||
mutable_double_val();
|
||||
|
||||
// repeated int32 int_val = 7 [packed = true];
|
||||
int int_val_size() const;
|
||||
void clear_int_val();
|
||||
static const int kIntValFieldNumber = 7;
|
||||
::google::protobuf::int32 int_val(int index) const;
|
||||
void set_int_val(int index, ::google::protobuf::int32 value);
|
||||
void add_int_val(::google::protobuf::int32 value);
|
||||
const ::google::protobuf::RepeatedField< ::google::protobuf::int32 >&
|
||||
int_val() const;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int32 >*
|
||||
mutable_int_val();
|
||||
|
||||
// repeated bytes string_val = 8;
|
||||
int string_val_size() const;
|
||||
void clear_string_val();
|
||||
static const int kStringValFieldNumber = 8;
|
||||
const ::std::string& string_val(int index) const;
|
||||
::std::string* mutable_string_val(int index);
|
||||
void set_string_val(int index, const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_string_val(int index, ::std::string&& value);
|
||||
#endif
|
||||
void set_string_val(int index, const char* value);
|
||||
void set_string_val(int index, const void* value, size_t size);
|
||||
::std::string* add_string_val();
|
||||
void add_string_val(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void add_string_val(::std::string&& value);
|
||||
#endif
|
||||
void add_string_val(const char* value);
|
||||
void add_string_val(const void* value, size_t size);
|
||||
const ::google::protobuf::RepeatedPtrField< ::std::string>& string_val() const;
|
||||
::google::protobuf::RepeatedPtrField< ::std::string>* mutable_string_val();
|
||||
|
||||
// repeated float scomplex_val = 9 [packed = true];
|
||||
int scomplex_val_size() const;
|
||||
void clear_scomplex_val();
|
||||
static const int kScomplexValFieldNumber = 9;
|
||||
float scomplex_val(int index) const;
|
||||
void set_scomplex_val(int index, float value);
|
||||
void add_scomplex_val(float value);
|
||||
const ::google::protobuf::RepeatedField< float >&
|
||||
scomplex_val() const;
|
||||
::google::protobuf::RepeatedField< float >*
|
||||
mutable_scomplex_val();
|
||||
|
||||
// repeated int64 int64_val = 10 [packed = true];
|
||||
int int64_val_size() const;
|
||||
void clear_int64_val();
|
||||
static const int kInt64ValFieldNumber = 10;
|
||||
::google::protobuf::int64 int64_val(int index) const;
|
||||
void set_int64_val(int index, ::google::protobuf::int64 value);
|
||||
void add_int64_val(::google::protobuf::int64 value);
|
||||
const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >&
|
||||
int64_val() const;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int64 >*
|
||||
mutable_int64_val();
|
||||
|
||||
// repeated bool bool_val = 11 [packed = true];
|
||||
int bool_val_size() const;
|
||||
void clear_bool_val();
|
||||
static const int kBoolValFieldNumber = 11;
|
||||
bool bool_val(int index) const;
|
||||
void set_bool_val(int index, bool value);
|
||||
void add_bool_val(bool value);
|
||||
const ::google::protobuf::RepeatedField< bool >&
|
||||
bool_val() const;
|
||||
::google::protobuf::RepeatedField< bool >*
|
||||
mutable_bool_val();
|
||||
|
||||
// repeated double dcomplex_val = 12 [packed = true];
|
||||
int dcomplex_val_size() const;
|
||||
void clear_dcomplex_val();
|
||||
static const int kDcomplexValFieldNumber = 12;
|
||||
double dcomplex_val(int index) const;
|
||||
void set_dcomplex_val(int index, double value);
|
||||
void add_dcomplex_val(double value);
|
||||
const ::google::protobuf::RepeatedField< double >&
|
||||
dcomplex_val() const;
|
||||
::google::protobuf::RepeatedField< double >*
|
||||
mutable_dcomplex_val();
|
||||
|
||||
// repeated int32 half_val = 13 [packed = true];
|
||||
int half_val_size() const;
|
||||
void clear_half_val();
|
||||
static const int kHalfValFieldNumber = 13;
|
||||
::google::protobuf::int32 half_val(int index) const;
|
||||
void set_half_val(int index, ::google::protobuf::int32 value);
|
||||
void add_half_val(::google::protobuf::int32 value);
|
||||
const ::google::protobuf::RepeatedField< ::google::protobuf::int32 >&
|
||||
half_val() const;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int32 >*
|
||||
mutable_half_val();
|
||||
|
||||
// bytes tensor_content = 4;
|
||||
void clear_tensor_content();
|
||||
static const int kTensorContentFieldNumber = 4;
|
||||
const ::std::string& tensor_content() const;
|
||||
void set_tensor_content(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_tensor_content(::std::string&& value);
|
||||
#endif
|
||||
void set_tensor_content(const char* value);
|
||||
void set_tensor_content(const void* value, size_t size);
|
||||
::std::string* mutable_tensor_content();
|
||||
::std::string* release_tensor_content();
|
||||
void set_allocated_tensor_content(::std::string* tensor_content);
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
::std::string* unsafe_arena_release_tensor_content();
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
void unsafe_arena_set_allocated_tensor_content(
|
||||
::std::string* tensor_content);
|
||||
|
||||
// .opencv_tensorflow.TensorShapeProto tensor_shape = 2;
|
||||
bool has_tensor_shape() const;
|
||||
void clear_tensor_shape();
|
||||
static const int kTensorShapeFieldNumber = 2;
|
||||
private:
|
||||
void _slow_mutable_tensor_shape();
|
||||
public:
|
||||
const ::opencv_tensorflow::TensorShapeProto& tensor_shape() const;
|
||||
::opencv_tensorflow::TensorShapeProto* release_tensor_shape();
|
||||
::opencv_tensorflow::TensorShapeProto* mutable_tensor_shape();
|
||||
void set_allocated_tensor_shape(::opencv_tensorflow::TensorShapeProto* tensor_shape);
|
||||
void unsafe_arena_set_allocated_tensor_shape(
|
||||
::opencv_tensorflow::TensorShapeProto* tensor_shape);
|
||||
::opencv_tensorflow::TensorShapeProto* unsafe_arena_release_tensor_shape();
|
||||
|
||||
// .opencv_tensorflow.DataType dtype = 1;
|
||||
void clear_dtype();
|
||||
static const int kDtypeFieldNumber = 1;
|
||||
::opencv_tensorflow::DataType dtype() const;
|
||||
void set_dtype(::opencv_tensorflow::DataType value);
|
||||
|
||||
// int32 version_number = 3;
|
||||
void clear_version_number();
|
||||
static const int kVersionNumberFieldNumber = 3;
|
||||
::google::protobuf::int32 version_number() const;
|
||||
void set_version_number(::google::protobuf::int32 value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:opencv_tensorflow.TensorProto)
|
||||
private:
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
|
||||
template <typename T> friend class ::google::protobuf::Arena::InternalHelper;
|
||||
typedef void InternalArenaConstructable_;
|
||||
typedef void DestructorSkippable_;
|
||||
::google::protobuf::RepeatedField< float > float_val_;
|
||||
mutable int _float_val_cached_byte_size_;
|
||||
::google::protobuf::RepeatedField< double > double_val_;
|
||||
mutable int _double_val_cached_byte_size_;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int32 > int_val_;
|
||||
mutable int _int_val_cached_byte_size_;
|
||||
::google::protobuf::RepeatedPtrField< ::std::string> string_val_;
|
||||
::google::protobuf::RepeatedField< float > scomplex_val_;
|
||||
mutable int _scomplex_val_cached_byte_size_;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int64 > int64_val_;
|
||||
mutable int _int64_val_cached_byte_size_;
|
||||
::google::protobuf::RepeatedField< bool > bool_val_;
|
||||
mutable int _bool_val_cached_byte_size_;
|
||||
::google::protobuf::RepeatedField< double > dcomplex_val_;
|
||||
mutable int _dcomplex_val_cached_byte_size_;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int32 > half_val_;
|
||||
mutable int _half_val_cached_byte_size_;
|
||||
::google::protobuf::internal::ArenaStringPtr tensor_content_;
|
||||
::opencv_tensorflow::TensorShapeProto* tensor_shape_;
|
||||
int dtype_;
|
||||
::google::protobuf::int32 version_number_;
|
||||
mutable int _cached_size_;
|
||||
friend struct ::protobuf_tensor_2eproto::TableStruct;
|
||||
friend void ::protobuf_tensor_2eproto::InitDefaultsTensorProtoImpl();
|
||||
};
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif // __GNUC__
|
||||
// TensorProto
|
||||
|
||||
// .opencv_tensorflow.DataType dtype = 1;
|
||||
// Resets field 1 (`dtype`) to 0.
inline void TensorProto::clear_dtype() { dtype_ = 0; }
|
||||
// Read accessor for field 1 (`dtype`): converts the stored int to DataType.
inline ::opencv_tensorflow::DataType TensorProto::dtype() const {
  // @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.dtype)
  const int stored = dtype_;
  return static_cast< ::opencv_tensorflow::DataType >(stored);
}
|
||||
// Stores `value` in field 1 (`dtype`), which is kept as an int.
inline void TensorProto::set_dtype(::opencv_tensorflow::DataType value) {
  // Same enum-to-int conversion the plain assignment performed, made explicit.
  dtype_ = static_cast<int>(value);
  // @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.dtype)
}
|
||||
|
||||
// .opencv_tensorflow.TensorShapeProto tensor_shape = 2;
|
||||
inline bool TensorProto::has_tensor_shape() const {
|
||||
return this != internal_default_instance() && tensor_shape_ != NULL;
|
||||
}
|
||||
// Read accessor for field 2 (`tensor_shape`). Falls back to the
// TensorShapeProto default instance when no sub-message is stored.
inline const ::opencv_tensorflow::TensorShapeProto& TensorProto::tensor_shape() const {
  const ::opencv_tensorflow::TensorShapeProto* stored = tensor_shape_;
  // @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.tensor_shape)
  if (stored != NULL) {
    return *stored;
  }
  return *reinterpret_cast<const ::opencv_tensorflow::TensorShapeProto*>(
      &::opencv_tensorflow::_TensorShapeProto_default_instance_);
}
|
||||
// Detaches the stored `tensor_shape` and returns it, leaving the field NULL.
// When the message reports an arena, the pointer is first passed through
// DuplicateIfNonNull(..., NULL) and that result is what gets returned.
inline ::opencv_tensorflow::TensorShapeProto* TensorProto::release_tensor_shape() {
  // @@protoc_insertion_point(field_release:opencv_tensorflow.TensorProto.tensor_shape)
  ::opencv_tensorflow::TensorShapeProto* released = tensor_shape_;
  const bool on_arena = (GetArenaNoVirtual() != NULL);
  if (on_arena) {
    released = ::google::protobuf::internal::DuplicateIfNonNull(released, NULL);
  }
  tensor_shape_ = NULL;
  return released;
}
|
||||
// Returns the stored `tensor_shape` pointer as-is and sets the field to NULL.
inline ::opencv_tensorflow::TensorShapeProto* TensorProto::unsafe_arena_release_tensor_shape() {
  // @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.TensorProto.tensor_shape)
  ::opencv_tensorflow::TensorShapeProto* const released = tensor_shape_;
  tensor_shape_ = NULL;
  return released;
}
|
||||
// Writable accessor for field 2 (`tensor_shape`). Calls
// _slow_mutable_tensor_shape() first when no sub-message is stored yet.
inline ::opencv_tensorflow::TensorShapeProto* TensorProto::mutable_tensor_shape() {
  const bool missing = (tensor_shape_ == NULL);
  if (missing) {
    _slow_mutable_tensor_shape();
  }
  // @@protoc_insertion_point(field_mutable:opencv_tensorflow.TensorProto.tensor_shape)
  return tensor_shape_;
}
|
||||
// Replaces the stored `tensor_shape` with the caller-supplied pointer
// (which may be NULL).
inline void TensorProto::set_allocated_tensor_shape(::opencv_tensorflow::TensorShapeProto* tensor_shape) {
  ::google::protobuf::Arena* const own_arena = GetArenaNoVirtual();

  // The previous sub-message is deleted only when this message reports no arena.
  if (own_arena == NULL) {
    delete reinterpret_cast< ::google::protobuf::MessageLite*>(tensor_shape_);
  }

  if (tensor_shape != NULL) {
    ::google::protobuf::Arena* const incoming_arena =
        reinterpret_cast< ::google::protobuf::MessageLite*>(tensor_shape)->GetArena();
    // When the two arenas differ, GetOwnedMessage() supplies the pointer that
    // is actually stored.
    if (own_arena != incoming_arena) {
      tensor_shape = ::google::protobuf::internal::GetOwnedMessage(
          own_arena, tensor_shape, incoming_arena);
    }
  }

  tensor_shape_ = tensor_shape;
  // @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.TensorProto.tensor_shape)
}
|
||||
|
||||
// int32 version_number = 3;
|
||||
// Resets field 3 (`version_number`) to 0.
inline void TensorProto::clear_version_number() { version_number_ = 0; }
|
||||
// Read accessor for field 3 (`version_number`).
inline ::google::protobuf::int32 TensorProto::version_number() const {
  // @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.version_number)
  const ::google::protobuf::int32 stored = version_number_;
  return stored;
}
|
||||
// Write accessor for field 3 (`version_number`).
inline void TensorProto::set_version_number(::google::protobuf::int32 value) {
  this->version_number_ = value;
  // @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.version_number)
}
|
||||
|
||||
// bytes tensor_content = 4;
|
||||
inline void TensorProto::clear_tensor_content() {
|
||||
tensor_content_.ClearToEmpty(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline const ::std::string& TensorProto::tensor_content() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.tensor_content)
|
||||
return tensor_content_.Get();
|
||||
}
|
||||
inline void TensorProto::set_tensor_content(const ::std::string& value) {
|
||||
|
||||
tensor_content_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.tensor_content)
|
||||
}
|
||||
#if LANG_CXX11
|
||||
inline void TensorProto::set_tensor_content(::std::string&& value) {
|
||||
|
||||
tensor_content_.Set(
|
||||
&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_rvalue:opencv_tensorflow.TensorProto.tensor_content)
|
||||
}
|
||||
#endif
|
||||
inline void TensorProto::set_tensor_content(const char* value) {
|
||||
GOOGLE_DCHECK(value != NULL);
|
||||
|
||||
tensor_content_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value),
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_char:opencv_tensorflow.TensorProto.tensor_content)
|
||||
}
|
||||
inline void TensorProto::set_tensor_content(const void* value,
|
||||
size_t size) {
|
||||
|
||||
tensor_content_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(
|
||||
reinterpret_cast<const char*>(value), size), GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_pointer:opencv_tensorflow.TensorProto.tensor_content)
|
||||
}
|
||||
inline ::std::string* TensorProto::mutable_tensor_content() {
|
||||
|
||||
// @@protoc_insertion_point(field_mutable:opencv_tensorflow.TensorProto.tensor_content)
|
||||
return tensor_content_.Mutable(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline ::std::string* TensorProto::release_tensor_content() {
|
||||
// @@protoc_insertion_point(field_release:opencv_tensorflow.TensorProto.tensor_content)
|
||||
|
||||
return tensor_content_.Release(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline void TensorProto::set_allocated_tensor_content(::std::string* tensor_content) {
|
||||
if (tensor_content != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
tensor_content_.SetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), tensor_content,
|
||||
GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.TensorProto.tensor_content)
|
||||
}
|
||||
inline ::std::string* TensorProto::unsafe_arena_release_tensor_content() {
|
||||
// @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.TensorProto.tensor_content)
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
|
||||
return tensor_content_.UnsafeArenaRelease(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
GetArenaNoVirtual());
|
||||
}
|
||||
inline void TensorProto::unsafe_arena_set_allocated_tensor_content(
|
||||
::std::string* tensor_content) {
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
if (tensor_content != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
tensor_content_.UnsafeArenaSetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
tensor_content, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_unsafe_arena_set_allocated:opencv_tensorflow.TensorProto.tensor_content)
|
||||
}
|
||||
|
||||
// repeated int32 half_val = 13 [packed = true];
|
||||
inline int TensorProto::half_val_size() const {
|
||||
return half_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_half_val() {
|
||||
half_val_.Clear();
|
||||
}
|
||||
inline ::google::protobuf::int32 TensorProto::half_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.half_val)
|
||||
return half_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_half_val(int index, ::google::protobuf::int32 value) {
|
||||
half_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.half_val)
|
||||
}
|
||||
inline void TensorProto::add_half_val(::google::protobuf::int32 value) {
|
||||
half_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.half_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< ::google::protobuf::int32 >&
|
||||
TensorProto::half_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.half_val)
|
||||
return half_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< ::google::protobuf::int32 >*
|
||||
TensorProto::mutable_half_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.half_val)
|
||||
return &half_val_;
|
||||
}
|
||||
|
||||
// repeated float float_val = 5 [packed = true];
|
||||
inline int TensorProto::float_val_size() const {
|
||||
return float_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_float_val() {
|
||||
float_val_.Clear();
|
||||
}
|
||||
inline float TensorProto::float_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.float_val)
|
||||
return float_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_float_val(int index, float value) {
|
||||
float_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.float_val)
|
||||
}
|
||||
inline void TensorProto::add_float_val(float value) {
|
||||
float_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.float_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< float >&
|
||||
TensorProto::float_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.float_val)
|
||||
return float_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< float >*
|
||||
TensorProto::mutable_float_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.float_val)
|
||||
return &float_val_;
|
||||
}
|
||||
|
||||
// repeated double double_val = 6 [packed = true];
|
||||
inline int TensorProto::double_val_size() const {
|
||||
return double_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_double_val() {
|
||||
double_val_.Clear();
|
||||
}
|
||||
inline double TensorProto::double_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.double_val)
|
||||
return double_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_double_val(int index, double value) {
|
||||
double_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.double_val)
|
||||
}
|
||||
inline void TensorProto::add_double_val(double value) {
|
||||
double_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.double_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< double >&
|
||||
TensorProto::double_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.double_val)
|
||||
return double_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< double >*
|
||||
TensorProto::mutable_double_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.double_val)
|
||||
return &double_val_;
|
||||
}
|
||||
|
||||
// repeated int32 int_val = 7 [packed = true];
|
||||
inline int TensorProto::int_val_size() const {
|
||||
return int_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_int_val() {
|
||||
int_val_.Clear();
|
||||
}
|
||||
inline ::google::protobuf::int32 TensorProto::int_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.int_val)
|
||||
return int_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_int_val(int index, ::google::protobuf::int32 value) {
|
||||
int_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.int_val)
|
||||
}
|
||||
inline void TensorProto::add_int_val(::google::protobuf::int32 value) {
|
||||
int_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.int_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< ::google::protobuf::int32 >&
|
||||
TensorProto::int_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.int_val)
|
||||
return int_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< ::google::protobuf::int32 >*
|
||||
TensorProto::mutable_int_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.int_val)
|
||||
return &int_val_;
|
||||
}
|
||||
|
||||
// repeated bytes string_val = 8;
|
||||
inline int TensorProto::string_val_size() const {
|
||||
return string_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_string_val() {
|
||||
string_val_.Clear();
|
||||
}
|
||||
inline const ::std::string& TensorProto::string_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.string_val)
|
||||
return string_val_.Get(index);
|
||||
}
|
||||
inline ::std::string* TensorProto::mutable_string_val(int index) {
|
||||
// @@protoc_insertion_point(field_mutable:opencv_tensorflow.TensorProto.string_val)
|
||||
return string_val_.Mutable(index);
|
||||
}
|
||||
inline void TensorProto::set_string_val(int index, const ::std::string& value) {
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.string_val)
|
||||
string_val_.Mutable(index)->assign(value);
|
||||
}
|
||||
#if LANG_CXX11
|
||||
inline void TensorProto::set_string_val(int index, ::std::string&& value) {
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.string_val)
|
||||
string_val_.Mutable(index)->assign(std::move(value));
|
||||
}
|
||||
#endif
|
||||
inline void TensorProto::set_string_val(int index, const char* value) {
|
||||
GOOGLE_DCHECK(value != NULL);
|
||||
string_val_.Mutable(index)->assign(value);
|
||||
// @@protoc_insertion_point(field_set_char:opencv_tensorflow.TensorProto.string_val)
|
||||
}
|
||||
inline void TensorProto::set_string_val(int index, const void* value, size_t size) {
|
||||
string_val_.Mutable(index)->assign(
|
||||
reinterpret_cast<const char*>(value), size);
|
||||
// @@protoc_insertion_point(field_set_pointer:opencv_tensorflow.TensorProto.string_val)
|
||||
}
|
||||
inline ::std::string* TensorProto::add_string_val() {
|
||||
// @@protoc_insertion_point(field_add_mutable:opencv_tensorflow.TensorProto.string_val)
|
||||
return string_val_.Add();
|
||||
}
|
||||
inline void TensorProto::add_string_val(const ::std::string& value) {
|
||||
string_val_.Add()->assign(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.string_val)
|
||||
}
|
||||
#if LANG_CXX11
|
||||
inline void TensorProto::add_string_val(::std::string&& value) {
|
||||
string_val_.Add(std::move(value));
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.string_val)
|
||||
}
|
||||
#endif
|
||||
inline void TensorProto::add_string_val(const char* value) {
|
||||
GOOGLE_DCHECK(value != NULL);
|
||||
string_val_.Add()->assign(value);
|
||||
// @@protoc_insertion_point(field_add_char:opencv_tensorflow.TensorProto.string_val)
|
||||
}
|
||||
inline void TensorProto::add_string_val(const void* value, size_t size) {
|
||||
string_val_.Add()->assign(reinterpret_cast<const char*>(value), size);
|
||||
// @@protoc_insertion_point(field_add_pointer:opencv_tensorflow.TensorProto.string_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedPtrField< ::std::string>&
|
||||
TensorProto::string_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.string_val)
|
||||
return string_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedPtrField< ::std::string>*
|
||||
TensorProto::mutable_string_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.string_val)
|
||||
return &string_val_;
|
||||
}
|
||||
|
||||
// repeated float scomplex_val = 9 [packed = true];
|
||||
inline int TensorProto::scomplex_val_size() const {
|
||||
return scomplex_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_scomplex_val() {
|
||||
scomplex_val_.Clear();
|
||||
}
|
||||
inline float TensorProto::scomplex_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.scomplex_val)
|
||||
return scomplex_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_scomplex_val(int index, float value) {
|
||||
scomplex_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.scomplex_val)
|
||||
}
|
||||
inline void TensorProto::add_scomplex_val(float value) {
|
||||
scomplex_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.scomplex_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< float >&
|
||||
TensorProto::scomplex_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.scomplex_val)
|
||||
return scomplex_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< float >*
|
||||
TensorProto::mutable_scomplex_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.scomplex_val)
|
||||
return &scomplex_val_;
|
||||
}
|
||||
|
||||
// repeated int64 int64_val = 10 [packed = true];
|
||||
inline int TensorProto::int64_val_size() const {
|
||||
return int64_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_int64_val() {
|
||||
int64_val_.Clear();
|
||||
}
|
||||
inline ::google::protobuf::int64 TensorProto::int64_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.int64_val)
|
||||
return int64_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_int64_val(int index, ::google::protobuf::int64 value) {
|
||||
int64_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.int64_val)
|
||||
}
|
||||
inline void TensorProto::add_int64_val(::google::protobuf::int64 value) {
|
||||
int64_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.int64_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >&
|
||||
TensorProto::int64_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.int64_val)
|
||||
return int64_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< ::google::protobuf::int64 >*
|
||||
TensorProto::mutable_int64_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.int64_val)
|
||||
return &int64_val_;
|
||||
}
|
||||
|
||||
// repeated bool bool_val = 11 [packed = true];
|
||||
inline int TensorProto::bool_val_size() const {
|
||||
return bool_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_bool_val() {
|
||||
bool_val_.Clear();
|
||||
}
|
||||
inline bool TensorProto::bool_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.bool_val)
|
||||
return bool_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_bool_val(int index, bool value) {
|
||||
bool_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.bool_val)
|
||||
}
|
||||
inline void TensorProto::add_bool_val(bool value) {
|
||||
bool_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.bool_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< bool >&
|
||||
TensorProto::bool_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.bool_val)
|
||||
return bool_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< bool >*
|
||||
TensorProto::mutable_bool_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.bool_val)
|
||||
return &bool_val_;
|
||||
}
|
||||
|
||||
// repeated double dcomplex_val = 12 [packed = true];
|
||||
inline int TensorProto::dcomplex_val_size() const {
|
||||
return dcomplex_val_.size();
|
||||
}
|
||||
inline void TensorProto::clear_dcomplex_val() {
|
||||
dcomplex_val_.Clear();
|
||||
}
|
||||
inline double TensorProto::dcomplex_val(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorProto.dcomplex_val)
|
||||
return dcomplex_val_.Get(index);
|
||||
}
|
||||
inline void TensorProto::set_dcomplex_val(int index, double value) {
|
||||
dcomplex_val_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.TensorProto.dcomplex_val)
|
||||
}
|
||||
inline void TensorProto::add_dcomplex_val(double value) {
|
||||
dcomplex_val_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.TensorProto.dcomplex_val)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< double >&
|
||||
TensorProto::dcomplex_val() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.TensorProto.dcomplex_val)
|
||||
return dcomplex_val_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< double >*
|
||||
TensorProto::mutable_dcomplex_val() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorProto.dcomplex_val)
|
||||
return &dcomplex_val_;
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
||||
#endif // PROTOBUF_tensor_2eproto__INCLUDED
|
||||
783
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor_shape.pb.cc
vendored
Normal file
783
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor_shape.pb.cc
vendored
Normal file
@@ -0,0 +1,783 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: tensor_shape.proto
|
||||
|
||||
#include "tensor_shape.pb.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
#include <google/protobuf/stubs/once.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/wire_format_lite_inl.h>
|
||||
#include <google/protobuf/descriptor.h>
|
||||
#include <google/protobuf/generated_message_reflection.h>
|
||||
#include <google/protobuf/reflection_ops.h>
|
||||
#include <google/protobuf/wire_format.h>
|
||||
// This is a temporary google only hack
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
#include "third_party/protobuf/version.h"
|
||||
#endif
|
||||
// @@protoc_insertion_point(includes)
|
||||
namespace opencv_tensorflow {
|
||||
class TensorShapeProto_DimDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<TensorShapeProto_Dim>
|
||||
_instance;
|
||||
} _TensorShapeProto_Dim_default_instance_;
|
||||
class TensorShapeProtoDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<TensorShapeProto>
|
||||
_instance;
|
||||
} _TensorShapeProto_default_instance_;
|
||||
} // namespace opencv_tensorflow
|
||||
namespace protobuf_tensor_5fshape_2eproto {
|
||||
void InitDefaultsTensorShapeProto_DimImpl() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
::google::protobuf::internal::InitProtobufDefaultsForceUnique();
|
||||
#else
|
||||
::google::protobuf::internal::InitProtobufDefaults();
|
||||
#endif // GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
{
|
||||
void* ptr = &::opencv_tensorflow::_TensorShapeProto_Dim_default_instance_;
|
||||
new (ptr) ::opencv_tensorflow::TensorShapeProto_Dim();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::opencv_tensorflow::TensorShapeProto_Dim::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
void InitDefaultsTensorShapeProto_Dim() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &InitDefaultsTensorShapeProto_DimImpl);
|
||||
}
|
||||
|
||||
void InitDefaultsTensorShapeProtoImpl() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
::google::protobuf::internal::InitProtobufDefaultsForceUnique();
|
||||
#else
|
||||
::google::protobuf::internal::InitProtobufDefaults();
|
||||
#endif // GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto_Dim();
|
||||
{
|
||||
void* ptr = &::opencv_tensorflow::_TensorShapeProto_default_instance_;
|
||||
new (ptr) ::opencv_tensorflow::TensorShapeProto();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::opencv_tensorflow::TensorShapeProto::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
void InitDefaultsTensorShapeProto() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &InitDefaultsTensorShapeProtoImpl);
|
||||
}
|
||||
|
||||
::google::protobuf::Metadata file_level_metadata[2];
|
||||
|
||||
const ::google::protobuf::uint32 TableStruct::offsets[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
~0u, // no _has_bits_
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::TensorShapeProto_Dim, _internal_metadata_),
|
||||
~0u, // no _extensions_
|
||||
~0u, // no _oneof_case_
|
||||
~0u, // no _weak_field_map_
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::TensorShapeProto_Dim, size_),
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::TensorShapeProto_Dim, name_),
|
||||
~0u, // no _has_bits_
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::TensorShapeProto, _internal_metadata_),
|
||||
~0u, // no _extensions_
|
||||
~0u, // no _oneof_case_
|
||||
~0u, // no _weak_field_map_
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::TensorShapeProto, dim_),
|
||||
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::TensorShapeProto, unknown_rank_),
|
||||
};
|
||||
static const ::google::protobuf::internal::MigrationSchema schemas[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
{ 0, -1, sizeof(::opencv_tensorflow::TensorShapeProto_Dim)},
|
||||
{ 7, -1, sizeof(::opencv_tensorflow::TensorShapeProto)},
|
||||
};
|
||||
|
||||
static ::google::protobuf::Message const * const file_default_instances[] = {
|
||||
reinterpret_cast<const ::google::protobuf::Message*>(&::opencv_tensorflow::_TensorShapeProto_Dim_default_instance_),
|
||||
reinterpret_cast<const ::google::protobuf::Message*>(&::opencv_tensorflow::_TensorShapeProto_default_instance_),
|
||||
};
|
||||
|
||||
void protobuf_AssignDescriptors() {
|
||||
AddDescriptors();
|
||||
::google::protobuf::MessageFactory* factory = NULL;
|
||||
AssignDescriptors(
|
||||
"tensor_shape.proto", schemas, file_default_instances, TableStruct::offsets, factory,
|
||||
file_level_metadata, NULL, NULL);
|
||||
}
|
||||
|
||||
void protobuf_AssignDescriptorsOnce() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &protobuf_AssignDescriptors);
|
||||
}
|
||||
|
||||
void protobuf_RegisterTypes(const ::std::string&) GOOGLE_PROTOBUF_ATTRIBUTE_COLD;
|
||||
void protobuf_RegisterTypes(const ::std::string&) {
|
||||
protobuf_AssignDescriptorsOnce();
|
||||
::google::protobuf::internal::RegisterAllTypes(file_level_metadata, 2);
|
||||
}
|
||||
|
||||
void AddDescriptorsImpl() {
|
||||
InitDefaults();
|
||||
static const char descriptor[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
"\n\022tensor_shape.proto\022\021opencv_tensorflow\""
|
||||
"\201\001\n\020TensorShapeProto\0224\n\003dim\030\002 \003(\0132\'.open"
|
||||
"cv_tensorflow.TensorShapeProto.Dim\022\024\n\014un"
|
||||
"known_rank\030\003 \001(\010\032!\n\003Dim\022\014\n\004size\030\001 \001(\003\022\014\n"
|
||||
"\004name\030\002 \001(\tB2\n\030org.tensorflow.frameworkB"
|
||||
"\021TensorShapeProtosP\001\370\001\001b\006proto3"
|
||||
};
|
||||
::google::protobuf::DescriptorPool::InternalAddGeneratedFile(
|
||||
descriptor, 231);
|
||||
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
|
||||
"tensor_shape.proto", &protobuf_RegisterTypes);
|
||||
}
|
||||
|
||||
void AddDescriptors() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &AddDescriptorsImpl);
|
||||
}
|
||||
// Force AddDescriptors() to be called at dynamic initialization time.
|
||||
struct StaticDescriptorInitializer {
|
||||
StaticDescriptorInitializer() {
|
||||
AddDescriptors();
|
||||
}
|
||||
} static_descriptor_initializer;
|
||||
} // namespace protobuf_tensor_5fshape_2eproto
|
||||
namespace opencv_tensorflow {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
void TensorShapeProto_Dim::InitAsDefaultInstance() {
|
||||
}
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int TensorShapeProto_Dim::kSizeFieldNumber;
|
||||
const int TensorShapeProto_Dim::kNameFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
TensorShapeProto_Dim::TensorShapeProto_Dim()
|
||||
: ::google::protobuf::Message(), _internal_metadata_(NULL) {
|
||||
if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) {
|
||||
::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto_Dim();
|
||||
}
|
||||
SharedCtor();
|
||||
// @@protoc_insertion_point(constructor:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
}
|
||||
TensorShapeProto_Dim::TensorShapeProto_Dim(::google::protobuf::Arena* arena)
|
||||
: ::google::protobuf::Message(),
|
||||
_internal_metadata_(arena) {
|
||||
::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto_Dim();
|
||||
SharedCtor();
|
||||
RegisterArenaDtor(arena);
|
||||
// @@protoc_insertion_point(arena_constructor:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
}
|
||||
TensorShapeProto_Dim::TensorShapeProto_Dim(const TensorShapeProto_Dim& from)
|
||||
: ::google::protobuf::Message(),
|
||||
_internal_metadata_(NULL),
|
||||
_cached_size_(0) {
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
name_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
if (from.name().size() > 0) {
|
||||
name_.Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.name(),
|
||||
GetArenaNoVirtual());
|
||||
}
|
||||
size_ = from.size_;
|
||||
// @@protoc_insertion_point(copy_constructor:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
}
|
||||
|
||||
void TensorShapeProto_Dim::SharedCtor() {
|
||||
name_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
size_ = GOOGLE_LONGLONG(0);
|
||||
_cached_size_ = 0;
|
||||
}
|
||||
|
||||
TensorShapeProto_Dim::~TensorShapeProto_Dim() {
|
||||
// @@protoc_insertion_point(destructor:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
SharedDtor();
|
||||
}
|
||||
|
||||
void TensorShapeProto_Dim::SharedDtor() {
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() == NULL);
|
||||
name_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
|
||||
}
|
||||
|
||||
void TensorShapeProto_Dim::ArenaDtor(void* object) {
|
||||
TensorShapeProto_Dim* _this = reinterpret_cast< TensorShapeProto_Dim* >(object);
|
||||
(void)_this;
|
||||
}
|
||||
void TensorShapeProto_Dim::RegisterArenaDtor(::google::protobuf::Arena* arena) {
|
||||
}
|
||||
void TensorShapeProto_Dim::SetCachedSize(int size) const {
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
|
||||
_cached_size_ = size;
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_END();
|
||||
}
|
||||
const ::google::protobuf::Descriptor* TensorShapeProto_Dim::descriptor() {
|
||||
::protobuf_tensor_5fshape_2eproto::protobuf_AssignDescriptorsOnce();
|
||||
return ::protobuf_tensor_5fshape_2eproto::file_level_metadata[kIndexInFileMessages].descriptor;
|
||||
}
|
||||
|
||||
const TensorShapeProto_Dim& TensorShapeProto_Dim::default_instance() {
|
||||
::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto_Dim();
|
||||
return *internal_default_instance();
|
||||
}
|
||||
|
||||
TensorShapeProto_Dim* TensorShapeProto_Dim::New(::google::protobuf::Arena* arena) const {
|
||||
return ::google::protobuf::Arena::CreateMessage<TensorShapeProto_Dim>(arena);
|
||||
}
|
||||
|
||||
void TensorShapeProto_Dim::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
name_.ClearToEmpty(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
size_ = GOOGLE_LONGLONG(0);
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
bool TensorShapeProto_Dim::MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) {
|
||||
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
|
||||
::google::protobuf::uint32 tag;
|
||||
// @@protoc_insertion_point(parse_start:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
for (;;) {
|
||||
::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
|
||||
tag = p.first;
|
||||
if (!p.second) goto handle_unusual;
|
||||
switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
|
||||
// int64 size = 1;
|
||||
case 1: {
|
||||
if (static_cast< ::google::protobuf::uint8>(tag) ==
|
||||
static_cast< ::google::protobuf::uint8>(8u /* 8 & 0xFF */)) {
|
||||
|
||||
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
|
||||
::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>(
|
||||
input, &size_)));
|
||||
} else {
|
||||
goto handle_unusual;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// string name = 2;
|
||||
case 2: {
|
||||
if (static_cast< ::google::protobuf::uint8>(tag) ==
|
||||
static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) {
|
||||
DO_(::google::protobuf::internal::WireFormatLite::ReadString(
|
||||
input, this->mutable_name()));
|
||||
DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String(
|
||||
this->name().data(), static_cast<int>(this->name().length()),
|
||||
::google::protobuf::internal::WireFormatLite::PARSE,
|
||||
"opencv_tensorflow.TensorShapeProto.Dim.name"));
|
||||
} else {
|
||||
goto handle_unusual;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
handle_unusual:
|
||||
if (tag == 0) {
|
||||
goto success;
|
||||
}
|
||||
DO_(::google::protobuf::internal::WireFormat::SkipField(
|
||||
input, tag, _internal_metadata_.mutable_unknown_fields()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
success:
|
||||
// @@protoc_insertion_point(parse_success:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
return true;
|
||||
failure:
|
||||
// @@protoc_insertion_point(parse_failure:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
return false;
|
||||
#undef DO_
|
||||
}
|
||||
|
||||
void TensorShapeProto_Dim::SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const {
|
||||
// @@protoc_insertion_point(serialize_start:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
// int64 size = 1;
|
||||
if (this->size() != 0) {
|
||||
::google::protobuf::internal::WireFormatLite::WriteInt64(1, this->size(), output);
|
||||
}
|
||||
|
||||
// string name = 2;
|
||||
if (this->name().size() > 0) {
|
||||
::google::protobuf::internal::WireFormatLite::VerifyUtf8String(
|
||||
this->name().data(), static_cast<int>(this->name().length()),
|
||||
::google::protobuf::internal::WireFormatLite::SERIALIZE,
|
||||
"opencv_tensorflow.TensorShapeProto.Dim.name");
|
||||
::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased(
|
||||
2, this->name(), output);
|
||||
}
|
||||
|
||||
if ((_internal_metadata_.have_unknown_fields() && ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
|
||||
::google::protobuf::internal::WireFormat::SerializeUnknownFields(
|
||||
(::google::protobuf::internal::GetProto3PreserveUnknownsDefault() ? _internal_metadata_.unknown_fields() : _internal_metadata_.default_instance()), output);
|
||||
}
|
||||
// @@protoc_insertion_point(serialize_end:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
}
|
||||
|
||||
// Serializes this Dim into the byte buffer at `target` and returns the
// pointer produced by the last write. `deterministic` is accepted but unused.
// Fields are emitted in field-number order: `size` (1) when non-zero, `name`
// (2) when non-empty, then any preserved unknown fields.
::google::protobuf::uint8* TensorShapeProto_Dim::InternalSerializeWithCachedSizesToArray(
    bool deterministic, ::google::protobuf::uint8* target) const {
  (void)deterministic; // Unused
  // @@protoc_insertion_point(serialize_to_array_start:opencv_tensorflow.TensorShapeProto.Dim)
  typedef ::google::protobuf::internal::WireFormatLite WireLite;

  ::google::protobuf::uint32 has_bits_snapshot = 0;
  (void) has_bits_snapshot;  // declared but never read in this function

  // int64 size = 1;
  if (this->size() != 0) {
    target = WireLite::WriteInt64ToArray(1, this->size(), target);
  }

  // string name = 2;
  if (!this->name().empty()) {
    WireLite::VerifyUtf8String(
        this->name().data(),
        static_cast<int>(this->name().length()),
        WireLite::SERIALIZE,
        "opencv_tensorflow.TensorShapeProto.Dim.name");
    target = WireLite::WriteStringToArray(2, this->name(), target);
  }

  if (_internal_metadata_.have_unknown_fields() &&
      ::google::protobuf::internal::GetProto3PreserveUnknownsDefault()) {
    target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
        (::google::protobuf::internal::GetProto3PreserveUnknownsDefault()
             ? _internal_metadata_.unknown_fields()
             : _internal_metadata_.default_instance()),
        target);
  }
  // @@protoc_insertion_point(serialize_to_array_end:opencv_tensorflow.TensorShapeProto.Dim)
  return target;
}
|
||||
|
||||
size_t TensorShapeProto_Dim::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
size_t total_size = 0;
|
||||
|
||||
if ((_internal_metadata_.have_unknown_fields() && ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormat::ComputeUnknownFieldsSize(
|
||||
(::google::protobuf::internal::GetProto3PreserveUnknownsDefault() ? _internal_metadata_.unknown_fields() : _internal_metadata_.default_instance()));
|
||||
}
|
||||
// string name = 2;
|
||||
if (this->name().size() > 0) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::StringSize(
|
||||
this->name());
|
||||
}
|
||||
|
||||
// int64 size = 1;
|
||||
if (this->size() != 0) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int64Size(
|
||||
this->size());
|
||||
}
|
||||
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
|
||||
_cached_size_ = cached_size;
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_END();
|
||||
return total_size;
|
||||
}
|
||||
|
||||
// Generic merge entry point. If `from` can be cast to TensorShapeProto_Dim
// the typed overload is used; otherwise the merge goes through reflection.
void TensorShapeProto_Dim::MergeFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_merge_from_start:opencv_tensorflow.TensorShapeProto.Dim)
  GOOGLE_DCHECK_NE(&from, this);
  const TensorShapeProto_Dim* typed_from =
      ::google::protobuf::internal::DynamicCastToGenerated<const TensorShapeProto_Dim>(
          &from);
  if (typed_from != NULL) {
    // @@protoc_insertion_point(generalized_merge_from_cast_success:opencv_tensorflow.TensorShapeProto.Dim)
    MergeFrom(*typed_from);
    return;
  }
  // @@protoc_insertion_point(generalized_merge_from_cast_fail:opencv_tensorflow.TensorShapeProto.Dim)
  ::google::protobuf::internal::ReflectionOps::Merge(from, this);
}
|
||||
|
||||
void TensorShapeProto_Dim::MergeFrom(const TensorShapeProto_Dim& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
if (from.name().size() > 0) {
|
||||
set_name(from.name());
|
||||
}
|
||||
if (from.size() != 0) {
|
||||
set_size(from.size());
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces this object's contents with `from` (generic Message overload).
// Copying an object onto itself is a no-op.
void TensorShapeProto_Dim::CopyFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_copy_from_start:opencv_tensorflow.TensorShapeProto.Dim)
  if (&from != this) {
    Clear();
    MergeFrom(from);
  }
}
|
||||
|
||||
void TensorShapeProto_Dim::CopyFrom(const TensorShapeProto_Dim& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
// Always reports the message as initialized.
bool TensorShapeProto_Dim::IsInitialized() const {
  return true;
}
|
||||
|
||||
// Exchanges contents with `other`.
// When both objects report the same arena the members are swapped directly.
// Otherwise a temporary created via New() on this object's arena receives
// `other`'s contents, `other` receives a copy of this object, and this object
// swaps with the temporary. The temporary is deleted only when this object
// has no arena.
void TensorShapeProto_Dim::Swap(TensorShapeProto_Dim* other) {
  if (other == this) return;

  if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) {
    InternalSwap(other);
    return;
  }

  TensorShapeProto_Dim* scratch = New(GetArenaNoVirtual());
  scratch->MergeFrom(*other);
  other->CopyFrom(*this);
  InternalSwap(scratch);
  if (GetArenaNoVirtual() == NULL) {
    delete scratch;
  }
}
|
||||
// Swaps members with `other` without any cross-arena copy. A debug check
// asserts that both objects report the same arena.
void TensorShapeProto_Dim::UnsafeArenaSwap(TensorShapeProto_Dim* other) {
  if (other != this) {
    GOOGLE_DCHECK(GetArenaNoVirtual() == other->GetArenaNoVirtual());
    InternalSwap(other);
  }
}
|
||||
// Member-wise exchange of `name_`, `size_`, the internal metadata and the
// cached size with `other`.
void TensorShapeProto_Dim::InternalSwap(TensorShapeProto_Dim* other) {
  name_.Swap(&other->name_);
  std::swap(size_, other->size_);
  _internal_metadata_.Swap(&other->_internal_metadata_);
  std::swap(_cached_size_, other->_cached_size_);
}
|
||||
|
||||
// Returns this message type's entry in the file-level metadata table, after
// calling protobuf_AssignDescriptorsOnce().
::google::protobuf::Metadata TensorShapeProto_Dim::GetMetadata() const {
  protobuf_tensor_5fshape_2eproto::protobuf_AssignDescriptorsOnce();
  return ::protobuf_tensor_5fshape_2eproto::file_level_metadata[kIndexInFileMessages];
}
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty: this function performs no work for TensorShapeProto.
void TensorShapeProto::InitAsDefaultInstance() {
}
|
||||
// Out-of-class definitions for TensorShapeProto's static field-number
// constants. They are compiled everywhere except MSVC versions below 1900.
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int TensorShapeProto::kDimFieldNumber;
|
||||
const int TensorShapeProto::kUnknownRankFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor. InitDefaultsTensorShapeProto() is called for every
// object except the one at internal_default_instance(); member setup is then
// delegated to SharedCtor().
TensorShapeProto::TensorShapeProto()
    : ::google::protobuf::Message(),
      _internal_metadata_(NULL) {
  if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) {
    ::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto();
  }
  SharedCtor();
  // @@protoc_insertion_point(constructor:opencv_tensorflow.TensorShapeProto)
}
|
||||
// Arena constructor: the metadata and the repeated `dim_` field are both
// constructed with `arena`, then defaults are initialized, SharedCtor() runs
// and RegisterArenaDtor(arena) is called.
TensorShapeProto::TensorShapeProto(::google::protobuf::Arena* arena)
    : ::google::protobuf::Message(),
      _internal_metadata_(arena),
      dim_(arena) {
  ::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto();
  SharedCtor();
  RegisterArenaDtor(arena);
  // @@protoc_insertion_point(arena_constructor:opencv_tensorflow.TensorShapeProto)
}
|
||||
// Copy constructor: `dim_` is copy-constructed from `from`, the cached size
// starts at 0, and the metadata is built with NULL and then merged from
// `from`. `unknown_rank_` is copied in the body.
TensorShapeProto::TensorShapeProto(const TensorShapeProto& from)
    : ::google::protobuf::Message(),
      _internal_metadata_(NULL),
      dim_(from.dim_),
      _cached_size_(0) {
  unknown_rank_ = from.unknown_rank_;
  _internal_metadata_.MergeFrom(from._internal_metadata_);
  // @@protoc_insertion_point(copy_constructor:opencv_tensorflow.TensorShapeProto)
}
|
||||
|
||||
// Shared constructor tail: resets the cached size to 0 and `unknown_rank_`
// to false.
void TensorShapeProto::SharedCtor() {
  _cached_size_ = 0;
  unknown_rank_ = false;
}
|
||||
|
||||
// Destructor: all explicit teardown is delegated to SharedDtor().
TensorShapeProto::~TensorShapeProto() {
  // @@protoc_insertion_point(destructor:opencv_tensorflow.TensorShapeProto)
  SharedDtor();
}
|
||||
|
||||
// Shared destructor tail. The only statement is a debug check that this
// object is not associated with an arena.
void TensorShapeProto::SharedDtor() {
  GOOGLE_DCHECK(GetArenaNoVirtual() == NULL);
}
|
||||
|
||||
void TensorShapeProto::ArenaDtor(void* object) {
|
||||
TensorShapeProto* _this = reinterpret_cast< TensorShapeProto* >(object);
|
||||
(void)_this;
|
||||
}
|
||||
// Intentionally empty: nothing is registered with `arena` for this type.
void TensorShapeProto::RegisterArenaDtor(::google::protobuf::Arena* arena) {
}
|
||||
// Stores `size` in the mutable `_cached_size_` member. The store is
// bracketed by the GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN/END macros.
void TensorShapeProto::SetCachedSize(int size) const {
  GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
  _cached_size_ = size;
  GOOGLE_SAFE_CONCURRENT_WRITES_END();
}
|
||||
// Returns the descriptor stored in this type's file-level metadata entry,
// after calling protobuf_AssignDescriptorsOnce().
const ::google::protobuf::Descriptor* TensorShapeProto::descriptor() {
  ::protobuf_tensor_5fshape_2eproto::protobuf_AssignDescriptorsOnce();
  return ::protobuf_tensor_5fshape_2eproto::file_level_metadata[kIndexInFileMessages].descriptor;
}
|
||||
|
||||
// Returns a reference to the shared default instance, calling
// InitDefaultsTensorShapeProto() first.
const TensorShapeProto& TensorShapeProto::default_instance() {
  ::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto();
  return *internal_default_instance();
}
|
||||
|
||||
// Creates a new TensorShapeProto through Arena::CreateMessage, forwarding
// `arena` unchanged.
TensorShapeProto* TensorShapeProto::New(::google::protobuf::Arena* arena) const {
  return ::google::protobuf::Arena::CreateMessage<TensorShapeProto>(arena);
}
|
||||
|
||||
void TensorShapeProto::Clear() {
|
||||
// @@protoc_insertion_point(message_clear_start:opencv_tensorflow.TensorShapeProto)
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
// Prevent compiler warnings about cached_has_bits being unused
|
||||
(void) cached_has_bits;
|
||||
|
||||
dim_.Clear();
|
||||
unknown_rank_ = false;
|
||||
_internal_metadata_.Clear();
|
||||
}
|
||||
|
||||
// Reads tagged fields from `input` and merges them into this message.
//   * field 2, low tag byte 18: one message is parsed into a new `dim` entry;
//   * field 3, low tag byte 24: a bool is read into `unknown_rank_`;
//   * anything else goes to `handle_unusual`, where a zero tag ends parsing
//     successfully and any other tag is skipped into the unknown-field set.
// Returns true on the zero-tag exit, false as soon as a read wrapped in DO_
// fails.
bool TensorShapeProto::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
  ::google::protobuf::uint32 tag;
  // @@protoc_insertion_point(parse_start:opencv_tensorflow.TensorShapeProto)
  for (;;) {
    ::std::pair< ::google::protobuf::uint32, bool> tag_and_flag =
        input->ReadTagWithCutoffNoLastTag(127u);
    tag = tag_and_flag.first;
    if (!tag_and_flag.second) goto handle_unusual;

    switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
      // repeated .opencv_tensorflow.TensorShapeProto.Dim dim = 2;
      case 2: {
        if (static_cast< ::google::protobuf::uint8>(tag) !=
            static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_(::google::protobuf::internal::WireFormatLite::ReadMessage(input, add_dim()));
        break;
      }

      // bool unknown_rank = 3;
      case 3: {
        if (static_cast< ::google::protobuf::uint8>(tag) !=
            static_cast< ::google::protobuf::uint8>(24u /* 24 & 0xFF */)) {
          goto handle_unusual;
        }
        DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
                 bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>(
             input, &unknown_rank_)));
        break;
      }

      default: {
      handle_unusual:
        if (tag == 0) {
          goto success;
        }
        DO_(::google::protobuf::internal::WireFormat::SkipField(
            input, tag, _internal_metadata_.mutable_unknown_fields()));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:opencv_tensorflow.TensorShapeProto)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:opencv_tensorflow.TensorShapeProto)
  return false;
#undef DO_
}
|
||||
|
||||
// Streams this message to `output`: every `dim` entry as field 2, then
// `unknown_rank` as field 3 when it is true, then any preserved unknown
// fields.
void TensorShapeProto::SerializeWithCachedSizes(
    ::google::protobuf::io::CodedOutputStream* output) const {
  // @@protoc_insertion_point(serialize_start:opencv_tensorflow.TensorShapeProto)
  typedef ::google::protobuf::internal::WireFormatLite WireLite;

  ::google::protobuf::uint32 has_bits_snapshot = 0;
  (void) has_bits_snapshot;  // declared but never read in this function

  // repeated .opencv_tensorflow.TensorShapeProto.Dim dim = 2;
  const unsigned int dim_count = static_cast<unsigned int>(this->dim_size());
  for (unsigned int idx = 0; idx < dim_count; idx++) {
    WireLite::WriteMessageMaybeToArray(
        2, this->dim(static_cast<int>(idx)), output);
  }

  // bool unknown_rank = 3;
  if (this->unknown_rank()) {
    WireLite::WriteBool(3, this->unknown_rank(), output);
  }

  if (_internal_metadata_.have_unknown_fields() &&
      ::google::protobuf::internal::GetProto3PreserveUnknownsDefault()) {
    ::google::protobuf::internal::WireFormat::SerializeUnknownFields(
        (::google::protobuf::internal::GetProto3PreserveUnknownsDefault()
             ? _internal_metadata_.unknown_fields()
             : _internal_metadata_.default_instance()),
        output);
  }
  // @@protoc_insertion_point(serialize_end:opencv_tensorflow.TensorShapeProto)
}
|
||||
|
||||
// Serializes this message into the buffer at `target` and returns the pointer
// produced by the last write. `deterministic` is forwarded to the nested
// `dim` message writes. Output order: all `dim` entries (field 2),
// `unknown_rank` (field 3) when true, then any preserved unknown fields.
::google::protobuf::uint8* TensorShapeProto::InternalSerializeWithCachedSizesToArray(
    bool deterministic, ::google::protobuf::uint8* target) const {
  (void)deterministic; // Unused
  // @@protoc_insertion_point(serialize_to_array_start:opencv_tensorflow.TensorShapeProto)
  typedef ::google::protobuf::internal::WireFormatLite WireLite;

  ::google::protobuf::uint32 has_bits_snapshot = 0;
  (void) has_bits_snapshot;  // declared but never read in this function

  // repeated .opencv_tensorflow.TensorShapeProto.Dim dim = 2;
  const unsigned int dim_count = static_cast<unsigned int>(this->dim_size());
  for (unsigned int idx = 0; idx < dim_count; idx++) {
    target = WireLite::InternalWriteMessageToArray(
        2, this->dim(static_cast<int>(idx)), deterministic, target);
  }

  // bool unknown_rank = 3;
  if (this->unknown_rank()) {
    target = WireLite::WriteBoolToArray(3, this->unknown_rank(), target);
  }

  if (_internal_metadata_.have_unknown_fields() &&
      ::google::protobuf::internal::GetProto3PreserveUnknownsDefault()) {
    target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
        (::google::protobuf::internal::GetProto3PreserveUnknownsDefault()
             ? _internal_metadata_.unknown_fields()
             : _internal_metadata_.default_instance()),
        target);
  }
  // @@protoc_insertion_point(serialize_to_array_end:opencv_tensorflow.TensorShapeProto)
  return target;
}
|
||||
|
||||
size_t TensorShapeProto::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:opencv_tensorflow.TensorShapeProto)
|
||||
size_t total_size = 0;
|
||||
|
||||
if ((_internal_metadata_.have_unknown_fields() && ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormat::ComputeUnknownFieldsSize(
|
||||
(::google::protobuf::internal::GetProto3PreserveUnknownsDefault() ? _internal_metadata_.unknown_fields() : _internal_metadata_.default_instance()));
|
||||
}
|
||||
// repeated .opencv_tensorflow.TensorShapeProto.Dim dim = 2;
|
||||
{
|
||||
unsigned int count = static_cast<unsigned int>(this->dim_size());
|
||||
total_size += 1UL * count;
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormatLite::MessageSize(
|
||||
this->dim(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
// bool unknown_rank = 3;
|
||||
if (this->unknown_rank() != 0) {
|
||||
total_size += 1 + 1;
|
||||
}
|
||||
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
|
||||
_cached_size_ = cached_size;
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_END();
|
||||
return total_size;
|
||||
}
|
||||
|
||||
// Generic merge entry point. If `from` can be cast to TensorShapeProto the
// typed overload is used; otherwise the merge goes through reflection.
void TensorShapeProto::MergeFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_merge_from_start:opencv_tensorflow.TensorShapeProto)
  GOOGLE_DCHECK_NE(&from, this);
  const TensorShapeProto* typed_from =
      ::google::protobuf::internal::DynamicCastToGenerated<const TensorShapeProto>(
          &from);
  if (typed_from != NULL) {
    // @@protoc_insertion_point(generalized_merge_from_cast_success:opencv_tensorflow.TensorShapeProto)
    MergeFrom(*typed_from);
    return;
  }
  // @@protoc_insertion_point(generalized_merge_from_cast_fail:opencv_tensorflow.TensorShapeProto)
  ::google::protobuf::internal::ReflectionOps::Merge(from, this);
}
|
||||
|
||||
void TensorShapeProto::MergeFrom(const TensorShapeProto& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:opencv_tensorflow.TensorShapeProto)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
dim_.MergeFrom(from.dim_);
|
||||
if (from.unknown_rank() != 0) {
|
||||
set_unknown_rank(from.unknown_rank());
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces this object's contents with `from` (generic Message overload).
// Copying an object onto itself is a no-op.
void TensorShapeProto::CopyFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_copy_from_start:opencv_tensorflow.TensorShapeProto)
  if (&from != this) {
    Clear();
    MergeFrom(from);
  }
}
|
||||
|
||||
void TensorShapeProto::CopyFrom(const TensorShapeProto& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:opencv_tensorflow.TensorShapeProto)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
// Always reports the message as initialized.
bool TensorShapeProto::IsInitialized() const {
  return true;
}
|
||||
|
||||
// Exchanges contents with `other`.
// When both objects report the same arena the members are swapped directly.
// Otherwise a temporary created via New() on this object's arena receives
// `other`'s contents, `other` receives a copy of this object, and this object
// swaps with the temporary. The temporary is deleted only when this object
// has no arena.
void TensorShapeProto::Swap(TensorShapeProto* other) {
  if (other == this) return;

  if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) {
    InternalSwap(other);
    return;
  }

  TensorShapeProto* scratch = New(GetArenaNoVirtual());
  scratch->MergeFrom(*other);
  other->CopyFrom(*this);
  InternalSwap(scratch);
  if (GetArenaNoVirtual() == NULL) {
    delete scratch;
  }
}
|
||||
// Swaps members with `other` without any cross-arena copy. A debug check
// asserts that both objects report the same arena.
void TensorShapeProto::UnsafeArenaSwap(TensorShapeProto* other) {
  if (other != this) {
    GOOGLE_DCHECK(GetArenaNoVirtual() == other->GetArenaNoVirtual());
    InternalSwap(other);
  }
}
|
||||
// Member-wise exchange of `dim_`, `unknown_rank_`, the internal metadata and
// the cached size with `other`.
void TensorShapeProto::InternalSwap(TensorShapeProto* other) {
  dim_.InternalSwap(&other->dim_);
  std::swap(unknown_rank_, other->unknown_rank_);
  _internal_metadata_.Swap(&other->_internal_metadata_);
  std::swap(_cached_size_, other->_cached_size_);
}
|
||||
|
||||
// Returns this message type's entry in the file-level metadata table, after
// calling protobuf_AssignDescriptorsOnce().
::google::protobuf::Metadata TensorShapeProto::GetMetadata() const {
  protobuf_tensor_5fshape_2eproto::protobuf_AssignDescriptorsOnce();
  return ::protobuf_tensor_5fshape_2eproto::file_level_metadata[kIndexInFileMessages];
}
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
491
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor_shape.pb.h
vendored
Normal file
491
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/tensor_shape.pb.h
vendored
Normal file
@@ -0,0 +1,491 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: tensor_shape.proto
|
||||
|
||||
#ifndef PROTOBUF_tensor_5fshape_2eproto__INCLUDED
|
||||
#define PROTOBUF_tensor_5fshape_2eproto__INCLUDED
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3005000
|
||||
#error This file was generated by a newer version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please update
|
||||
#error your headers.
|
||||
#endif
|
||||
#if 3005001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION
|
||||
#error This file was generated by an older version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please
|
||||
#error regenerate this file with a newer version of protoc.
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
#include <google/protobuf/generated_message_table_driven.h>
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
#include <google/protobuf/metadata.h>
|
||||
#include <google/protobuf/message.h>
|
||||
#include <google/protobuf/repeated_field.h> // IWYU pragma: export
|
||||
#include <google/protobuf/extension_set.h> // IWYU pragma: export
|
||||
#include <google/protobuf/unknown_field_set.h>
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace protobuf_tensor_5fshape_2eproto {
|
||||
// Internal implementation detail -- do not use these members.
|
||||
struct TableStruct {
|
||||
static const ::google::protobuf::internal::ParseTableField entries[];
|
||||
static const ::google::protobuf::internal::AuxillaryParseTableField aux[];
|
||||
static const ::google::protobuf::internal::ParseTable schema[2];
|
||||
static const ::google::protobuf::internal::FieldMetadata field_metadata[];
|
||||
static const ::google::protobuf::internal::SerializationTable serialization_table[];
|
||||
static const ::google::protobuf::uint32 offsets[];
|
||||
};
|
||||
void AddDescriptors();
|
||||
void InitDefaultsTensorShapeProto_DimImpl();
|
||||
void InitDefaultsTensorShapeProto_Dim();
|
||||
void InitDefaultsTensorShapeProtoImpl();
|
||||
void InitDefaultsTensorShapeProto();
|
||||
inline void InitDefaults() {
|
||||
InitDefaultsTensorShapeProto_Dim();
|
||||
InitDefaultsTensorShapeProto();
|
||||
}
|
||||
} // namespace protobuf_tensor_5fshape_2eproto
|
||||
// Forward declarations of the message classes and their default-instance
// holder objects.
namespace opencv_tensorflow {

class TensorShapeProto;
class TensorShapeProtoDefaultTypeInternal;
extern TensorShapeProtoDefaultTypeInternal _TensorShapeProto_default_instance_;

class TensorShapeProto_Dim;
class TensorShapeProto_DimDefaultTypeInternal;
extern TensorShapeProto_DimDefaultTypeInternal _TensorShapeProto_Dim_default_instance_;

}  // namespace opencv_tensorflow
|
||||
namespace opencv_tensorflow {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
class TensorShapeProto_Dim : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_tensorflow.TensorShapeProto.Dim) */ {
|
||||
public:
|
||||
TensorShapeProto_Dim();
|
||||
virtual ~TensorShapeProto_Dim();
|
||||
|
||||
TensorShapeProto_Dim(const TensorShapeProto_Dim& from);
|
||||
|
||||
inline TensorShapeProto_Dim& operator=(const TensorShapeProto_Dim& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
TensorShapeProto_Dim(TensorShapeProto_Dim&& from) noexcept
|
||||
: TensorShapeProto_Dim() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline TensorShapeProto_Dim& operator=(TensorShapeProto_Dim&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline ::google::protobuf::Arena* GetArena() const PROTOBUF_FINAL {
|
||||
return GetArenaNoVirtual();
|
||||
}
|
||||
inline void* GetMaybeArenaPointer() const PROTOBUF_FINAL {
|
||||
return MaybeArenaPtr();
|
||||
}
|
||||
static const ::google::protobuf::Descriptor* descriptor();
|
||||
static const TensorShapeProto_Dim& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const TensorShapeProto_Dim* internal_default_instance() {
|
||||
return reinterpret_cast<const TensorShapeProto_Dim*>(
|
||||
&_TensorShapeProto_Dim_default_instance_);
|
||||
}
|
||||
static PROTOBUF_CONSTEXPR int const kIndexInFileMessages =
|
||||
0;
|
||||
|
||||
void UnsafeArenaSwap(TensorShapeProto_Dim* other);
|
||||
void Swap(TensorShapeProto_Dim* other);
|
||||
friend void swap(TensorShapeProto_Dim& a, TensorShapeProto_Dim& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline TensorShapeProto_Dim* New() const PROTOBUF_FINAL { return New(NULL); }
|
||||
|
||||
TensorShapeProto_Dim* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL;
|
||||
void CopyFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void MergeFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void CopyFrom(const TensorShapeProto_Dim& from);
|
||||
void MergeFrom(const TensorShapeProto_Dim& from);
|
||||
void Clear() PROTOBUF_FINAL;
|
||||
bool IsInitialized() const PROTOBUF_FINAL;
|
||||
|
||||
size_t ByteSizeLong() const PROTOBUF_FINAL;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL;
|
||||
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
|
||||
bool deterministic, ::google::protobuf::uint8* target) const PROTOBUF_FINAL;
|
||||
int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; }
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const PROTOBUF_FINAL;
|
||||
void InternalSwap(TensorShapeProto_Dim* other);
|
||||
protected:
|
||||
explicit TensorShapeProto_Dim(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
static void ArenaDtor(void* object);
|
||||
inline void RegisterArenaDtor(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return _internal_metadata_.arena();
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return _internal_metadata_.raw_arena_ptr();
|
||||
}
|
||||
public:
|
||||
|
||||
::google::protobuf::Metadata GetMetadata() const PROTOBUF_FINAL;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// string name = 2;
|
||||
void clear_name();
|
||||
static const int kNameFieldNumber = 2;
|
||||
const ::std::string& name() const;
|
||||
void set_name(const ::std::string& value);
|
||||
#if LANG_CXX11
|
||||
void set_name(::std::string&& value);
|
||||
#endif
|
||||
void set_name(const char* value);
|
||||
void set_name(const char* value, size_t size);
|
||||
::std::string* mutable_name();
|
||||
::std::string* release_name();
|
||||
void set_allocated_name(::std::string* name);
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
::std::string* unsafe_arena_release_name();
|
||||
PROTOBUF_RUNTIME_DEPRECATED("The unsafe_arena_ accessors for"
|
||||
" string fields are deprecated and will be removed in a"
|
||||
" future release.")
|
||||
void unsafe_arena_set_allocated_name(
|
||||
::std::string* name);
|
||||
|
||||
// int64 size = 1;
|
||||
void clear_size();
|
||||
static const int kSizeFieldNumber = 1;
|
||||
::google::protobuf::int64 size() const;
|
||||
void set_size(::google::protobuf::int64 value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:opencv_tensorflow.TensorShapeProto.Dim)
|
||||
private:
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
|
||||
template <typename T> friend class ::google::protobuf::Arena::InternalHelper;
|
||||
typedef void InternalArenaConstructable_;
|
||||
typedef void DestructorSkippable_;
|
||||
::google::protobuf::internal::ArenaStringPtr name_;
|
||||
::google::protobuf::int64 size_;
|
||||
mutable int _cached_size_;
|
||||
friend struct ::protobuf_tensor_5fshape_2eproto::TableStruct;
|
||||
friend void ::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProto_DimImpl();
|
||||
};
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
class TensorShapeProto : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_tensorflow.TensorShapeProto) */ {
|
||||
public:
|
||||
TensorShapeProto();
|
||||
virtual ~TensorShapeProto();
|
||||
|
||||
TensorShapeProto(const TensorShapeProto& from);
|
||||
|
||||
inline TensorShapeProto& operator=(const TensorShapeProto& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
TensorShapeProto(TensorShapeProto&& from) noexcept
|
||||
: TensorShapeProto() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline TensorShapeProto& operator=(TensorShapeProto&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline ::google::protobuf::Arena* GetArena() const PROTOBUF_FINAL {
|
||||
return GetArenaNoVirtual();
|
||||
}
|
||||
inline void* GetMaybeArenaPointer() const PROTOBUF_FINAL {
|
||||
return MaybeArenaPtr();
|
||||
}
|
||||
static const ::google::protobuf::Descriptor* descriptor();
|
||||
static const TensorShapeProto& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const TensorShapeProto* internal_default_instance() {
|
||||
return reinterpret_cast<const TensorShapeProto*>(
|
||||
&_TensorShapeProto_default_instance_);
|
||||
}
|
||||
static PROTOBUF_CONSTEXPR int const kIndexInFileMessages =
|
||||
1;
|
||||
|
||||
void UnsafeArenaSwap(TensorShapeProto* other);
|
||||
void Swap(TensorShapeProto* other);
|
||||
friend void swap(TensorShapeProto& a, TensorShapeProto& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline TensorShapeProto* New() const PROTOBUF_FINAL { return New(NULL); }
|
||||
|
||||
TensorShapeProto* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL;
|
||||
void CopyFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void MergeFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void CopyFrom(const TensorShapeProto& from);
|
||||
void MergeFrom(const TensorShapeProto& from);
|
||||
void Clear() PROTOBUF_FINAL;
|
||||
bool IsInitialized() const PROTOBUF_FINAL;
|
||||
|
||||
size_t ByteSizeLong() const PROTOBUF_FINAL;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL;
|
||||
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
|
||||
bool deterministic, ::google::protobuf::uint8* target) const PROTOBUF_FINAL;
|
||||
int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; }
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const PROTOBUF_FINAL;
|
||||
void InternalSwap(TensorShapeProto* other);
|
||||
protected:
|
||||
explicit TensorShapeProto(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
static void ArenaDtor(void* object);
|
||||
inline void RegisterArenaDtor(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return _internal_metadata_.arena();
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return _internal_metadata_.raw_arena_ptr();
|
||||
}
|
||||
public:
|
||||
|
||||
::google::protobuf::Metadata GetMetadata() const PROTOBUF_FINAL;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
typedef TensorShapeProto_Dim Dim;
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// repeated .opencv_tensorflow.TensorShapeProto.Dim dim = 2;
|
||||
int dim_size() const;
|
||||
void clear_dim();
|
||||
static const int kDimFieldNumber = 2;
|
||||
const ::opencv_tensorflow::TensorShapeProto_Dim& dim(int index) const;
|
||||
::opencv_tensorflow::TensorShapeProto_Dim* mutable_dim(int index);
|
||||
::opencv_tensorflow::TensorShapeProto_Dim* add_dim();
|
||||
::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::TensorShapeProto_Dim >*
|
||||
mutable_dim();
|
||||
const ::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::TensorShapeProto_Dim >&
|
||||
dim() const;
|
||||
|
||||
// bool unknown_rank = 3;
|
||||
void clear_unknown_rank();
|
||||
static const int kUnknownRankFieldNumber = 3;
|
||||
bool unknown_rank() const;
|
||||
void set_unknown_rank(bool value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:opencv_tensorflow.TensorShapeProto)
|
||||
private:
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
|
||||
template <typename T> friend class ::google::protobuf::Arena::InternalHelper;
|
||||
typedef void InternalArenaConstructable_;
|
||||
typedef void DestructorSkippable_;
|
||||
::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::TensorShapeProto_Dim > dim_;
|
||||
bool unknown_rank_;
|
||||
mutable int _cached_size_;
|
||||
friend struct ::protobuf_tensor_5fshape_2eproto::TableStruct;
|
||||
friend void ::protobuf_tensor_5fshape_2eproto::InitDefaultsTensorShapeProtoImpl();
|
||||
};
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif // __GNUC__
|
||||
// TensorShapeProto_Dim
|
||||
|
||||
// int64 size = 1;
|
||||
// Resets the `size` field (int64 size = 1) to zero.
inline void TensorShapeProto_Dim::clear_size() {
  size_ = GOOGLE_LONGLONG(0);
}
|
||||
// Getter for the `size` field; returns the stored value unchanged.
inline ::google::protobuf::int64 TensorShapeProto_Dim::size() const {
  // @@protoc_insertion_point(field_get:opencv_tensorflow.TensorShapeProto.Dim.size)
  return size_;
}
|
||||
// Setter for the `size` field; stores `value` as-is.
inline void TensorShapeProto_Dim::set_size(::google::protobuf::int64 value) {
  size_ = value;
  // @@protoc_insertion_point(field_set:opencv_tensorflow.TensorShapeProto.Dim.size)
}
|
||||
|
||||
// string name = 2;
|
||||
// Clears the `name` field (string name = 2) back to the shared empty string.
inline void TensorShapeProto_Dim::clear_name() {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.ClearToEmpty(empty_default, GetArenaNoVirtual());
}
|
||||
inline const ::std::string& TensorShapeProto_Dim::name() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorShapeProto.Dim.name)
|
||||
return name_.Get();
|
||||
}
|
||||
// Stores a copy of `value` in the `name` field, using the message's arena
// (as reported by GetArenaNoVirtual()) for the string storage.
inline void TensorShapeProto_Dim::set_name(const ::std::string& value) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.Set(empty_default, value, GetArenaNoVirtual());
  // @@protoc_insertion_point(field_set:opencv_tensorflow.TensorShapeProto.Dim.name)
}
|
||||
#if LANG_CXX11
// Rvalue overload (only compiled when LANG_CXX11 is set): forwards `value`
// to the `name` field via ::std::move.
inline void TensorShapeProto_Dim::set_name(::std::string&& value) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.Set(empty_default, ::std::move(value), GetArenaNoVirtual());
  // @@protoc_insertion_point(field_set_rvalue:opencv_tensorflow.TensorShapeProto.Dim.name)
}
#endif
|
||||
// C-string overload: `value` must be non-NULL (checked by GOOGLE_DCHECK);
// its contents are copied into the `name` field.
inline void TensorShapeProto_Dim::set_name(const char* value) {
  GOOGLE_DCHECK(value != NULL);
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.Set(empty_default, ::std::string(value), GetArenaNoVirtual());
  // @@protoc_insertion_point(field_set_char:opencv_tensorflow.TensorShapeProto.Dim.name)
}
|
||||
// Pointer + length overload: copies exactly `size` bytes starting at `value`
// into the `name` field.
inline void TensorShapeProto_Dim::set_name(const char* value,
                                           size_t size) {
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  // `value` is already a const char*, so no cast is needed to build the string.
  name_.Set(empty_default, ::std::string(value, size), GetArenaNoVirtual());
  // @@protoc_insertion_point(field_set_pointer:opencv_tensorflow.TensorShapeProto.Dim.name)
}
|
||||
inline ::std::string* TensorShapeProto_Dim::mutable_name() {
|
||||
|
||||
// @@protoc_insertion_point(field_mutable:opencv_tensorflow.TensorShapeProto.Dim.name)
|
||||
return name_.Mutable(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
inline ::std::string* TensorShapeProto_Dim::release_name() {
|
||||
// @@protoc_insertion_point(field_release:opencv_tensorflow.TensorShapeProto.Dim.name)
|
||||
|
||||
return name_.Release(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), GetArenaNoVirtual());
|
||||
}
|
||||
// Hands the caller-allocated string `name` (which may be NULL) to the `name`
// field via ArenaStringPtr::SetAllocated.
inline void TensorShapeProto_Dim::set_allocated_name(::std::string* name) {
  // The generator emitted an empty NULL / non-NULL branch here; it had no
  // statements, so it is omitted.
  const ::std::string* const empty_default =
      &::google::protobuf::internal::GetEmptyStringAlreadyInited();
  name_.SetAllocated(empty_default, name, GetArenaNoVirtual());
  // @@protoc_insertion_point(field_set_allocated:opencv_tensorflow.TensorShapeProto.Dim.name)
}
|
||||
inline ::std::string* TensorShapeProto_Dim::unsafe_arena_release_name() {
|
||||
// @@protoc_insertion_point(field_unsafe_arena_release:opencv_tensorflow.TensorShapeProto.Dim.name)
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
|
||||
return name_.UnsafeArenaRelease(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
GetArenaNoVirtual());
|
||||
}
|
||||
inline void TensorShapeProto_Dim::unsafe_arena_set_allocated_name(
|
||||
::std::string* name) {
|
||||
GOOGLE_DCHECK(GetArenaNoVirtual() != NULL);
|
||||
if (name != NULL) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
name_.UnsafeArenaSetAllocated(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
|
||||
name, GetArenaNoVirtual());
|
||||
// @@protoc_insertion_point(field_unsafe_arena_set_allocated:opencv_tensorflow.TensorShapeProto.Dim.name)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// TensorShapeProto
|
||||
|
||||
// repeated .opencv_tensorflow.TensorShapeProto.Dim dim = 2;
|
||||
inline int TensorShapeProto::dim_size() const {
|
||||
return dim_.size();
|
||||
}
|
||||
// Clears the repeated `dim` field.
inline void TensorShapeProto::clear_dim() {
  dim_.Clear();
}
|
||||
// Read-only access to the `dim` entry at position `index`.
inline const ::opencv_tensorflow::TensorShapeProto_Dim&
TensorShapeProto::dim(int index) const {
  // @@protoc_insertion_point(field_get:opencv_tensorflow.TensorShapeProto.dim)
  return dim_.Get(index);
}
|
||||
// Writable pointer to the `dim` entry at position `index`.
inline ::opencv_tensorflow::TensorShapeProto_Dim*
TensorShapeProto::mutable_dim(int index) {
  // @@protoc_insertion_point(field_mutable:opencv_tensorflow.TensorShapeProto.dim)
  return dim_.Mutable(index);
}
|
||||
// Appends a new entry to the repeated `dim` field and returns a pointer to it.
inline ::opencv_tensorflow::TensorShapeProto_Dim*
TensorShapeProto::add_dim() {
  // @@protoc_insertion_point(field_add:opencv_tensorflow.TensorShapeProto.dim)
  return dim_.Add();
}
|
||||
// Writable pointer to the whole repeated `dim` container.
inline ::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::TensorShapeProto_Dim >*
TensorShapeProto::mutable_dim() {
  // @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.TensorShapeProto.dim)
  return &dim_;
}
|
||||
// Read-only reference to the whole repeated `dim` container.
inline const ::google::protobuf::RepeatedPtrField< ::opencv_tensorflow::TensorShapeProto_Dim >&
TensorShapeProto::dim() const {
  // @@protoc_insertion_point(field_list:opencv_tensorflow.TensorShapeProto.dim)
  return dim_;
}
|
||||
|
||||
// bool unknown_rank = 3;
|
||||
// Resets the `unknown_rank` field (bool unknown_rank = 3) to false.
inline void TensorShapeProto::clear_unknown_rank() {
  unknown_rank_ = false;
}
|
||||
inline bool TensorShapeProto::unknown_rank() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.TensorShapeProto.unknown_rank)
|
||||
return unknown_rank_;
|
||||
}
|
||||
// Setter for the `unknown_rank` field; stores `value` as-is.
inline void TensorShapeProto::set_unknown_rank(bool value) {
  unknown_rank_ = value;
  // @@protoc_insertion_point(field_set:opencv_tensorflow.TensorShapeProto.unknown_rank)
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
||||
#endif // PROTOBUF_tensor_5fshape_2eproto__INCLUDED
|
||||
144
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/types.pb.cc
vendored
Normal file
144
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/types.pb.cc
vendored
Normal file
@@ -0,0 +1,144 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: types.proto
|
||||
|
||||
#include "types.pb.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
#include <google/protobuf/stubs/once.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/wire_format_lite_inl.h>
|
||||
#include <google/protobuf/descriptor.h>
|
||||
#include <google/protobuf/generated_message_reflection.h>
|
||||
#include <google/protobuf/reflection_ops.h>
|
||||
#include <google/protobuf/wire_format.h>
|
||||
// This is a temporary google only hack
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
#include "third_party/protobuf/version.h"
|
||||
#endif
|
||||
// @@protoc_insertion_point(includes)
|
||||
namespace opencv_tensorflow {
|
||||
} // namespace opencv_tensorflow
|
||||
namespace protobuf_types_2eproto {
|
||||
// Tables handed to AssignDescriptors() for types.proto. The file declares one
// enum and no messages, so the schema and default-instance tables are NULL and
// the offsets table is empty.

// One slot; entry 0 is what DataType_descriptor() returns.
const ::google::protobuf::EnumDescriptor* file_level_enum_descriptors[1];

const ::google::protobuf::uint32 TableStruct::offsets[1] = {};

static const ::google::protobuf::internal::MigrationSchema* schemas = NULL;

static const ::google::protobuf::Message* const* file_default_instances = NULL;
|
||||
|
||||
void protobuf_AssignDescriptors() {
|
||||
AddDescriptors();
|
||||
::google::protobuf::MessageFactory* factory = NULL;
|
||||
AssignDescriptors(
|
||||
"types.proto", schemas, file_default_instances, TableStruct::offsets, factory,
|
||||
NULL, file_level_enum_descriptors, NULL);
|
||||
}
|
||||
|
||||
void protobuf_AssignDescriptorsOnce() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &protobuf_AssignDescriptors);
|
||||
}
|
||||
|
||||
void protobuf_RegisterTypes(const ::std::string&) GOOGLE_PROTOBUF_ATTRIBUTE_COLD;
|
||||
void protobuf_RegisterTypes(const ::std::string&) {
|
||||
protobuf_AssignDescriptorsOnce();
|
||||
}
|
||||
|
||||
void AddDescriptorsImpl() {
|
||||
InitDefaults();
|
||||
static const char descriptor[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
"\n\013types.proto\022\021opencv_tensorflow*\234\005\n\010Dat"
|
||||
"aType\022\016\n\nDT_INVALID\020\000\022\014\n\010DT_FLOAT\020\001\022\r\n\tD"
|
||||
"T_DOUBLE\020\002\022\014\n\010DT_INT32\020\003\022\014\n\010DT_UINT8\020\004\022\014"
|
||||
"\n\010DT_INT16\020\005\022\013\n\007DT_INT8\020\006\022\r\n\tDT_STRING\020\007"
|
||||
"\022\020\n\014DT_COMPLEX64\020\010\022\014\n\010DT_INT64\020\t\022\013\n\007DT_B"
|
||||
"OOL\020\n\022\014\n\010DT_QINT8\020\013\022\r\n\tDT_QUINT8\020\014\022\r\n\tDT"
|
||||
"_QINT32\020\r\022\017\n\013DT_BFLOAT16\020\016\022\r\n\tDT_QINT16\020"
|
||||
"\017\022\016\n\nDT_QUINT16\020\020\022\r\n\tDT_UINT16\020\021\022\021\n\rDT_C"
|
||||
"OMPLEX128\020\022\022\013\n\007DT_HALF\020\023\022\020\n\014DT_FLOAT_REF"
|
||||
"\020e\022\021\n\rDT_DOUBLE_REF\020f\022\020\n\014DT_INT32_REF\020g\022"
|
||||
"\020\n\014DT_UINT8_REF\020h\022\020\n\014DT_INT16_REF\020i\022\017\n\013D"
|
||||
"T_INT8_REF\020j\022\021\n\rDT_STRING_REF\020k\022\024\n\020DT_CO"
|
||||
"MPLEX64_REF\020l\022\020\n\014DT_INT64_REF\020m\022\017\n\013DT_BO"
|
||||
"OL_REF\020n\022\020\n\014DT_QINT8_REF\020o\022\021\n\rDT_QUINT8_"
|
||||
"REF\020p\022\021\n\rDT_QINT32_REF\020q\022\023\n\017DT_BFLOAT16_"
|
||||
"REF\020r\022\021\n\rDT_QINT16_REF\020s\022\022\n\016DT_QUINT16_R"
|
||||
"EF\020t\022\021\n\rDT_UINT16_REF\020u\022\025\n\021DT_COMPLEX128"
|
||||
"_REF\020v\022\017\n\013DT_HALF_REF\020wB,\n\030org.tensorflo"
|
||||
"w.frameworkB\013TypesProtosP\001\370\001\001b\006proto3"
|
||||
};
|
||||
::google::protobuf::DescriptorPool::InternalAddGeneratedFile(
|
||||
descriptor, 757);
|
||||
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
|
||||
"types.proto", &protobuf_RegisterTypes);
|
||||
}
|
||||
|
||||
void AddDescriptors() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &AddDescriptorsImpl);
|
||||
}
|
||||
// Force AddDescriptors() to be called at dynamic initialization time.
|
||||
struct StaticDescriptorInitializer {
|
||||
StaticDescriptorInitializer() {
|
||||
AddDescriptors();
|
||||
}
|
||||
} static_descriptor_initializer;
|
||||
} // namespace protobuf_types_2eproto
|
||||
namespace opencv_tensorflow {
|
||||
// Returns the EnumDescriptor for DataType, making sure the file's descriptors
// have been assigned first.
const ::google::protobuf::EnumDescriptor* DataType_descriptor() {
  protobuf_types_2eproto::protobuf_AssignDescriptorsOnce();
  const ::google::protobuf::EnumDescriptor* const data_type_desc =
      protobuf_types_2eproto::file_level_enum_descriptors[0];
  return data_type_desc;
}
|
||||
// Reports whether `value` is one of the numeric values declared for DataType.
// The accepted values form two contiguous ranges: 0..19 and 101..119.
bool DataType_IsValid(int value) {
  const bool in_first_range = (value >= 0) && (value <= 19);
  const bool in_second_range = (value >= 101) && (value <= 119);
  return in_first_range || in_second_range;
}
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
143
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/types.pb.h
vendored
Normal file
143
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/types.pb.h
vendored
Normal file
@@ -0,0 +1,143 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: types.proto
|
||||
|
||||
#ifndef PROTOBUF_types_2eproto__INCLUDED
|
||||
#define PROTOBUF_types_2eproto__INCLUDED
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3005000
|
||||
#error This file was generated by a newer version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please update
|
||||
#error your headers.
|
||||
#endif
|
||||
#if 3005001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION
|
||||
#error This file was generated by an older version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please
|
||||
#error regenerate this file with a newer version of protoc.
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
#include <google/protobuf/generated_message_table_driven.h>
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
#include <google/protobuf/metadata.h>
|
||||
#include <google/protobuf/repeated_field.h> // IWYU pragma: export
|
||||
#include <google/protobuf/extension_set.h> // IWYU pragma: export
|
||||
#include <google/protobuf/generated_enum_reflection.h>
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace protobuf_types_2eproto {
|
||||
// Internal implementation detail -- do not use these members.
|
||||
struct TableStruct {
|
||||
static const ::google::protobuf::internal::ParseTableField entries[];
|
||||
static const ::google::protobuf::internal::AuxillaryParseTableField aux[];
|
||||
static const ::google::protobuf::internal::ParseTable schema[1];
|
||||
static const ::google::protobuf::internal::FieldMetadata field_metadata[];
|
||||
static const ::google::protobuf::internal::SerializationTable serialization_table[];
|
||||
static const ::google::protobuf::uint32 offsets[];
|
||||
};
|
||||
void AddDescriptors();
|
||||
// Intentionally empty: this header declares no message defaults to initialize.
inline void InitDefaults() {}
|
||||
} // namespace protobuf_types_2eproto
|
||||
namespace opencv_tensorflow {
|
||||
} // namespace opencv_tensorflow
|
||||
namespace opencv_tensorflow {
|
||||
|
||||
enum DataType {
|
||||
DT_INVALID = 0,
|
||||
DT_FLOAT = 1,
|
||||
DT_DOUBLE = 2,
|
||||
DT_INT32 = 3,
|
||||
DT_UINT8 = 4,
|
||||
DT_INT16 = 5,
|
||||
DT_INT8 = 6,
|
||||
DT_STRING = 7,
|
||||
DT_COMPLEX64 = 8,
|
||||
DT_INT64 = 9,
|
||||
DT_BOOL = 10,
|
||||
DT_QINT8 = 11,
|
||||
DT_QUINT8 = 12,
|
||||
DT_QINT32 = 13,
|
||||
DT_BFLOAT16 = 14,
|
||||
DT_QINT16 = 15,
|
||||
DT_QUINT16 = 16,
|
||||
DT_UINT16 = 17,
|
||||
DT_COMPLEX128 = 18,
|
||||
DT_HALF = 19,
|
||||
DT_FLOAT_REF = 101,
|
||||
DT_DOUBLE_REF = 102,
|
||||
DT_INT32_REF = 103,
|
||||
DT_UINT8_REF = 104,
|
||||
DT_INT16_REF = 105,
|
||||
DT_INT8_REF = 106,
|
||||
DT_STRING_REF = 107,
|
||||
DT_COMPLEX64_REF = 108,
|
||||
DT_INT64_REF = 109,
|
||||
DT_BOOL_REF = 110,
|
||||
DT_QINT8_REF = 111,
|
||||
DT_QUINT8_REF = 112,
|
||||
DT_QINT32_REF = 113,
|
||||
DT_BFLOAT16_REF = 114,
|
||||
DT_QINT16_REF = 115,
|
||||
DT_QUINT16_REF = 116,
|
||||
DT_UINT16_REF = 117,
|
||||
DT_COMPLEX128_REF = 118,
|
||||
DT_HALF_REF = 119,
|
||||
DataType_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min,
|
||||
DataType_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max
|
||||
};
|
||||
bool DataType_IsValid(int value);
|
||||
const DataType DataType_MIN = DT_INVALID;
|
||||
const DataType DataType_MAX = DT_HALF_REF;
|
||||
const int DataType_ARRAYSIZE = DataType_MAX + 1;
|
||||
|
||||
const ::google::protobuf::EnumDescriptor* DataType_descriptor();
|
||||
// Looks up the name of `value` through the DataType enum descriptor.
inline const ::std::string& DataType_Name(DataType value) {
  const ::google::protobuf::EnumDescriptor* const enum_desc = DataType_descriptor();
  return ::google::protobuf::internal::NameOfEnum(enum_desc, value);
}
|
||||
inline bool DataType_Parse(
|
||||
const ::std::string& name, DataType* value) {
|
||||
return ::google::protobuf::internal::ParseNamedEnum<DataType>(
|
||||
DataType_descriptor(), name, value);
|
||||
}
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif // __GNUC__
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
|
||||
// Marks ::opencv_tensorflow::DataType as a protobuf enum for trait checks.
template <>
struct is_proto_enum< ::opencv_tensorflow::DataType>
    : ::google::protobuf::internal::true_type {};
|
||||
template <>
|
||||
inline const EnumDescriptor* GetEnumDescriptor< ::opencv_tensorflow::DataType>() {
|
||||
return ::opencv_tensorflow::DataType_descriptor();
|
||||
}
|
||||
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
||||
#endif // PROTOBUF_types_2eproto__INCLUDED
|
||||
492
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/versions.pb.cc
vendored
Normal file
492
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/versions.pb.cc
vendored
Normal file
@@ -0,0 +1,492 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: versions.proto
|
||||
|
||||
#include "versions.pb.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
#include <google/protobuf/stubs/port.h>
|
||||
#include <google/protobuf/stubs/once.h>
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/wire_format_lite_inl.h>
|
||||
#include <google/protobuf/descriptor.h>
|
||||
#include <google/protobuf/generated_message_reflection.h>
|
||||
#include <google/protobuf/reflection_ops.h>
|
||||
#include <google/protobuf/wire_format.h>
|
||||
// This is a temporary google only hack
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
#include "third_party/protobuf/version.h"
|
||||
#endif
|
||||
// @@protoc_insertion_point(includes)
|
||||
namespace opencv_tensorflow {
|
||||
class VersionDefDefaultTypeInternal {
|
||||
public:
|
||||
::google::protobuf::internal::ExplicitlyConstructed<VersionDef>
|
||||
_instance;
|
||||
} _VersionDef_default_instance_;
|
||||
} // namespace opencv_tensorflow
|
||||
namespace protobuf_versions_2eproto {
|
||||
void InitDefaultsVersionDefImpl() {
|
||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||
|
||||
#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
::google::protobuf::internal::InitProtobufDefaultsForceUnique();
|
||||
#else
|
||||
::google::protobuf::internal::InitProtobufDefaults();
|
||||
#endif // GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS
|
||||
{
|
||||
void* ptr = &::opencv_tensorflow::_VersionDef_default_instance_;
|
||||
new (ptr) ::opencv_tensorflow::VersionDef();
|
||||
::google::protobuf::internal::OnShutdownDestroyMessage(ptr);
|
||||
}
|
||||
::opencv_tensorflow::VersionDef::InitAsDefaultInstance();
|
||||
}
|
||||
|
||||
void InitDefaultsVersionDef() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &InitDefaultsVersionDefImpl);
|
||||
}
|
||||
|
||||
// Tables handed to AssignDescriptors() for versions.proto (one message:
// VersionDef).

// One Metadata slot for the file's single message.
::google::protobuf::Metadata file_level_metadata[1];

// Offset table for VersionDef; ~0u marks a slot the message does not have.
const ::google::protobuf::uint32 TableStruct::offsets[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
  ~0u,  // no _has_bits_
  GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::VersionDef, _internal_metadata_),
  ~0u,  // no _extensions_
  ~0u,  // no _oneof_case_
  ~0u,  // no _weak_field_map_
  GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::VersionDef, producer_),
  GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::VersionDef, min_consumer_),
  GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::opencv_tensorflow::VersionDef, bad_consumers_),
};

static const ::google::protobuf::internal::MigrationSchema schemas[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
  { 0, -1, sizeof(::opencv_tensorflow::VersionDef)},
};

// Default-instance table: a single entry pointing at the VersionDef holder.
static ::google::protobuf::Message const * const file_default_instances[] = {
  reinterpret_cast<const ::google::protobuf::Message*>(&::opencv_tensorflow::_VersionDef_default_instance_),
};
|
||||
|
||||
void protobuf_AssignDescriptors() {
|
||||
AddDescriptors();
|
||||
::google::protobuf::MessageFactory* factory = NULL;
|
||||
AssignDescriptors(
|
||||
"versions.proto", schemas, file_default_instances, TableStruct::offsets, factory,
|
||||
file_level_metadata, NULL, NULL);
|
||||
}
|
||||
|
||||
void protobuf_AssignDescriptorsOnce() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &protobuf_AssignDescriptors);
|
||||
}
|
||||
|
||||
void protobuf_RegisterTypes(const ::std::string&) GOOGLE_PROTOBUF_ATTRIBUTE_COLD;
|
||||
void protobuf_RegisterTypes(const ::std::string&) {
|
||||
protobuf_AssignDescriptorsOnce();
|
||||
::google::protobuf::internal::RegisterAllTypes(file_level_metadata, 1);
|
||||
}
|
||||
|
||||
void AddDescriptorsImpl() {
|
||||
InitDefaults();
|
||||
static const char descriptor[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = {
|
||||
"\n\016versions.proto\022\021opencv_tensorflow\"K\n\nV"
|
||||
"ersionDef\022\020\n\010producer\030\001 \001(\005\022\024\n\014min_consu"
|
||||
"mer\030\002 \001(\005\022\025\n\rbad_consumers\030\003 \003(\005B/\n\030org."
|
||||
"tensorflow.frameworkB\016VersionsProtosP\001\370\001"
|
||||
"\001b\006proto3"
|
||||
};
|
||||
::google::protobuf::DescriptorPool::InternalAddGeneratedFile(
|
||||
descriptor, 169);
|
||||
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
|
||||
"versions.proto", &protobuf_RegisterTypes);
|
||||
}
|
||||
|
||||
void AddDescriptors() {
|
||||
static GOOGLE_PROTOBUF_DECLARE_ONCE(once);
|
||||
::google::protobuf::GoogleOnceInit(&once, &AddDescriptorsImpl);
|
||||
}
|
||||
// Force AddDescriptors() to be called at dynamic initialization time.
|
||||
struct StaticDescriptorInitializer {
|
||||
StaticDescriptorInitializer() {
|
||||
AddDescriptors();
|
||||
}
|
||||
} static_descriptor_initializer;
|
||||
} // namespace protobuf_versions_2eproto
|
||||
namespace opencv_tensorflow {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
// Intentionally empty: nothing extra is set up on the default instance here.
void VersionDef::InitAsDefaultInstance() {}
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
const int VersionDef::kProducerFieldNumber;
|
||||
const int VersionDef::kMinConsumerFieldNumber;
|
||||
const int VersionDef::kBadConsumersFieldNumber;
|
||||
#endif // !defined(_MSC_VER) || _MSC_VER >= 1900
|
||||
|
||||
// Default constructor. Every object except the default instance itself
// triggers default-instance initialization before its fields are set up.
VersionDef::VersionDef()
    : ::google::protobuf::Message(), _internal_metadata_(NULL) {
  const bool is_regular_instance = (this != internal_default_instance());
  if (GOOGLE_PREDICT_TRUE(is_regular_instance)) {
    ::protobuf_versions_2eproto::InitDefaultsVersionDef();
  }
  SharedCtor();
  // @@protoc_insertion_point(constructor:opencv_tensorflow.VersionDef)
}
|
||||
// Arena constructor: the metadata and the repeated field are bound to `arena`,
// then the shared initialization and arena-destructor registration run.
VersionDef::VersionDef(::google::protobuf::Arena* arena)
    : ::google::protobuf::Message(),
      _internal_metadata_(arena),
      bad_consumers_(arena) {
  ::protobuf_versions_2eproto::InitDefaultsVersionDef();
  SharedCtor();
  RegisterArenaDtor(arena);
  // @@protoc_insertion_point(arena_constructor:opencv_tensorflow.VersionDef)
}
|
||||
// Copy constructor: copies the repeated field, merges the metadata, and
// block-copies the byte span running from producer_ through min_consumer_.
VersionDef::VersionDef(const VersionDef& from)
    : ::google::protobuf::Message(),
      _internal_metadata_(NULL),
      bad_consumers_(from.bad_consumers_),
      _cached_size_(0) {
  _internal_metadata_.MergeFrom(from._internal_metadata_);
  // Byte count from the start of producer_ to the end of min_consumer_.
  const size_t scalar_span =
      static_cast<size_t>(reinterpret_cast<char*>(&min_consumer_) -
                          reinterpret_cast<char*>(&producer_)) +
      sizeof(min_consumer_);
  ::memcpy(&producer_, &from.producer_, scalar_span);
  // @@protoc_insertion_point(copy_constructor:opencv_tensorflow.VersionDef)
}
|
||||
|
||||
// Shared constructor body: zeroes the byte span running from producer_
// through min_consumer_ and resets the cached size.
void VersionDef::SharedCtor() {
  const size_t scalar_span =
      static_cast<size_t>(reinterpret_cast<char*>(&min_consumer_) -
                          reinterpret_cast<char*>(&producer_)) +
      sizeof(min_consumer_);
  ::memset(&producer_, 0, scalar_span);
  _cached_size_ = 0;
}
|
||||
|
||||
// Destructor: delegates to SharedDtor().
VersionDef::~VersionDef() {
  // @@protoc_insertion_point(destructor:opencv_tensorflow.VersionDef)
  SharedDtor();
}
|
||||
|
||||
// Shared destructor body: only debug-asserts that the message is not on an
// arena.
void VersionDef::SharedDtor() {
  GOOGLE_DCHECK(GetArenaNoVirtual() == NULL);
}
|
||||
|
||||
void VersionDef::ArenaDtor(void* object) {
|
||||
VersionDef* _this = reinterpret_cast< VersionDef* >(object);
|
||||
(void)_this;
|
||||
}
|
||||
void VersionDef::RegisterArenaDtor(::google::protobuf::Arena* arena) {
|
||||
}
|
||||
// Stores `size` in the mutable cached-size member, bracketed by the
// GOOGLE_SAFE_CONCURRENT_WRITES markers.
void VersionDef::SetCachedSize(int size) const {
  GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
  _cached_size_ = size;
  GOOGLE_SAFE_CONCURRENT_WRITES_END();
}
|
||||
// Returns the Descriptor for VersionDef, making sure the file's descriptors
// have been assigned first.
const ::google::protobuf::Descriptor* VersionDef::descriptor() {
  ::protobuf_versions_2eproto::protobuf_AssignDescriptorsOnce();
  const ::google::protobuf::Metadata& message_metadata =
      ::protobuf_versions_2eproto::file_level_metadata[kIndexInFileMessages];
  return message_metadata.descriptor;
}
|
||||
|
||||
// Returns the default instance, initializing it first if necessary.
const VersionDef& VersionDef::default_instance() {
  ::protobuf_versions_2eproto::InitDefaultsVersionDef();
  return *internal_default_instance();
}
|
||||
|
||||
// Creates a new VersionDef through Arena::CreateMessage with the given arena.
VersionDef* VersionDef::New(::google::protobuf::Arena* arena) const {
  return ::google::protobuf::Arena::CreateMessage<VersionDef>(arena);
}
|
||||
|
||||
// Resets the message: empties bad_consumers_, zeroes the byte span running
// from producer_ through min_consumer_, and clears the metadata.
void VersionDef::Clear() {
  // @@protoc_insertion_point(message_clear_start:opencv_tensorflow.VersionDef)
  ::google::protobuf::uint32 cached_has_bits = 0;
  // Prevent compiler warnings about cached_has_bits being unused
  (void) cached_has_bits;

  bad_consumers_.Clear();
  const size_t scalar_span =
      static_cast<size_t>(reinterpret_cast<char*>(&min_consumer_) -
                          reinterpret_cast<char*>(&producer_)) +
      sizeof(min_consumer_);
  ::memset(&producer_, 0, scalar_span);
  _internal_metadata_.Clear();
}
|
||||
|
||||
// Parses fields from `input` into this message. Returns true when a zero tag
// ends the stream and false as soon as any read wrapped in DO_ fails.
// Unrecognized or mismatched tags are routed to handle_unusual and skipped
// into the unknown-field set.
bool VersionDef::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
  ::google::protobuf::uint32 tag;
  // @@protoc_insertion_point(parse_start:opencv_tensorflow.VersionDef)
  for (;;) {
    ::std::pair< ::google::protobuf::uint32, bool> tag_read =
        input->ReadTagWithCutoffNoLastTag(127u);
    tag = tag_read.first;
    if (!tag_read.second) goto handle_unusual;
    switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
      // int32 producer = 1;
      case 1: {
        if (static_cast< ::google::protobuf::uint8>(tag) ==
            static_cast< ::google::protobuf::uint8>(8u /* 8 & 0xFF */)) {
          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
                   ::google::protobuf::int32,
                   ::google::protobuf::internal::WireFormatLite::TYPE_INT32>(
               input, &producer_)));
        } else {
          goto handle_unusual;
        }
        break;
      }

      // int32 min_consumer = 2;
      case 2: {
        if (static_cast< ::google::protobuf::uint8>(tag) ==
            static_cast< ::google::protobuf::uint8>(16u /* 16 & 0xFF */)) {
          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
                   ::google::protobuf::int32,
                   ::google::protobuf::internal::WireFormatLite::TYPE_INT32>(
               input, &min_consumer_)));
        } else {
          goto handle_unusual;
        }
        break;
      }

      // repeated int32 bad_consumers = 3;
      case 3: {
        // Tag byte 26 takes the packed reader, tag byte 24 the repeated
        // (one-value-per-tag) reader.
        if (static_cast< ::google::protobuf::uint8>(tag) ==
            static_cast< ::google::protobuf::uint8>(26u /* 26 & 0xFF */)) {
          DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive<
                   ::google::protobuf::int32,
                   ::google::protobuf::internal::WireFormatLite::TYPE_INT32>(
               input, this->mutable_bad_consumers())));
        } else if (
            static_cast< ::google::protobuf::uint8>(tag) ==
            static_cast< ::google::protobuf::uint8>(24u /* 24 & 0xFF */)) {
          DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline<
                   ::google::protobuf::int32,
                   ::google::protobuf::internal::WireFormatLite::TYPE_INT32>(
               1, 26u, input, this->mutable_bad_consumers())));
        } else {
          goto handle_unusual;
        }
        break;
      }

      default: {
      handle_unusual:
        // A zero tag ends parsing successfully.
        if (tag == 0) {
          goto success;
        }
        DO_(::google::protobuf::internal::WireFormat::SkipField(
              input, tag, _internal_metadata_.mutable_unknown_fields()));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:opencv_tensorflow.VersionDef)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:opencv_tensorflow.VersionDef)
  return false;
#undef DO_
}
|
||||
|
||||
// Writes the message to `output`. Scalar fields are emitted only when
// non-zero; bad_consumers is written as one length-delimited run whose length
// comes from _bad_consumers_cached_byte_size_.
void VersionDef::SerializeWithCachedSizes(
    ::google::protobuf::io::CodedOutputStream* output) const {
  // @@protoc_insertion_point(serialize_start:opencv_tensorflow.VersionDef)
  ::google::protobuf::uint32 cached_has_bits = 0;
  (void) cached_has_bits;

  // int32 producer = 1;
  if (this->producer() != 0) {
    ::google::protobuf::internal::WireFormatLite::WriteInt32(
        1, this->producer(), output);
  }

  // int32 min_consumer = 2;
  if (this->min_consumer() != 0) {
    ::google::protobuf::internal::WireFormatLite::WriteInt32(
        2, this->min_consumer(), output);
  }

  // repeated int32 bad_consumers = 3;
  if (this->bad_consumers_size() > 0) {
    ::google::protobuf::internal::WireFormatLite::WriteTag(
        3,
        ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED,
        output);
    output->WriteVarint32(static_cast< ::google::protobuf::uint32>(
        _bad_consumers_cached_byte_size_));
  }
  for (int idx = 0, count = this->bad_consumers_size(); idx < count; idx++) {
    ::google::protobuf::internal::WireFormatLite::WriteInt32NoTag(
        this->bad_consumers(idx), output);
  }

  if ((_internal_metadata_.have_unknown_fields() &&
       ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
    ::google::protobuf::internal::WireFormat::SerializeUnknownFields(
        (::google::protobuf::internal::GetProto3PreserveUnknownsDefault()
             ? _internal_metadata_.unknown_fields()
             : _internal_metadata_.default_instance()),
        output);
  }
  // @@protoc_insertion_point(serialize_end:opencv_tensorflow.VersionDef)
}
|
||||
|
||||
// Writes this message into the raw buffer starting at `target` and returns
// the position just past the last byte written. `deterministic` is ignored.
::google::protobuf::uint8* VersionDef::InternalSerializeWithCachedSizesToArray(
    bool deterministic, ::google::protobuf::uint8* target) const {
  (void)deterministic; // Unused
  // @@protoc_insertion_point(serialize_to_array_start:opencv_tensorflow.VersionDef)
  typedef ::google::protobuf::internal::WireFormatLite Lite;

  // int32 producer = 1;
  if (producer_ != 0) {
    target = Lite::WriteInt32ToArray(1, producer_, target);
  }

  // int32 min_consumer = 2;
  if (min_consumer_ != 0) {
    target = Lite::WriteInt32ToArray(2, min_consumer_, target);
  }

  // repeated int32 bad_consumers = 3;
  if (bad_consumers_.size() > 0) {
    // Packed encoding: tag, cached payload length, then the raw values.
    target = Lite::WriteTagToArray(3, Lite::WIRETYPE_LENGTH_DELIMITED, target);
    target = ::google::protobuf::io::CodedOutputStream::WriteVarint32ToArray(
        static_cast< ::google::protobuf::int32>(
            _bad_consumers_cached_byte_size_),
        target);
    target = Lite::WriteInt32NoTagToArray(this->bad_consumers_, target);
  }

  if (_internal_metadata_.have_unknown_fields() &&
      ::google::protobuf::internal::GetProto3PreserveUnknownsDefault()) {
    // The preserve-unknowns flag was just checked to be true, so the real
    // unknown field set is the one serialized.
    target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray(
        _internal_metadata_.unknown_fields(), target);
  }
  // @@protoc_insertion_point(serialize_to_array_end:opencv_tensorflow.VersionDef)
  return target;
}
|
||||
|
||||
size_t VersionDef::ByteSizeLong() const {
|
||||
// @@protoc_insertion_point(message_byte_size_start:opencv_tensorflow.VersionDef)
|
||||
size_t total_size = 0;
|
||||
|
||||
if ((_internal_metadata_.have_unknown_fields() && ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) {
|
||||
total_size +=
|
||||
::google::protobuf::internal::WireFormat::ComputeUnknownFieldsSize(
|
||||
(::google::protobuf::internal::GetProto3PreserveUnknownsDefault() ? _internal_metadata_.unknown_fields() : _internal_metadata_.default_instance()));
|
||||
}
|
||||
// repeated int32 bad_consumers = 3;
|
||||
{
|
||||
size_t data_size = ::google::protobuf::internal::WireFormatLite::
|
||||
Int32Size(this->bad_consumers_);
|
||||
if (data_size > 0) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
static_cast< ::google::protobuf::int32>(data_size));
|
||||
}
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(data_size);
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
|
||||
_bad_consumers_cached_byte_size_ = cached_size;
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_END();
|
||||
total_size += data_size;
|
||||
}
|
||||
|
||||
// int32 producer = 1;
|
||||
if (this->producer() != 0) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->producer());
|
||||
}
|
||||
|
||||
// int32 min_consumer = 2;
|
||||
if (this->min_consumer() != 0) {
|
||||
total_size += 1 +
|
||||
::google::protobuf::internal::WireFormatLite::Int32Size(
|
||||
this->min_consumer());
|
||||
}
|
||||
|
||||
int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
|
||||
_cached_size_ = cached_size;
|
||||
GOOGLE_SAFE_CONCURRENT_WRITES_END();
|
||||
return total_size;
|
||||
}
|
||||
|
||||
// Generic merge entry point: dispatches to the typed overload when `from` is
// really a VersionDef, otherwise falls back to reflection-based merging.
void VersionDef::MergeFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_merge_from_start:opencv_tensorflow.VersionDef)
  GOOGLE_DCHECK_NE(&from, this);
  const VersionDef* const typed =
      ::google::protobuf::internal::DynamicCastToGenerated<const VersionDef>(
          &from);
  if (typed != NULL) {
    // @@protoc_insertion_point(generalized_merge_from_cast_success:opencv_tensorflow.VersionDef)
    MergeFrom(*typed);
    return;
  }
  // @@protoc_insertion_point(generalized_merge_from_cast_fail:opencv_tensorflow.VersionDef)
  ::google::protobuf::internal::ReflectionOps::Merge(from, this);
}
|
||||
|
||||
void VersionDef::MergeFrom(const VersionDef& from) {
|
||||
// @@protoc_insertion_point(class_specific_merge_from_start:opencv_tensorflow.VersionDef)
|
||||
GOOGLE_DCHECK_NE(&from, this);
|
||||
_internal_metadata_.MergeFrom(from._internal_metadata_);
|
||||
::google::protobuf::uint32 cached_has_bits = 0;
|
||||
(void) cached_has_bits;
|
||||
|
||||
bad_consumers_.MergeFrom(from.bad_consumers_);
|
||||
if (from.producer() != 0) {
|
||||
set_producer(from.producer());
|
||||
}
|
||||
if (from.min_consumer() != 0) {
|
||||
set_min_consumer(from.min_consumer());
|
||||
}
|
||||
}
|
||||
|
||||
// Generic copy: replaces the contents of this message with those of `from`.
// Copying a message onto itself does nothing.
void VersionDef::CopyFrom(const ::google::protobuf::Message& from) {
  // @@protoc_insertion_point(generalized_copy_from_start:opencv_tensorflow.VersionDef)
  if (&from != this) {
    Clear();
    MergeFrom(from);
  }
}
|
||||
|
||||
void VersionDef::CopyFrom(const VersionDef& from) {
|
||||
// @@protoc_insertion_point(class_specific_copy_from_start:opencv_tensorflow.VersionDef)
|
||||
if (&from == this) return;
|
||||
Clear();
|
||||
MergeFrom(from);
|
||||
}
|
||||
|
||||
// Always reports the message as initialized.
bool VersionDef::IsInitialized() const {
  const bool initialized = true;
  return initialized;
}
|
||||
|
||||
// Exchanges the contents of this message and `other`. Messages that share an
// arena are swapped in place; otherwise the exchange goes through a temporary
// copy created on this message's arena.
void VersionDef::Swap(VersionDef* other) {
  if (other == this) {
    return;
  }
  if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) {
    InternalSwap(other);
    return;
  }

  // Different arenas: stage other's contents in a scratch message, copy ours
  // into other, then take over the staged contents.
  VersionDef* scratch = New(GetArenaNoVirtual());
  scratch->MergeFrom(*other);
  other->CopyFrom(*this);
  InternalSwap(scratch);
  // A scratch message that is not arena-owned must be freed here.
  if (GetArenaNoVirtual() == NULL) {
    delete scratch;
  }
}
|
||||
// In-place swap that skips the cross-arena copy path; both messages are
// expected to live on the same arena (checked only by a debug assertion).
void VersionDef::UnsafeArenaSwap(VersionDef* other) {
  if (other != this) {
    GOOGLE_DCHECK(GetArenaNoVirtual() == other->GetArenaNoVirtual());
    InternalSwap(other);
  }
}
|
||||
// Member-wise exchange of every field, the internal metadata and the cached
// message size with `other`.
void VersionDef::InternalSwap(VersionDef* other) {
  using std::swap;
  // Scalar fields.
  swap(producer_, other->producer_);
  swap(min_consumer_, other->min_consumer_);
  // Repeated field.
  bad_consumers_.InternalSwap(&other->bad_consumers_);
  // Bookkeeping.
  _internal_metadata_.Swap(&other->_internal_metadata_);
  swap(_cached_size_, other->_cached_size_);
}
|
||||
|
||||
// Returns the metadata entry for this message type from the file-level
// metadata table, after making sure the descriptors have been assigned.
::google::protobuf::Metadata VersionDef::GetMetadata() const {
  ::protobuf_versions_2eproto::protobuf_AssignDescriptorsOnce();
  const ::google::protobuf::Metadata& entry =
      ::protobuf_versions_2eproto::file_level_metadata[kIndexInFileMessages];
  return entry;
}
|
||||
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
272
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/versions.pb.h
vendored
Normal file
272
3rdparty/opencv-4.5.4/modules/dnn/misc/tensorflow/versions.pb.h
vendored
Normal file
@@ -0,0 +1,272 @@
|
||||
// Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
// source: versions.proto
|
||||
|
||||
#ifndef PROTOBUF_versions_2eproto__INCLUDED
|
||||
#define PROTOBUF_versions_2eproto__INCLUDED
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <google/protobuf/stubs/common.h>
|
||||
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3005000
|
||||
#error This file was generated by a newer version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please update
|
||||
#error your headers.
|
||||
#endif
|
||||
#if 3005001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION
|
||||
#error This file was generated by an older version of protoc which is
|
||||
#error incompatible with your Protocol Buffer headers. Please
|
||||
#error regenerate this file with a newer version of protoc.
|
||||
#endif
|
||||
|
||||
#include <google/protobuf/io/coded_stream.h>
|
||||
#include <google/protobuf/arena.h>
|
||||
#include <google/protobuf/arenastring.h>
|
||||
#include <google/protobuf/generated_message_table_driven.h>
|
||||
#include <google/protobuf/generated_message_util.h>
|
||||
#include <google/protobuf/metadata.h>
|
||||
#include <google/protobuf/message.h>
|
||||
#include <google/protobuf/repeated_field.h> // IWYU pragma: export
|
||||
#include <google/protobuf/extension_set.h> // IWYU pragma: export
|
||||
#include <google/protobuf/unknown_field_set.h>
|
||||
// @@protoc_insertion_point(includes)
|
||||
|
||||
namespace protobuf_versions_2eproto {
|
||||
// Internal implementation detail -- do not use these members.
|
||||
struct TableStruct {
|
||||
static const ::google::protobuf::internal::ParseTableField entries[];
|
||||
static const ::google::protobuf::internal::AuxillaryParseTableField aux[];
|
||||
static const ::google::protobuf::internal::ParseTable schema[1];
|
||||
static const ::google::protobuf::internal::FieldMetadata field_metadata[];
|
||||
static const ::google::protobuf::internal::SerializationTable serialization_table[];
|
||||
static const ::google::protobuf::uint32 offsets[];
|
||||
};
|
||||
void AddDescriptors();
|
||||
void InitDefaultsVersionDefImpl();
|
||||
void InitDefaultsVersionDef();
|
||||
// Initializes the default instances of every message declared in
// versions.proto; VersionDef is the only one.
inline void InitDefaults() {
  InitDefaultsVersionDef();
}
|
||||
} // namespace protobuf_versions_2eproto
|
||||
namespace opencv_tensorflow {
|
||||
class VersionDef;
|
||||
class VersionDefDefaultTypeInternal;
|
||||
extern VersionDefDefaultTypeInternal _VersionDef_default_instance_;
|
||||
} // namespace opencv_tensorflow
|
||||
namespace opencv_tensorflow {
|
||||
|
||||
// ===================================================================
|
||||
|
||||
class VersionDef : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:opencv_tensorflow.VersionDef) */ {
|
||||
public:
|
||||
VersionDef();
|
||||
virtual ~VersionDef();
|
||||
|
||||
VersionDef(const VersionDef& from);
|
||||
|
||||
inline VersionDef& operator=(const VersionDef& from) {
|
||||
CopyFrom(from);
|
||||
return *this;
|
||||
}
|
||||
#if LANG_CXX11
|
||||
VersionDef(VersionDef&& from) noexcept
|
||||
: VersionDef() {
|
||||
*this = ::std::move(from);
|
||||
}
|
||||
|
||||
inline VersionDef& operator=(VersionDef&& from) noexcept {
|
||||
if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) {
|
||||
if (this != &from) InternalSwap(&from);
|
||||
} else {
|
||||
CopyFrom(from);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
inline ::google::protobuf::Arena* GetArena() const PROTOBUF_FINAL {
|
||||
return GetArenaNoVirtual();
|
||||
}
|
||||
inline void* GetMaybeArenaPointer() const PROTOBUF_FINAL {
|
||||
return MaybeArenaPtr();
|
||||
}
|
||||
static const ::google::protobuf::Descriptor* descriptor();
|
||||
static const VersionDef& default_instance();
|
||||
|
||||
static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY
|
||||
static inline const VersionDef* internal_default_instance() {
|
||||
return reinterpret_cast<const VersionDef*>(
|
||||
&_VersionDef_default_instance_);
|
||||
}
|
||||
static PROTOBUF_CONSTEXPR int const kIndexInFileMessages =
|
||||
0;
|
||||
|
||||
void UnsafeArenaSwap(VersionDef* other);
|
||||
void Swap(VersionDef* other);
|
||||
friend void swap(VersionDef& a, VersionDef& b) {
|
||||
a.Swap(&b);
|
||||
}
|
||||
|
||||
// implements Message ----------------------------------------------
|
||||
|
||||
inline VersionDef* New() const PROTOBUF_FINAL { return New(NULL); }
|
||||
|
||||
VersionDef* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL;
|
||||
void CopyFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void MergeFrom(const ::google::protobuf::Message& from) PROTOBUF_FINAL;
|
||||
void CopyFrom(const VersionDef& from);
|
||||
void MergeFrom(const VersionDef& from);
|
||||
void Clear() PROTOBUF_FINAL;
|
||||
bool IsInitialized() const PROTOBUF_FINAL;
|
||||
|
||||
size_t ByteSizeLong() const PROTOBUF_FINAL;
|
||||
bool MergePartialFromCodedStream(
|
||||
::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL;
|
||||
void SerializeWithCachedSizes(
|
||||
::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL;
|
||||
::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(
|
||||
bool deterministic, ::google::protobuf::uint8* target) const PROTOBUF_FINAL;
|
||||
int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; }
|
||||
private:
|
||||
void SharedCtor();
|
||||
void SharedDtor();
|
||||
void SetCachedSize(int size) const PROTOBUF_FINAL;
|
||||
void InternalSwap(VersionDef* other);
|
||||
protected:
|
||||
explicit VersionDef(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
static void ArenaDtor(void* object);
|
||||
inline void RegisterArenaDtor(::google::protobuf::Arena* arena);
|
||||
private:
|
||||
inline ::google::protobuf::Arena* GetArenaNoVirtual() const {
|
||||
return _internal_metadata_.arena();
|
||||
}
|
||||
inline void* MaybeArenaPtr() const {
|
||||
return _internal_metadata_.raw_arena_ptr();
|
||||
}
|
||||
public:
|
||||
|
||||
::google::protobuf::Metadata GetMetadata() const PROTOBUF_FINAL;
|
||||
|
||||
// nested types ----------------------------------------------------
|
||||
|
||||
// accessors -------------------------------------------------------
|
||||
|
||||
// repeated int32 bad_consumers = 3;
|
||||
int bad_consumers_size() const;
|
||||
void clear_bad_consumers();
|
||||
static const int kBadConsumersFieldNumber = 3;
|
||||
::google::protobuf::int32 bad_consumers(int index) const;
|
||||
void set_bad_consumers(int index, ::google::protobuf::int32 value);
|
||||
void add_bad_consumers(::google::protobuf::int32 value);
|
||||
const ::google::protobuf::RepeatedField< ::google::protobuf::int32 >&
|
||||
bad_consumers() const;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int32 >*
|
||||
mutable_bad_consumers();
|
||||
|
||||
// int32 producer = 1;
|
||||
void clear_producer();
|
||||
static const int kProducerFieldNumber = 1;
|
||||
::google::protobuf::int32 producer() const;
|
||||
void set_producer(::google::protobuf::int32 value);
|
||||
|
||||
// int32 min_consumer = 2;
|
||||
void clear_min_consumer();
|
||||
static const int kMinConsumerFieldNumber = 2;
|
||||
::google::protobuf::int32 min_consumer() const;
|
||||
void set_min_consumer(::google::protobuf::int32 value);
|
||||
|
||||
// @@protoc_insertion_point(class_scope:opencv_tensorflow.VersionDef)
|
||||
private:
|
||||
|
||||
::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_;
|
||||
template <typename T> friend class ::google::protobuf::Arena::InternalHelper;
|
||||
typedef void InternalArenaConstructable_;
|
||||
typedef void DestructorSkippable_;
|
||||
::google::protobuf::RepeatedField< ::google::protobuf::int32 > bad_consumers_;
|
||||
mutable int _bad_consumers_cached_byte_size_;
|
||||
::google::protobuf::int32 producer_;
|
||||
::google::protobuf::int32 min_consumer_;
|
||||
mutable int _cached_size_;
|
||||
friend struct ::protobuf_versions_2eproto::TableStruct;
|
||||
friend void ::protobuf_versions_2eproto::InitDefaultsVersionDefImpl();
|
||||
};
|
||||
// ===================================================================
|
||||
|
||||
|
||||
// ===================================================================
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif // __GNUC__
|
||||
// VersionDef
|
||||
|
||||
// int32 producer = 1;
|
||||
inline void VersionDef::clear_producer() {
|
||||
producer_ = 0;
|
||||
}
|
||||
inline ::google::protobuf::int32 VersionDef::producer() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.VersionDef.producer)
|
||||
return producer_;
|
||||
}
|
||||
inline void VersionDef::set_producer(::google::protobuf::int32 value) {
|
||||
|
||||
producer_ = value;
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.VersionDef.producer)
|
||||
}
|
||||
|
||||
// int32 min_consumer = 2;
|
||||
inline void VersionDef::clear_min_consumer() {
|
||||
min_consumer_ = 0;
|
||||
}
|
||||
inline ::google::protobuf::int32 VersionDef::min_consumer() const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.VersionDef.min_consumer)
|
||||
return min_consumer_;
|
||||
}
|
||||
inline void VersionDef::set_min_consumer(::google::protobuf::int32 value) {
|
||||
|
||||
min_consumer_ = value;
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.VersionDef.min_consumer)
|
||||
}
|
||||
|
||||
// repeated int32 bad_consumers = 3;
|
||||
inline int VersionDef::bad_consumers_size() const {
|
||||
return bad_consumers_.size();
|
||||
}
|
||||
inline void VersionDef::clear_bad_consumers() {
|
||||
bad_consumers_.Clear();
|
||||
}
|
||||
inline ::google::protobuf::int32 VersionDef::bad_consumers(int index) const {
|
||||
// @@protoc_insertion_point(field_get:opencv_tensorflow.VersionDef.bad_consumers)
|
||||
return bad_consumers_.Get(index);
|
||||
}
|
||||
inline void VersionDef::set_bad_consumers(int index, ::google::protobuf::int32 value) {
|
||||
bad_consumers_.Set(index, value);
|
||||
// @@protoc_insertion_point(field_set:opencv_tensorflow.VersionDef.bad_consumers)
|
||||
}
|
||||
inline void VersionDef::add_bad_consumers(::google::protobuf::int32 value) {
|
||||
bad_consumers_.Add(value);
|
||||
// @@protoc_insertion_point(field_add:opencv_tensorflow.VersionDef.bad_consumers)
|
||||
}
|
||||
inline const ::google::protobuf::RepeatedField< ::google::protobuf::int32 >&
|
||||
VersionDef::bad_consumers() const {
|
||||
// @@protoc_insertion_point(field_list:opencv_tensorflow.VersionDef.bad_consumers)
|
||||
return bad_consumers_;
|
||||
}
|
||||
inline ::google::protobuf::RepeatedField< ::google::protobuf::int32 >*
|
||||
VersionDef::mutable_bad_consumers() {
|
||||
// @@protoc_insertion_point(field_mutable_list:opencv_tensorflow.VersionDef.bad_consumers)
|
||||
return &bad_consumers_;
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // __GNUC__
|
||||
|
||||
// @@protoc_insertion_point(namespace_scope)
|
||||
|
||||
} // namespace opencv_tensorflow
|
||||
|
||||
// @@protoc_insertion_point(global_scope)
|
||||
|
||||
#endif // PROTOBUF_versions_2eproto__INCLUDED
|
||||
111
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_caffe.cpp
vendored
Normal file
111
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_caffe.cpp
vendored
Normal file
@@ -0,0 +1,111 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
// It is recommended to run this performance test via
|
||||
// ./bin/opencv_perf_dnn 2> /dev/null | grep "PERFSTAT" -A 3
|
||||
// because whole output includes Caffe's logs.
|
||||
//
|
||||
// Note: Make sure that the intended version of Caffe is linked.
|
||||
// Note: There is an impact on Halide performance. Comment out these tests if you
|
||||
// want to run the last one.
|
||||
//
|
||||
// How to build Intel-Caffe with MKLDNN backend
|
||||
// ============================================
|
||||
// mkdir build && cd build
|
||||
// cmake -DCMAKE_BUILD_TYPE=Release \
|
||||
// -DUSE_MKLDNN_AS_DEFAULT_ENGINE=ON \
|
||||
// -DUSE_MKL2017_AS_DEFAULT_ENGINE=OFF \
|
||||
// -DCPU_ONLY=ON \
|
||||
// -DCMAKE_INSTALL_PREFIX=/usr/local .. && make -j8
|
||||
// sudo make install
|
||||
//
|
||||
// In case of problems with cublas_v2.h at include/caffe/util/device_alternate.hpp: add line
|
||||
// #define CPU_ONLY
|
||||
// before the first line
|
||||
// #ifdef CPU_ONLY // CPU-only Caffe.
|
||||
|
||||
#if defined(HAVE_CAFFE) || defined(HAVE_CLCAFFE)
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include <iostream>
|
||||
#include <caffe/caffe.hpp>
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
// Builds a Caffe network for benchmarking.
//
// proto   - test-data relative path of the .prototxt network description.
// weights - test-data relative path of the .caffemodel file.
//
// Both paths are resolved with findDataFile(). The network is created in
// TEST phase (on device 0 in GPU mode when HAVE_CLCAFFE is defined, in CPU
// mode otherwise), the weights are loaded, the first input blob is filled
// with uniform random values in [0, 1) and one warm-up forward pass is run.
//
// Returns a heap-allocated network; the caller owns it and must delete it.
static caffe::Net<float>* initNet(std::string proto, std::string weights)
{
    proto = findDataFile(proto);
    weights = findDataFile(weights, false);

#ifdef HAVE_CLCAFFE
    caffe::Caffe::set_mode(caffe::Caffe::GPU);
    caffe::Caffe::SetDevice(0);

    caffe::Net<float>* net =
        new caffe::Net<float>(proto, caffe::TEST, caffe::Caffe::GetDefaultDevice());
#else
    caffe::Caffe::set_mode(caffe::Caffe::CPU);

    caffe::Net<float>* net = new caffe::Net<float>(proto, caffe::TEST);
#endif

    net->CopyTrainedLayersFrom(weights);

    // Fail with a clear assertion instead of indexing an empty vector.
    CV_Assert(!net->input_blobs().empty());
    caffe::Blob<float>* input = net->input_blobs()[0];

    CV_Assert(input->num() == 1);
    CV_Assert(input->channels() == 3);

    // The blob is written to, so request the mutable CPU buffer rather than
    // casting away the constness of the read-only cpu_data() pointer.
    // The Mat only wraps the buffer (height * width * 3 floats) for randu().
    Mat inputMat(input->height(), input->width(), CV_32FC3, input->mutable_cpu_data());
    randu(inputMat, 0.0f, 1.0f);

    net->Forward();
    return net;
}
|
||||
|
||||
// Measures the forward-pass time of BVLC AlexNet running in Caffe.
PERF_TEST(AlexNet_caffe, CaffePerfTest)
{
    // initNet() returns a heap-allocated network; hold it in a smart pointer
    // so that it is released when the test body exits.
    cv::Ptr<caffe::Net<float> > net(initNet("dnn/bvlc_alexnet.prototxt",
                                            "dnn/bvlc_alexnet.caffemodel"));
    TEST_CYCLE() net->Forward();
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
// Measures the forward-pass time of BVLC GoogLeNet running in Caffe.
PERF_TEST(GoogLeNet_caffe, CaffePerfTest)
{
    // initNet() returns a heap-allocated network; hold it in a smart pointer
    // so that it is released when the test body exits.
    cv::Ptr<caffe::Net<float> > net(initNet("dnn/bvlc_googlenet.prototxt",
                                            "dnn/bvlc_googlenet.caffemodel"));
    TEST_CYCLE() net->Forward();
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
// Measures the forward-pass time of ResNet-50 running in Caffe.
PERF_TEST(ResNet50_caffe, CaffePerfTest)
{
    // initNet() returns a heap-allocated network; hold it in a smart pointer
    // so that it is released when the test body exits.
    cv::Ptr<caffe::Net<float> > net(initNet("dnn/ResNet-50-deploy.prototxt",
                                            "dnn/ResNet-50-model.caffemodel"));
    TEST_CYCLE() net->Forward();
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
// Measures the forward-pass time of SqueezeNet v1.1 running in Caffe.
PERF_TEST(SqueezeNet_v1_1_caffe, CaffePerfTest)
{
    // initNet() returns a heap-allocated network; hold it in a smart pointer
    // so that it is released when the test body exits.
    cv::Ptr<caffe::Net<float> > net(initNet("dnn/squeezenet_v1.1.prototxt",
                                            "dnn/squeezenet_v1.1.caffemodel"));
    TEST_CYCLE() net->Forward();
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
// Measures the forward-pass time of MobileNet-SSD running in Caffe.
PERF_TEST(MobileNet_SSD, CaffePerfTest)
{
    // initNet() returns a heap-allocated network; hold it in a smart pointer
    // so that it is released when the test body exits.
    cv::Ptr<caffe::Net<float> > net(initNet("dnn/MobileNetSSD_deploy.prototxt",
                                            "dnn/MobileNetSSD_deploy.caffemodel"));
    TEST_CYCLE() net->Forward();
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
} // namespace
|
||||
#endif // HAVE_CAFFE || HAVE_CLCAFFE
|
||||
6
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_common.cpp
vendored
Normal file
6
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_common.cpp
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../test/test_common.impl.hpp" // shared with accuracy tests
|
||||
894
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_convolution.cpp
vendored
Normal file
894
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_convolution.cpp
vendored
Normal file
@@ -0,0 +1,894 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
// Flops_Kernel_Input_OutCN_Group_Stride_Pad_Dilation_PadAdjust_PadMode_Bias
|
||||
struct TestSize_ {
|
||||
int width, height;
|
||||
operator Size() const { return Size(width, height); }
|
||||
};
|
||||
struct ConvParam_t {
|
||||
struct TestSize_ kernel;
|
||||
struct BlobShape { int dims[4]; } shapeIn;
|
||||
int outCN;
|
||||
int groups;
|
||||
struct TestSize_ stride;
|
||||
struct TestSize_ dilation;
|
||||
struct TestSize_ pad;
|
||||
struct TestSize_ padAdjust;
|
||||
const char* padMode;
|
||||
bool hasBias;
|
||||
double declared_flops;
|
||||
};
|
||||
// Details: #12142
|
||||
// Last update: 2021-09
|
||||
static const ConvParam_t testConvolutionConfigs[] = {
|
||||
/* GFLOPS 3.398 x 20 = 67.956 */ {{7, 7}, {{1, 128, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {3, 3}, {0, 0}, "", true, 3397788160.},
|
||||
/* GFLOPS 16.987 x 3 = 50.962 */ {{5, 5}, {{1, 1152, 16, 16}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 16987226112.},
|
||||
/* GFLOPS 23.122 x 2 = 46.244 */ {{5, 5}, {{1, 672, 32, 32}}, 672, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 23121788928.},
|
||||
/* GFLOPS 9.987 x 3 = 29.960 */ {{3, 3}, {{1, 256, 92, 92}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9986707456.},
|
||||
/* GFLOPS 1.595 x 16 = 25.524 */ {{3, 3}, {{1, 256, 26, 26}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595230208.},
|
||||
/* GFLOPS 4.566 x 5 = 22.828 */ {{7, 7}, {{1, 172, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {3, 3}, {0, 0}, "", true, 4565684736.},
|
||||
/* GFLOPS 1.596 x 14 = 22.338 */ {{3, 3}, {{1, 128, 52, 52}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595576320.},
|
||||
/* GFLOPS 1.595 x 12 = 19.141 */ {{3, 3}, {{1, 512, 13, 13}}, 1024, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595057152.},
|
||||
/* GFLOPS 6.814 x 2 = 13.629 */ {{3, 3}, {{1, 512, 38, 38}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6814386176.},
|
||||
/* GFLOPS 6.637 x 2 = 13.274 */ {{3, 3}, {{1, 256, 75, 75}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6636960000.},
|
||||
/* GFLOPS 11.797 x 1 = 11.797 */ {{5, 5}, {{1, 240, 64, 64}}, 240, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 11797463040.},
|
||||
/* GFLOPS 11.797 x 1 = 11.797 */ {{5, 5}, {{1, 480, 32, 32}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 11796971520.},
|
||||
/* GFLOPS 10.701 x 1 = 10.701 */ {{3, 3}, {{1, 512, 38, 38}}, 804, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 10700715792.},
|
||||
/* GFLOPS 10.087 x 1 = 10.087 */ {{3, 3}, {{1, 576, 38, 50}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10086963200.},
|
||||
/* GFLOPS 9.993 x 1 = 9.993 */ {{3, 3}, {{1, 64, 368, 368}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9993207808.},
|
||||
/* GFLOPS 9.989 x 1 = 9.989 */ {{3, 3}, {{1, 128, 184, 184}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9988874240.},
|
||||
/* GFLOPS 9.986 x 1 = 9.986 */ {{3, 3}, {{1, 512, 46, 46}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9985624064.},
|
||||
/* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703596544.},
|
||||
/* GFLOPS 1.704 x 5 = 8.518 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703596544.},
|
||||
/* GFLOPS 4.247 x 2 = 8.494 */ {{3, 3}, {{1, 480, 32, 32}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 4247224320.},
|
||||
/* GFLOPS 8.025 x 1 = 8.025 */ {{3, 3}, {{1, 1024, 19, 19}}, 1206, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 8025101478.},
|
||||
/* GFLOPS 0.798 x 9 = 7.180 */ {{3, 3}, {{1, 128, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 797788160.},
|
||||
/* GFLOPS 0.798 x 9 = 7.179 */ {{3, 3}, {{1, 256, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 797615104.},
|
||||
/* GFLOPS 6.641 x 1 = 6.641 */ {{3, 3}, {{1, 64, 300, 300}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6641280000.},
|
||||
/* GFLOPS 6.641 x 1 = 6.641 */ {{3, 3}, {{1, 64, 150, 200}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6641280000.},
|
||||
/* GFLOPS 6.638 x 1 = 6.638 */ {{3, 3}, {{1, 128, 150, 150}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 6638400000.},
|
||||
/* GFLOPS 6.118 x 1 = 6.118 */ {{3, 3}, {{1, 144, 128, 128}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 6117654528.},
|
||||
/* GFLOPS 6.116 x 1 = 6.116 */ {{3, 3}, {{1, 1152, 16, 16}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 6115590144.},
|
||||
/* GFLOPS 5.780 x 1 = 5.780 */ {{5, 5}, {{1, 672, 32, 32}}, 672, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 5780447232.},
|
||||
/* GFLOPS 1.704 x 3 = 5.111 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703596544.},
|
||||
/* GFLOPS 4.997 x 1 = 4.997 */ {{3, 3}, {{1, 64, 184, 184}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4996603904.},
|
||||
/* GFLOPS 4.994 x 1 = 4.994 */ {{3, 3}, {{1, 128, 92, 92}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4994437120.},
|
||||
/* GFLOPS 4.993 x 1 = 4.993 */ {{3, 3}, {{1, 256, 46, 46}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4993353728.},
|
||||
/* GFLOPS 4.993 x 1 = 4.993 */ {{3, 3}, {{1, 512, 46, 46}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 4992812032.},
|
||||
/* GFLOPS 1.659 x 3 = 4.977 */ {{3, 3}, {{1, 960, 10, 10}}, 960, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1658976000.},
|
||||
/* GFLOPS 2.156 x 2 = 4.312 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2156088384.},
|
||||
/* GFLOPS 4.247 x 1 = 4.247 */ {{5, 5}, {{1, 144, 128, 128}}, 144, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 4247322624.},
|
||||
/* GFLOPS 0.798 x 5 = 3.988 */ {{3, 3}, {{1, 512, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 797528576.},
|
||||
/* GFLOPS 0.958 x 4 = 3.833 */ {{3, 3}, {{1, 384, 19, 19}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958307712.},
|
||||
/* GFLOPS 0.624 x 6 = 3.746 */ {{3, 3}, {{1, 128, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 624304640.},
|
||||
/* GFLOPS 3.408 x 1 = 3.408 */ {{3, 3}, {{1, 256, 38, 38}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3407562752.},
|
||||
/* GFLOPS 3.407 x 1 = 3.407 */ {{3, 3}, {{1, 512, 19, 19}}, 1024, 1, {1, 1}, {6, 6}, {6, 6}, {0, 0}, "", true, 3407193088.},
|
||||
/* GFLOPS 0.177 x 19 = 3.370 */ {{1, 1}, {{1, 512, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177382400.},
|
||||
/* GFLOPS 0.302 x 11 = 3.325 */ {{3, 3}, {{1, 64, 64, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 302252032.},
|
||||
/* GFLOPS 3.321 x 1 = 3.321 */ {{3, 3}, {{1, 64, 150, 150}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3320640000.},
|
||||
/* GFLOPS 0.830 x 4 = 3.321 */ {{3, 3}, {{1, 64, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 830160000.},
|
||||
/* GFLOPS 3.319 x 1 = 3.319 */ {{3, 3}, {{1, 128, 75, 75}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3319200000.},
|
||||
/* GFLOPS 1.598 x 2 = 3.195 */ {{3, 3}, {{1, 32, 416, 416}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1597652992.},
|
||||
/* GFLOPS 1.598 x 2 = 3.195 */ {{3, 3}, {{1, 32, 208, 208}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1597652992.},
|
||||
/* GFLOPS 1.596 x 2 = 3.193 */ {{3, 3}, {{1, 64, 208, 208}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1596268544.},
|
||||
/* GFLOPS 1.596 x 2 = 3.193 */ {{3, 3}, {{1, 64, 104, 104}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1596268544.},
|
||||
/* GFLOPS 1.596 x 2 = 3.191 */ {{3, 3}, {{1, 128, 104, 104}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595576320.},
|
||||
/* GFLOPS 1.595 x 2 = 3.190 */ {{3, 3}, {{1, 256, 52, 52}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595230208.},
|
||||
/* GFLOPS 1.595 x 2 = 3.190 */ {{3, 3}, {{1, 512, 26, 26}}, 1024, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 1595057152.},
|
||||
/* GFLOPS 0.178 x 16 = 2.841 */ {{1, 1}, {{1, 256, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177555456.},
|
||||
/* GFLOPS 2.719 x 1 = 2.719 */ {{3, 3}, {{1, 96, 256, 256}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2719481856.},
|
||||
/* GFLOPS 0.177 x 15 = 2.659 */ {{1, 1}, {{1, 1024, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177295872.},
|
||||
/* GFLOPS 1.245 x 2 = 2.490 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1244880000.},
|
||||
/* GFLOPS 0.798 x 3 = 2.394 */ {{3, 3}, {{1, 64, 104, 104}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 798134272.},
|
||||
/* GFLOPS 0.472 x 5 = 2.360 */ {{3, 3}, {{1, 256, 20, 20}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471961600.},
|
||||
/* GFLOPS 2.255 x 1 = 2.255 */ {{3, 3}, {{1, 128, 80, 100}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2255285760.},
|
||||
/* GFLOPS 2.153 x 1 = 2.153 */ {{3, 3}, {{1, 128, 78, 98}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2152611840.},
|
||||
/* GFLOPS 2.100 x 1 = 2.100 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2100330000.},
|
||||
/* GFLOPS 2.052 x 1 = 2.052 */ {{3, 3}, {{1, 128, 76, 96}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2052298240.},
|
||||
/* GFLOPS 1.022 x 2 = 2.044 */ {{3, 3}, {{1, 576, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1021896057.},
|
||||
/* GFLOPS 1.995 x 1 = 1.995 */ {{9, 9}, {{1, 3, 320, 400}}, 32, 1, {1, 1}, {1, 1}, {4, 4}, {0, 0}, "", true, 1994752000.},
|
||||
/* GFLOPS 1.954 x 1 = 1.954 */ {{3, 3}, {{1, 128, 74, 94}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1954344960.},
|
||||
/* GFLOPS 0.958 x 2 = 1.917 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 958446336.},
|
||||
/* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1887539200.},
|
||||
/* GFLOPS 1.888 x 1 = 1.888 */ {{3, 3}, {{1, 1024, 10, 10}}, 1024, 1024, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1887539200.},
|
||||
/* GFLOPS 1.859 x 1 = 1.859 */ {{3, 3}, {{1, 128, 72, 92}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1858752000.},
|
||||
/* GFLOPS 1.766 x 1 = 1.766 */ {{3, 3}, {{1, 128, 70, 90}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1765519360.},
|
||||
/* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 256, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1703781376.},
|
||||
/* GFLOPS 1.704 x 1 = 1.704 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1703781376.},
|
||||
/* GFLOPS 1.675 x 1 = 1.675 */ {{3, 3}, {{1, 128, 68, 88}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1674647040.},
|
||||
/* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1659600000.},
|
||||
/* GFLOPS 1.660 x 1 = 1.660 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1659600000.},
|
||||
/* GFLOPS 1.586 x 1 = 1.586 */ {{3, 3}, {{1, 128, 66, 86}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1586135040.},
|
||||
/* GFLOPS 1.500 x 1 = 1.500 */ {{3, 3}, {{1, 128, 64, 84}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1499983360.},
|
||||
/* GFLOPS 1.416 x 1 = 1.416 */ {{3, 3}, {{1, 128, 62, 82}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1416192000.},
|
||||
/* GFLOPS 0.472 x 3 = 1.416 */ {{3, 3}, {{1, 128, 40, 40}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 472064000.},
|
||||
/* GFLOPS 0.472 x 3 = 1.416 */ {{3, 3}, {{1, 512, 10, 10}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471910400.},
|
||||
/* GFLOPS 0.280 x 5 = 1.402 */ {{1, 1}, {{1, 576, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 280409600.},
|
||||
/* GFLOPS 0.701 x 2 = 1.401 */ {{3, 3}, {{1, 128, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.},
|
||||
/* GFLOPS 0.231 x 6 = 1.388 */ {{3, 3}, {{1, 128, 56, 56}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.},
|
||||
/* GFLOPS 0.231 x 6 = 1.388 */ {{3, 3}, {{1, 256, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231261184.},
|
||||
/* GFLOPS 0.210 x 6 = 1.262 */ {{1, 1}, {{1, 576, 38, 50}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 210307200.},
|
||||
/* GFLOPS 0.420 x 3 = 1.261 */ {{3, 3}, {{1, 96, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420492800.},
|
||||
/* GFLOPS 1.261 x 1 = 1.261 */ {{3, 3}, {{1, 192, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1261113600.},
|
||||
/* GFLOPS 1.258 x 1 = 1.258 */ {{3, 3}, {{1, 1280, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1258038600.},
|
||||
/* GFLOPS 1.248 x 1 = 1.248 */ {{3, 3}, {{1, 256, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1248338432.},
|
||||
/* GFLOPS 1.245 x 1 = 1.245 */ {{3, 3}, {{1, 64, 75, 75}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1245240000.},
|
||||
/* GFLOPS 1.210 x 1 = 1.210 */ {{3, 3}, {{1, 32, 256, 256}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1210056704.},
|
||||
/* GFLOPS 1.196 x 1 = 1.196 */ {{3, 3}, {{1, 384, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 1196336128.},
|
||||
/* GFLOPS 1.195 x 1 = 1.195 */ {{9, 9}, {{1, 32, 240, 320}}, 3, 1, {1, 1}, {1, 1}, {4, 4}, {0, 0}, "", true, 1194624000.},
|
||||
/* GFLOPS 1.182 x 1 = 1.182 */ {{3, 3}, {{1, 32, 320, 400}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 1181696000.},
|
||||
/* GFLOPS 1.181 x 1 = 1.181 */ {{3, 3}, {{1, 64, 160, 200}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 1180672000.},
|
||||
/* GFLOPS 0.561 x 2 = 1.121 */ {{3, 3}, {{1, 128, 38, 50}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 560576000.},
|
||||
/* GFLOPS 1.112 x 1 = 1.112 */ {{3, 3}, {{1, 512, 10, 10}}, 1206, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1111570200.},
|
||||
/* GFLOPS 0.357 x 3 = 1.072 */ {{1, 1}, {{1, 64, 208, 208}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 357187584.},
|
||||
/* GFLOPS 1.062 x 1 = 1.062 */ {{3, 3}, {{1, 240, 64, 64}}, 240, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1061928960.},
|
||||
/* GFLOPS 0.076 x 14 = 1.058 */ {{3, 3}, {{1, 64, 32, 32}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 75563008.},
|
||||
/* GFLOPS 1.051 x 1 = 1.051 */ {{3, 3}, {{1, 160, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1050988800.},
|
||||
/* GFLOPS 0.210 x 5 = 1.051 */ {{1, 1}, {{1, 256, 20, 20}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 210124800.},
|
||||
/* GFLOPS 0.210 x 5 = 1.049 */ {{1, 1}, {{1, 1024, 20, 20}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209817600.},
|
||||
/* GFLOPS 1.006 x 1 = 1.006 */ {{3, 3}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1006441800.},
|
||||
/* GFLOPS 0.246 x 4 = 0.985 */ {{1, 1}, {{1, 256, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 246240000.},
|
||||
/* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189452800.},
|
||||
/* GFLOPS 0.189 x 5 = 0.947 */ {{1, 1}, {{1, 512, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189452800.},
|
||||
/* GFLOPS 0.472 x 2 = 0.945 */ {{3, 3}, {{1, 64, 80, 80}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 472268800.},
|
||||
/* GFLOPS 0.934 x 1 = 0.934 */ {{3, 3}, {{1, 96, 150, 150}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 933660000.},
|
||||
/* GFLOPS 0.231 x 4 = 0.925 */ {{3, 3}, {{1, 128, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231311360.},
|
||||
/* GFLOPS 0.896 x 1 = 0.896 */ {{5, 5}, {{1, 96, 27, 27}}, 256, 2, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 895981824.},
|
||||
/* GFLOPS 0.089 x 10 = 0.890 */ {{1, 1}, {{1, 128, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 88950784.},
|
||||
/* GFLOPS 0.089 x 10 = 0.888 */ {{1, 1}, {{1, 256, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 88777728.},
|
||||
/* GFLOPS 0.876 x 1 = 0.876 */ {{3, 3}, {{1, 160, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 875824000.},
|
||||
/* GFLOPS 0.850 x 1 = 0.850 */ {{7, 7}, {{1, 3, 600, 800}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 849600000.},
|
||||
/* GFLOPS 0.841 x 1 = 0.841 */ {{3, 3}, {{1, 128, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 840864000.},
|
||||
/* GFLOPS 0.415 x 2 = 0.831 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415440000.},
|
||||
/* GFLOPS 0.757 x 1 = 0.757 */ {{1, 1}, {{1, 1024, 19, 19}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 757441536.},
|
||||
/* GFLOPS 0.712 x 1 = 0.712 */ {{1, 1}, {{1, 128, 208, 208}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 711606272.},
|
||||
/* GFLOPS 0.178 x 4 = 0.712 */ {{1, 1}, {{1, 128, 104, 104}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 177901568.},
|
||||
/* GFLOPS 0.354 x 2 = 0.707 */ {{1, 1}, {{1, 256, 52, 52}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 353723760.},
|
||||
/* GFLOPS 0.351 x 2 = 0.701 */ {{1, 1}, {{1, 576, 38, 50}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 350512000.},
|
||||
/* GFLOPS 0.701 x 1 = 0.701 */ {{3, 3}, {{1, 128, 75, 100}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 700720000.},
|
||||
/* GFLOPS 0.694 x 1 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 694235136.},
|
||||
/* GFLOPS 0.694 x 1 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 694235136.},
|
||||
/* GFLOPS 0.231 x 3 = 0.694 */ {{3, 3}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231411712.},
|
||||
/* GFLOPS 0.058 x 12 = 0.694 */ {{3, 3}, {{1, 128, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 57827840.},
|
||||
/* GFLOPS 0.231 x 3 = 0.694 */ {{3, 3}, {{1, 512, 7, 7}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 231236096.},
|
||||
/* GFLOPS 0.160 x 4 = 0.639 */ {{3, 3}, {{1, 64, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159833472.},
|
||||
/* GFLOPS 0.211 x 3 = 0.634 */ {{1, 1}, {{1, 64, 80, 80}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 211353600.},
|
||||
/* GFLOPS 0.211 x 3 = 0.632 */ {{1, 1}, {{1, 128, 40, 40}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 210534400.},
|
||||
/* GFLOPS 0.210 x 3 = 0.630 */ {{1, 1}, {{1, 512, 40, 40}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209920000.},
|
||||
/* GFLOPS 0.210 x 3 = 0.630 */ {{1, 1}, {{1, 512, 10, 10}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209920000.},
|
||||
/* GFLOPS 0.103 x 6 = 0.618 */ {{1, 1}, {{1, 256, 14, 14}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.},
|
||||
/* GFLOPS 0.615 x 1 = 0.615 */ {{1, 1}, {{1, 320, 75, 100}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615360000.},
|
||||
/* GFLOPS 0.305 x 2 = 0.609 */ {{3, 3}, {{1, 3, 416, 416}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 304578560.},
|
||||
/* GFLOPS 0.597 x 1 = 0.597 */ {{3, 3}, {{1, 576, 19, 19}}, 576, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 597254400.},
|
||||
/* GFLOPS 0.278 x 2 = 0.557 */ {{1, 1}, {{1, 128, 46, 46}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 278431744.},
|
||||
/* GFLOPS 0.185 x 3 = 0.554 */ {{1, 1}, {{1, 192, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 184800000.},
|
||||
/* GFLOPS 0.553 x 1 = 0.553 */ {{3, 3}, {{1, 64, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 553440000.},
|
||||
/* GFLOPS 0.539 x 1 = 0.539 */ {{3, 3}, {{1, 144, 75, 75}}, 144, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 539178048.},
|
||||
/* GFLOPS 0.103 x 5 = 0.514 */ {{1, 1}, {{1, 1024, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102810624.},
|
||||
/* GFLOPS 0.491 x 1 = 0.491 */ {{1, 1}, {{1, 576, 38, 50}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 490716800.},
|
||||
/* GFLOPS 0.483 x 1 = 0.483 */ {{7, 7}, {{1, 3, 320, 320}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", false, 483328000.},
|
||||
/* GFLOPS 0.240 x 2 = 0.479 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239680896.},
|
||||
/* GFLOPS 0.477 x 1 = 0.477 */ {{3, 3}, {{1, 3, 368, 368}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 476692480.},
|
||||
/* GFLOPS 0.237 x 2 = 0.474 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 236830720.},
|
||||
/* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 512, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 471910400.},
|
||||
/* GFLOPS 0.472 x 1 = 0.472 */ {{3, 3}, {{1, 512, 19, 19}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 471910400.},
|
||||
/* GFLOPS 0.155 x 3 = 0.464 */ {{1, 1}, {{1, 112, 32, 32}}, 672, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 154828800.},
|
||||
/* GFLOPS 0.114 x 4 = 0.454 */ {{1, 1}, {{1, 192, 16, 16}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 113541120.},
|
||||
/* GFLOPS 0.449 x 1 = 0.449 */ {{3, 3}, {{1, 384, 13, 13}}, 384, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 448626048.},
|
||||
/* GFLOPS 0.089 x 5 = 0.443 */ {{1, 1}, {{1, 512, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 88691200.},
|
||||
/* GFLOPS 0.428 x 1 = 0.428 */ {{1, 1}, {{1, 64, 64, 64}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 427991040.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 128, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 426037760.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 426037760.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 128, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 426037760.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 256, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 425945344.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 425945344.},
|
||||
/* GFLOPS 0.426 x 1 = 0.426 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 425945344.},
|
||||
/* GFLOPS 0.421 x 1 = 0.421 */ {{1, 1}, {{1, 576, 38, 50}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 420614400.},
|
||||
/* GFLOPS 0.420 x 1 = 0.420 */ {{1, 1}, {{1, 256, 40, 40}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 420249600.},
|
||||
/* GFLOPS 0.210 x 2 = 0.420 */ {{1, 1}, {{1, 256, 80, 80}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 210124800.},
|
||||
/* GFLOPS 0.420 x 1 = 0.420 */ {{1, 1}, {{1, 512, 20, 20}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 419840000.},
|
||||
/* GFLOPS 0.420 x 1 = 0.420 */ {{1, 1}, {{1, 1024, 10, 10}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 419635200.},
|
||||
/* GFLOPS 0.210 x 2 = 0.420 */ {{1, 1}, {{1, 2048, 10, 10}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209766400.},
|
||||
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 32, 150, 150}}, 32, 32, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 415440000.},
|
||||
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 64, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 415080000.},
|
||||
/* GFLOPS 0.415 x 1 = 0.415 */ {{3, 3}, {{1, 64, 150, 150}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 415080000.},
|
||||
/* GFLOPS 0.104 x 4 = 0.414 */ {{1, 1}, {{1, 64, 56, 56}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103563264.},
|
||||
/* GFLOPS 0.103 x 4 = 0.413 */ {{1, 1}, {{1, 128, 28, 28}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.},
|
||||
/* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 32, 208, 208}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 399413248.},
|
||||
/* GFLOPS 0.200 x 2 = 0.399 */ {{3, 3}, {{1, 32, 104, 104}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 199706624.},
|
||||
/* GFLOPS 0.200 x 2 = 0.399 */ {{3, 3}, {{1, 64, 52, 52}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 199533568.},
|
||||
/* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 128, 52, 52}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 398894080.},
|
||||
/* GFLOPS 0.199 x 2 = 0.399 */ {{3, 3}, {{1, 128, 26, 26}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 199447040.},
|
||||
/* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 256, 26, 26}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 398807552.},
|
||||
/* GFLOPS 0.399 x 1 = 0.399 */ {{3, 3}, {{1, 256, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 398807552.},
|
||||
/* GFLOPS 0.376 x 1 = 0.376 */ {{1, 1}, {{1, 24, 300, 400}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 376320000.},
|
||||
/* GFLOPS 0.179 x 2 = 0.357 */ {{1, 1}, {{1, 64, 208, 208}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 178593792.},
|
||||
/* GFLOPS 0.089 x 4 = 0.357 */ {{1, 1}, {{1, 64, 104, 104}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 89296896.},
|
||||
/* GFLOPS 0.356 x 1 = 0.356 */ {{1, 1}, {{1, 128, 104, 104}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 355803136.},
|
||||
/* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 256, 52, 52}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 355110912.},
|
||||
/* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 512, 26, 26}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 354764800.},
|
||||
/* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 1024, 13, 13}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 354591744.},
|
||||
/* GFLOPS 0.355 x 1 = 0.355 */ {{1, 1}, {{1, 2048, 13, 13}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 354505216.},
|
||||
/* GFLOPS 0.177 x 2 = 0.353 */ {{1, 1}, {{1, 512, 26, 26}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 176689500.},
|
||||
/* GFLOPS 0.070 x 5 = 0.348 */ {{1, 1}, {{1, 128, 46, 46}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 69607936.},
|
||||
/* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 346967040.},
|
||||
/* GFLOPS 0.347 x 1 = 0.347 */ {{3, 3}, {{1, 128, 28, 28}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 346967040.},
|
||||
/* GFLOPS 0.014 x 24 = 0.347 */ {{3, 3}, {{1, 128, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 14456960.},
|
||||
/* GFLOPS 0.113 x 3 = 0.340 */ {{1, 1}, {{1, 1152, 16, 16}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 113295360.},
|
||||
/* GFLOPS 0.053 x 6 = 0.320 */ {{1, 1}, {{1, 576, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53277824.},
|
||||
/* GFLOPS 0.319 x 1 = 0.319 */ {{3, 3}, {{1, 192, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 319482112.},
|
||||
/* GFLOPS 0.317 x 1 = 0.317 */ {{3, 3}, {{1, 3, 300, 300}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 316800000.},
|
||||
/* GFLOPS 0.315 x 1 = 0.315 */ {{3, 3}, {{1, 96, 75, 100}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 315369600.},
|
||||
/* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 7, 7}}, 2048, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.},
|
||||
/* GFLOPS 0.103 x 3 = 0.309 */ {{1, 1}, {{1, 512, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102860800.},
|
||||
/* GFLOPS 0.154 x 2 = 0.309 */ {{1, 1}, {{1, 672, 32, 32}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 154255360.},
|
||||
/* GFLOPS 0.308 x 1 = 0.308 */ {{1, 1}, {{1, 320, 75, 100}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 307680000.},
|
||||
/* GFLOPS 0.034 x 9 = 0.304 */ {{1, 1}, {{1, 64, 64, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 33816576.},
|
||||
/* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 256, 13, 13}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299105664.},
|
||||
/* GFLOPS 0.299 x 1 = 0.299 */ {{3, 3}, {{1, 384, 13, 13}}, 256, 2, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 299084032.},
|
||||
/* GFLOPS 0.017 x 17 = 0.290 */ {{1, 1}, {{1, 32, 32, 64}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17039360.},
|
||||
/* GFLOPS 0.017 x 16 = 0.269 */ {{1, 1}, {{1, 128, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16842752.},
|
||||
/* GFLOPS 0.133 x 2 = 0.266 */ {{3, 3}, {{1, 128, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.},
|
||||
/* GFLOPS 0.266 x 1 = 0.266 */ {{1, 1}, {{1, 384, 52, 52}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 266160128.},
|
||||
/* GFLOPS 0.266 x 1 = 0.266 */ {{1, 1}, {{1, 768, 26, 26}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 265987072.},
|
||||
/* GFLOPS 0.038 x 7 = 0.265 */ {{3, 3}, {{1, 16, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37879808.},
|
||||
/* GFLOPS 0.019 x 14 = 0.264 */ {{3, 3}, {{1, 64, 16, 16}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18890752.},
|
||||
/* GFLOPS 0.262 x 1 = 0.262 */ {{1, 1}, {{1, 2560, 20, 20}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 262195200.},
|
||||
/* GFLOPS 0.126 x 2 = 0.252 */ {{3, 3}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 125812050.},
|
||||
/* GFLOPS 0.248 x 1 = 0.248 */ {{1, 1}, {{1, 64, 150, 200}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 247680000.},
|
||||
/* GFLOPS 0.040 x 6 = 0.240 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.},
|
||||
/* GFLOPS 0.080 x 3 = 0.240 */ {{3, 3}, {{1, 96, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79893632.},
|
||||
/* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 38, 38}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 239611584.},
|
||||
/* GFLOPS 0.240 x 1 = 0.240 */ {{3, 3}, {{1, 192, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 239611584.},
|
||||
/* GFLOPS 0.079 x 3 = 0.237 */ {{1, 1}, {{1, 80, 32, 32}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 79134720.},
|
||||
/* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", false, 236830720.},
|
||||
/* GFLOPS 0.237 x 1 = 0.237 */ {{7, 7}, {{1, 3, 224, 224}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 236830720.},
|
||||
/* GFLOPS 0.118 x 2 = 0.236 */ {{3, 3}, {{1, 32, 80, 80}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 118169600.},
|
||||
/* GFLOPS 0.236 x 1 = 0.236 */ {{3, 3}, {{1, 256, 19, 19}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 235980800.},
|
||||
/* GFLOPS 0.116 x 2 = 0.231 */ {{1, 1}, {{1, 24, 128, 128}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 115605504.},
|
||||
/* GFLOPS 0.111 x 2 = 0.221 */ {{3, 3}, {{1, 192, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110624000.},
|
||||
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 213018880.},
|
||||
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", false, 213018880.},
|
||||
/* GFLOPS 0.107 x 2 = 0.213 */ {{3, 3}, {{1, 128, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106509440.},
|
||||
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 212972672.},
|
||||
/* GFLOPS 0.213 x 1 = 0.213 */ {{3, 3}, {{1, 512, 38, 38}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 212949568.},
|
||||
/* GFLOPS 0.212 x 1 = 0.212 */ {{7, 7}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 212400000.},
|
||||
/* GFLOPS 0.211 x 1 = 0.211 */ {{11, 11}, {{1, 3, 227, 227}}, 96, 1, {4, 4}, {1, 1}, {0, 0}, {0, 0}, "", true, 211120800.},
|
||||
/* GFLOPS 0.210 x 1 = 0.210 */ {{3, 3}, {{1, 64, 38, 50}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 210307200.},
|
||||
/* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 209817600.},
|
||||
/* GFLOPS 0.210 x 1 = 0.210 */ {{1, 1}, {{1, 1024, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 209817600.},
|
||||
/* GFLOPS 0.104 x 2 = 0.208 */ {{3, 3}, {{1, 32, 75, 75}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 103860000.},
|
||||
/* GFLOPS 0.208 x 1 = 0.208 */ {{1, 1}, {{1, 16, 256, 256}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 207618048.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205922304.},
|
||||
/* GFLOPS 0.103 x 2 = 0.206 */ {{1, 1}, {{1, 256, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102961152.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 512, 28, 28}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205721600.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 512, 28, 28}}, 1024, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205721600.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 1024, 14, 14}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 205621248.},
|
||||
/* GFLOPS 0.206 x 1 = 0.206 */ {{1, 1}, {{1, 1024, 14, 14}}, 2048, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 205621248.},
|
||||
/* GFLOPS 0.103 x 2 = 0.206 */ {{1, 1}, {{1, 2048, 7, 7}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 102785536.},
|
||||
/* GFLOPS 0.201 x 1 = 0.201 */ {{1, 1}, {{1, 512, 14, 14}}, 1000, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 200900000.},
|
||||
/* GFLOPS 0.200 x 1 = 0.200 */ {{3, 3}, {{1, 160, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 199687872.},
|
||||
/* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189637632.},
|
||||
/* GFLOPS 0.190 x 1 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 189637632.},
|
||||
/* GFLOPS 0.047 x 4 = 0.190 */ {{1, 1}, {{1, 256, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 47409408.},
|
||||
/* GFLOPS 0.189 x 1 = 0.189 */ {{1, 1}, {{1, 1024, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 189360384.},
|
||||
/* GFLOPS 0.038 x 5 = 0.189 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.189 x 1 = 0.189 */ {{1, 1}, {{1, 1152, 16, 16}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 188825600.},
|
||||
/* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 185040000.},
|
||||
/* GFLOPS 0.185 x 1 = 0.185 */ {{1, 1}, {{1, 128, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 185040000.},
|
||||
/* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 180696320.},
|
||||
/* GFLOPS 0.181 x 1 = 0.181 */ {{3, 3}, {{1, 160, 14, 14}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 180696320.},
|
||||
/* GFLOPS 0.090 x 2 = 0.181 */ {{3, 3}, {{1, 224, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 90339200.},
|
||||
/* GFLOPS 0.180 x 1 = 0.180 */ {{1, 1}, {{1, 224, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 180232192.},
|
||||
/* GFLOPS 0.088 x 2 = 0.177 */ {{1, 1}, {{1, 1024, 13, 13}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 88301655.},
|
||||
/* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 173508608.},
|
||||
/* GFLOPS 0.174 x 1 = 0.174 */ {{3, 3}, {{1, 96, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 173508608.},
|
||||
/* GFLOPS 0.166 x 1 = 0.166 */ {{3, 3}, {{1, 160, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 166406560.},
|
||||
/* GFLOPS 0.080 x 2 = 0.160 */ {{1, 1}, {{1, 576, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 79916736.},
|
||||
/* GFLOPS 0.160 x 1 = 0.160 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 159764160.},
|
||||
/* GFLOPS 0.160 x 1 = 0.160 */ {{3, 3}, {{1, 1024, 19, 19}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 159703512.},
|
||||
/* GFLOPS 0.159 x 1 = 0.159 */ {{7, 7}, {{1, 3, 300, 300}}, 24, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 159300000.},
|
||||
/* GFLOPS 0.080 x 2 = 0.159 */ {{1, 1}, {{1, 40, 64, 64}}, 240, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 79626240.},
|
||||
/* GFLOPS 0.079 x 2 = 0.157 */ {{1, 1}, {{1, 480, 32, 32}}, 80, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 78725120.},
|
||||
/* GFLOPS 0.155 x 1 = 0.155 */ {{1, 1}, {{1, 192, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 154542080.},
|
||||
/* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 146369664.},
|
||||
/* GFLOPS 0.146 x 1 = 0.146 */ {{3, 3}, {{1, 144, 14, 14}}, 288, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 146369664.},
|
||||
/* GFLOPS 0.072 x 2 = 0.144 */ {{1, 1}, {{1, 1024, 10, 10}}, 352, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 72124800.},
|
||||
/* GFLOPS 0.140 x 1 = 0.140 */ {{1, 1}, {{1, 576, 38, 50}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140204800.},
|
||||
/* GFLOPS 0.139 x 1 = 0.139 */ {{3, 3}, {{1, 256, 5, 5}}, 1206, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 138961350.},
|
||||
/* GFLOPS 0.017 x 8 = 0.138 */ {{1, 1}, {{1, 16, 64, 128}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17301504.},
|
||||
/* GFLOPS 0.067 x 2 = 0.133 */ {{1, 1}, {{1, 576, 19, 19}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 66597280.},
|
||||
/* GFLOPS 0.133 x 1 = 0.133 */ {{3, 3}, {{1, 128, 38, 38}}, 160, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 133136800.},
|
||||
/* GFLOPS 0.044 x 3 = 0.133 */ {{1, 1}, {{1, 512, 13, 13}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 44345600.},
|
||||
/* GFLOPS 0.129 x 1 = 0.129 */ {{1, 1}, {{1, 160, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 128851968.},
|
||||
/* GFLOPS 0.128 x 1 = 0.128 */ {{3, 3}, {{1, 64, 24, 24}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 127512576.},
|
||||
/* GFLOPS 0.120 x 1 = 0.120 */ {{5, 5}, {{1, 32, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 120497664.},
|
||||
/* GFLOPS 0.120 x 1 = 0.120 */ {{5, 5}, {{1, 32, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 120497664.},
|
||||
/* GFLOPS 0.040 x 3 = 0.120 */ {{1, 1}, {{1, 96, 19, 19}}, 576, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 40131648.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{1, 1}, {{1, 320, 38, 38}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 118477312.},
|
||||
/* GFLOPS 0.017 x 7 = 0.118 */ {{1, 1}, {{1, 64, 64, 128}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16908288.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 64, 80, 80}}, 64, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 118067200.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 64, 40, 40}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 118067200.},
|
||||
/* GFLOPS 0.039 x 3 = 0.118 */ {{1, 1}, {{1, 1024, 10, 10}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39340800.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 128, 40, 40}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 118016000.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 128, 20, 20}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 118016000.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 256, 20, 20}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 117990400.},
|
||||
/* GFLOPS 0.118 x 1 = 0.118 */ {{3, 3}, {{1, 256, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 117990400.},
|
||||
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 58003456.},
|
||||
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57903104.},
|
||||
/* GFLOPS 0.058 x 2 = 0.116 */ {{3, 3}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 57852928.},
|
||||
/* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 115655680.},
|
||||
/* GFLOPS 0.116 x 1 = 0.116 */ {{3, 3}, {{1, 128, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 115655680.},
|
||||
/* GFLOPS 0.115 x 1 = 0.115 */ {{3, 3}, {{1, 3, 512, 512}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 115343360.},
|
||||
/* GFLOPS 0.114 x 1 = 0.114 */ {{1, 1}, {{1, 144, 128, 128}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 113639424.},
|
||||
/* GFLOPS 0.112 x 1 = 0.112 */ {{1, 1}, {{1, 1024, 10, 10}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 111875400.},
|
||||
/* GFLOPS 0.110 x 1 = 0.110 */ {{1, 1}, {{1, 480, 32, 32}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 110215168.},
|
||||
/* GFLOPS 0.107 x 1 = 0.107 */ {{1, 1}, {{1, 64, 32, 32}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 106997760.},
|
||||
/* GFLOPS 0.036 x 3 = 0.107 */ {{1, 1}, {{1, 192, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 35580160.},
|
||||
/* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 32, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 106648064.},
|
||||
/* GFLOPS 0.107 x 1 = 0.107 */ {{3, 3}, {{1, 64, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 106555648.},
|
||||
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 256, 40, 40}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 105062400.},
|
||||
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 20, 20}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104960000.},
|
||||
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 104960000.},
|
||||
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 512, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104960000.},
|
||||
/* GFLOPS 0.105 x 1 = 0.105 */ {{1, 1}, {{1, 1024, 10, 10}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 104908800.},
|
||||
/* GFLOPS 0.103 x 1 = 0.103 */ {{1, 1}, {{1, 128, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 103161856.},
|
||||
/* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 51480576.},
|
||||
/* GFLOPS 0.051 x 2 = 0.103 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 51480576.},
|
||||
/* GFLOPS 0.008 x 12 = 0.101 */ {{1, 1}, {{1, 64, 32, 32}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 8454144.},
|
||||
/* GFLOPS 0.101 x 1 = 0.101 */ {{1, 1}, {{1, 512, 19, 19}}, 273, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 101016825.},
|
||||
/* GFLOPS 0.096 x 1 = 0.096 */ {{1, 1}, {{1, 480, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 96438272.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 95003648.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 95003648.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 256, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 94818816.},
|
||||
/* GFLOPS 0.095 x 1 = 0.095 */ {{1, 1}, {{1, 256, 19, 19}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 94818816.},
|
||||
/* GFLOPS 0.094 x 1 = 0.094 */ {{1, 1}, {{1, 32, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 93600000.},
|
||||
/* GFLOPS 0.094 x 1 = 0.094 */ {{1, 1}, {{1, 32, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 93600000.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 512, 38, 50}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 93480000.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 576, 19, 19}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 93236192.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 64, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 92880000.},
|
||||
/* GFLOPS 0.093 x 1 = 0.093 */ {{1, 1}, {{1, 64, 75, 75}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 92880000.},
|
||||
/* GFLOPS 0.031 x 3 = 0.092 */ {{1, 1}, {{1, 160, 10, 10}}, 960, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 30816000.},
|
||||
/* GFLOPS 0.092 x 1 = 0.092 */ {{1, 1}, {{1, 192, 75, 100}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 92400000.},
|
||||
/* GFLOPS 0.090 x 1 = 0.090 */ {{1, 1}, {{1, 448, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 90015744.},
|
||||
/* GFLOPS 0.045 x 2 = 0.090 */ {{3, 3}, {{1, 576, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44918508.},
|
||||
/* GFLOPS 0.044 x 2 = 0.089 */ {{1, 1}, {{1, 256, 26, 26}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 44388864.},
|
||||
/* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 88554368.},
|
||||
/* GFLOPS 0.089 x 1 = 0.089 */ {{3, 3}, {{1, 112, 14, 14}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 88554368.},
|
||||
/* GFLOPS 0.088 x 1 = 0.088 */ {{1, 1}, {{1, 256, 26, 26}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 88430940.},
|
||||
/* GFLOPS 0.021 x 4 = 0.084 */ {{5, 1}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {2, 0}, {0, 0}, "", false, 21037056.},
|
||||
/* GFLOPS 0.021 x 4 = 0.084 */ {{1, 5}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {1, 1}, {0, 2}, {0, 0}, "", true, 21037056.},
|
||||
/* GFLOPS 0.084 x 1 = 0.084 */ {{1, 1}, {{1, 416, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 83593216.},
|
||||
/* GFLOPS 0.082 x 1 = 0.082 */ {{1, 1}, {{1, 320, 10, 10}}, 1280, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 82048000.},
|
||||
/* GFLOPS 0.040 x 2 = 0.080 */ {{1, 1}, {{1, 576, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39958368.},
|
||||
/* GFLOPS 0.040 x 2 = 0.079 */ {{1, 1}, {{1, 24, 75, 75}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39690000.},
|
||||
/* GFLOPS 0.040 x 2 = 0.079 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39600000.},
|
||||
/* GFLOPS 0.079 x 1 = 0.079 */ {{1, 1}, {{1, 240, 64, 64}}, 40, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 78807040.},
|
||||
/* GFLOPS 0.079 x 1 = 0.079 */ {{1, 1}, {{1, 384, 40, 40}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 78745600.},
|
||||
/* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 96, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77471744.},
|
||||
/* GFLOPS 0.077 x 1 = 0.077 */ {{3, 3}, {{1, 192, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 77436800.},
|
||||
/* GFLOPS 0.077 x 1 = 0.077 */ {{1, 1}, {{1, 384, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 77170688.},
|
||||
/* GFLOPS 0.076 x 1 = 0.076 */ {{3, 3}, {{1, 3, 416, 416}}, 32, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", false, 76144640.},
|
||||
/* GFLOPS 0.076 x 1 = 0.076 */ {{1, 1}, {{1, 96, 128, 128}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 75890688.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {8, 8}, {8, 8}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {4, 4}, {4, 4}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {2, 2}, {2, 2}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.038 x 2 = 0.076 */ {{3, 3}, {{1, 32, 32, 64}}, 32, 1, {1, 1}, {16, 16}, {16, 16}, {0, 0}, "", true, 37814272.},
|
||||
/* GFLOPS 0.018 x 4 = 0.072 */ {{1, 1}, {{1, 64, 19, 19}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17882496.},
|
||||
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 16, 150, 150}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 71280000.},
|
||||
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 352, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 70748160.},
|
||||
/* GFLOPS 0.071 x 1 = 0.071 */ {{1, 1}, {{1, 24, 150, 150}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 70560000.},
|
||||
/* GFLOPS 0.070 x 1 = 0.070 */ {{3, 3}, {{1, 96, 14, 14}}, 208, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 70487872.},
|
||||
/* GFLOPS 0.069 x 1 = 0.069 */ {{3, 3}, {{1, 96, 14, 14}}, 204, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 69132336.},
|
||||
/* GFLOPS 0.068 x 1 = 0.068 */ {{1, 1}, {{1, 32, 256, 256}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 68157440.},
|
||||
/* GFLOPS 0.005 x 14 = 0.066 */ {{3, 3}, {{1, 64, 8, 8}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 4722688.},
|
||||
/* GFLOPS 0.066 x 1 = 0.066 */ {{1, 1}, {{1, 672, 16, 16}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 66109440.},
|
||||
/* GFLOPS 0.066 x 1 = 0.066 */ {{1, 1}, {{1, 1280, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 65561600.},
|
||||
/* GFLOPS 0.033 x 2 = 0.065 */ {{3, 3}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 32551680.},
|
||||
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 192, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 65046912.},
|
||||
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 192, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 65046912.},
|
||||
/* GFLOPS 0.065 x 1 = 0.065 */ {{3, 3}, {{1, 160, 10, 10}}, 224, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 64534400.},
|
||||
/* GFLOPS 0.064 x 1 = 0.064 */ {{1, 1}, {{1, 320, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 64325632.},
|
||||
/* GFLOPS 0.032 x 2 = 0.064 */ {{3, 3}, {{1, 96, 12, 12}}, 128, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 31868928.},
|
||||
/* GFLOPS 0.061 x 1 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 61472000.},
|
||||
/* GFLOPS 0.031 x 2 = 0.061 */ {{1, 1}, {{1, 960, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 30736000.},
|
||||
/* GFLOPS 0.061 x 1 = 0.061 */ {{1, 1}, {{1, 512, 46, 46}}, 28, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 60729200.},
|
||||
/* GFLOPS 0.060 x 1 = 0.060 */ {{3, 3}, {{1, 96, 38, 38}}, 96, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59920224.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{1, 1}, {{1, 320, 38, 38}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 59238656.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 128, 19, 19}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 59008000.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 58995200.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 58995200.},
|
||||
/* GFLOPS 0.059 x 1 = 0.059 */ {{3, 3}, {{1, 256, 10, 10}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 58995200.},
|
||||
/* GFLOPS 0.058 x 1 = 0.058 */ {{1, 1}, {{1, 288, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 57903104.},
|
||||
/* GFLOPS 0.004 x 16 = 0.058 */ {{3, 3}, {{1, 128, 7, 7}}, 32, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", false, 3614240.},
|
||||
/* GFLOPS 0.055 x 1 = 0.055 */ {{3, 3}, {{1, 1280, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 55298400.},
|
||||
/* GFLOPS 0.018 x 3 = 0.054 */ {{1, 1}, {{1, 32, 38, 38}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18021120.},
|
||||
/* GFLOPS 0.018 x 3 = 0.053 */ {{1, 1}, {{1, 384, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17766976.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{3, 3}, {{1, 128, 38, 38}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 53254720.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 53036032.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 528, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 53036032.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 64, 80, 80}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52838400.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 64, 40, 40}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52838400.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 128, 80, 80}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52633600.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 128, 20, 20}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52633600.},
|
||||
/* GFLOPS 0.053 x 1 = 0.053 */ {{1, 1}, {{1, 256, 10, 10}}, 1024, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52531200.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 52454400.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 52454400.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 52454400.},
|
||||
/* GFLOPS 0.026 x 2 = 0.052 */ {{1, 1}, {{1, 1024, 10, 10}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26227200.},
|
||||
/* GFLOPS 0.052 x 1 = 0.052 */ {{1, 1}, {{1, 64, 56, 56}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 51781632.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 256, 56, 56}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51480576.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 256, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 51480576.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 512, 28, 28}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51430400.},
|
||||
/* GFLOPS 0.026 x 2 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25715200.},
|
||||
/* GFLOPS 0.026 x 2 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 25715200.},
|
||||
/* GFLOPS 0.013 x 4 = 0.051 */ {{1, 1}, {{1, 512, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12857600.},
|
||||
/* GFLOPS 0.051 x 1 = 0.051 */ {{1, 1}, {{1, 1024, 14, 14}}, 512, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 51405312.},
|
||||
/* GFLOPS 0.050 x 1 = 0.050 */ {{1, 1}, {{1, 992, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 49799680.},
|
||||
/* GFLOPS 0.048 x 1 = 0.048 */ {{1, 1}, {{1, 960, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 48194048.},
|
||||
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 256, 19, 19}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 47409408.},
|
||||
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 144, 64, 64}}, 40, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 47349760.},
|
||||
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 512, 38, 50}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46740000.},
|
||||
/* GFLOPS 0.047 x 1 = 0.047 */ {{1, 1}, {{1, 928, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 46588416.},
|
||||
/* GFLOPS 0.046 x 1 = 0.046 */ {{1, 1}, {{1, 64, 75, 75}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 46440000.},
|
||||
/* GFLOPS 0.023 x 2 = 0.045 */ {{3, 3}, {{1, 256, 3, 3}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22648626.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 160, 7, 7}}, 320, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 45174080.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 160, 7, 7}}, 320, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 45174080.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{1, 1}, {{1, 224, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 45058048.},
|
||||
/* GFLOPS 0.023 x 2 = 0.045 */ {{1, 1}, {{1, 512, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 22500800.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{1, 1}, {{1, 896, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 44982784.},
|
||||
/* GFLOPS 0.045 x 1 = 0.045 */ {{3, 3}, {{1, 3, 227, 227}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", true, 44946880.},
|
||||
/* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 128, 19, 19}}, 192, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44256000.},
|
||||
/* GFLOPS 0.044 x 1 = 0.044 */ {{3, 3}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 44239200.},
|
||||
/* GFLOPS 0.044 x 1 = 0.044 */ {{1, 1}, {{1, 512, 13, 13}}, 255, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 44172375.},
|
||||
/* GFLOPS 0.043 x 1 = 0.043 */ {{7, 7}, {{1, 3, 96, 96}}, 64, 1, {2, 2}, {1, 1}, {3, 3}, {0, 0}, "", true, 43499520.},
|
||||
/* GFLOPS 0.043 x 1 = 0.043 */ {{1, 1}, {{1, 864, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 43377152.},
|
||||
/* GFLOPS 0.042 x 1 = 0.042 */ {{1, 1}, {{1, 832, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 41771520.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{5, 5}, {{1, 32, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 40165888.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{5, 5}, {{1, 32, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 40165888.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{1, 1}, {{1, 800, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 40165888.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 64, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 39958368.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 256, 19, 19}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 39932376.},
|
||||
/* GFLOPS 0.040 x 1 = 0.040 */ {{3, 3}, {{1, 3, 300, 300}}, 32, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 39600000.},
|
||||
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 240, 32, 32}}, 80, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39403520.},
|
||||
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 144, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 39015000.},
|
||||
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 192, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38635520.},
|
||||
/* GFLOPS 0.039 x 1 = 0.039 */ {{1, 1}, {{1, 768, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 38560256.},
|
||||
/* GFLOPS 0.037 x 1 = 0.037 */ {{1, 1}, {{1, 736, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 36954624.},
|
||||
/* GFLOPS 0.036 x 1 = 0.036 */ {{1, 1}, {{1, 480, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 36164352.},
|
||||
/* GFLOPS 0.036 x 1 = 0.036 */ {{1, 1}, {{1, 480, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 36164352.},
|
||||
/* GFLOPS 0.018 x 2 = 0.036 */ {{1, 1}, {{1, 192, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 17790080.},
|
||||
/* GFLOPS 0.035 x 1 = 0.035 */ {{1, 1}, {{1, 704, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 35348992.},
|
||||
/* GFLOPS 0.035 x 1 = 0.035 */ {{1, 1}, {{1, 512, 46, 46}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 34702400.},
|
||||
/* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 672, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33743360.},
|
||||
/* GFLOPS 0.034 x 1 = 0.034 */ {{1, 1}, {{1, 128, 32, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 33685504.},
|
||||
/* GFLOPS 0.034 x 1 = 0.034 */ {{2, 2}, {{1, 64, 64, 128}}, 32, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 33619968.},
|
||||
/* GFLOPS 0.033 x 1 = 0.033 */ {{3, 3}, {{1, 256, 3, 3}}, 804, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 33350724.},
|
||||
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 33147520.},
|
||||
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 528, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 33147520.},
|
||||
/* GFLOPS 0.033 x 1 = 0.033 */ {{1, 1}, {{1, 1024, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 32784000.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 160, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 32212992.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 512, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32144000.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 640, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 32137728.},
|
||||
/* GFLOPS 0.032 x 1 = 0.032 */ {{1, 1}, {{1, 508, 14, 14}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31893120.},
|
||||
/* GFLOPS 0.011 x 3 = 0.032 */ {{1, 1}, {{1, 320, 16, 16}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10502144.},
|
||||
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 31328640.},
|
||||
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 832, 7, 7}}, 384, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 31328640.},
|
||||
/* GFLOPS 0.031 x 1 = 0.031 */ {{1, 1}, {{1, 608, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 30532096.},
|
||||
/* GFLOPS 0.015 x 2 = 0.030 */ {{1, 1}, {{1, 128, 46, 46}}, 28, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 15226736.},
|
||||
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15065344.},
|
||||
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 24, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15065344.},
|
||||
/* GFLOPS 0.015 x 2 = 0.030 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15059072.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{3, 3}, {{1, 256, 10, 10}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 29497600.},
|
||||
/* GFLOPS 0.015 x 2 = 0.029 */ {{1, 1}, {{1, 112, 32, 32}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 14745600.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28976640.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 192, 28, 28}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28976640.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 28929600.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 512, 14, 14}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 28929600.},
|
||||
/* GFLOPS 0.029 x 1 = 0.029 */ {{1, 1}, {{1, 576, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 28926464.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 544, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 27320832.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 64, 16, 16}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 26749440.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 384, 19, 19}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26650464.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 576, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26638912.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{3, 3}, {{1, 128, 38, 38}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 26627360.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 26518016.},
|
||||
/* GFLOPS 0.027 x 1 = 0.027 */ {{1, 1}, {{1, 528, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 26518016.},
|
||||
/* GFLOPS 0.009 x 3 = 0.026 */ {{1, 1}, {{1, 128, 46, 46}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 8700992.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 96, 75, 75}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 26055000.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 25890816.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25890816.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 64, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25890816.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 1024, 10, 10}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25817400.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 128, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25790464.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 25740288.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 25740288.},
|
||||
/* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12870144.},
|
||||
/* GFLOPS 0.026 x 1 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 25715200.},
|
||||
/* GFLOPS 0.013 x 2 = 0.026 */ {{1, 1}, {{1, 512, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12857600.},
|
||||
/* GFLOPS 0.002 x 12 = 0.025 */ {{1, 1}, {{1, 64, 16, 16}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 2113536.},
|
||||
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 480, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 24109568.},
|
||||
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 128, 38, 38}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 23750912.},
|
||||
/* GFLOPS 0.024 x 1 = 0.024 */ {{1, 1}, {{1, 256, 19, 19}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 23704704.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{3, 3}, {{1, 3, 256, 512}}, 13, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 23429120.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 32, 150, 150}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 23400000.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 512, 19, 19}}, 63, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 23311575.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 448, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 22503936.},
|
||||
/* GFLOPS 0.023 x 1 = 0.023 */ {{1, 1}, {{1, 512, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22500800.},
|
||||
/* GFLOPS 0.022 x 1 = 0.022 */ {{1, 1}, {{1, 508, 14, 14}}, 112, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 22325184.},
|
||||
/* GFLOPS 0.022 x 1 = 0.022 */ {{3, 3}, {{1, 512, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 22120800.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{3, 3}, {{1, 128, 12, 12}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 21242880.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 40, 64, 64}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 21233664.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 416, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 20898304.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 20885760.},
|
||||
/* GFLOPS 0.021 x 1 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20885760.},
|
||||
/* GFLOPS 0.010 x 2 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 10442880.},
|
||||
/* GFLOPS 0.010 x 2 = 0.021 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10442880.},
|
||||
/* GFLOPS 0.010 x 2 = 0.020 */ {{3, 3}, {{1, 256, 2, 2}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 10066056.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 16, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 20095488.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 16, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20095488.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 32, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 20082944.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{5, 5}, {{1, 32, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 20082944.},
|
||||
/* GFLOPS 0.020 x 1 = 0.020 */ {{3, 3}, {{1, 256, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 19966188.},
|
||||
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 19317760.},
|
||||
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 192, 28, 28}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19317760.},
|
||||
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 384, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 19292672.},
|
||||
/* GFLOPS 0.019 x 1 = 0.019 */ {{1, 1}, {{1, 64, 64, 64}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 19021824.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 576, 10, 10}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 18448000.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 18082176.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 480, 14, 14}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 18082176.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 192, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 17790080.},
|
||||
/* GFLOPS 0.018 x 1 = 0.018 */ {{1, 1}, {{1, 352, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 17687040.},
|
||||
/* GFLOPS 0.017 x 1 = 0.017 */ {{2, 2}, {{1, 16, 128, 256}}, 16, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 16908288.},
|
||||
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 320, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 16081408.},
|
||||
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 832, 7, 7}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 15664320.},
|
||||
/* GFLOPS 0.016 x 1 = 0.016 */ {{1, 1}, {{1, 832, 7, 7}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 15664320.},
|
||||
/* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 48, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 15059072.},
|
||||
/* GFLOPS 0.015 x 1 = 0.015 */ {{5, 5}, {{1, 32, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 14754816.},
|
||||
/* GFLOPS 0.015 x 1 = 0.015 */ {{3, 3}, {{1, 128, 10, 10}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 14752000.},
|
||||
/* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 288, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 14475776.},
|
||||
/* GFLOPS 0.014 x 1 = 0.014 */ {{1, 1}, {{1, 512, 5, 5}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13991250.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 144, 38, 38}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 13354112.},
|
||||
/* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 16, 56, 56}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6623232.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 512, 10, 10}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 13120000.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 13053600.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 832, 7, 7}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 13053600.},
|
||||
/* GFLOPS 0.007 x 2 = 0.013 */ {{1, 1}, {{1, 32, 28, 28}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6522880.},
|
||||
/* GFLOPS 0.001 x 11 = 0.013 */ {{3, 3}, {{1, 64, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1180672.},
|
||||
/* GFLOPS 0.006 x 2 = 0.013 */ {{1, 1}, {{1, 64, 14, 14}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6472704.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 128, 56, 56}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12895232.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 256, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12870144.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 256, 14, 14}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12870144.},
|
||||
/* GFLOPS 0.013 x 1 = 0.013 */ {{1, 1}, {{1, 508, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12757248.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 992, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12449920.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 480, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 12054784.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 480, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 12054784.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 960, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 12048512.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 32, 75, 75}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "", false, 12014080.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{3, 3}, {{1, 96, 6, 6}}, 192, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 11950848.},
|
||||
/* GFLOPS 0.006 x 2 = 0.012 */ {{3, 3}, {{1, 96, 3, 3}}, 384, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 5975424.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 320, 12, 12}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 11814912.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 640, 6, 6}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 11805696.},
|
||||
/* GFLOPS 0.012 x 1 = 0.012 */ {{1, 1}, {{1, 928, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11647104.},
|
||||
/* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 896, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11245696.},
|
||||
/* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 256, 13, 13}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 11097216.},
|
||||
/* GFLOPS 0.011 x 1 = 0.011 */ {{3, 3}, {{1, 256, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 11061600.},
|
||||
/* GFLOPS 0.006 x 2 = 0.011 */ {{3, 3}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5530200.},
|
||||
/* GFLOPS 0.011 x 1 = 0.011 */ {{1, 1}, {{1, 864, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10844288.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 832, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10442880.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{5, 5}, {{1, 32, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 10041472.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 800, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 10041472.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 192, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 9658880.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 192, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 9658880.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 384, 14, 14}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 9646336.},
|
||||
/* GFLOPS 0.005 x 2 = 0.010 */ {{1, 1}, {{1, 512, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4821600.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{1, 1}, {{1, 768, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 9640064.},
|
||||
/* GFLOPS 0.010 x 1 = 0.010 */ {{3, 3}, {{1, 4, 128, 256}}, 4, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 9568256.},
|
||||
/* GFLOPS 0.005 x 2 = 0.009 */ {{1, 1}, {{1, 4, 128, 256}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4718592.},
|
||||
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 736, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 9238656.},
|
||||
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 192, 19, 19}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 8895040.},
|
||||
/* GFLOPS 0.009 x 1 = 0.009 */ {{1, 1}, {{1, 704, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8837248.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 672, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8435840.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 128, 32, 64}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8421376.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 640, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 8034432.},
|
||||
/* GFLOPS 0.004 x 2 = 0.008 */ {{1, 1}, {{1, 832, 7, 7}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3916080.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{1, 1}, {{1, 608, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7633024.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 7535808.},
|
||||
/* GFLOPS 0.008 x 1 = 0.008 */ {{5, 5}, {{1, 16, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 7535808.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 640, 6, 6}}, 160, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7378560.},
|
||||
/* GFLOPS 0.004 x 2 = 0.007 */ {{1, 1}, {{1, 48, 14, 14}}, 192, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3650304.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 384, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7234752.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 576, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 7231616.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 256, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 7091712.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 544, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6830208.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 64, 8, 8}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 6687360.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{3, 3}, {{1, 160, 6, 6}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 6637824.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6629504.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 528, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6629504.},
|
||||
/* GFLOPS 0.007 x 1 = 0.007 */ {{1, 1}, {{1, 256, 5, 5}}, 512, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 6566400.},
|
||||
/* GFLOPS 0.003 x 2 = 0.007 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 3280000.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 64, 56, 56}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6472704.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 128, 28, 28}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6447616.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 7, 7}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 6428800.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6428800.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{1, 1}, {{1, 512, 14, 14}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6428800.},
|
||||
/* GFLOPS 0.001 x 12 = 0.006 */ {{1, 1}, {{1, 64, 8, 8}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 528384.},
|
||||
/* GFLOPS 0.006 x 1 = 0.006 */ {{3, 3}, {{1, 256, 10, 10}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 5530800.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 12, 12}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 5322240.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 5310720.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5310720.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 5310720.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{3, 3}, {{1, 128, 5, 5}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 5310720.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4917600.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 10, 10}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4917600.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 28, 28}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4829440.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 192, 28, 28}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4829440.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 14, 14}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4826304.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 512, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4821600.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 508, 14, 14}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4783968.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 64, 32, 32}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 4755456.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 64, 24, 24}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4755456.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 256, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4727808.},
|
||||
/* GFLOPS 0.005 x 1 = 0.005 */ {{1, 1}, {{1, 1024, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4720896.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 512, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4440300.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 512, 19, 19}}, 12, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4440300.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 640, 6, 6}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 4427136.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 16, 128, 256}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4325376.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 64, 64, 128}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", false, 4227072.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 832, 7, 7}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3916080.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{3, 3}, {{1, 256, 1, 1}}, 804, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 3705636.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 16, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3691008.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{3, 3}, {{1, 64, 10, 10}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 3689600.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 32, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {2, 2}, {0, 0}, "", true, 3688704.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 32, 12, 12}}, 64, 1, {2, 2}, {1, 1}, {2, 2}, {0, 0}, "", true, 3688704.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{5, 5}, {{1, 64, 6, 6}}, 128, 1, {2, 2}, {1, 1}, {2, 2}, {0, 0}, "", true, 3687552.},
|
||||
/* GFLOPS 0.004 x 1 = 0.004 */ {{1, 1}, {{1, 192, 12, 12}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3548160.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 736, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3393792.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 256, 10, 10}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3283200.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3280000.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3280000.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 512, 5, 5}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3228750.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 480, 14, 14}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 3013696.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 480, 14, 14}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 3013696.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 320, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2953728.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 640, 6, 6}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2951424.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{3, 3}, {{1, 256, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 2765400.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{3, 3}, {{1, 128, 5, 5}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2655360.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 832, 7, 7}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2610720.},
|
||||
/* GFLOPS 0.003 x 1 = 0.003 */ {{1, 1}, {{1, 256, 3, 3}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2520882.},
|
||||
/* GFLOPS 0.001 x 2 = 0.003 */ {{3, 3}, {{1, 128, 1, 1}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1258530.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 256, 12, 12}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2363904.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 2360320.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2360320.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 2360320.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 528, 4, 4}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2164736.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 508, 4, 4}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 2082816.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 1, 1}}, 1000, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 2049000.},
|
||||
/* GFLOPS 0.001 x 2 = 0.002 */ {{3, 3}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 995544.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 1024, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1770336.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 64, 4, 4}}, 810, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 1671840.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 32, 80, 80}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1664000.},
|
||||
/* GFLOPS 0.002 x 1 = 0.002 */ {{1, 1}, {{1, 256, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1641600.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 6, 6}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1475712.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 1383000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 64, 5, 5}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1328256.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 736, 3, 3}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 1272672.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 64, 16, 16}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 1188864.},
|
||||
/* GFLOPS 0.000 x 9 = 0.001 */ {{1, 1}, {{1, 64, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 132096.},
|
||||
/* GFLOPS 0.001 x 2 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 590976.},
|
||||
/* GFLOPS 0.001 x 2 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 590976.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 3, 3}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 1180160.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 2, 2}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1120392.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 192, 12, 12}}, 16, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 887040.},
|
||||
/* GFLOPS 0.000 x 2 = 0.001 */ {{3, 3}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 442464.},
|
||||
/* GFLOPS 0.000 x 2 = 0.001 */ {{1, 1}, {{1, 32, 80, 80}}, 1, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 416000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 5, 5}}, 12, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 691500.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 256, 3, 3}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 663696.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 640, 2, 2}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 655872.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 615000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 512, 5, 5}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 615000.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 128, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 592128.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 590976.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{3, 3}, {{1, 128, 3, 3}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 590080.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 3, 3}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 581742.},
|
||||
/* GFLOPS 0.001 x 1 = 0.001 */ {{1, 1}, {{1, 256, 4, 4}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 525312.},
|
||||
/* GFLOPS 0.000 x 4 = 0.000 */ {{1, 1}, {{1, 48, 1, 1}}, 1152, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 111744.},
|
||||
/* GFLOPS 0.000 x 4 = 0.000 */ {{1, 1}, {{1, 1152, 1, 1}}, 48, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110640.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 5, 5}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 411200.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 3, 3}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 331920.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 192, 5, 5}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 308000.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 8, 8}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 297216.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 2, 2}}, 256, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 263168.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 131328.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 258552.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 1024, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 196704.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 3, 3}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 165960.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 3, 3}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 148032.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 3, 3}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 147584.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {1, 1}, {0, 0}, "", true, 147584.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 147584.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 128, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 147584.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 736, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 141408.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 546, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 140322.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 131328.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 64, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 131328.},
|
||||
/* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 28, 1, 1}}, 672, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 38304.},
|
||||
/* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 672, 1, 1}}, 28, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 37660.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 110808.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 3, 3}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 110808.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 55320.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 4, 4}}, 36, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "VALID", true, 74304.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 64, 2, 2}}, 64, 1, {2, 2}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 73792.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 256, 1, 1}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 73744.},
|
||||
/* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 20, 1, 1}}, 480, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19680.},
|
||||
/* GFLOPS 0.000 x 3 = 0.000 */ {{1, 1}, {{1, 480, 1, 1}}, 20, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 19220.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 49248.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 256, 2, 2}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 49248.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 16, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 36880.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 126, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 32382.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{3, 3}, {{1, 128, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {1, 1}, {0, 0}, "", true, 18440.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 64, 1, 1}}, 128, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", false, 16512.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 10, 1, 1}}, 240, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 5040.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 240, 1, 1}}, 10, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 4810.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "", true, 6168.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 128, 1, 1}}, 24, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 6168.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 6, 1, 1}}, 144, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1872.},
|
||||
/* GFLOPS 0.000 x 2 = 0.000 */ {{1, 1}, {{1, 144, 1, 1}}, 6, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 1734.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 4, 1, 1}}, 96, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 864.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 96, 1, 1}}, 4, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 772.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 8, 1, 1}}, 32, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 544.},
|
||||
/* GFLOPS 0.000 x 1 = 0.000 */ {{1, 1}, {{1, 32, 1, 1}}, 8, 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, "SAME", true, 520.}
|
||||
};
|
||||
struct ConvParamID
|
||||
{
|
||||
enum {
|
||||
CONV_0 = 0,
|
||||
CONV_100 = 100,
|
||||
CONV_LAST = sizeof(testConvolutionConfigs) / sizeof(testConvolutionConfigs[0])
|
||||
};
|
||||
int val_;
|
||||
ConvParamID(int val = 0) : val_(val) {}
|
||||
operator int() const { return val_; }
|
||||
static ::testing::internal::ParamGenerator<ConvParamID> all()
|
||||
{
|
||||
#if 0
|
||||
enum { NUM = (int)CONV_LAST };
|
||||
#else
|
||||
enum { NUM = (int)CONV_100 };
|
||||
#endif
|
||||
ConvParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = ConvParamID(i); } // reduce generated code size
|
||||
return ::testing::ValuesIn(v_, v_ + NUM);
|
||||
}
|
||||
};
|
||||
static inline void PrintTo(const ConvParamID& v, std::ostream* os)
|
||||
{
|
||||
CV_Assert((int)v >= 0); CV_Assert((int)v < ConvParamID::CONV_LAST);
|
||||
const ConvParam_t& p = testConvolutionConfigs[(int)v];
|
||||
|
||||
*os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9)
|
||||
<< ", K=" << (Size)p.kernel
|
||||
<< ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2] << ", " << p.shapeIn.dims[3] << "}"
|
||||
<< ", OCN=" << p.outCN;
|
||||
if (p.groups > 1)
|
||||
*os << ", G=" << p.groups;
|
||||
if (((Size)p.stride).area() != 1)
|
||||
*os << ", S=" << ((Size)p.stride);
|
||||
if (((Size)p.dilation).area() != 1)
|
||||
*os << ", D=" << ((Size)p.dilation);
|
||||
if (!((Size)p.pad).empty())
|
||||
*os << ", P=" << ((Size)p.pad);
|
||||
if (!((Size)p.padAdjust).empty())
|
||||
*os << ", PAdj=" << ((Size)p.padAdjust);
|
||||
if (!((std::string)p.padMode).empty())
|
||||
*os << ", PM=" << ((std::string)p.padMode);
|
||||
if (p.hasBias)
|
||||
*os << ", BIAS";
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Full parameter of one perf-test instance: the convolution configuration id
// paired with the (backend, target) combination to run it on.
typedef tuple<ConvParamID, tuple<Backend, Target> > ConvTestParam_t;
|
||||
// Fixture type used by the PERF_TEST_P_(Conv, ...) tests, parameterized by ConvTestParam_t.
typedef TestBaseWithParam<ConvTestParam_t> Conv;
|
||||
|
||||
// Benchmarks a network made of one "Convolution" layer built from the selected
// table entry. Weights, optional bias and input are filled with random values;
// one forward pass is run as warmup, sizes and FLOPS are printed, and the
// forward pass is then timed. The FLOPS reported by the network must match the
// value declared in the table within a relative tolerance of 1e-6.
PERF_TEST_P_(Conv, conv)
{
    const int test_id = (int)get<0>(GetParam());
    ASSERT_GE(test_id, 0);
    ASSERT_LT(test_id, ConvParamID::CONV_LAST);
    const ConvParam_t& cfg = testConvolutionConfigs[test_id];

    // Unpack the configuration.
    const double declared_flops = cfg.declared_flops;
    const Size kernel = cfg.kernel;
    const MatShape inputShape(cfg.shapeIn.dims, cfg.shapeIn.dims + 4);
    const int outChannels = cfg.outCN;
    const int groups = cfg.groups;
    const Size stride = cfg.stride;
    const Size dilation = cfg.dilation;
    const Size pad = cfg.pad;
    const Size padAdjust = cfg.padAdjust;
    const std::string padMode(cfg.padMode);
    const bool hasBias = cfg.hasBias;
    const Backend backendId = get<0>(get<1>(GetParam()));
    const Target targetId = get<1>(get<1>(GetParam()));

    const int inChannels = inputShape[1];
    const int inHeight = inputShape[2];
    const int inWidth = inputShape[3];

    // Weights: {outChannels, inChannels / groups, kernel height, kernel width}.
    const int weightDims[] = {outChannels, inChannels / groups, kernel.height, kernel.width};
    Mat weights(4, weightDims, CV_32F);
    randu(weights, -1.0f, 1.0f);

    LayerParams lp;
    lp.type = "Convolution";
    lp.name = "testLayer";
    lp.set("kernel_w", kernel.width);
    lp.set("kernel_h", kernel.height);
    lp.set("pad_w", pad.width);
    lp.set("pad_h", pad.height);
    if (padAdjust.width > 0 || padAdjust.height > 0)
    {
        lp.set("adj_w", padAdjust.width);
        lp.set("adj_h", padAdjust.height);
    }
    if (!padMode.empty())
        lp.set("pad_mode", padMode);
    lp.set("stride_w", stride.width);
    lp.set("stride_h", stride.height);
    lp.set("dilation_w", dilation.width);
    lp.set("dilation_h", dilation.height);
    lp.set("num_output", outChannels);
    lp.set("group", groups);
    lp.set("bias_term", hasBias);
    lp.blobs.push_back(weights);

    if (hasBias)
    {
        Mat bias(1, outChannels, CV_32F);
        randu(bias, -1.0f, 1.0f);
        lp.blobs.push_back(bias);
    }

    // The batch dimension is always 1, regardless of shapeIn.dims[0].
    const int inputDims[] = {1, inChannels, inHeight, inWidth};
    Mat input(4, inputDims, CV_32F);
    randu(input, -1.0f, 1.0f);

    Net net;
    net.addLayerToPrev(lp.name, lp.type, lp);
    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);

    // warmup
    Mat output = net.forward();

    const MatShape netInputShape = shape(input);
    size_t weightsMemory = 0;
    size_t blobsMemory = 0;
    net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
    const int64 flops = net.getFLOPS(netInputShape);
    CV_Assert(flops > 0);

    std::cout << "IN=" << divUp(input.total() * input.elemSize(), 1u<<10) << " Kb " << netInputShape;
    std::cout << " OUT=" << divUp(output.total() * output.elemSize(), 1u<<10) << " Kb " << shape(output);
    std::cout << " Weights(parameters): " << divUp(weightsMemory, 1u<<10) << " Kb";
    std::cout << " MFLOPS=" << flops * 1e-6 << std::endl;

    TEST_CYCLE()
    {
        Mat res = net.forward();
    }

    EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6);
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
// Instantiates the Conv suite for every id from ConvParamID::all() combined
// with the backend/target pairs from dnnBackendsAndTargets()
// (defined in ../test/test_common.hpp).
INSTANTIATE_TEST_CASE_P(/**/, Conv,
    Combine(ConvParamID::all(), dnnBackendsAndTargets(false, false)));
|
||||
|
||||
} // namespace
|
||||
163
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_convolution1d.cpp
vendored
Normal file
163
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_convolution1d.cpp
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
// One 1D convolution benchmark configuration.
// The data member order must not change: testConvolution1DConfigs initializes
// entries positionally.
struct Conv1DParam_t {
    struct BlobShape { int dims[3]; };

    int kernel;              // kernel size
    BlobShape shapeIn;       // input shape; the test reads dims[1] as channels and dims[2] as length
    int outCN;               // number of output channels
    int groups;              // convolution groups
    int stride;
    int dilation;
    int pad[2];              // passed to the layer as the two-element "pad" parameter
    const char* padMode;     // passed as "pad_mode" when non-empty
    bool hasBias;            // adds a bias blob when true
    double declared_flops;   // expected result of Net::getFLOPS for this entry
};
|
||||
// Details: #12142
|
||||
static const Conv1DParam_t testConvolution1DConfigs[] = {
|
||||
{3, {{1, 6, 10}}, 6, 1, 1, 1, {0, 0}, "VALID", true, 1776.},
|
||||
{3, {{1, 2, 19}}, 2, 2, 2, 1, {1, 1}, "", true, 260.},
|
||||
{3, {{1, 2, 25}}, 2, 2, 1, 1, {2, 2}, "SAME", false, 650.},
|
||||
};
|
||||
|
||||
struct Conv1DParamID
|
||||
{
|
||||
enum {
|
||||
CONV_0 = 0,
|
||||
CONV_LAST = sizeof(testConvolution1DConfigs) / sizeof(testConvolution1DConfigs[0])
|
||||
};
|
||||
int val_;
|
||||
Conv1DParamID(int val = 0) : val_(val) {}
|
||||
operator int() const { return val_; }
|
||||
static ::testing::internal::ParamGenerator<Conv1DParamID> all()
|
||||
{
|
||||
enum { NUM = (int)CONV_LAST };
|
||||
Conv1DParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = Conv1DParamID(i); } // reduce generated code size
|
||||
return ::testing::ValuesIn(v_, v_ + NUM);
|
||||
}
|
||||
};
|
||||
static inline void PrintTo(const Conv1DParamID& v, std::ostream* os)
|
||||
{
|
||||
CV_Assert((int)v >= 0); CV_Assert((int)v < Conv1DParamID::CONV_LAST);
|
||||
const Conv1DParam_t& p = testConvolution1DConfigs[(int)v];
|
||||
|
||||
*os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9)
|
||||
<< ", K=[" << p.kernel << "]"
|
||||
<< ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2] << "}"
|
||||
<< ", OCN=" << p.outCN;
|
||||
if (p.groups > 1)
|
||||
*os << ", G=" << p.groups;
|
||||
if (p.stride != 1)
|
||||
*os << ", S=" << p.stride;
|
||||
if (p.dilation != 1)
|
||||
*os << ", D=" << p.dilation;
|
||||
if (p.pad[0] != 0 && p.pad[1] != 0 )
|
||||
*os << ", P=(" << p.pad[0] << ", " << p.pad[1] << ")";
|
||||
if (!((std::string)p.padMode).empty())
|
||||
*os << ", PM=" << ((std::string)p.padMode);
|
||||
if (p.hasBias)
|
||||
*os << ", BIAS";
|
||||
}
|
||||
|
||||
|
||||
typedef tuple<Conv1DParamID, tuple<Backend, Target> > Conv1DTestParam_t;
|
||||
typedef TestBaseWithParam<Conv1DTestParam_t> Conv1D;
|
||||
|
||||
// Benchmarks a network made of one 1D "Convolution" layer built from the
// selected table entry. Targets other than DNN_TARGET_CPU are skipped.
// Weights, optional bias and input are filled with random values; one forward
// pass is run as warmup, sizes and FLOPS are printed, and the forward pass is
// then timed. The FLOPS reported by the network must match the declared value
// within a relative tolerance of 1e-6.
PERF_TEST_P_(Conv1D, conv1d)
{
    int test_id = (int)get<0>(GetParam());
    ASSERT_GE(test_id, 0); ASSERT_LT(test_id, Conv1DParamID::CONV_LAST);
    const Conv1DParam_t& params = testConvolution1DConfigs[test_id];
    double declared_flops = params.declared_flops;

    // Layer parameters are passed as integer arrays read straight from the table entry.
    DictValue kernel = DictValue::arrayInt(&params.kernel, 1);
    DictValue stride = DictValue::arrayInt(&params.stride, 1);
    DictValue pad = DictValue::arrayInt(&params.pad[0], 2);
    DictValue dilation = DictValue::arrayInt(&params.dilation, 1);

    MatShape inputShape = MatShape(params.shapeIn.dims, params.shapeIn.dims + 3);
    int outChannels = params.outCN;
    int groups = params.groups;
    std::string padMode(params.padMode);

    bool hasBias = params.hasBias;
    Backend backendId = get<0>(get<1>(GetParam()));
    Target targetId = get<1>(get<1>(GetParam()));

    if (targetId != DNN_TARGET_CPU)
        throw SkipTestException("Only CPU is supported");

    int inChannels = inputShape[1];

    // Weights: {outChannels, inChannels / groups, kernel size}.
    int sz[] = {outChannels, inChannels / groups, params.kernel};
    Mat weights(3, &sz[0], CV_32F);
    randu(weights, -1.0f, 1.0f);

    LayerParams lp;
    lp.set("kernel_size", kernel);
    lp.set("pad", pad);
    if (!padMode.empty())
        lp.set("pad_mode", padMode);

    lp.set("stride", stride);
    lp.set("dilation", dilation);
    lp.set("num_output", outChannels);
    lp.set("group", groups);
    lp.set("bias_term", hasBias);
    lp.type = "Convolution";
    lp.name = "testLayer";
    lp.blobs.push_back(weights);

    if (hasBias)
    {
        Mat bias(1, outChannels, CV_32F);
        randu(bias, -1.0f, 1.0f);
        lp.blobs.push_back(bias);
    }

    // The batch dimension is always 1, regardless of shapeIn.dims[0].
    int inpSz[] = {1, inChannels, inputShape[2]};
    Mat input(3, &inpSz[0], CV_32F);
    randu(input, -1.0f, 1.0f);

    Net net;
    net.addLayerToPrev(lp.name, lp.type, lp);

    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);

    // warmup
    Mat output = net.forward();

    MatShape netInputShape = shape(input);
    size_t weightsMemory = 0, blobsMemory = 0;
    net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
    int64 flops = net.getFLOPS(netInputShape);
    CV_Assert(flops > 0);

    std::cout
        << "IN=" << divUp(input.total() * input.elemSize(), 1u<<10) << " Kb " << netInputShape
        << " OUT=" << divUp(output.total() * output.elemSize(), 1u<<10) << " Kb " << shape(output)
        << " Weights(parameters): " << divUp(weightsMemory, 1u<<10) << " Kb"
        << " MFLOPS=" << flops * 1e-6 << std::endl;

    TEST_CYCLE()
    {
        Mat res = net.forward();
    }
    EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6);
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
// Instantiates the Conv1D suite for every id from Conv1DParamID::all()
// combined with the backend/target pairs from dnnBackendsAndTargets()
// (defined in ../test/test_common.hpp).
INSTANTIATE_TEST_CASE_P(/**/, Conv1D,
    Combine(Conv1DParamID::all(), dnnBackendsAndTargets(false, false)));
|
||||
|
||||
} // namespace
|
||||
182
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_convolution3d.cpp
vendored
Normal file
182
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_convolution3d.cpp
vendored
Normal file
@@ -0,0 +1,182 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
// One 3D convolution benchmark configuration.
// The data member order must not change: testConvolution3DConfigs initializes
// entries positionally.
struct Conv3DParam_t {
    struct BlobShape { int dims[5]; };

    int kernel[3];           // kernel size per spatial dimension
    BlobShape shapeIn;       // input shape; the test reads dims[1] as channels and dims[2..4] as spatial sizes
    int outCN;               // number of output channels
    int groups;              // convolution groups
    int stride[3];
    int dilation[3];
    int pad[6];              // passed to the layer as the six-element "pad" parameter
    const char* padMode;     // passed as "pad_mode" when non-empty
    bool hasBias;            // adds a bias blob when true
    double declared_flops;   // expected result of Net::getFLOPS for this entry
};
|
||||
// Details: #12142
|
||||
static const Conv3DParam_t testConvolution3DConfigs[] = {
|
||||
{{3, 3, 3}, {{1, 6, 10, 38, 50}}, 6, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "VALID", true, 26956800.},
|
||||
{{3, 3, 3}, {{1, 2, 19, 19, 19}}, 2, 2, {2, 2, 2}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "", true, 218000.},
|
||||
{{3, 3, 3}, {{1, 2, 25, 19, 19}}, 2, 2, {1, 2, 2}, {1, 1, 1}, {2, 2, 2, 2, 2, 2}, "SAME", false, 545000.},
|
||||
{{3, 3, 3}, {{1, 11, 9, 150, 200}}, 11, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "VALID", true, 1342562760.},
|
||||
{{3, 3, 3}, {{1, 10, 98, 10, 10}}, 10, 1, {1, 1, 1}, {1, 1, 1}, {1, 0, 1, 1, 0,1}, "SAME", false, 53018000.},
|
||||
{{5, 5, 5}, {{1, 6, 19, 19, 19}}, 6, 2, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", false, 30395250.},
|
||||
{{5, 5, 5}, {{1, 4, 50, 19, 19}}, 4, 1, {2, 2, 2}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "VALID", false, 5893888.},
|
||||
{{5, 5, 5}, {{1, 3, 75, 75, 100}}, 3, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "SAME", true, 1267312500.},
|
||||
{{5, 5, 5}, {{1, 2, 21, 75, 100}}, 2, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", true, 116103744.},
|
||||
{{5, 5, 5}, {{1, 4, 40, 75, 75}}, 4, 1, {2, 2, 2}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", false, 93405312.},
|
||||
{{7, 7, 7}, {{1, 6, 15, 19, 19}}, 6, 1, {2, 1, 1}, {1, 1, 1}, {3, 3, 3, 3, 3, 3}, "SAME", true, 71339376.},
|
||||
{{7, 7, 7}, {{1, 2, 38, 38, 38}}, 2, 1, {1, 2, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", false, 44990464.},
|
||||
{{1, 1, 1}, {{1, 4, 9, 10, 10}}, 4, 1, {1, 1, 2}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "VALID", false, 16200.},
|
||||
{{3, 1, 4}, {{1, 14, 5, 10, 10}}, 14, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "SAME", false, 2359000.},
|
||||
{{1, 1, 1}, {{1, 8, 1, 10, 10}}, 8, 8, {1, 1, 1}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "", true, 58752.},
|
||||
{{3, 4, 2}, {{1, 4, 8, 10, 10}}, 4, 4, {1, 2, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", true, 166752.}
|
||||
};
|
||||
|
||||
struct Conv3DParamID
|
||||
{
|
||||
enum {
|
||||
CONV_0 = 0,
|
||||
CONV_100 = 16,
|
||||
CONV_LAST = sizeof(testConvolution3DConfigs) / sizeof(testConvolution3DConfigs[0])
|
||||
};
|
||||
int val_;
|
||||
Conv3DParamID(int val = 0) : val_(val) {}
|
||||
operator int() const { return val_; }
|
||||
static ::testing::internal::ParamGenerator<Conv3DParamID> all()
|
||||
{
|
||||
#if 0
|
||||
enum { NUM = (int)CONV_LAST };
|
||||
#else
|
||||
enum { NUM = (int)CONV_100 };
|
||||
#endif
|
||||
Conv3DParamID v_[NUM]; for (int i = 0; i < NUM; ++i) { v_[i] = Conv3DParamID(i); } // reduce generated code size
|
||||
return ::testing::ValuesIn(v_, v_ + NUM);
|
||||
}
|
||||
};
|
||||
static inline void PrintTo(const Conv3DParamID& v, std::ostream* os)
|
||||
{
|
||||
CV_Assert((int)v >= 0); CV_Assert((int)v < Conv3DParamID::CONV_LAST);
|
||||
const Conv3DParam_t& p = testConvolution3DConfigs[(int)v];
|
||||
|
||||
*os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9)
|
||||
<< ", K=[" << p.kernel[0] << " x " << p.kernel[1] << " x " << p.kernel[2] << "]"
|
||||
<< ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2] << ", " << p.shapeIn.dims[3] << ", " << p.shapeIn.dims[4] << "}"
|
||||
<< ", OCN=" << p.outCN;
|
||||
if (p.groups > 1)
|
||||
*os << ", G=" << p.groups;
|
||||
if (p.stride[0] * p.stride[1] * p.stride[2] != 1)
|
||||
*os << ", S=[" << p.stride[0] << " x " << p.stride[1] << " x " << p.stride[2] << "]";
|
||||
if (p.dilation[0] * p.dilation[1] * p.dilation[2] != 1)
|
||||
*os << ", D=[" << p.dilation[0] << " x " << p.dilation[1] << " x " << p.dilation[2] << "]";
|
||||
if (p.pad[0] != 0 && p.pad[1] != 0 && p.pad[2] != 0 &&
|
||||
p.pad[3] != 0 && p.pad[4] != 0 && p.pad[5] != 0)
|
||||
*os << ", P=(" << p.pad[0] << ", " << p.pad[3] << ") x ("
|
||||
<< p.pad[1] << ", " << p.pad[4] << ") x ("
|
||||
<< p.pad[2] << ", " << p.pad[5] << ")";
|
||||
if (!((std::string)p.padMode).empty())
|
||||
*os << ", PM=" << ((std::string)p.padMode);
|
||||
if (p.hasBias)
|
||||
*os << ", BIAS";
|
||||
}
|
||||
|
||||
|
||||
typedef tuple<Conv3DParamID, tuple<Backend, Target> > Conv3DTestParam_t;
|
||||
typedef TestBaseWithParam<Conv3DTestParam_t> Conv3D;
|
||||
|
||||
// Benchmarks a network made of one 3D "Convolution" layer built from the
// selected table entry. The test is skipped unless the target is
// DNN_TARGET_CPU or the backend is DNN_BACKEND_CUDA. Weights, optional bias
// and input are filled with random values; one forward pass is run as warmup,
// sizes and FLOPS are printed, and the forward pass is then timed. The FLOPS
// reported by the network must match the declared value within a relative
// tolerance of 1e-6.
PERF_TEST_P_(Conv3D, conv3d)
{
    int test_id = (int)get<0>(GetParam());
    ASSERT_GE(test_id, 0); ASSERT_LT(test_id, Conv3DParamID::CONV_LAST);
    const Conv3DParam_t& params = testConvolution3DConfigs[test_id];
    double declared_flops = params.declared_flops;

    // Layer parameters are passed as integer arrays read straight from the table entry.
    DictValue kernel = DictValue::arrayInt(&params.kernel[0], 3);
    DictValue stride = DictValue::arrayInt(&params.stride[0], 3);
    DictValue pad = DictValue::arrayInt(&params.pad[0], 6);
    DictValue dilation = DictValue::arrayInt(&params.dilation[0], 3);

    MatShape inputShape = MatShape(params.shapeIn.dims, params.shapeIn.dims + 5);
    int outChannels = params.outCN;
    int groups = params.groups;
    std::string padMode(params.padMode);

    bool hasBias = params.hasBias;
    Backend backendId = get<0>(get<1>(GetParam()));
    Target targetId = get<1>(get<1>(GetParam()));

    if (targetId != DNN_TARGET_CPU && backendId != DNN_BACKEND_CUDA)
        throw SkipTestException("Only CPU and CUDA is supported");

    int inChannels = inputShape[1];

    // Weights: {outChannels, inChannels / groups, kernel[0], kernel[1], kernel[2]}.
    int sz[] = {outChannels, inChannels / groups, params.kernel[0], params.kernel[1], params.kernel[2]};
    Mat weights(5, &sz[0], CV_32F);
    randu(weights, -1.0f, 1.0f);

    LayerParams lp;
    lp.set("kernel_size", kernel);
    lp.set("pad", pad);
    if (!padMode.empty())
        lp.set("pad_mode", padMode);

    lp.set("stride", stride);
    lp.set("dilation", dilation);
    lp.set("num_output", outChannels);
    lp.set("group", groups);
    lp.set("bias_term", hasBias);
    lp.type = "Convolution";
    lp.name = "testLayer";
    lp.blobs.push_back(weights);

    if (hasBias)
    {
        Mat bias(1, outChannels, CV_32F);
        randu(bias, -1.0f, 1.0f);
        lp.blobs.push_back(bias);
    }

    // The batch dimension is always 1, regardless of shapeIn.dims[0].
    int inpSz[] = {1, inChannels, inputShape[2], inputShape[3], inputShape[4]};
    Mat input(5, &inpSz[0], CV_32F);
    randu(input, -1.0f, 1.0f);

    Net net;
    net.addLayerToPrev(lp.name, lp.type, lp);

    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);

    // warmup
    Mat output = net.forward();

    MatShape netInputShape = shape(input);
    size_t weightsMemory = 0, blobsMemory = 0;
    net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
    int64 flops = net.getFLOPS(netInputShape);
    CV_Assert(flops > 0);

    std::cout
        << "IN=" << divUp(input.total() * input.elemSize(), 1u<<10) << " Kb " << netInputShape
        << " OUT=" << divUp(output.total() * output.elemSize(), 1u<<10) << " Kb " << shape(output)
        << " Weights(parameters): " << divUp(weightsMemory, 1u<<10) << " Kb"
        << " MFLOPS=" << flops * 1e-6 << std::endl;

    TEST_CYCLE()
    {
        Mat res = net.forward();
    }
    EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6);
    SANITY_CHECK_NOTHING();
}
|
||||
|
||||
// Instantiates the Conv3D suite for every id from Conv3DParamID::all()
// combined with the backend/target pairs from dnnBackendsAndTargets()
// (defined in ../test/test_common.hpp).
INSTANTIATE_TEST_CASE_P(/**/, Conv3D,
    Combine(Conv3DParamID::all(), dnnBackendsAndTargets(false, false)));
|
||||
|
||||
} // namespace
|
||||
95
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_layer.cpp
vendored
Normal file
95
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_layer.cpp
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include <opencv2/dnn/shape_utils.hpp>
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
// Perf fixture for the "Slice" layer, parameterized by (backend, target).
struct Layer_Slice : public TestBaseWithParam<tuple<Backend, Target> >
{
    // Builds a network with a single Slice layer and times its forward pass.
    //   inputShape - DIMS sizes of the input blob
    //   begin, end - DIMS values each, passed to the layer as "begin" / "end"
    template<int DIMS>
    void test_slice(const int* inputShape, const int* begin, const int* end)
    {
        const int backendId = get<0>(GetParam());
        const int targetId = get<1>(GetParam());

        // Fill the input with a repeating 0..4095 ramp.
        Mat input(DIMS, inputShape, CV_32FC1, Scalar::all(0));
        float* const inputData = input.ptr<float>();
        const int inputTotal = (int)input.total();
        for (int idx = 0; idx < inputTotal; ++idx)
            inputData[idx] = (float)(idx & 4095);

        // Only consumed by the disabled debug output below.
        std::vector<Range> range(DIMS);
        for (int d = 0; d < DIMS; ++d)
            range[d] = Range(begin[d], end[d]);

        LayerParams lp;
        lp.type = "Slice";
        lp.name = "testLayer";
        lp.set("begin", DictValue::arrayInt<int*>(const_cast<int*>(begin), DIMS));
        lp.set("end", DictValue::arrayInt<int*>(const_cast<int*>(end), DIMS));

        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);

        // warmup
        {
            net.setInput(input);
            net.setPreferableBackend(backendId);
            net.setPreferableTarget(targetId);
            Mat out = net.forward();

            // The output must contain at least one non-zero value.
            EXPECT_GT(cv::norm(out, NORM_INF), 0);
#if 0
            //normAssert(out, input(range));
            cout << input(range).clone().reshape(1, 1) << endl;
            cout << out.reshape(1, 1) << endl;
#endif
        }

        TEST_CYCLE()
        {
            Mat res = net.forward();
        }

        SANITY_CHECK_NOTHING();
    }
};
|
||||
|
||||
|
||||
|
||||
// Slice of a {1, 64, 104, 104} blob keeping channels 32..64.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_1)
{
    const int shapeIn[4]    = {1, 64, 104, 104};
    const int sliceBegin[4] = {0, 32, 0, 0};
    const int sliceEnd[4]   = {1, 64, 104, 104};
    test_slice<4>(shapeIn, sliceBegin, sliceEnd);
}
|
||||
|
||||
// Slice of a {1, 128, 52, 52} blob keeping channels 64..128.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_2)
{
    const int shapeIn[4]    = {1, 128, 52, 52};
    const int sliceBegin[4] = {0, 64, 0, 0};
    const int sliceEnd[4]   = {1, 128, 52, 52};
    test_slice<4>(shapeIn, sliceBegin, sliceEnd);
}
|
||||
|
||||
// Slice of a {1, 256, 26, 26} blob keeping channels 128..256.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_3)
{
    const int shapeIn[4]    = {1, 256, 26, 26};
    const int sliceBegin[4] = {0, 128, 0, 0};
    const int sliceEnd[4]   = {1, 256, 26, 26};
    test_slice<4>(shapeIn, sliceBegin, sliceEnd);
}
|
||||
|
||||
|
||||
// Slice of a {1, 128, 80, 100} blob over the range 2..76 x 2..96 in the last two dimensions.
PERF_TEST_P_(Layer_Slice, FastNeuralStyle_eccv16)
{
    const int shapeIn[4]    = {1, 128, 80, 100};
    const int sliceBegin[4] = {0, 0, 2, 2};
    const int sliceEnd[4]   = {1, 128, 76, 96};
    test_slice<4>(shapeIn, sliceBegin, sliceEnd);
}
|
||||
|
||||
// Instantiates every Layer_Slice test for each backend/target pair returned by dnnBackendsAndTargets(false, false).
INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
|
||||
|
||||
} // namespace
|
||||
16
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_main.cpp
vendored
Normal file
16
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_main.cpp
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
// Optional extra test-data directory, read from the OPENCV_DNN_TEST_DATA_PATH
// environment variable. Always NULL on WINRT builds.
#ifdef WINRT
static const char* extraTestDataPath = NULL;
#else
static const char* extraTestDataPath = getenv("OPENCV_DNN_TEST_DATA_PATH");
#endif
|
||||
|
||||
#if defined(HAVE_HPX)
|
||||
#include <hpx/hpx_main.hpp>
|
||||
#endif
|
||||
|
||||
// Entry point of the dnn perf test binary. When an extra data path is
// configured it is registered as an additional data search path.
CV_PERF_TEST_MAIN(dnn, extraTestDataPath ? (void)cvtest::addDataSearchPath(extraTestDataPath) : (void)0)
|
||||
305
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_net.cpp
vendored
Normal file
305
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_net.cpp
vendored
Normal file
@@ -0,0 +1,305 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "opencv2/core/ocl.hpp"
|
||||
|
||||
#include "opencv2/dnn/shape_utils.hpp"
|
||||
|
||||
#include "../test/test_common.hpp"
|
||||
|
||||
namespace opencv_test {
|
||||
|
||||
// Perf fixture that loads a whole network from model files and times its
// forward pass on the (backend, target) pair taken from the test parameter.
class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<Backend, Target> >
{
public:
    dnn::Backend backend;
    dnn::Target target;

    dnn::Net net;

    DNNTestNetwork()
        : backend((dnn::Backend)(int)get<0>(GetParam()))
        , target((dnn::Target)(int)get<1>(GetParam()))
    {
    }

    // Loads the model and benchmarks net.forward().
    //   weights          - model file, resolved with findDataFile(weights, false)
    //   proto            - optional second model file; resolved only when non-empty
    //   halide_scheduler - used only with DNN_BACKEND_HALIDE: "disabled" skips the
    //                      test, a non-empty name is resolved to a scheduler file
    //   input            - input image; its contents are overwritten with random
    //                      values between 0 and 1 before use
    //   outputLayer      - layer name passed to the warmup forward() call
    void processNet(std::string weights, std::string proto, std::string halide_scheduler,
                    const Mat& input, const std::string& outputLayer = "")
    {
        randu(input, 0.0f, 1.0f);

        weights = findDataFile(weights, false);
        if (!proto.empty())
            proto = findDataFile(proto);

        const bool useHalide = (backend == DNN_BACKEND_HALIDE);
        if (useHalide)
        {
            if (halide_scheduler == "disabled")
                throw cvtest::SkipTestException("Halide test is disabled");
            if (!halide_scheduler.empty())
            {
                // Scheduler files for the OpenCL target carry an extra "opencl_" prefix.
                const char* targetPrefix = (target == DNN_TARGET_OPENCL) ? "opencl_" : "";
                halide_scheduler = findDataFile(std::string("dnn/halide_scheduler_") + targetPrefix + halide_scheduler, true);
            }
        }

        net = readNet(proto, weights);
        net.setInput(blobFromImage(input, 1.0, Size(), Scalar(), false));
        net.setPreferableBackend(backend);
        net.setPreferableTarget(target);
        if (useHalide)
            net.setHalideScheduler(halide_scheduler);

        const MatShape netInputShape = shape(1, 3, input.rows, input.cols);
        size_t weightsMemory = 0;
        size_t blobsMemory = 0;
        net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
        const int64 flops = net.getFLOPS(netInputShape);
        CV_Assert(flops > 0);

        net.forward(outputLayer); // warmup

        std::cout << "Memory consumption:" << std::endl;
        std::cout << " Weights(parameters): " << divUp(weightsMemory, 1u<<20) << " Mb" << std::endl;
        std::cout << " Blobs: " << divUp(blobsMemory, 1u<<20) << " Mb" << std::endl;
        std::cout << "Calculation complexity: " << flops * 1e-9 << " GFlops" << std::endl;

        PERF_SAMPLE_BEGIN()
            net.forward();
        PERF_SAMPLE_END()

        SANITY_CHECK_NOTHING();
    }
};
|
||||
|
||||
|
||||
// dnn/bvlc_alexnet model files, 227x227 CV_32FC3 input, Halide scheduler "alexnet.yml".
PERF_TEST_P_(DNNTestNetwork, AlexNet)
{
    Mat blob(cv::Size(227, 227), CV_32FC3);
    processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt", "alexnet.yml", blob);
}
|
||||
|
||||
// dnn/bvlc_googlenet model files, 224x224 CV_32FC3 input, no Halide scheduler file.
PERF_TEST_P_(DNNTestNetwork, GoogLeNet)
{
    Mat blob(cv::Size(224, 224), CV_32FC3);
    processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt", "", blob);
}
|
||||
|
||||
// dnn/ResNet-50 model files, 224x224 CV_32FC3 input, Halide scheduler "resnet_50.yml".
PERF_TEST_P_(DNNTestNetwork, ResNet_50)
{
    Mat blob(cv::Size(224, 224), CV_32FC3);
    processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt", "resnet_50.yml", blob);
}
|
||||
|
||||
// dnn/squeezenet_v1.1 model files, 227x227 CV_32FC3 input, Halide scheduler "squeezenet_v1_1.yml".
PERF_TEST_P_(DNNTestNetwork, SqueezeNet_v1_1)
{
    Mat blob(cv::Size(227, 227), CV_32FC3);
    processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt", "squeezenet_v1_1.yml", blob);
}
|
||||
|
||||
// dnn/tensorflow_inception_graph.pb, 224x224 CV_32FC3 input, warmup output layer "softmax2".
// Skipped on the Inference Engine NN Builder 2019 backend.
PERF_TEST_P_(DNNTestNetwork, Inception_5h)
{
    const bool isNNBuilder = (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019);
    if (isNNBuilder)
        throw SkipTestException("");

    Mat blob(cv::Size(224, 224), CV_32FC3);
    processNet("dnn/tensorflow_inception_graph.pb", "", "inception_5h.yml", blob, "softmax2");
}
|
||||
|
||||
// dnn/Enet-model-best.net, 512x256 CV_32FC3 input, Halide scheduler "enet.yml".
// Skipped on NN Builder 2019 with a non-CPU target, on the OpenCV backend with
// the OpenCL FP16 target, and on nGraph when the Inference Engine release is
// 2021.1 or newer.
PERF_TEST_P_(DNNTestNetwork, ENet)
{
    const bool nnBuilderNonCpu =
        (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && (target != DNN_TARGET_CPU);
    const bool opencvFp16 =
        (backend == DNN_BACKEND_OPENCV) && (target == DNN_TARGET_OPENCL_FP16);
    if (nnBuilderNonCpu || opencvFp16)
        throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        throw SkipTestException("");
#endif

    Mat blob(cv::Size(512, 256), CV_32FC3);
    processNet("dnn/Enet-model-best.net", "", "enet.yml", blob);
}
|
||||
|
||||
// VGG-based SSD model files, 300x300 CV_32FC3 input. The "disabled" scheduler
// name makes processNet skip this test on the Halide backend.
PERF_TEST_P_(DNNTestNetwork, SSD)
{
    Mat blob(cv::Size(300, 300), CV_32FC3);
    processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", "dnn/ssd_vgg16.prototxt", "disabled", blob);
}
|
||||
|
||||
// dnn/openface_nn4.small2.v1.t7, 96x96 CV_32FC3 input.
// Skipped on Halide, and on NN Builder 2019 with MYRIAD/HDDL targets when the
// Inference Engine release is 2018R5.
PERF_TEST_P_(DNNTestNetwork, OpenFace)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
    const bool isVpuTarget = (target == DNN_TARGET_MYRIAD) || (target == DNN_TARGET_HDDL);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && isVpuTarget)
        throw SkipTestException("");
#endif

    Mat blob(cv::Size(96, 96), CV_32FC3);
    processNet("dnn/openface_nn4.small2.v1.t7", "", "", blob);
}
|
||||
|
||||
// dnn/MobileNetSSD_deploy model files, 300x300 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(300, 300), CV_32FC3);
    processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "", blob);
}
|
||||
|
||||
// ssd_mobilenet_v1_coco_2017_11_17 model files, 300x300 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(300, 300), CV_32FC3);
    processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "ssd_mobilenet_v1_coco_2017_11_17.pbtxt", "", blob);
}
|
||||
|
||||
// ssd_mobilenet_v2_coco_2018_03_29 model files, 300x300 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(300, 300), CV_32FC3);
    processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "ssd_mobilenet_v2_coco_2018_03_29.pbtxt", "", blob);
}
|
||||
|
||||
// dnn/DenseNet_121 model files, 224x224 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, DenseNet_121)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(224, 224), CV_32FC3);
    processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", "", blob);
}
|
||||
|
||||
// OpenPose MPI model with the "faster 4 stages" prototxt, 368x368 CV_32FC3 input.
// Skipped on Halide and on NN Builder 2019 with MYRIAD/HDDL targets.
PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    const bool isVpuTarget = (target == DNN_TARGET_MYRIAD) || (target == DNN_TARGET_HDDL);
    const bool nnBuilderOnVpu = (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && isVpuTarget;
    if (isHalide || nnBuilderOnVpu)
        throw SkipTestException("");

    // The same .caffemodel but modified .prototxt
    // See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp
    Mat blob(cv::Size(368, 368), CV_32FC3);
    processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt", "", blob);
}
|
||||
|
||||
// dnn/opencv_face_detector model files, 300x300 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, opencv_face_detector)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(300, 300), CV_32FC3);
    processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", "", blob);
}
|
||||
|
||||
// ssd_inception_v2_coco_2017_11_17 model files, 300x300 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(300, 300), CV_32FC3);
    processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", "", blob);
}
|
||||
|
||||
// dnn/yolov3 model files; the input is built from dnn/dog416.png (BGR -> RGB,
// scaled by 1/255 into CV_32FC3). Skipped on Halide, on nGraph with
// OpenCL / OpenCL FP16 targets under OpenVINO 2020.4, and on MYRIAD when the
// Inference Engine release is 2021.1 or newer.
PERF_TEST_P_(DNNTestNetwork, YOLOv3)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
    const bool isOpenCLTarget = (target == DNN_TARGET_OPENCL) || (target == DNN_TARGET_OPENCL_FP16);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && isOpenCLTarget)
        throw SkipTestException("Test is disabled in OpenVINO 2020.4");
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure
    if (target == DNN_TARGET_MYRIAD)
        throw SkipTestException("");
#endif

    Mat image = imread(findDataFile("dnn/dog416.png"));
    cvtColor(image, image, COLOR_BGR2RGB);
    Mat blob;
    image.convertTo(blob, CV_32FC3, 1.0f / 255, 0);
    processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", blob);
}
|
||||
|
||||
// dnn/yolov4 model files; the input is built from dnn/dog416.png (BGR -> RGB,
// scaled by 1/255 into CV_32FC3). Skipped on Halide, on MYRIAD, and on nGraph
// with OpenCL / OpenCL FP16 targets under OpenVINO 2020.4.
PERF_TEST_P_(DNNTestNetwork, YOLOv4)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");
    const bool isMyriad = (target == DNN_TARGET_MYRIAD); // not enough resources
    if (isMyriad)
        throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
    const bool isOpenCLTarget = (target == DNN_TARGET_OPENCL) || (target == DNN_TARGET_OPENCL_FP16);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && isOpenCLTarget)
        throw SkipTestException("Test is disabled in OpenVINO 2020.4");
#endif

    Mat image = imread(findDataFile("dnn/dog416.png"));
    cvtColor(image, image, COLOR_BGR2RGB);
    Mat blob;
    image.convertTo(blob, CV_32FC3, 1.0f / 255, 0);
    processNet("dnn/yolov4.weights", "dnn/yolov4.cfg", "", blob);
}
|
||||
|
||||
// dnn/yolov4-tiny model files; the input is built from dnn/dog416.png
// (BGR -> RGB, scaled by 1/255 into CV_32FC3). Skipped on Halide, and on
// MYRIAD when the Inference Engine release is 2021.1 or newer.
PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure
    if (target == DNN_TARGET_MYRIAD)
        throw SkipTestException("");
#endif

    Mat image = imread(findDataFile("dnn/dog416.png"));
    cvtColor(image, image, COLOR_BGR2RGB);
    Mat blob;
    image.convertTo(blob, CV_32FC3, 1.0f / 255, 0);
    processNet("dnn/yolov4-tiny.weights", "dnn/yolov4-tiny.cfg", "", blob);
}
|
||||
|
||||
// dnn/frozen_east_text_detection.pb, 320x320 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, EAST_text_detection)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(320, 320), CV_32FC3);
    processNet("dnn/frozen_east_text_detection.pb", "", "", blob);
}
|
||||
|
||||
// dnn/fast_neural_style_eccv16_starry_night.t7, 320x240 CV_32FC3 input. Skipped on Halide.
PERF_TEST_P_(DNNTestNetwork, FastNeuralStyle_eccv16)
{
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    if (isHalide)
        throw SkipTestException("");

    Mat blob(cv::Size(320, 240), CV_32FC3);
    processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", "", blob);
}
|
||||
|
||||
// faster_rcnn_inception_v2_coco_2018_01_28 model files, 800x600 CV_32FC3 input.
// Version-specific skips are checked first, in the order written; the generic
// skips follow (Halide, NN Builder 2019 on a non-CPU target, OpenCV backend
// with the OpenCL FP16 target).
PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
{
    const bool isNNBuilder = (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019010000)
    if (isNNBuilder)
        throw SkipTestException("Test is disabled in OpenVINO 2019R1");
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019020000)
    if (isNNBuilder)
        throw SkipTestException("Test is disabled in OpenVINO 2019R2");
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000)
    if (target == DNN_TARGET_MYRIAD)
        throw SkipTestException("Test is disabled in OpenVINO 2021.1+ / MYRIAD");
#endif
    const bool isHalide = (backend == DNN_BACKEND_HALIDE);
    const bool nnBuilderNonCpu = isNNBuilder && (target != DNN_TARGET_CPU);
    const bool opencvFp16 = (backend == DNN_BACKEND_OPENCV) && (target == DNN_TARGET_OPENCL_FP16);
    if (isHalide || nnBuilderNonCpu || opencvFp16)
        throw SkipTestException("");

    Mat blob(cv::Size(800, 600), CV_32FC3);
    processNet("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb",
               "dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", "", blob);
}
|
||||
|
||||
// Performance test: runs the TensorFlow EfficientDet-D0 graph on the dog416.png
// sample resized to 512x512 and scaled by 1/255 into a 32-bit float image.
PERF_TEST_P_(DNNTestNetwork, EfficientDet)
{
    // Only non-Halide backends on the CPU target are measured.
    const bool supported = (backend != DNN_BACKEND_HALIDE) && (target == DNN_TARGET_CPU);
    if (!supported)
    {
        throw SkipTestException("");
    }

    Mat image = imread(findDataFile("dnn/dog416.png"));
    resize(image, image, Size(512, 512));  // in-place resize

    Mat blob;
    image.convertTo(blob, CV_32FC3, 1.0/255);

    processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", "", blob);
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
|
||||
|
||||
} // namespace
|
||||
14
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_precomp.hpp
vendored
Normal file
14
3rdparty/opencv-4.5.4/modules/dnn/perf/perf_precomp.hpp
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef __OPENCV_PERF_PRECOMP_HPP__
|
||||
#define __OPENCV_PERF_PRECOMP_HPP__
|
||||
|
||||
#include <opencv2/ts.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
#include "../test/test_common.hpp"
|
||||
|
||||
namespace opencv_test {
|
||||
using namespace perf;
|
||||
using namespace cv::dnn;
|
||||
} // namespace
|
||||
|
||||
#endif
|
||||
593
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_importer.cpp
vendored
Normal file
593
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_importer.cpp
vendored
Normal file
@@ -0,0 +1,593 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp"
|
||||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <google/protobuf/message.h>
|
||||
#include <google/protobuf/text_format.h>
|
||||
#include <google/protobuf/io/zero_copy_stream_impl.h>
|
||||
#include "caffe_io.hpp"
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
using ::google::protobuf::RepeatedField;
|
||||
using ::google::protobuf::RepeatedPtrField;
|
||||
using ::google::protobuf::Message;
|
||||
using ::google::protobuf::Descriptor;
|
||||
using ::google::protobuf::FieldDescriptor;
|
||||
using ::google::protobuf::Reflection;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template<typename T>
|
||||
static cv::String toString(const T &v)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
ss << v;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
static inline
|
||||
MatShape parseBlobShape(const caffe::BlobShape& _input_shape)
|
||||
{
|
||||
MatShape shape;
|
||||
for (int i = 0; i < _input_shape.dim_size(); i++)
|
||||
{
|
||||
shape.push_back((int)_input_shape.dim(i));
|
||||
}
|
||||
return shape;
|
||||
}
|
||||
|
||||
class CaffeImporter
|
||||
{
|
||||
caffe::NetParameter net;
|
||||
caffe::NetParameter netBinary;
|
||||
|
||||
public:
|
||||
|
||||
CaffeImporter(const char *pototxt, const char *caffeModel)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
ReadNetParamsFromTextFileOrDie(pototxt, &net);
|
||||
|
||||
if (caffeModel && caffeModel[0])
|
||||
ReadNetParamsFromBinaryFileOrDie(caffeModel, &netBinary);
|
||||
}
|
||||
|
||||
CaffeImporter(const char *dataProto, size_t lenProto,
|
||||
const char *dataModel, size_t lenModel)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
ReadNetParamsFromTextBufferOrDie(dataProto, lenProto, &net);
|
||||
|
||||
if (dataModel != NULL && lenModel > 0)
|
||||
ReadNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBinary);
|
||||
}
|
||||
|
||||
// Copies custom (unknown to the schema) parameters into `params`.
// Every unknown field must be a group; the first two entries of the group are
// read as length-delimited strings and stored as a (name, value) pair.
//   unknownFields - unknown field set of the message being imported.
//   params        - destination dictionary; existing keys are overwritten.
// Raises a cv::Exception (via CV_Assert) when a field is not a group or the
// group holds fewer than two entries.
void extractCustomParams(const google::protobuf::UnknownFieldSet& unknownFields, cv::dnn::LayerParams &params)
{
    const int numFields = unknownFields.field_count();
    for (int i = 0; i < numFields; ++i)
    {
        const google::protobuf::UnknownField& field = unknownFields.field(i);
        CV_Assert(field.type() == google::protobuf::UnknownField::TYPE_GROUP);

        // Guard the two indexed reads below against a short/malformed group.
        const google::protobuf::UnknownFieldSet& group = field.group();
        CV_Assert(group.field_count() >= 2);

        std::string fieldName = group.field(0).length_delimited();
        std::string fieldValue = group.field(1).length_delimited();
        params.set(fieldName, fieldValue);
    }
}
|
||||
|
||||
void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams ¶ms)
|
||||
{
|
||||
const Reflection *refl = msg.GetReflection();
|
||||
int type = field->cpp_type();
|
||||
bool isRepeated = field->is_repeated();
|
||||
const std::string &name = field->name();
|
||||
|
||||
#define SET_UP_FILED(getter, arrayConstr, gtype) \
|
||||
if (isRepeated) { \
|
||||
const RepeatedField<gtype> &v = refl->GetRepeatedField<gtype>(msg, field); \
|
||||
params.set(name, DictValue::arrayConstr(v.begin(), (int)v.size())); \
|
||||
} \
|
||||
else { \
|
||||
params.set(name, refl->getter(msg, field)); \
|
||||
}
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case FieldDescriptor::CPPTYPE_INT32:
|
||||
SET_UP_FILED(GetInt32, arrayInt, ::google::protobuf::int32);
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_UINT32:
|
||||
SET_UP_FILED(GetUInt32, arrayInt, ::google::protobuf::uint32);
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_INT64:
|
||||
SET_UP_FILED(GetInt32, arrayInt, ::google::protobuf::int64);
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_UINT64:
|
||||
SET_UP_FILED(GetUInt32, arrayInt, ::google::protobuf::uint64);
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_BOOL:
|
||||
SET_UP_FILED(GetBool, arrayInt, bool);
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_DOUBLE:
|
||||
SET_UP_FILED(GetDouble, arrayReal, double);
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_FLOAT:
|
||||
SET_UP_FILED(GetFloat, arrayReal, float);
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_STRING:
|
||||
if (isRepeated) {
|
||||
const RepeatedPtrField<std::string> &v = refl->GetRepeatedPtrField<std::string>(msg, field);
|
||||
params.set(name, DictValue::arrayString(v.begin(), (int)v.size()));
|
||||
}
|
||||
else {
|
||||
params.set(name, refl->GetString(msg, field));
|
||||
}
|
||||
break;
|
||||
case FieldDescriptor::CPPTYPE_ENUM:
|
||||
if (isRepeated) {
|
||||
int size = refl->FieldSize(msg, field);
|
||||
std::vector<cv::String> buf(size);
|
||||
for (int i = 0; i < size; i++)
|
||||
buf[i] = refl->GetRepeatedEnum(msg, field, i)->name();
|
||||
params.set(name, DictValue::arrayString(buf.begin(), size));
|
||||
}
|
||||
else {
|
||||
params.set(name, refl->GetEnum(msg, field)->name());
|
||||
}
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsError, "Unknown type \"" + String(field->type_name()) + "\" in prototxt");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when `str` ends with the suffix "_param".
inline static bool ends_with_param(const std::string &str)
{
    static const std::string suffix("_param");

    if (str.size() < suffix.size())
        return false;

    const std::string::size_type offset = str.size() - suffix.size();
    return str.compare(offset, suffix.size(), suffix) == 0;
}
|
||||
|
||||
void extractLayerParams(const Message &msg, cv::dnn::LayerParams ¶ms, bool isInternal = false)
|
||||
{
|
||||
const Descriptor *msgDesc = msg.GetDescriptor();
|
||||
const Reflection *msgRefl = msg.GetReflection();
|
||||
|
||||
for (int fieldId = 0; fieldId < msgDesc->field_count(); fieldId++)
|
||||
{
|
||||
const FieldDescriptor *fd = msgDesc->field(fieldId);
|
||||
|
||||
if (!isInternal && !ends_with_param(fd->name()))
|
||||
continue;
|
||||
|
||||
const google::protobuf::UnknownFieldSet& unknownFields = msgRefl->GetUnknownFields(msg);
|
||||
bool hasData = fd->is_required() ||
|
||||
(fd->is_optional() && msgRefl->HasField(msg, fd)) ||
|
||||
(fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0) ||
|
||||
!unknownFields.empty();
|
||||
if (!hasData)
|
||||
continue;
|
||||
|
||||
extractCustomParams(unknownFields, params);
|
||||
if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE)
|
||||
{
|
||||
if (fd->is_repeated()) //Extract only first item!
|
||||
extractLayerParams(msgRefl->GetRepeatedMessage(msg, fd, 0), params, true);
|
||||
else
|
||||
extractLayerParams(msgRefl->GetMessage(msg, fd), params, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
addParam(msg, fd, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determines the shape of a Caffe blob.
// Priority: the legacy num/channels/height/width fields (if any of them is set,
// all four are used), then the `shape` message, otherwise a single dimension
// of size 1 (scalar).
void blobShapeFromProto(const caffe::BlobProto &pbBlob, MatShape& shape)
{
    shape.clear();

    const bool hasLegacyDims = pbBlob.has_num() || pbBlob.has_channels() ||
                               pbBlob.has_height() || pbBlob.has_width();
    if (hasLegacyDims)
    {
        shape.push_back(pbBlob.num());
        shape.push_back(pbBlob.channels());
        shape.push_back(pbBlob.height());
        shape.push_back(pbBlob.width());
        return;
    }

    if (pbBlob.has_shape())
    {
        shape = parseBlobShape(pbBlob.shape());
        return;
    }

    shape.resize(1, 1);  // Is a scalar.
}
|
||||
|
||||
// Decodes a Caffe BlobProto into `dstBlob`, which is (re)allocated as CV_32F
// with the shape reported by blobShapeFromProto().
// The payload is taken from the repeated float `data` field when it is non-empty;
// otherwise from `raw_data`, interpreted as FLOAT16 or FLOAT according to
// `raw_data_type`. Any other raw type raises StsNotImplemented.
void blobFromProto(const caffe::BlobProto &pbBlob, cv::Mat &dstBlob)
{
    MatShape shape;
    blobShapeFromProto(pbBlob, shape);
    const int numDims = (int)shape.size();
    const int* const dims = &shape[0];

    dstBlob.create(numDims, dims, CV_32F);

    if (pbBlob.data_size() != 0)
    {
        // Single precision floats.
        CV_Assert(pbBlob.data_size() == (int)dstBlob.total());

        CV_DbgAssert(pbBlob.GetDescriptor()->FindFieldByLowercaseName("data")->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT);
        void* const floatData = (void*)pbBlob.data().data();
        Mat(dstBlob.dims, &dstBlob.size[0], CV_32F, floatData).copyTo(dstBlob);
        return;
    }

    CV_Assert(pbBlob.has_raw_data());
    const std::string& raw_data = pbBlob.raw_data();
    switch (pbBlob.raw_data_type())
    {
    case caffe::FLOAT16:
    {
        // Half precision floats.
        CV_Assert(raw_data.size() / 2 == (int)dstBlob.total());

        Mat halfs(numDims, dims, CV_16SC1, (void*)raw_data.c_str());
        convertFp16(halfs, dstBlob);
        break;
    }
    case caffe::FLOAT:
    {
        CV_Assert(raw_data.size() / 4 == (int)dstBlob.total());
        Mat(numDims, dims, CV_32FC1, (void*)raw_data.c_str()).copyTo(dstBlob);
        break;
    }
    default:
        CV_Error(Error::StsNotImplemented, "Unexpected blob data type");
    }
}
|
||||
|
||||
// Moves the weight blobs of `layer` from the binary model into `layerParams.blobs`.
// The binary net is searched for the first layer with the same name that still
// has blobs; when none exists the function returns without touching `layerParams`.
// The blobs are extracted (removed) from the binary net, converted with
// blobFromProto() and then deleted.
void extractBinaryLayerParams(const caffe::LayerParameter& layer, LayerParams& layerParams)
{
    const std::string &name = layer.name();

    // Locate the matching binary layer whose blobs have not been taken yet.
    const int numBinLayers = netBinary.layer_size();
    int found = -1;
    for (int idx = 0; idx < numBinLayers && found < 0; ++idx)
    {
        const caffe::LayerParameter& candidate = netBinary.layer(idx);
        if (candidate.name() == name && candidate.blobs_size() != 0)
            found = idx;
    }
    if (found < 0)
        return;

    caffe::LayerParameter* binLayer = netBinary.mutable_layer(found);
    const int numBlobs = binLayer->blobs_size();

    // ExtractSubrange hands ownership of the blob messages to this function.
    std::vector<caffe::BlobProto*> blobs(numBlobs);
    binLayer->mutable_blobs()->ExtractSubrange(0, numBlobs, blobs.data());

    layerParams.blobs.resize(numBlobs);
    for (int bi = 0; bi < numBlobs; ++bi)
    {
        caffe::BlobProto* proto = blobs[bi];
        blobFromProto(*proto, layerParams.blobs[bi]);
        delete proto;
    }
}
|
||||
|
||||
// Records which layer output produced a named blob.
struct BlobNote
{
    BlobNote(const std::string &_name, int _layerId, int _outNum)
        : name(_name)
        , layerId(_layerId)
        , outNum(_outNum)
    {
    }

    std::string name;  // blob name
    int layerId;       // id of the producing layer
    int outNum;        // output index within that layer
};
|
||||
|
||||
std::vector<BlobNote> addedBlobs;
|
||||
std::map<String, int> layerCounter;
|
||||
|
||||
// Translates the parsed Caffe network into `dstNet`.
// Steps:
//   1. register the network-level inputs and their (optional) shapes;
//   2. for every layer: gather text and binary parameters, make the layer name
//      unique, apply the type-specific rewrites below, add the layer and
//      connect its bottoms/tops;
//   3. publish input names and, when known, input shapes on `dstNet`.
// Type-specific handling:
//   Input                - becomes a network input, no layer is added;
//   BatchNorm            - when use_global_stats is false, an MVN layer is
//                          inserted in front and mean/std blobs are reset;
//   Axpy                 - split into a Scale layer followed by Eltwise;
//   Resample             - mapped to Resize;
//   Convolution          - one layer is added per bottom/top pair;
//   ConvolutionDepthwise - mapped to Convolution.
// `dstNet` is taken by value, as in the original interface.
void populateNet(Net dstNet)
{
    CV_TRACE_FUNCTION();

    int layersSize = net.layer_size();
    layerCounter.clear();
    addedBlobs.clear();
    addedBlobs.reserve(layersSize + 1);

    //setup input layer names
    std::vector<String> netInputs(net.input_size());
    std::vector<MatShape> inp_shapes;
    {
        int net_input_size = net.input_size();
        for (int inNum = 0; inNum < net_input_size; inNum++)
        {
            addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
            netInputs[inNum] = net.input(inNum);
        }

        if (net.input_dim_size() > 0)  // deprecated in Caffe proto
        {
            // Flat list of dimensions, four per input.
            int net_input_dim_size = net.input_dim_size();
            CV_Check(net_input_dim_size, net_input_dim_size % 4 == 0, "");
            CV_CheckEQ(net_input_dim_size, net_input_size * 4, "");
            for (int inp_id = 0; inp_id < net_input_size; inp_id++)
            {
                int dim = inp_id * 4;
                MatShape shape(4);
                shape[0] = net.input_dim(dim);
                shape[1] = net.input_dim(dim+1);
                shape[2] = net.input_dim(dim+2);
                shape[3] = net.input_dim(dim+3);
                inp_shapes.push_back(shape);
            }
        }
        else if (net.input_shape_size() > 0)  // deprecated in Caffe proto
        {
            int net_input_shape_size = net.input_shape_size();
            CV_CheckEQ(net_input_shape_size, net_input_size, "");
            for (int inp_id = 0; inp_id < net_input_shape_size; inp_id++)
            {
                MatShape shape = parseBlobShape(net.input_shape(inp_id));
                inp_shapes.push_back(shape);
            }
        }
        else
        {
            // No shape information: keep one empty shape per declared input.
            for (int inp_id = 0; inp_id < net_input_size; inp_id++)
            {
                MatShape shape;  // empty
                inp_shapes.push_back(shape);
            }
        }
    }

    for (int li = 0; li < layersSize; li++)
    {
        const caffe::LayerParameter &layer = net.layer(li);
        String name = layer.name();
        String type = layer.type();
        LayerParams layerParams;

        extractLayerParams(layer, layerParams);
        extractBinaryLayerParams(layer, layerParams);

        // Repeated layer names get a numeric suffix.
        int repetitions = layerCounter[name]++;
        if (repetitions)
            name += String("_") + toString(repetitions);

        if (type == "Input")
        {
            for (int outNum = 0; outNum < layer.top_size(); outNum++)
            {
                addOutput(layer, 0, outNum);
                addedBlobs.back().outNum = (int)netInputs.size();
                netInputs.push_back(addedBlobs.back().name);
            }
            if (layer.has_input_param())
            {
                const caffe::InputParameter &inputParameter = layer.input_param();
                int input_shape_size = inputParameter.shape_size();
                CV_CheckEQ(input_shape_size, layer.top_size(), "");
                for (int inp_id = 0; inp_id < input_shape_size; inp_id++)
                {
                    MatShape shape = parseBlobShape(inputParameter.shape(inp_id));
                    inp_shapes.push_back(shape);
                }
            }
            continue;
        }
        else if (type == "BatchNorm")
        {
            if (!layerParams.get<bool>("use_global_stats", true))
            {
                CV_Assert_N(layer.bottom_size() == 1, layer.top_size() == 1);

                LayerParams mvnParams;
                mvnParams.set("eps", layerParams.get<float>("eps", 1e-5));
                std::string mvnName = name + "/mvn";

                int repetitions = layerCounter[mvnName]++;
                if (repetitions)
                    mvnName += String("_") + toString(repetitions);

                int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
                addInput(layer.bottom(0), mvnId, 0, dstNet);
                addOutput(layer, mvnId, 0);
                // The BatchNorm layer now consumes the MVN output.
                net.mutable_layer(li)->set_bottom(0, layer.top(0));

                // The mean/std blobs only exist when weights were loaded;
                // fail with an exception instead of indexing an empty vector.
                CV_Assert(layerParams.blobs.size() >= 2);
                layerParams.blobs[0].setTo(0);  // mean
                layerParams.blobs[1].setTo(1);  // std
            }
        }
        else if (type == "Axpy")
        {
            CV_Assert_N(layer.bottom_size() == 3, layer.top_size() == 1);

            std::string scaleName = name + "/scale";
            int repetitions = layerCounter[scaleName]++;
            if (repetitions) {
                scaleName += String("_") + toString(repetitions);
            }

            LayerParams scaleParams;
            scaleParams.set("axis", 1);
            scaleParams.set("has_bias", false);
            int scaleId = dstNet.addLayer(scaleName, "Scale", scaleParams);
            addInput(layer.bottom(2), scaleId, 0, dstNet);
            addInput(layer.bottom(0), scaleId, 1, dstNet);
            addOutput(layer, scaleId, 0);
            // The remaining Eltwise layer takes the Scale output and bottom(1).
            net.mutable_layer(li)->set_bottom(0, layer.top(0));
            net.mutable_layer(li)->mutable_bottom()->RemoveLast();
            type = "Eltwise";
        }
        else if (type == "Resample")
        {
            CV_Assert(layer.bottom_size() == 1 || layer.bottom_size() == 2);
            type = "Resize";
            String interp = toLowerCase(layerParams.get<String>("type"));
            layerParams.set("interpolation", interp == "linear" ? "bilinear" : interp);

            if (layerParams.has("factor"))
            {
                float factor = layerParams.get<float>("factor");
                CV_Assert(layer.bottom_size() != 2 || factor == 1.0);
                layerParams.set("zoom_factor", factor);

                if ((interp == "linear" && factor != 1.0) ||
                    (interp == "nearest" && factor < 1.0))
                    CV_Error(Error::StsNotImplemented, "Unsupported Resample mode");
            }
        }
        else if ("Convolution" == type)
        {
            // One convolution layer per bottom/top pair, named after the top blob.
            CV_Assert(layer.bottom_size() == layer.top_size());
            for (int i = 0; i < layer.bottom_size(); i++)
            {
                int conv_id = dstNet.addLayer(layer.top(i), type, layerParams);
                addInput(layer.bottom(i), conv_id, 0, dstNet);
                addedBlobs.push_back(BlobNote(layer.top(i), conv_id, 0));
            }
            continue;
        }
        else if ("ConvolutionDepthwise" == type)
        {
            type = "Convolution";
        }

        int id = dstNet.addLayer(name, type, layerParams);

        for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
            addInput(layer.bottom(inNum), id, inNum, dstNet);

        for (int outNum = 0; outNum < layer.top_size(); outNum++)
            addOutput(layer, id, outNum);
    }
    dstNet.setInputsNames(netInputs);

    if (inp_shapes.size() > 0)
    {
        CV_CheckEQ(inp_shapes.size(), netInputs.size(), "");
        for (size_t inp_id = 0; inp_id < inp_shapes.size(); inp_id++)
            dstNet.setInputShape(netInputs[inp_id], inp_shapes[inp_id]);
    }

    addedBlobs.clear();
}
|
||||
|
||||
// Registers top blob number `outNum` of `layer` as produced by output `outNum`
// of layer `layerId`.
// A blob name that was already registered is accepted only when the layer works
// in place (its bottom with the same index has the same name); otherwise
// StsBadArg is raised.
void addOutput(const caffe::LayerParameter &layer, int layerId, int outNum)
{
    const std::string &name = layer.top(outNum);

    // Has any earlier producer already registered this blob name?
    bool alreadyProduced = false;
    for (std::vector<BlobNote>::const_reverse_iterator it = addedBlobs.rbegin();
         it != addedBlobs.rend() && !alreadyProduced; ++it)
    {
        alreadyProduced = (it->name == name);
    }

    if (alreadyProduced)
    {
        const bool isInplace = layer.bottom_size() > outNum && layer.bottom(outNum) == name;
        if (!isInplace)
            CV_Error(Error::StsBadArg, "Duplicate blobs produced by multiple sources");
    }

    addedBlobs.push_back(BlobNote(name, layerId, outNum));
}
|
||||
|
||||
void addInput(const std::string &name, int layerId, int inNum, Net &dstNet)
|
||||
{
|
||||
int idx;
|
||||
for (idx = (int)addedBlobs.size() - 1; idx >= 0; idx--)
|
||||
{
|
||||
if (addedBlobs[idx].name == name)
|
||||
break;
|
||||
}
|
||||
|
||||
if (idx < 0)
|
||||
{
|
||||
CV_Error(Error::StsObjectNotFound, "Can't find output blob \"" + name + "\"");
|
||||
return;
|
||||
}
|
||||
|
||||
dstNet.connect(addedBlobs[idx].layerId, addedBlobs[idx].outNum, layerId, inNum);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// Loads a Caffe network from a prototxt file and an optional caffemodel file
// (an empty `caffeModel` path means "no weights").
Net readNetFromCaffe(const String &prototxt, const String &caffeModel /*= String()*/)
{
    const char* const protoPath = prototxt.c_str();
    const char* const modelPath = caffeModel.c_str();

    CaffeImporter importer(protoPath, modelPath);
    Net result;
    importer.populateNet(result);
    return result;
}
|
||||
|
||||
// Loads a Caffe network from in-memory buffers: a text description and an
// optional binary model (NULL pointer or zero length means "no weights").
Net readNetFromCaffe(const char *bufferProto, size_t lenProto,
                     const char *bufferModel, size_t lenModel)
{
    CaffeImporter importer(bufferProto, lenProto, bufferModel, lenModel);

    Net result;
    importer.populateNet(result);
    return result;
}
|
||||
|
||||
// Loads a Caffe network from byte vectors: a text description and an optional
// binary model (an empty `bufferModel` means "no weights").
// Forwards to the raw-buffer overload.
Net readNetFromCaffe(const std::vector<uchar>& bufferProto, const std::vector<uchar>& bufferModel)
{
    // data() is well defined for an empty vector, unlike &v[0].
    const char* bufferProtoPtr = reinterpret_cast<const char*>(bufferProto.data());
    const char* bufferModelPtr = bufferModel.empty() ? NULL :
                                 reinterpret_cast<const char*>(bufferModel.data());
    return readNetFromCaffe(bufferProtoPtr, bufferProto.size(),
                            bufferModelPtr, bufferModel.size());
}
|
||||
|
||||
#endif //HAVE_PROTOBUF
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
1190
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_io.cpp
vendored
Normal file
1190
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_io.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
129
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_io.hpp
vendored
Normal file
129
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_io.hpp
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//COPYRIGHT
|
||||
//
|
||||
//All contributions by the University of California:
|
||||
//Copyright (c) 2014, The Regents of the University of California (Regents)
|
||||
//All rights reserved.
|
||||
//
|
||||
//All other contributions:
|
||||
//Copyright (c) 2014, the respective contributors
|
||||
//All rights reserved.
|
||||
//
|
||||
//Caffe uses a shared copyright model: each contributor holds copyright over
|
||||
//their contributions to Caffe. The project versioning records all such
|
||||
//contribution and copyright details. If a contributor wants to further mark
|
||||
//their specific copyright on a particular contribution, they should indicate
|
||||
//their copyright solely in the commit message of the change when it is
|
||||
//committed.
|
||||
//
|
||||
//LICENSE
|
||||
//
|
||||
//Redistribution and use in source and binary forms, with or without
|
||||
//modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
//1. Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
//ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
//WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
//DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
//ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
//(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
//ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
//(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
//SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
//CONTRIBUTION AGREEMENT
|
||||
//
|
||||
//By contributing to the BVLC/caffe repository through pull-request, comment,
|
||||
//or otherwise, the contributor releases their content to the
|
||||
//license and copyright terms herein.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_DNN_CAFFE_IO_HPP__
|
||||
#define __OPENCV_DNN_CAFFE_IO_HPP__
|
||||
#ifdef HAVE_PROTOBUF
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ >= 5
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
#endif
|
||||
#include "opencv-caffe.pb.h"
|
||||
#if defined(__GNUC__) && __GNUC__ >= 5
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace caffe { using namespace opencv_caffe; } // avoid massive renames from caffe proto package
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
|
||||
// Read parameters from a file into a NetParameter proto message.
|
||||
void ReadNetParamsFromTextFileOrDie(const char* param_file,
|
||||
caffe::NetParameter* param);
|
||||
void ReadNetParamsFromBinaryFileOrDie(const char* param_file,
|
||||
caffe::NetParameter* param);
|
||||
|
||||
// Read parameters from a memory buffer into a NetParameter proto message.
|
||||
void ReadNetParamsFromBinaryBufferOrDie(const char* data, size_t len,
|
||||
caffe::NetParameter* param);
|
||||
void ReadNetParamsFromTextBufferOrDie(const char* data, size_t len,
|
||||
caffe::NetParameter* param);
|
||||
|
||||
// Utility functions used internally by Caffe and TensorFlow loaders
|
||||
bool ReadProtoFromTextFile(const char* filename, ::google::protobuf::Message* proto);
|
||||
bool ReadProtoFromBinaryFile(const char* filename, ::google::protobuf::Message* proto);
|
||||
bool ReadProtoFromTextBuffer(const char* data, size_t len, ::google::protobuf::Message* proto);
|
||||
bool ReadProtoFromBinaryBuffer(const char* data, size_t len, ::google::protobuf::Message* proto);
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
80
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_shrinker.cpp
vendored
Normal file
80
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/caffe_shrinker.cpp
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "../precomp.hpp"
|
||||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
#include <fstream>
|
||||
#include "caffe_io.hpp"
|
||||
#endif
|
||||
|
||||
namespace cv { namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
|
||||
void shrinkCaffeModel(const String& src, const String& dst, const std::vector<String>& layersTypes)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
std::vector<String> types(layersTypes);
|
||||
if (types.empty())
|
||||
{
|
||||
types.push_back("Convolution");
|
||||
types.push_back("InnerProduct");
|
||||
}
|
||||
|
||||
caffe::NetParameter net;
|
||||
ReadNetParamsFromBinaryFileOrDie(src.c_str(), &net);
|
||||
|
||||
for (int i = 0; i < net.layer_size(); ++i)
|
||||
{
|
||||
caffe::LayerParameter* lp = net.mutable_layer(i);
|
||||
if (std::find(types.begin(), types.end(), lp->type()) == types.end())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
for (int j = 0; j < lp->blobs_size(); ++j)
|
||||
{
|
||||
caffe::BlobProto* blob = lp->mutable_blobs(j);
|
||||
CV_Assert(blob->data_size() != 0); // float32 array.
|
||||
|
||||
Mat floats(1, blob->data_size(), CV_32FC1, (void*)blob->data().data());
|
||||
Mat halfs(1, blob->data_size(), CV_16SC1);
|
||||
convertFp16(floats, halfs); // Convert to float16.
|
||||
|
||||
blob->clear_data(); // Clear float32 data.
|
||||
|
||||
// Set float16 data.
|
||||
blob->set_raw_data(halfs.data, halfs.total() * halfs.elemSize());
|
||||
blob->set_raw_data_type(caffe::FLOAT16);
|
||||
}
|
||||
}
|
||||
#if GOOGLE_PROTOBUF_VERSION < 3005000
|
||||
size_t msgSize = saturate_cast<size_t>(net.ByteSize());
|
||||
#else
|
||||
size_t msgSize = net.ByteSizeLong();
|
||||
#endif
|
||||
std::vector<uint8_t> output(msgSize);
|
||||
net.SerializeWithCachedSizesToArray(&output[0]);
|
||||
|
||||
std::ofstream ofs(dst.c_str(), std::ios::binary);
|
||||
ofs.write((const char*)&output[0], msgSize);
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void shrinkCaffeModel(const String& src, const String& dst, const std::vector<String>& types)
|
||||
{
|
||||
CV_Error(cv::Error::StsNotImplemented, "libprotobuf required to import data from Caffe models");
|
||||
}
|
||||
|
||||
#endif // HAVE_PROTOBUF
|
||||
|
||||
CV__DNN_INLINE_NS_END
|
||||
}} // namespace
|
||||
106
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/glog_emulator.hpp
vendored
Normal file
106
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/glog_emulator.hpp
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_DNN_CAFFE_GLOG_EMULATOR_HPP__
|
||||
#define __OPENCV_DNN_CAFFE_GLOG_EMULATOR_HPP__
|
||||
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
#include <opencv2/core.hpp>
|
||||
|
||||
// glog-style CHECK(cond) << "message";
// The for-header builds a GLogWrapper holding the stringified condition and its value. The body
// (the streamed message) runs once, then GLogWrapper::check() ends the loop and reports a failed condition.
#define CHECK(cond) for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, "CHECK", #cond, cond); _logger.exit(); _logger.check()) _logger.stream()
|
||||
// glog-style CHECK_EQ(a, b) << "message"; same mechanism as CHECK, with ((a) == (b)) as the condition.
// The condition text passed to GLogWrapper is the two stringified arguments joined by "=".
#define CHECK_EQ(a, b) for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, "CHECK", #a"="#b, ((a) == (b))); _logger.exit(); _logger.check()) _logger.stream()
|
||||
// glog-style LOG(TYPE) << "message"; TYPE is stringified and passed as the logger type, with no condition attached.
#define LOG(TYPE) for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, #TYPE); _logger.exit(); _logger.check()) _logger.stream()
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace dnn
|
||||
{
|
||||
|
||||
class GLogWrapper
|
||||
{
|
||||
const char *file, *func, *type, *cond_str;
|
||||
int line;
|
||||
bool cond_status, exit_loop;
|
||||
std::stringstream sstream;
|
||||
|
||||
public:
|
||||
|
||||
GLogWrapper(const char *_file, const char *_func, int _line,
|
||||
const char *_type,
|
||||
const char *_cond_str = NULL, bool _cond_status = true
|
||||
) :
|
||||
file(_file), func(_func), type(_type), cond_str(_cond_str),
|
||||
line(_line), cond_status(_cond_status), exit_loop(true) {}
|
||||
|
||||
std::iostream &stream()
|
||||
{
|
||||
return sstream;
|
||||
}
|
||||
|
||||
bool exit()
|
||||
{
|
||||
return exit_loop;
|
||||
}
|
||||
|
||||
void check()
|
||||
{
|
||||
exit_loop = false;
|
||||
|
||||
if (cond_str && !cond_status)
|
||||
{
|
||||
cv::error(cv::Error::StsError, "FAILED: " + String(cond_str) + ". " + sstream.str(), func, file, line);
|
||||
}
|
||||
else if (!cond_str && strcmp(type, "CHECK"))
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
if (!std::strcmp(type, "INFO"))
|
||||
std::cout << sstream.str() << std::endl;
|
||||
else
|
||||
std::cerr << sstream.str() << std::endl;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
1649
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/opencv-caffe.proto
vendored
Normal file
1649
3rdparty/opencv-4.5.4/modules/dnn/src/caffe/opencv-caffe.proto
vendored
Normal file
File diff suppressed because it is too large
Load Diff
121
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/activation_eltwise.cu
vendored
Normal file
121
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/activation_eltwise.cu
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "functors.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
|
||||
template <class T, class ActivationOp, class EltwiseOp, std::size_t N>
|
||||
__global__ void generic_op_eltwise_op_inplace_vec(Span<T> inplace_output, View<T> eltwise, const typename ActivationOp::Params act_params, const typename EltwiseOp::Params eltwise_params) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto inplace_output_vPtr = vector_type::get_pointer(inplace_output.data());
|
||||
auto eltwise_vPtr = vector_type::get_pointer(eltwise.data());
|
||||
|
||||
ActivationOp activation_op(act_params);
|
||||
EltwiseOp eltwise_op(eltwise_params);
|
||||
|
||||
for (auto i : grid_stride_range(inplace_output.size() / vector_type::size())) {
|
||||
vector_type output_vec, eltwise_vec;
|
||||
v_load(output_vec, inplace_output_vPtr[i]);
|
||||
v_load(eltwise_vec, eltwise_vPtr[i]);
|
||||
for(int j = 0; j < output_vec.size(); j++)
|
||||
output_vec.data[j] = eltwise_op(activation_op(output_vec.data[j]), eltwise_vec.data[j]);
|
||||
v_store(inplace_output_vPtr[i], output_vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, class ActivationOp, class EltwiseOp, std::size_t N> static
|
||||
void launch_vectorized_generic_op_eltwise_op_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise, const typename ActivationOp::Params& act_params, const typename EltwiseOp::Params& eltwise_params) {
|
||||
CV_Assert(is_fully_aligned<T>(inplace_output, N));
|
||||
CV_Assert(is_fully_aligned<T>(eltwise, N));
|
||||
|
||||
auto kernel = raw::generic_op_eltwise_op_inplace_vec<T, ActivationOp, EltwiseOp, N>;
|
||||
auto policy = make_policy(kernel, inplace_output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, inplace_output, eltwise, act_params, eltwise_params);
|
||||
}
|
||||
|
||||
template <class T, class ActivationOp, class EltwiseOp> static
|
||||
void generic_op_eltwise_op_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise, const typename ActivationOp::Params& act_params = {}, const typename EltwiseOp::Params& eltwise_params = {}) {
|
||||
CV_Assert(inplace_output.size() == eltwise.size());
|
||||
|
||||
if (is_fully_aligned<T>(inplace_output, 4) && is_fully_aligned<T>(eltwise, 4)) {
|
||||
launch_vectorized_generic_op_eltwise_op_inplace<T, ActivationOp, EltwiseOp, 4>(stream, inplace_output, eltwise, act_params, eltwise_params);
|
||||
} else if (is_fully_aligned<T>(inplace_output, 2) && is_fully_aligned<T>(eltwise, 2)) {
|
||||
launch_vectorized_generic_op_eltwise_op_inplace<T, ActivationOp, EltwiseOp, 2>(stream, inplace_output, eltwise, act_params, eltwise_params);
|
||||
} else {
|
||||
launch_vectorized_generic_op_eltwise_op_inplace<T, ActivationOp, EltwiseOp, 1>(stream, inplace_output, eltwise, act_params, eltwise_params);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void relu_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise, T slope) {
|
||||
generic_op_eltwise_op_inplace<T, ReLUFunctor<T>, SumFunctor<T>>(stream, inplace_output, eltwise, {slope});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void clipped_relu_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise, T floor, T ceiling) {
|
||||
CV_Assert(static_cast<double>(floor) <= static_cast<double>(ceiling));
|
||||
generic_op_eltwise_op_inplace<T, ClippedReLUFunctor<T>, SumFunctor<T>>(stream, inplace_output, eltwise, {floor, ceiling});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void tanh_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise) {
|
||||
generic_op_eltwise_op_inplace<T, TanHFunctor<T>, SumFunctor<T>>(stream, inplace_output, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void swish_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise) {
|
||||
generic_op_eltwise_op_inplace<T, SwishFunctor<T>, SumFunctor<T>>(stream, inplace_output, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void mish_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise) {
|
||||
generic_op_eltwise_op_inplace<T, MishFunctor<T>, SumFunctor<T>>(stream, inplace_output, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void sigmoid_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise) {
|
||||
generic_op_eltwise_op_inplace<T, SigmoidFunctor<T>, SumFunctor<T>>(stream, inplace_output, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void power_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, View<T> eltwise, T exp, T scale, T shift) {
|
||||
generic_op_eltwise_op_inplace<T, PowerFunctor<T>, SumFunctor<T>>(stream, inplace_output, eltwise, {exp, scale, shift});
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void relu_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, View<__half>, __half);
|
||||
template void clipped_relu_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
|
||||
template void tanh_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void swish_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void mish_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void sigmoid_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void power_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
|
||||
#endif
|
||||
|
||||
template void relu_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, View<float>, float);
|
||||
template void clipped_relu_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, View<float>, float, float);
|
||||
template void tanh_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, View<float>);
|
||||
template void swish_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, View<float>);
|
||||
template void mish_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, View<float>);
|
||||
template void sigmoid_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, View<float>);
|
||||
template void power_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, View<float>, float, float, float);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
209
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/activations.cu
vendored
Normal file
209
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/activations.cu
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "functors.hpp"
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include "../cuda4dnn/kernels/scale_shift.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, class ActivationOp, std::size_t N>
|
||||
__global__ void generic_op_vec(Span<T> output, View<T> input, const typename ActivationOp::Params params) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = vector_type::get_pointer(input.data());
|
||||
|
||||
ActivationOp activation_op(params);
|
||||
|
||||
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
|
||||
vector_type vec;
|
||||
v_load(vec, input_vPtr[i]);
|
||||
for (int j = 0; j < vector_type::size(); j++)
|
||||
vec.data[j] = activation_op(vec.data[j]);
|
||||
v_store(output_vPtr[i], vec);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t N>
|
||||
__global__ void axiswise_relu_vec(Span<T> output, View<T> input, size_type inner_size, View<T> slope) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = vector_type::get_pointer(input.data());
|
||||
|
||||
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
|
||||
const index_type c = (i / inner_size) % slope.size();
|
||||
|
||||
vector_type vec;
|
||||
v_load(vec, input_vPtr[i]);
|
||||
for (int j = 0; j < vector_type::size(); j++)
|
||||
vec.data[j] = vec.data[j] > T(0) ? vec.data[j] : vec.data[j] * slope[c];
|
||||
v_store(output_vPtr[i], vec);
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace raw */
|
||||
|
||||
template <class T, class ActivationOp, std::size_t N> static
|
||||
void launch_vectorized_generic_op(const Stream& stream, Span<T> output, View<T> input, const typename ActivationOp::Params& params) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
|
||||
auto kernel = raw::generic_op_vec<T, ActivationOp, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, params);
|
||||
}
|
||||
|
||||
template <class T, class ActivationOp> static
|
||||
void generic_op(const Stream& stream, Span<T> output, View<T> input, const typename ActivationOp::Params& params = {}) {
|
||||
CV_Assert(input.size() == output.size());
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
|
||||
launch_vectorized_generic_op<T, ActivationOp, 4>(stream, output, input, params);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
|
||||
launch_vectorized_generic_op<T, ActivationOp, 2>(stream, output, input, params);
|
||||
} else {
|
||||
launch_vectorized_generic_op<T, ActivationOp, 1>(stream, output, input, params);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void relu(const Stream& stream, Span<T> output, View<T> input, T slope) {
|
||||
generic_op<T, ReLUFunctor<T>>(stream, output, input, {slope});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void clipped_relu(const Stream& stream, Span<T> output, View<T> input, T floor, T ceiling) {
|
||||
CV_Assert(static_cast<double>(floor) <= static_cast<double>(ceiling));
|
||||
generic_op<T, ClippedReLUFunctor<T>>(stream, output, input, {floor, ceiling});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void tanh(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, TanHFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void swish(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, SwishFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void mish(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, MishFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void sigmoid(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, SigmoidFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void elu(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, ELUFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void bnll(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, BNLLFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void abs(const Stream& stream, Span<T> output, View<T> input) {
|
||||
generic_op<T, AbsFunctor<T>>(stream, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void power(const Stream& stream, Span<T> output, View<T> input, T exp, T scale, T shift) {
|
||||
CV_Assert(input.size() == output.size());
|
||||
|
||||
if (static_cast<float>(exp) == 1.0f) {
|
||||
scale1_with_bias1(stream, output, input, scale, shift);
|
||||
return;
|
||||
}
|
||||
|
||||
generic_op<T, PowerFunctor<T>>(stream, output, input, {exp, scale, shift});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void exp(const Stream& stream, Span<T> output, View<T> input, T normScale, T normShift) {
|
||||
generic_op<T, ExpFunctor<T>>(stream, output, input, {normScale, normShift});
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void relu<__half>(const Stream&, Span<__half>, View<__half>, __half);
|
||||
template void clipped_relu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
|
||||
template void tanh<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void swish<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void mish<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void sigmoid<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void elu<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void abs<__half>(const Stream& stream, Span<__half> output, View<__half> input);
|
||||
template void bnll<__half>(const Stream&, Span<__half>, View<__half>);
|
||||
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
|
||||
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
|
||||
#endif
|
||||
|
||||
|
||||
template void relu<float>(const Stream&, Span<float>, View<float>, float);
|
||||
template void clipped_relu<float>(const Stream&, Span<float>, View<float>, float, float);
|
||||
template void tanh<float>(const Stream&, Span<float>, View<float>);
|
||||
template void swish<float>(const Stream&, Span<float>, View<float>);
|
||||
template void mish<float>(const Stream&, Span<float>, View<float>);
|
||||
template void sigmoid<float>(const Stream&, Span<float>, View<float>);
|
||||
template void elu<float>(const Stream&, Span<float>, View<float>);
|
||||
template void abs<float>(const Stream& stream, Span<float> output, View<float> input);
|
||||
template void bnll<float>(const Stream&, Span<float>, View<float>);
|
||||
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
|
||||
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_vectorized_axiswise_relu(const Stream& stream, Span<T> output, View<T> input, std::size_t inner_size, View<T> slope) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
CV_Assert(inner_size % N == 0);
|
||||
|
||||
auto kernel = raw::axiswise_relu_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, inner_size / N, slope);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void axiswise_relu(const Stream& stream, Span<T> output, View<T> input, std::size_t inner_size, View<T> slope) {
|
||||
CV_Assert(input.size() == output.size());
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4) && inner_size % 4 == 0) {
|
||||
launch_vectorized_axiswise_relu<T, 4>(stream, output, input, inner_size, slope);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2) && inner_size % 2 == 0) {
|
||||
launch_vectorized_axiswise_relu<T, 2>(stream, output, input, inner_size, slope);
|
||||
} else {
|
||||
launch_vectorized_axiswise_relu<T, 1>(stream, output, input, inner_size, slope);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void axiswise_relu<__half>(const Stream&, Span<__half>, View<__half>, std::size_t, View<__half>);
|
||||
#endif
|
||||
template void axiswise_relu<float>(const Stream&, Span<float>, View<float>, std::size_t, View<float>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
73
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/array.hpp
vendored
Normal file
73
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/array.hpp
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_ARRAY_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_ARRAY_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "types.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
#include <iterator>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
template <class T, std::size_t N>
|
||||
struct array {
|
||||
using value_type = T;
|
||||
using size_type = device::size_type;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using reference = typename std::add_lvalue_reference<value_type>::type;
|
||||
using const_reference = typename std::add_lvalue_reference<typename std::add_const<value_type>::type>::type;
|
||||
using pointer = typename std::add_pointer<value_type>::type;
|
||||
using const_pointer = typename std::add_pointer<typename std::add_const<value_type>::type>::type;
|
||||
using iterator = pointer;
|
||||
using const_iterator = const_pointer;
|
||||
using reverse_iterator = std::reverse_iterator<iterator>;
|
||||
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||
|
||||
__host__ __device__ bool empty() const noexcept { return N == 0; }
|
||||
__host__ __device__ size_type size() const noexcept { return N; }
|
||||
|
||||
__host__ __device__ iterator begin() noexcept { return ptr; }
|
||||
__host__ __device__ iterator end() noexcept { return ptr + N; }
|
||||
__host__ __device__ const_iterator begin() const noexcept { return ptr; }
|
||||
__host__ __device__ const_iterator end() const noexcept { return ptr + N; }
|
||||
|
||||
__host__ __device__ const_iterator cbegin() const noexcept { return ptr; }
|
||||
__host__ __device__ const_iterator cend() const noexcept { return ptr + N; }
|
||||
|
||||
__host__ __device__ reverse_iterator rbegin() noexcept { return ptr + N; }
|
||||
__host__ __device__ reverse_iterator rend() noexcept { return ptr; }
|
||||
__host__ __device__ const_reverse_iterator rbegin() const noexcept { return ptr + N; }
|
||||
__host__ __device__ const_reverse_iterator rend() const noexcept { return ptr; }
|
||||
|
||||
__host__ __device__ const_reverse_iterator crbegin() const noexcept { return ptr + N; }
|
||||
__host__ __device__ const_reverse_iterator crend() const noexcept { return ptr; }
|
||||
|
||||
template <class InputItr>
|
||||
__host__ void assign(InputItr first, InputItr last) {
|
||||
std::copy(first, last, std::begin(ptr));
|
||||
}
|
||||
|
||||
__host__ __device__ reference operator[](int idx) { return ptr[idx]; }
|
||||
__host__ __device__ const_reference operator[](int idx) const { return ptr[idx]; }
|
||||
|
||||
__host__ __device__ reference front() { return ptr[0]; }
|
||||
__host__ __device__ const_reference front() const { return ptr[0]; }
|
||||
|
||||
__host__ __device__ reference back() { return ptr[N - 1]; }
|
||||
__host__ __device__ const_reference back() const { return ptr[N - 1]; }
|
||||
|
||||
__host__ __device__ pointer data() noexcept { return ptr; }
|
||||
__host__ __device__ const_pointer data() const noexcept { return ptr; }
|
||||
|
||||
T ptr[N];
|
||||
};
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_ARRAY_HPP */
|
||||
38
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/atomics.hpp
vendored
Normal file
38
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/atomics.hpp
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_ATOMICS_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_ATOMICS_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
// The 16-bit __half floating-point version of atomicAdd() is only supported by devices of compute capability 7.x and higher.
|
||||
// This function was introduced in CUDA 10.
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicadd
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700 && CUDART_VERSION >= 10000)
|
||||
// And half-precision floating-point operations are not supported by devices of compute capability strictly lower than 5.3
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
|
||||
#elif __CUDA_ARCH__ < 530
|
||||
#else
|
||||
// Fallback atomicAdd for __half built on a 32-bit atomicCAS loop. Per the surrounding #if/#elif, it is
// compiled only for device code that takes neither of the two preceding branches.
// Unlike the built-in overloads it returns nothing.
inline __device__ void atomicAdd(__half* address, __half val) {
    // Address of the 32-bit word containing the half: bit 1 of the address selects which 16-bit
    // half of that word is targeted.
    unsigned int* address_as_ui = (unsigned int *)((char *)address - ((size_t)address & 2));
    unsigned int old = *address_as_ui;
    unsigned int assumed;

    do {
        assumed = old;

        // Extract the current 16-bit value from the selected half of the word and add `val` to it.
        __half_raw hsum;
        hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff);
        __half tmpres = hsum + val;
        hsum = __half_raw(tmpres);

        // Splice the new 16 bits back, leaving the other half of the word untouched. The shift is
        // done on an unsigned int so that it never shifts a promoted signed int into its sign bit.
        old = (size_t)address & 2 ? (old & 0xffff) | (static_cast<unsigned int>(hsum.x) << 16) : (old & 0xffff0000) | hsum.x;
        // Publish only if the word still holds `assumed`; otherwise retry with the value found there.
        old = atomicCAS(address_as_ui, assumed, old);
    } while (assumed != old);
}
|
||||
#endif
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_ATOMICS_HPP */
|
||||
39
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bbox_utils.hpp
vendored
Normal file
39
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bbox_utils.hpp
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_BBOX_UTILS_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_BBOX_UTILS_HPP
|
||||
|
||||
#include "math.hpp"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
/* Box described by its minimum (xmin, ymin) and maximum (xmax, ymax) coordinates. */
struct BoundingBox
{
    float xmin;
    float ymin;
    float xmax;
    float ymax;
};
|
||||
|
||||
template <bool NORMALIZED_BBOX>
|
||||
__device__ __forceinline__ float compute_bbox_size(BoundingBox bbox)
|
||||
{
|
||||
float width = bbox.xmax - bbox.xmin;
|
||||
float height = bbox.ymax - bbox.ymin;
|
||||
if (width < 0 || height < 0)
|
||||
return 0.0;
|
||||
|
||||
if (!NORMALIZED_BBOX)
|
||||
{
|
||||
width += 1;
|
||||
height += 1;
|
||||
}
|
||||
|
||||
using csl::device::mul_ftz;
|
||||
return mul_ftz(width, height);
|
||||
}
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_BBOX_UTILS_HPP */
|
||||
120
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bias_activation.cu
vendored
Normal file
120
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bias_activation.cu
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "functors.hpp"
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, class ActivationOp, std::size_t N>
|
||||
__global__ void biasN_generic_op_inplace_vec(Span<T> inplace_output, size_type inner_size, View<T> bias, const typename ActivationOp::Params params) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto inplace_output_vPtr = vector_type::get_pointer(inplace_output.data());
|
||||
|
||||
ActivationOp activation_op(params);
|
||||
|
||||
for (auto i : grid_stride_range(inplace_output.size() / vector_type::size())) {
|
||||
const index_type bias_idx = (i / inner_size) % bias.size();
|
||||
|
||||
vector_type vec;
|
||||
v_load(vec, inplace_output_vPtr[i]);
|
||||
for(int j = 0; j < vec.size(); j++)
|
||||
vec.data[j] = activation_op(vec.data[j] + bias[bias_idx]);
|
||||
v_store(inplace_output_vPtr[i], vec);
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace raw */
|
||||
|
||||
template <class T, class ActivationOp, std::size_t N> static
|
||||
void launch_vectorized_biasN_generic_op_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, const typename ActivationOp::Params& params) {
|
||||
CV_Assert(inplace_output.size() % inner_size == 0);
|
||||
CV_Assert(is_fully_aligned<T>(inplace_output, N));
|
||||
CV_Assert(inner_size % N == 0);
|
||||
|
||||
auto kernel = raw::biasN_generic_op_inplace_vec<T, ActivationOp, N>;
|
||||
auto policy = make_policy(kernel, inplace_output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, inplace_output, inner_size / N, bias, params);
|
||||
}
|
||||
|
||||
template <class T, class ActivationOp> static
|
||||
void biasN_generic_op_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, const typename ActivationOp::Params& params = {}) {
|
||||
if (is_fully_aligned<T>(inplace_output, 4) && inner_size % 4 == 0) {
|
||||
launch_vectorized_biasN_generic_op_inplace<T, ActivationOp, 4>(stream, inplace_output, inner_size, bias, params);
|
||||
} else if (is_fully_aligned<T>(inplace_output, 2) && inner_size % 2 == 0) {
|
||||
launch_vectorized_biasN_generic_op_inplace<T, ActivationOp, 2>(stream, inplace_output, inner_size, bias, params);
|
||||
} else {
|
||||
launch_vectorized_biasN_generic_op_inplace<T, ActivationOp, 1>(stream, inplace_output, inner_size, bias, params);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_relu_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, T slope) {
|
||||
biasN_generic_op_inplace<T, ReLUFunctor<T>>(stream, inplace_output, inner_size, bias, {slope});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_clipped_relu_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, T floor, T ceil) {
|
||||
CV_Assert(static_cast<double>(floor) <= static_cast<double>(ceil));
|
||||
biasN_generic_op_inplace<T, ClippedReLUFunctor<T>>(stream, inplace_output, inner_size, bias, {floor, ceil});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_tanh_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias) {
|
||||
biasN_generic_op_inplace<T, TanHFunctor<T>>(stream, inplace_output, inner_size, bias);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_swish_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias) {
|
||||
biasN_generic_op_inplace<T, SwishFunctor<T>>(stream, inplace_output, inner_size, bias);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_mish_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias) {
|
||||
biasN_generic_op_inplace<T, MishFunctor<T>>(stream, inplace_output, inner_size, bias);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_sigmoid_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias) {
|
||||
biasN_generic_op_inplace<T, SigmoidFunctor<T>>(stream, inplace_output, inner_size, bias);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_power_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, T power, T scale, T shift) {
|
||||
biasN_generic_op_inplace<T, PowerFunctor<T>>(stream, inplace_output, inner_size, bias, {power, scale, shift});
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void biasN_relu_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, __half);
|
||||
template void biasN_clipped_relu_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, __half, __half);
|
||||
template void biasN_tanh_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>);
|
||||
template void biasN_swish_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>);
|
||||
template void biasN_mish_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>);
|
||||
template void biasN_sigmoid_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>);
|
||||
template void biasN_power_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, __half, __half, __half);
|
||||
#endif
|
||||
|
||||
template void biasN_relu_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, float);
|
||||
template void biasN_clipped_relu_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, float, float);
|
||||
template void biasN_tanh_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>);
|
||||
template void biasN_swish_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>);
|
||||
template void biasN_mish_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>);
|
||||
template void biasN_sigmoid_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>);
|
||||
template void biasN_power_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, float, float, float);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
125
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bias_activation_eltwise.cu
vendored
Normal file
125
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bias_activation_eltwise.cu
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "functors.hpp"
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
|
||||
template <class T, class ActivationOp, class EltwiseOp, std::size_t N>
|
||||
__global__ void biasN_generic_op_eltwise_op_inplace_vec(Span<T> inplace_output, size_type inner_size, View<T> bias, View<T> eltwise, const typename ActivationOp::Params act_params, const typename EltwiseOp::Params eltwise_params) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto inplace_output_vPtr = vector_type::get_pointer(inplace_output.data());
|
||||
auto eltwise_vPtr = vector_type::get_pointer(eltwise.data());
|
||||
|
||||
ActivationOp activation_op(act_params);
|
||||
EltwiseOp eltwise_op(eltwise_params);
|
||||
|
||||
for (auto i : grid_stride_range(inplace_output.size() / vector_type::size())) {
|
||||
const index_type bias_idx = (i / inner_size) % bias.size();
|
||||
|
||||
vector_type output_vec, eltwise_vec;
|
||||
v_load(output_vec, inplace_output_vPtr[i]);
|
||||
v_load(eltwise_vec, eltwise_vPtr[i]);
|
||||
for(int j = 0; j < output_vec.size(); j++)
|
||||
output_vec.data[j] = eltwise_op(activation_op(output_vec.data[j] + bias[bias_idx]), eltwise_vec.data[j]);
|
||||
v_store(inplace_output_vPtr[i], output_vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, class ActivationOp, class EltwiseOp, std::size_t N> static
|
||||
void launch_vectorized_biasN_generic_op_eltwise_op_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, const typename ActivationOp::Params& act_params, const typename EltwiseOp::Params& eltwise_params) {
|
||||
CV_Assert(is_fully_aligned<T>(inplace_output, N));
|
||||
CV_Assert(is_fully_aligned<T>(eltwise, N));
|
||||
CV_Assert(inner_size % N == 0);
|
||||
|
||||
auto kernel = raw::biasN_generic_op_eltwise_op_inplace_vec<T, ActivationOp, EltwiseOp, N>;
|
||||
auto policy = make_policy(kernel, inplace_output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, inplace_output, inner_size / N, bias, eltwise, act_params, eltwise_params);
|
||||
}
|
||||
|
||||
template <class T, class ActivationOp, class EltwiseOp> static
|
||||
void biasN_generic_op_eltwise_op_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, const typename ActivationOp::Params& act_params = {}, const typename EltwiseOp::Params& eltwise_params = {}) {
|
||||
CV_Assert(inplace_output.size() == eltwise.size());
|
||||
|
||||
if (is_fully_aligned<T>(inplace_output, 4) && is_fully_aligned<T>(eltwise, 4) && inner_size % 4 == 0) {
|
||||
launch_vectorized_biasN_generic_op_eltwise_op_inplace<T, ActivationOp, EltwiseOp, 4>(stream, inplace_output, inner_size, bias, eltwise, act_params, eltwise_params);
|
||||
} else if (is_fully_aligned<T>(inplace_output, 2) && is_fully_aligned<T>(eltwise, 2) && inner_size % 2 == 0) {
|
||||
launch_vectorized_biasN_generic_op_eltwise_op_inplace<T, ActivationOp, EltwiseOp, 2>(stream, inplace_output, inner_size, bias, eltwise, act_params, eltwise_params);
|
||||
} else {
|
||||
launch_vectorized_biasN_generic_op_eltwise_op_inplace<T, ActivationOp, EltwiseOp, 1>(stream, inplace_output, inner_size, bias, eltwise, act_params, eltwise_params);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_relu_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, T slope) {
|
||||
biasN_generic_op_eltwise_op_inplace<T, ReLUFunctor<T>, SumFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise, {slope});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_clipped_relu_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, T floor, T ceiling) {
|
||||
CV_Assert(static_cast<double>(floor) <= static_cast<double>(ceiling));
|
||||
biasN_generic_op_eltwise_op_inplace<T, ClippedReLUFunctor<T>, SumFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise, {floor, ceiling});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_tanh_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_generic_op_eltwise_op_inplace<T, TanHFunctor<T>, SumFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_swish_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_generic_op_eltwise_op_inplace<T, SwishFunctor<T>, SumFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_mish_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_generic_op_eltwise_op_inplace<T, MishFunctor<T>, SumFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_sigmoid_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_generic_op_eltwise_op_inplace<T, SigmoidFunctor<T>, SumFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_power_eltwise_sum_2_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, T exp, T scale, T shift) {
|
||||
biasN_generic_op_eltwise_op_inplace<T, PowerFunctor<T>, SumFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise, {exp, scale, shift});
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void biasN_relu_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>, __half);
|
||||
template void biasN_clipped_relu_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>, __half, __half);
|
||||
template void biasN_tanh_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_swish_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_mish_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_sigmoid_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_power_eltwise_sum_2_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>, __half, __half, __half);
|
||||
#endif
|
||||
|
||||
template void biasN_relu_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>, float);
|
||||
template void biasN_clipped_relu_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>, float, float);
|
||||
template void biasN_tanh_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_swish_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_mish_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_sigmoid_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_power_eltwise_sum_2_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>, float, float, float);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
132
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bias_eltwise_activation.cu
vendored
Normal file
132
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/bias_eltwise_activation.cu
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "functors.hpp"
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, class EltwiseOp, class ActivationOp, std::size_t N>
|
||||
__global__ void biasN_eltwise_op_generic_op_inplace_vec(Span<T> inplace_output, size_type inner_size, View<T> bias, View<T> eltwise, const typename EltwiseOp::Params eltwise_params, const typename ActivationOp::Params act_params) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto inplace_output_vPtr = vector_type::get_pointer(inplace_output.data());
|
||||
auto eltwise_vPtr = vector_type::get_pointer(eltwise.data());
|
||||
|
||||
EltwiseOp eltwise_op(eltwise_params);
|
||||
ActivationOp activation_op(act_params);
|
||||
|
||||
for (auto i : grid_stride_range(inplace_output.size() / vector_type::size())) {
|
||||
const index_type bias_idx = (i / inner_size) % bias.size();
|
||||
|
||||
vector_type output_vec, eltwise_vec;
|
||||
v_load(output_vec, inplace_output_vPtr[i]);
|
||||
v_load(eltwise_vec, eltwise_vPtr[i]);
|
||||
for(int j = 0; j < output_vec.size(); j++)
|
||||
output_vec.data[j] = activation_op(eltwise_op(output_vec.data[j] + bias[bias_idx], eltwise_vec.data[j]));
|
||||
v_store(inplace_output_vPtr[i], output_vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, class EltwiseOp, class ActivationOp, std::size_t N> static
|
||||
void launch_vectorized_biasN_eltwise_op_generic_op_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, const typename EltwiseOp::Params& eltwise_params, const typename ActivationOp::Params& act_params) {
|
||||
CV_Assert(is_fully_aligned<T>(inplace_output, N));
|
||||
CV_Assert(inplace_output.size() % bias.size() == 0);
|
||||
CV_Assert(is_fully_aligned<T>(eltwise, N));
|
||||
CV_Assert(inner_size % N == 0);
|
||||
|
||||
auto kernel = raw::biasN_eltwise_op_generic_op_inplace_vec<T, EltwiseOp, ActivationOp, N>;
|
||||
auto policy = make_policy(kernel, inplace_output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, inplace_output, inner_size / N, bias, eltwise, eltwise_params, act_params);
|
||||
}
|
||||
|
||||
template <class T, class EltwiseOp, class ActivationOp> static
|
||||
void biasN_eltwise_op_generic_op_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, const typename EltwiseOp::Params& eltwise_params = {}, const typename ActivationOp::Params& act_params = {}) {
|
||||
CV_Assert(inplace_output.size() == eltwise.size());
|
||||
|
||||
if (is_fully_aligned<T>(inplace_output, 4) && is_fully_aligned<T>(eltwise, 4) && inner_size % 4 == 0) {
|
||||
launch_vectorized_biasN_eltwise_op_generic_op_inplace<T, EltwiseOp, ActivationOp, 4>(stream, inplace_output, inner_size, bias, eltwise, eltwise_params, act_params);
|
||||
} else if (is_fully_aligned<T>(inplace_output, 2) && is_fully_aligned<T>(eltwise, 2) && inner_size % 2 == 0) {
|
||||
launch_vectorized_biasN_eltwise_op_generic_op_inplace<T, EltwiseOp, ActivationOp, 2>(stream, inplace_output, inner_size, bias, eltwise, eltwise_params, act_params);
|
||||
} else {
|
||||
launch_vectorized_biasN_eltwise_op_generic_op_inplace<T, EltwiseOp, ActivationOp, 1>(stream, inplace_output, inner_size, bias, eltwise, eltwise_params, act_params);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_identity_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, IdentityFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_relu_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, T slope) {
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, ReLUFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise, {}, {slope});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_clipped_relu_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, T floor, T ceiling) {
|
||||
CV_Assert(static_cast<double>(floor) <= static_cast<double>(ceiling));
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, ClippedReLUFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise, {}, {floor, ceiling});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_tanh_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, TanHFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_swish_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, SwishFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_mish_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, MishFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_sigmoid_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise) {
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, SigmoidFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN_eltwise_sum_2_power_inplace(const Stream& stream, Span<T> inplace_output, std::size_t inner_size, View<T> bias, View<T> eltwise, T exp, T scale, T shift) {
|
||||
biasN_eltwise_op_generic_op_inplace<T, SumFunctor<T>, PowerFunctor<T>>(stream, inplace_output, inner_size, bias, eltwise, {}, {exp, scale, shift});
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void biasN_eltwise_sum_2_identity_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_eltwise_sum_2_relu_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>, __half);
|
||||
template void biasN_eltwise_sum_2_clipped_relu_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>, __half, __half);
|
||||
template void biasN_eltwise_sum_2_tanh_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_eltwise_sum_2_swish_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_eltwise_sum_2_mish_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_eltwise_sum_2_sigmoid_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>);
|
||||
template void biasN_eltwise_sum_2_power_inplace<__half>(const Stream&, Span<__half>, std::size_t, View<__half>, View<__half>, __half, __half, __half);
|
||||
#endif
|
||||
|
||||
template void biasN_eltwise_sum_2_identity_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_eltwise_sum_2_relu_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>, float);
|
||||
template void biasN_eltwise_sum_2_clipped_relu_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>, float, float);
|
||||
template void biasN_eltwise_sum_2_tanh_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_eltwise_sum_2_swish_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_eltwise_sum_2_mish_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_eltwise_sum_2_sigmoid_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>);
|
||||
template void biasN_eltwise_sum_2_power_inplace<float>(const Stream&, Span<float>, std::size_t, View<float>, View<float>, float, float, float);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
71
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/block_stride_range.hpp
vendored
Normal file
71
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/block_stride_range.hpp
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_BLOCK_STRIDE_RANGE_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_BLOCK_STRIDE_RANGE_HPP
|
||||
|
||||
#include "types.hpp"
|
||||
#include "index_helpers.hpp"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
template <int dim, int BLOCK_SIZE = 0, class index_type = device::index_type, class size_type = device::size_type>
|
||||
class block_stride_range_generic {
|
||||
public:
|
||||
__device__ block_stride_range_generic(index_type to_) : from(0), to(to_) { }
|
||||
__device__ block_stride_range_generic(index_type from_, index_type to_) : from(from_), to(to_) { }
|
||||
|
||||
class iterator
|
||||
{
|
||||
public:
|
||||
__device__ iterator(index_type pos_) : pos(pos_) {}
|
||||
|
||||
/* these iterators return the index when dereferenced; this allows us to loop
|
||||
* through the indices using a range based for loop
|
||||
*/
|
||||
__device__ index_type operator*() const { return pos; }
|
||||
|
||||
__device__ iterator& operator++() {
|
||||
const index_type block_size = BLOCK_SIZE == 0 ? getBlockDim<dim>() : BLOCK_SIZE;
|
||||
pos += block_size;
|
||||
return *this;
|
||||
}
|
||||
|
||||
__device__ bool operator!=(const iterator& other) const {
|
||||
/* NOTE HACK
|
||||
* 'pos' can move in large steps (see operator++)
|
||||
* expansion of range for loop uses != as the loop conditioion
|
||||
* => operator!= must return false if 'pos' crosses the end
|
||||
*/
|
||||
return pos < other.pos;
|
||||
}
|
||||
|
||||
private:
|
||||
index_type pos;
|
||||
};
|
||||
|
||||
__device__ iterator begin() const {
|
||||
return iterator(from + getThreadIdx<dim>());
|
||||
}
|
||||
|
||||
__device__ iterator end() const {
|
||||
return iterator(to);
|
||||
}
|
||||
|
||||
private:
|
||||
index_type from, to;
|
||||
};
|
||||
|
||||
using block_stride_range_x = block_stride_range_generic<0>;
|
||||
using block_stride_range_y = block_stride_range_generic<1>;
|
||||
using block_stride_range_z = block_stride_range_generic<2>;
|
||||
|
||||
template <size_type BLOCK_SIZE = 0>
|
||||
using block_stride_range = block_stride_range_generic<0, BLOCK_SIZE>;
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_BLOCK_STRIDE_RANGE_HPP */
|
||||
277
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/concat.cu
vendored
Normal file
277
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/concat.cu
vendored
Normal file
@@ -0,0 +1,277 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "array.hpp"
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "kernel_dispatcher.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include "../cuda4dnn/kernels/fill_copy.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, std::size_t N>
|
||||
__global__ void concat_vec(
|
||||
Span<T> output, size_type output_axis_size, index_type output_axis_offset,
|
||||
View<T> input, size_type input_axis_size, size_type concat_size)
|
||||
{
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = vector_type::get_pointer(input.data());
|
||||
|
||||
/* we need to copy all the elements of input to some location in the output
|
||||
* we copy blocks of size `total_concat_size` to some location in the output
|
||||
*/
|
||||
const auto total_concat_size = concat_size * input_axis_size;
|
||||
|
||||
for (auto in_idx : grid_stride_range(input.size() / vector_type::size())) {
|
||||
const index_type idx = in_idx * vector_type::size();
|
||||
const index_type concat_num = idx / total_concat_size;
|
||||
const index_type concat_index = idx % total_concat_size;
|
||||
const index_type top_index = concat_index +
|
||||
(concat_num * output_axis_size + output_axis_offset) * concat_size;
|
||||
|
||||
const auto out_idx = top_index / vector_type::size();
|
||||
|
||||
vector_type vec;
|
||||
v_load(vec, input_vPtr[in_idx]);
|
||||
v_store(output_vPtr[out_idx], vec);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t Rank>
|
||||
__global__ void concat_with_offsets(
|
||||
Span<T> output, array<size_type, Rank> out_strides, array<index_type, Rank> out_offset,
|
||||
View<T> input, array<size_type, Rank> in_strides)
|
||||
{
|
||||
for (auto i : grid_stride_range(input.size())) {
|
||||
index_type in_index = i / in_strides[0];
|
||||
index_type out_index = out_offset[0] + in_index;
|
||||
index_type oidx = out_index * out_strides[0];
|
||||
for (int j = 1; j < Rank; j++) {
|
||||
in_index = (i % in_strides[j - 1]) / in_strides[j];
|
||||
out_index = out_offset[j] + in_index;
|
||||
oidx += out_index * out_strides[j];
|
||||
}
|
||||
|
||||
output[oidx] = input[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_vectorized_concat(const Stream& stream,
|
||||
Span<T> output, size_type output_axis_size, index_type output_axis_offset,
|
||||
View<T> input, size_type input_axis_size, size_type concat_size)
|
||||
{
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
/* more assertions are required to fully check for vectorization possibility; check concat() */
|
||||
|
||||
auto kernel = raw::concat_vec<T, N>;
|
||||
auto policy = make_policy(kernel, input.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, output_axis_size, output_axis_offset, input, input_axis_size, concat_size);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void concat(
|
||||
const Stream& stream,
|
||||
TensorSpan<T> output, std::size_t output_axis_offset,
|
||||
TensorView<T> input, std::size_t axis)
|
||||
{
|
||||
CV_Assert(output.rank() == input.rank());
|
||||
CV_Assert(output_axis_offset < output.get_axis_size(axis));
|
||||
|
||||
/* if axes preceeding the concat axis are all singleton, the concat blocks are contiguous
|
||||
* in the output and we can copy each block directly
|
||||
*/
|
||||
if (output.size_range(0, axis) == 1)
|
||||
{
|
||||
auto stride = output.size_range(axis + 1, output.rank());
|
||||
auto sliced_output = Span<T>(output.get() + output_axis_offset * stride, input.size());
|
||||
kernels::copy<T>(stream, sliced_output, input);
|
||||
return;
|
||||
}
|
||||
|
||||
/* let's call the axis of interest as the channel axis for the purpose of the following discussion
|
||||
* even though it can be any axis
|
||||
*
|
||||
* for each batch item:
|
||||
* we move all the channels from the input (which together, for a single batch item, is contiguous)
|
||||
* of a batch item to its corresponding contiguous place in the output
|
||||
*
|
||||
* for a valid vector operation:
|
||||
* - the size of each copy block must be aligned
|
||||
* - input must be aligned
|
||||
* - all the destination locations in the output must be aligned
|
||||
*/
|
||||
std::size_t concat_size = output.size_range(axis + 1, output.rank());
|
||||
|
||||
std::size_t input_axis_size = input.get_axis_size(axis);
|
||||
std::size_t output_axis_size = output.get_axis_size(axis);
|
||||
|
||||
std::size_t copy_block_size = concat_size * input_axis_size;
|
||||
std::size_t copy_block_stride = concat_size * output_axis_size;
|
||||
std::size_t starting_offset = output_axis_offset * concat_size;
|
||||
|
||||
/* in a nutshell, all this concat operation does is copy several blocks of size `copy_block_size`
|
||||
* to the output starting from `starting_offset` with blocks in the output strided by `copy_block_stride`
|
||||
*/
|
||||
|
||||
bool is_aligned_4 = copy_block_size % 4 == 0 && copy_block_stride % 4 == 0 && starting_offset % 4 == 0;
|
||||
bool is_aligned_2 = copy_block_size % 2 == 0 && copy_block_stride % 2 == 0 && starting_offset % 2 == 0;
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4) && is_aligned_4) {
|
||||
launch_vectorized_concat<T, 4>(stream, output, output_axis_size, output_axis_offset, input, input_axis_size, concat_size);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2) && is_aligned_2) {
|
||||
launch_vectorized_concat<T, 2>(stream, output, output_axis_size, output_axis_offset, input, input_axis_size, concat_size);
|
||||
} else {
|
||||
launch_vectorized_concat<T, 1>(stream, output, output_axis_size, output_axis_offset, input, input_axis_size, concat_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Explicit instantiations of `concat` for the supported element types.
 * The `__half` instantiation is only emitted when `__CUDA_ARCH__` is undefined
 * or is at least 530; the `float` instantiation is always emitted.
 */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void concat<__half>(const Stream&, TensorSpan<__half>, std::size_t, TensorView<__half>, std::size_t);
|
||||
#endif
|
||||
template void concat<float>(const Stream&, TensorSpan<float>, std::size_t, TensorView<float>, std::size_t);
|
||||
|
||||
template <class T, std::size_t Rank> static
|
||||
void launch_concat_with_offsets(
|
||||
const Stream& stream,
|
||||
Span<T> output, const std::vector<std::size_t>& outStride, const std::vector<std::size_t>& outOffset,
|
||||
View<T> input, const std::vector<std::size_t>& inStride)
|
||||
{
|
||||
CV_Assert(outStride.size() == Rank);
|
||||
CV_Assert(outOffset.size() == Rank);
|
||||
CV_Assert(inStride.size() == Rank);
|
||||
|
||||
array<size_type, Rank> outStride_k, inStride_k;
|
||||
outStride_k.assign(std::begin(outStride), std::end(outStride));
|
||||
inStride_k.assign(std::begin(inStride), std::end(inStride));
|
||||
|
||||
array<index_type, Rank> outOffset_k;
|
||||
outOffset_k.assign(std::begin(outOffset), std::end(outOffset));
|
||||
|
||||
auto kernel = raw::concat_with_offsets<T, Rank>;
|
||||
auto policy = make_policy(kernel, input.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, output, outStride_k, outOffset_k, input, inStride_k);
|
||||
}
|
||||
|
||||
GENERATE_KERNEL_DISPATCHER(concat_with_offsets_dispatcher, launch_concat_with_offsets);
|
||||
|
||||
template <class T>
|
||||
void concat_with_offsets(
|
||||
const Stream& stream,
|
||||
TensorSpan<T> output, TensorView<T> input,
|
||||
std::vector<std::size_t> offsets)
|
||||
{
|
||||
CV_Assert(output.rank() == input.rank());
|
||||
CV_Assert(output.rank() == offsets.size());
|
||||
|
||||
/* squeezable axes at the beginning of both tensors can be eliminated
|
||||
*
|
||||
* Reasoning:
|
||||
* ----------
|
||||
* Suppose an item's indices in the input tensor is [i1, i2, ...]. The indices in the output
|
||||
* tensor will be [i1 + off1, i2 + off2, ...]. The concat operation essentially copies items
|
||||
* from the input tensor to new locations in the output tensor.
|
||||
*
|
||||
* If the size of the first axis of the input and output tensor is unity, the input and output
|
||||
* indices for all the elements will be of the form be [0, i2, ...] and [0, i2 + off2, ...]
|
||||
* respectively. The first index does not contribute to the element's address calculation and
|
||||
* hence does nothing apart from eating up few cycles.
|
||||
*/
|
||||
while (input.get_axis_size(0) == 1 && output.get_axis_size(0) == 1) {
|
||||
CV_Assert(offsets[0] == 0);
|
||||
|
||||
input.squeeze(0);
|
||||
output.squeeze(0);
|
||||
offsets.erase(std::begin(offsets));
|
||||
|
||||
CV_Assert(output.rank() == input.rank());
|
||||
CV_Assert(output.rank() == offsets.size());
|
||||
}
|
||||
|
||||
auto inShape = input.shape_as_vector();
|
||||
auto outShape = output.shape_as_vector();
|
||||
|
||||
/* contiguous axes that undergo full copy can be combined into one axis
|
||||
*
|
||||
* Reasoning:
|
||||
* ----------
|
||||
* Suppose an item's indices in the input tensor is [i1, i2, i3, ...]. Let the first two axes not undergo any
|
||||
* concatenation. The indices in the output tensor will be [i1, i2, i3 + off3, ...].
|
||||
*
|
||||
* Each axis in the contiguous axes sequence will add an offset of iN * strideN. In the above example,
|
||||
* the two axes add a total offset of `i1 * stride1 + i2 * stride2`. We can merge the two axes into one axis with
|
||||
* a size of `size1 * size2`. The new offset added will be i12 * stride2` as the kernel iterates through `i12`.
|
||||
* Note that `i12` is actually `(i1 * size2 + i2)` in the original tensor.
|
||||
*/
|
||||
for (int i = 0; i < inShape.size(); i++) {
|
||||
/* check if axis `i` requires any slicing */
|
||||
if (offsets[i] == 0 && inShape[i] == outShape[i]) {
|
||||
/* loop invariant: `i` is the first axis in the contiguous unsliced axis sequence */
|
||||
|
||||
int j = i + 1; /* `j` is the axis which we will attempt to merge */
|
||||
while (j < inShape.size() && offsets[j] == 0 && inShape[j] == outShape[j]) {
|
||||
/* `j` axis is also copied fully; merge `i` and `j` */
|
||||
auto new_size = inShape[i] * inShape[j];
|
||||
inShape[i] = new_size;
|
||||
outShape[i] = new_size;
|
||||
offsets[i] = 0; /* redundant */
|
||||
|
||||
/* delete axis `j` */
|
||||
inShape.erase(std::begin(inShape) + j);
|
||||
outShape.erase(std::begin(outShape) + j);
|
||||
offsets.erase(std::begin(offsets) + j);
|
||||
|
||||
/* optimizations should not break the invariants */
|
||||
CV_Assert(inShape.size() == outShape.size());
|
||||
CV_Assert(inShape.size() == offsets.size());
|
||||
CV_Assert(inShape[i] == outShape[i]);
|
||||
CV_Assert(offsets[i] == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto rank = inShape.size();
|
||||
|
||||
std::vector<std::size_t> inStride(rank), outStride(rank);
|
||||
inStride.back() = 1;
|
||||
outStride.back() = 1;
|
||||
/* garbage, ..., garbage, 1 */
|
||||
|
||||
std::copy(std::begin(inShape) + 1, std::end(inShape), std::begin(inStride));
|
||||
std::copy(std::begin(outShape) + 1, std::end(outShape), std::begin(outStride));
|
||||
/* dim[0], dim[1], ..., dim[-1], 1 */
|
||||
|
||||
std::partial_sum(inStride.rbegin(), inStride.rend(), inStride.rbegin(), std::multiplies<int>());
|
||||
std::partial_sum(outStride.rbegin(), outStride.rend(), outStride.rbegin(), std::multiplies<int>());
|
||||
/* stride[0], stride[1], ..., stride[-2], 1 */
|
||||
|
||||
CV_Assert(1 <= rank && rank <= CSL_MAX_TENSOR_RANK);
|
||||
concat_with_offsets_dispatcher<T, 1, CSL_MAX_TENSOR_RANK>(rank, stream, output, outStride, offsets, input, inStride);
|
||||
}
|
||||
|
||||
/* Explicit instantiations of `concat_with_offsets` for `__half` and `float`.
 * NOTE(review): unlike the `concat<__half>` instantiation earlier in this file, the `__half`
 * instantiation here is not wrapped in the `__CUDA_ARCH__ >= 530` guard - confirm this is intended.
 */
template void concat_with_offsets(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::size_t>);
|
||||
template void concat_with_offsets(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::size_t>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
171
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/crop_and_resize.cu
vendored
Normal file
171
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/crop_and_resize.cu
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "types.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "memory.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
|
||||
template <class T, std::size_t CHANNELS_PER_ITER>
|
||||
__global__ void crop_and_resize(
|
||||
Span<T> output, size_type out_height, size_type out_width,
|
||||
View<T> input, size_type in_height, size_type in_width,
|
||||
View<T> boxes,
|
||||
size_type num_channels)
|
||||
{
|
||||
// input [1, num_channels, in_height, in_width]
|
||||
// output [boxes, num_channels, out_height, out_width]
|
||||
|
||||
const auto in_image_size = in_height * in_width;
|
||||
const auto out_image_size = out_height * out_width;
|
||||
const auto out_box_size = num_channels * out_image_size;
|
||||
|
||||
/* we have to compute the output value for every combination of (box, c, y, x) in the output
|
||||
*
|
||||
* the computation involving (y, x) are identical for all non-spatial dimensions
|
||||
* the computation and memory requests involving the box are identical for remaining three axes
|
||||
*
|
||||
* we process multiple channels every iteration to reuse the identical computation
|
||||
* and memory requests involved with the box and spatial dimensions
|
||||
*/
|
||||
|
||||
/*
|
||||
* if we are processing `CHANNELS_PER_ITER` channels per iteration, we will need
|
||||
* (num_channels / CHANNELS_PER_ITER) iterations per (box, x, y)
|
||||
*/
|
||||
auto num_channel_iters_per_box_xy = num_channels / CHANNELS_PER_ITER;
|
||||
|
||||
/* we need `num_channel_iters_per_box_xy` iterations per (box, x, y) and there are
|
||||
* `num_boxes` boxes and `out_image_size` combinations of (x, y)
|
||||
*/
|
||||
auto num_boxes = boxes.size() / 7; /* 7 values per box */
|
||||
auto iters_per_box = num_channel_iters_per_box_xy * out_image_size;
|
||||
auto iters_required = num_boxes * iters_per_box;
|
||||
|
||||
for (auto iter : grid_stride_range(iters_required)) {
|
||||
const index_type box_no = iter / iters_per_box;
|
||||
const index_type c_start = ((iter % iters_per_box) / out_image_size) * CHANNELS_PER_ITER;
|
||||
|
||||
/* note here that consecutive `iter` values will often have consecutive `x` values
|
||||
* => stores into output will be coalesced across threads
|
||||
*/
|
||||
const index_type y = (iter % out_image_size) / out_width;
|
||||
const index_type x = iter % out_width;
|
||||
|
||||
const index_type box_offset = box_no * 7;
|
||||
const auto left = boxes[box_offset + 3],
|
||||
top = boxes[box_offset + 4],
|
||||
right = boxes[box_offset + 5],
|
||||
bottom = boxes[box_offset + 6];
|
||||
|
||||
const auto box_width = right - left;
|
||||
const auto box_height = bottom - top;
|
||||
|
||||
const auto o2i_fy = static_cast<T>(in_height - 1) / static_cast<T>(out_height - 1);
|
||||
const auto o2i_fx = static_cast<T>(in_width - 1) / static_cast<T>(out_width - 1);
|
||||
|
||||
const auto height_scale = box_height * o2i_fy;
|
||||
const auto width_scale = box_width * o2i_fx;
|
||||
|
||||
const auto in_y = top * static_cast<T>(in_height - 1) + static_cast<T>(y) * height_scale;
|
||||
const auto in_x = left * static_cast<T>(in_width - 1) + static_cast<T>(x) * width_scale;
|
||||
|
||||
const auto in_y0 = static_cast<index_type>(in_y);
|
||||
const auto in_x0 = static_cast<index_type>(in_x);
|
||||
|
||||
using device::min;
|
||||
const auto in_x1 = min<index_type>(in_x0 + 1, in_width - 1);
|
||||
const auto in_y1 = min<index_type>(in_y0 + 1, in_height - 1);
|
||||
|
||||
index_type in_offset_r0 = c_start * in_image_size + in_y0 * in_width;
|
||||
index_type in_offset_r1 = c_start * in_image_size + in_y1 * in_width;
|
||||
index_type out_idx = box_no * out_box_size + c_start * out_image_size + y * out_width + x;
|
||||
|
||||
#pragma unroll 1 /* disable unrolling */
|
||||
for (int i = 0; i < CHANNELS_PER_ITER; i++) {
|
||||
auto v_00 = load_ldg(input[in_offset_r0 + in_x0]),
|
||||
v_01 = load_ldg(input[in_offset_r0 + in_x1]),
|
||||
v_10 = load_ldg(input[in_offset_r1 + in_x0]),
|
||||
v_11 = load_ldg(input[in_offset_r1 + in_x1]);
|
||||
|
||||
output[out_idx] =
|
||||
v_00 +
|
||||
T(in_y - T(in_y0)) * T(v_10 - v_00) +
|
||||
T(in_x - T(in_x0)) * T(v_01 - v_00) +
|
||||
T(in_y - T(in_y0)) * T(in_x - T(in_x0)) * T(v_11 - v_01 - v_10 + v_00);
|
||||
|
||||
in_offset_r0 += in_image_size;
|
||||
in_offset_r1 += in_image_size;
|
||||
out_idx += out_image_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t CHANNELS_PER_ITER> static
|
||||
void launch_multichannel_crop_and_resize(const Stream& stream,
|
||||
Span<T> output, size_type out_height, size_type out_width,
|
||||
View<T> input, size_type in_height, size_type in_width,
|
||||
View<T> boxes, size_type num_channels)
|
||||
{
|
||||
auto kernel = raw::crop_and_resize<T, CHANNELS_PER_ITER>;
|
||||
auto policy = make_policy(kernel, output.size() / CHANNELS_PER_ITER, 0, stream);
|
||||
launch_kernel(kernel, policy, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void crop_and_resize(const Stream& stream, TensorSpan<T> output, TensorView<T> input, View<T> boxes) {
|
||||
CV_Assert(input.get_axis_size(0) == 1); /* batch not supported */
|
||||
CV_Assert(input.get_axis_size(1) == output.get_axis_size(1));
|
||||
|
||||
auto out_height = output.get_axis_size(-2);
|
||||
auto out_width = output.get_axis_size(-1);
|
||||
|
||||
auto in_height = input.get_axis_size(-2);
|
||||
auto in_width = input.get_axis_size(-1);
|
||||
|
||||
auto num_channels = input.get_axis_size(1);
|
||||
|
||||
if (num_channels % 64 == 0) {
|
||||
launch_multichannel_crop_and_resize<T, 64>(stream, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
} else if (num_channels % 32 == 0) {
|
||||
launch_multichannel_crop_and_resize<T, 32>(stream, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
} else if (num_channels % 16 == 0) {
|
||||
launch_multichannel_crop_and_resize<T, 16>(stream, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
} else if (num_channels % 8 == 0) {
|
||||
launch_multichannel_crop_and_resize<T, 8>(stream, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
} else if (num_channels % 4 == 0) {
|
||||
launch_multichannel_crop_and_resize<T, 4>(stream, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
} else if (num_channels % 2 == 0) {
|
||||
launch_multichannel_crop_and_resize<T, 2>(stream, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
} else {
|
||||
launch_multichannel_crop_and_resize<T, 1>(stream, output, out_height, out_width, input, in_height, in_width, boxes, num_channels);
|
||||
}
|
||||
}
|
||||
|
||||
/* Explicit instantiations of `crop_and_resize` for the supported element types.
 * The `__half` instantiation is only emitted when `__CUDA_ARCH__` is undefined
 * or is at least 530; the `float` instantiation is always emitted.
 */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void crop_and_resize<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, View<__half> boxes);
|
||||
#endif
|
||||
template void crop_and_resize<float>(const Stream&, TensorSpan<float>, TensorView<float>, View<float> boxes);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
897
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/detection_output.cu
vendored
Normal file
897
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/detection_output.cu
vendored
Normal file
@@ -0,0 +1,897 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "bbox_utils.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "block_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "memory.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
|
||||
template <class T, bool SHARE_LOCATION, bool VARIANCE_ENCODED_IN_TARGET, bool CORNER_TRUE_CENTER_FALSE, bool CLIP_BBOX>
|
||||
__global__ void decode_bbox(Span<T> decoded_bboxes, View<T> locations, View<T> priors,
|
||||
bool transpose_location, bool normalized_bbox,
|
||||
size_type num_loc_classes, index_type background_class_id,
|
||||
float clip_width, float clip_height)
|
||||
{
|
||||
// decoded_bboxes: [batch_size, num_priors, num_loc_classes, 4]
|
||||
// locations: [batch_size, num_priors, num_loc_classes, 4]
|
||||
// priors: [1, C, num_priors, 4]
|
||||
// C = 2 if !VARIANCE_ENCODED_IN_TARGET; otherwise, 1
|
||||
|
||||
/* 4 bbox values + 4 variance values per prior */
|
||||
constexpr int PRIOR_BOX_SIZE = VARIANCE_ENCODED_IN_TARGET ? 4 : 8;
|
||||
const size_type num_priors = priors.size() / PRIOR_BOX_SIZE;
|
||||
|
||||
using vector_type = get_vector_type_t<T, 4>;
|
||||
auto locations_vPtr = vector_type::get_pointer(locations.data());
|
||||
auto priors_vPtr = vector_type::get_pointer(priors.data());
|
||||
auto decoded_bboxes_vPtr = vector_type::get_pointer(decoded_bboxes.data());
|
||||
|
||||
const auto boxes_per_batch = num_priors * num_loc_classes;
|
||||
for (auto idx : grid_stride_range(decoded_bboxes.size() / 4))
|
||||
{
|
||||
index_type p;
|
||||
index_type c;
|
||||
|
||||
if (SHARE_LOCATION)
|
||||
{
|
||||
// locations are shared across all classes => num_loc_classes = 1
|
||||
p = idx % boxes_per_batch;
|
||||
c = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = (idx % boxes_per_batch) / num_loc_classes;
|
||||
c = idx % num_loc_classes;
|
||||
}
|
||||
|
||||
if (!SHARE_LOCATION && c == background_class_id)
|
||||
continue;
|
||||
|
||||
BoundingBox bbox;
|
||||
{
|
||||
vector_type location;
|
||||
v_load(location, locations_vPtr[idx]);
|
||||
|
||||
if (transpose_location)
|
||||
{
|
||||
bbox.ymin = location.data[0];
|
||||
bbox.xmin = location.data[1];
|
||||
bbox.ymax = location.data[2];
|
||||
bbox.xmax = location.data[3];
|
||||
}
|
||||
else
|
||||
{
|
||||
bbox.xmin = location.data[0];
|
||||
bbox.ymin = location.data[1];
|
||||
bbox.xmax = location.data[2];
|
||||
bbox.ymax = location.data[3];
|
||||
}
|
||||
}
|
||||
|
||||
if (!VARIANCE_ENCODED_IN_TARGET)
|
||||
{
|
||||
vector_type prior_variance;
|
||||
v_load_ldg(prior_variance, priors_vPtr[num_priors + p]);
|
||||
|
||||
bbox.xmin *= static_cast<float>(prior_variance.data[0]);
|
||||
bbox.ymin *= static_cast<float>(prior_variance.data[1]);
|
||||
bbox.xmax *= static_cast<float>(prior_variance.data[2]);
|
||||
bbox.ymax *= static_cast<float>(prior_variance.data[3]);
|
||||
}
|
||||
|
||||
BoundingBox prior;
|
||||
{
|
||||
vector_type prior_box;
|
||||
v_load_ldg(prior_box, priors_vPtr[p]);
|
||||
|
||||
prior.xmin = prior_box.data[0];
|
||||
prior.ymin = prior_box.data[1];
|
||||
prior.xmax = prior_box.data[2];
|
||||
prior.ymax = prior_box.data[3];
|
||||
}
|
||||
|
||||
BoundingBox decoded_bbox;
|
||||
if (CORNER_TRUE_CENTER_FALSE)
|
||||
{
|
||||
decoded_bbox.xmin = prior.xmin + bbox.xmin;
|
||||
decoded_bbox.ymin = prior.ymin + bbox.ymin;
|
||||
decoded_bbox.xmax = prior.xmax + bbox.xmax;
|
||||
decoded_bbox.ymax = prior.ymax + bbox.ymax;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto prior_width = prior.xmax - prior.xmin;
|
||||
auto prior_height = prior.ymax - prior.ymin;
|
||||
if (!normalized_bbox)
|
||||
{
|
||||
prior_width += 1;
|
||||
prior_height += 1;
|
||||
}
|
||||
|
||||
auto prior_center_x = prior.xmin + prior_width * 0.5f;
|
||||
auto prior_center_y = prior.ymin + prior_height * 0.5f;
|
||||
|
||||
auto decode_bbox_center_x = bbox.xmin * prior_width + prior_center_x;
|
||||
auto decode_bbox_center_y = bbox.ymin * prior_height + prior_center_y;
|
||||
|
||||
using device::exp;
|
||||
float decode_bbox_width = exp(bbox.xmax) * prior_width;
|
||||
float decode_bbox_height = exp(bbox.ymax) * prior_height;
|
||||
|
||||
decoded_bbox.xmin = decode_bbox_center_x - decode_bbox_width * 0.5f;
|
||||
decoded_bbox.ymin = decode_bbox_center_y - decode_bbox_height * 0.5f;
|
||||
decoded_bbox.xmax = decode_bbox_center_x + decode_bbox_width * 0.5f;
|
||||
decoded_bbox.ymax = decode_bbox_center_y + decode_bbox_height * 0.5f;
|
||||
}
|
||||
|
||||
vector_type decoded_bbox_vec;
|
||||
if (CLIP_BBOX)
|
||||
{
|
||||
decoded_bbox_vec.data[0] = clamp(decoded_bbox.xmin, 0.0f, clip_width);
|
||||
decoded_bbox_vec.data[1] = clamp(decoded_bbox.ymin, 0.0f, clip_height);
|
||||
decoded_bbox_vec.data[2] = clamp(decoded_bbox.xmax, 0.0f, clip_width);
|
||||
decoded_bbox_vec.data[3] = clamp(decoded_bbox.ymax, 0.0f, clip_height);
|
||||
}
|
||||
else
|
||||
{
|
||||
decoded_bbox_vec.data[0] = decoded_bbox.xmin;
|
||||
decoded_bbox_vec.data[1] = decoded_bbox.ymin;
|
||||
decoded_bbox_vec.data[2] = decoded_bbox.xmax;
|
||||
decoded_bbox_vec.data[3] = decoded_bbox.ymax;
|
||||
}
|
||||
|
||||
v_store(decoded_bboxes_vPtr[idx], decoded_bbox_vec);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, int BINS, int BLOCK_SIZE>
|
||||
__launch_bounds__(BLOCK_SIZE)
|
||||
__global__ void findTopK(Span<int> indices_, Span<int> count_, View<T> scores_, float threshold, size_type classwise_topK, size_type num_classes, size_type num_priors, index_type background_class_id)
|
||||
{
|
||||
/* We need to sort boxes based on their confidence scores. The confidence scores fall in
|
||||
* the range [0.0, 1.0]. We break the range into bins and perform count sort. This is an
|
||||
* approximate algorithm.
|
||||
*
|
||||
* Each block handles a particular class of a particular batch item.
|
||||
*/
|
||||
const auto c = blockIdx.x;
|
||||
const auto b = blockIdx.y;
|
||||
|
||||
if (c == background_class_id)
|
||||
return;
|
||||
|
||||
// indices: [batch_size, num_classes, classwise_topK]
|
||||
// count: [batch_size, num_classes]
|
||||
// scores: [batch_size, num_classes, num_priors]
|
||||
|
||||
auto count = count_.data() + b * num_classes + c;
|
||||
auto scores = scores_.data() + (b * num_classes + c) * num_priors;
|
||||
auto indices = indices_.data() + (b * num_classes + c) * classwise_topK;
|
||||
|
||||
/* We do not require a large number of bins to find the top K confidence scores. We will use
|
||||
* a reasonable number of bins which will fit in the shared memory.
|
||||
*
|
||||
* Note that smaller scores will have a smaller index, i.e. the `bins` are ordered in
|
||||
* ascending order.
|
||||
*/
|
||||
|
||||
__shared__ int bins[BINS];
|
||||
|
||||
#pragma unroll
|
||||
for (int unroll = 0; unroll < BINS / BLOCK_SIZE; unroll++)
|
||||
bins[unroll * BLOCK_SIZE + threadIdx.x] = 0;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
for (auto i : block_stride_range<BLOCK_SIZE>(num_priors))
|
||||
{
|
||||
const float confidence = load_ldg(scores[i]);
|
||||
if (confidence > threshold)
|
||||
{
|
||||
using device::fast_divide_ftz;
|
||||
auto conf_scaled = fast_divide_ftz(confidence - threshold, 1 - threshold);
|
||||
|
||||
using device::clamp;
|
||||
int bin_index = conf_scaled * BINS;
|
||||
|
||||
/* We store counts of confidence scores in the bins. Our ultimate goal is to store the indices
|
||||
* of the `classwise_topK` confidence values in the `indices` array.
|
||||
*
|
||||
* We use a little trick to parallelize the process of filling up the `indices` array.
|
||||
* We want every thread in the block to participate in the process. To do so, we want the
|
||||
* bins array to be shifted by one place to the left. We will be computing the suffix sum
|
||||
* of the bins array later. Details and reasons for doing so will be explained later.
|
||||
*/
|
||||
bin_index = clamp<int>(bin_index, 0, BINS - 1) - 1; // shift left by one
|
||||
|
||||
if (bin_index >= 0)
|
||||
atomicAdd(&bins[bin_index], 1);
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
constexpr int WARP_SIZE = 32; /* must be equal to warpSize */
|
||||
// FORWARD_COMPATIBILITY_TAG: WARP_SIZE_DEPENDENT_CODE
|
||||
|
||||
if (threadIdx.x < WARP_SIZE)
|
||||
{
|
||||
/* We can compute suffix sum of an array in groups of N numbers.
|
||||
* Let N be 4 for this example.
|
||||
*
|
||||
* 1) Last 4 numbers
|
||||
* 1 2 3 4 | 5 6 7 8 | 9 10 11 12
|
||||
* group suffix sum: 42 33 23 12
|
||||
*
|
||||
* 2) Middle 4 numbers
|
||||
* 1 2 3 4 | 5 6 7 8 | 9 10 11 12
|
||||
* group suffix sum: | 26 21 15 8 |
|
||||
*
|
||||
* We add `42` (first element in the previous group) to each element to get:
|
||||
*
|
||||
* 1 2 3 4 | 5 6 7 8 | 9 10 11 12
|
||||
* | 68 63 57 50 | 42 33 23 12
|
||||
* 3) First 4 numbers
|
||||
*
|
||||
* 1 2 3 4 | 5 6 7 8 | 9 10 11 12
|
||||
* group suffix sum: 10 9 7 4 |
|
||||
*
|
||||
* We add `68` (first element in the previous group) to each element to get:
|
||||
*
|
||||
* 1 2 3 4 | 5 6 7 8 | 9 10 11 12
|
||||
* group suffix sum: 78 77 75 72 | 68 63 57 50 | 42 33 23 12
|
||||
*
|
||||
* What we are left with now is the suffix sum of the entire array.
|
||||
*
|
||||
* We use the aforementioned logic in the code below but work in groups of `warpSize`.
|
||||
*/
|
||||
|
||||
/* We calculate suffix sums WARP_SIZE elements at a time starting from the right end.
|
||||
* Hence, we will need BINS / WARP_SIZE number of iterations.
|
||||
*
|
||||
* Each iteration uses shuffle instructions to exchange data between threads. Shuffle
|
||||
* instructions cannot be used in warp-divergent code. If the bins are a multiple of
|
||||
* the warpSize, all the threads in the warp will participate.
|
||||
*/
|
||||
static_assert(BINS % WARP_SIZE == 0, "number of bins must be a multiple of warp size");
|
||||
|
||||
const int thread_id = threadIdx.x;
|
||||
const int inverse_lane_id = WARP_SIZE - thread_id - 1;
|
||||
|
||||
int previous_group_first_element = 0;
|
||||
for (int iter = BINS / WARP_SIZE - 1; iter >= 0; iter--)
|
||||
{
|
||||
const index_type idx = iter * WARP_SIZE + thread_id;
|
||||
auto value = bins[idx];
|
||||
|
||||
for (int i = 1; i < WARP_SIZE; i *= 2)
|
||||
{
|
||||
auto n = __shfl_down_sync(0xFFFFFFFF, value, i);
|
||||
if (inverse_lane_id >= i)
|
||||
value += n;
|
||||
}
|
||||
|
||||
value += previous_group_first_element;
|
||||
bins[idx] = value;
|
||||
|
||||
previous_group_first_element = __shfl_sync(0xFFFFFFFF, value, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
*count = 0;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
for (auto i : block_stride_range<BLOCK_SIZE>(num_priors))
|
||||
{
|
||||
const float confidence = load_ldg(scores[i]);
|
||||
if (confidence > threshold)
|
||||
{
|
||||
using device::fast_divide_ftz;
|
||||
auto conf_scaled = fast_divide_ftz(confidence - threshold, 1 - threshold);
|
||||
|
||||
int bin_index = conf_scaled * BINS;
|
||||
bin_index = clamp<int>(bin_index, 0, BINS - 1);
|
||||
|
||||
/* This bounding box is eligible to be selected unless it does not fall in
|
||||
* the `classwise_topK`. If it did, we would have to compute the location where it needs
|
||||
* to be stored.
|
||||
*
|
||||
* Suppose we had just 4 bins and say the following were the counts:
|
||||
* BIN0 2
|
||||
* BIN1 1
|
||||
* BIN2 3
|
||||
* BIN3 0 (last bin is always zero as we shift left by one while populating the bins)
|
||||
*
|
||||
* We will try our best to store the boxes in a sorted order in the `indices` array.
|
||||
* This requires that the boxes in later bins (higher confidence scores) must be
|
||||
* stored earlier.
|
||||
*
|
||||
* We compute the suffix sum of the array. This gives us:
|
||||
* BIN0 6
|
||||
* BIN1 4
|
||||
* BIN2 3
|
||||
* BIN3 0
|
||||
*
|
||||
* The bins now give us the location in the `indices` array from which the indices of the
|
||||
* scores corresponding to that bin would be stored. We atomically increment the bin count
|
||||
* everytime we store a box corresponding to that bin. Therefore, the value in the bins
|
||||
* gives the index in the `indices` array where the next box corresponding to that bin must
|
||||
* be put.
|
||||
*/
|
||||
|
||||
const index_type idx = atomicAdd(&bins[bin_index], 1);
|
||||
if (idx < classwise_topK)
|
||||
{
|
||||
indices[idx] = i;
|
||||
atomicAdd(&count[0], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void box_collect(Span<T> collected_bboxes_, View<T> decoded_bboxes_, View<int> indices_, View<int> count_, bool share_location, size_type num_priors, size_type num_classes, size_type classwise_topK, index_type background_class_id)
|
||||
{
|
||||
const index_type c = blockIdx.x;
|
||||
if (c == background_class_id)
|
||||
return;
|
||||
|
||||
const index_type b = blockIdx.y;
|
||||
|
||||
// collected_bboxes: [batch_size, num_classes, classwise_topK, 4]
|
||||
// decoded_bboxes: [batch_size, num_priors, num_loc_classes, 4]
|
||||
// indices: [batch_size, num_classes, classwise_topK]
|
||||
// count: [batch_size, num_classes]
|
||||
|
||||
const auto num_loc_classes = share_location ? 1 : num_classes;
|
||||
|
||||
auto collected_bboxes = collected_bboxes_.data() + (b * num_classes + c) * classwise_topK * 4;
|
||||
auto decoded_bboxes = decoded_bboxes_.data() + b * num_priors * num_loc_classes * 4;
|
||||
auto indices = indices_.data() + (b * num_classes + c) * classwise_topK;
|
||||
auto count = count_.data() + b * num_classes + c;
|
||||
|
||||
const auto boxes = load_ldg(&count[0]);
|
||||
if (boxes == 0)
|
||||
return;
|
||||
|
||||
using vector_type = get_vector_type_t<T, 4>;
|
||||
auto decoded_bboxes_vPtr = vector_type::get_pointer(decoded_bboxes);
|
||||
auto collected_bboxes_vPtr = vector_type::get_pointer(collected_bboxes);
|
||||
|
||||
for (auto i : block_stride_range<>(boxes))
|
||||
{
|
||||
const auto prior_id = indices[i];
|
||||
const index_type idx = share_location ? prior_id : (prior_id * num_classes + c);
|
||||
|
||||
vector_type box;
|
||||
v_load(box, decoded_bboxes_vPtr[idx]);
|
||||
v_store(collected_bboxes_vPtr[i], box);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, bool NORMALIZED_BBOX>
|
||||
__global__ void blockwise_class_nms(Span<int> indices_, Span<int> count_, View<T> collected_bboxes_, size_type num_classes, size_type classwise_topK, index_type background_class_id, float nms_threshold)
|
||||
{
|
||||
const index_type b = blockIdx.x / num_classes;
|
||||
const index_type c = blockIdx.x % num_classes;
|
||||
if (c == background_class_id)
|
||||
return;
|
||||
|
||||
// indices: [batch_size, num_classes, classwise_topK]
|
||||
// count: [batch_size, num_classes]
|
||||
// collected_bboxes: [batch_size, num_classes, classwise_topK, 4]
|
||||
|
||||
auto indices = indices_.data() + (b * num_classes + c) * classwise_topK;
|
||||
auto count = count_.data() + b * num_classes + c;
|
||||
auto collected_bboxes = collected_bboxes_.data() + (b * num_classes + c) * classwise_topK * 4;
|
||||
|
||||
const auto boxes = count[0];
|
||||
if (boxes == 0)
|
||||
return;
|
||||
|
||||
using vector_type = get_vector_type_t<T, 4>;
|
||||
auto collected_bboxes_vPtr = vector_type::get_pointer(collected_bboxes);
|
||||
|
||||
for (int i = 0; i < boxes; i++)
|
||||
{
|
||||
auto prior_id = indices[i];
|
||||
if (prior_id != -1)
|
||||
{
|
||||
BoundingBox bbox1;
|
||||
{
|
||||
vector_type box;
|
||||
v_load(box, collected_bboxes_vPtr[i]);
|
||||
|
||||
bbox1.xmin = box.data[0];
|
||||
bbox1.ymin = box.data[1];
|
||||
bbox1.xmax = box.data[2];
|
||||
bbox1.ymax = box.data[3];
|
||||
}
|
||||
|
||||
for (auto j : block_stride_range<>(i + 1, boxes))
|
||||
{
|
||||
prior_id = indices[j];
|
||||
if (prior_id == -1)
|
||||
continue;
|
||||
|
||||
BoundingBox bbox2;
|
||||
{
|
||||
vector_type box;
|
||||
v_load_ldg(box, collected_bboxes_vPtr[j]);
|
||||
|
||||
bbox2.xmin = box.data[0];
|
||||
bbox2.ymin = box.data[1];
|
||||
bbox2.xmax = box.data[2];
|
||||
bbox2.ymax = box.data[3];
|
||||
}
|
||||
|
||||
using device::min;
|
||||
using device::max;
|
||||
|
||||
BoundingBox intersect_bbox;
|
||||
intersect_bbox.xmin = max(bbox1.xmin, bbox2.xmin);
|
||||
intersect_bbox.ymin = max(bbox1.ymin, bbox2.ymin);
|
||||
intersect_bbox.xmax = min(bbox1.xmax, bbox2.xmax);
|
||||
intersect_bbox.ymax = min(bbox1.ymax, bbox2.ymax);
|
||||
|
||||
float intersect_size = compute_bbox_size<NORMALIZED_BBOX>(intersect_bbox);
|
||||
float bbox1_size = compute_bbox_size<NORMALIZED_BBOX>(bbox1);
|
||||
float bbox2_size = compute_bbox_size<NORMALIZED_BBOX>(bbox2);
|
||||
|
||||
using device::fast_divide_ftz;
|
||||
float iou = fast_divide_ftz(intersect_size, bbox1_size + bbox2_size - intersect_size);
|
||||
if (iou > nms_threshold)
|
||||
indices[j] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
count[0] = 0;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
for (auto i : block_stride_range<>(boxes))
|
||||
{
|
||||
auto prior_id = indices[i];
|
||||
if(prior_id != -1)
|
||||
{
|
||||
const index_type idx = atomicAdd(&count[0], 1);
|
||||
indices[idx] = prior_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Selects, for every batch item, up to `keepTopK` of the detections listed in `indices_`
 * whose score exceeds `threshold`, favouring higher scores.
 *
 * One thread block handles one batch item (`b = blockIdx.x`). The work is a bin-based
 * counting sort held in shared memory:
 *   1. build a histogram of scaled scores (stored shifted one bin to the left)
 *   2. the first warp turns the histogram into running totals accumulated from the highest bin down
 *   3. every surviving detection claims an output slot with an atomic increment on its bin
 *
 * Detections that land in the same bin are ordered by whichever atomic executes first,
 * so their relative order is not fixed.
 *
 * NOTE(review): the zeroing loop and the `block_stride_range<BLOCK_SIZE>` loops look like they
 * expect blockDim.x == BLOCK_SIZE and BINS to be a multiple of BLOCK_SIZE - confirm against the launcher.
 */
template <class T, std::size_t BINS, int BLOCK_SIZE>
__launch_bounds__(BLOCK_SIZE)
__global__ void nms_collect(
    Span<int> kept_indices, Span<int> kept_count, View<int> indices_, View<int> count, View<T> scores_, float threshold,
    size_type num_classes, size_type num_priors, size_type classwise_topK, size_type keepTopK, index_type background_class_id)
{
    // sorting algorithm is documented in detail in findTopK kernel comments
    // no explanations are provided here

    // kept_indices: [batch_size, keepTopK]
    // kept_count: [batch_size]

    const auto b = blockIdx.x;

    __shared__ int bins[BINS];

    // each thread clears BINS / BLOCK_SIZE histogram entries
    #pragma unroll
    for (int unroll = 0; unroll < BINS / BLOCK_SIZE; unroll++)
        bins[unroll * BLOCK_SIZE + threadIdx.x] = 0;

    // the histogram must be fully cleared before any thread starts counting
    __syncthreads();

    // pass 1: histogram of the scaled scores over all non-background classes
    for (int c = 0; c < num_classes; c++)
    {
        if (c == background_class_id)
            continue;

        // indices: [batch_size, num_classes, classwise_topK]
        // count: [batch_size, num_classes]
        // scores: [batch_size, num_classes, num_priors]

        const auto indices = indices_.data() + (b * num_classes + c) * classwise_topK;
        const auto scores = scores_.data() + (b * num_classes + c) * num_priors;

        auto boxes = count[b * num_classes + c];

        for (auto i : block_stride_range<BLOCK_SIZE>(boxes))
        {
            auto prior_id = indices[i];
            const float confidence = load_ldg(scores[prior_id]);
            if (confidence > threshold)
            {
                // rescale (confidence - threshold) by (1 - threshold) before binning
                using device::fast_divide_ftz;
                auto conf_scaled = fast_divide_ftz(confidence - threshold, 1 - threshold);

                using device::clamp;
                int bin_index = conf_scaled * BINS;
                bin_index = clamp<int>(bin_index, 0, BINS - 1) - 1; // shift left by one

                // entries of the lowest bin would map to -1 and are not counted; after the scan
                // below, bins[k] therefore holds the number of entries in bins above k
                if (bin_index >= 0)
                    atomicAdd(&bins[bin_index], 1);
            }
        }
    }

    // the histogram must be complete before it is scanned
    __syncthreads();

    constexpr int WARP_SIZE = 32; /* must be equal to warpSize */
    // FORWARD_COMPATIBILITY_TAG: WARP_SIZE_DEPENDENT_CODE

    // pass 2: the first WARP_SIZE threads convert the histogram into running totals, processing
    // groups of WARP_SIZE bins from the highest group to the lowest
    if (threadIdx.x < WARP_SIZE)
    {
        static_assert(BINS % WARP_SIZE == 0, "number of bins must be a multiple of warp size");

        const int thread_id = threadIdx.x;
        const int inverse_lane_id = WARP_SIZE - thread_id - 1;

        // running total carried over from the groups that were already processed
        int previous_group_first_element = 0;
        for (int iter = BINS / WARP_SIZE - 1; iter >= 0; iter--)
        {
            const index_type idx = iter * WARP_SIZE + thread_id;
            auto value = bins[idx];

            // within the group, each lane accumulates its own bin plus the bins of all higher lanes;
            // the `inverse_lane_id >= i` guard skips the step when lane + i would be past the last lane
            for (int i = 1; i < WARP_SIZE; i *= 2)
            {
                auto n = __shfl_down_sync(0xFFFFFFFF, value, i);
                if (inverse_lane_id >= i)
                    value += n;
            }

            value += previous_group_first_element;
            bins[idx] = value;

            // lane 0 holds the total of this group and all higher groups
            previous_group_first_element = __shfl_sync(0xFFFFFFFF, value, 0);
        }
    }

    if (threadIdx.x == 0)
        kept_count[b] = 0;

    // scanned bins and the reset counter must be visible to the whole block before scattering
    __syncthreads();

    // pass 3: every detection above the threshold claims the next free slot of its bin
    for (int c = 0; c < num_classes; c++)
    {
        if (c == background_class_id)
            continue;

        const auto indices = indices_.data() + (b * num_classes + c) * classwise_topK;
        const auto scores = scores_.data() + (b * num_classes + c) * num_priors;

        auto boxes = count[b * num_classes + c];

        for (auto i : block_stride_range<BLOCK_SIZE>(boxes))
        {
            auto prior_id = indices[i];
            const float confidence = load_ldg(scores[prior_id]);
            if (confidence > threshold)
            {
                using device::fast_divide_ftz;
                auto conf_scaled = fast_divide_ftz(confidence - threshold, 1 - threshold);

                using device::clamp;
                int bin_index = conf_scaled * BINS;
                bin_index = clamp<int>(bin_index, 0, BINS - 1);

                // the value returned by the atomic is this detection's slot in the output
                const index_type idx = atomicAdd(&bins[bin_index], 1);
                if (idx < keepTopK)
                {
                    // class and prior are packed into a single index: c * num_priors + prior_id
                    kept_indices[b * keepTopK + idx] = c * num_priors + prior_id;
                    atomicAdd(&kept_count[b], 1);
                }
            }
        }
    }
}
|
||||
|
||||
/* Writes the kept detections of all batch items into `output` as rows of seven values:
 *   [batch index, class index, confidence, box component 0, 1, 2, 3]
 *
 * Rows are appended in whatever order threads win the atomic increment on `num_detections`,
 * so the row order is not fixed.
 *
 * Tensor layouts:
 *   output:         [1, 1, batch_size * keepTopK, 7]
 *   kept_indices:   [batch_size, keepTopK]
 *   kept_count:     [batch_size]
 *   decoded_bboxes: [batch_size, num_priors, num_loc_classes, 4]
 *   scores:         [batch_size, num_classes, num_priors]
 */
template <class T>
__global__ void consolidate_detections(Span<T> output,
    View<int> kept_indices, View<int> kept_count, View<T> decoded_bboxes, View<T> scores, bool share_location,
    size_type batch_size, size_type num_classes, size_type num_priors, size_type keepTopK, DevicePtr<int> num_detections)
{
    using bbox_vector = get_vector_type_t<T, 4>;
    auto bbox_table = bbox_vector::get_pointer(decoded_bboxes.data());

    for (int batch_idx = 0; batch_idx < batch_size; batch_idx++)
    {
        for (auto slot : grid_stride_range(kept_count[batch_idx]))
        {
            /* a kept index packs class and prior as `class * num_priors + prior` */
            auto packed_id = kept_indices[batch_idx * keepTopK + slot];
            auto class_idx = packed_id / num_priors;
            auto prior_idx = packed_id % num_priors;

            const auto confidence = scores[batch_idx * num_classes * num_priors + packed_id];

            /* shared locations:    decoded_bboxes is [batch_size, num_priors, 1, 4]
             * per-class locations: decoded_bboxes is [batch_size, num_priors, num_classes, 4]
             */
            const index_type bbox_idx = share_location
                ? (batch_idx * num_priors + prior_idx)
                : ((batch_idx * num_priors + prior_idx) * num_classes + class_idx);

            bbox_vector box;
            v_load(box, bbox_table[bbox_idx]);

            /* reserve the next free output row */
            auto row = atomicAdd(num_detections.get(), 1);
            output[row * 7 + 0] = batch_idx;
            output[row * 7 + 1] = class_idx;
            output[row * 7 + 2] = confidence;
            output[row * 7 + 3] = box.data[0];
            output[row * 7 + 4] = box.data[1];
            output[row * 7 + 5] = box.data[2];
            output[row * 7 + 6] = box.data[3];
        }
    }
}
|
||||
}
|
||||
|
||||
/* Launches the raw::decode_bbox instantiation selected by the four boolean template flags.
 * The remaining options are forwarded to the kernel as runtime arguments.
 */
template <class T, bool SHARE_LOCATION, bool VARIANCE_ENCODED_IN_TARGET, bool CORNER_TRUE_CENTER_FALSE, bool CLIP_BBOX> static
void launch_decode_boxes_kernel(const Stream& stream, Span<T> decoded_bboxes, View<T> locations, View<T> priors,
    bool transpose_location, bool normalized_bbox,
    size_type num_loc_classes, index_type background_class_id,
    float clip_width, float clip_height)
{
    /* a box takes four consecutive elements; the box count is handed to make_policy as the work size */
    const auto num_boxes = decoded_bboxes.size() / 4;

    auto decode_kernel = raw::decode_bbox<T, SHARE_LOCATION, VARIANCE_ENCODED_IN_TARGET, CORNER_TRUE_CENTER_FALSE, CLIP_BBOX>;
    auto launch_policy = make_policy(decode_kernel, num_boxes, 0, stream);
    launch_kernel(decode_kernel, launch_policy,
        decoded_bboxes, locations, priors,
        transpose_location, normalized_bbox,
        num_loc_classes, background_class_id,
        clip_width, clip_height);
}
|
||||
|
||||
/* Terminal case of the compile-time dispatch (`current == 0`): launches the kernel with all
 * four flags disabled when `selector` is zero and does nothing for any other selector.
 */
template <class T, unsigned int current, class ...Args> static
typename std::enable_if<current == 0, void>
::type dispatch_decode_bboxes(int selector, Args&& ...args) {
    if (selector != 0)
        return;

    launch_decode_boxes_kernel<T, 0, 0, 0, 0>(std::forward<Args>(args)...);
}
|
||||
|
||||
/* Recursive case of the compile-time dispatch (`current != 0`).
 *
 * When `selector` equals `current`, the four low bits of `current` become the kernel's boolean
 * template flags (bit 3 is the first flag, bit 0 the last). Otherwise the search continues
 * with `current - 1`.
 */
template <class T, unsigned int current, class ...Args> static
typename std::enable_if<current != 0, void>
::type dispatch_decode_bboxes(int selector, Args&& ...args) {
    if (selector != current)
    {
        dispatch_decode_bboxes<T, current - 1, Args...>(selector, std::forward<Args>(args)...);
        return;
    }

    constexpr bool flag_bit3 = static_cast<bool>(current & 8);
    constexpr bool flag_bit2 = static_cast<bool>(current & 4);
    constexpr bool flag_bit1 = static_cast<bool>(current & 2);
    constexpr bool flag_bit0 = static_cast<bool>(current & 1);

    launch_decode_boxes_kernel<T, flag_bit3, flag_bit2, flag_bit1, flag_bit0>(std::forward<Args>(args)...);
}
|
||||
|
||||
/* Public entry point for box decoding.
 *
 * Four of the boolean options are template parameters of the kernel. They are packed into a
 * 4-bit number so that a bit of TMP code (dispatch_decode_bboxes) can run through all sixteen
 * combinations and pick the matching instantiation:
 *   bit 3: share_location
 *   bit 2: variance_encoded_in_target
 *   bit 1: corner_true_or_center_false
 *   bit 0: clip_box
 * All other options are passed through as runtime arguments.
 */
template <class T>
void decode_bboxes(const Stream& stream, Span<T> output, View<T> locations, View<T> priors,
    std::size_t num_loc_classes,
    bool share_location, std::size_t background_class_id,
    bool transpose_location, bool variance_encoded_in_target,
    bool corner_true_or_center_false, bool normalized_bbox,
    bool clip_box, float clip_width, float clip_height)
{
    unsigned int config = 0;
    if (share_location)
        config |= 8u;
    if (variance_encoded_in_target)
        config |= 4u;
    if (corner_true_or_center_false)
        config |= 2u;
    if (clip_box)
        config |= 1u;

    dispatch_decode_bboxes<T, 15>(config, stream, output, locations, priors,
        transpose_location, normalized_bbox,
        num_loc_classes, background_class_id,
        clip_width, clip_height);
}
|
||||
|
||||
template void decode_bboxes(const Stream&, Span<__half>, View<__half>, View<__half>, std::size_t, bool, std::size_t, bool, bool, bool, bool, bool, float, float);
|
||||
template void decode_bboxes(const Stream&, Span<float>, View<float>, View<float>, std::size_t, bool, std::size_t, bool, bool, bool, bool, bool, float, float);
|
||||
|
||||
/* Host-side launcher for raw::findTopK.
 *
 * Tensor layouts:
 *   indices: [batch_size, num_classes, classwise_topK]
 *   count:   [batch_size, num_classes]
 *   scores:  [batch_size, num_classes, num_priors]
 *
 * The grid is (num_classes, batch_size): each block processes one class from each batch.
 */
template <class T>
void findTopK(const Stream& stream, TensorSpan<int> indices, TensorSpan<int> count, TensorView<T> scores, std::size_t background_class_id, float threshold)
{
    /* batch axis must agree across all tensors */
    const auto batch_size = indices.get_axis_size(0);
    CV_Assert(count.get_axis_size(0) == batch_size);
    CV_Assert(scores.get_axis_size(0) == batch_size);

    /* class axis must agree across all tensors */
    const auto num_classes = indices.get_axis_size(1);
    CV_Assert(count.get_axis_size(1) == num_classes);
    CV_Assert(scores.get_axis_size(1) == num_classes);

    const auto classwise_topK = indices.get_axis_size(2);
    const auto num_priors = scores.get_axis_size(2);

    constexpr auto BLOCK_SIZE = 256;
    constexpr int NUM_BINS = 2048;

    dim3 grid(num_classes, batch_size);
    dim3 block(BLOCK_SIZE);
    auto launch_policy = execution_policy(grid, block, stream);

    auto topk_kernel = raw::findTopK<T, NUM_BINS, BLOCK_SIZE>;
    launch_kernel(topk_kernel, launch_policy,
        indices, count, scores, threshold,
        classwise_topK, num_classes, num_priors, background_class_id);
}
|
||||
|
||||
template void findTopK(const Stream&, TensorSpan<int>, TensorSpan<int>, TensorView<__half>, std::size_t, float);
|
||||
template void findTopK(const Stream&, TensorSpan<int>, TensorSpan<int>, TensorView<float>, std::size_t, float);
|
||||
|
||||
/* Host-side launcher for raw::box_collect.
 *
 * Tensor layouts:
 *   collected_bboxes: [batch_size, num_classes, classwise_topK, 4]
 *   decoded_bboxes:   [batch_size, num_priors, num_loc_classes, 4]
 *   indices:          [batch_size, num_classes, classwise_topK]
 *   count:            [batch_size, num_classes]
 *
 * When `share_location` is set, `decoded_bboxes` must have a single location class.
 * The grid is (num_classes, batch_size): each block processes one class from each batch.
 */
template <class T>
void box_collect(const Stream& stream, TensorSpan<T> collected_bboxes, TensorView<T> decoded_bboxes, TensorView<int> indices, TensorView<int> count, bool share_location, std::size_t background_class_id)
{
    /* batch axis */
    const auto batch_size = collected_bboxes.get_axis_size(0);
    CV_Assert(decoded_bboxes.get_axis_size(0) == batch_size);
    CV_Assert(indices.get_axis_size(0) == batch_size);
    CV_Assert(count.get_axis_size(0) == batch_size);

    /* class axis */
    const auto num_classes = collected_bboxes.get_axis_size(1);
    CV_Assert(indices.get_axis_size(1) == num_classes);
    CV_Assert(count.get_axis_size(1) == num_classes);

    /* per-class candidate axis */
    const auto classwise_topK = collected_bboxes.get_axis_size(2);
    CV_Assert(indices.get_axis_size(2) == classwise_topK);

    const auto num_priors = decoded_bboxes.get_axis_size(1);

    CV_Assert(!share_location || decoded_bboxes.get_axis_size(2) == 1);

    constexpr int BLOCK_SIZE = 256;

    dim3 grid(num_classes, batch_size);
    dim3 block(BLOCK_SIZE);
    auto launch_policy = execution_policy(grid, block, stream);

    auto collect_kernel = raw::box_collect<T>;
    launch_kernel(collect_kernel, launch_policy,
        collected_bboxes, decoded_bboxes, indices, count,
        share_location, num_priors, num_classes, classwise_topK, background_class_id);
}
|
||||
|
||||
template void box_collect(const Stream&, TensorSpan<float>, TensorView<float>, TensorView<int>, TensorView<int>, bool, std::size_t);
|
||||
template void box_collect(const Stream&, TensorSpan<__half>, TensorView<__half>, TensorView<int>, TensorView<int>, bool, std::size_t);
|
||||
|
||||
/* Host-side launcher for raw::blockwise_class_nms.
 *
 * Tensor layouts:
 *   indices:          [batch_size, num_classes, classwise_topK]
 *   count:            [batch_size, num_classes]
 *   collected_bboxes: [batch_size, num_classes, classwise_topK, 4]
 *
 * One block is launched per (batch item, class) pair. The block size is `classwise_topK`
 * limited to the range [32, 1024]. `normalized_bbox` selects the kernel instantiation.
 */
template <class T>
void blockwise_class_nms(const Stream& stream, TensorSpan<int> indices, TensorSpan<int> count, TensorView<T> collected_bboxes,
    bool normalized_bbox, std::size_t background_class_id, float nms_threshold)
{
    /* batch axis */
    const auto batch_size = indices.get_axis_size(0);
    CV_Assert(count.get_axis_size(0) == batch_size);
    CV_Assert(collected_bboxes.get_axis_size(0) == batch_size);

    /* class axis */
    const auto num_classes = indices.get_axis_size(1);
    CV_Assert(count.get_axis_size(1) == num_classes);
    CV_Assert(collected_bboxes.get_axis_size(1) == num_classes);

    /* per-class candidate axis */
    const auto classwise_topK = indices.get_axis_size(2);
    CV_Assert(collected_bboxes.get_axis_size(2) == classwise_topK);

    /* each block processes one class from each batch */
    auto num_blocks = batch_size * num_classes;
    auto capped_threads = std::min<std::size_t>(1024, classwise_topK);
    auto num_threads = std::max<std::size_t>(capped_threads, 32);

    dim3 grid(num_blocks);
    dim3 block(num_threads);
    auto launch_policy = execution_policy(grid, block, stream);

    if (!normalized_bbox)
    {
        auto nms_kernel = raw::blockwise_class_nms<T, false>;
        launch_kernel(nms_kernel, launch_policy, indices, count, collected_bboxes, num_classes, classwise_topK, background_class_id, nms_threshold);
        return;
    }

    auto nms_kernel = raw::blockwise_class_nms<T, true>;
    launch_kernel(nms_kernel, launch_policy, indices, count, collected_bboxes, num_classes, classwise_topK, background_class_id, nms_threshold);
}
|
||||
|
||||
template void blockwise_class_nms(const Stream&, TensorSpan<int>, TensorSpan<int>, TensorView<__half>, bool, std::size_t, float);
|
||||
template void blockwise_class_nms(const Stream&, TensorSpan<int>, TensorSpan<int>, TensorView<float>, bool, std::size_t, float);
|
||||
|
||||
/* Host-side launcher for raw::nms_collect.
 *
 * Tensor layouts:
 *   kept_indices: [batch_size, keepTopK]
 *   kept_count:   [batch_size]
 *   indices:      [batch_size, num_classes, classwise_topK]
 *   count:        [batch_size, num_classes]
 *   scores:       [batch_size, num_classes, num_priors]
 *
 * One block of 1024 threads is launched per batch item; the kernel uses 1024 bins.
 */
template <class T>
void nms_collect(const Stream& stream, TensorSpan<int> kept_indices, TensorSpan<int> kept_count,
    TensorView<int> indices, TensorView<int> count, TensorView<T> scores, float threshold, std::size_t background_class_id)
{
    /* batch axis must agree across all tensors */
    auto batch_size = kept_indices.get_axis_size(0);
    CV_Assert(kept_count.get_axis_size(0) == batch_size);
    CV_Assert(indices.get_axis_size(0) == batch_size);
    CV_Assert(count.get_axis_size(0) == batch_size);
    CV_Assert(scores.get_axis_size(0) == batch_size);

    auto keepTopK = kept_indices.get_axis_size(1);

    /* class axis */
    auto num_classes = indices.get_axis_size(1);
    CV_Assert(count.get_axis_size(1) == num_classes);
    CV_Assert(scores.get_axis_size(1) == num_classes);

    auto classwise_topK = indices.get_axis_size(2);
    auto num_priors = scores.get_axis_size(2);

    constexpr int BLOCK_SIZE = 1024;
    constexpr int NUM_BINS = 1024;

    dim3 grid(batch_size);
    dim3 block(BLOCK_SIZE);
    auto launch_policy = execution_policy(grid, block, stream);

    auto collect_kernel = raw::nms_collect<T, NUM_BINS, BLOCK_SIZE>;
    launch_kernel(collect_kernel, launch_policy,
        kept_indices, kept_count, indices, count, scores, threshold,
        num_classes, num_priors, classwise_topK, keepTopK, background_class_id);
}
|
||||
|
||||
template void nms_collect(const Stream&, TensorSpan<int>, TensorSpan<int>, TensorView<int>, TensorView<int>, TensorView<__half>, float, std::size_t);
|
||||
template void nms_collect(const Stream&, TensorSpan<int>, TensorSpan<int>, TensorView<int>, TensorView<int>, TensorView<float>, float, std::size_t);
|
||||
|
||||
/* Host-side launcher for raw::consolidate_detections.
 *
 * Tensor layouts:
 *   output:         [1, 1, batch_size * keepTopK, 7]
 *   kept_indices:   [batch_size, keepTopK]
 *   kept_count:     [batch_size]
 *   decoded_bboxes: [batch_size, num_priors, num_loc_classes, 4]
 *   scores:         [batch_size, num_classes, num_priors]
 *
 * `num_detections` is forwarded to the kernel, which uses it as the output row counter.
 */
template <class T>
void consolidate_detections(const Stream& stream, TensorSpan<T> output,
    TensorView<int> kept_indices, TensorView<int> kept_count,
    TensorView<T> decoded_bboxes, TensorView<T> scores, bool share_location, DevicePtr<int> num_detections)
{
    /* batch axis must agree across all inputs */
    auto batch_size = kept_indices.get_axis_size(0);
    CV_Assert(kept_count.get_axis_size(0) == batch_size);
    CV_Assert(decoded_bboxes.get_axis_size(0) == batch_size);
    CV_Assert(scores.get_axis_size(0) == batch_size);

    auto keepTopK = kept_indices.get_axis_size(1);

    auto num_classes = scores.get_axis_size(1);
    auto num_priors = scores.get_axis_size(2);

    /* the output must have room for keepTopK rows of seven values per batch item */
    CV_Assert(batch_size * keepTopK * 7 == output.size());

    auto consolidate_kernel = raw::consolidate_detections<T>;
    auto launch_policy = make_policy(consolidate_kernel, keepTopK, 0, stream);
    launch_kernel(consolidate_kernel, launch_policy,
        output, kept_indices, kept_count, decoded_bboxes, scores, share_location,
        batch_size, num_classes, num_priors, keepTopK, num_detections);
}
|
||||
|
||||
template void consolidate_detections(const Stream&, TensorSpan<__half>, TensorView<int>, TensorView<int>, TensorView<__half>, TensorView<__half>, bool, DevicePtr<int>);
|
||||
template void consolidate_detections(const Stream&, TensorSpan<float>, TensorView<int>, TensorView<int>, TensorView<float>, TensorView<float>, bool, DevicePtr<int>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
125
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/eltwise_activation.cu
vendored
Normal file
125
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/eltwise_activation.cu
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "functors.hpp"
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {

    /* Computes output[k] = ActivationOp(EltwiseOp(x[k], y[k])) for every element,
     * reading and writing N elements at a time through vector loads/stores.
     */
    template <class T, class EltwiseOp, class ActivationOp, std::size_t N>
    __global__ void eltwise_op_generic_op_vec(Span<T> output, View<T> x, View<T> y, const typename EltwiseOp::Params eltwise_params, const typename ActivationOp::Params act_params) {
        using vector_type = get_vector_type_t<T, N>;

        auto out_vec = vector_type::get_pointer(output.data());
        auto lhs_vec = vector_type::get_pointer(x.data());
        auto rhs_vec = vector_type::get_pointer(y.data());

        EltwiseOp combine(eltwise_params);
        ActivationOp activate(act_params);

        const auto num_vectors = output.size() / vector_type::size();
        for (auto vec_idx : grid_stride_range(num_vectors)) {
            vector_type lhs, rhs;
            v_load(lhs, lhs_vec[vec_idx]);
            v_load(rhs, rhs_vec[vec_idx]);

            /* the result is accumulated in place in `lhs` */
            for (int lane = 0; lane < lhs.size(); lane++) {
                const auto combined = combine(lhs.data[lane], rhs.data[lane]);
                lhs.data[lane] = activate(combined);
            }

            v_store(out_vec[vec_idx], lhs);
        }
    }
}
|
||||
|
||||
/* Launches raw::eltwise_op_generic_op_vec with vector width N.
 * All three buffers must be fully aligned for N-element vector access.
 */
template <class T, class EltwiseOp, class ActivationOp, std::size_t N> static
void launch_vectorized_eltwise_op_generic_op(const Stream& stream, Span<T> output, View<T> x, View<T> y, const typename EltwiseOp::Params& eltwise_params, const typename ActivationOp::Params& act_params) {
    CV_Assert(is_fully_aligned<T>(output, N));
    CV_Assert(is_fully_aligned<T>(x, N));
    CV_Assert(is_fully_aligned<T>(y, N));

    /* the launch is sized by the number of N-element vectors */
    const auto num_vectors = output.size() / N;

    auto vec_kernel = raw::eltwise_op_generic_op_vec<T, EltwiseOp, ActivationOp, N>;
    auto launch_policy = make_policy(vec_kernel, num_vectors, 0, stream);
    launch_kernel(vec_kernel, launch_policy, output, x, y, eltwise_params, act_params);
}
|
||||
|
||||
/* Applies `ActivationOp(EltwiseOp(x, y))` element-wise and stores the result in `output`.
 *
 * `x`, `y` and `output` must have the same number of elements. The widest vector width
 * (4, then 2, then 1) for which all three buffers are fully aligned is selected.
 *
 * eltwise_params: parameters used to construct the element-wise functor (default-constructed if omitted)
 * act_params:     parameters used to construct the activation functor (default-constructed if omitted)
 */
template <class T, class EltwiseOp, class ActivationOp> static
void eltwise_op_generic_op(const Stream& stream, Span<T> output, View<T> x, View<T> y, const typename EltwiseOp::Params& eltwise_params = {}, const typename ActivationOp::Params& act_params = {}) {
    CV_Assert(output.size() == x.size());
    CV_Assert(output.size() == y.size());

    if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(x, 4) && is_fully_aligned<T>(y, 4)) {
        launch_vectorized_eltwise_op_generic_op<T, EltwiseOp, ActivationOp, 4>(stream, output, x, y, eltwise_params, act_params);
    } else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(x, 2) && is_fully_aligned<T>(y, 2)) {
        /* the 2-wide kernel only needs 2-element alignment on every buffer; requiring 4 on `y`
         * here would send 2-aligned inputs to the scalar kernel for no reason
         */
        launch_vectorized_eltwise_op_generic_op<T, EltwiseOp, ActivationOp, 2>(stream, output, x, y, eltwise_params, act_params);
    } else {
        launch_vectorized_eltwise_op_generic_op<T, EltwiseOp, ActivationOp, 1>(stream, output, x, y, eltwise_params, act_params);
    }
}
|
||||
|
||||
/* Fused element-wise op: combines `x` and `y` with SumFunctor and passes the result
 * through ReLUFunctor constructed from `slope`. Presumably relu(x + y) - the functors
 * are defined in functors.hpp.
 */
template <class T>
void eltwise_sum_2_relu(const Stream& stream, Span<T> output, View<T> x, View<T> y, T slope) {
    eltwise_op_generic_op<T, SumFunctor<T>, ReLUFunctor<T>>(stream, output, x, y, {}, {slope});
}
|
||||
|
||||
/* Fused element-wise op: combines `x` and `y` with SumFunctor and passes the result
 * through ClippedReLUFunctor constructed from `floor` and `ceiling`.
 * `floor` must not exceed `ceiling`.
 */
template <class T>
void eltwise_sum_2_clipped_relu(const Stream& stream, Span<T> output, View<T> x, View<T> y, T floor, T ceiling) {
    // the bounds are compared as doubles so the same check works for every T this is instantiated with
    CV_Assert(static_cast<double>(floor) <= static_cast<double>(ceiling));
    eltwise_op_generic_op<T, SumFunctor<T>, ClippedReLUFunctor<T>>(stream, output, x, y, {}, {floor, ceiling});
}
|
||||
|
||||
/* Fused element-wise op: combines `x` and `y` with SumFunctor and passes the result
 * through TanHFunctor (default parameters for both functors).
 */
template <class T>
void eltwise_sum_2_tanh(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
    eltwise_op_generic_op<T, SumFunctor<T>, TanHFunctor<T>>(stream, output, x, y);
}
|
||||
|
||||
/* Fused element-wise op: combines `x` and `y` with SumFunctor and passes the result
 * through SwishFunctor (default parameters for both functors).
 */
template <class T>
void eltwise_sum_2_swish(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
    eltwise_op_generic_op<T, SumFunctor<T>, SwishFunctor<T>>(stream, output, x, y);
}
|
||||
|
||||
/* Fused element-wise op: combines `x` and `y` with SumFunctor and passes the result
 * through MishFunctor (default parameters for both functors).
 */
template <class T>
void eltwise_sum_2_mish(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
    eltwise_op_generic_op<T, SumFunctor<T>, MishFunctor<T>>(stream, output, x, y);
}
|
||||
|
||||
/* Fused element-wise op: combines `x` and `y` with SumFunctor and passes the result
 * through SigmoidFunctor (default parameters for both functors).
 */
template <class T>
void eltwise_sum_2_sigmoid(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
    eltwise_op_generic_op<T, SumFunctor<T>, SigmoidFunctor<T>>(stream, output, x, y);
}
|
||||
|
||||
/* Fused element-wise op: combines `x` and `y` with SumFunctor and passes the result
 * through PowerFunctor constructed from `exp`, `scale` and `shift` (in that order).
 */
template <class T>
void eltwise_sum_2_power(const Stream& stream, Span<T> output, View<T> x, View<T> y, T exp, T scale, T shift) {
    eltwise_op_generic_op<T, SumFunctor<T>, PowerFunctor<T>>(stream, output, x, y, {}, {exp, scale, shift});
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void eltwise_sum_2_relu<__half>(const Stream&, Span<__half>, View<__half>, View<__half>, __half);
|
||||
template void eltwise_sum_2_clipped_relu<__half>(const Stream&, Span<__half>, View<__half>, View<__half>, __half, __half);
|
||||
template void eltwise_sum_2_tanh<__half>(const Stream&, Span<__half>, View<__half>, View<__half>);
|
||||
template void eltwise_sum_2_swish<__half>(const Stream&, Span<__half>, View<__half>, View<__half>);
|
||||
template void eltwise_sum_2_mish<__half>(const Stream&, Span<__half>, View<__half>, View<__half>);
|
||||
template void eltwise_sum_2_sigmoid<__half>(const Stream&, Span<__half>, View<__half>, View<__half>);
|
||||
template void eltwise_sum_2_power<__half>(const Stream&, Span<__half>, View<__half>, View<__half>, __half, __half, __half);
|
||||
#endif
|
||||
|
||||
template void eltwise_sum_2_relu<float>(const Stream&, Span<float>, View<float>, View<float>, float);
|
||||
template void eltwise_sum_2_clipped_relu<float>(const Stream&, Span<float>, View<float>, View<float>, float, float);
|
||||
template void eltwise_sum_2_tanh<float>(const Stream&, Span<float>, View<float>, View<float>);
|
||||
template void eltwise_sum_2_swish<float>(const Stream&, Span<float>, View<float>, View<float>);
|
||||
template void eltwise_sum_2_mish<float>(const Stream&, Span<float>, View<float>, View<float>);
|
||||
template void eltwise_sum_2_sigmoid<float>(const Stream&, Span<float>, View<float>, View<float>);
|
||||
template void eltwise_sum_2_power<float>(const Stream&, Span<float>, View<float>, View<float>, float, float, float);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
334
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/eltwise_ops.cu
vendored
Normal file
334
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/eltwise_ops.cu
vendored
Normal file
@@ -0,0 +1,334 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "array.hpp"
|
||||
#include "functors.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "kernel_dispatcher.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
    /* Computes output[k] = EltwiseOp(x[k], y[k]) for every element, reading and writing
     * N elements at a time through vector loads/stores. No broadcasting.
     */
    template <class T, class EltwiseOp, std::size_t N>
    __global__ void eltwise_op_vec(Span<T> output, View<T> x, View<T> y, const typename EltwiseOp::Params params) {
        using vector_type = get_vector_type_t<T, N>;

        auto out_vec = vector_type::get_pointer(output.data());
        auto lhs_vec = vector_type::get_pointer(x.data());
        auto rhs_vec = vector_type::get_pointer(y.data());

        EltwiseOp combine(params);

        const auto num_vectors = output.size() / vector_type::size();
        for (auto vec_idx : grid_stride_range(num_vectors)) {
            vector_type lhs, rhs;
            v_load(lhs, lhs_vec[vec_idx]);
            v_load(rhs, rhs_vec[vec_idx]);

            /* the result is accumulated in place in `lhs` */
            for (int lane = 0; lane < vector_type::size(); lane++)
                lhs.data[lane] = combine(lhs.data[lane], rhs.data[lane]);

            v_store(out_vec[vec_idx], lhs);
        }
    }

    /* Computes output[i] = EltwiseOp(x[...], y[...]) with per-axis broadcasting.
     *
     * The linear output index is split into one coordinate per axis using `out_strides`.
     * Each coordinate contributes `coordinate * stride` to an input's offset unless that
     * input broadcasts along the axis, in which case the axis contributes nothing.
     */
    template <class T, class EltwiseOp, std::size_t Rank>
    __global__ void eltwise_op_bcast(
        Span<T> output, array<size_type, Rank> out_strides,
        View<T> x, array<size_type, Rank> x_strides, array<bool, Rank> x_bcast,
        View<T> y, array<size_type, Rank> y_strides, array<bool, Rank> y_bcast,
        const typename EltwiseOp::Params params) {
        EltwiseOp combine(params);

        for (auto i : grid_stride_range(output.size())) {
            /* outermost axis: the coordinate is a plain division */
            index_type coord = i / out_strides[0];

            index_type x_offset = 0;
            if (!x_bcast[0])
                x_offset = coord * x_strides[0];

            index_type y_offset = 0;
            if (!y_bcast[0])
                y_offset = coord * y_strides[0];

            /* inner axes: strip the contribution of the outer axes, then divide */
            for (int axis = 1; axis < Rank; axis++)
            {
                coord = (i % out_strides[axis - 1]) / out_strides[axis];

                if (!x_bcast[axis])
                    x_offset += coord * x_strides[axis];

                if (!y_bcast[axis])
                    y_offset += coord * y_strides[axis];
            }

            output[i] = combine(x[x_offset], y[y_offset]);
        }
    }
}
|
||||
|
||||
/* Launches raw::eltwise_op_vec with vector width N.
 * The three buffers must have equal sizes and be fully aligned for N-element vector access.
 */
template <class T, class EltwiseOp, std::size_t N> static
void launch_vectorized_eltwise_op(const Stream& stream, Span<T> output, View<T> x, View<T> y, const typename EltwiseOp::Params& params) {
    /* sizes */
    CV_Assert(x.size() == y.size());
    CV_Assert(x.size() == output.size());

    /* alignment */
    CV_Assert(is_fully_aligned<T>(output, N));
    CV_Assert(is_fully_aligned<T>(x, N));
    CV_Assert(is_fully_aligned<T>(y, N));

    /* the launch is sized by the number of N-element vectors */
    const auto num_vectors = output.size() / N;

    auto vec_kernel = raw::eltwise_op_vec<T, EltwiseOp, N>;
    auto launch_policy = make_policy(vec_kernel, num_vectors, 0, stream);
    launch_kernel(vec_kernel, launch_policy, output, x, y, params);
}
|
||||
|
||||
/* Launches raw::eltwise_op_bcast for tensors of rank `Rank`.
 *
 * The stride and broadcast-flag vectors must each hold exactly `Rank` entries. They are
 * copied into fixed-size arrays so they can be passed to the kernel by value.
 */
template <class T, class EltwiseOp, std::size_t Rank> static
void launch_eltwise_op_bcast(
    const Stream& stream,
    Span<T> output, const std::vector<std::size_t>& outStride,
    View<T> x, const std::vector<std::size_t>& inStride1, const std::vector<int>& inBcast1,
    View<T> y, const std::vector<std::size_t>& inStride2, const std::vector<int>& inBcast2,
    const typename EltwiseOp::Params& params)
{
    CV_Assert(outStride.size() == Rank);
    CV_Assert(inStride1.size() == Rank);
    CV_Assert(inStride2.size() == Rank);
    CV_Assert(inBcast1.size() == Rank);
    CV_Assert(inBcast2.size() == Rank);

    /* strides */
    array<size_type, Rank> out_strides_k;
    array<size_type, Rank> x_strides_k;
    array<size_type, Rank> y_strides_k;
    out_strides_k.assign(std::begin(outStride), std::end(outStride));
    x_strides_k.assign(std::begin(inStride1), std::end(inStride1));
    y_strides_k.assign(std::begin(inStride2), std::end(inStride2));

    /* broadcast flags (int -> bool) */
    array<bool, Rank> x_bcast_k;
    array<bool, Rank> y_bcast_k;
    x_bcast_k.assign(std::begin(inBcast1), std::end(inBcast1));
    y_bcast_k.assign(std::begin(inBcast2), std::end(inBcast2));

    auto bcast_kernel = raw::eltwise_op_bcast<T, EltwiseOp, Rank>;
    auto launch_policy = make_policy(bcast_kernel, output.size(), 0, stream);
    launch_kernel(bcast_kernel, launch_policy,
        output, out_strides_k,
        x, x_strides_k, x_bcast_k,
        y, y_strides_k, y_bcast_k,
        params);
}
|
||||
|
||||
GENERATE_KERNEL_DISPATCHER_2TP(eltwise_op_bcast_dispatcher, launch_eltwise_op_bcast);
|
||||
|
||||
template <class T, class EltwiseOp> static
|
||||
void eltwise_op(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y, const typename EltwiseOp::Params& params = {}) {
|
||||
if (is_shape_same(output, x) && is_shape_same(output, y))
|
||||
{
|
||||
/* no broadcasting; use fast path */
|
||||
CV_Assert(x.size() == y.size());
|
||||
CV_Assert(x.size() == output.size());
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(x, 4) && is_fully_aligned<T>(y, 4)) {
|
||||
launch_vectorized_eltwise_op<T, EltwiseOp, 4>(stream, output, x, y, params);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(x, 2) && is_fully_aligned<T>(y, 2)) {
|
||||
launch_vectorized_eltwise_op<T, EltwiseOp, 2>(stream, output, x, y, params);
|
||||
} else {
|
||||
launch_vectorized_eltwise_op<T, EltwiseOp, 1>(stream, output, x, y, params);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Assert(is_shape_compatible(output, x));
|
||||
CV_Assert(is_shape_compatible(output, y));
|
||||
|
||||
/* matching singleton axes in both input tensors can be eliminated
|
||||
*
|
||||
* Reasoning:
|
||||
* ----------
|
||||
* Singleton axes do not contribute towards address calculation. They are redundant
|
||||
* unless there is broadcasting. If both input tensors have singleton axis at a
|
||||
* specified position, there is no broadcasting on that axis.
|
||||
*
|
||||
* Example:
|
||||
* ---------
|
||||
* x: [1, 256, 32, 32] -> [256, 32, 32]
|
||||
* y: [1, 256, 1, 1] -> [256, 1, 1]
|
||||
*/
|
||||
for (int r = 0; r < output.rank(); r++)
|
||||
{
|
||||
while (x.get_axis_size(r) == 1 && y.get_axis_size(r) == 1) {
|
||||
CV_Assert(output.get_axis_size(r) == 1);
|
||||
|
||||
x.squeeze(r);
|
||||
y.squeeze(r);
|
||||
output.squeeze(r);
|
||||
}
|
||||
}
|
||||
|
||||
auto inShape1 = x.shape_as_vector();
|
||||
auto inShape2 = y.shape_as_vector();
|
||||
auto outShape = output.shape_as_vector();
|
||||
|
||||
/* contiguous axes that do not broadcast can be merged into one axis
|
||||
*
|
||||
* Example:
|
||||
* ---------
|
||||
* x: [32, 8, 8] -> [32, 64]
|
||||
* y: [1, 8, 8] -> [1, 64]
|
||||
*/
|
||||
for (int i = 0; i < inShape1.size(); i++) {
|
||||
/* check if axis `i` requires any broadcasting */
|
||||
if (inShape1[i] == inShape2[i]) {
|
||||
/* loop invariant: `i` is the first axis in the contiguous axis sequence */
|
||||
|
||||
int j = i + 1; /* `j` is the axis which we will attempt to merge */
|
||||
while (j < inShape1.size() && inShape1[j] == inShape2[j]) {
|
||||
CV_Assert(outShape[j] == inShape1[j]);
|
||||
|
||||
/* `j` axis is also used fully; merge `i` and `j` */
|
||||
auto new_size = inShape1[i] * inShape1[j];
|
||||
inShape1[i] = new_size;
|
||||
inShape2[i] = new_size;
|
||||
|
||||
/* delete axis `j` */
|
||||
inShape1.erase(std::begin(inShape1) + j);
|
||||
inShape2.erase(std::begin(inShape2) + j);
|
||||
outShape.erase(std::begin(outShape) + j);
|
||||
|
||||
/* optimizations should not break the invariants */
|
||||
CV_Assert(inShape1.size() == outShape.size());
|
||||
CV_Assert(inShape2.size() == outShape.size());
|
||||
CV_Assert(inShape1[i] == outShape[i]);
|
||||
CV_Assert(inShape2[i] == outShape[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* contiguous broadcasting axes on the same tensor can be merged into one axis
|
||||
*
|
||||
* Example:
|
||||
* ---------
|
||||
* x: [256, 8, 8] -> [256, 64]
|
||||
* y: [256, 1, 1] -> [256, 1]
|
||||
*/
|
||||
for (int i = 0; i < inShape1.size(); i++) {
|
||||
/* check if axis `i` requires any broadcasting in tensor 1 */
|
||||
if (inShape1[i] == 1 && inShape2[i] != 1) {
|
||||
/* loop invariant: `i` is the first axis in the contiguous axis sequence */
|
||||
|
||||
int j = i + 1; /* `j` is the axis which we will attempt to merge */
|
||||
while (j < inShape1.size() && inShape1[j] == 1 && inShape2[j] != 1) {
|
||||
CV_Assert(outShape[j] == inShape2[j]);
|
||||
|
||||
/* `j` axis is also used fully; merge `i` and `j` */
|
||||
inShape1[i] = 1;
|
||||
inShape2[i] = inShape2[i] * inShape2[j];
|
||||
outShape[i] = inShape2[i];
|
||||
|
||||
/* delete axis `j` */
|
||||
inShape1.erase(std::begin(inShape1) + j);
|
||||
inShape2.erase(std::begin(inShape2) + j);
|
||||
outShape.erase(std::begin(outShape) + j);
|
||||
|
||||
/* optimizations should not break the invariants */
|
||||
CV_Assert(inShape1.size() == outShape.size());
|
||||
CV_Assert(inShape2.size() == outShape.size());
|
||||
CV_Assert(inShape1[i] == 1);
|
||||
CV_Assert(inShape2[i] == outShape[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* check if axis `i` requires any broadcasting in tensor 2 */
|
||||
if (inShape1[i] != 1 && inShape2[i] == 1) {
|
||||
/* loop invariant: `i` is the first axis in the contiguous axis sequence */
|
||||
|
||||
int j = i + 1; /* `j` is the axis which we will attempt to merge */
|
||||
while (j < inShape1.size() && inShape1[j] != 1 && inShape2[j] == 1) {
|
||||
CV_Assert(outShape[j] == inShape1[j]);
|
||||
|
||||
/* `j` axis is also used fully; merge `i` and `j` */
|
||||
inShape1[i] = inShape1[i] * inShape1[j];
|
||||
inShape2[i] = 1;
|
||||
outShape[i] = inShape1[i];
|
||||
|
||||
/* delete axis `j` */
|
||||
inShape1.erase(std::begin(inShape1) + j);
|
||||
inShape2.erase(std::begin(inShape2) + j);
|
||||
outShape.erase(std::begin(outShape) + j);
|
||||
|
||||
/* optimizations should not break the invariants */
|
||||
CV_Assert(inShape1.size() == outShape.size());
|
||||
CV_Assert(inShape2.size() == outShape.size());
|
||||
CV_Assert(inShape1[i] == outShape[i]);
|
||||
CV_Assert(inShape2[i] == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto rank = outShape.size();
|
||||
|
||||
std::vector<std::size_t> inStride1(rank), inStride2(rank), outStride(rank);
|
||||
inStride1.back() = 1;
|
||||
inStride2.back() = 1;
|
||||
outStride.back() = 1;
|
||||
/* garbage, ..., garbage, 1 */
|
||||
|
||||
std::copy(std::begin(inShape1) + 1, std::end(inShape1), std::begin(inStride1));
|
||||
std::copy(std::begin(inShape2) + 1, std::end(inShape2), std::begin(inStride2));
|
||||
std::copy(std::begin(outShape) + 1, std::end(outShape), std::begin(outStride));
|
||||
/* dim[0], dim[1], ..., dim[-1], 1 */
|
||||
|
||||
std::partial_sum(inStride1.rbegin(), inStride1.rend(), inStride1.rbegin(), std::multiplies<std::size_t>());
|
||||
std::partial_sum(inStride2.rbegin(), inStride2.rend(), inStride2.rbegin(), std::multiplies<std::size_t>());
|
||||
std::partial_sum(outStride.rbegin(), outStride.rend(), outStride.rbegin(), std::multiplies<std::size_t>());
|
||||
/* stride[0], stride[1], ..., stride[-2], 1 */
|
||||
|
||||
std::vector<int> inBcast1(rank), inBcast2(rank);
|
||||
std::transform(std::begin(inShape1), std::end(inShape1), std::begin(inBcast1), [](std::size_t sz) { return sz == 1; });
|
||||
std::transform(std::begin(inShape2), std::end(inShape2), std::begin(inBcast2), [](std::size_t sz) { return sz == 1; });
|
||||
|
||||
CV_Assert(1 <= rank && rank <= CSL_MAX_TENSOR_RANK);
|
||||
eltwise_op_bcast_dispatcher<T, EltwiseOp, 1, CSL_MAX_TENSOR_RANK>(rank, stream, output, outStride, x, inStride1, inBcast1, y, inStride2, inBcast2, params);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_max_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||
eltwise_op<T, MaxFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_min_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||
eltwise_op<T, MinFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_sum_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||
eltwise_op<T, SumFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_sum_coeff_2(const Stream& stream, TensorSpan<T> output, T coeff_x, TensorView<T> x, T coeff_y, TensorView<T> y) {
|
||||
eltwise_op<T, ScaledSumFunctor<T>>(stream, output, x, y, {coeff_x, coeff_y});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_prod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||
eltwise_op<T, ProductFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void eltwise_div_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y) {
|
||||
eltwise_op<T, DivFunctor<T>>(stream, output, x, y);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void eltwise_div_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_prod_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_sum_coeff_2(const Stream&, TensorSpan<__half>, __half, TensorView<__half>, __half, TensorView<__half>);
|
||||
template void eltwise_sum_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_max_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
template void eltwise_min_2(const Stream& stream, TensorSpan<__half> output, TensorView<__half> x, TensorView<__half> y);
|
||||
#endif
|
||||
template void eltwise_div_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_prod_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_sum_coeff_2(const Stream&, TensorSpan<float>, float, TensorView<float>, float, TensorView<float>);
|
||||
template void eltwise_sum_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_max_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
template void eltwise_min_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
81
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/execution.hpp
vendored
Normal file
81
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/execution.hpp
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_EXECUTION_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_EXECUTION_HPP
|
||||
|
||||
#include "../cuda4dnn/csl/error.hpp"
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
|
||||
|
||||
struct execution_policy {
|
||||
execution_policy(dim3 grid_size, dim3 block_size)
|
||||
: grid{ grid_size }, block{ block_size }, sharedMem{ 0 }, stream{ 0 } { }
|
||||
|
||||
execution_policy(dim3 grid_size, dim3 block_size, std::size_t shared_mem)
|
||||
: grid{ grid_size }, block{ block_size }, sharedMem{ shared_mem }, stream{ nullptr } { }
|
||||
|
||||
execution_policy(dim3 grid_size, dim3 block_size, const Stream& strm)
|
||||
: grid{ grid_size }, block{ block_size }, sharedMem{ 0 }, stream{ strm.get() } { }
|
||||
|
||||
execution_policy(dim3 grid_size, dim3 block_size, std::size_t shared_mem, const Stream& strm)
|
||||
: grid{ grid_size }, block{ block_size }, sharedMem{ shared_mem }, stream{ strm.get() } { }
|
||||
|
||||
dim3 grid;
|
||||
dim3 block;
|
||||
std::size_t sharedMem;
|
||||
cudaStream_t stream;
|
||||
};
|
||||
|
||||
/* this overload shouldn't be necessary; we should always provide a bound on the number of threads */
|
||||
/*
|
||||
template <class Kernel> inline
|
||||
execution_policy make_policy(Kernel kernel, std::size_t sharedMem = 0, const Stream& stream = 0) {
|
||||
int grid_size, block_size;
|
||||
CUDA4DNN_CHECK_CUDA(cudaOccupancyMaxPotentialBlockSize(&grid_size, &block_size, kernel, sharedMem));
|
||||
return execution_policy(grid_size, block_size, sharedMem, stream);
|
||||
}*/
|
||||
|
||||
template <class Kernel> inline
|
||||
execution_policy make_policy(Kernel kernel, std::size_t max_threads, std::size_t sharedMem = 0, const Stream& stream = 0) {
|
||||
CV_Assert(max_threads > 0);
|
||||
|
||||
int grid_size = 0, block_size = 0;
|
||||
CUDA4DNN_CHECK_CUDA(cudaOccupancyMaxPotentialBlockSize(&grid_size, &block_size, kernel, sharedMem));
|
||||
if (grid_size * block_size > max_threads) {
|
||||
grid_size = (max_threads + block_size - 1) / block_size;
|
||||
if (block_size > max_threads)
|
||||
block_size = max_threads;
|
||||
}
|
||||
|
||||
CV_Assert(grid_size >= 1 && block_size >= 1);
|
||||
return execution_policy(grid_size, block_size, sharedMem, stream);
|
||||
}
|
||||
|
||||
template <class Kernel, typename ...Args> inline
|
||||
void launch_kernel(Kernel kernel, Args ...args) {
|
||||
auto policy = make_policy(kernel);
|
||||
kernel <<<policy.grid, policy.block>>> (args...);
|
||||
}
|
||||
|
||||
template <class Kernel, typename ...Args> inline
|
||||
void launch_kernel(Kernel kernel, dim3 grid, dim3 block, Args ...args) {
|
||||
kernel <<<grid, block>>> (args...);
|
||||
}
|
||||
|
||||
template <class Kernel, typename ...Args> inline
|
||||
void launch_kernel(Kernel kernel, execution_policy policy, Args ...args) {
|
||||
kernel <<<policy.grid, policy.block, policy.sharedMem, policy.stream>>> (args...);
|
||||
}
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::csl */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_EXECUTION_HPP */
|
||||
98
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/fill_copy.cu
vendored
Normal file
98
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/fill_copy.cu
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, std::size_t N>
|
||||
__global__ void fill_vec(Span<T> output, T value) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
|
||||
vector_type vec;
|
||||
for (int j = 0; j < vector_type::size(); j++)
|
||||
vec.data[j] = value;
|
||||
v_store(output_vPtr[i], vec);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t N>
|
||||
__global__ void copy_vec(Span<T> output, View<T> input) {
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
auto input_vPtr = vector_type::get_pointer(input.data());
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
|
||||
vector_type vec;
|
||||
v_load(vec, input_vPtr[i]);
|
||||
v_store(output_vPtr[i], vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_vectorized_fill(const Stream& stream, Span<T> output, T value) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
|
||||
auto kernel = raw::fill_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, value);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void fill(const Stream& stream, Span<T> output, T value) {
|
||||
if (is_fully_aligned<T>(output, 4)) {
|
||||
launch_vectorized_fill<T, 4>(stream, output, value);
|
||||
} else if (is_fully_aligned<T>(output, 2)) {
|
||||
launch_vectorized_fill<T, 2>(stream, output, value);
|
||||
} else {
|
||||
launch_vectorized_fill<T, 1>(stream, output, value);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void fill(const Stream&, Span<__half>, __half);
|
||||
#endif
|
||||
template void fill(const Stream&, Span<float>, float);
|
||||
template void fill(const Stream&, Span<int>, int);
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_vectorized_copy(const Stream& stream, Span<T> output, View<T> input) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
|
||||
auto kernel = raw::copy_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void copy(const Stream& stream, Span<T> output, View<T> input) {
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
|
||||
launch_vectorized_copy<T, 4>(stream, output, input);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
|
||||
launch_vectorized_copy<T, 2>(stream, output, input);
|
||||
} else {
|
||||
launch_vectorized_copy<T, 1>(stream, output, input);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void copy(const Stream&, Span<__half>, View<__half>);
|
||||
#endif
|
||||
template void copy(const Stream&, Span<float>, View<float>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
102
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/fp_conversion.cu
vendored
Normal file
102
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/fp_conversion.cu
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <std::size_t N>
|
||||
__global__ void fp32_to_fp16(Span<__half> output, View<float> input) {
|
||||
using output_vector_type = get_vector_type_t<__half, N>;
|
||||
using input_vector_type = get_vector_type_t<float, N>;
|
||||
|
||||
auto output_vPtr = output_vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = input_vector_type::get_pointer(input.data());
|
||||
|
||||
for (auto i : grid_stride_range(output.size() / output_vector_type::size())) {
|
||||
input_vector_type in_vec;
|
||||
v_load(in_vec, input_vPtr[i]);
|
||||
|
||||
output_vector_type out_vec;
|
||||
for (int j = 0; j < output_vector_type::size(); j++)
|
||||
out_vec.data[j] = __float2half(in_vec.data[j]);
|
||||
|
||||
v_store(output_vPtr[i], out_vec);
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t N>
|
||||
__global__ void fp16_to_fp32(Span<float> output, View<__half> input) {
|
||||
using output_vector_type = get_vector_type_t<float, N>;
|
||||
using input_vector_type = get_vector_type_t<__half, N>;
|
||||
|
||||
auto output_vPtr = output_vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = input_vector_type::get_pointer(input.data());
|
||||
|
||||
for (auto i : grid_stride_range(output.size() / output_vector_type::size())) {
|
||||
input_vector_type in_vec;
|
||||
v_load(in_vec, input_vPtr[i]);
|
||||
|
||||
output_vector_type out_vec;
|
||||
for (int j = 0; j < output_vector_type::size(); j++)
|
||||
out_vec.data[j] = __half2float(in_vec.data[j]);
|
||||
|
||||
v_store(output_vPtr[i], out_vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <std::size_t N> static
|
||||
void launch_vectorized_fp32_to_fp16(const Stream& stream, Span<__half> output, View<float> input) {
|
||||
CV_Assert(is_fully_aligned<__half>(output, N));
|
||||
CV_Assert(is_fully_aligned<float>(input, N));
|
||||
|
||||
auto kernel = raw::fp32_to_fp16<N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input);
|
||||
}
|
||||
|
||||
/* Converts a float buffer to half precision, using the widest vector width (4, 2 or 1)
 * for which both buffers are fully aligned.
 */
void fp32_to_fp16(const Stream& stream, Span<__half> output, View<float> input) {
    if (is_fully_aligned<__half>(output, 4) && is_fully_aligned<float>(input, 4)) {
        launch_vectorized_fp32_to_fp16<4>(stream, output, input);
        return;
    }

    if (is_fully_aligned<__half>(output, 2) && is_fully_aligned<float>(input, 2)) {
        launch_vectorized_fp32_to_fp16<2>(stream, output, input);
        return;
    }

    launch_vectorized_fp32_to_fp16<1>(stream, output, input);
}
|
||||
|
||||
template <std::size_t N> static
|
||||
void launch_vectorized_fp16_to_fp32(const Stream& stream, Span<float> output, View<__half> input) {
|
||||
CV_Assert(is_fully_aligned<float>(output, N));
|
||||
CV_Assert(is_fully_aligned<__half>(input, N));
|
||||
|
||||
auto kernel = raw::fp16_to_fp32<N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input);
|
||||
}
|
||||
|
||||
/* Converts a half precision buffer to float, using the widest vector width (4, 2 or 1)
 * for which both buffers are fully aligned.
 */
void fp16_to_fp32(const Stream& stream, Span<float> output, View<__half> input) {
    if (is_fully_aligned<float>(output, 4) && is_fully_aligned<__half>(input, 4)) {
        launch_vectorized_fp16_to_fp32<4>(stream, output, input);
        return;
    }

    if (is_fully_aligned<float>(output, 2) && is_fully_aligned<__half>(input, 2)) {
        launch_vectorized_fp16_to_fp32<2>(stream, output, input);
        return;
    }

    launch_vectorized_fp16_to_fp32<1>(stream, output, input);
}
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
334
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/functors.hpp
vendored
Normal file
334
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/functors.hpp
vendored
Normal file
@@ -0,0 +1,334 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "math.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/nvcc_defs.hpp"
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
template <class T>
|
||||
struct IdentityFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE IdentityFunctor() { }
|
||||
CUDA4DNN_DEVICE IdentityFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
return value;
|
||||
};
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ReLUFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() : slope(0) { }
|
||||
CUDA4DNN_HOST_DEVICE Params(T slope_) : slope(slope_) { }
|
||||
T slope;
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE ReLUFunctor() : ReLUFunctor(Params{}) { }
|
||||
CUDA4DNN_DEVICE ReLUFunctor(const Params& params) : slope(params.slope) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::log1pexp;
|
||||
return value >= T(0) ? value : slope * value;
|
||||
}
|
||||
|
||||
T slope;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ClippedReLUFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() : floor(0), ceiling(6) { }
|
||||
CUDA4DNN_HOST_DEVICE Params(T floor_, T ceiling_) : floor(floor_), ceiling(ceiling_) { }
|
||||
T floor, ceiling;
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE ClippedReLUFunctor() : ClippedReLUFunctor(Params{}) { }
|
||||
CUDA4DNN_DEVICE ClippedReLUFunctor(const Params& params) : floor{params.floor}, ceiling{params.ceiling} { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::clamp;
|
||||
return clamp(value, floor, ceiling);
|
||||
}
|
||||
|
||||
T floor, ceiling;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct TanHFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE TanHFunctor() { }
|
||||
CUDA4DNN_DEVICE TanHFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::tanh;
|
||||
return tanh(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct SwishFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE SwishFunctor() { }
|
||||
CUDA4DNN_DEVICE SwishFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
// f(x) = x * sigmoid(x)
|
||||
using csl::device::fast_divide;
|
||||
using csl::device::fast_exp;
|
||||
return fast_divide(value, static_cast<T>(1) + fast_exp(-value));
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct MishFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE MishFunctor() { }
|
||||
CUDA4DNN_DEVICE MishFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::tanh;
|
||||
using csl::device::log1pexp;
|
||||
return value * tanh(log1pexp(value));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MishFunctor<float> {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE MishFunctor() { }
|
||||
CUDA4DNN_DEVICE MishFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE float operator()(float value) {
|
||||
// f(x) = x * tanh(log1pexp(x));
|
||||
using csl::device::fast_divide;
|
||||
using csl::device::fast_exp;
|
||||
|
||||
auto e = fast_exp(value);
|
||||
auto n = e * e + 2 * e;
|
||||
if (value <= -0.6f)
|
||||
return value * fast_divide(n, n + 2);
|
||||
return value - 2 * fast_divide(value, n + 2);
|
||||
}
|
||||
};
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template <>
|
||||
struct MishFunctor<__half> {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE MishFunctor() { }
|
||||
CUDA4DNN_DEVICE MishFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE __half operator()(__half value) {
|
||||
return MishFunctor<float>()(value);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template <class T>
|
||||
struct SigmoidFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE SigmoidFunctor() { }
|
||||
CUDA4DNN_DEVICE SigmoidFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::fast_sigmoid;
|
||||
return fast_sigmoid(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ELUFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE ELUFunctor() { }
|
||||
CUDA4DNN_DEVICE ELUFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::expm1;
|
||||
return value >= T(0) ? value : expm1(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct AbsFunctor {
|
||||
struct Params { };
|
||||
|
||||
CUDA4DNN_DEVICE AbsFunctor() { }
|
||||
CUDA4DNN_DEVICE AbsFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::abs;
|
||||
return abs(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct BNLLFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE BNLLFunctor() { }
|
||||
CUDA4DNN_DEVICE BNLLFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::log1pexp;
|
||||
return value > T(0) ? value + log1pexp(-value) : log1pexp(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct PowerFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() : exp(1), scale(1), shift(0) { }
|
||||
CUDA4DNN_HOST_DEVICE Params(T exp_, T scale_, T shift_) : exp(exp_), scale(scale_), shift(shift_) { }
|
||||
T exp, scale, shift;
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE PowerFunctor() : PowerFunctor(Params{}) { }
|
||||
CUDA4DNN_DEVICE PowerFunctor(const Params& params) : exp{params.exp}, scale{params.scale}, shift{params.shift} { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::pow;
|
||||
return pow(shift + scale * value, exp);
|
||||
}
|
||||
|
||||
T exp, scale, shift;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ExpFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() : normScale(1), normShift(0) { }
|
||||
CUDA4DNN_HOST_DEVICE Params(T nScale_, T nShift_) : normScale(nScale_), normShift(nShift_) { }
|
||||
T normScale, normShift;
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE ExpFunctor() : ExpFunctor(Params{}) { }
|
||||
CUDA4DNN_DEVICE ExpFunctor(const Params& params) : normScale{params.normScale}, normShift{params.normShift} { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T value) {
|
||||
using csl::device::fast_exp;
|
||||
return fast_exp(normShift + normScale * value);
|
||||
}
|
||||
|
||||
T normScale, normShift;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct MaxFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE MaxFunctor() { }
|
||||
CUDA4DNN_DEVICE MaxFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) {
|
||||
using csl::device::max;
|
||||
return max(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct MinFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE MinFunctor() { }
|
||||
CUDA4DNN_DEVICE MinFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) {
|
||||
using csl::device::min;
|
||||
return min(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct SumFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE SumFunctor() { }
|
||||
CUDA4DNN_DEVICE SumFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) { return x + y; }
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ScaledSumFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() : scale_x(1), scale_y(1) { }
|
||||
CUDA4DNN_HOST_DEVICE Params(T scale_x_, T scale_y_) : scale_x(scale_x_), scale_y(scale_y_) { }
|
||||
T scale_x, scale_y;
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE ScaledSumFunctor() : scale_x(1), scale_y(1) { }
|
||||
CUDA4DNN_DEVICE ScaledSumFunctor(const Params& params) : scale_x{params.scale_x}, scale_y{params.scale_y} { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) { return scale_x * x + scale_y * y; }
|
||||
|
||||
T scale_x, scale_y;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct ProductFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE ProductFunctor() { }
|
||||
CUDA4DNN_DEVICE ProductFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) { return x * y; }
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct DivFunctor {
|
||||
struct Params {
|
||||
CUDA4DNN_HOST_DEVICE Params() { }
|
||||
};
|
||||
|
||||
CUDA4DNN_DEVICE DivFunctor() { }
|
||||
CUDA4DNN_DEVICE DivFunctor(const Params& params) { }
|
||||
|
||||
CUDA4DNN_DEVICE T operator()(T x, T y) { return x / y; }
|
||||
};
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */
|
||||
467
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/grid_nms.cu
vendored
Normal file
467
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/grid_nms.cu
vendored
Normal file
@@ -0,0 +1,467 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "bbox_utils.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "block_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "memory.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
|
||||
/* First stage of grid NMS: builds the pairwise "do not reject" bit matrix.
 *
 * Grid layout as read by the kernel: blockIdx.x encodes a (group_i, group_j) pair of
 * BLOCK_SIZE-sized box groups, blockIdx.y is the class and blockIdx.z is the batch item.
 * Blocks return without writing anything when the class is the background class, when the
 * class has no boxes, or when group_i > group_j (only the upper triangle of group pairs is computed).
 *
 * For every computed pair, bit (i, j) of the mask is set when IOU(i, j) <= nms_threshold.
 * Entries that involve boxes beyond count[b][c] are computed from undefined data (see the
 * comment inside the main loop).
 *
 * NOTE(review): the indexing assumes the kernel is launched with blockDim.x == BLOCK_SIZE and
 * a warp size of 32 - confirm at the launch site.
 */
template <class T, bool NORMALIZED_BBOX, int BLOCK_SIZE>
|
||||
__launch_bounds__(BLOCK_SIZE)
|
||||
__global__ void grid_nms(Span<unsigned int> mask_, Span<int> count_, View<T> bboxes_, size_type num_classes, index_type background_class_id, size_type topK, size_type topK_gs, float nms_threshold)
|
||||
{
|
||||
// topK_gs is topK rounded upwards to some size
|
||||
|
||||
// mask: [batch_size, num_classes, topK_gs, topK_gs / 32]
|
||||
// bboxes: [batch_size, num_classes, topK, 4]
|
||||
// count: [batch_size, num_classes]
|
||||
|
||||
const index_type c = blockIdx.y;
|
||||
const index_type b = blockIdx.z;
|
||||
|
||||
if (c == background_class_id)
|
||||
return;
|
||||
|
||||
auto mask = mask_.data() + (b * num_classes + c) * topK_gs * topK_gs / 32;
|
||||
auto bboxes = bboxes_.data() + (b * num_classes + c) * topK * 4;
|
||||
auto count = count_.data() + b * num_classes + c;
|
||||
|
||||
const auto boxes = *count;
|
||||
if (boxes == 0)
|
||||
return;
|
||||
|
||||
/* We divide the set of boxes into groups containing BLOCK_SIZE boxes */
|
||||
const auto num_groups = (boxes + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
|
||||
/* We need to calculate IOUs for every pair of boxes. We can generalize and say that
|
||||
* we need to compute IOUs of every group with every other group including itself.
|
||||
*/
|
||||
// Each block processes a pair of groups.
|
||||
const index_type group_i = blockIdx.x % num_groups;
|
||||
const index_type group_j = blockIdx.x / num_groups;
|
||||
|
||||
/* we use __syncthreads() later but note that the following condition will cause all threads
|
||||
* in the block to exit; hence, no thread will execute a divergent __syncthreads()
|
||||
*/
|
||||
if (group_i >= num_groups || group_j >= num_groups)
|
||||
return;
|
||||
|
||||
/* Note that IOU(A, B) = IOU(B, A). Hence, if we compute IOU(GROUP_A, GROUP_B), we do not need
|
||||
* to compute IOU(GROUP_B, GROUP_A). We still have to compute IOU(GROUP_A, GROUP_A) though since
|
||||
* each group has many boxes and we need IOUs amongst boxes within a group.
|
||||
*
|
||||
* We arbitrarily choose a scheme to exit : exit if group_i is greater than group_j. This way we only
|
||||
* compute IOUs between groups once. While nearly half the blocks are wasted, it's ok since they exit
|
||||
* early on and the working blocks are compute heavy.
|
||||
*/
|
||||
if (group_i > group_j)
|
||||
return;
|
||||
|
||||
/* the following variables contain the absolute box number of the first box of their respective groups */
|
||||
const auto group_i_offset = group_i * BLOCK_SIZE;
|
||||
const auto group_j_offset = group_j * BLOCK_SIZE;
|
||||
|
||||
/* MAIN LOOP LOGIC:
|
||||
* We compare a box `i` from group_i with all boxes in group_j in each iteration. The box `j` is fixed
|
||||
* for each thread. The `j` exactly maps to the thread index. Hence, the `j` is a loop invariant. Each
|
||||
* thread of the block computes the overlap between box `i` and its box `j`.
|
||||
*
|
||||
* for (int i = 0; i < BLOCK_SIZE; i++)
|
||||
* {
|
||||
* // i = box 1
|
||||
* // j = threadIdx.x = box 2
|
||||
* }
|
||||
*/
|
||||
|
||||
/* The `j` box is fixed for each thread. All `i` boxes will be required for every thread.
|
||||
* We store the `i` boxes in shared memory to allow global memory coalescing.
|
||||
*/
|
||||
using vector_type = get_vector_type_t<T, 4>;
|
||||
__shared__ vector_type group_i_boxes[BLOCK_SIZE];
|
||||
|
||||
/* We will precompute the sizes of `i` boxes in the code where we load them. The size computation
|
||||
* is distributed across the block. Otherwise, all threads will have to compute the size of the same
|
||||
* box simultaneously in the main loop. The size is computed while the memory subsystem is busy
|
||||
* servicing requests for box coordinates; the compute resources would otherwise be idle in this phase.
|
||||
*/
|
||||
/* we store the size as a float since the size can exceed fp16 limits for unnormalized boxes */
|
||||
__shared__ float group_i_size[BLOCK_SIZE];
|
||||
|
||||
const auto bboxes_vPtr = vector_type::get_pointer(bboxes);
|
||||
|
||||
// load `i` boxes and precompute their sizes
|
||||
{
|
||||
int i = threadIdx.x;
|
||||
if (group_i_offset + i < boxes)
|
||||
{
|
||||
vector_type box;
|
||||
v_load(box, bboxes_vPtr[group_i_offset + i]);
|
||||
v_store(group_i_boxes[i], box);
|
||||
|
||||
BoundingBox bbox;
|
||||
bbox.xmin = box.data[0];
|
||||
bbox.ymin = box.data[1];
|
||||
bbox.xmax = box.data[2];
|
||||
bbox.ymax = box.data[3];
|
||||
|
||||
group_i_size[i] = compute_bbox_size<NORMALIZED_BBOX>(bbox);
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
/* We compute overlap between boxes and check if the IOU exceeds the nms threshold.
|
||||
* We store the result (exceeds or below nms_threshold) in a two-dimensional matrix.
|
||||
* (i, j) is set to one if the overlap between i and j is within the nms threshold.
|
||||
* We pack 32 results into one 32-bit integer. The effective memory layout of the
|
||||
* matrix hence is (BLOCK_SIZE, BLOCK_SIZE / 32).
|
||||
*/
|
||||
__shared__ unsigned int mask_shared[BLOCK_SIZE * BLOCK_SIZE / 32];
|
||||
|
||||
// load box `j` and precompute its size (fixed per thread)
|
||||
BoundingBox bbox_j;
|
||||
float bbox_j_size = 0;
|
||||
if (group_j_offset + threadIdx.x < boxes)
|
||||
{
|
||||
vector_type box;
|
||||
v_load(box, bboxes_vPtr[group_j_offset + threadIdx.x]);
|
||||
|
||||
bbox_j.xmin = box.data[0];
|
||||
bbox_j.ymin = box.data[1];
|
||||
bbox_j.xmax = box.data[2];
|
||||
bbox_j.ymax = box.data[3];
|
||||
|
||||
bbox_j_size = compute_bbox_size<NORMALIZED_BBOX>(bbox_j);
|
||||
}
|
||||
|
||||
/* Each thread computes a predicate which is broadcasted across the warp to obtain a 32-bit mask.
|
||||
* The lane zero thread of each warp saves the mask. We store the offset to the mask array beforehand
|
||||
* to save cycles in the compute-intensive main loop.
|
||||
*/
|
||||
auto mask_offset = threadIdx.x / 32;
|
||||
|
||||
/* The main loop is compute intensive and causes the kernel to be overall compute-bound. Hence,
|
||||
* this loop has been highly tuned. Please profile and verify carefully before making changes.
|
||||
*/
|
||||
/* UNROLL_SIZE is the number of boxes that must be processed per iteration. We manually unroll
|
||||
* the loop since the compiler cannot effectively unroll on its own, presumably due to the presence
|
||||
* of instructions forcing warp synchronization.
|
||||
*/
|
||||
constexpr int UNROLL_SIZE = 4;
|
||||
|
||||
#pragma unroll 8
|
||||
for (int s = 0; s < BLOCK_SIZE; s += UNROLL_SIZE)
|
||||
{
|
||||
bool do_not_reject_j[UNROLL_SIZE];
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < UNROLL_SIZE; k++)
|
||||
{
|
||||
int i = s + k;
|
||||
|
||||
/* The number of boxes need not necessarily be a multiple of BLOCK_SIZE.
|
||||
* However, the shared memory allocated can hold BLOCK_SIZE boxes from
|
||||
* each group. Accessing the undefined regions of shared memory is
|
||||
* a valid memory operation as long as the memory has been allocated.
|
||||
*
|
||||
* The condition below is only required when one of the groups is not
|
||||
* fully filled with valid boxes. These situations are relatively rare. It's
|
||||
* more common to see both groups completely filled.
|
||||
*
|
||||
* We comment this condition to improve the performance of the common case.
|
||||
* This leads to a net improvement.
|
||||
*/
|
||||
// if (group_i_offset + i < boxes && group_j_offset + threadIdx.x < boxes)
|
||||
{
|
||||
BoundingBox bbox_i;
|
||||
float bbox_i_size;
|
||||
{
|
||||
vector_type box;
|
||||
v_load(box, group_i_boxes[i]);
|
||||
bbox_i.xmin = box.data[0];
|
||||
bbox_i.ymin = box.data[1];
|
||||
bbox_i.xmax = box.data[2];
|
||||
bbox_i.ymax = box.data[3];
|
||||
|
||||
bbox_i_size = group_i_size[i];
|
||||
}
|
||||
|
||||
using device::min;
|
||||
using device::max;
|
||||
|
||||
BoundingBox intersect_bbox;
|
||||
intersect_bbox.xmin = max(bbox_i.xmin, bbox_j.xmin);
|
||||
intersect_bbox.ymin = max(bbox_i.ymin, bbox_j.ymin);
|
||||
intersect_bbox.xmax = min(bbox_i.xmax, bbox_j.xmax);
|
||||
intersect_bbox.ymax = min(bbox_i.ymax, bbox_j.ymax);
|
||||
|
||||
float intersect_size = compute_bbox_size<NORMALIZED_BBOX>(intersect_bbox);
|
||||
|
||||
using device::fast_divide_ftz;
|
||||
float iou = fast_divide_ftz(intersect_size, bbox_i_size + bbox_j_size - intersect_size);
|
||||
do_not_reject_j[k] = iou <= nms_threshold;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < UNROLL_SIZE; k++)
|
||||
{
|
||||
// FORWARD_COMPATIBILITY_TAG: WARP_SIZE_DEPENDENT_CODE
|
||||
auto predicate = __ballot_sync(0xFFFFFFFF, do_not_reject_j[k]);
|
||||
if (threadIdx.x % 32 == 0)
|
||||
mask_shared[mask_offset] = predicate;
|
||||
|
||||
/* The following operation should logically be inside the previous if branch. Note that `mask_offset`
|
||||
* is only used by lane zero threads. Hence, there is no harm in executing it in other threads as it is
|
||||
* unused there.
|
||||
*
|
||||
* Keeping it inside prevents the compiler from treating it as a constexpr addition to the address in
|
||||
* successive unrolled iterations. A register is used and instructions are emitted to multiply the
|
||||
* addend by four to obtain the byte offset. Pulling it out of the branch makes the compiler do constexpr
|
||||
* addition on the address in successive unrolled iterations.
|
||||
*/
|
||||
mask_offset += BLOCK_SIZE / 32;
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
/* The mask data is organized as a two-dimensional bit matrix of size topK_gs * topK_gs.
|
||||
* (i, j) is set to true if the overlap between `i` and `j` is within the nms threshold
|
||||
* (the predicate stored above is `iou <= nms_threshold`).
|
||||
* We pack 32 results into one 32-bit integer. So the effective memory layout is topK_gs * topK_gs / 32.
|
||||
*/
|
||||
|
||||
/* Each box `i` was compared with BLOCK_SIZE `j` boxes. This amounts to BLOCK_SIZE / 32
|
||||
* 32-bit integers per box `i`.
|
||||
*/
|
||||
using mask_vector_type = get_vector_type_t<unsigned int, BLOCK_SIZE / 32>;
|
||||
|
||||
const int i = threadIdx.x;
|
||||
|
||||
auto mask_shared_vPtr = mask_vector_type::get_pointer(DevicePtr<unsigned>(mask_shared));
|
||||
mask_vector_type temp;
|
||||
v_load(temp, mask_shared_vPtr[i]);
|
||||
// the loop variable below shadows the outer `i`; the outer `i` (thread index) is used again for the store
for (int i = 0; i < mask_vector_type::size(); i++)
|
||||
temp.data[i] = __brev(temp.data[i]);
|
||||
|
||||
auto mask_vPtr = mask_vector_type::get_pointer(mask);
|
||||
v_store(mask_vPtr[((group_i_offset + i) * topK_gs + group_j_offset) / 32 / mask_vector_type::size()], temp);
|
||||
}
|
||||
|
||||
template <int ITEMS_PER_THREAD, int BLOCK_SIZE>
|
||||
__launch_bounds__(BLOCK_SIZE)
|
||||
__global__ void grid_nms_collect(Span<int> indices_, Span<int> count_, View<unsigned int> mask_, size_type num_classes, index_type background_class_id, size_type topK, size_type topK_gs_by32)
|
||||
{
|
||||
const index_type c = blockIdx.x;
|
||||
if (c == background_class_id)
|
||||
return;
|
||||
|
||||
const index_type b = blockIdx.y;
|
||||
|
||||
// topK_gs is topK rounded upwards to some size
|
||||
|
||||
// indices: [batch_size, num_classes, topK]
|
||||
// count: [batch_size, num_classes]
|
||||
// mask: [batch_size, num_classes, topK_gs, topK_gs / 32]
|
||||
|
||||
auto indices = indices_.data() + (b * num_classes + c) * topK;
|
||||
auto count = count_.data() + (b * num_classes + c);
|
||||
auto mask = mask_.data() + (b * num_classes + c) * topK_gs_by32 * 32 * topK_gs_by32;
|
||||
|
||||
const auto boxes = *count;
|
||||
if (boxes == 0)
|
||||
return;
|
||||
|
||||
/* We have a fixed number of threads and an arbitary number of boxes. We use an array of
|
||||
* bits to store which boxes haven't been eliminated and which are still active. We organize
|
||||
* the array of bits into a matrix of bits of the shape (num_rows, BLOCK_SIZE, 32) which
|
||||
* is equivalent to (num_rows, BLOCK_SIZE) where the type is a 32-bit unsigned integer.
|
||||
* `num_rows` is the minimum number of rows required to cover all the boxes.
|
||||
*
|
||||
* Each thread handles a specific column in the matrix. To improve performance, we process
|
||||
* `ITEMS_PER_THREAD` number of elements per thread. This changes the shape to (num_rows,
|
||||
* ROW_WIDTH) where ROW_WIDTH is BLOCK_SIZE * ITEMS_PER_THREAD.
|
||||
*/
|
||||
constexpr int ROW_WIDTH = BLOCK_SIZE * ITEMS_PER_THREAD;
|
||||
|
||||
const index_type num_32b_masks = static_cast<unsigned>(boxes + 31) / 32;
|
||||
const index_type num_rows = static_cast<unsigned>(num_32b_masks + ROW_WIDTH - 1) / ROW_WIDTH;
|
||||
|
||||
extern __shared__ unsigned int active_boxes[]; // the matrix described earlier
|
||||
|
||||
#pragma unroll 1
|
||||
for (auto idx : block_stride_range<BLOCK_SIZE>(num_32b_masks))
|
||||
active_boxes[idx] = (idx == num_32b_masks - 1) ? __brev((1u << (boxes % 32)) - 1) : 0xFFFFFFFF;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
using vector_type = get_vector_type_t<unsigned int, ITEMS_PER_THREAD>;
|
||||
auto mask_vPtr = vector_type::get_pointer(mask);
|
||||
auto shared_vPtr = vector_type::get_pointer(DevicePtr<unsigned>(active_boxes));
|
||||
|
||||
int index_temp;
|
||||
int thread0_count = 0;
|
||||
int thread_id = threadIdx.x;
|
||||
|
||||
for (int step = 0; step < num_32b_masks; step++)
|
||||
{
|
||||
auto current_active = active_boxes[step];
|
||||
while (current_active)
|
||||
{
|
||||
const index_type bit = __clz(current_active);
|
||||
const index_type i = step * 32 + bit;
|
||||
|
||||
const int mask_offset = static_cast<unsigned>(i * topK_gs_by32) / ITEMS_PER_THREAD;
|
||||
|
||||
/* We fetch the index from the memory and store it in a register. We will not use it until
|
||||
* much later. This helps avoid a long scoreboard stall.
|
||||
*/
|
||||
if (thread_id == 0)
|
||||
index_temp = indices[i];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
active_boxes[step] = current_active ^ (0x80000000 >> bit);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
#pragma unroll 1
|
||||
for (int r = 0; r < num_rows; r++)
|
||||
{
|
||||
const int idx = r * BLOCK_SIZE + thread_id;
|
||||
if ((step & ~(ITEMS_PER_THREAD - 1)) <= idx * ITEMS_PER_THREAD && idx * ITEMS_PER_THREAD < num_32b_masks)
|
||||
{
|
||||
auto active_boxes_vec = shared_vPtr[idx];
|
||||
auto mask_vec = mask_vPtr[mask_offset + idx];
|
||||
for (int i = 0; i < vector_type::size(); i++)
|
||||
active_boxes_vec.data[i] &= mask_vec.data[i];
|
||||
shared_vPtr[idx] = active_boxes_vec;
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (thread_id == 0)
|
||||
{
|
||||
indices[thread0_count] = index_temp;
|
||||
thread0_count++;
|
||||
}
|
||||
|
||||
current_active = active_boxes[step];
|
||||
}
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
*count = thread0_count;
|
||||
}
|
||||
}
|
||||
|
||||
/* number of boxes handled by one block of the pairwise IOU kernel */
constexpr int GROUP_SIZE = 128;

/* Rounds `topK` up to the nearest multiple of GROUP_SIZE; multiples (including zero) are returned unchanged. */
static std::size_t getAlignedTopK(std::size_t topK)
{
    const std::size_t excess = topK % GROUP_SIZE;
    return (excess == 0) ? topK : topK + (GROUP_SIZE - excess);
}
|
||||
|
||||
std::size_t getGridNMSWorkspaceSizePerBatchItem(std::size_t num_classes, std::size_t classwise_topK)
|
||||
{
|
||||
auto topK_gs = getAlignedTopK(classwise_topK);
|
||||
return num_classes * topK_gs * topK_gs / 32 * sizeof(unsigned int);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void grid_nms(const Stream& stream, Span<unsigned int> workspace, TensorSpan<int> indices, TensorSpan<int> count, TensorView<T> bboxes, int background_class_id, bool normalized_bbox, float nms_threshold)
|
||||
{
|
||||
// workspace: [batch_size, num_classes, topK_gs, topK_gs / 32]
|
||||
// indices: [batch_size, num_classes, topK]
|
||||
// count: [batch_size, num_classes]
|
||||
// bboxes: [batch_size, num_classes, topK, 4] (only first count[b][c] boxes are read)
|
||||
|
||||
const auto batch_size = indices.get_axis_size(0);
|
||||
CV_Assert(count.get_axis_size(0) == batch_size);
|
||||
CV_Assert(bboxes.get_axis_size(0) == batch_size);
|
||||
|
||||
const auto num_classes = indices.get_axis_size(1);
|
||||
CV_Assert(count.get_axis_size(1) == num_classes);
|
||||
CV_Assert(bboxes.get_axis_size(1) == num_classes);
|
||||
|
||||
const auto topK = indices.get_axis_size(2);
|
||||
CV_Assert(bboxes.get_axis_size(2) == topK);
|
||||
|
||||
CV_Assert(bboxes.get_axis_size(3) == 4);
|
||||
|
||||
const auto topK_gs = getAlignedTopK(topK);
|
||||
CV_Assert(workspace.size() >= topK_gs * topK_gs / 32);
|
||||
|
||||
const auto boxes = topK;
|
||||
const auto num_groups = (boxes + GROUP_SIZE - 1) / GROUP_SIZE;
|
||||
|
||||
{
|
||||
// grid = (num_groups * num_groups, num_classes, batch_size)
|
||||
// if the background class is the last class, we can reduce grid y dim by one
|
||||
auto grid_num_classes = num_classes; //(background_class_id == num_classes - 1) ? num_classes - 1 : num_classes;
|
||||
|
||||
constexpr int BLOCK_SIZE = GROUP_SIZE;
|
||||
|
||||
dim3 grid_size(num_groups * num_groups, grid_num_classes, batch_size);
|
||||
dim3 block_size(BLOCK_SIZE);
|
||||
auto policy = execution_policy(grid_size, block_size, stream);
|
||||
|
||||
if (normalized_bbox)
|
||||
{
|
||||
auto kernel = raw::grid_nms<T, true, BLOCK_SIZE>;
|
||||
launch_kernel(kernel, policy, workspace, count, bboxes, num_classes, background_class_id, topK, topK_gs, nms_threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto kernel = raw::grid_nms<T, false, BLOCK_SIZE>;
|
||||
launch_kernel(kernel, policy, workspace, count, bboxes, num_classes, background_class_id, topK, topK_gs, nms_threshold);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// grid = (num_classes, batch_size)
|
||||
// if the background class is the last class, we can reduce grid x dim by one
|
||||
auto grid_num_classes = num_classes; //(background_class_id == num_classes - 1) ? num_classes - 1 : num_classes;
|
||||
|
||||
constexpr int BLOCK_SIZE = 64;
|
||||
|
||||
constexpr int ITEMS_PER_THREAD = 4;
|
||||
auto kernel = raw::grid_nms_collect<ITEMS_PER_THREAD, BLOCK_SIZE>;
|
||||
|
||||
dim3 grid_size(grid_num_classes, batch_size);
|
||||
|
||||
auto sharedMem = topK_gs / 32 * 4;
|
||||
auto policy = execution_policy(grid_size, BLOCK_SIZE, sharedMem, stream);
|
||||
launch_kernel(kernel, policy, indices, count, workspace, num_classes, background_class_id, topK, topK_gs / 32);
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t getGridNMSWorkspaceSizePerBatchItem(std::size_t num_classes, std::size_t classwise_topK);
|
||||
|
||||
template void grid_nms(const Stream& stream, Span<unsigned int> workspace, TensorSpan<int> indices, TensorSpan<int> count, TensorView<__half> bboxes, int, bool normalized_bbox, float nms_threshold);
|
||||
template void grid_nms(const Stream& stream, Span<unsigned int> workspace, TensorSpan<int> indices, TensorSpan<int> count, TensorView<float> bboxes, int, bool normalized_bbox, float nms_threshold);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
68
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/grid_stride_range.hpp
vendored
Normal file
68
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/grid_stride_range.hpp
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_GRID_STRIDE_RANGE_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_GRID_STRIDE_RANGE_HPP
|
||||
|
||||
#include "types.hpp"
|
||||
#include "index_helpers.hpp"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
template <int dim, class index_type = device::index_type, class size_type = device::size_type>
|
||||
class grid_stride_range_generic {
|
||||
public:
|
||||
__device__ grid_stride_range_generic(index_type to_) : from(0), to(to_) { }
|
||||
__device__ grid_stride_range_generic(index_type from_, index_type to_) : from(from_), to(to_) { }
|
||||
|
||||
class iterator
|
||||
{
|
||||
public:
|
||||
__device__ iterator(index_type pos_) : pos(pos_) {}
|
||||
|
||||
/* these iterators return the index when dereferenced; this allows us to loop
|
||||
* through the indices using a range based for loop
|
||||
*/
|
||||
__device__ index_type operator*() const { return pos; }
|
||||
|
||||
__device__ iterator& operator++() {
|
||||
pos += getGridDim<dim>() * static_cast<index_type>(getBlockDim<dim>());
|
||||
return *this;
|
||||
}
|
||||
|
||||
__device__ bool operator!=(const iterator& other) const {
|
||||
/* NOTE HACK
|
||||
* 'pos' can move in large steps (see operator++)
|
||||
* expansion of range for loop uses != as the loop conditioion
|
||||
* => operator!= must return false if 'pos' crosses the end
|
||||
*/
|
||||
return pos < other.pos;
|
||||
}
|
||||
|
||||
private:
|
||||
index_type pos;
|
||||
};
|
||||
|
||||
__device__ iterator begin() const {
|
||||
return iterator(from + getBlockDim<dim>() * getBlockIdx<dim>() + getThreadIdx<dim>());
|
||||
}
|
||||
|
||||
__device__ iterator end() const {
|
||||
return iterator(to);
|
||||
}
|
||||
|
||||
private:
|
||||
index_type from, to;
|
||||
};
|
||||
|
||||
/* Convenience aliases that fix the dimension argument: 0, 1 and 2 select the x, y and z
 * members of gridDim/blockDim/blockIdx/threadIdx respectively (see index_helpers.hpp).
 */
using grid_stride_range_x = grid_stride_range_generic<0>;
|
||||
using grid_stride_range_y = grid_stride_range_generic<1>;
|
||||
using grid_stride_range_z = grid_stride_range_generic<2>;
|
||||
/* the unsuffixed name is the x-dimension range */
using grid_stride_range = grid_stride_range_x;
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_GRID_STRIDE_RANGE_HPP */
|
||||
41
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/index_helpers.hpp
vendored
Normal file
41
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/index_helpers.hpp
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_INDEX_HELPERS_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_INDEX_HELPERS_HPP
|
||||
|
||||
#include "types.hpp"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
namespace detail {
|
||||
using dim3_member_type = decltype(dim3::x);
|
||||
using uint3_member_type = decltype(uint3::x);
|
||||
}
|
||||
|
||||
template <int> __device__ detail::dim3_member_type getGridDim();
|
||||
template <> inline __device__ detail::dim3_member_type getGridDim<0>() { return gridDim.x; }
|
||||
template <> inline __device__ detail::dim3_member_type getGridDim<1>() { return gridDim.y; }
|
||||
template <> inline __device__ detail::dim3_member_type getGridDim<2>() { return gridDim.z; }
|
||||
|
||||
template <int> __device__ detail::dim3_member_type getBlockDim();
|
||||
template <> inline __device__ detail::dim3_member_type getBlockDim<0>() { return blockDim.x; }
|
||||
template <> inline __device__ detail::dim3_member_type getBlockDim<1>() { return blockDim.y; }
|
||||
template <> inline __device__ detail::dim3_member_type getBlockDim<2>() { return blockDim.z; }
|
||||
|
||||
template <int> __device__ detail::uint3_member_type getBlockIdx();
|
||||
template <> inline __device__ detail::uint3_member_type getBlockIdx<0>() { return blockIdx.x; }
|
||||
template <> inline __device__ detail::uint3_member_type getBlockIdx<1>() { return blockIdx.y; }
|
||||
template <> inline __device__ detail::uint3_member_type getBlockIdx<2>() { return blockIdx.z; }
|
||||
|
||||
template <int> __device__ detail::uint3_member_type getThreadIdx();
|
||||
template <> inline __device__ detail::uint3_member_type getThreadIdx<0>() { return threadIdx.x; }
|
||||
template <> inline __device__ detail::uint3_member_type getThreadIdx<1>() { return threadIdx.y; }
|
||||
template <> inline __device__ detail::uint3_member_type getThreadIdx<2>() { return threadIdx.z; }
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_INDEX_HELPERS_HPP */
|
||||
94
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/kernel_dispatcher.hpp
vendored
Normal file
94
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/kernel_dispatcher.hpp
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_KERNEL_DISPATCHER_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_KERNEL_DISPATCHER_HPP
|
||||
|
||||
#include <cstddef>
#include <type_traits>
#include <utility>
|
||||
|
||||
/* The performance of many kernels is highly dependent on the tensor rank. Instead of having
|
||||
* one kernel which can work with the maximally ranked tensors, we make one kernel for each supported
|
||||
* tensor rank. This is to ensure that the requirements of the maximally ranked tensors do not take a
|
||||
* toll on the performance of the operation for low ranked tensors. Hence, many kernels take the tensor
|
||||
* rank as a template parameter.
|
||||
*
|
||||
* The kernel is a template and we have different instantiations for each rank. This causes the following pattern
|
||||
* to arise frequently:
|
||||
*
|
||||
* if(rank == 3)
|
||||
* kernel<T, 3>();
|
||||
* else if(rank == 2)
|
||||
* kernel<T, 2>();
|
||||
* else
|
||||
* kernel<T, 1>();
|
||||
*
|
||||
* The rank is a runtime variable. To facilitate creation of such structures, we use GENERATE_KERNEL_DISPATCHER.
|
||||
* This macro creates a function which selects the correct kernel instantiation at runtime.
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* // function which setups the kernel and launches it
|
||||
* template <class T, std::size_t Rank>
|
||||
* void launch_some_kernel(...);
|
||||
*
|
||||
* // creates the dispatcher named "some_dispatcher" which invokes the correct instantiation of "launch_some_kernel"
|
||||
* GENERATE_KERNEL_DISPATCHER(some_dispatcher, launch_some_kernel);
|
||||
*
|
||||
* // internal API function
|
||||
* template <class T>
|
||||
* void some(...) {
|
||||
* // ...
|
||||
* auto rank = input.rank();
|
||||
* some_dispatcher<T, MIN_RANK, MAX_RANK>(rank, ...);
|
||||
* }
|
||||
*/
|
||||
|
||||
/*
|
||||
* name name of the dispatcher function that is generated
|
||||
* func template function that requires runtime selection
|
||||
*
|
||||
* T first template parameter to `func`
|
||||
* start starting rank
|
||||
* end ending rank (inclusive)
|
||||
*
|
||||
* Executes func<T, selector> based on runtime `selector` argument given `selector` lies
|
||||
* within the range [start, end]. If outside the range, no instantiation of `func` is executed.
|
||||
*/
|
||||
/* Generates two overloads of `name<T, start, end>(selector, args...)`:
 *  - start == end : terminal case, calls func<T, start>(args...) if `selector` equals `start`
 *  - start != end : calls func<T, start>(args...) if `selector` equals `start`, otherwise
 *                   recurses with `start + 1`
 * If `selector` lies outside [start, end], nothing is called.
 *
 * The macro expands to complete function template definitions; no semicolon is part of the
 * expansion (a trailing `;` at the point of use is harmless).
 */
#define GENERATE_KERNEL_DISPATCHER(name,func)                                           \
    template <class T, std::size_t start, std::size_t end, class... Args> static        \
    typename std::enable_if<start == end, void>                                         \
    ::type name(int selector, Args&& ...args) {                                         \
        if(selector == start)                                                           \
            func<T, start>(std::forward<Args>(args)...);                                \
    }                                                                                   \
                                                                                        \
    template <class T, std::size_t start, std::size_t end, class... Args> static        \
    typename std::enable_if<start != end, void>                                         \
    ::type name(int selector, Args&& ...args) {                                         \
        if(selector == start)                                                           \
            func<T, start>(std::forward<Args>(args)...);                                \
        else                                                                            \
            name<T, start + 1, end, Args...>(selector, std::forward<Args>(args)...);    \
    }
|
||||
|
||||
/* Same as GENERATE_KERNEL_DISPATCHER but forwards two class template parameters, TP1 and TP2,
 * to `func` instead of just T: the generated `name<TP1, TP2, start, end>(selector, args...)`
 * calls func<TP1, TP2, selector>(args...) when `selector` lies within [start, end] and does
 * nothing otherwise.
 *
 * The macro expands to complete function template definitions; no semicolon is part of the
 * expansion (a trailing `;` at the point of use is harmless).
 */
#define GENERATE_KERNEL_DISPATCHER_2TP(name,func)                                               \
    template <class TP1, class TP2, std::size_t start, std::size_t end, class... Args> static   \
    typename std::enable_if<start == end, void>                                                 \
    ::type name(int selector, Args&& ...args) {                                                 \
        if(selector == start)                                                                   \
            func<TP1, TP2, start>(std::forward<Args>(args)...);                                 \
    }                                                                                           \
                                                                                                \
    template <class TP1, class TP2, std::size_t start, std::size_t end, class... Args> static   \
    typename std::enable_if<start != end, void>                                                 \
    ::type name(int selector, Args&& ...args) {                                                 \
        if(selector == start)                                                                   \
            func<TP1, TP2, start>(std::forward<Args>(args)...);                                 \
        else                                                                                    \
            name<TP1, TP2, start + 1, end, Args...>(selector, std::forward<Args>(args)...);     \
    }
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_KERNEL_DISPATCHER_HPP */
|
||||
36
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/limits.hpp
vendored
Normal file
36
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/limits.hpp
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_LIMITS_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_LIMITS_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include <cfloat>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
template <class T>
|
||||
struct numeric_limits;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template <>
|
||||
struct numeric_limits<__half> {
|
||||
__device__ static __half min() { return 0.0000610; }
|
||||
__device__ static __half max() { return 65504.0; }
|
||||
__device__ static __half lowest() { return -65504.0; }
|
||||
};
|
||||
#endif
|
||||
|
||||
template <>
|
||||
struct numeric_limits<float> {
|
||||
__device__ static float min() { return FLT_MIN; }
|
||||
__device__ static float max() { return FLT_MAX; }
|
||||
__device__ static float lowest() { return -FLT_MAX; }
|
||||
};
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_LIMITS_HPP */
|
||||
154
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/math.hpp
vendored
Normal file
154
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/math.hpp
vendored
Normal file
@@ -0,0 +1,154 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_MATH_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_MATH_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
/** Absolute value. The generic version negates values that compare below `T(0)`. */
template <class T>
__device__ T abs(T val) {
    if (val < T(0))
        return -val;
    return val;
}

/* float: forwards to fabsf */
template <>
inline __device__ float abs(float val) {
    return fabsf(val);
}

/* double: forwards to fabs */
template <>
inline __device__ double abs(double val) {
    return fabs(val);
}
|
||||
|
||||
/** Exponential function; only the specializations below are defined. */
template <class T>
__device__ T exp(T val);

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: forwards to hexp */
template <>
inline __device__ __half exp(__half val) {
    return hexp(val);
}
#endif

/* float: forwards to expf */
template <>
inline __device__ float exp(float val) {
    return expf(val);
}

/* double: forwards to the global-namespace exp */
template <>
inline __device__ double exp(double val) {
    return ::exp(val);
}
|
||||
|
||||
/** exp(val) - 1; only the specializations below are defined. */
template <class T>
__device__ T expm1(T val);

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: computed as hexp(val) minus one in half arithmetic */
template <>
inline __device__ __half expm1(__half val) {
    const __half exponential = hexp(val);
    return exponential - __half(1);
}
#endif

/* float: forwards to expm1f */
template <>
inline __device__ float expm1(float val) {
    return expm1f(val);
}

/* double: forwards to the global-namespace expm1 */
template <>
inline __device__ double expm1(double val) {
    return ::expm1(val);
}
|
||||
|
||||
/** Larger of two values. The generic version returns `x` only when `x > y`, otherwise `y`. */
template <class T>
__device__ T max(T x, T y) {
    if (x > y)
        return x;
    return y;
}

/* float: forwards to fmaxf */
template <>
inline __device__ float max(float x, float y) {
    return fmaxf(x, y);
}

/* double: forwards to fmax */
template <>
inline __device__ double max(double x, double y) {
    return fmax(x, y);
}
|
||||
|
||||
/** Smaller of two values. The generic version returns `y` only when `x > y`, otherwise `x`. */
template <class T>
__device__ T min(T x, T y) {
    if (x > y)
        return y;
    return x;
}

/* float: forwards to fminf */
template <>
inline __device__ float min(float x, float y) {
    return fminf(x, y);
}

/* double: forwards to fmin */
template <>
inline __device__ double min(double x, double y) {
    return fmin(x, y);
}
|
||||
|
||||
/** log(1 + val); only the specializations below are defined. */
template <class T> __device__ T log1p(T val);
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: computed as hlog(1 + val) in half arithmetic */
template <> inline __device__ __half log1p(__half val) { return hlog(__half(1) + val); }
#endif
/* float: forwards to log1pf */
template <> inline __device__ float log1p(float val) { return log1pf(val); }
/* double: forwards to the global-namespace log1p; needed because log1pexp<double> in this
 * namespace calls log1p with a double argument
 */
template <> inline __device__ double log1p(double val) { return ::log1p(val); }
|
||||
|
||||
/** log(1 + exp(val)), evaluated piecewise.
 *
 * Each specialization picks one of four expressions depending on the range `val` falls in:
 * `exp(val)`, `log1p(exp(val))`, `val + exp(-val)` or plain `val`. The range limits differ per
 * type. Only the specializations below are defined.
 */
template <class T>
__device__ T log1pexp(T val);

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template <>
inline __device__ __half log1pexp(__half val) {
    if (val <= __half(-4.0))
        return exp(val);
    if (val <= __half(8.0))
        return log1p(exp(val));
    if (val <= __half(8.7))
        return val + exp(-val);
    return val;
}
#endif

template <>
inline __device__ float log1pexp(float val) {
    if (val <= -20)
        return expf(val);
    if (val <= 9.0)
        return log1pf(expf(val));
    if (val <= 14.6)
        return val + exp(-val);
    return val;
}

template <>
inline __device__ double log1pexp(double val) {
    if (val <= -37)
        return exp(val);
    if (val <= 18)
        return log1p(exp(val));
    if (val <= 33.3)
        return val + exp(-val);
    return val;
}
|
||||
|
||||
/** Hyperbolic tangent; only the specializations below are defined. */
template <class T>
__device__ T tanh(T val);

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: evaluated through tanhf, relying on the implicit half <-> float conversions */
template <>
inline __device__ __half tanh(__half val) {
    return tanhf(val);
}
#endif

/* float: forwards to tanhf */
template <>
inline __device__ float tanh(float val) {
    return tanhf(val);
}

/* double: forwards to the global-namespace tanh */
template <>
inline __device__ double tanh(double val) {
    return ::tanh(val);
}
|
||||
|
||||
/** `val` raised to the power `exp`; only the specializations below are defined. */
template <class T>
__device__ T pow(T val, T exp);

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: evaluated through powf, relying on the implicit half <-> float conversions */
template <>
inline __device__ __half pow(__half val, __half exp) {
    return powf(val, exp);
}
#endif

/* float: forwards to powf */
template <>
inline __device__ float pow(float val, float exp) {
    return powf(val, exp);
}

/* double: forwards to the global-namespace pow */
template <>
inline __device__ double pow(double val, double exp) {
    return ::pow(val, exp);
}
|
||||
|
||||
/** Square root; only the specializations below are defined. */
template <class T>
__device__ T sqrt(T val);

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: forwards to hsqrt */
template <>
inline __device__ __half sqrt(__half val) {
    return hsqrt(val);
}
#endif

/* float: forwards to sqrtf */
template <>
inline __device__ float sqrt(float val) {
    return sqrtf(val);
}

/* double: forwards to the global-namespace sqrt */
template <>
inline __device__ double sqrt(double val) {
    return ::sqrt(val);
}
|
||||
|
||||
/** Reciprocal square root; only the specializations below are defined. */
template <class T>
__device__ T rsqrt(T val);

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: forwards to hrsqrt */
template <>
inline __device__ __half rsqrt(__half val) {
    return hrsqrt(val);
}
#endif

/* float: forwards to rsqrtf */
template <>
inline __device__ float rsqrt(float val) {
    return rsqrtf(val);
}

/* double: forwards to the global-namespace rsqrt */
template <>
inline __device__ double rsqrt(double val) {
    return ::rsqrt(val);
}
|
||||
|
||||
/** Logistic sigmoid 1 / (1 + exp(-val)), built on the `exp` overloads of this namespace. */
template <class T>
__device__ T sigmoid(T val) {
    const T denominator = T(1) + exp(-val);
    return T(1) / denominator;
}
|
||||
|
||||
/** Restricts `value` to [lower, upper]: first raised to at least `lower`, then capped at `upper`. */
template <class T>
__device__ T clamp(T value, T lower, T upper) {
    const T raised = max(value, lower);
    return min(raised, upper);
}
|
||||
|
||||
/** Rounds to a `long`; only the specializations below are defined. */
template <class T>
__device__ long lround(T value);

/* double: forwards to the global-namespace lround */
template <>
inline __device__ long lround(double value) {
    return ::lround(value);
}

/* float: forwards to lroundf */
template <>
inline __device__ long lround(float value) {
    return lroundf(value);
}
|
||||
|
||||
/** Rounds to an integral value of the same type; only the specializations below are defined. */
template <class T>
__device__ T round(T value);

/* double: forwards to the global-namespace round */
template <>
inline __device__ double round(double value) {
    return ::round(value);
}

/* float: forwards to roundf */
template <>
inline __device__ float round(float value) {
    return roundf(value);
}

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: forwards to hrint
 * NOTE(review): hrint may treat halfway cases differently from roundf/round -- confirm
 */
template <>
inline __device__ __half round(__half value) {
    return hrint(value);
}
#endif
|
||||
|
||||
/** Rounds up to an integral value of the same type; only the specializations below are defined. */
template <class T>
__device__ T ceil(T value);

/* double: forwards to the global-namespace ceil */
template <>
inline __device__ double ceil(double value) {
    return ::ceil(value);
}

/* float: forwards to ceilf */
template <>
inline __device__ float ceil(float value) {
    return ceilf(value);
}

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
/* half: forwards to hceil */
template <>
inline __device__ __half ceil(__half value) {
    return hceil(value);
}
#endif
|
||||
|
||||
/** Product of `x` and `y`. The generic version is an ordinary multiplication. */
template <class T>
__device__ T mul_ftz(T x, T y) {
    return x * y;
}

/* float: issues the PTX `mul.ftz.f32` instruction through inline assembly */
template <>
inline __device__ float mul_ftz(float x, float y) {
    float product;
    asm("mul.ftz.f32 %0, %1, %2;" : "=f"(product) : "f"(x), "f"(y));
    return product;
}
|
||||
|
||||
/** Quotient `x / y`. The generic version is an ordinary division. */
template <class T>
__device__ T fast_divide(T x, T y) {
    return x / y;
}

/* float: uses the __fdividef intrinsic */
template <>
inline __device__ float fast_divide(float x, float y) {
    return __fdividef(x, y);
}
|
||||
|
||||
/** Quotient `x / y`. The generic version defers to `fast_divide`. */
template <class T>
__device__ T fast_divide_ftz(T x, T y) {
    return fast_divide(x, y);
}

/* float: issues the PTX `div.approx.ftz.f32` instruction through inline assembly */
template <>
inline __device__ float fast_divide_ftz(float x, float y) {
    float quotient;
    asm("div.approx.ftz.f32 %0, %1, %2;" : "=f"(quotient) : "f"(x), "f"(y));
    return quotient;
}
|
||||
|
||||
/** Exponential function. The generic version defers to `exp` from this namespace. */
template <class T>
__device__ T fast_exp(T value) {
    return exp(value);
}

/* float: uses the __expf intrinsic */
template <>
inline __device__ float fast_exp(float value) {
    return __expf(value);
}
|
||||
|
||||
/** Logistic sigmoid. The generic version defers to `sigmoid` from this namespace. */
template <class T>
__device__ T fast_sigmoid(T value) {
    return sigmoid(value);
}

/* float: 1 / (1 + exp(-value)) built from the __fdividef and __expf intrinsics */
template <>
inline __device__ float fast_sigmoid(float value) {
    return __fdividef(1, 1 + __expf(-value));
}
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_MATH_HPP */
|
||||
328
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/max_unpooling.cu
vendored
Normal file
328
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/max_unpooling.cu
vendored
Normal file
@@ -0,0 +1,328 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "array.hpp"
|
||||
#include "limits.hpp"
|
||||
#include "types.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include "../cuda4dnn/kernels/fill_copy.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, std::size_t Order,
|
||||
typename std::enable_if<Order == 1 || Order == 2 || Order == 3, bool>::type = true> /* Order has been hardcoded; see code */
|
||||
__global__ void max_pooling_with_indices(
|
||||
Span<T> output, Span<T> indices, View<T> input, size_type channels,
|
||||
array<size_type, Order> out_spatial_dims, array<size_type, Order> in_spatial_dims,
|
||||
array<size_type, Order> window_size, array<size_type, Order> strides, array<size_type, Order> padding_left)
|
||||
{
|
||||
/* every element in the output is mapped to a window in the input and each thread processes several windows */
|
||||
for (auto idx : grid_stride_range(output.size())) {
|
||||
size_type out_spatial_size = 1;
|
||||
array<index_type, Order> window_idx;
|
||||
for (int i = Order - 1; i >= 0; i--) {
|
||||
window_idx[i] = (idx / out_spatial_size) % out_spatial_dims[i];
|
||||
out_spatial_size *= out_spatial_dims[i];
|
||||
}
|
||||
|
||||
const index_type n = idx / (out_spatial_size * channels);
|
||||
const index_type c = (idx / out_spatial_size) % channels;
|
||||
|
||||
array<index_type, Order> start;
|
||||
for(int i = 0; i < Order; i++)
|
||||
start[i] = window_idx[i] * strides[i] - padding_left[i];
|
||||
|
||||
array<index_type, Order> end;
|
||||
for (int i = 0; i < Order; i++) {
|
||||
using device::min;
|
||||
end[i] = min<index_type>(start[i] + window_size[i], in_spatial_dims[i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < Order; i++) {
|
||||
using device::max;
|
||||
start[i] = max(start[i], 0);
|
||||
}
|
||||
|
||||
T max_value = numeric_limits<T>::lowest();
|
||||
index_type max_idx = -1;
|
||||
|
||||
size_type in_spatial_size = 1;
|
||||
for (int i = 0; i < Order; i++)
|
||||
in_spatial_size *= in_spatial_dims[i];
|
||||
|
||||
const auto outer_offset = (n * channels + c) * in_spatial_size;
|
||||
if (Order == 1) {
|
||||
array<index_type, Order> idx;
|
||||
for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) {
|
||||
index_type offset = 0;
|
||||
index_type stride = 1;
|
||||
for (int i = Order - 1; i >= 0; i--) {
|
||||
offset += stride * idx[i];
|
||||
stride *= in_spatial_dims[i];
|
||||
}
|
||||
|
||||
if (input[outer_offset + offset] > max_value) {
|
||||
max_idx = offset;
|
||||
max_value = input[outer_offset + offset];
|
||||
}
|
||||
}
|
||||
} else if (Order == 2) {
|
||||
array<index_type, Order> idx;
|
||||
for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) {
|
||||
for (idx[1] = start[1]; idx[1] != end[1]; idx[1]++) {
|
||||
index_type offset = 0;
|
||||
index_type stride = 1;
|
||||
for (int i = Order - 1; i >= 0; i--) {
|
||||
offset += stride * idx[i];
|
||||
stride *= in_spatial_dims[i];
|
||||
}
|
||||
|
||||
if (input[outer_offset + offset] > max_value) {
|
||||
max_idx = offset;
|
||||
max_value = input[outer_offset + offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if(Order == 3) {
|
||||
array<index_type, Order> idx;
|
||||
for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) {
|
||||
for (idx[1] = start[1]; idx[1] != end[1]; idx[1]++) {
|
||||
for (idx[2] = start[2]; idx[2] != end[2]; idx[2]++) {
|
||||
index_type offset = 0;
|
||||
index_type stride = 1;
|
||||
for (int i = Order - 1; i >= 0; i--) {
|
||||
offset += stride * idx[i];
|
||||
stride *= in_spatial_dims[i];
|
||||
}
|
||||
|
||||
if (input[outer_offset + offset] > max_value) {
|
||||
max_idx = offset;
|
||||
max_value = input[outer_offset + offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output[idx] = max_value;
|
||||
indices[idx] = max_idx;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t Order>
|
||||
__global__ void max_unpooling(
|
||||
Span<T> output, View<T> input, View<T> indices, size_type channels,
|
||||
array<size_type, Order> out_spatial_dims, array<size_type, Order> in_spatial_dims,
|
||||
array<size_type, Order> window_size, array<size_type, Order> strides, array<size_type, Order> padding_left)
|
||||
{
|
||||
/* the output has already been zero filled */
|
||||
/* Every input value represents a window in the output. The max unpooling operation
|
||||
* copies the input value to exactly one location in the output window which is given
|
||||
* by the indices tensor.
|
||||
*/
|
||||
for (auto idx : grid_stride_range(input.size())) {
|
||||
size_type in_spatial_size = 1;
|
||||
array<index_type, Order> window_idx;
|
||||
for (int i = Order - 1; i >= 0; i--) {
|
||||
window_idx[i] = (idx / in_spatial_size) % in_spatial_dims[i];
|
||||
in_spatial_size *= in_spatial_dims[i];
|
||||
}
|
||||
|
||||
const index_type n = idx / (in_spatial_size * channels);
|
||||
const index_type c = (idx / in_spatial_size) % channels;
|
||||
|
||||
array<index_type, Order> start;
|
||||
for (int i = 0; i < Order; i++) {
|
||||
using device::min;
|
||||
using device::max;
|
||||
start[i] = max(0, min(window_idx[i] * strides[i] - padding_left[i], out_spatial_dims[i] - 1));
|
||||
}
|
||||
|
||||
size_type out_spatial_size = 1;
|
||||
for (int i = 0; i < Order; i++)
|
||||
out_spatial_size *= out_spatial_dims[i];
|
||||
|
||||
index_type outer_offset = (n * channels + c) * out_spatial_size;
|
||||
output[outer_offset + static_cast<index_type>(indices[idx])] = input[idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t Order> static
|
||||
void launch_max_pooling_kernel(
|
||||
const Stream& stream,
|
||||
Span<T> output, Span<T> indices, View<T> input, std::size_t channels,
|
||||
const std::vector<std::size_t>& out_spatial_dims, const std::vector<std::size_t>& in_spatial_dims,
|
||||
const std::vector<std::size_t>& window_size,
|
||||
const std::vector<std::size_t>& strides, const std::vector<std::size_t>& padding_left)
|
||||
{
|
||||
CV_Assert(indices.size() == output.size());
|
||||
CV_Assert(out_spatial_dims.size() == Order);
|
||||
CV_Assert(in_spatial_dims.size() == Order);
|
||||
CV_Assert(window_size.size() == Order);
|
||||
CV_Assert(strides.size() == Order);
|
||||
CV_Assert(padding_left.size() == Order);
|
||||
|
||||
array<size_type, Order> out_spatial_dims_k, in_spatial_dims_k;
|
||||
out_spatial_dims_k.assign(std::begin(out_spatial_dims), std::end(out_spatial_dims));
|
||||
in_spatial_dims_k.assign(std::begin(in_spatial_dims), std::end(in_spatial_dims));
|
||||
|
||||
array<size_type, Order> window_size_k, strides_k, padding_left_k;
|
||||
window_size_k.assign(std::begin(window_size), std::end(window_size));
|
||||
strides_k.assign(std::begin(strides), std::end(strides));
|
||||
padding_left_k.assign(std::begin(padding_left), std::end(padding_left));
|
||||
|
||||
auto kernel = raw::max_pooling_with_indices<T, Order>;
|
||||
auto policy = make_policy(kernel, output.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, output, indices, input, channels,
|
||||
out_spatial_dims_k, in_spatial_dims_k, window_size_k, strides_k, padding_left_k);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void max_pooling_with_indices(
|
||||
const Stream& stream,
|
||||
TensorSpan<T> output, TensorSpan<T> indices, TensorView<T> input,
|
||||
const std::vector<std::size_t>& window_size, const std::vector<std::size_t>& strides,
|
||||
const std::vector<std::size_t>& padding_left)
|
||||
{
|
||||
CV_Assert(is_shape_same(output, indices));
|
||||
CV_Assert(input.get_axis_size(1) == output.get_axis_size(1));
|
||||
|
||||
auto order = window_size.size();
|
||||
CV_Assert(strides.size() == order);
|
||||
CV_Assert(padding_left.size() == order);
|
||||
CV_Assert(output.rank() == order + 2);
|
||||
CV_Assert(input.rank() == order + 2);
|
||||
|
||||
std::vector<std::size_t> out_spatial_dims(order), in_spatial_dims(order);
|
||||
for (int i = 0; i < order; i++) {
|
||||
in_spatial_dims[i] = input.get_axis_size(2 + i);
|
||||
out_spatial_dims[i] = output.get_axis_size(2 + i);
|
||||
}
|
||||
|
||||
CV_Assert(1 <= order && order <= 3);
|
||||
std::size_t channels = input.get_axis_size(1);
|
||||
if (order == 3) {
|
||||
launch_max_pooling_kernel<T, 3>(stream, output, indices, input, channels,
|
||||
out_spatial_dims, in_spatial_dims, window_size, strides, padding_left);
|
||||
} else if (order == 2) {
|
||||
launch_max_pooling_kernel<T, 2>(stream, output, indices, input, channels,
|
||||
out_spatial_dims, in_spatial_dims, window_size, strides, padding_left);
|
||||
} else if (order == 1) {
|
||||
launch_max_pooling_kernel<T, 1>(stream, output, indices, input, channels,
|
||||
out_spatial_dims, in_spatial_dims, window_size, strides, padding_left);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void max_pooling_with_indices(const Stream&,
|
||||
TensorSpan<__half>, TensorSpan<__half>, TensorView<__half>,
|
||||
const std::vector<std::size_t>&, const std::vector<std::size_t>&,
|
||||
const std::vector<std::size_t>&);
|
||||
#endif
|
||||
|
||||
template void max_pooling_with_indices(const Stream&,
|
||||
TensorSpan<float>, TensorSpan<float>, TensorView<float>,
|
||||
const std::vector<std::size_t>&, const std::vector<std::size_t>&,
|
||||
const std::vector<std::size_t>&);
|
||||
|
||||
template <class T, std::size_t Order> static
|
||||
void launch_max_unpooling_kernel(
|
||||
const Stream& stream,
|
||||
Span<T> output, View<T> input, View<T> indices, std::size_t channels,
|
||||
const std::vector<std::size_t>& out_spatial_dims, const std::vector<std::size_t>& in_spatial_dims,
|
||||
const std::vector<std::size_t>& window_size,
|
||||
const std::vector<std::size_t>& strides, const std::vector<std::size_t>& padding_left)
|
||||
{
|
||||
CV_Assert(out_spatial_dims.size() == Order);
|
||||
CV_Assert(in_spatial_dims.size() == Order);
|
||||
CV_Assert(window_size.size() == Order);
|
||||
CV_Assert(strides.size() == Order);
|
||||
CV_Assert(padding_left.size() == Order);
|
||||
CV_Assert(indices.size() == input.size());
|
||||
|
||||
array<size_type, Order> out_spatial_dims_k, in_spatial_dims_k;
|
||||
out_spatial_dims_k.assign(std::begin(out_spatial_dims), std::end(out_spatial_dims));
|
||||
in_spatial_dims_k.assign(std::begin(in_spatial_dims), std::end(in_spatial_dims));
|
||||
|
||||
array<size_type, Order> window_size_k, strides_k, padding_left_k;
|
||||
window_size_k.assign(std::begin(window_size), std::end(window_size));
|
||||
strides_k.assign(std::begin(strides), std::end(strides));
|
||||
padding_left_k.assign(std::begin(padding_left), std::end(padding_left));
|
||||
|
||||
auto kernel = raw::max_unpooling<T, Order>;
|
||||
auto policy = make_policy(kernel, input.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, indices, channels,
|
||||
out_spatial_dims_k, in_spatial_dims_k, window_size_k, strides_k, padding_left_k);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void max_unpooling(
|
||||
const Stream& stream,
|
||||
TensorSpan<T> output, TensorView<T> input, TensorView<T> indices,
|
||||
const std::vector<std::size_t>& window_size, const std::vector<std::size_t>& strides,
|
||||
const std::vector<std::size_t>& padding_left)
|
||||
{
|
||||
CV_Assert(is_shape_same(input, indices));
|
||||
CV_Assert(input.get_axis_size(1) == output.get_axis_size(1));
|
||||
|
||||
auto order = window_size.size();
|
||||
CV_Assert(strides.size() == order);
|
||||
CV_Assert(padding_left.size() == order);
|
||||
CV_Assert(output.rank() == order + 2);
|
||||
CV_Assert(input.rank() == order + 2);
|
||||
|
||||
std::vector<std::size_t> out_spatial_dims(order), in_spatial_dims(order);
|
||||
for (int i = 0; i < order; i++) {
|
||||
in_spatial_dims[i] = input.get_axis_size(2 + i);
|
||||
out_spatial_dims[i] = output.get_axis_size(2 + i);
|
||||
}
|
||||
|
||||
kernels::fill<T>(stream, output, 0.0);
|
||||
|
||||
/* only max_unpooling2d and max_unpooling3d are supported */
|
||||
CV_Assert(2 <= order && order <= 3);
|
||||
std::size_t channels = input.get_axis_size(1);
|
||||
if (order == 3) {
|
||||
launch_max_unpooling_kernel<T, 3>(stream, output, input, indices, channels,
|
||||
out_spatial_dims, in_spatial_dims, window_size, strides, padding_left);
|
||||
} else if (order == 2) {
|
||||
launch_max_unpooling_kernel<T, 2>(stream, output, input, indices, channels,
|
||||
out_spatial_dims, in_spatial_dims, window_size, strides, padding_left);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void max_unpooling(const Stream&,
|
||||
TensorSpan<__half>, TensorView<__half>, TensorView<__half>,
|
||||
const std::vector<std::size_t>&, const std::vector<std::size_t>&,
|
||||
const std::vector<std::size_t>&);
|
||||
#endif
|
||||
|
||||
template void max_unpooling(const Stream&,
|
||||
TensorSpan<float>, TensorView<float>, TensorView<float>,
|
||||
const std::vector<std::size_t>&, const std::vector<std::size_t>&,
|
||||
const std::vector<std::size_t>&);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
32
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/memory.hpp
vendored
Normal file
32
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/memory.hpp
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_MEMORY_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_MEMORY_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
template <class T>
|
||||
__device__ T load_ldg(const T& src) {
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350)
|
||||
return __ldg(&src);
|
||||
#else
|
||||
return src;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__device__ T load_ldg(const T* src) {
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350)
|
||||
return __ldg(src);
|
||||
#else
|
||||
return *src;
|
||||
#endif
|
||||
}
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_MEMORY_HPP */
|
||||
145
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/mvn.cu
vendored
Normal file
145
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/mvn.cu
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "types.hpp"
|
||||
#include "atomics.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T>
|
||||
__global__ void reduce_mean(Span<float> means, View<T> input, size_type inner_size) {
|
||||
for (auto idx : grid_stride_range(input.size())) {
|
||||
const index_type outer_idx = idx / inner_size;
|
||||
atomicAdd(&means[outer_idx], static_cast<float>(input[idx]) / inner_size);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void reduce_mean_sqr_sum(Span<float> means, Span<float> sum_sqrs, View<T> input, size_type inner_size) {
|
||||
for (auto idx : grid_stride_range(input.size())) {
|
||||
const index_type outer_idx = idx / inner_size;
|
||||
auto x = static_cast<float>(input[idx]);
|
||||
atomicAdd(&means[outer_idx], x / inner_size);
|
||||
atomicAdd(&sum_sqrs[outer_idx], x * x);
|
||||
}
|
||||
}
|
||||
|
||||
/** Turns per-group means and sums of squares into scale factors.
 *
 * For every group: `scale = rsqrt(eps + sums_sqr / inner_size - mean * mean)`.
 */
__global__ void compute_normalization_scale(Span<float> scale, View<float> means, View<float> sums_sqr, size_type inner_size, float eps) {
    for (auto group : grid_stride_range(scale.size())) {
        const auto mu = means[group];
        /* mean of squares minus squared mean */
        const auto variance = sums_sqr[group] / inner_size - mu * mu;
        scale[group] = device::rsqrt(eps + variance);
    }
}
|
||||
|
||||
template <class T>
|
||||
__global__ void normalize_mean(Span<T> output, View<T> input, View<float> means, size_type inner_size) {
|
||||
for (auto idx : grid_stride_range(output.size())) {
|
||||
const index_type outer_idx = idx / inner_size;
|
||||
output[idx] = static_cast<float>(input[idx]) - means[outer_idx];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void normalize_mean_variance(Span<T> output, View<T> input, View<float> means, View<float> scale, size_type inner_size) {
|
||||
for (auto idx : grid_stride_range(output.size())) {
|
||||
const index_type outer_idx = idx / inner_size;
|
||||
output[idx] = (static_cast<float>(input[idx]) - means[outer_idx]) * scale[outer_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void reduce_mean(const Stream& stream, Span<float> means, View<T> input, std::size_t inner_size)
|
||||
{
|
||||
CV_Assert(input.size() / inner_size == means.size());
|
||||
|
||||
auto kernel = raw::reduce_mean<T>;
|
||||
auto policy = make_policy(kernel, input.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, means, input, inner_size);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void reduce_mean(const Stream&, Span<float>, View<__half>, std::size_t);
|
||||
#endif
|
||||
template void reduce_mean(const Stream&, Span<float>, View<float>, std::size_t);
|
||||
|
||||
template <class T>
|
||||
void reduce_mean_sqr_sum(const Stream& stream, Span<float> means, Span<float> sum_sqrs, View<T> input, std::size_t inner_size)
|
||||
{
|
||||
CV_Assert(input.size() / inner_size == means.size());
|
||||
CV_Assert(input.size() / inner_size == sum_sqrs.size());
|
||||
|
||||
auto kernel = raw::reduce_mean_sqr_sum<T>;
|
||||
auto policy = make_policy(kernel, input.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, means, sum_sqrs, input, inner_size);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void reduce_mean_sqr_sum(const Stream&, Span<float>, Span<float>, View<__half>, std::size_t);
|
||||
#endif
|
||||
template void reduce_mean_sqr_sum(const Stream&, Span<float>, Span<float>, View<float>, std::size_t);
|
||||
|
||||
void compute_normalization_scale(const Stream& stream, Span<float> scale, View<float> means, View<float> sum_sqrs, std::size_t inner_size, float eps)
|
||||
{
|
||||
CV_Assert(scale.size() == means.size());
|
||||
CV_Assert(scale.size() == sum_sqrs.size());
|
||||
|
||||
auto kernel = raw::compute_normalization_scale;
|
||||
auto policy = make_policy(kernel, scale.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, scale, means, sum_sqrs, inner_size, eps);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void normalize_mean(const Stream& stream, Span<T> output, View<T> input, View<float> means, std::size_t inner_size)
|
||||
{
|
||||
CV_Assert(output.size() == input.size());
|
||||
CV_Assert(input.size() / inner_size == means.size());
|
||||
|
||||
auto kernel = raw::normalize_mean<T>;
|
||||
auto policy = make_policy(kernel, output.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, means, inner_size);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void normalize_mean(const Stream&, Span<__half>, View<__half>, View<float>, std::size_t);
|
||||
#endif
|
||||
template void normalize_mean(const Stream&, Span<float>, View<float>, View<float>, std::size_t);
|
||||
|
||||
template <class T>
|
||||
void normalize_mean_variance(const Stream& stream, Span<T> output, View<T> input, View<float> means, View<float> scale, std::size_t inner_size)
|
||||
{
|
||||
CV_Assert(input.size() == output.size());
|
||||
CV_Assert(input.size() / inner_size == means.size());
|
||||
CV_Assert(input.size() / inner_size == scale.size());
|
||||
|
||||
auto kernel = raw::normalize_mean_variance<T>;
|
||||
auto policy = make_policy(kernel, output.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, means, scale, inner_size);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void normalize_mean_variance(const Stream&, Span<__half>, View<__half>, View<float>, View<float>, std::size_t);
|
||||
#endif
|
||||
template void normalize_mean_variance(const Stream&, Span<float>, View<float>, View<float>, View<float>, std::size_t);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
123
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/normalize.cu
vendored
Normal file
123
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/normalize.cu
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "array.hpp"
|
||||
#include "math.hpp"
|
||||
#include "types.hpp"
|
||||
#include "atomics.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include "../cuda4dnn/kernels/fill_copy.hpp"
|
||||
#include "../cuda4dnn/kernels/scale_shift.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T>
|
||||
__global__ void reduce_sum_abs(Span<T> output, View<T> input, size_type outer_stride, size_type mid_stride) {
|
||||
for (auto idx : grid_stride_range(input.size())) {
|
||||
const index_type outer_idx = idx / outer_stride;
|
||||
const index_type inner_idx = idx % mid_stride;
|
||||
|
||||
const index_type sum_idx = outer_idx * mid_stride + inner_idx;
|
||||
atomicAdd(&output[sum_idx], device::abs(input[idx]));
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void reciprocal(Span<T> output, T epsilon) {
|
||||
for (auto idx : grid_stride_range(output.size()))
|
||||
output[idx] = T(1) / (output[idx] + epsilon);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void reduce_sum_squared(Span<T> output, View<T> input, size_type outer_stride, size_type mid_stride) {
|
||||
for (auto idx : grid_stride_range(input.size())) {
|
||||
const index_type outer_idx = idx / outer_stride;
|
||||
const index_type inner_idx = idx % mid_stride;
|
||||
|
||||
const index_type sum_idx = outer_idx * mid_stride + inner_idx;
|
||||
atomicAdd(&output[sum_idx], input[idx] * input[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void rsqrt(Span<T> output, T epsilon) {
|
||||
for (auto idx : grid_stride_range(output.size())) {
|
||||
using device::sqrt;
|
||||
output[idx] = T(1) / sqrt(output[idx] + epsilon);
|
||||
}
|
||||
}
|
||||
|
||||
/* Scales every input element by the factor stored for its slot:
 * output[i] = input[i] * sums[(i / outer_stride) * mid_stride + (i % mid_stride)].
 *
 * The slot formula is the same one used by the reduce_sum_* kernels in this file.
 */
template <class T>
__global__ void apply_norm(Span<T> output, View<T> input, size_type outer_stride, size_type mid_stride, View<T> sums) {
    for (auto i : grid_stride_range(output.size())) {
        const index_type outer = i / outer_stride;
        const index_type inner = i % mid_stride;
        const index_type slot = outer * mid_stride + inner;

        const auto factor = sums[slot];
        output[i] = input[i] * factor;
    }
}
|
||||
}
|
||||
|
||||
/* Normalizes `input` (viewed as outer_size x mid_size x inner_size) across the middle axis.
 *
 * Steps, all enqueued on `stream`:
 *   1. zero `outer_size * inner_size` accumulators taken from the front of `workspace`
 *   2. norm == 1: accumulate sum(|x|),  then replace each sum by 1 / (sum + epsilon)
 *      norm == 2: accumulate sum(x^2), then replace each sum by 1 / sqrt(sum + epsilon)
 *   3. output[i] = input[i] * factor of the (outer, inner) slot that `i` belongs to
 *
 * Preconditions (asserted): output and input have equal size, that size equals
 * outer_size * mid_size * inner_size, norm is 1 or 2, and workspace has room for
 * at least outer_size * inner_size elements.
 */
template <class T>
void normalize(
    const Stream& stream,
    Span<T> output,
    View<T> input, std::size_t outer_size, std::size_t mid_size, std::size_t inner_size, std::size_t norm, T epsilon,
    Span<T> workspace)
{
    CV_Assert(output.size() == input.size());
    CV_Assert(output.size() == outer_size * mid_size * inner_size);
    CV_Assert(norm == 1 || norm == 2);
    CV_Assert(workspace.size() >= outer_size * inner_size);

    /* one accumulator per (outer, inner) pair */
    auto sums = Span<T>(workspace.data(), outer_size * inner_size);
    fill<T>(stream, sums, 0.0);

    if (norm == 1) {
        auto abs_sum_kernel = raw::reduce_sum_abs<T>;
        auto abs_sum_policy = make_policy(abs_sum_kernel, input.size(), 0, stream);
        launch_kernel(abs_sum_kernel, abs_sum_policy, sums, input, mid_size * inner_size, inner_size);

        auto invert_kernel = raw::reciprocal<T>;
        auto invert_policy = make_policy(invert_kernel, sums.size(), 0, stream);
        launch_kernel(invert_kernel, invert_policy, sums, epsilon);
    } else {
        auto sq_sum_kernel = raw::reduce_sum_squared<T>;
        auto sq_sum_policy = make_policy(sq_sum_kernel, input.size(), 0, stream);
        launch_kernel(sq_sum_kernel, sq_sum_policy, sums, input, mid_size * inner_size, inner_size);

        auto inv_root_kernel = raw::rsqrt<T>;
        auto inv_root_policy = make_policy(inv_root_kernel, sums.size(), 0, stream);
        launch_kernel(inv_root_kernel, inv_root_policy, sums, epsilon);
    }

    /* `sums` now holds the per-slot scale factors */
    auto apply_kernel = raw::apply_norm<T>;
    auto apply_policy = make_policy(apply_kernel, output.size(), 0, stream);
    launch_kernel(apply_kernel, apply_policy, output, input, mid_size * inner_size, inner_size, sums);
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void normalize(const Stream&, Span<__half>, View<__half>, std::size_t, std::size_t, std::size_t, std::size_t, __half, Span<__half>);
|
||||
#endif
|
||||
template void normalize(const Stream&, Span<float>, View<float>, std::size_t, std::size_t, std::size_t, std::size_t, float, Span<float>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
201
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/padding.cu
vendored
Normal file
201
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/padding.cu
vendored
Normal file
@@ -0,0 +1,201 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "array.hpp"
|
||||
#include "math.hpp"
|
||||
#include "types.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "kernel_dispatcher.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
/* Fills every output element from the input, mirroring coordinates that fall outside [start, end).
 *
 * For each axis `j`, with `o` the output coordinate:
 *   - o <  end[j]: source coordinate is |o - start[j]|
 *                  (plain offset inside the copy region, mirrored about `start` to its left)
 *   - o >= end[j]: source coordinate is (end[j] - start[j]) - (o - end[j]) - 2
 *                  (mirrored about the last copied element, which itself is not repeated)
 *
 * out_strides / in_strides are the element strides used to split the flat output index
 * into coordinates and to rebuild the flat input index.
 */
template <class T, std::size_t Rank>
__global__ void copy_with_reflection101(
    Span<T> output, array<size_type, Rank> out_strides, array<index_type, Rank> start, array<index_type, Rank> end,
    View<T> input, array<size_type, Rank> in_strides)
{
    using device::abs;

    for (auto i : grid_stride_range(output.size())) {
        /* split the flat output index into per-axis coordinates */
        array<index_type, Rank> out_coord;
        out_coord[0] = i / out_strides[0];
        for (int axis = 1; axis < Rank; axis++)
            out_coord[axis] = (i % out_strides[axis - 1]) / out_strides[axis];

        /* map each coordinate into the input and accumulate the flat input index */
        index_type src_offset = 0;
        for (int axis = 0; axis < Rank; axis++) {
            index_type src_coord = abs(out_coord[axis] - start[axis]);

            /* right reflection region */
            if (out_coord[axis] >= end[axis])
                src_coord = (end[axis] - start[axis]) - (out_coord[axis] - end[axis]) - 2;

            src_offset += src_coord * in_strides[axis];
        }

        output[i] = input[src_offset];
    }
}
|
||||
}
|
||||
|
||||
/* Host-side launcher for raw::copy_with_reflection101 at a compile-time rank.
 *
 * Converts the runtime vectors into fixed-size device arrays and enqueues the kernel on `stream`.
 * `ranges[i]` supplies the (start, end) pair handed to the kernel for axis `i`.
 * All three vectors must have exactly `Rank` entries (asserted).
 */
template <class T, std::size_t Rank> static
void launch_copy_with_reflection101(
    const Stream& stream,
    Span<T> output, const std::vector<std::size_t>& outStride,
    View<T> input, const std::vector<std::size_t>& inStride,
    const std::vector<std::pair<std::size_t, std::size_t>>& ranges)
{
    CV_Assert(outStride.size() == Rank);
    CV_Assert(inStride.size() == Rank);
    CV_Assert(ranges.size() == Rank);

    array<index_type, Rank> first_k, last_k;
    for (int axis = 0; axis < Rank; axis++) {
        const auto& range = ranges[axis];
        first_k[axis] = range.first;
        last_k[axis] = range.second;
    }

    array<size_type, Rank> out_strides_k, in_strides_k;
    out_strides_k.assign(std::begin(outStride), std::end(outStride));
    in_strides_k.assign(std::begin(inStride), std::end(inStride));

    auto reflect_kernel = raw::copy_with_reflection101<T, Rank>;
    auto launch_policy = make_policy(reflect_kernel, output.size(), 0, stream);
    launch_kernel(reflect_kernel, launch_policy, output, out_strides_k, first_k, last_k, input, in_strides_k);
}
|
||||
|
||||
GENERATE_KERNEL_DISPATCHER(copy_with_reflection101_dispatcher, launch_copy_with_reflection101);
|
||||
|
||||
/* Copies `input` into `output`, filling the region outside `ranges` by reflection.
 *
 * `ranges[i]` is the (start, end) position of the copied input along output axis `i`.
 * `ranges` is taken by value because it is simplified in place together with the shapes.
 *
 * Before launching, the problem is reduced in two ways (both explained below):
 *   1. leading axes of size one in both tensors are squeezed away
 *   2. runs of adjacent axes that carry no padding are merged into a single axis
 *
 * The reduced rank must be in [1, CSL_MAX_TENSOR_RANK] (asserted).
 */
template <class T>
void copy_with_reflection101(
    const Stream& stream,
    TensorSpan<T> output, TensorView<T> input,
    std::vector<std::pair<std::size_t, std::size_t>> ranges)
{
    CV_Assert(output.rank() == input.rank());
    CV_Assert(output.rank() == ranges.size());

    /* squeezable axes at the beginning of both tensors can be eliminated
     *
     * Reasoning:
     * ----------
     * Suppose an item's indices in the input tensor is [i1, i2, ...]. The indices in the
     * output tensor will be [i1 + off1, i2 + off2, ...]. The rest of the elements in the output are padding.
     * The padding operation essentially copies items from the input tensor to new locations in the output tensor
     * and pads the remaining.
     *
     * If the size of the first axis of the input and output tensor is unity, the input and output indices
     * for all the elements will be of the form [0, i2, ...] and [0, i2 + off2, ...] respectively. Note that
     * there cannot be extra padding since the axes have unit size. The first index does not contribute to the
     * element's address calculation and hence does nothing apart from eating up a few cycles.
     */
    while (input.get_axis_size(0) == 1 && output.get_axis_size(0) == 1) {
        CV_Assert(ranges[0].first == 0 && ranges[0].second == 1);

        input.squeeze(0);
        output.squeeze(0);
        ranges.erase(std::begin(ranges));

        CV_Assert(output.rank() == input.rank());
        CV_Assert(output.rank() == ranges.size());
    }

    auto inShape = input.shape_as_vector();
    auto outShape = output.shape_as_vector();

    /* contiguous axes which do not have any padding can be combined into one axis
     *
     * Reasoning:
     * ----------
     * Suppose an item's indices in the input tensor is [i1, i2, i3, ...]. Let the first two axes not have any
     * padding. The indices in the output tensor will be [i1, i2, i3 + off3, ...].
     *
     * Each axis in the contiguous unpadded axes sequence will add an offset of iN * strideN. In the above example,
     * the two axes add a total offset of `i1 * stride1 + i2 * stride2`. We can merge the two axes into one axis with
     * a size of `size1 * size2`. The new offset added will be `i12 * stride2` as the kernel iterates through `i12`.
     * Note that `i12` is actually `(i1 * size2 + i2)` in the original tensor.
     */
    for (std::size_t i = 0; i < inShape.size(); i++) {
        /* check if axis `i` requires any padding */
        if (ranges[i].first == 0 && ranges[i].second == inShape[i]) {
            /* loop invariant: `i` is the first axis in the contiguous unpadded axis sequence */
            CV_Assert(inShape[i] == outShape[i]);

            /* we now iterate through the axes which follow and try to merge */
            std::size_t j = i + 1; /* `j` is the axis which we will attempt to merge */
            while (j < inShape.size() && ranges[j].first == 0 && ranges[j].second == inShape[j]) {
                CV_Assert(inShape[j] == outShape[j]);

                /* `j` is also unpadded; merge `i` and `j` */
                auto new_size = inShape[i] * inShape[j];
                inShape[i] = new_size;
                outShape[i] = new_size;
                ranges[i].second = new_size;

                /* delete axis `j`; the next candidate slides into position `j` */
                inShape.erase(std::begin(inShape) + j);
                outShape.erase(std::begin(outShape) + j);
                ranges.erase(std::begin(ranges) + j);

                /* optimizations should not break the invariants */
                CV_Assert(inShape.size() == outShape.size());
                CV_Assert(inShape.size() == ranges.size());
                CV_Assert(inShape[i] == outShape[i]);
                CV_Assert(ranges[i].first == 0 && ranges[i].second == inShape[i]);
            }
        }
    }

    auto rank = inShape.size();

    /* validate the rank before touching the stride vectors: `back()` and `begin() + 1`
     * below are undefined on an empty vector
     */
    CV_Assert(1 <= rank && rank <= CSL_MAX_TENSOR_RANK);

    std::vector<std::size_t> inStride(rank), outStride(rank);
    inStride.back() = 1;
    outStride.back() = 1;
    /* garbage, ..., garbage, 1 */

    std::copy(std::begin(inShape) + 1, std::end(inShape), std::begin(inStride));
    std::copy(std::begin(outShape) + 1, std::end(outShape), std::begin(outStride));
    /* dim[1], dim[2], ..., dim[-1], 1 */

    /* running product from the back; multiply in std::size_t so the strides are not narrowed through int */
    std::partial_sum(inStride.rbegin(), inStride.rend(), inStride.rbegin(), std::multiplies<std::size_t>());
    std::partial_sum(outStride.rbegin(), outStride.rend(), outStride.rbegin(), std::multiplies<std::size_t>());
    /* stride[0], stride[1], ..., stride[-2], 1 */

    copy_with_reflection101_dispatcher<T, 1, CSL_MAX_TENSOR_RANK>(rank, stream, output, outStride, input, inStride, ranges);
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void copy_with_reflection101(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
|
||||
#endif
|
||||
template void copy_with_reflection101(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
|
||||
|
||||
}}}} /* namespace namespace cv::dnn::cuda4dnn::kernels */
|
||||
288
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/permute.cu
vendored
Normal file
288
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/permute.cu
vendored
Normal file
@@ -0,0 +1,288 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "array.hpp"
|
||||
#include "types.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "kernel_dispatcher.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include "../cuda4dnn/kernels/fill_copy.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
/* Generic axis permutation: output[i] = input[source offset of i].
 *
 * The flat output index is peeled apart axis by axis using `outStrides`; the coordinate of
 * output axis `j` is weighted by the input stride of axis `axis_order[j]` to rebuild the
 * flat input index.
 */
template <class T, std::size_t Rank>
__global__ void permute(
    array<index_type, Rank> axis_order,
    Span<T> output, array<size_type, Rank> outStrides,
    View<T> input, array<size_type, Rank> inStrides)
{
    for (auto i : grid_stride_range(input.size())) {
        index_type src_offset = 0;
        index_type remainder = i;

        for (int j = 0; j < Rank; j++)
        {
            const auto src_axis = axis_order[j];
            const auto coord = remainder / outStrides[j];

            src_offset += coord * inStrides[src_axis];
            remainder = remainder % outStrides[j];
        }

        output[i] = input[src_offset];
    }
}
|
||||
|
||||
/* 2D transpose staged through a shared-memory tile.
 *
 * `input` is read as rows of `in_width` elements and `output` is written as rows of
 * `out_width` elements. Expected launch shape (see the host wrapper in this file):
 * blockDim.x = TILE_SIZE, blockDim.y = TILE_SIZE / ROWS_PER_THREAD, so each thread
 * handles ROWS_PER_THREAD rows of the tile.
 *
 * The tile has TILE_SIZE + 1 columns; the extra column is presumably padding to avoid
 * shared-memory bank conflicts on the transposed read (NOTE(review): confirm).
 */
template <class T, int TILE_SIZE, int ROWS_PER_THREAD>
__global__ void transpose(Span<T> output, View<T> input, size_type in_width, size_type out_width)
{
    __shared__ T tile[TILE_SIZE][TILE_SIZE + 1];

    /* number of tile rows covered by one pass of the thread block */
    constexpr int ROW_STEP = TILE_SIZE / ROWS_PER_THREAD;

    const index_type in_x = blockIdx.x * TILE_SIZE + threadIdx.x;
    const index_type in_y_begin = blockIdx.y * TILE_SIZE + threadIdx.y;

    /* Every valid input location has a corresponding output location and vice versa.
     * Hence, a tile entry that is not loaded here is also never read back below.
     */
    for (int row = 0; row < TILE_SIZE; row += ROW_STEP)
    {
        const auto in_y = in_y_begin + row;
        const bool inside_input = in_x < in_width && in_y < out_width;
        if (inside_input)
            tile[threadIdx.y + row][threadIdx.x] = input[in_y * in_width + in_x];
    }

    __syncthreads();

    /* `threadIdx.x` and `threadIdx.y` are interchanged so that consecutive output indices map to
     * consecutive threads. This would allow writes across threads in a warp to be coalesced.
     */
    const index_type out_x = blockIdx.y * TILE_SIZE + threadIdx.x;
    const index_type out_y_begin = blockIdx.x * TILE_SIZE + threadIdx.y;

    for (int row = 0; row < TILE_SIZE; row += ROW_STEP)
    {
        const auto out_y = out_y_begin + row;
        const bool inside_output = out_x < out_width && out_y < in_width;
        if (inside_output)
            output[out_y * out_width + out_x] = tile[threadIdx.x][threadIdx.y + row];
    }
}
|
||||
}
|
||||
|
||||
/* Enqueues the tiled 2D transpose kernel on `stream`.
 *
 * `in_width` is the row length of `input` and `out_width` the row length of `output`.
 * The grid has ceil(in_width / TILE_SIZE) x ceil(out_width / TILE_SIZE) blocks.
 */
template <class T>
void transpose(const Stream& stream, Span<T> output, View<T> input, std::size_t in_width, std::size_t out_width)
{
    /* each block processes a TILE_SIZE x TILE_SIZE piece */
    constexpr int TILE_SIZE = 32;

    /* Each thread processes ROWS_PER_THREAD rows. This decreases the number of threads required
     * in a block so that the cost of the block-wide synchronization is minimized.
     */
    constexpr int ROWS_PER_THREAD = 4;

    auto transpose_kernel = raw::transpose<T, TILE_SIZE, ROWS_PER_THREAD>;

    /* round up so that partial tiles at the borders are covered */
    const auto tiles_x = (in_width + TILE_SIZE - 1) / TILE_SIZE;
    const auto tiles_y = (out_width + TILE_SIZE - 1) / TILE_SIZE;

    dim3 grid_size(tiles_x, tiles_y);
    dim3 block_size(TILE_SIZE, TILE_SIZE / ROWS_PER_THREAD);
    auto launch_policy = execution_policy(grid_size, block_size, stream);

    launch_kernel(transpose_kernel, launch_policy, output, input, in_width, out_width);
}
|
||||
|
||||
template void transpose(const Stream&, Span<__half>, View<__half>, std::size_t, std::size_t);
|
||||
template void transpose(const Stream&, Span<float>, View<float>, std::size_t, std::size_t);
|
||||
|
||||
/* Host-side launcher for raw::permute at a compile-time rank.
 *
 * Converts the runtime vectors into fixed-size device arrays and enqueues the kernel on `stream`.
 * `order`, `outStride` and `inStride` must each have exactly `Rank` entries (asserted).
 */
template <class T, std::size_t Rank> static
void launch_permute_kernel(
    const Stream& stream,
    const std::vector<std::size_t>& order,
    Span<T> output, const std::vector<std::size_t>& outStride,
    View<T> input, const std::vector<std::size_t>& inStride)
{
    CV_Assert(order.size() == Rank);
    CV_Assert(outStride.size() == Rank);
    CV_Assert(inStride.size() == Rank);

    array<size_type, Rank> out_strides_k, in_strides_k;
    out_strides_k.assign(std::begin(outStride), std::end(outStride));
    in_strides_k.assign(std::begin(inStride), std::end(inStride));

    array<index_type, Rank> axis_order_k;
    axis_order_k.assign(std::begin(order), std::end(order));

    auto permute_kernel = raw::permute<T, Rank>;
    auto launch_policy = make_policy(permute_kernel, input.size(), 0, stream);
    launch_kernel(permute_kernel, launch_policy, axis_order_k, output, out_strides_k, input, in_strides_k);
}
|
||||
|
||||
GENERATE_KERNEL_DISPATCHER(permute_dispatcher, launch_permute_kernel);
|
||||
|
||||
/* Permutes the axes of `input` into `output`; output axis `i` is input axis `order[i]`.
 *
 * `order` is taken by value because it is simplified in place together with the shapes:
 *   1. singleton axes are removed
 *   2. runs of axes that stay adjacent and in the same relative order are merged
 *
 * After the reduction one of three paths is taken:
 *   - the reduced order is the identity (this includes the case where every axis was a
 *     singleton and the reduced rank is zero): plain copy
 *   - reduced rank is 2: tiled transpose kernel
 *   - otherwise: generic permute kernel (reduced rank must be in [3, CSL_MAX_TENSOR_RANK])
 *
 * Strides are only computed on the generic path; the other two paths do not use them, and
 * computing them for a reduced rank of zero would index empty vectors.
 */
template <class T>
void permute(
    const Stream& stream,
    TensorSpan<T> output, TensorView<T> input,
    std::vector<std::size_t> order)
{
    CV_Assert(output.rank() == input.rank());
    CV_Assert(input.rank() == order.size());
    CV_Assert(input.size() == output.size());

    auto rank = output.rank();
    auto inShape = input.shape_as_vector();
    auto outShape = output.shape_as_vector();

    /* singleton axes do not contribute towards address calculation
     *
     * Reasoning:
     * ----------
     * Suppose an item's indices in the input tensor is [i1, i2, ...]. The indices in the
     * output tensor will be some permutation of the input tensor indices. Let the output
     * tensor indices be [o1, o2, ...]. The permutation operation essentially copies items
     * from the input tensor to new locations in the output tensor as dictated by the indices.
     *
     * If the size of the nth axis (say i2) of the input is one, the input and output indices for
     * all the elements will be of the form [i1, 0, ...] and [..., 0, ...] respectively.
     * The index does not contribute to the element's address calculation and hence would give
     * identical result if it weren't there.
     */
    for (int i = 0; i < rank; i++)
    {
        /* index `i` corresponds to the axis index in the output; order[i] has the corresponding axis index in the input */
        while (i < rank && outShape[i] == 1)
        {
            int in_i = order[i];
            CV_Assert(inShape[in_i] == 1);

            /* delete axis `i` */
            inShape.erase(std::begin(inShape) + in_i);
            outShape.erase(std::begin(outShape) + i);

            /* deletion of an axis reduces an axis in the input tensor which would cause the indices
             * of the axes that come after the deleted axis to reduce by one
             */
            order.erase(order.begin() + i);
            for (auto& axis : order)
                if (axis > in_i)
                    axis--;

            rank--;

            /* optimizations should not break the invariants */
            CV_Assert(rank == order.size());
            CV_Assert(inShape.size() == order.size());
            CV_Assert(outShape.size() == order.size());
            CV_Assert(input.size() == output.size());
        }
    }

    /* contiguous axes whose relative ordering stays same before and after permutation can be merged into one axis
     * example: in permute order 0 2 3 1, axes 2 and 3 can be grouped into a single axis
     *
     * Reasoning:
     * ----------
     * Suppose an item's indices in the input tensor is [i0, i1, i2, i3, ...]. Let the permutation order be [0, 3, 1, 2, ...].
     * Note that i1 and i2 are adjacent axes in the same order in input as well as output. The indices in the output tensor
     * will be [i0, i3, i1, i2, ...].
     *
     * Each axis in the contiguous axes sequence will add an offset of iN * strideN. In the above example,
     * the two axes add a total offset of `i1 * (size2 * stride2) + i2 * stride2` which is `(i1 * size2 + i2) * stride2`,
     * in both input and output. Note stride2 can be different in the input and output. We can merge the two axes into one axis
     * with a size of `size1 * size2`. The new offset added will be `i12 * stride12` as the kernel iterates through `i12`. Note
     * that `i12` is actually `(i1 * size2 + i2)` and `stride12` is `stride2`.
     */
    for (int i = 0; i < rank; i++) {
        /* the indices used in the loops such as `i` and `j` are axis indices in the output tensor */
        /* the corresponding input axis indices are `order[i]` and `order[j]` */

        /* loop invariant: `i` is the first axis in the contiguous unpermuted axis sequence */

        int j = i + 1; /* `j` is the axis which we will attempt to merge */
        while (j < rank && (order[i] + 1) == order[j]) {
            /* axis `i` and axis `j` do not change relative order */

            auto in_i = order[i], in_j = order[j];

            auto new_size = inShape[in_i] * inShape[in_j];
            inShape[in_i] = new_size;
            outShape[i] = new_size;

            /* delete axis `j` */
            inShape.erase(std::begin(inShape) + in_j);
            outShape.erase(std::begin(outShape) + j);

            /* deletion of an axis reduces an axis in the input tensor which would cause the indices
             * of the axes that come after the deleted axis to reduce by one
             */
            order.erase(order.begin() + j);
            for (auto& axis : order)
                if (axis > order[i])
                    axis--;

            rank--;

            /* optimizations should not break the invariants */
            CV_Assert(rank == order.size());
            CV_Assert(inShape.size() == order.size());
            CV_Assert(outShape.size() == order.size());
            CV_Assert(input.size() == output.size());
        }
    }

    /* an empty `order` (every axis was a singleton) counts as in-order */
    const bool is_in_order = [&order] {
        for (std::size_t i = 0; i < order.size(); i++)
            if (order[i] != i)
                return false;
        return true;
    }();

    if (is_in_order)
    {
        kernels::copy<T>(stream, output, input);
        return;
    }

    if (rank == 2)
    {
        /* use the more efficient transpose kernel */
        transpose<T>(stream, output, input, inShape[1], outShape[1]);
        return;
    }

    CV_Assert(3 <= rank && rank <= CSL_MAX_TENSOR_RANK);

    std::vector<std::size_t> inStride(rank), outStride(rank);
    inStride.back() = 1;
    outStride.back() = 1;
    /* garbage, ..., garbage, 1 */

    std::copy(std::begin(inShape) + 1, std::end(inShape), std::begin(inStride));
    std::copy(std::begin(outShape) + 1, std::end(outShape), std::begin(outStride));
    /* dim[1], dim[2], ..., dim[-1], 1 */

    std::partial_sum(inStride.rbegin(), inStride.rend(), inStride.rbegin(), std::multiplies<std::size_t>());
    std::partial_sum(outStride.rbegin(), outStride.rend(), outStride.rbegin(), std::multiplies<std::size_t>());
    /* stride[0], stride[1], ..., stride[-2], 1 */

    permute_dispatcher<T, 3, CSL_MAX_TENSOR_RANK>(rank, stream, order, output, outStride, input, inStride);
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void permute(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::size_t>);
|
||||
#endif
|
||||
template void permute(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::size_t>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
176
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/prior_box.cu
vendored
Normal file
176
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/prior_box.cu
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "array.hpp"
|
||||
#include "math.hpp"
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
/* Writes the prior boxes for every point of a layerWidth x layerHeight feature map.
 *
 * For point (x, y) and every (box size i, offset j) pair, the box centre is
 *   ((x + offsetX[j]) * stepX, (y + offsetY[j]) * stepY)
 * and the four stored values are the corner coordinates centre -/+ half the box size.
 *   Normalize == true : corners are divided by imageWidth / imageHeight
 *   Normalize == false: corners are stored as-is, with 1 subtracted from the max corner
 *
 * Boxes of a point are stored consecutively, ordered by box size first and offset second.
 * `layerHeight` is not read by the kernel body.
 */
template <class T, bool Normalize>
__global__ void prior_box(
    Span<T> output,
    View<float> boxWidth, View<float> boxHeight, View<float> offsetX, View<float> offsetY, float stepX, float stepY,
    size_type layerWidth, size_type layerHeight,
    size_type imageWidth, size_type imageHeight)
{
    /* each box consists of two pairs of coordinates and hence 4 values in total */
    /* since the entire output consists (first channel at least) of these boxes,
     * we are guaranteed that the output is aligned to a boundary of 4 values
     */
    using vector_type = get_vector_type_t<T, 4>;
    auto output_vPtr = vector_type::get_pointer(output.data());

    /* each iteration of the outer loop selects one feature-map point and generates its boxes */
    const size_type num_points = layerWidth * layerHeight;
    for (auto idx : grid_stride_range(num_points)) {
        const index_type x = idx % layerWidth;
        const index_type y = idx / layerWidth;

        /* position of this point's first box, counted in 4-value vectors */
        index_type box_slot = idx * offsetX.size() * boxWidth.size();

        for (int i = 0; i < boxWidth.size(); i++) {
            for (int j = 0; j < offsetX.size(); j++) {
                const float center_x = (x + offsetX[j]) * stepX;
                const float center_y = (y + offsetY[j]) * stepY;

                const float half_w = boxWidth[i] * 0.5f;
                const float half_h = boxHeight[i] * 0.5f;

                vector_type vec;
                if (Normalize) {
                    vec.data[0] = (center_x - half_w) / imageWidth;
                    vec.data[1] = (center_y - half_h) / imageHeight;
                    vec.data[2] = (center_x + half_w) / imageWidth;
                    vec.data[3] = (center_y + half_h) / imageHeight;
                } else {
                    vec.data[0] = center_x - half_w;
                    vec.data[1] = center_y - half_h;
                    vec.data[2] = center_x + half_w - 1.0f;
                    vec.data[3] = center_y + half_h - 1.0f;
                }

                v_store(output_vPtr[box_slot], vec);
                box_slot++;
            }
        }
    }
}
|
||||
|
||||
/* Clamps every element of `output` to the range [0, 1] in place. */
template <class T>
__global__ void prior_box_clip(Span<T> output) {
    using device::clamp;

    for (auto pos : grid_stride_range(output.size())) {
        const auto clipped = clamp<T>(output[pos], 0.0, 1.0);
        output[pos] = clipped;
    }
}
|
||||
|
||||
/* Fills `output` with a single variance value, written four elements at a time.
 *
 * Only the first (output.size() / 4) * 4 elements are written.
 */
template <class T>
__global__ void prior_box_set_variance1(Span<T> output, float variance) {
    using vector_type = get_vector_type_t<T, 4>;
    auto output_vPtr = vector_type::get_pointer(output.data());

    for (auto slot : grid_stride_range(output.size() / 4)) {
        vector_type vec;
        vec.data[0] = variance;
        vec.data[1] = variance;
        vec.data[2] = variance;
        vec.data[3] = variance;
        v_store(output_vPtr[slot], vec);
    }
}
|
||||
|
||||
/* Fills `output` with the repeating pattern variance[0..3], written four elements at a time.
 *
 * Only the first (output.size() / 4) * 4 elements are written.
 */
template <class T>
__global__ void prior_box_set_variance4(Span<T> output, array<float, 4> variance) {
    using vector_type = get_vector_type_t<T, 4>;
    auto output_vPtr = vector_type::get_pointer(output.data());

    for (auto slot : grid_stride_range(output.size() / 4)) {
        vector_type vec;
        for (int lane = 0; lane < 4; lane++)
            vec.data[lane] = variance[lane];
        v_store(output_vPtr[slot], vec);
    }
}
|
||||
}
|
||||
|
||||
/* Enqueues raw::prior_box<T, Normalize> on `stream`, with the launch sized for
 * layerWidth * layerHeight feature-map points. All arguments are forwarded unchanged.
 */
template <class T, bool Normalize> static
void launch_prior_box_kernel(
    const Stream& stream,
    Span<T> output, View<float> boxWidth, View<float> boxHeight, View<float> offsetX, View<float> offsetY, float stepX, float stepY,
    std::size_t layerWidth, std::size_t layerHeight, std::size_t imageWidth, std::size_t imageHeight)
{
    auto box_kernel = raw::prior_box<T, Normalize>;
    auto launch_policy = make_policy(box_kernel, layerWidth * layerHeight, 0, stream);

    launch_kernel(
        box_kernel, launch_policy,
        output, boxWidth, boxHeight, offsetX, offsetY, stepX, stepY,
        layerWidth, layerHeight, imageWidth, imageHeight);
}
|
||||
|
||||
/* Generates prior boxes and their variances into `output`.
 *
 * `output` is treated as two consecutive channels of
 * `layerHeight * layerWidth * numPriors * 4` elements each:
 *   channel 1: box coordinates (normalized by the image size when `normalize` is set,
 *              clamped to [0, 1] afterwards when `clip` is set)
 *   channel 2: variances; a single value is broadcast, otherwise the four values of
 *              `variance` are repeated for every box
 *
 * Preconditions (asserted before any kernel is enqueued, so that an invalid call cannot
 * write into `output`):
 *   - output.size() == 2 * layerHeight * layerWidth * numPriors * 4
 *   - variance has exactly 1 or 4 entries
 * NOTE(review): `array::assign` may already check the element count; the explicit
 * assertion keeps the requirement visible here either way.
 */
template <class T>
void generate_prior_boxes(
    const Stream& stream,
    Span<T> output,
    View<float> boxWidth, View<float> boxHeight, View<float> offsetX, View<float> offsetY, float stepX, float stepY,
    std::vector<float> variance,
    std::size_t numPriors,
    std::size_t layerWidth, std::size_t layerHeight,
    std::size_t imageWidth, std::size_t imageHeight,
    bool normalize, bool clip)
{
    std::size_t channel_size = layerHeight * layerWidth * numPriors * 4;
    CV_Assert(channel_size * 2 == output.size());
    CV_Assert(variance.size() == 1 || variance.size() == 4);

    if (normalize) {
        launch_prior_box_kernel<T, true>(
            stream, output, boxWidth, boxHeight, offsetX, offsetY, stepX, stepY,
            layerWidth, layerHeight, imageWidth, imageHeight
        );
    } else {
        launch_prior_box_kernel<T, false>(
            stream, output, boxWidth, boxHeight, offsetX, offsetY, stepX, stepY,
            layerWidth, layerHeight, imageWidth, imageHeight
        );
    }

    if (clip) {
        /* only the coordinate channel is clamped */
        auto output_span_c1 = Span<T>(output.data(), channel_size);
        auto kernel = raw::prior_box_clip<T>;
        auto policy = make_policy(kernel, output_span_c1.size(), 0, stream);
        launch_kernel(kernel, policy, output_span_c1);
    }

    /* the variance kernels write four values per work item, hence size / 4 */
    auto output_span_c2 = Span<T>(output.data() + channel_size, channel_size);
    if (variance.size() == 1) {
        auto kernel = raw::prior_box_set_variance1<T>;
        auto policy = make_policy(kernel, output_span_c2.size() / 4, 0, stream);
        launch_kernel(kernel, policy, output_span_c2, variance[0]);
    } else {
        array<float, 4> variance_k;
        variance_k.assign(std::begin(variance), std::end(variance));
        auto kernel = raw::prior_box_set_variance4<T>;
        auto policy = make_policy(kernel, output_span_c2.size() / 4, 0, stream);
        launch_kernel(kernel, policy, output_span_c2, variance_k);
    }
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void generate_prior_boxes(const Stream&, Span<__half>, View<float>, View<float>, View<float>, View<float>, float, float,
|
||||
std::vector<float>, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool, bool);
|
||||
#endif
|
||||
|
||||
template void generate_prior_boxes(const Stream&, Span<float>, View<float>, View<float>, View<float>, View<float>, float, float,
|
||||
std::vector<float>, std::size_t, std::size_t, std::size_t, std::size_t, std::size_t, bool, bool);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
216
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/region.cu
vendored
Normal file
216
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/region.cu
vendored
Normal file
@@ -0,0 +1,216 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "limits.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
|
||||
/* Decodes the five leading values of every box: x, y, width, height and objectness.
 *
 * `input` and `output` are walked as consecutive boxes of `box_size` values each; one
 * grid-stride iteration handles one box. For each box:
 *   - entries 0 and 1 become (cell column/row + scaled offset) divided by `cols`/`rows`
 *   - entries 2 and 3 are multiplied by the bias pair selected by the box's position
 *     inside its cell and divided by `width_norm`/`height_norm`
 *   - entry 4 receives the objectness value, replaced by zero when it is below
 *     `object_prob_cutoff`
 *
 * With `new_coords` set, the inputs are used without fast_sigmoid/fast_exp (sizes are
 * squared and multiplied by four instead). Entries from index 5 onwards are not written.
 */
template <class T>
__global__ void region_box(
    Span<T> output, View<T> input, View<T> bias,
    size_type boxes_per_cell, size_type box_size,
    size_type rows, size_type cols, T scale_x_y,
    size_type height_norm, size_type width_norm,
    T object_prob_cutoff, bool new_coords)
{
    using vector2_type = get_vector_type_t<T, 2>;
    auto bias_pairs = vector2_type::get_pointer(bias.data());

    /* strides (in boxes) used to recover the cell row and column from a box index */
    const auto boxes_per_batch_item = rows * cols * boxes_per_cell;
    const auto boxes_per_row = cols * boxes_per_cell;

    for (auto box_index : grid_stride_range(output.size() / box_size)) {
        const auto anchor_no = box_index % boxes_per_cell; /* box number within a cell */
        const auto base = box_index * box_size;

        const auto cell_y = (box_index % boxes_per_batch_item) / boxes_per_row;
        const auto cell_x = (box_index % boxes_per_row) / boxes_per_cell;

        /* with new_coords the logistic activation must not be applied again */
        T offset_x = input[base + 0];
        T offset_y = input[base + 1];
        if (!new_coords) {
            offset_x = fast_sigmoid(offset_x);
            offset_y = fast_sigmoid(offset_y);
        }

        const auto shifted_x = (offset_x - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);
        const auto shifted_y = (offset_y - static_cast<T>(0.5)) * scale_x_y + static_cast<T>(0.5);

        output[base + 0] = fast_divide_ftz(static_cast<T>(cell_x) + shifted_x, static_cast<T>(cols));
        output[base + 1] = fast_divide_ftz(static_cast<T>(cell_y) + shifted_y, static_cast<T>(rows));

        /* bias holds one (width, height) pair per box of a cell */
        vector2_type prior_wh;
        v_load(prior_wh, bias_pairs[anchor_no]);

        T objectness;
        if (new_coords) {
            output[base + 2] = input[base + 2] * input[base + 2] *
                static_cast<T>(4) * prior_wh.data[0] / static_cast<T>(width_norm);
            output[base + 3] = input[base + 3] * input[base + 3] *
                static_cast<T>(4) * prior_wh.data[1] / static_cast<T>(height_norm);

            objectness = input[base + 4];
        } else {
            output[base + 2] = fast_exp(input[base + 2]) * prior_wh.data[0] / static_cast<T>(width_norm);
            output[base + 3] = fast_exp(input[base + 3]) * prior_wh.data[1] / static_cast<T>(height_norm);

            /* squash the objectness score into a probability */
            objectness = fast_sigmoid(input[base + 4]);
        }

        /* predictions whose objectness is below the cutoff are suppressed */
        if (objectness < object_prob_cutoff)
            objectness = 0;

        output[base + 4] = objectness;
    }
}
|
||||
|
||||
/* Converts per-class scores into final class probabilities (sigmoid variant).
 *
 * One grid-stride iteration handles one element of `output`. Elements at offsets 0..4
 * of a box are left alone (region_box writes them). Every other element becomes
 *     objectness * class_score
 * where objectness is read from output[box start + 4] and class_score is input[idx],
 * passed through fast_sigmoid unless `new_coords` is set. Results that are less than or
 * equal to `class_prob_cutoff` are stored as zero.
 */
template <class T>
__global__ void region_sigmoid_class_score(Span<T> output, View<T> input, T class_prob_cutoff,
    size_type box_size, bool new_coords)
{
    for (auto idx : grid_stride_range(output.size())) {
        const index_type box_no = idx / box_size;
        const index_type box_begin = box_no * box_size;
        const index_type pos_in_box = idx % box_size;

        /* only the class entries (offset 5 and up) are processed here */
        if (pos_in_box >= 5) {
            auto objectness = output[box_begin + 4];

            /* the stored class value is conditional on an object being present; multiplying
             * by the objectness yields the unconditional class probability
             *
             * with new_coords the logistic activation must not be applied again
             */
            T class_given_object = input[idx];
            if (!new_coords)
                class_given_object = fast_sigmoid(class_given_object);

            T class_prob = objectness * class_given_object;
            if (class_prob <= class_prob_cutoff)
                class_prob = T(0);

            output[idx] = class_prob;
        }
    }
}
|
||||
|
||||
/* Converts per-class scores into final class probabilities (softmax variant).
 *
 * One grid-stride iteration handles one whole box. The class entries of a box occupy
 * offsets [5, box_size). They are normalised with a softmax over `input`, multiplied by
 * the objectness stored at output[box start + 4] and written to `output`; products that
 * are less than or equal to `class_prob_cutoff` are stored as zero.
 */
template <class T>
__global__ void region_softmax_class_score(Span<T> output, View<T> input, T class_prob_cutoff, size_type box_size) {
    for (auto box_no : grid_stride_range(output.size() / box_size)) {
        const index_type box_begin = box_no * box_size;
        const index_type first_class = box_begin + 5;
        const index_type box_end = box_begin + box_size;

        /* pass 1: find the largest class score; subtracting it below keeps every
         * argument handed to exp() at or below zero
         */
        auto peak = numeric_limits<T>::lowest();
        for (int k = first_class; k < box_end; k++) {
            using device::max;
            peak = max(peak, input[k]);
        }

        /* pass 2: store the exponentials in `output` and accumulate their sum */
        auto total = T(0);
        for (int k = first_class; k < box_end; k++) {
            using device::exp;
            auto exponential = exp(input[k] - peak);
            total += exponential;
            output[k] = exponential;
        }

        /* pass 3: normalise and weight by the objectness of the box */
        for (int k = first_class; k < box_end; k++) {
            auto softmax_score = output[k] / total;

            /* the softmax score is conditional on an object being present; multiplying
             * by the objectness yields the unconditional class probability
             */
            auto objectness = output[box_begin + 4];
            auto class_prob = objectness * softmax_score;
            if (class_prob <= class_prob_cutoff)
                class_prob = T(0);
            output[k] = class_prob;
        }
    }
}
|
||||
}
|
||||
|
||||
/* Runs the region post-processing on `stream`.
 *
 * Two kernels are launched back to back:
 *   1. raw::region_box decodes entries 0..4 (x, y, width, height, objectness) of every box
 *   2. raw::region_sigmoid_class_score or raw::region_softmax_class_score turns the
 *      remaining entries of every box into class probabilities
 *
 * Parameters
 *   output, input        flat buffers of equal size holding boxes of `box_size` values
 *   bias                 pairs of values indexed by the box number within a cell
 *   object_prob_cutoff   objectness below this value is replaced by zero
 *   class_prob_cutoff    class probabilities at or below this value are replaced by zero
 *   boxes_per_cell, box_size, rows, cols, scale_x_y, height_norm, width_norm
 *                        forwarded unchanged to raw::region_box
 *   if_true_sigmoid_else_softmax
 *                        selects the class-score kernel (true = sigmoid, false = softmax)
 *   new_coords           forwarded to the box and sigmoid kernels
 *
 * Fails a CV_Assert when box_size < 5, when the buffers differ in size, when the size is
 * not a multiple of box_size, or when `bias` is not aligned for 2-element vector loads.
 */
template <class T>
void region(const Stream& stream, Span<T> output, View<T> input, View<T> bias,
    T object_prob_cutoff, T class_prob_cutoff,
    std::size_t boxes_per_cell, std::size_t box_size,
    std::size_t rows, std::size_t cols, T scale_x_y,
    std::size_t height_norm, std::size_t width_norm,
    bool if_true_sigmoid_else_softmax, /* true = sigmoid, false = softmax */
    bool new_coords)
{
    /* the kernels address entries 0..4 of every box, and box_size is used as a divisor */
    CV_Assert(box_size >= 5);
    CV_Assert(output.size() == input.size());
    CV_Assert(output.size() % box_size == 0);
    CV_Assert(is_fully_aligned(bias, 2));

    const auto num_boxes = output.size() / box_size;

    auto box_kernel = raw::region_box<T>;
    auto box_policy = make_policy(box_kernel, num_boxes, 0, stream);
    launch_kernel(box_kernel, box_policy,
        output, input, bias, boxes_per_cell, box_size,
        rows, cols, scale_x_y, height_norm, width_norm,
        object_prob_cutoff, new_coords);

    if (if_true_sigmoid_else_softmax) {
        /* the sigmoid kernel performs one iteration per element */
        auto kernel_score = raw::region_sigmoid_class_score<T>;
        auto policy_score = make_policy(kernel_score, output.size(), 0, stream);
        launch_kernel(kernel_score, policy_score, output, input, class_prob_cutoff, box_size, new_coords);
    } else {
        /* the softmax kernel performs one iteration per box, so the policy is sized to
         * the number of boxes rather than the number of elements
         */
        auto kernel_score = raw::region_softmax_class_score<T>;
        auto policy_score = make_policy(kernel_score, num_boxes, 0, stream);
        launch_kernel(kernel_score, policy_score, output, input, class_prob_cutoff, box_size);
    }
}

/* explicit instantiations; the __half version is skipped in device passes for __CUDA_ARCH__ < 530 */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void region(const Stream&, Span<__half>, View<__half>, View<__half>,
    __half, __half, std::size_t, std::size_t, std::size_t, std::size_t, __half, std::size_t, std::size_t, bool, bool);
#endif

template void region(const Stream&, Span<float>, View<float>, View<float>,
    float, float, std::size_t, std::size_t, std::size_t, std::size_t, float, std::size_t, std::size_t, bool, bool);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
245
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/resize.cu
vendored
Normal file
245
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/resize.cu
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "types.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "memory.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
/* Nearest-neighbour resize kernel.
 *
 * `output` and `input` are treated as stacks of 2d images (out_height x out_width and
 * in_height x in_width); all leading axes are folded into one "channel" axis. Every
 * grid-stride iteration picks one output location (x, y) and copies CHANNELS_PER_ITER
 * consecutive channels for it, so the source coordinate is computed once per group.
 *
 * The source coordinate is (y [+ 0.5 if half_pixel_centers]) * o2i_fy (same for x),
 * converted to an integer with lround when `round` is set and by truncation otherwise,
 * then limited to in_height - 1 / in_width - 1.
 */
template <class T, std::size_t CHANNELS_PER_ITER>
__global__ void resize_nn(
    Span<T> output, size_type out_height, size_type out_width,
    View<T> input, size_type in_height, size_type in_width,
    float o2i_fy, float o2i_fx, bool round, bool half_pixel_centers)
{
    auto src_plane = in_height * in_width;
    auto dst_plane = out_height * out_width;

    /* number of 2d images in the output */
    auto plane_count = output.size() / dst_plane;

    /* each iteration covers CHANNELS_PER_ITER images at one (x, y); hence every (x, y)
     * needs plane_count / CHANNELS_PER_ITER iterations and the whole resize needs that
     * many iterations for each of the dst_plane locations
     */
    auto groups_per_xy = (plane_count / CHANNELS_PER_ITER);
    auto total_iters = groups_per_xy * dst_plane;

    for (auto iter : grid_stride_range(total_iters)) {
        const index_type c_first = (iter / dst_plane) * CHANNELS_PER_ITER;

        /* consecutive `iter` values usually map to consecutive `x` values, which lets
         * the stores of neighbouring threads coalesce
         */
        const index_type y = (iter % dst_plane) / out_width;
        const index_type x = iter % out_width;

        float src_yf, src_xf;
        if (half_pixel_centers) {
            src_yf = (y + 0.5f) * o2i_fy;
            src_xf = (x + 0.5f) * o2i_fx;
        } else {
            src_yf = y * o2i_fy;
            src_xf = x * o2i_fx;
        }

        using device::lround;
        index_type src_y, src_x;
        if (round) {
            src_y = lround(src_yf);
            src_x = lround(src_xf);
        } else {
            src_y = static_cast<index_type>(src_yf);
            src_x = static_cast<index_type>(src_xf);
        }

        using device::min;
        src_y = min(src_y, in_height - 1);
        src_x = min(src_x, in_width - 1);

        index_type src_idx = c_first * src_plane + src_y * in_width + src_x;
        index_type dst_idx = c_first * dst_plane + y * out_width + x;

        /* same (x, y) in the next image is exactly one plane further */
        for (int ch = 0; ch < CHANNELS_PER_ITER; ch++) {
            output[dst_idx] = load_ldg(input[src_idx]);

            src_idx += src_plane;
            dst_idx += dst_plane;
        }
    }
}
|
||||
|
||||
/* Bilinear resize kernel.
 *
 * `output` and `input` are treated as stacks of 2d images (out_height x out_width and
 * in_height x in_width); all leading axes are folded into one "channel" axis. Every
 * grid-stride iteration picks one output location (x, y) and interpolates
 * CHANNELS_PER_ITER consecutive channels for it, reusing the source coordinates.
 *
 * The fractional source coordinate is x * o2i_fx, or max((x + 0.5) * o2i_fx - 0.5, 0)
 * with half_pixel_centers (same for y). The four neighbours are the truncated
 * coordinate and the next one, the latter limited to in_width - 1 / in_height - 1.
 */
template <class T, std::size_t CHANNELS_PER_ITER>
__global__ void resize_bilinear(
    Span<T> output, size_type out_height, size_type out_width,
    View<T> input, size_type in_height, size_type in_width,
    float o2i_fy, float o2i_fx, bool half_pixel_centers)
{
    auto src_plane = in_height * in_width;
    auto dst_plane = out_height * out_width;

    /* number of 2d images in the output */
    auto plane_count = output.size() / dst_plane;

    /* each iteration covers CHANNELS_PER_ITER images at one (x, y); hence every (x, y)
     * needs plane_count / CHANNELS_PER_ITER iterations and the whole resize needs that
     * many iterations for each of the dst_plane locations
     */
    auto groups_per_xy = (plane_count / CHANNELS_PER_ITER);
    auto total_iters = groups_per_xy * dst_plane;

    for (auto iter : grid_stride_range(total_iters)) {
        const index_type c_first = (iter / dst_plane) * CHANNELS_PER_ITER;
        const index_type c_last = c_first + CHANNELS_PER_ITER;

        /* consecutive `iter` values usually map to consecutive `x` values, which lets
         * the stores of neighbouring threads coalesce
         */
        const index_type y = (iter % dst_plane) / out_width;
        const index_type x = iter % out_width;

        using device::max;
        float src_x, src_y;
        if (half_pixel_centers) {
            src_x = max<float>((x + 0.5f) * o2i_fx - 0.5f, 0.0f);
            src_y = max<float>((y + 0.5f) * o2i_fy - 0.5f, 0.0f);
        } else {
            src_x = x * o2i_fx;
            src_y = y * o2i_fy;
        }

        /* upper-left neighbour (truncation) and lower-right neighbour (clamped) */
        auto x0 = static_cast<index_type>(src_x);
        auto y0 = static_cast<index_type>(src_y);

        using device::min;
        auto x1 = min<index_type>(x0 + 1, in_width - 1);
        auto y1 = min<index_type>(y0 + 1, in_height - 1);

        index_type row0_offset = c_first * src_plane + y0 * in_width;
        index_type row1_offset = c_first * src_plane + y1 * in_width;
        index_type dst_idx = c_first * dst_plane + y * out_width + x;

        /* unrolling is switched off on purpose: the original note says it reduces register pressure */
        #pragma unroll 1
        for (auto c = c_first; c < c_last; c++) {
            const auto v_00 = load_ldg(input[row0_offset + x0]);
            const auto v_01 = load_ldg(input[row0_offset + x1]);
            const auto v_10 = load_ldg(input[row1_offset + x0]);
            const auto v_11 = load_ldg(input[row1_offset + x1]);

            output[dst_idx] =
                v_00 +
                T(src_y - y0) * T(v_10 - v_00) +
                T(src_x - x0) * T(v_01 - v_00) +
                T(src_y - y0) * T(src_x - x0) * T(v_11 - v_01 - v_10 + v_00);

            /* same rows and location in the next image are exactly one plane further */
            row0_offset += src_plane;
            row1_offset += src_plane;
            dst_idx += dst_plane;
        }
    }
}
|
||||
}
|
||||
|
||||
/* Launches raw::resize_nn<T, CHANNELS_PER_ITER> on `stream`.
 *
 * The launch policy is sized to output.size() / CHANNELS_PER_ITER, which matches the
 * number of iterations the kernel performs; every other argument is forwarded to the
 * kernel unchanged (scale_y/scale_x arrive there as o2i_fy/o2i_fx).
 */
template <class T, std::size_t CHANNELS_PER_ITER> static
void launch_multichannel_resize_nn(const Stream& stream,
    Span<T> output, size_type out_height, size_type out_width,
    View<T> input, size_type in_height, size_type in_width,
    float scale_y, float scale_x, bool round, bool half_pixel_centers)
{
    auto nn_kernel = raw::resize_nn<T, CHANNELS_PER_ITER>;
    auto launch_cfg = make_policy(nn_kernel, output.size() / CHANNELS_PER_ITER, 0, stream);
    launch_kernel(nn_kernel, launch_cfg,
        output, out_height, out_width,
        input, in_height, in_width,
        scale_y, scale_x, round, half_pixel_centers);
}
|
||||
|
||||
/* Nearest-neighbour resize of `input` into `output` on `stream`.
 *
 * The last two axes of both tensors are taken as height and width; the product of the
 * first two input axes is the number of channels handled by the kernel. The widest
 * channel group (32, 16, 8, 4, 2 or 1 channels per kernel iteration) that divides the
 * channel count is selected; the groups of 4 and more are additionally used only when
 * channels * out_height * out_width exceeds the threshold listed for them.
 *
 * scale_y, scale_x, round and half_pixel_centers are forwarded to the kernel.
 */
template <class T>
void resize_nn(const Stream& stream, TensorSpan<T> output, TensorView<T> input, float scale_y, float scale_x, bool round, bool half_pixel_centers) {
    auto in_height = input.get_axis_size(-2);
    auto in_width = input.get_axis_size(-1);

    auto out_height = output.get_axis_size(-2);
    auto out_width = output.get_axis_size(-1);

    auto channels = input.size_range(0, 2);
    auto work = channels * out_height * out_width;

    if (channels % 32 == 0 && work > 655360) {
        launch_multichannel_resize_nn<T, 32>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers);
        return;
    }

    if (channels % 16 == 0 && work > 327680) {
        launch_multichannel_resize_nn<T, 16>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers);
        return;
    }

    if (channels % 8 == 0 && work > 163840) {
        launch_multichannel_resize_nn<T, 8>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers);
        return;
    }

    if (channels % 4 == 0 && work > 81920) {
        launch_multichannel_resize_nn<T, 4>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers);
        return;
    }

    if (channels % 2 == 0) {
        launch_multichannel_resize_nn<T, 2>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers);
        return;
    }

    launch_multichannel_resize_nn<T, 1>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers);
}

/* explicit instantiations; the __half version is skipped in device passes for __CUDA_ARCH__ < 530 */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void resize_nn<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, float, float, bool, bool);
#endif
template void resize_nn<float>(const Stream&, TensorSpan<float>, TensorView<float>, float, float, bool,bool);
|
||||
|
||||
/* Launches raw::resize_bilinear<T, CHANNELS_PER_ITER> on `stream`.
 *
 * The launch policy is sized to output.size() / CHANNELS_PER_ITER, which matches the
 * number of iterations the kernel performs; every other argument is forwarded to the
 * kernel unchanged (scale_y/scale_x arrive there as o2i_fy/o2i_fx).
 */
template <class T, std::size_t CHANNELS_PER_ITER> static
void launch_multichannel_resize_bilinear(const Stream& stream,
    Span<T> output, size_type out_height, size_type out_width,
    View<T> input, size_type in_height, size_type in_width,
    float scale_y, float scale_x, bool half_pixel_centers)
{
    auto bilinear_kernel = raw::resize_bilinear<T, CHANNELS_PER_ITER>;
    auto launch_cfg = make_policy(bilinear_kernel, output.size() / CHANNELS_PER_ITER, 0, stream);
    launch_kernel(bilinear_kernel, launch_cfg,
        output, out_height, out_width,
        input, in_height, in_width,
        scale_y, scale_x, half_pixel_centers);
}
|
||||
|
||||
/* Bilinear resize of `input` into `output` on `stream`.
 *
 * The last two axes of both tensors are taken as height and width; the product of the
 * first two input axes is the number of channels handled by the kernel. The widest
 * channel group (16, 8, 4, 2 or 1 channels per kernel iteration) that divides the
 * channel count is selected; the groups of 4 and more are additionally used only when
 * channels * out_height * out_width exceeds the threshold listed for them.
 *
 * scale_y, scale_x and half_pixel_centers are forwarded to the kernel.
 */
template <class T>
void resize_bilinear(const Stream& stream, TensorSpan<T> output, TensorView<T> input, float scale_y, float scale_x, bool half_pixel_centers) {
    auto in_height = input.get_axis_size(-2);
    auto in_width = input.get_axis_size(-1);

    auto out_height = output.get_axis_size(-2);
    auto out_width = output.get_axis_size(-1);

    auto channels = input.size_range(0, 2);
    auto work = channels * out_height * out_width;

    if (channels % 16 == 0 && work > 163840) {
        launch_multichannel_resize_bilinear<T, 16>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers);
        return;
    }

    if (channels % 8 == 0 && work > 81920) {
        launch_multichannel_resize_bilinear<T, 8>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers);
        return;
    }

    if (channels % 4 == 0 && work > 40960) {
        launch_multichannel_resize_bilinear<T, 4>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers);
        return;
    }

    if (channels % 2 == 0) {
        launch_multichannel_resize_bilinear<T, 2>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers);
        return;
    }

    launch_multichannel_resize_bilinear<T, 1>(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers);
}

/* explicit instantiations; the __half version is skipped in device passes for __CUDA_ARCH__ < 530 */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void resize_bilinear<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, float, float, bool);
#endif
template void resize_bilinear<float>(const Stream&, TensorSpan<float>, TensorView<float>, float, float, bool);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
181
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/roi_pooling.cu
vendored
Normal file
181
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/roi_pooling.cu
vendored
Normal file
@@ -0,0 +1,181 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "math.hpp"
|
||||
#include "limits.hpp"
|
||||
#include "types.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "memory.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
|
||||
/* ROI max-pooling kernel.
 *
 * Layouts used by the indexing below:
 *   input   [batch, num_channels, in_height, in_width]
 *   rois    [num_rois, 5]; entry 0 is read as the batch index, entries 1..4 as
 *           x_start, y_start, x_end, y_end and are multiplied by `spatial_scale`
 *   output  [num_rois, num_channels, pooled_height, pooled_width]
 *
 * Every grid-stride iteration handles one (roi, x, y) output location for
 * CHANNELS_PER_ITER consecutive channels, so the pooling window is derived once and
 * reused for the whole channel group. A window that is empty after clamping to the
 * input extent produces zero.
 */
template <class T, std::size_t CHANNELS_PER_ITER>
__global__ void roi_pooling(
    Span<T> output, size_type pooled_height, size_type pooled_width,
    View<T> input, size_type in_height, size_type in_width,
    View<T> rois, size_type num_channels, float spatial_scale)
{
    const auto in_plane = in_height * in_width;
    const auto out_plane = pooled_height * pooled_width;
    const auto out_per_roi = num_channels * out_plane;

    auto roi_count = rois.size() / 5;

    /* num_channels / CHANNELS_PER_ITER iterations are needed per (roi, x, y); a roi has
     * out_plane locations and there are roi_count rois
     */
    auto groups_per_roi_xy = num_channels / CHANNELS_PER_ITER;
    auto iters_per_roi = groups_per_roi_xy * out_plane;
    auto total_iters = roi_count * iters_per_roi;

    for (auto iter : grid_stride_range(total_iters))
    {
        const index_type roi_no = iter / iters_per_roi;
        const index_type c_first = ((iter % iters_per_roi) / out_plane) * CHANNELS_PER_ITER;

        /* consecutive `iter` values usually map to consecutive `x` values, which lets
         * the stores of neighbouring threads coalesce
         */
        const index_type y = (iter % out_plane) / pooled_width;
        const index_type x = iter % pooled_width;

        const index_type roi_base = roi_no * 5;

        using device::round;
        const index_type batch_id = rois[roi_base + 0];
        const index_type roi_x0 = round(static_cast<float>(rois[roi_base + 1]) * spatial_scale);
        const index_type roi_y0 = round(static_cast<float>(rois[roi_base + 2]) * spatial_scale);
        const index_type roi_x1 = round(static_cast<float>(rois[roi_base + 3]) * spatial_scale);
        const index_type roi_y1 = round(static_cast<float>(rois[roi_base + 4]) * spatial_scale);

        /* the roi is at least one pixel wide and high */
        using device::max;
        const auto roi_w = max<index_type>(roi_x1 - roi_x0 + 1, 1);
        const auto roi_h = max<index_type>(roi_y1 - roi_y0 + 1, 1);

        /* size of one pooling bin, in input pixels */
        const auto bin_w = static_cast<float>(roi_w) / pooled_width;
        const auto bin_h = static_cast<float>(roi_h) / pooled_height;

        /* pooling window [x_lo, x_hi) x [y_lo, y_hi), limited to the input extent */
        using device::ceil;
        using device::min;
        const auto x_lo = max<index_type>(roi_x0 + static_cast<index_type>(x * bin_w), 0);
        const auto y_lo = max<index_type>(roi_y0 + static_cast<index_type>(y * bin_h), 0);
        const auto x_hi = min<index_type>(roi_x0 + static_cast<index_type>(ceil((x + 1) * bin_w)), in_width);
        const auto y_hi = min<index_type>(roi_y0 + static_cast<index_type>(ceil((y + 1) * bin_h)), in_height);

        /* an empty window never enters the loops below; starting from zero in that case
         * makes the stored result zero without a separate code path
         */
        const bool empty_window = (x_lo >= x_hi || y_lo >= y_hi);

        index_type src_offset = (batch_id * num_channels + c_first) * in_height * in_width;
        index_type dst_idx = roi_no * out_per_roi + c_first * out_plane + y * pooled_width + x;

        for (int ch = 0; ch < CHANNELS_PER_ITER; ch++)
        {
            T best = empty_window ? T(0) : device::numeric_limits<T>::lowest();

            for (auto iy = y_lo; iy < y_hi; iy++)
            {
                const auto row_start = src_offset + iy * in_width;
                for (auto ix = x_lo; ix < x_hi; ix++)
                {
                    best = max(best, load_ldg(input[row_start + ix]));
                }
            }

            output[dst_idx] = best;

            /* same window in the next channel */
            src_offset += in_plane;
            dst_idx += out_plane;
        }
    }
}
|
||||
}
|
||||
|
||||
/* Launches raw::roi_pooling<T, CHANNELS_PER_ITER> on `stream`.
 *
 * The launch policy is sized to output.size() / CHANNELS_PER_ITER; every other argument
 * is forwarded to the kernel unchanged.
 */
template <class T, std::size_t CHANNELS_PER_ITER> static
void launch_multichannel_roi_pooling(const Stream& stream,
    Span<T> output, size_type pooled_height, size_type pooled_width,
    View<T> input, size_type in_height, size_type in_width,
    View<T> rois, size_type num_channels, float spatial_scale)
{
    auto pooling_kernel = raw::roi_pooling<T, CHANNELS_PER_ITER>;
    auto launch_cfg = make_policy(pooling_kernel, output.size() / CHANNELS_PER_ITER, 0, stream);
    launch_kernel(pooling_kernel, launch_cfg,
        output, pooled_height, pooled_width,
        input, in_height, in_width,
        rois, num_channels, spatial_scale);
}
|
||||
|
||||
/* ROI max-pooling of `input` into `output` on `stream`.
 *
 * Axis 1 of both tensors is the channel axis and must match (CV_Assert); axes 2 and 3
 * are height and width. The widest channel group (64, 32, 16, 8, 4, 2 or 1 channels per
 * kernel iteration) that divides the channel count is used. `rois` and `spatial_scale`
 * are forwarded to the kernel.
 */
template <class T>
void roi_pooling(const Stream& stream, TensorSpan<T> output, TensorView<T> input, View<T> rois, float spatial_scale)
{
    CV_Assert(input.get_axis_size(1) == output.get_axis_size(1));

    size_type in_height = input.get_axis_size(2);
    size_type in_width = input.get_axis_size(3);

    size_type num_channels = output.get_axis_size(1);
    size_type pooled_height = output.get_axis_size(2);
    size_type pooled_width = output.get_axis_size(3);

    if (num_channels % 64 == 0) {
        launch_multichannel_roi_pooling<T, 64>(stream, output, pooled_height, pooled_width, input, in_height, in_width, rois, num_channels, spatial_scale);
        return;
    }

    if (num_channels % 32 == 0) {
        launch_multichannel_roi_pooling<T, 32>(stream, output, pooled_height, pooled_width, input, in_height, in_width, rois, num_channels, spatial_scale);
        return;
    }

    if (num_channels % 16 == 0) {
        launch_multichannel_roi_pooling<T, 16>(stream, output, pooled_height, pooled_width, input, in_height, in_width, rois, num_channels, spatial_scale);
        return;
    }

    if (num_channels % 8 == 0) {
        launch_multichannel_roi_pooling<T, 8>(stream, output, pooled_height, pooled_width, input, in_height, in_width, rois, num_channels, spatial_scale);
        return;
    }

    if (num_channels % 4 == 0) {
        launch_multichannel_roi_pooling<T, 4>(stream, output, pooled_height, pooled_width, input, in_height, in_width, rois, num_channels, spatial_scale);
        return;
    }

    if (num_channels % 2 == 0) {
        launch_multichannel_roi_pooling<T, 2>(stream, output, pooled_height, pooled_width, input, in_height, in_width, rois, num_channels, spatial_scale);
        return;
    }

    launch_multichannel_roi_pooling<T, 1>(stream, output, pooled_height, pooled_width, input, in_height, in_width, rois, num_channels, spatial_scale);
}

/* explicit instantiations; the __half version is skipped in device passes for __CUDA_ARCH__ < 530 */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
template void roi_pooling(const Stream& stream, TensorSpan<__half> output, TensorView<__half> input, View<__half> rois, float spatial_scale);
#endif
template void roi_pooling(const Stream& stream, TensorSpan<float> output, TensorView<float> input, View<float> rois, float spatial_scale);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
235
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/scale_shift.cu
vendored
Normal file
235
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/scale_shift.cu
vendored
Normal file
@@ -0,0 +1,235 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "types.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
/* Adds a bias to every element: output = input + bias[k].
 *
 * Data is moved in vectors of N elements. With `inner_size` counted in elements, the
 * bias index of vector i is (i / (inner_size / N)) % bias.size(), so all elements of
 * one vector share a single bias value.
 */
template <class T, std::size_t N>
__global__ void biasN_vec(Span<T> output, View<T> input, size_type inner_size, View<T> bias) {
    using vector_type = get_vector_type_t<T, N>;

    auto dst_vectors = vector_type::get_pointer(output.data());
    auto src_vectors = vector_type::get_pointer(input.data());

    /* inner_size counted in vectors instead of elements */
    const size_type inner_vectors = inner_size / vector_type::size();

    for (auto i : grid_stride_range(output.size() / vector_type::size())) {
        const index_type bias_idx = (i / inner_vectors) % bias.size();
        const auto addend = bias[bias_idx];

        vector_type packed;
        v_load(packed, src_vectors[i]);
        for (int lane = 0; lane < vector_type::size(); lane++)
            packed.data[lane] = packed.data[lane] + addend;
        v_store(dst_vectors[i], packed);
    }
}
|
||||
|
||||
/* Multiplies every element by a weight: output = input * weights[k].
 *
 * Data is moved in vectors of N elements. With `inner_size` counted in elements, the
 * weight index of vector i is (i / (inner_size / N)) % weights.size(), so all elements
 * of one vector share a single weight.
 */
template <class T, std::size_t N>
__global__ void scaleN_vec(Span<T> output, View<T> input, size_type inner_size, View<T> weights)
{
    using vector_type = get_vector_type_t<T, N>;

    auto dst_vectors = vector_type::get_pointer(output.data());
    auto src_vectors = vector_type::get_pointer(input.data());

    /* inner_size counted in vectors instead of elements */
    const size_type inner_vectors = inner_size / vector_type::size();

    for (auto i : grid_stride_range(output.size() / vector_type::size())) {
        const index_type scale_idx = (i / inner_vectors) % weights.size();
        const auto factor = weights[scale_idx];

        vector_type packed;
        v_load(packed, src_vectors[i]);
        for (int lane = 0; lane < vector_type::size(); lane++)
            packed.data[lane] = packed.data[lane] * factor;
        v_store(dst_vectors[i], packed);
    }
}
|
||||
|
||||
template <class T, std::size_t N>
|
||||
__global__ void scale1_with_bias1_vec(Span<T> output, View<T> input, T alpha, T beta)
|
||||
{
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = vector_type::get_pointer(input.data());
|
||||
|
||||
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
|
||||
vector_type vec;
|
||||
v_load(vec, input_vPtr[i]);
|
||||
for (int j = 0; j < vec.size(); j++)
|
||||
vec.data[j] = alpha * vec.data[j] + beta;
|
||||
v_store(output_vPtr[i], vec);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t N>
|
||||
__global__ void scaleN_with_biasN_vec(Span<T> output, View<T> input, size_type inner_size, View<T> weights, View<T> bias)
|
||||
{
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = vector_type::get_pointer(input.data());
|
||||
|
||||
inner_size /= vector_type::size();
|
||||
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
|
||||
const index_type scale_idx = (i / inner_size) % weights.size();
|
||||
|
||||
vector_type vec;
|
||||
v_load(vec, input_vPtr[i]);
|
||||
for (int j = 0; j < vec.size(); j++)
|
||||
vec.data[j] = vec.data[j] * weights[scale_idx] + bias[scale_idx];
|
||||
v_store(output_vPtr[i], vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_biasN_vec_kernel(const Stream& stream, Span<T> output, View<T> input, std::size_t inner_size, View<T> bias){
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
CV_Assert(inner_size % N == 0);
|
||||
|
||||
auto kernel = raw::biasN_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, inner_size, bias);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void biasN(
|
||||
const Stream& stream,
|
||||
TensorSpan<T> output,
|
||||
TensorView<T> input, std::size_t inner_size,
|
||||
TensorView<T> bias)
|
||||
{
|
||||
CV_Assert(is_shape_same(input, output));
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4) && inner_size % 4 == 0) {
|
||||
launch_biasN_vec_kernel<T, 4>(stream, output, input, inner_size, bias);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2) && inner_size % 2 == 0) {
|
||||
launch_biasN_vec_kernel<T, 2>(stream, output, input, inner_size, bias);
|
||||
} else {
|
||||
launch_biasN_vec_kernel<T, 1>(stream, output, input, inner_size, bias);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void biasN<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, std::size_t, TensorView<__half>);
|
||||
#endif
|
||||
template void biasN<float>(const Stream&, TensorSpan<float>, TensorView<float>, std::size_t, TensorView<float>);
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_scaleN_vec_kernel(const Stream& stream, Span<T> output, View<T> input, std::size_t inner_size, View<T> weights) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
CV_Assert(inner_size % N == 0);
|
||||
|
||||
auto kernel = raw::scaleN_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, inner_size, weights);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void scaleN(
|
||||
const Stream& stream,
|
||||
TensorSpan<T> output,
|
||||
TensorView<T> input, std::size_t inner_size,
|
||||
TensorView<T> weights)
|
||||
{
|
||||
CV_Assert(is_shape_same(input, output));
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4) && inner_size % 4 == 0) {
|
||||
launch_scaleN_vec_kernel<T, 4>(stream, output, input, inner_size, weights);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2) && inner_size % 2 == 0) {
|
||||
launch_scaleN_vec_kernel<T, 2>(stream, output, input, inner_size, weights);
|
||||
} else {
|
||||
launch_scaleN_vec_kernel<T, 1>(stream, output, input, inner_size, weights);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void scaleN<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, std::size_t, TensorView<__half>);
|
||||
#endif
|
||||
template void scaleN<float>(const Stream&, TensorSpan<float>, TensorView<float>, std::size_t, TensorView<float>);
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_scale1_with_bias1_vec_kernel(const Stream& stream, Span<T> output, View<T> input, T alpha, T beta) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
|
||||
auto kernel = raw::scale1_with_bias1_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, alpha, beta);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void scale1_with_bias1(const Stream& stream, Span<T> output, View<T> input, T alpha, T beta) {
|
||||
CV_Assert(output.size() == input.size());
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
|
||||
launch_scale1_with_bias1_vec_kernel<T, 4>(stream, output, input, alpha, beta);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
|
||||
launch_scale1_with_bias1_vec_kernel<T, 2>(stream, output, input, alpha, beta);
|
||||
} else {
|
||||
launch_scale1_with_bias1_vec_kernel<T, 1>(stream, output, input, alpha, beta);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void scale1_with_bias1<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
|
||||
#endif
|
||||
template void scale1_with_bias1<float>(const Stream&, Span<float>, View<float>, float, float);
|
||||
|
||||
template <class T, std::size_t N> static
|
||||
void launch_scaleN_with_biasN_vec_kernel(const Stream& stream, Span<T> output, View<T> input, std::size_t inner_size, View<T> weights, View<T> bias) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
CV_Assert(inner_size % N == 0);
|
||||
|
||||
auto kernel = raw::scaleN_with_biasN_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, inner_size, weights, bias);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void scaleN_with_biasN(
|
||||
const Stream& stream,
|
||||
TensorSpan<T> output,
|
||||
TensorView<T> input, std::size_t inner_size,
|
||||
TensorView<T> weights, TensorView<T> bias)
|
||||
{
|
||||
CV_Assert(is_shape_same(input, output));
|
||||
CV_Assert(weights.size() == bias.size());
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4) && inner_size % 4 == 0) {
|
||||
launch_scaleN_with_biasN_vec_kernel<T, 4>(stream, output, input, inner_size, weights, bias);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2) && inner_size % 2 == 0) {
|
||||
launch_scaleN_with_biasN_vec_kernel<T, 2>(stream, output, input, inner_size, weights, bias);
|
||||
} else {
|
||||
launch_scaleN_with_biasN_vec_kernel<T, 1>(stream, output, input, inner_size, weights, bias);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void scaleN_with_biasN<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, std::size_t, TensorView<__half>, TensorView<__half>);
|
||||
#endif
|
||||
template void scaleN_with_biasN<float>(const Stream&, TensorSpan<float>, TensorView<float>, std::size_t, TensorView<float>, TensorView<float>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
111
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/shortcut.cu
vendored
Normal file
111
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/shortcut.cu
vendored
Normal file
@@ -0,0 +1,111 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "vector_traits.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, std::size_t N>
|
||||
__global__ void input_shortcut_vec(
|
||||
Span<T> output,
|
||||
View<T> input, index_type c_input, /* `c_input` = number of channels in `input` */
|
||||
View<T> from, index_type c_from, /* `c_from` = number of channels in `from` */
|
||||
size_type channel_stride /* common for both `input` and `from` */)
|
||||
{
|
||||
using vector_type = get_vector_type_t<T, N>;
|
||||
|
||||
auto output_vPtr = vector_type::get_pointer(output.data());
|
||||
auto input_vPtr = vector_type::get_pointer(input.data());
|
||||
auto from_vPtr = vector_type::get_pointer(from.data());
|
||||
|
||||
auto batch_stride_input = c_input * channel_stride;
|
||||
auto batch_stride_from = c_from * channel_stride;
|
||||
|
||||
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
|
||||
const auto actual_idx = i * vector_type::size();
|
||||
const auto b = actual_idx / batch_stride_input; /* `input` and `output` have the same shape */
|
||||
const auto c = (actual_idx % batch_stride_input) / channel_stride;
|
||||
const auto c_offset = actual_idx % channel_stride;
|
||||
|
||||
vector_type vec_input;
|
||||
v_load(vec_input, input_vPtr[i]);
|
||||
|
||||
/* We can break down the shortcut operation into two steps:
|
||||
* - copy `input` to `output`
|
||||
* - add `from` to corresponding channels in `output`
|
||||
*
|
||||
* In this scheme, only some channels in the `output` differ from `input`. They differ in the channels
|
||||
* which have a corresponding channel in `from`.
|
||||
*/
|
||||
if (c < c_from) {
|
||||
const auto from_actual_idx = b * batch_stride_from + c * channel_stride + c_offset;
|
||||
const auto from_vec_idx = from_actual_idx / vector_type::size();
|
||||
|
||||
vector_type vec_from;
|
||||
v_load(vec_from, from_vPtr[from_vec_idx]);
|
||||
for (int j = 0; j < vector_type::size(); j++)
|
||||
vec_input.data[j] += vec_from.data[j];
|
||||
}
|
||||
|
||||
v_store(output_vPtr[i], vec_input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t N>
|
||||
void launch_vectorized_input_shortcut(const Stream& stream, Span<T> output, View<T> input, std::size_t c_input, View<T> from, std::size_t c_from, std::size_t channel_stride) {
|
||||
CV_Assert(is_fully_aligned<T>(output, N));
|
||||
CV_Assert(is_fully_aligned<T>(input, N));
|
||||
CV_Assert(is_fully_aligned<T>(from, N));
|
||||
CV_Assert(channel_stride % N == 0);
|
||||
|
||||
auto kernel = raw::input_shortcut_vec<T, N>;
|
||||
auto policy = make_policy(kernel, output.size() / N, 0, stream);
|
||||
launch_kernel(kernel, policy, output, input, c_input, from, c_from, channel_stride);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void input_shortcut(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> input, csl::TensorView<T> from) {
|
||||
CV_Assert(is_shape_same(output, input));
|
||||
CV_Assert(output.rank() == from.rank());
|
||||
for (int i = 0; i < output.rank(); i++) {
|
||||
if (i != 1) {
|
||||
CV_Assert(from.get_axis_size(i) == output.get_axis_size(i));
|
||||
}
|
||||
}
|
||||
|
||||
auto channel_stride = output.size_range(2, output.rank()); /* same for `output`, `input` and `from` */
|
||||
auto c_input = input.get_axis_size(1);
|
||||
auto c_from = from.get_axis_size(1);
|
||||
|
||||
if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4) && is_fully_aligned<T>(from, 4) && channel_stride % 4 == 0) {
|
||||
launch_vectorized_input_shortcut<T, 4>(stream, output, input, c_input, from, c_from, channel_stride);
|
||||
} else if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2) && is_fully_aligned<T>(from, 2) && channel_stride % 2 == 0) {
|
||||
launch_vectorized_input_shortcut<T, 2>(stream, output, input, c_input, from, c_from, channel_stride);
|
||||
} else {
|
||||
launch_vectorized_input_shortcut<T, 1>(stream, output, input, c_input, from, c_from, channel_stride);
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void input_shortcut(const Stream&, TensorSpan<__half>, TensorView<__half>, TensorView<__half>);
|
||||
#endif
|
||||
template void input_shortcut(const Stream&, TensorSpan<float>, TensorView<float>, TensorView<float>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
203
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/slice.cu
vendored
Normal file
203
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/slice.cu
vendored
Normal file
@@ -0,0 +1,203 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "array.hpp"
|
||||
#include "types.hpp"
|
||||
#include "grid_stride_range.hpp"
|
||||
#include "execution.hpp"
|
||||
#include "kernel_dispatcher.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/stream.hpp"
|
||||
#include "../cuda4dnn/csl/tensor.hpp"
|
||||
#include "../cuda4dnn/csl/span.hpp"
|
||||
|
||||
#include "../cuda4dnn/kernels/fill_copy.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace cv::dnn::cuda4dnn::csl;
|
||||
using namespace cv::dnn::cuda4dnn::csl::device;
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
|
||||
|
||||
namespace raw {
|
||||
template <class T, std::size_t Rank>
|
||||
__global__ void slice(
|
||||
Span<T> output, array<size_type, Rank> out_strides,
|
||||
View<T> input, array<size_type, Rank> in_strides, array<index_type, Rank> in_offset)
|
||||
{
|
||||
for (auto i : grid_stride_range(output.size())) {
|
||||
index_type out_index = i / out_strides[0];
|
||||
index_type in_index = in_offset[0] + out_index;
|
||||
index_type iidx = in_index * in_strides[0];
|
||||
for (int j = 1; j < Rank; j++) {
|
||||
out_index = (i % out_strides[j - 1]) / out_strides[j];
|
||||
in_index = in_offset[j] + out_index;
|
||||
iidx += in_index * in_strides[j];
|
||||
}
|
||||
|
||||
output[i] = input[iidx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, std::size_t Rank> static
|
||||
void launch_slice(
|
||||
const Stream& stream,
|
||||
Span<T> output, const std::vector<std::size_t>& outStride,
|
||||
View<T> input, const std::vector<std::size_t>& inStride, const std::vector<std::size_t>& inOffset)
|
||||
{
|
||||
CV_Assert(outStride.size() == Rank);
|
||||
CV_Assert(inStride.size() == Rank);
|
||||
CV_Assert(inOffset.size() == Rank);
|
||||
|
||||
array<size_type, Rank> outStride_k, inStride_k;
|
||||
outStride_k.assign(std::begin(outStride), std::end(outStride));
|
||||
inStride_k.assign(std::begin(inStride), std::end(inStride));
|
||||
|
||||
array<index_type, Rank> inOffset_k;
|
||||
inOffset_k.assign(std::begin(inOffset), std::end(inOffset));
|
||||
|
||||
auto kernel = raw::slice<T, Rank>;
|
||||
auto policy = make_policy(kernel, output.size(), 0, stream);
|
||||
launch_kernel(kernel, policy, output, outStride_k, input, inStride_k, inOffset_k);
|
||||
}
|
||||
|
||||
GENERATE_KERNEL_DISPATCHER(slice_dispatcher, launch_slice);
|
||||
|
||||
template <class T>
|
||||
void slice(const Stream& stream,
|
||||
TensorSpan<T> output, TensorView<T> input,
|
||||
std::vector<std::size_t> offsets)
|
||||
{
|
||||
CV_Assert(output.rank() == input.rank());
|
||||
CV_Assert(output.rank() == offsets.size());
|
||||
|
||||
/* copy directly if no slicing is required */
|
||||
if (is_shape_same(output, input))
|
||||
{
|
||||
CV_Assert(std::all_of(std::begin(offsets), std::end(offsets), [] (std::size_t x) { return x == 0; }));
|
||||
kernels::copy<T>(stream, output, input);
|
||||
return;
|
||||
}
|
||||
|
||||
/* squeezable axes at the beginning of both tensors can be eliminated
|
||||
*
|
||||
* Reasoning:
|
||||
* ----------
|
||||
* Suppose an item's indices in the output tensor is [o1, o2, ...]. The indices in the input
|
||||
* tensor will be [o1 + off1, o2 + off2, ...]. The rest of the elements in the input are ignored.
|
||||
*
|
||||
* If the size of the first axis of the input and output tensor is unity, the input and output indices
|
||||
* for all the elements will be of the form be [0, o2 + off2, ...] and [0, o2, ...] respectively. Note that
|
||||
* there cannot be any ignored items since the axes have unit size. The first index does not contribute to the
|
||||
* element's address calculation and hence does nothing apart from eating up few cycles.
|
||||
*/
|
||||
while (input.get_axis_size(0) == 1 && output.get_axis_size(0) == 1) {
|
||||
CV_Assert(offsets[0] == 0);
|
||||
|
||||
input.squeeze(0);
|
||||
output.squeeze(0);
|
||||
offsets.erase(std::begin(offsets));
|
||||
|
||||
CV_Assert(output.rank() == input.rank());
|
||||
CV_Assert(output.rank() == offsets.size());
|
||||
}
|
||||
|
||||
auto inShape = input.shape_as_vector();
|
||||
auto outShape = output.shape_as_vector();
|
||||
|
||||
/* contiguous axes which do not undergo slicing can be combined into one axis
|
||||
*
|
||||
* Reasoning:
|
||||
* ----------
|
||||
* Suppose an item's indices in the output tensor is [o1, o2, o3, ...]. Let the first two axes not undergo any
|
||||
* slicing. The indices in the input tensor will be [o1, o2, o3 + off3, ...].
|
||||
*
|
||||
* Each axis in the contiguous unsliced axes sequence will add an offset of iN * strideN. In the above example,
|
||||
* the two axes add a total offset of `o1 * stride1 + o2 * stride2`. We can merge the two axes into one axis with
|
||||
* a size of `size1 * size2`. The new offset added will be o12 * stride2` as the kernel iterates through `o12`.
|
||||
* Note that `o12` is actually `(o1 * size2 + o2)` in the original tensor.
|
||||
*/
|
||||
for (int i = 0; i < inShape.size(); i++) {
|
||||
/* check if axis `i` requires any slicing */
|
||||
if (offsets[i] == 0 && inShape[i] == outShape[i]) {
|
||||
/* loop invariant: `i` is the first axis in the contiguous unsliced axis sequence */
|
||||
|
||||
int j = i + 1; /* `j` is the axis which we will attempt to merge */
|
||||
while (j < inShape.size() && offsets[j] == 0 && inShape[j] == outShape[j]) {
|
||||
/* `j` axis is also unsliced; merge `i` and `j` */
|
||||
auto new_size = inShape[i] * inShape[j];
|
||||
inShape[i] = new_size;
|
||||
outShape[i] = new_size;
|
||||
offsets[i] = 0; /* redundant */
|
||||
|
||||
/* delete axis `j` */
|
||||
inShape.erase(std::begin(inShape) + j);
|
||||
outShape.erase(std::begin(outShape) + j);
|
||||
offsets.erase(std::begin(offsets) + j);
|
||||
|
||||
/* optimizations should not break the invariants */
|
||||
CV_Assert(inShape.size() == outShape.size());
|
||||
CV_Assert(inShape.size() == offsets.size());
|
||||
CV_Assert(inShape[i] == outShape[i]);
|
||||
CV_Assert(offsets[i] == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto rank = inShape.size();
|
||||
|
||||
/* We can do a copy if the reduced rank is two and only the first axis is sliced.
|
||||
* The general requirement is that only one axis is sliced and all the axes that
|
||||
* preceed the sliced axis are singleton. However, the reductions above will remove
|
||||
* all the leading singleton axes and merge the trailing unsliced axes into one, or
|
||||
* zero if there are no trailing unsliced axes. The latter is handled separately.
|
||||
*/
|
||||
if (rank == 2 && offsets[0] != 0 && offsets[1] == 0)
|
||||
{
|
||||
auto stride = inShape[1];
|
||||
auto sliced_input = View<T>(input.get() + offsets[0] * stride, output.size());
|
||||
kernels::copy<T>(stream, output, sliced_input);
|
||||
return;
|
||||
}
|
||||
|
||||
if (rank == 1)
|
||||
{
|
||||
auto sliced_input = View<T>(input.get() + offsets[0], output.size());
|
||||
kernels::copy<T>(stream, output, sliced_input);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::size_t> inStride(rank), outStride(rank);
|
||||
inStride.back() = 1;
|
||||
outStride.back() = 1;
|
||||
/* garbage, ..., garbage, 1 */
|
||||
|
||||
std::copy(std::begin(inShape) + 1, std::end(inShape), std::begin(inStride));
|
||||
std::copy(std::begin(outShape) + 1, std::end(outShape), std::begin(outStride));
|
||||
/* dim[0], dim[1], ..., dim[-1], 1 */
|
||||
|
||||
std::partial_sum(inStride.rbegin(), inStride.rend(), inStride.rbegin(), std::multiplies<std::size_t>());
|
||||
std::partial_sum(outStride.rbegin(), outStride.rend(), outStride.rbegin(), std::multiplies<std::size_t>());
|
||||
/* stride[0], stride[1], ..., stride[-2], 1 */
|
||||
|
||||
CV_Assert(1 <= rank && rank <= CSL_MAX_TENSOR_RANK);
|
||||
slice_dispatcher<T, 1, CSL_MAX_TENSOR_RANK>(rank, stream, output, outStride, input, inStride, offsets);
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530)
|
||||
template void slice(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::size_t>);
|
||||
#endif
|
||||
template void slice(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::size_t>);
|
||||
|
||||
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
|
||||
27
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/types.hpp
vendored
Normal file
27
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/types.hpp
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_TYPES_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_TYPES_HPP
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
/* For indices, we can use 32bit variables or 64bit variables. The GPU registers are 32 bits in size.
|
||||
* Hence, a 64bit variable requires two registers and is significantly slower than the 32bit versions.
|
||||
*
|
||||
* If we do not need to handle huge tensors, we can use 32-bit indices and get better performance.
|
||||
*/
|
||||
/* `size_type` and `index_type` are declared as plain `int` when __CUDACC__ is defined and as
 * std::int32_t otherwise.
 */
#ifdef __CUDACC__
    typedef int size_type;
    typedef int index_type;
#else
    typedef std::int32_t size_type;
    typedef std::int32_t index_type;
#endif
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_TYPES_HPP */
|
||||
120
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/vector_traits.hpp
vendored
Normal file
120
3rdparty/opencv-4.5.4/modules/dnn/src/cuda/vector_traits.hpp
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA_VECTOR_TRAITS_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA_VECTOR_TRAITS_HPP
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "types.hpp"
|
||||
#include "memory.hpp"
|
||||
|
||||
#include "../cuda4dnn/csl/pointer.hpp"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
|
||||
|
||||
/** \file vector_traits.hpp
|
||||
* \brief utility classes and functions for vectorized memory loads/stores
|
||||
*
|
||||
* Example:
|
||||
* using vector_type = get_vector_type_t<float, 4>;
|
||||
*
|
||||
* auto input_vPtr = vector_type::get_pointer(iptr); // iptr is of type DevicePtr<const float>
|
||||
* auto output_vPtr = vector_type::get_pointer(optr); // optr is of type DevicePtr<float>
|
||||
*
|
||||
* vector_type vec;
|
||||
* v_load(vec, input_vPtr);
|
||||
*
|
||||
* for(int i = 0; i < vector_type::size(); i++)
|
||||
* vec.data[i] = do_something(vec.data[i]);
|
||||
*
|
||||
* v_store(output_vPtr, vec);
|
||||
*/
|
||||
|
||||
namespace detail {
|
||||
template <size_type N> struct raw_type_ { };
|
||||
template <> struct raw_type_<256> { typedef ulonglong4 type; };
|
||||
template <> struct raw_type_<128> { typedef uint4 type; };
|
||||
template <> struct raw_type_<64> { typedef uint2 type; };
|
||||
template <> struct raw_type_<32> { typedef uint1 type; };
|
||||
template <> struct raw_type_<16> { typedef uchar2 type; };
|
||||
template <> struct raw_type_<8> { typedef uchar1 type; };
|
||||
|
||||
template <size_type N> struct raw_type {
|
||||
using type = typename raw_type_<N>::type;
|
||||
static_assert(sizeof(type) * 8 == N, "");
|
||||
};
|
||||
}
|
||||
|
||||
/* \tparam T type of element in the vector
|
||||
* \tparam N "number of elements" of type T in the vector
|
||||
*/
|
||||
template <class T, size_type N>
|
||||
union vector_type {
|
||||
using value_type = T;
|
||||
using raw_type = typename detail::raw_type<N * sizeof(T) * 8>::type;
|
||||
|
||||
__device__ vector_type() { }
|
||||
|
||||
__device__ static constexpr size_type size() { return N; }
|
||||
|
||||
raw_type raw;
|
||||
T data[N];
|
||||
|
||||
template <class U> static __device__
|
||||
typename std::enable_if<std::is_const<U>::value, const vector_type*>
|
||||
::type get_pointer(csl::DevicePtr<U> ptr) {
|
||||
return reinterpret_cast<const vector_type*>(ptr.get());
|
||||
}
|
||||
|
||||
template <class U> static __device__
|
||||
typename std::enable_if<!std::is_const<U>::value, vector_type*>
|
||||
::type get_pointer(csl::DevicePtr<U> ptr) {
|
||||
return reinterpret_cast<vector_type*>(ptr.get());
|
||||
}
|
||||
};
|
||||
|
||||
template <class V>
|
||||
__device__ void v_load(V& dest, const V& src) {
|
||||
dest.raw = src.raw;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
__device__ void v_load(V& dest, const V* src) {
|
||||
dest.raw = src->raw;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
__device__ void v_load_ldg(V& dest, const V& src) {
|
||||
dest.raw = load_ldg(src.raw);
|
||||
}
|
||||
|
||||
template <class V>
|
||||
__device__ void v_load_ldg(V& dest, const V* src) {
|
||||
dest.raw = load_ldg(src->raw);
|
||||
}
|
||||
|
||||
template <class V>
|
||||
__device__ void v_store(V* dest, const V& src) {
|
||||
dest->raw = src.raw;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
__device__ void v_store(V& dest, const V& src) {
|
||||
dest.raw = src.raw;
|
||||
}
|
||||
|
||||
template <class T, size_type N>
|
||||
struct get_vector_type {
|
||||
typedef vector_type<T, N> type;
|
||||
};
|
||||
|
||||
template <class T, size_type N>
|
||||
using get_vector_type_t = typename get_vector_type<T, N>::type;
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::device */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA_VECTOR_TRAITS_HPP */
|
||||
368
3rdparty/opencv-4.5.4/modules/dnn/src/cuda4dnn/csl/cublas.hpp
vendored
Normal file
368
3rdparty/opencv-4.5.4/modules/dnn/src/cuda4dnn/csl/cublas.hpp
vendored
Normal file
@@ -0,0 +1,368 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_SRC_CUDA4DNN_CSL_CUBLAS_HPP
|
||||
#define OPENCV_DNN_SRC_CUDA4DNN_CSL_CUBLAS_HPP
|
||||
|
||||
#include "error.hpp"
|
||||
#include "stream.hpp"
|
||||
#include "pointer.hpp"
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
|
||||
#include <cublas_v2.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
/* Evaluates `call` and forwards its status, together with the name of the enclosing function,
 * the file and the line, to cublas::detail::check. */
#define CUDA4DNN_CHECK_CUBLAS(call) \
|
||||
::cv::dnn::cuda4dnn::csl::cublas::detail::check((call), CV_Func, __FILE__, __LINE__)
|
||||
|
||||
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cublas {
|
||||
|
||||
/** @brief exception class for errors thrown by the cuBLAS API */
|
||||
class cuBLASException : public CUDAException {
|
||||
public:
|
||||
/* inherit every CUDAException constructor; this class adds no members of its own */
using CUDAException::CUDAException;
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
/* Throws cuBLASException (error code Error::GpuApiCallError) when `status` is anything other
 * than CUBLAS_STATUS_SUCCESS. The message is the name of the status code, or
 * "UNKNOWN_CUBLAS_ERROR" for values not listed below; `func`, `file` and `line` are passed
 * on to the exception.
 */
static void check(cublasStatus_t status, const char* func, const char* file, int line) {
    if (status == CUBLAS_STATUS_SUCCESS)
        return;

    const char* description = "UNKNOWN_CUBLAS_ERROR";
    switch (status) {
        case CUBLAS_STATUS_SUCCESS:          description = "CUBLAS_STATUS_SUCCESS"; break;
        case CUBLAS_STATUS_NOT_INITIALIZED:  description = "CUBLAS_STATUS_NOT_INITIALIZED"; break;
        case CUBLAS_STATUS_ALLOC_FAILED:     description = "CUBLAS_STATUS_ALLOC_FAILED"; break;
        case CUBLAS_STATUS_INVALID_VALUE:    description = "CUBLAS_STATUS_INVALID_VALUE"; break;
        case CUBLAS_STATUS_ARCH_MISMATCH:    description = "CUBLAS_STATUS_ARCH_MISMATCH"; break;
        case CUBLAS_STATUS_MAPPING_ERROR:    description = "CUBLAS_STATUS_MAPPING_ERROR"; break;
        case CUBLAS_STATUS_EXECUTION_FAILED: description = "CUBLAS_STATUS_EXECUTION_FAILED"; break;
        case CUBLAS_STATUS_INTERNAL_ERROR:   description = "CUBLAS_STATUS_INTERNAL_ERROR"; break;
        case CUBLAS_STATUS_NOT_SUPPORTED:    description = "CUBLAS_STATUS_NOT_SUPPORTED"; break;
        case CUBLAS_STATUS_LICENSE_ERROR:    description = "CUBLAS_STATUS_LICENSE_ERROR"; break;
    }

    throw cuBLASException(Error::GpuApiCallError, description, func, file, line);
}
|
||||
}
|
||||
|
||||
/** non-copyable cuBLAS smart handle
|
||||
*
|
||||
* UniqueHandle is a smart non-sharable wrapper for cuBLAS handle which ensures that the handle
|
||||
* is destroyed after use. The handle must always be associated with a non-default stream. The stream
|
||||
* must be specified during construction.
|
||||
*
|
||||
* Refer to stream API for more information for the choice of forcing non-default streams.
|
||||
*/
|
||||
class UniqueHandle {
|
||||
public:
|
||||
UniqueHandle() noexcept : handle{ nullptr } { }
|
||||
UniqueHandle(UniqueHandle&) = delete;
|
||||
UniqueHandle(UniqueHandle&& other) noexcept {
|
||||
stream = std::move(other.stream);
|
||||
handle = other.handle;
|
||||
other.handle = nullptr;
|
||||
}
|
||||
|
||||
/** creates a cuBLAS handle and associates it with the stream specified
|
||||
*
|
||||
* Exception Guarantee: Basic
|
||||
*/
|
||||
UniqueHandle(Stream strm) : stream(std::move(strm)) {
|
||||
CV_Assert(stream);
|
||||
CUDA4DNN_CHECK_CUBLAS(cublasCreate(&handle));
|
||||
try {
|
||||
CUDA4DNN_CHECK_CUBLAS(cublasSetStream(handle, stream.get()));
|
||||
} catch (...) {
|
||||
/* cublasDestroy won't throw if a valid handle is passed */
|
||||
CUDA4DNN_CHECK_CUBLAS(cublasDestroy(handle));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
~UniqueHandle() noexcept {
|
||||
if (handle) {
|
||||
/* cublasDestroy won't throw if a valid handle is passed */
|
||||
CUDA4DNN_CHECK_CUBLAS(cublasDestroy(handle));
|
||||
}
|
||||
}
|
||||
|
||||
UniqueHandle& operator=(const UniqueHandle&) = delete;
|
||||
UniqueHandle& operator=(UniqueHandle&& other) noexcept {
|
||||
CV_Assert(other);
|
||||
if (&other != this) {
|
||||
UniqueHandle(std::move(*this)); /* destroy current handle */
|
||||
stream = std::move(other.stream);
|
||||
handle = other.handle;
|
||||
other.handle = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** returns the raw cuBLAS handle */
|
||||
cublasHandle_t get() const noexcept {
|
||||
CV_Assert(handle);
|
||||
return handle;
|
||||
}
|
||||
|
||||
/** returns true if the handle is valid */
|
||||
explicit operator bool() const noexcept { return static_cast<bool>(handle); }
|
||||
|
||||
private:
|
||||
Stream stream;
|
||||
cublasHandle_t handle;
|
||||
};
|
||||
|
||||
/** @brief sharable cuBLAS smart handle
|
||||
*
|
||||
* Handle is a smart sharable wrapper for cuBLAS handle which ensures that the handle
|
||||
* is destroyed after all references to the handle are destroyed. The handle must always
|
||||
* be associated with a non-default stream. The stream must be specified during construction.
|
||||
*
|
||||
* @note Moving a Handle object to another invalidates the former
|
||||
*/
|
||||
class Handle {
|
||||
public:
|
||||
Handle() = default;
|
||||
Handle(const Handle&) = default;
|
||||
Handle(Handle&&) = default;
|
||||
|
||||
/** creates a cuBLAS handle and associates it with the stream specified
|
||||
*
|
||||
* Exception Guarantee: Basic
|
||||
*/
|
||||
Handle(Stream strm) : handle(std::make_shared<UniqueHandle>(std::move(strm))) { }
|
||||
|
||||
Handle& operator=(const Handle&) = default;
|
||||
Handle& operator=(Handle&&) = default;
|
||||
|
||||
/** returns true if the handle is valid */
|
||||
explicit operator bool() const noexcept { return static_cast<bool>(handle); }
|
||||
|
||||
/** returns the raw cuBLAS handle */
|
||||
cublasHandle_t get() const noexcept {
|
||||
CV_Assert(handle);
|
||||
return handle->get();
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<UniqueHandle> handle;
|
||||
};
|
||||
|
||||
/** @brief GEMM for colummn-major matrices
|
||||
*
|
||||
* \f$ C = \alpha AB + \beta C \f$
|
||||
*
|
||||
* @tparam T matrix element type (must be `half` or `float`)
|
||||
*
|
||||
* @param handle valid cuBLAS Handle
|
||||
* @param transa use transposed matrix of A for computation
|
||||
* @param transb use transposed matrix of B for computation
|
||||
* @param rows_c number of rows in C
|
||||
* @param cols_c number of columns in C
|
||||
* @param common_dim common dimension of A (or trans A) and B (or trans B)
|
||||
* @param alpha scale factor for AB
|
||||
* @param[in] A pointer to column-major matrix A in device memory
|
||||
* @param lda leading dimension of matrix A
|
||||
* @param[in] B pointer to column-major matrix B in device memory
|
||||
* @param ldb leading dimension of matrix B
|
||||
* @param beta scale factor for C
|
||||
* @param[in,out] C pointer to column-major matrix C in device memory
|
||||
* @param ldc leading dimension of matrix C
|
||||
*
|
||||
* Exception Guarantee: Basic
|
||||
*/
|
||||
template <class T>
|
||||
void gemm(const Handle& handle,
|
||||
bool transa, bool transb,
|
||||
std::size_t rows_c, std::size_t cols_c, std::size_t common_dim,
|
||||
T alpha, const DevicePtr<const T> A, std::size_t lda,
|
||||
const DevicePtr<const T> B, std::size_t ldb,
|
||||
T beta, const DevicePtr<T> C, std::size_t ldc);
|
||||
|
||||
template <> inline
|
||||
void gemm<half>(const Handle& handle,
|
||||
bool transa, bool transb,
|
||||
std::size_t rows_c, std::size_t cols_c, std::size_t common_dim,
|
||||
half alpha, const DevicePtr<const half> A, std::size_t lda,
|
||||
const DevicePtr<const half> B, std::size_t ldb,
|
||||
half beta, const DevicePtr<half> C, std::size_t ldc)
|
||||
{
|
||||
CV_Assert(handle);
|
||||
|
||||
auto opa = transa ? CUBLAS_OP_T : CUBLAS_OP_N,
|
||||
opb = transb ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
int irows_c = static_cast<int>(rows_c),
|
||||
icols_c = static_cast<int>(cols_c),
|
||||
icommon_dim = static_cast<int>(common_dim),
|
||||
ilda = static_cast<int>(lda),
|
||||
ildb = static_cast<int>(ldb),
|
||||
ildc = static_cast<int>(ldc);
|
||||
|
||||
CUDA4DNN_CHECK_CUBLAS(
|
||||
cublasHgemm(
|
||||
handle.get(),
|
||||
opa, opb,
|
||||
irows_c, icols_c, icommon_dim,
|
||||
&alpha, A.get(), ilda,
|
||||
B.get(), ildb,
|
||||
&beta, C.get(), ildc
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
template <> inline
|
||||
void gemm<float>(const Handle& handle,
|
||||
bool transa, bool transb,
|
||||
std::size_t rows_c, std::size_t cols_c, std::size_t common_dim,
|
||||
float alpha, const DevicePtr<const float> A, std::size_t lda,
|
||||
const DevicePtr<const float> B, std::size_t ldb,
|
||||
float beta, const DevicePtr<float> C, std::size_t ldc)
|
||||
{
|
||||
CV_Assert(handle);
|
||||
|
||||
auto opa = transa ? CUBLAS_OP_T : CUBLAS_OP_N,
|
||||
opb = transb ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
int irows_c = static_cast<int>(rows_c),
|
||||
icols_c = static_cast<int>(cols_c),
|
||||
icommon_dim = static_cast<int>(common_dim),
|
||||
ilda = static_cast<int>(lda),
|
||||
ildb = static_cast<int>(ldb),
|
||||
ildc = static_cast<int>(ldc);
|
||||
|
||||
CUDA4DNN_CHECK_CUBLAS(
|
||||
cublasSgemm(
|
||||
handle.get(),
|
||||
opa, opb,
|
||||
irows_c, icols_c, icommon_dim,
|
||||
&alpha, A.get(), ilda,
|
||||
B.get(), ildb,
|
||||
&beta, C.get(), ildc
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/** @brief Strided batched GEMM for colummn-major matrices
|
||||
*
|
||||
* \f$ C_i = \alpha A_i B_i + \beta C_i \f$ for a stack of matrices A, B and C indexed by i
|
||||
*
|
||||
* @tparam T matrix element type (must be `half` or `float`)
|
||||
*
|
||||
* @param handle valid cuBLAS Handle
|
||||
* @param transa use transposed matrix of A_i for computation
|
||||
* @param transb use transposed matrix of B_i for computation
|
||||
* @param rows_c number of rows in C_i
|
||||
* @param cols_c number of columns in C_i
|
||||
* @param common_dim common dimension of A_i (or trans A_i) and B_i (or trans B_i)
|
||||
* @param alpha scale factor for A_i B_i
|
||||
* @param[in] A pointer to stack of column-major matrices A in device memory
|
||||
* @param lda leading dimension of matrix A_i
|
||||
* @param strideA stride between matrices in A
|
||||
* @param[in] B pointer to stack of column-major matrices B in device memory
|
||||
* @param ldb leading dimension of matrix B_i
|
||||
* @param strideB stride between matrices in B
|
||||
* @param beta scale factor for C_i
|
||||
* @param[in,out] C pointer to stack of column-major matrices C in device memory
|
||||
* @param ldc leading dimension of matrix C_i
|
||||
* @param strideC stride between matrices in C
|
||||
* @param batchCount number of matrices in the batch
|
||||
*
|
||||
* Exception Guarantee: Basic
|
||||
*/
|
||||
template <class T>
|
||||
void gemmStridedBatched(const Handle& handle,
|
||||
bool transa, bool transb,
|
||||
std::size_t rows_c, std::size_t cols_c, std::size_t common_dim,
|
||||
T alpha, const DevicePtr<const T> A, std::size_t lda, std::size_t strideA,
|
||||
const DevicePtr<const T> B, std::size_t ldb, std::size_t strideB,
|
||||
T beta, const DevicePtr<T> C, std::size_t ldc, std::size_t strideC,
|
||||
std::size_t batchCount);
|
||||
|
||||
template <> inline
|
||||
void gemmStridedBatched<half>(const Handle& handle,
|
||||
bool transa, bool transb,
|
||||
std::size_t rows_c, std::size_t cols_c, std::size_t common_dim,
|
||||
half alpha, const DevicePtr<const half> A, std::size_t lda, std::size_t strideA,
|
||||
const DevicePtr<const half> B, std::size_t ldb, std::size_t strideB,
|
||||
half beta, const DevicePtr<half> C, std::size_t ldc, std::size_t strideC,
|
||||
std::size_t batchCount)
|
||||
{
|
||||
CV_Assert(handle);
|
||||
|
||||
const auto opa = transa ? CUBLAS_OP_T : CUBLAS_OP_N,
|
||||
opb = transb ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
const auto irows_c = static_cast<int>(rows_c),
|
||||
icols_c = static_cast<int>(cols_c),
|
||||
icommon_dim = static_cast<int>(common_dim),
|
||||
ilda = static_cast<int>(lda),
|
||||
ildb = static_cast<int>(ldb),
|
||||
ildc = static_cast<int>(ldc);
|
||||
|
||||
const auto batch_count = static_cast<int>(batchCount);
|
||||
const auto stride_a = static_cast<long long int>(strideA),
|
||||
stride_b = static_cast<long long int>(strideB),
|
||||
stride_c = static_cast<long long int>(strideC);
|
||||
|
||||
CV_Assert(stride_c >= irows_c * icols_c); // output matrices must not overlap
|
||||
|
||||
CUDA4DNN_CHECK_CUBLAS(
|
||||
cublasHgemmStridedBatched(
|
||||
handle.get(),
|
||||
opa, opb,
|
||||
irows_c, icols_c, icommon_dim,
|
||||
&alpha, A.get(), ilda, stride_a,
|
||||
B.get(), ildb, stride_b,
|
||||
&beta, C.get(), ildc, stride_c,
|
||||
batch_count
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
template <> inline
|
||||
void gemmStridedBatched<float>(const Handle& handle,
|
||||
bool transa, bool transb,
|
||||
std::size_t rows_c, std::size_t cols_c, std::size_t common_dim,
|
||||
float alpha, const DevicePtr<const float> A, std::size_t lda, std::size_t strideA,
|
||||
const DevicePtr<const float> B, std::size_t ldb, std::size_t strideB,
|
||||
float beta, const DevicePtr<float> C, std::size_t ldc, std::size_t strideC,
|
||||
std::size_t batchCount)
|
||||
{
|
||||
CV_Assert(handle);
|
||||
|
||||
const auto opa = transa ? CUBLAS_OP_T : CUBLAS_OP_N,
|
||||
opb = transb ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
const auto irows_c = static_cast<int>(rows_c),
|
||||
icols_c = static_cast<int>(cols_c),
|
||||
icommon_dim = static_cast<int>(common_dim),
|
||||
ilda = static_cast<int>(lda),
|
||||
ildb = static_cast<int>(ldb),
|
||||
ildc = static_cast<int>(ldc);
|
||||
|
||||
const auto batch_count = static_cast<int>(batchCount);
|
||||
const auto stride_a = static_cast<long long int>(strideA),
|
||||
stride_b = static_cast<long long int>(strideB),
|
||||
stride_c = static_cast<long long int>(strideC);
|
||||
|
||||
CV_Assert(stride_c >= irows_c * icols_c); // output matrices must not overlap
|
||||
|
||||
CUDA4DNN_CHECK_CUBLAS(
|
||||
cublasSgemmStridedBatched(
|
||||
handle.get(),
|
||||
opa, opb,
|
||||
irows_c, icols_c, icommon_dim,
|
||||
&alpha, A.get(), ilda, stride_a,
|
||||
B.get(), ildb, stride_b,
|
||||
&beta, C.get(), ildc, stride_c,
|
||||
batch_count
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
}}}}} /* namespace cv::dnn::cuda4dnn::csl::cublas */
|
||||
|
||||
#endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_CUBLAS_HPP */
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user