feat: 切换后端至PaddleOCR-NCNN，切换工程为CMake

1.项目后端整体迁移至PaddleOCR-NCNN算法，已通过基本的兼容性测试 2.工程改为使用CMake组织，后续为了更好地兼容第三方库，不再提供QMake工程 3.重整权利声明文件，重整代码工程，确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN，切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
2022-05-10 09:54:44 +08:00
parent ecdd171c6f
commit 718c41634f
10018 changed files with 3593797 additions and 186748 deletions
--- a/3rdparty/ncnn/python/CMakeLists.txt
+++ b/3rdparty/ncnn/python/CMakeLists.txt
@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 3.4)
+
+project(pyncnn)
+
+# Build script for the pybind11-based Python extension module of ncnn.
+# The CMake target is called "pyncnn"; the produced module file is named "ncnn".
+
+# NCNN_VERSION_STRING is not defined in this file; it is expected to come from
+# the including project. The value is passed to the sources as VERSION_INFO.
+set(PACKAGE_VERSION ${NCNN_VERSION_STRING})
+add_definitions(-DVERSION_INFO="${PACKAGE_VERSION}")
+
+set( CMAKE_CXX_STANDARD 11 )
+set( CMAKE_CXX_STANDARD_REQUIRED ON )
+
+add_subdirectory(pybind11)
+
+# When the user did not choose an output directory, pin the Debug/Release
+# library output of MSVC and Xcode builds to <build>/ncnn so that no
+# per-configuration sub-directory is used for those two configurations.
+if("${CMAKE_LIBRARY_OUTPUT_DIRECTORY}" STREQUAL "")
+    if(MSVC OR CMAKE_GENERATOR STREQUAL "Xcode")
+        set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_CURRENT_BINARY_DIR}/ncnn/)
+        set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_CURRENT_BINARY_DIR}/ncnn/)
+    endif()
+endif()
+
+include_directories(${pybind11_INCLUDE_DIR} ${PYTHON_INCLUDE_DIRS})
+pybind11_add_module(pyncnn src/main.cpp)
+set_target_properties(pyncnn PROPERTIES OUTPUT_NAME "ncnn")
+target_link_libraries(pyncnn PUBLIC ncnn)
+set_target_properties(pyncnn PROPERTIES PREFIX "" LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/ncnn")
+set_property(TARGET pyncnn PROPERTY FOLDER "python")
+
+# Copy the freshly built module next to the Python package sources.
+# The source path is taken from the target itself ($<TARGET_FILE:...>) instead
+# of being re-assembled from prefix/extension variables, so it stays correct
+# for every build configuration and for any suffix/postfix set on the target.
+if("${CMAKE_LIBRARY_OUTPUT_DIRECTORY}" STREQUAL "")
+    add_custom_command(TARGET pyncnn POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy
+                "$<TARGET_FILE:pyncnn>"
+                "${PROJECT_SOURCE_DIR}/ncnn/$<TARGET_FILE_NAME:pyncnn>"
+        VERBATIM)
+endif()
+
+# NOTE(review): this writes the generated setup.py into the source tree.
+configure_file(setup.py.i ${PROJECT_SOURCE_DIR}/setup.py)
--- a/3rdparty/ncnn/python/README.md
+++ b/3rdparty/ncnn/python/README.md
@ -0,0 +1,127 @@
+# ncnn
+Python wrapper of ncnn built with [pybind11](https://github.com/pybind/pybind11). Only Python 3.x is supported for now.
+
+
+Install from pip
+==================
+
+ncnn is available as wheel packages for macOS, Windows and Linux distributions, you can install with pip:
+
+```
+python -m pip install -U pip
+python -m pip install -U ncnn
+```
+
+# Build from source
+
+If you want to build ncnn with some options not as default, or just like to build everything yourself, it is not difficult to build ncnn from source.
+
+## Prerequisites
+
+**On Unix (Linux, OS X)**
+
+* A compiler with C++11 support
+* CMake >= 3.4
+
+**On Mac**
+
+* A compiler with C++11 support
+* CMake >= 3.4
+
+**On Windows**
+
+* Visual Studio 2015 or higher
+* CMake >= 3.4
+
+## Build
+1. Clone ncnn and initialize its submodules.
+```bash
+cd /pathto/ncnn
+git submodule init && git submodule update
+```
+2. build.
+```bash
+mkdir build
+cd build
+cmake -DNCNN_PYTHON=ON ..
+make
+```
+
+## Install
+```bash
+cd /pathto/ncnn
+pip install .
+```
+
+If you use conda or miniconda, you can also install it as follows:
+```bash
+cd /pathto/ncnn
+python3 setup.py install
+```
+
+## Tests
+**test**
+```bash
+cd /pathto/ncnn/python
+python3 tests/test.py
+```
+
+**benchmark**
+
+```bash
+cd /pathto/ncnn/python
+python3 tests/benchmark.py
+```
+
+## Numpy
+**ncnn.Mat->numpy.array, with no memory copy**
+
+```python
+mat = ncnn.Mat(...)
+mat_np = np.array(mat)
+```
+
+**numpy.array->ncnn.Mat, with no memory copy**
+```python
+mat_np = np.array(...)
+mat = ncnn.Mat(mat_np)
+```
+
+# Model Zoo
+install requirements
+```bash
+pip install -r requirements.txt
+```
+Then you can import `ncnn.model_zoo` and get the list of models as follows:
+```python
+import ncnn
+import ncnn.model_zoo as model_zoo
+
+print(model_zoo.get_model_list())
+```
+The models currently in the model zoo are listed below:
+```bash
+mobilenet_yolov2
+mobilenetv2_yolov3
+yolov4_tiny
+yolov4
+yolov5s
+yolact
+mobilenet_ssd
+squeezenet_ssd
+mobilenetv2_ssdlite
+mobilenetv3_ssdlite
+squeezenet
+faster_rcnn
+peleenet_ssd
+retinaface
+rfcn
+shufflenetv2
+simplepose
+nanodet
+```
+Every model in the model zoo has an example in the ncnn/python/examples folder.
+
+# Custom Layer
+
+A custom layer demo can be found in ncnn/python/ncnn/model_zoo/yolov5.py (line 23).
--- a/3rdparty/ncnn/python/examples/fasterrcnn.py
+++ b/3rdparty/ncnn/python/examples/fasterrcnn.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("faster_rcnn", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/mobilenetssd.py
+++ b/3rdparty/ncnn/python/examples/mobilenetssd.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("mobilenet_ssd", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/mobilenetv2ssdlite.py
+++ b/3rdparty/ncnn/python/examples/mobilenetv2ssdlite.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("mobilenetv2_ssdlite", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/mobilenetv3ssdlite.py
+++ b/3rdparty/ncnn/python/examples/mobilenetv3ssdlite.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("mobilenetv3_ssdlite", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects, 0.6)
--- a/3rdparty/ncnn/python/examples/model_zoo.py
+++ b/3rdparty/ncnn/python/examples/model_zoo.py
@ -0,0 +1,22 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model_list
+
+if __name__ == "__main__":
+    print(get_model_list())
--- a/3rdparty/ncnn/python/examples/nanodet.py
+++ b/3rdparty/ncnn/python/examples/nanodet.py
@ -0,0 +1,46 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import time
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model(
+        "nanodet",
+        target_size=320,
+        prob_threshold=0.4,
+        nms_threshold=0.5,
+        num_threads=4,
+        use_gpu=True,
+    )
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/peleenetssd.py
+++ b/3rdparty/ncnn/python/examples/peleenetssd.py
@ -0,0 +1,121 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+
+
+def draw_detection_objects_seg(image, class_names, objects, mat_map):
+    color = [128, 255, 128, 244, 35, 232]
+    color_count = len(color)
+
+    for obj in objects:
+        print(
+            "%d = %.5f at %.2f %.2f %.2f x %.2f\n"
+            % (obj.label, obj.prob, obj.rect.x, obj.rect.y, obj.rect.w, obj.rect.h)
+        )
+
+        cv2.rectangle(
+            image,
+            (int(obj.rect.x), int(obj.rect.y)),
+            (int(obj.rect.x + obj.rect.w), int(obj.rect.y + obj.rect.h)),
+            (255, 0, 0),
+        )
+
+        text = "%s %.1f%%" % (class_names[int(obj.label)], obj.prob * 100)
+
+        label_size, baseLine = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+        x = obj.rect.x
+        y = obj.rect.y - label_size[1] - baseLine
+        if y < 0:
+            y = 0
+        if x + label_size[0] > image.shape[1]:
+            x = image.shape[1] - label_size[0]
+
+        cv2.rectangle(
+            image,
+            (int(x), int(y)),
+            (int(x + label_size[0]), int(y + label_size[1] + baseLine)),
+            (255, 255, 255),
+            -1,
+        )
+
+        cv2.putText(
+            image,
+            text,
+            (int(x), int(y + label_size[1])),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (0, 0, 0),
+        )
+
+    width = mat_map.w
+    height = mat_map.h
+    size = mat_map.c
+    img_index2 = 0
+    threshold = 0.45
+    ptr2 = np.array(mat_map)
+    for i in range(height):
+        ptr1 = image[i].flatten()
+        img_index1 = 0
+        for j in range(width):
+            maxima = threshold
+            index = -1
+            for c in range(size):
+                # const float* ptr3 = ptr2 + c*width*height
+                ptr3 = ptr2[c].flatten()
+                if ptr3[img_index2] > maxima:
+                    maxima = ptr3[img_index2]
+                    index = c
+
+            if index > -1:
+                color_index = (index) * 3
+                if color_index < color_count:
+                    b = color[color_index]
+                    g = color[color_index + 1]
+                    r = color[color_index + 2]
+                    ptr1[img_index1] = b / 2 + ptr1[img_index1] / 2
+                    ptr1[img_index1 + 1] = g / 2 + ptr1[img_index1 + 1] / 2
+                    ptr1[img_index1 + 2] = r / 2 + ptr1[img_index1 + 2] / 2
+
+            img_index1 += 3
+            img_index2 += 1
+
+        image[i] = ptr1.reshape(image[i].shape)
+
+    cv2.imshow("image", image)
+    cv2.waitKey(0)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("peleenet_ssd", num_threads=4, use_gpu=True)
+
+    objects, seg_out = net(m)
+
+    draw_detection_objects_seg(m, net.class_names, objects, seg_out)
--- a/3rdparty/ncnn/python/examples/retinaface.py
+++ b/3rdparty/ncnn/python/examples/retinaface.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_faceobjects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("retinaface", num_threads=4, use_gpu=True)
+
+    faceobjects = net(m)
+
+    draw_faceobjects(m, faceobjects)
--- a/3rdparty/ncnn/python/examples/rfcn.py
+++ b/3rdparty/ncnn/python/examples/rfcn.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("rfcn", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/shufflenetv2.py
+++ b/3rdparty/ncnn/python/examples/shufflenetv2.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import print_topk
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("shufflenetv2", num_threads=4, use_gpu=True)
+
+    cls_scores = net(m)
+
+    print_topk(cls_scores, 3)
--- a/3rdparty/ncnn/python/examples/simplepose.py
+++ b/3rdparty/ncnn/python/examples/simplepose.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_pose
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("simplepose", num_threads=4, use_gpu=True)
+
+    keypoints = net(m)
+
+    draw_pose(m, keypoints)
--- a/3rdparty/ncnn/python/examples/squeezenet.py
+++ b/3rdparty/ncnn/python/examples/squeezenet.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import print_topk
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("squeezenet", num_threads=4, use_gpu=True)
+
+    cls_scores = net(m)
+
+    print_topk(cls_scores, 5)
--- a/3rdparty/ncnn/python/examples/squeezenetssd.py
+++ b/3rdparty/ncnn/python/examples/squeezenetssd.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("squeezenet_ssd", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/yolact.py
+++ b/3rdparty/ncnn/python/examples/yolact.py
@ -0,0 +1,184 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+
+def draw_result(image, class_names, boxes, masks, classes, scores):
+    colors = [
+        [56, 0, 255],
+        [226, 255, 0],
+        [0, 94, 255],
+        [0, 37, 255],
+        [0, 255, 94],
+        [255, 226, 0],
+        [0, 18, 255],
+        [255, 151, 0],
+        [170, 0, 255],
+        [0, 255, 56],
+        [255, 0, 75],
+        [0, 75, 255],
+        [0, 255, 169],
+        [255, 0, 207],
+        [75, 255, 0],
+        [207, 0, 255],
+        [37, 0, 255],
+        [0, 207, 255],
+        [94, 0, 255],
+        [0, 255, 113],
+        [255, 18, 0],
+        [255, 0, 56],
+        [18, 0, 255],
+        [0, 255, 226],
+        [170, 255, 0],
+        [255, 0, 245],
+        [151, 255, 0],
+        [132, 255, 0],
+        [75, 0, 255],
+        [151, 0, 255],
+        [0, 151, 255],
+        [132, 0, 255],
+        [0, 255, 245],
+        [255, 132, 0],
+        [226, 0, 255],
+        [255, 37, 0],
+        [207, 255, 0],
+        [0, 255, 207],
+        [94, 255, 0],
+        [0, 226, 255],
+        [56, 255, 0],
+        [255, 94, 0],
+        [255, 113, 0],
+        [0, 132, 255],
+        [255, 0, 132],
+        [255, 170, 0],
+        [255, 0, 188],
+        [113, 255, 0],
+        [245, 0, 255],
+        [113, 0, 255],
+        [255, 188, 0],
+        [0, 113, 255],
+        [255, 0, 0],
+        [0, 56, 255],
+        [255, 0, 113],
+        [0, 255, 188],
+        [255, 0, 94],
+        [255, 0, 18],
+        [18, 255, 0],
+        [0, 255, 132],
+        [0, 188, 255],
+        [0, 245, 255],
+        [0, 169, 255],
+        [37, 255, 0],
+        [255, 0, 151],
+        [188, 0, 255],
+        [0, 255, 37],
+        [0, 255, 0],
+        [255, 0, 170],
+        [255, 0, 37],
+        [255, 75, 0],
+        [0, 0, 255],
+        [255, 207, 0],
+        [255, 0, 226],
+        [255, 245, 0],
+        [188, 255, 0],
+        [0, 255, 18],
+        [0, 255, 75],
+        [0, 255, 151],
+        [255, 56, 0],
+        [245, 255, 0],
+    ]
+
+    color_index = 0
+
+    for box, mask, label, score in zip(boxes, masks, classes, scores):
+        if score < 0.15:
+            continue
+
+        print(
+            "%s = %.5f at %.2f %.2f %.2f x %.2f\n"
+            % (label, score, box[0], box[1], box[2], box[3])
+        )
+
+        cv2.rectangle(
+            image,
+            (int(box[0]), int(box[1])),
+            (int(box[0] + box[2]), int(int(box[1] + box[3]))),
+            (255, 0, 0),
+        )
+
+        text = "%s %.1f%%" % (class_names[int(label)], score * 100)
+
+        label_size, baseLine = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+        x = box[0]
+        y = box[1] - label_size[1] - baseLine
+        if y < 0:
+            y = 0
+        if x + label_size[0] > image.shape[1]:
+            x = image.shape[1] - label_size[0]
+
+        cv2.rectangle(
+            image,
+            (int(x), int(y)),
+            (int(x + label_size[0]), int(y + label_size[1] + baseLine)),
+            (255, 255, 255),
+            -1,
+        )
+
+        cv2.putText(
+            image,
+            text,
+            (int(x), int(y + label_size[1])),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (0, 0, 0),
+        )
+
+        image[mask] = image[mask] * 0.5 + np.array(colors[color_index]) * 0.5
+        color_index += 1
+
+    cv2.imshow("image", image)
+    cv2.waitKey(0)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model(
+        "yolact",
+        target_size=550,
+        confidence_threshold=0.05,
+        nms_threshold=0.5,
+        keep_top_k=200,
+        num_threads=4,
+        use_gpu=True,
+    )
+
+    boxes, masks, classes, scores = net(m)
+
+    draw_result(m, net.class_names, boxes, masks, classes, scores)
--- a/3rdparty/ncnn/python/examples/yolov2.py
+++ b/3rdparty/ncnn/python/examples/yolov2.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("mobilenet_yolov2", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/yolov3.py
+++ b/3rdparty/ncnn/python/examples/yolov3.py
@ -0,0 +1,38 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model("mobilenetv2_yolov3", num_threads=4, use_gpu=True)
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/yolov4.py
+++ b/3rdparty/ncnn/python/examples/yolov4.py
@ -0,0 +1,53 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [v4l input device or image]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    devicepath = sys.argv[1]
+
+    net = get_model("yolov4_tiny", num_threads=4, use_gpu=True)
+    # net = get_model("yolov4", num_threads=4, use_gpu=True)
+
+    if devicepath.find("/dev/video") == -1:
+        m = cv2.imread(devicepath)
+        if m is None:
+            print("cv2.imread %s failed\n" % (devicepath))
+            sys.exit(0)
+
+        objects = net(m)
+
+        draw_detection_objects(m, net.class_names, objects)
+    else:
+        cap = cv2.VideoCapture(devicepath)
+
+        if cap.isOpened() == False:
+            print("Failed to open %s" % (devicepath))
+            sys.exit(0)
+
+        while True:
+            ret, frame = cap.read()
+
+            objects = net(frame)
+
+            draw_detection_objects(frame, net.class_names, objects)
--- a/3rdparty/ncnn/python/examples/yolov5.py
+++ b/3rdparty/ncnn/python/examples/yolov5.py
@ -0,0 +1,46 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import cv2
+import time
+import numpy as np
+import ncnn
+from ncnn.model_zoo import get_model
+from ncnn.utils import draw_detection_objects
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: %s [imagepath]\n" % (sys.argv[0]))
+        sys.exit(0)
+
+    imagepath = sys.argv[1]
+
+    m = cv2.imread(imagepath)
+    if m is None:
+        print("cv2.imread %s failed\n" % (imagepath))
+        sys.exit(0)
+
+    net = get_model(
+        "yolov5s",
+        target_size=640,
+        prob_threshold=0.25,
+        nms_threshold=0.45,
+        num_threads=4,
+        use_gpu=True,
+    )
+
+    objects = net(m)
+
+    draw_detection_objects(m, net.class_names, objects)
--- a/3rdparty/ncnn/python/ncnn/init.py
+++ b/3rdparty/ncnn/python/ncnn/init.py
@ -0,0 +1,17 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+# Re-export the public names of the sibling ``ncnn`` submodule at package level.
+from .ncnn import *
+
+# ``ncnn`` here is expected to be that submodule: importing ``.ncnn`` above binds
+# it as an attribute of this package, which is what lets the name resolve
+# (NOTE(review): assumes the star-import does not itself export a name ``ncnn``).
+__version__ = ncnn.__version__
--- a/3rdparty/ncnn/python/ncnn/model_zoo/init.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/init.py
@ -0,0 +1,20 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+# coding: utf-8
+"""Predefined and pretrained models."""
+
+from . import model_store
+
+from .model_zoo import get_model, get_model_list
--- a/3rdparty/ncnn/python/ncnn/model_zoo/fasterrcnn.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/fasterrcnn.py
@ -0,0 +1,241 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class Faster_RCNN:
+    def __init__(
+        self,
+        img_width=600,
+        img_height=600,
+        num_threads=1,
+        use_gpu=False,
+        max_per_image=100,
+        confidence_thresh=0.05,
+        nms_threshold=0.3,
+    ):
+        self.img_width = img_width
+        self.img_height = img_height
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.mean_vals = [102.9801, 115.9465, 122.7717]
+        self.norm_vals = []
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
+        # py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_alt_opt/faster_rcnn_test.pt
+        # https://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0
+        # ZF_faster_rcnn_final.caffemodel
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("ZF_faster_rcnn_final.param"))
+        self.net.load_model(get_model_file("ZF_faster_rcnn_final.bin"))
+
+        self.max_per_image = max_per_image
+        self.confidence_thresh = confidence_thresh
+        self.nms_threshold = nms_threshold
+
+        self.class_names = [
+            "background",
+            "aeroplane",
+            "bicycle",
+            "bird",
+            "boat",
+            "bottle",
+            "bus",
+            "car",
+            "cat",
+            "chair",
+            "cow",
+            "diningtable",
+            "dog",
+            "horse",
+            "motorbike",
+            "person",
+            "pottedplant",
+            "sheep",
+            "sofa",
+            "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        self.net = None
+
+    def __call__(self, img):
+        # scale to target detect size
+        h = img.shape[0]
+        w = img.shape[1]
+        scale = 1.0
+        if w < h:
+            scale = float(self.img_width) / w
+            w = self.img_width
+            h = int(h * scale)
+        else:
+            scale = float(self.img_height) / h
+            h = self.img_height
+            w = int(w * scale)
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR, img.shape[1], img.shape[0], w, h
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        # method 1 use numpy to Mat interface
+        # im_info = ncnn.Mat(np.array([h, w, scale], dtype=np.float32))
+
+        # method 2 use ncnn.Mat interface
+        im_info = ncnn.Mat(3)
+        im_info[0] = h
+        im_info[1] = w
+        im_info[2] = scale
+
+        ex1 = self.net.create_extractor()
+        ex1.set_num_threads(self.num_threads)
+
+        ex1.input("data", mat_in)
+        ex1.input("im_info", im_info)
+
+        ret1, conv5_relu5 = ex1.extract("conv5_relu5")
+        ret2, rois = ex1.extract("rois")
+
+        class_candidates = []
+        for i in range(rois.c):
+            ex2 = self.net.create_extractor()
+
+            roi = rois.channel(i)  # get single roi
+            ex2.input("conv5_relu5", conv5_relu5)
+            ex2.input("rois", roi)
+
+            ret1, bbox_pred = ex2.extract("bbox_pred")
+            ret2, cls_prob = ex2.extract("cls_prob")
+
+            num_class = cls_prob.w
+            while len(class_candidates) < num_class:
+                class_candidates.append([])
+
+            # find class id with highest score
+            label = 0
+            score = 0.0
+            for j in range(num_class):
+                class_score = cls_prob[j]
+                if class_score > score:
+                    label = j
+                    score = class_score
+
+            # ignore background or low score
+            if label == 0 or score <= self.confidence_thresh:
+                continue
+
+            # fprintf(stderr, "%d = %f\n", label, score);
+
+            # unscale to image size
+            x1 = roi[0] / scale
+            y1 = roi[1] / scale
+            x2 = roi[2] / scale
+            y2 = roi[3] / scale
+
+            pb_w = x2 - x1 + 1
+            pb_h = y2 - y1 + 1
+
+            # apply bbox regression
+            dx = bbox_pred[label * 4]
+            dy = bbox_pred[label * 4 + 1]
+            dw = bbox_pred[label * 4 + 2]
+            dh = bbox_pred[label * 4 + 3]
+
+            cx = x1 + pb_w * 0.5
+            cy = y1 + pb_h * 0.5
+
+            obj_cx = cx + pb_w * dx
+            obj_cy = cy + pb_h * dy
+
+            obj_w = pb_w * np.exp(dw)
+            obj_h = pb_h * np.exp(dh)
+
+            obj_x1 = obj_cx - obj_w * 0.5
+            obj_y1 = obj_cy - obj_h * 0.5
+            obj_x2 = obj_cx + obj_w * 0.5
+            obj_y2 = obj_cy + obj_h * 0.5
+
+            # clip
+            obj_x1 = np.maximum(np.minimum(obj_x1, float(img.shape[1] - 1)), 0.0)
+            obj_y1 = np.maximum(np.minimum(obj_y1, float(img.shape[0] - 1)), 0.0)
+            obj_x2 = np.maximum(np.minimum(obj_x2, float(img.shape[1] - 1)), 0.0)
+            obj_y2 = np.maximum(np.minimum(obj_y2, float(img.shape[0] - 1)), 0.0)
+
+            # append object
+            obj = Detect_Object()
+            obj.rect.x = obj_x1
+            obj.rect.y = obj_y1
+            obj.rect.w = obj_x2 - obj_x1 + 1
+            obj.rect.h = obj_y2 - obj_y1 + 1
+            obj.label = label
+            obj.prob = score
+
+            class_candidates[label].append(obj)
+
+        # post process
+        objects = []
+        for candidates in class_candidates:
+            if len(candidates) == 0:
+                continue
+
+            candidates.sort(key=lambda obj: obj.prob, reverse=True)
+
+            picked = self.nms_sorted_bboxes(candidates, self.nms_threshold)
+
+            for j in range(len(picked)):
+                z = picked[j]
+                objects.append(candidates[z])
+
+        objects.sort(key=lambda obj: obj.prob, reverse=True)
+
+        objects = objects[: self.max_per_image]
+
+        return objects
+
+    def nms_sorted_bboxes(self, objects, nms_threshold):
+        picked = []
+
+        n = len(objects)
+
+        areas = np.zeros((n,), dtype=np.float32)
+        for i in range(n):
+            areas[i] = objects[i].rect.area()
+
+        for i in range(n):
+            a = objects[i]
+
+            keep = True
+            for j in range(len(picked)):
+                b = objects[picked[j]]
+
+                # intersection over union
+                inter_area = a.rect.intersection_area(b.rect)
+                union_area = areas[i] + areas[picked[j]] - inter_area
+                # float IoU = inter_area / union_area
+                if inter_area / union_area > nms_threshold:
+                    keep = False
+
+            if keep:
+                picked.append(i)
+
+        return picked
--- a/3rdparty/ncnn/python/ncnn/model_zoo/mobilenetssd.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/mobilenetssd.py
@ -0,0 +1,119 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class MobileNet_SSD:
+    def __init__(self, target_size=300, num_threads=1, use_gpu=False):
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.mean_vals = [127.5, 127.5, 127.5]
+        self.norm_vals = [0.007843, 0.007843, 0.007843]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # model is converted from https://github.com/chuanqi305/MobileNet-SSD
+        # and can be downloaded from https://drive.google.com/open?id=0ByaKLD9QaPtucWk0Y0dha1VVY0U
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("mobilenet_ssd_voc_ncnn.param"))
+        self.net.load_model(get_model_file("mobilenet_ssd_voc_ncnn.bin"))
+
+        self.class_names = [
+            "background",
+            "aeroplane",
+            "bicycle",
+            "bird",
+            "boat",
+            "bottle",
+            "bus",
+            "car",
+            "cat",
+            "chair",
+            "cow",
+            "diningtable",
+            "dog",
+            "horse",
+            "motorbike",
+            "person",
+            "pottedplant",
+            "sheep",
+            "sofa",
+            "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        self.net = None
+
+    def __call__(self, img):
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR,
+            img.shape[1],
+            img.shape[0],
+            self.target_size,
+            self.target_size,
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        ex = self.net.create_extractor()
+        ex.set_num_threads(self.num_threads)
+
+        ex.input("data", mat_in)
+
+        ret, mat_out = ex.extract("detection_out")
+
+        objects = []
+
+        # printf("%d %d %d\n", mat_out.w, mat_out.h, mat_out.c)
+
+        # method 1, use ncnn.Mat.row to get the result, no memory copy
+        for i in range(mat_out.h):
+            values = mat_out.row(i)
+
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+
+            objects.append(obj)
+
+        """
+        #method 2, use ncnn.Mat->numpy.array to get the result, no memory copy too
+        out = np.array(mat_out)
+        for i in range(len(out)):
+            values = out[i]
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+            objects.append(obj)
+        """
+
+        return objects
--- a/3rdparty/ncnn/python/ncnn/model_zoo/mobilenetv2ssdlite.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/mobilenetv2ssdlite.py
@ -0,0 +1,129 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class Noop(ncnn.Layer):
+    pass
+
+
+def Noop_layer_creator():
+    return Noop()
+
+
+class MobileNetV2_SSDLite:
+    def __init__(self, target_size=300, num_threads=1, use_gpu=False):
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.mean_vals = [127.5, 127.5, 127.5]
+        self.norm_vals = [0.007843, 0.007843, 0.007843]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+        # self.net.register_custom_layer("Silence", Noop_layer_creator)
+
+        # original pretrained model from https://github.com/chuanqi305/MobileNetv2-SSDLite
+        # https://github.com/chuanqi305/MobileNetv2-SSDLite/blob/master/ssdlite/voc/deploy.prototxt
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("mobilenetv2_ssdlite_voc.param"))
+        self.net.load_model(get_model_file("mobilenetv2_ssdlite_voc.bin"))
+
+        self.class_names = [
+            "background",
+            "aeroplane",
+            "bicycle",
+            "bird",
+            "boat",
+            "bottle",
+            "bus",
+            "car",
+            "cat",
+            "chair",
+            "cow",
+            "diningtable",
+            "dog",
+            "horse",
+            "motorbike",
+            "person",
+            "pottedplant",
+            "sheep",
+            "sofa",
+            "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        self.net = None
+
+    def __call__(self, img):
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR,
+            img_w,
+            img_h,
+            self.target_size,
+            self.target_size,
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        ex = self.net.create_extractor()
+        ex.set_light_mode(True)
+        ex.set_num_threads(self.num_threads)
+
+        ex.input("data", mat_in)
+
+        ret, mat_out = ex.extract("detection_out")
+
+        objects = []
+
+        # printf("%d %d %d\n", mat_out.w, mat_out.h, mat_out.c)
+
+        # method 1, use ncnn.Mat.row to get the result, no memory copy
+        for i in range(mat_out.h):
+            values = mat_out.row(i)
+
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+
+            objects.append(obj)
+
+        """
+        #method 2, use ncnn.Mat->numpy.array to get the result, no memory copy too
+        out = np.array(mat_out)
+        for i in range(len(out)):
+            values = out[i]
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+            objects.append(obj)
+        """
+
+        return objects
--- a/3rdparty/ncnn/python/ncnn/model_zoo/mobilenetv3ssdlite.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/mobilenetv3ssdlite.py
@ -0,0 +1,162 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+def clamp(v, lo, hi):
+    if v < lo:
+        return lo
+    elif hi < v:
+        return hi
+    else:
+        return v
+
+
+class MobileNetV3_SSDLite:
+    def __init__(self, target_size=300, num_threads=1, use_gpu=False):
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.mean_vals = [123.675, 116.28, 103.53]
+        self.norm_vals = [1.0, 1.0, 1.0]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # converted ncnn model from https://github.com/ujsyehao/mobilenetv3-ssd
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("mobilenetv3_ssdlite_voc.param"))
+        self.net.load_model(get_model_file("mobilenetv3_ssdlite_voc.bin"))
+
+        self.class_names = [
+            "background",
+            "aeroplane",
+            "bicycle",
+            "bird",
+            "boat",
+            "bottle",
+            "bus",
+            "car",
+            "cat",
+            "chair",
+            "cow",
+            "diningtable",
+            "dog",
+            "horse",
+            "motorbike",
+            "person",
+            "pottedplant",
+            "sheep",
+            "sofa",
+            "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        self.net = None
+
+    def __call__(self, img):
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR2RGB,
+            img.shape[1],
+            img.shape[0],
+            self.target_size,
+            self.target_size,
+        )
+        mat_in.substract_mean_normalize([], self.norm_vals)
+        mat_in.substract_mean_normalize(self.mean_vals, [])
+
+        ex = self.net.create_extractor()
+        ex.set_light_mode(True)
+        ex.set_num_threads(self.num_threads)
+
+        ex.input("input", mat_in)
+
+        ret, mat_out = ex.extract("detection_out")
+
+        objects = []
+
+        # printf("%d %d %d\n", mat_out.w, mat_out.h, mat_out.c)
+
+        # method 1, use ncnn.Mat.row to get the result, no memory copy
+        for i in range(mat_out.h):
+            values = mat_out.row(i)
+
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+
+            x1 = (
+                clamp(values[2] * self.target_size, 0.0, float(self.target_size - 1))
+                / self.target_size
+                * img_w
+            )
+            y1 = (
+                clamp(values[3] * self.target_size, 0.0, float(self.target_size - 1))
+                / self.target_size
+                * img_h
+            )
+            x2 = (
+                clamp(values[4] * self.target_size, 0.0, float(self.target_size - 1))
+                / self.target_size
+                * img_w
+            )
+            y2 = (
+                clamp(values[5] * self.target_size, 0.0, float(self.target_size - 1))
+                / self.target_size
+                * img_h
+            )
+
+            if np.isnan(x1) or np.isnan(y1) or np.isnan(x2) or np.isnan(y2):
+                continue
+
+            obj.rect.x = x1
+            obj.rect.y = y1
+            obj.rect.w = x2 - x1
+            obj.rect.h = y2 - y1
+
+            objects.append(obj)
+
+        """
+        #method 2, use ncnn.Mat->numpy.array to get the result, no memory copy too
+        out = np.array(mat_out)
+        for i in range(len(out)):
+            values = out[i]
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+
+            x1 = clamp(values[2] * self.img_width, 0.0, float(self.img_width - 1)) / self.img_width * img_w
+            y1 = clamp(values[3] * self.img_height, 0.0, float(self.img_height - 1)) / self.img_height * img_h
+            x2 = clamp(values[4] * self.img_width, 0.0, float(self.img_width - 1)) / self.img_width * img_w
+            y2 = clamp(values[5] * self.img_height, 0.0, float(self.img_height - 1)) / self.img_height * img_h
+
+            obj.rect.x = x1
+            obj.rect.y = y1
+            obj.rect.w = x2 - x1
+            obj.rect.h = y2 - y1
+
+            objects.append(obj)
+        """
+
+        return objects
--- a/3rdparty/ncnn/python/ncnn/model_zoo/model_store.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/model_store.py
@ -0,0 +1,206 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+"""Model store which provides pretrained models."""
+from __future__ import print_function
+
+__all__ = ["get_model_file", "purge"]
+
+import os
+import zipfile
+import logging
+import portalocker
+
+from ..utils import download, check_sha1
+
+# Expected SHA-1 digest of every downloadable model file, keyed by file name.
+# The literal below is written as (checksum, name) pairs; the comprehension
+# inverts each pair so that lookups go name -> checksum.
+_model_sha1 = {
+    name: checksum
+    for checksum, name in [
+        ("4ff279e78cdb0b8bbc9363181df6f094ad46dc36", "mobilenet_yolo.param"),
+        ("1528cf08b9823fc01aaebfc932ec8c8d4a3b1613", "mobilenet_yolo.bin"),
+        ("3f5b78b0c982f8bdf3a2c3a27e6136d4d2680e96", "mobilenetv2_yolov3.param"),
+        ("0705b0f8fe5a77718561b9b7d6ed4f33fcd3d455", "mobilenetv2_yolov3.bin"),
+        ("de59186323ebad5650631e12a6cc66b526ec7df4", "yolov4-tiny-opt.param"),
+        ("1765c3b251c041dd6ac59d2ec3ddf7b983fe9ee9", "yolov4-tiny-opt.bin"),
+        ("e92d3a3a8ac5e6a6c08c433aa2252b0680124328", "yolov4-opt.param"),
+        ("69d128b42b70fb790e9d3ccabcf1b6e8cc2859fe", "yolov4-opt.bin"),
+        ("6fa8ccc8cabc0f5633ab3c6ffa268e6042b8888f", "yolov5s.param"),
+        ("0cbab3664deb090480ea748c1305f6fe850b9ac4", "yolov5s.bin"),
+        ("e65bae7052d9e9b9d45e1214a8d1b5fe6f64e8af", "yolact.param"),
+        ("9bda99f50b1c14c98c5c6bbc08d4f782eed66548", "yolact.bin"),
+        ("3723ce3e312db6a102cff1a5c39dae80e1de658e", "mobilenet_ssd_voc_ncnn.param"),
+        ("8e2d2139550dcbee1ce5e200b7697b25aab29656", "mobilenet_ssd_voc_ncnn.bin"),
+        ("52c669821dc32ef5b7ab30749fa71a3bc27786b8", "squeezenet_ssd_voc.param"),
+        ("347e31d1cbe469259fa8305860a7c24a95039202", "squeezenet_ssd_voc.bin"),
+        ("52dab628ecac8137e61ce3aea1a912f9c5a0a638", "mobilenetv2_ssdlite_voc.param"),
+        ("9fea06f74f7c60d753cf703ea992f92e50a986d4", "mobilenetv2_ssdlite_voc.bin"),
+        ("f36661eff1eda1e36185e7f2f28fc722ad8b66bb", "mobilenetv3_ssdlite_voc.param"),
+        ("908f63ca9bff0061a499512664b9c533a0b7f485", "mobilenetv3_ssdlite_voc.bin"),
+        ("a63d779a1f789af976bc4e2eae86fdd9b0bb6c2c", "squeezenet_v1.1.param"),
+        ("262f0e33e37aeac69021b5a3556664be65fc0aeb", "squeezenet_v1.1.bin"),
+        ("3ba57cccd1d4a583f6eb76eae25a2dbda7ce7f74", "ZF_faster_rcnn_final.param"),
+        ("1095fbb5f846a1f311b40941add5fef691acaf8d", "ZF_faster_rcnn_final.bin"),
+        ("3586ec3d663b1cc8ec8c662768caa9c7fbcf4fdc", "pelee.param"),
+        ("2442ad483dc546940271591b86db0d9c8b1c7118", "pelee.bin"),
+        ("6cfeda08d5494a1274199089fda77c421be1ecac", "mnet.25-opt.param"),
+        ("3ff9a51dc81cdf506a87543dbf752071ffc50b8d", "mnet.25-opt.bin"),
+        ("50acebff393c91468a73a7b7c604ef231429d068", "rfcn_end2end.param"),
+        ("9a68cd937959b4dda9c5bf9c99181cb0e40f266b", "rfcn_end2end.bin"),
+        ("d6b289cda068e9a9d8a171fb909352a05a39a494", "shufflenet_v2_x0.5.param"),
+        ("2ccd631d04a1b7e05483cd8a8def76bca7d330a8", "shufflenet_v2_x0.5.bin"),
+        ("7c8f8d72c60aab6802985423686b36c61be2f68c", "pose.param"),
+        ("7f691540972715298c611a3e595b20c59c2147ce", "pose.bin"),
+        ("979d09942881cf1207a93cbfa9853005a434469b", "nanodet_m.param"),
+        ("51d868905361e4ba9c45bd12e8a5608e7aadd1bd", "nanodet_m.bin"),
+    ]
+}
+
+
+# Model files that are fetched in several pieces. The value is the number of
+# "<file_name>.partNN" pieces (numbered from 01) that get_model_file downloads
+# and then concatenates back into a single file.
+_split_model_bins = {
+    "ZF_faster_rcnn_final.bin": 3,
+    "rfcn_end2end.bin": 2,
+    "yolov4-opt.bin": 7,
+}
+
+
+# Default base URL for downloads; get_model_file uses the NCNN_REPO environment
+# variable instead when it is set.
+github_repo_url = "https://github.com/nihui/ncnn-assets/raw/master/models/"
+# Template that joins the base URL (expected to end with "/") and a file name.
+_url_format = "{repo_url}{file_name}"
+
+
+def merge_file(root, files_in, file_out, remove=True):
+    """Concatenate several files into one, in the order given.
+
+    Parameters
+    ----------
+    root : str
+        Directory that contains every entry of ``files_in``.
+    files_in : iterable of str
+        File names, relative to ``root``, to append one after another.
+    file_out : str
+        Path of the merged file. It is created, or truncated if it exists.
+    remove : bool, default True
+        When true, each input file is deleted once it has been copied.
+    """
+    # Imported locally so the block does not depend on a file-level import.
+    import shutil
+
+    with open(file_out, "wb") as fd_out:
+        for file_in in files_in:
+            part_path = os.path.join(root, file_in)
+            with open(part_path, "rb") as fd_in:
+                # Stream in chunks: model parts can be large, so avoid
+                # loading a whole part into memory with read().
+                shutil.copyfileobj(fd_in, fd_out)
+            if remove:
+                os.remove(part_path)
+
+
+def short_hash(name):
+    """Return the first eight characters of the SHA-1 registered for ``name``.
+
+    Raises
+    ------
+    ValueError
+        If ``name`` has no entry in the checksum table.
+    """
+    checksum = _model_sha1.get(name)
+    if checksum is None:
+        raise ValueError(
+            "Pretrained model for {name} is not available.".format(name=name)
+        )
+    return checksum[:8]
+
+
+def get_model_file(name, tag=None, root=os.path.join("~", ".ncnn", "models")):
+    r"""Return the local path of a pretrained model file, downloading it if needed.
+
+    The file is downloaded from the online model zoo when it is missing locally
+    or when its SHA-1 does not match the expected value. The root directory is
+    created if it doesn't exist. A lock file next to the model serialises
+    concurrent callers.
+
+    Environment variables
+    ---------------------
+    NCNN_HOME
+        When set, ``$NCNN_HOME/models`` replaces ``root``.
+    NCNN_REPO
+        Base URL to download from instead of the default repository.
+    NCNN_MODEL_LOCK_TIMEOUT
+        Timeout passed to the lock, 300 when unset.
+
+    Parameters
+    ----------
+    name : str
+        Name of the model file, e.g. ``"yolov5s.param"``.
+    tag : str, optional
+        When a string is given, the file is handled as ``"{name}-{tag}"`` and
+        ``tag`` itself is used as the expected SHA-1 instead of the built-in
+        checksum table.
+    root : str, default '~/.ncnn/models'
+        Location for keeping the model parameters.
+
+    Returns
+    -------
+    file_path
+        Path to the requested pretrained model file.
+
+    Raises
+    ------
+    KeyError
+        If no ``tag`` is given and ``name`` is not in the checksum table.
+    ValueError
+        If the freshly downloaded file does not match the expected SHA-1.
+    """
+    if "NCNN_HOME" in os.environ:
+        root = os.path.join(os.environ["NCNN_HOME"], "models")
+    root = os.path.expanduser(root)
+
+    if isinstance(tag, str):
+        file_name = "{name}-{short_hash}".format(name=name, short_hash=tag)
+        sha1_hash = tag
+    else:
+        file_name = "{name}".format(name=name)
+        sha1_hash = _model_sha1[name]
+
+    params_path = os.path.join(root, file_name)
+    lockfile = os.path.join(root, file_name + ".lock")
+
+    # exist_ok avoids a crash when another process creates the directory
+    # between an existence check and the makedirs call.
+    os.makedirs(root, exist_ok=True)
+
+    # Resolve the base URL once. An empty NCNN_REPO falls back to the default
+    # instead of failing on repo_url[-1].
+    repo_url = os.environ.get("NCNN_REPO") or github_repo_url
+    if not repo_url.endswith("/"):
+        repo_url = repo_url + "/"
+
+    with portalocker.Lock(
+        lockfile, timeout=int(os.environ.get("NCNN_MODEL_LOCK_TIMEOUT", 300))
+    ):
+        if os.path.exists(params_path):
+            if check_sha1(params_path, sha1_hash):
+                return params_path
+            logging.warning(
+                "Hash mismatch in the content of model file '%s' detected. "
+                "Downloading again.",
+                params_path,
+            )
+        else:
+            logging.info("Model file not found. Downloading.")
+
+        zip_file_path = os.path.join(root, file_name)
+        if file_name in _split_model_bins:
+            # Large files are published as numbered pieces: fetch each piece,
+            # then stitch them together (merge_file deletes the pieces).
+            file_name_parts = [
+                "%s.part%02d" % (file_name, i + 1)
+                for i in range(_split_model_bins[file_name])
+            ]
+            for file_name_part in file_name_parts:
+                download(
+                    _url_format.format(repo_url=repo_url, file_name=file_name_part),
+                    path=os.path.join(root, file_name_part),
+                    overwrite=True,
+                )
+            merge_file(root, file_name_parts, zip_file_path)
+        else:
+            download(
+                _url_format.format(repo_url=repo_url, file_name=file_name),
+                path=zip_file_path,
+                overwrite=True,
+            )
+
+        if zip_file_path.endswith(".zip"):
+            with zipfile.ZipFile(zip_file_path) as zf:
+                zf.extractall(root)
+            os.remove(zip_file_path)
+
+        # Make sure we write the model file on networked filesystems
+        try:
+            os.sync()
+        except AttributeError:
+            # os.sync is not available on every platform.
+            pass
+
+        if check_sha1(params_path, sha1_hash):
+            return params_path
+        raise ValueError("Downloaded file has different hash. Please try again.")
+
+
+def purge(root=os.path.join("~", ".ncnn", "models")):
+    r"""Purge all pretrained model files in local file store.
+
+    Files ending in ``.param`` or ``.bin`` (the two extensions used by the
+    model files in this store) are deleted; ``.params`` is still matched so
+    that anything the previous implementation removed keeps being removed.
+    Other files in the directory are left alone. A missing directory is
+    treated as already empty.
+
+    Parameters
+    ----------
+    root : str, default '~/.ncnn/models'
+        Location for keeping the model parameters.
+    """
+    root = os.path.expanduser(root)
+    if not os.path.isdir(root):
+        # Nothing has been downloaded yet, so there is nothing to purge.
+        return
+    model_suffixes = (".param", ".bin", ".params")
+    for f in os.listdir(root):
+        if f.endswith(model_suffixes):
+            os.remove(os.path.join(root, f))
--- a/3rdparty/ncnn/python/ncnn/model_zoo/model_zoo.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/model_zoo.py
@ -0,0 +1,68 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+from .yolov2 import MobileNet_YoloV2
+from .yolov3 import MobileNetV2_YoloV3
+from .yolov4 import YoloV4_Tiny, YoloV4
+from .yolov5 import YoloV5s
+from .yolact import Yolact
+from .mobilenetssd import MobileNet_SSD
+from .squeezenetssd import SqueezeNet_SSD
+from .mobilenetv2ssdlite import MobileNetV2_SSDLite
+from .mobilenetv3ssdlite import MobileNetV3_SSDLite
+from .squeezenet import SqueezeNet
+from .fasterrcnn import Faster_RCNN
+from .peleenetssd import PeleeNet_SSD
+from .retinaface import RetinaFace
+from .rfcn import RFCN
+from .shufflenetv2 import ShuffleNetV2
+from .simplepose import SimplePose
+from .nanodet import NanoDet
+
+# Public API of this module.
+__all__ = ["get_model", "get_model_list"]
+
+# Registry mapping the lower-case model name accepted by get_model() to the
+# class that implements it.
+_models = {
+    "mobilenet_yolov2": MobileNet_YoloV2,
+    "mobilenetv2_yolov3": MobileNetV2_YoloV3,
+    "yolov4_tiny": YoloV4_Tiny,
+    "yolov4": YoloV4,
+    "yolov5s": YoloV5s,
+    "yolact": Yolact,
+    "mobilenet_ssd": MobileNet_SSD,
+    "squeezenet_ssd": SqueezeNet_SSD,
+    "mobilenetv2_ssdlite": MobileNetV2_SSDLite,
+    "mobilenetv3_ssdlite": MobileNetV3_SSDLite,
+    "squeezenet": SqueezeNet,
+    "faster_rcnn": Faster_RCNN,
+    "peleenet_ssd": PeleeNet_SSD,
+    "retinaface": RetinaFace,
+    "rfcn": RFCN,
+    "shufflenetv2": ShuffleNetV2,
+    "simplepose": SimplePose,
+    "nanodet": NanoDet,
+}
+
+
+def get_model(name, **kwargs):
+    """Instantiate the model registered under ``name`` (case-insensitive).
+
+    Extra keyword arguments are forwarded to the model class constructor.
+
+    Raises
+    ------
+    ValueError
+        If ``name`` is not a registered model; the message lists all
+        registered names in sorted order.
+    """
+    key = name.lower()
+    if key in _models:
+        return _models[key](**kwargs)
+
+    known_names = "\n\t".join(sorted(_models.keys()))
+    message = '"%s" is not among the following model list:\n\t' % (key)
+    message += "%s" % (known_names)
+    raise ValueError(message)
+
+
+def get_model_list():
+    """Return the registered model names as a new list, in registry order."""
+    return [model_name for model_name in _models]
--- a/3rdparty/ncnn/python/ncnn/model_zoo/nanodet.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/nanodet.py
@ -0,0 +1,273 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import time
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+from ..utils.functional import *
+import cv2
+
+
+class NanoDet:
+    """NanoDet-m object detector running on ncnn.
+
+    Calling an instance with a BGR image returns a list of ``Detect_Object``
+    built from (label, score, x, y, w, h), where the label is an index into
+    ``class_names``.
+
+    Parameters
+    ----------
+    target_size : int, default 320
+        Length the longer image side is resized to before inference.
+    prob_threshold : float, default 0.4
+        Minimum class score for a candidate box to be kept.
+    nms_threshold : float, default 0.3
+        IoU threshold passed to non-maximum suppression.
+    num_threads : int, default 1
+        Thread count set on the net options.
+    use_gpu : bool, default False
+        Whether to enable Vulkan compute on the net.
+    """
+
+    def __init__(
+        self,
+        target_size=320,
+        prob_threshold=0.4,
+        nms_threshold=0.3,
+        num_threads=1,
+        use_gpu=False,
+    ):
+        self.target_size = target_size
+        self.prob_threshold = prob_threshold
+        self.nms_threshold = nms_threshold
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.mean_vals = [103.53, 116.28, 123.675]
+        self.norm_vals = [0.017429, 0.017507, 0.017125]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+        self.net.opt.num_threads = self.num_threads
+
+        # original pretrained model from https://github.com/RangiLyu/nanodet
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("nanodet_m.param"))
+        self.net.load_model(get_model_file("nanodet_m.bin"))
+
+        # Each box side is predicted as a distribution over reg_max + 1 bins.
+        self.reg_max = 7
+        # One stride per output head, in the same order as the blob names
+        # used in __call__.
+        self.strides = [8, 16, 32]
+        # Candidates kept per head before NMS.
+        self.num_candidate = 1000
+        self.top_k = -1
+
+        self.class_names = [
+            "person",
+            "bicycle",
+            "car",
+            "motorcycle",
+            "airplane",
+            "bus",
+            "train",
+            "truck",
+            "boat",
+            "traffic light",
+            "fire hydrant",
+            "stop sign",
+            "parking meter",
+            "bench",
+            "bird",
+            "cat",
+            "dog",
+            "horse",
+            "sheep",
+            "cow",
+            "elephant",
+            "bear",
+            "zebra",
+            "giraffe",
+            "backpack",
+            "umbrella",
+            "handbag",
+            "tie",
+            "suitcase",
+            "frisbee",
+            "skis",
+            "snowboard",
+            "sports ball",
+            "kite",
+            "baseball bat",
+            "baseball glove",
+            "skateboard",
+            "surfboard",
+            "tennis racket",
+            "bottle",
+            "wine glass",
+            "cup",
+            "fork",
+            "knife",
+            "spoon",
+            "bowl",
+            "banana",
+            "apple",
+            "sandwich",
+            "orange",
+            "broccoli",
+            "carrot",
+            "hot dog",
+            "pizza",
+            "donut",
+            "cake",
+            "chair",
+            "couch",
+            "potted plant",
+            "bed",
+            "dining table",
+            "toilet",
+            "tv",
+            "laptop",
+            "mouse",
+            "remote",
+            "keyboard",
+            "cell phone",
+            "microwave",
+            "oven",
+            "toaster",
+            "sink",
+            "refrigerator",
+            "book",
+            "clock",
+            "vase",
+            "scissors",
+            "teddy bear",
+            "hair drier",
+            "toothbrush",
+        ]
+
+    def __del__(self):
+        # Drop the reference to the net when the detector is collected.
+        self.net = None
+
+    def __call__(self, img):
+        """Detect objects in ``img`` and return them as a list of ``Detect_Object``.
+
+        ``img`` is indexed as ``img.shape[0]`` = height, ``img.shape[1]`` = width
+        and is handed to ncnn as BGR pixels. An empty list is returned when no
+        candidate passes ``prob_threshold``.
+        """
+        img_w = img.shape[1]
+        img_h = img.shape[0]
+
+        # Resize so that the longer side equals target_size, keeping aspect ratio.
+        w = img_w
+        h = img_h
+        scale = 1.0
+        if w > h:
+            scale = float(self.target_size) / w
+            w = self.target_size
+            h = int(h * scale)
+        else:
+            scale = float(self.target_size) / h
+            h = self.target_size
+            w = int(w * scale)
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR, img_w, img_h, w, h
+        )
+
+        # Pad both sides up to the next multiple of 32, splitting the padding
+        # between the two edges.
+        wpad = (w + 31) // 32 * 32 - w
+        hpad = (h + 31) // 32 * 32 - h
+        mat_in_pad = ncnn.copy_make_border(
+            mat_in,
+            hpad // 2,
+            hpad - hpad // 2,
+            wpad // 2,
+            wpad - wpad // 2,
+            ncnn.BorderType.BORDER_CONSTANT,
+            0,
+        )
+
+        mat_in_pad.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        ex = self.net.create_extractor()
+        ex.input("input.1", mat_in_pad)
+
+        # One score blob and one box blob per stride; rows are feature-map
+        # locations after the reshape.
+        score_out_name = ["792", "814", "836"]
+        scores = [ex.extract(x)[1] for x in score_out_name]
+        scores = [np.reshape(x, (-1, 80)) for x in scores]
+
+        boxes_out_name = ["795", "817", "839"]
+        raw_boxes = [ex.extract(x)[1] for x in boxes_out_name]
+        raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes]
+
+        # generate centers
+        decode_boxes = []
+        select_scores = []
+        for stride, box_distribute, score in zip(self.strides, raw_boxes, scores):
+            # centers
+            # score has one row per feature-map location, so the location
+            # count is score.shape[0] in both branches. (shape[1] is the
+            # fixed class count and must not be used to derive the grid size.)
+            if mat_in_pad.w > mat_in_pad.h:
+                fm_w = mat_in_pad.w // stride
+                fm_h = score.shape[0] // fm_w
+            else:
+                fm_h = mat_in_pad.h // stride
+                fm_w = score.shape[0] // fm_h
+            h_range = np.arange(fm_h)
+            w_range = np.arange(fm_w)
+            ww, hh = np.meshgrid(w_range, h_range)
+            ct_row = (hh.flatten() + 0.5) * stride
+            ct_col = (ww.flatten() + 0.5) * stride
+            center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
+
+            # box distribution to distance: expected bin index per side,
+            # scaled by the stride
+            reg_range = np.arange(self.reg_max + 1)
+            box_distance = box_distribute.reshape((-1, self.reg_max + 1))
+            box_distance = softmax(box_distance)
+            box_distance = box_distance * np.expand_dims(reg_range, axis=0)
+            box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
+            box_distance = box_distance * stride
+
+            # top K candidate, ranked by each location's best class score
+            topk_idx = np.argsort(score.max(axis=1))[::-1]
+            topk_idx = topk_idx[: self.num_candidate]
+            center = center[topk_idx]
+            score = score[topk_idx]
+            box_distance = box_distance[topk_idx]
+
+            # decode box: (x1, y1) = centre - distance, (x2, y2) = centre + distance
+            decode_box = center + [-1, -1, 1, 1] * box_distance
+
+            select_scores.append(score)
+            decode_boxes.append(decode_box)
+
+        # nms, run independently for every class
+        bboxes = np.concatenate(decode_boxes, axis=0)
+        confidences = np.concatenate(select_scores, axis=0)
+        picked_box = []
+        picked_probs = []
+        picked_labels = []
+        for class_index in range(0, confidences.shape[1]):
+            probs = confidences[:, class_index]
+            mask = probs > self.prob_threshold
+            probs = probs[mask]
+            if probs.shape[0] == 0:
+                continue
+            subset_boxes = bboxes[mask, :]
+            picked = nms(
+                subset_boxes,
+                probs,
+                iou_threshold=self.nms_threshold,
+                top_k=self.top_k,
+            )
+            picked_box.append(subset_boxes[picked])
+            picked_probs.append(probs[picked])
+            picked_labels.extend([class_index] * len(picked))
+
+        if not picked_box:
+            return []
+
+        picked_box = np.concatenate(picked_box)
+        picked_probs = np.concatenate(picked_probs)
+
+        # result with clip: undo the padding offset and the resize scale
+        objects = [
+            Detect_Object(
+                label,
+                score,
+                (bbox[0] - wpad / 2) / scale if bbox[0] > 0 else 0,
+                (bbox[1] - hpad / 2) / scale if bbox[1] > 0 else 0,
+                (bbox[2] - bbox[0]) / scale
+                if bbox[2] < mat_in_pad.w
+                else (mat_in_pad.w - bbox[0]) / scale,
+                (bbox[3] - bbox[1]) / scale
+                if bbox[3] < mat_in_pad.h
+                else (mat_in_pad.h - bbox[1]) / scale,
+            )
+            for label, score, bbox in zip(picked_labels, picked_probs, picked_box)
+        ]
+
+        return objects
--- a/3rdparty/ncnn/python/ncnn/model_zoo/peleenetssd.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/peleenetssd.py
@ -0,0 +1,114 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class PeleeNet_SSD:
+    """PeleeNet-SSD detector with a segmentation output, running on ncnn.
+
+    Calling an instance with a BGR image returns ``(objects, seg_map)``:
+    a list of ``Detect_Object`` and the "sigmoid" blob resized to the
+    input image size.
+    """
+
+    def __init__(self, target_size=304, num_threads=1, use_gpu=False):
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.mean_vals = [103.9, 116.7, 123.6]
+        self.norm_vals = [0.017, 0.017, 0.017]
+
+        net = ncnn.Net()
+        self.net = net
+        net.opt.use_vulkan_compute = self.use_gpu
+
+        # model is converted from https://github.com/eric612/MobileNet-YOLO
+        # and can be downloaded from https://drive.google.com/open?id=1Wt6jKv13sBRMHgrGAJYlOlRF-o80pC0g
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        net.load_param(get_model_file("pelee.param"))
+        net.load_model(get_model_file("pelee.bin"))
+
+        # Index 0 is the background class.
+        self.class_names = [
+            "background",
+            "person",
+            "rider",
+            "car",
+            "bus",
+            "truck",
+            "bike",
+            "motor",
+            "traffic light",
+            "traffic sign",
+            "train",
+        ]
+
+    def __del__(self):
+        # Drop the reference to the net when the detector is collected.
+        self.net = None
+
+    def __call__(self, img):
+        """Run detection and segmentation on ``img``.
+
+        Returns a tuple ``(objects, resized)`` where ``objects`` is a list of
+        ``Detect_Object`` scaled by the image width/height and ``resized`` is
+        the segmentation blob bilinearly resized to the image size.
+        """
+        img_h, img_w = img.shape[0], img.shape[1]
+
+        # Squash the image to a target_size x target_size network input.
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR,
+            img_w,
+            img_h,
+            self.target_size,
+            self.target_size,
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        ex = self.net.create_extractor()
+        ex.set_num_threads(self.num_threads)
+        ex.input("data", mat_in)
+
+        _, mat_out = ex.extract("detection_out")
+
+        # Each output row is read as (label, prob, x1, y1, x2, y2) with the
+        # coordinates multiplied by the image size. Rows are read through
+        # ncnn.Mat.row (no memory copy); np.array(mat_out) would work too.
+        objects = []
+        for row_index in range(mat_out.h):
+            detection = mat_out.row(row_index)
+
+            det_obj = Detect_Object()
+            det_obj.label = detection[0]
+            det_obj.prob = detection[1]
+            det_obj.rect.x = detection[2] * img_w
+            det_obj.rect.y = detection[3] * img_h
+            det_obj.rect.w = detection[4] * img_w - det_obj.rect.x
+            det_obj.rect.h = detection[5] * img_h - det_obj.rect.y
+            objects.append(det_obj)
+
+        _, seg_out = ex.extract("sigmoid")
+
+        resized = ncnn.Mat()
+        ncnn.resize_bilinear(seg_out, resized, img_w, img_h)
+
+        return objects, resized
--- a/3rdparty/ncnn/python/ncnn/model_zoo/retinaface.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/retinaface.py
@ -0,0 +1,326 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Point, Face_Object
+
+
+class RetinaFace:
+    """RetinaFace (mnet.25) face detector running on ncnn.
+
+    Calling an instance with a BGR image returns a list of ``Face_Object``
+    (box, five landmarks, probability), sorted by descending probability,
+    de-duplicated by NMS and clipped to the image.
+
+    Parameters
+    ----------
+    prob_threshold : float, default 0.8
+        Minimum face probability for a proposal to be generated.
+    nms_threshold : float, default 0.4
+        IoU above which a lower-scored proposal is suppressed.
+    num_threads : int, default 1
+        Thread count set on each extractor.
+    use_gpu : bool, default False
+        Whether to enable Vulkan compute on the net.
+    """
+
+    def __init__(
+        self, prob_threshold=0.8, nms_threshold=0.4, num_threads=1, use_gpu=False
+    ):
+        self.prob_threshold = prob_threshold
+        self.nms_threshold = nms_threshold
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # model is converted from
+        # https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models
+        # https://github.com/deepinsight/insightface/issues/669
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("mnet.25-opt.param"))
+        self.net.load_model(get_model_file("mnet.25-opt.bin"))
+
+    def __del__(self):
+        # Drop the reference to the net when the detector is collected.
+        self.net = None
+
+    def __call__(self, img):
+        """Detect faces in ``img`` and return them as a list of ``Face_Object``."""
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+
+        mat_in = ncnn.Mat.from_pixels(
+            img, ncnn.Mat.PixelType.PIXEL_BGR2RGB, img_w, img_h
+        )
+
+        ex = self.net.create_extractor()
+        ex.set_num_threads(self.num_threads)
+
+        ex.input("data", mat_in)
+
+        faceobjects32 = self.detect_stride32(ex)
+        faceobjects16 = self.detect_stride16(ex)
+        faceobjects8 = self.detect_stride8(ex)
+
+        faceproposals = [*faceobjects32, *faceobjects16, *faceobjects8]
+
+        # sort all proposals by score from highest to lowest
+        faceproposals.sort(key=lambda obj: obj.prob, reverse=True)
+
+        # apply nms with nms_threshold
+        picked = self.nms_sorted_bboxes(faceproposals, self.nms_threshold)
+
+        faceobjects = []
+        for picked_index in picked:
+            face = faceproposals[picked_index]
+
+            # clip to image size
+            x0 = face.rect.x
+            y0 = face.rect.y
+            x1 = x0 + face.rect.w
+            y1 = y0 + face.rect.h
+
+            x0 = np.maximum(np.minimum(x0, float(img_w) - 1), 0.0)
+            y0 = np.maximum(np.minimum(y0, float(img_h) - 1), 0.0)
+            x1 = np.maximum(np.minimum(x1, float(img_w) - 1), 0.0)
+            y1 = np.maximum(np.minimum(y1, float(img_h) - 1), 0.0)
+
+            face.rect.x = x0
+            face.rect.y = y0
+            face.rect.w = x1 - x0
+            face.rect.h = y1 - y0
+
+            faceobjects.append(face)
+
+        return faceobjects
+
+    def _detect_stride(self, ex, feat_stride, scale_values):
+        """Decode the proposals of one stride level.
+
+        Extracts the score, bbox and landmark blobs whose names end in
+        ``stride<feat_stride>``, builds anchors for a single ratio of 1.0 and
+        the given ``scale_values`` on a base size of 16, and returns the
+        proposals that reach ``self.prob_threshold``.
+        """
+        suffix = "stride%d" % feat_stride
+        _, score_blob = ex.extract("face_rpn_cls_prob_reshape_" + suffix)
+        _, bbox_blob = ex.extract("face_rpn_bbox_pred_" + suffix)
+        _, landmark_blob = ex.extract("face_rpn_landmark_pred_" + suffix)
+
+        base_size = 16
+        ratios = ncnn.Mat(1)
+        ratios[0] = 1.0
+        scales = ncnn.Mat(len(scale_values))
+        for i, value in enumerate(scale_values):
+            scales[i] = value
+        anchors = self.generate_anchors(base_size, ratios, scales)
+
+        return self.generate_proposals(
+            anchors,
+            feat_stride,
+            score_blob,
+            bbox_blob,
+            landmark_blob,
+            self.prob_threshold,
+        )
+
+    def detect_stride32(self, ex):
+        """Return the face proposals of the stride-32 head (scales 32 and 16)."""
+        return self._detect_stride(ex, 32, (32.0, 16.0))
+
+    def detect_stride16(self, ex):
+        """Return the face proposals of the stride-16 head (scales 8 and 4)."""
+        return self._detect_stride(ex, 16, (8.0, 4.0))
+
+    def detect_stride8(self, ex):
+        """Return the face proposals of the stride-8 head (scales 2 and 1)."""
+        return self._detect_stride(ex, 8, (2.0, 1.0))
+
+    def generate_anchors(self, base_size, ratios, scales):
+        """Build one anchor box per (ratio, scale) pair, centred on the base cell.
+
+        ``ratios`` and ``scales`` are 1-D ``ncnn.Mat``; the result is an
+        ``ncnn.Mat`` with ``ratios.w * scales.w`` rows of (x0, y0, x1, y1).
+        """
+        num_ratio = ratios.w
+        num_scale = scales.w
+
+        # One row per (ratio, scale) combination rather than a fixed two rows,
+        # so any number of ratios/scales fits.
+        anchors_np = np.zeros((num_ratio * num_scale, 4), dtype=np.float32)
+
+        cx = base_size * 0.5
+        cy = base_size * 0.5
+
+        for i in range(num_ratio):
+            ar = ratios[i]
+
+            r_w = np.round(base_size / np.sqrt(ar))
+            r_h = np.round(r_w * ar)  # round(base_size * np.sqrt(ar))
+
+            for j in range(num_scale):
+                scale = scales[j]
+
+                rs_w = r_w * scale
+                rs_h = r_h * scale
+
+                # Row view: writes go straight into anchors_np.
+                anchor = anchors_np[i * num_scale + j]
+
+                anchor[0] = cx - rs_w * 0.5
+                anchor[1] = cy - rs_h * 0.5
+                anchor[2] = cx + rs_w * 0.5
+                anchor[3] = cy + rs_h * 0.5
+
+        anchors = ncnn.Mat(anchors_np)
+        return anchors
+
+    def generate_proposals(
+        self, anchors, feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold
+    ):
+        """Turn the raw blobs of one stride level into ``Face_Object`` proposals.
+
+        For every anchor and every feature-map cell whose probability is at
+        least ``prob_threshold``, the anchor (shifted by ``feat_stride`` per
+        cell) is combined with the predicted box deltas and landmark offsets.
+        """
+        faceobjects = []
+
+        w = score_blob.w
+        h = score_blob.h
+
+        # generate face proposal from bbox deltas and shifted anchors
+        num_anchors = anchors.h
+
+        for q in range(num_anchors):
+            anchor = anchors.row(q)
+
+            # The score channel for anchor q is read at offset num_anchors.
+            score = score_blob.channel(q + num_anchors)
+            bbox = bbox_blob.channel_range(q * 4, 4)
+            landmark = landmark_blob.channel_range(q * 10, 10)
+
+            # Fetch the per-channel views once per anchor instead of once per
+            # pixel: dx, dy, dw, dh and the five (x, y) landmark offsets.
+            bbox_channels = [bbox.channel(k) for k in range(4)]
+            landmark_channels = [landmark.channel(k) for k in range(10)]
+
+            # shifted anchor
+            anchor_y = anchor[1]
+
+            anchor_w = anchor[2] - anchor[0]
+            anchor_h = anchor[3] - anchor[1]
+
+            for i in range(h):
+                anchor_x = anchor[0]
+
+                for j in range(w):
+                    index = i * w + j
+
+                    prob = score[index]
+
+                    if prob >= prob_threshold:
+                        # apply center size
+                        dx = bbox_channels[0][index]
+                        dy = bbox_channels[1][index]
+                        dw = bbox_channels[2][index]
+                        dh = bbox_channels[3][index]
+
+                        cx = anchor_x + anchor_w * 0.5
+                        cy = anchor_y + anchor_h * 0.5
+
+                        pb_cx = cx + anchor_w * dx
+                        pb_cy = cy + anchor_h * dy
+
+                        pb_w = anchor_w * np.exp(dw)
+                        pb_h = anchor_h * np.exp(dh)
+
+                        x0 = pb_cx - pb_w * 0.5
+                        y0 = pb_cy - pb_h * 0.5
+                        x1 = pb_cx + pb_w * 0.5
+                        y1 = pb_cy + pb_h * 0.5
+
+                        obj = Face_Object()
+                        obj.rect.x = x0
+                        obj.rect.y = y0
+                        obj.rect.w = x1 - x0 + 1
+                        obj.rect.h = y1 - y0 + 1
+
+                        # Landmark k uses channel 2k for x and 2k + 1 for y.
+                        obj.landmark = []
+                        for k in range(5):
+                            point = Point()
+                            point.x = (
+                                cx + (anchor_w + 1) * landmark_channels[2 * k][index]
+                            )
+                            point.y = (
+                                cy
+                                + (anchor_h + 1) * landmark_channels[2 * k + 1][index]
+                            )
+                            obj.landmark.append(point)
+                        obj.prob = prob
+
+                        faceobjects.append(obj)
+
+                    anchor_x += feat_stride
+
+                anchor_y += feat_stride
+
+        return faceobjects
+
+    def nms_sorted_bboxes(self, faceobjects, nms_threshold):
+        """Greedy NMS over proposals already sorted by descending score.
+
+        Returns the indices (into ``faceobjects``) of the proposals to keep:
+        a proposal is kept unless its IoU with an already kept one exceeds
+        ``nms_threshold``.
+        """
+        picked = []
+
+        areas = [obj.rect.area() for obj in faceobjects]
+
+        for i, a in enumerate(faceobjects):
+            keep = True
+            for j in picked:
+                b = faceobjects[j]
+
+                # intersection over union
+                inter_area = a.rect.intersection_area(b.rect)
+                union_area = areas[i] + areas[j] - inter_area
+                # A zero union means both boxes are degenerate; treat that as
+                # no overlap instead of dividing by zero.
+                if union_area > 0 and inter_area / union_area > nms_threshold:
+                    keep = False
+                    # One suppressing box is enough; stop scanning.
+                    break
+
+            if keep:
+                picked.append(i)
+
+        return picked
--- a/3rdparty/ncnn/python/ncnn/model_zoo/rfcn.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/rfcn.py
@ -0,0 +1,242 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class RFCN:
+    """R-FCN object detector (py-R-FCN ResNet-50 model) running on ncnn.
+
+    Calling an instance with an image returns a list of ``Detect_Object``
+    sorted by descending probability, capped at ``max_per_image`` entries.
+    """
+
+    def __init__(
+        self,
+        target_size=224,
+        max_per_image=100,
+        confidence_thresh=0.6,
+        nms_threshold=0.3,
+        num_threads=1,
+        use_gpu=False,
+    ):
+        """Store the settings and load the ``rfcn_end2end`` model files.
+
+        Args:
+            target_size: length, in pixels, that the shorter image side is
+                resized to before inference.
+            max_per_image: maximum number of detections returned per call.
+            confidence_thresh: a roi is kept only if its best class score is
+                strictly greater than this value.
+            nms_threshold: IoU above which a lower-ranked box of the same
+                class is suppressed.
+            num_threads: thread count set on every extractor.
+            use_gpu: value assigned to ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.max_per_image = max_per_image
+        self.confidence_thresh = confidence_thresh
+        self.nms_threshold = nms_threshold
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # Per-channel means subtracted from the input; no scaling is applied.
+        self.mean_vals = [102.9801, 115.9465, 122.7717]
+        self.norm_vals = []
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # original pretrained model from https://github.com/YuwenXiong/py-R-FCN
+        # https://github.com/YuwenXiong/py-R-FCN/blob/master/models/pascal_voc/ResNet-50/rfcn_end2end/test_agnostic.prototxt
+        # https://1drv.ms/u/s!AoN7vygOjLIQqUWHpY67oaC7mopf
+        # resnet50_rfcn_final.caffemodel
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("rfcn_end2end.param"))
+        self.net.load_model(get_model_file("rfcn_end2end.bin"))
+
+        # Index in this list == label value stored on each Detect_Object.
+        self.class_names = [
+            "background",
+            "aeroplane",
+            "bicycle",
+            "bird",
+            "boat",
+            "bottle",
+            "bus",
+            "car",
+            "cat",
+            "chair",
+            "cow",
+            "diningtable",
+            "dog",
+            "horse",
+            "motorbike",
+            "person",
+            "pottedplant",
+            "sheep",
+            "sofa",
+            "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        """Drop the reference to the ncnn network."""
+        self.net = None
+
+    def __call__(self, img):
+        """Detect objects in ``img``.
+
+        Args:
+            img: image array with ``img.shape[0]`` rows and ``img.shape[1]``
+                columns; the pixels are handed to ncnn as ``PIXEL_BGR``.
+
+        Returns:
+            list of Detect_Object: at most ``max_per_image`` detections in
+            original-image coordinates, sorted by descending ``prob``.
+        """
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+
+        # Scale so the shorter side becomes target_size; the longer side
+        # follows the aspect ratio and is truncated to int for the resize.
+        if img_w < img_h:
+            scale = float(self.target_size) / img_w
+            w = self.target_size
+            h = img_h * scale
+        else:
+            scale = float(self.target_size) / img_h
+            h = self.target_size
+            w = img_w * scale
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR,
+            img_w,
+            img_h,
+            int(w),
+            int(h),
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        im_info = ncnn.Mat(3)
+        im_info[0] = h
+        im_info[1] = w
+        im_info[2] = scale
+
+        # step1, extract feature and all rois
+        ex1 = self.net.create_extractor()
+        ex1.set_num_threads(self.num_threads)
+        ex1.input("data", mat_in)
+        ex1.input("im_info", im_info)
+
+        _, rfcn_cls = ex1.extract("rfcn_cls")
+        _, rfcn_bbox = ex1.extract("rfcn_bbox")
+        _, rois = ex1.extract("rois")  # all rois
+
+        # Largest valid pixel coordinates, used when clipping boxes.
+        max_x = float(img_w - 1)
+        max_y = float(img_h - 1)
+
+        # step2, extract bbox and score for each roi
+        class_candidates = []
+        for i in range(rois.c):
+            ex2 = self.net.create_extractor()
+            # Apply the configured thread count to the per-roi stage as well;
+            # previously only the first extractor honoured num_threads.
+            ex2.set_num_threads(self.num_threads)
+
+            roi = rois.channel(i)  # get single roi
+            ex2.input("rfcn_cls", rfcn_cls)
+            ex2.input("rfcn_bbox", rfcn_bbox)
+            ex2.input("rois", roi)
+
+            _, bbox_pred = ex2.extract("bbox_pred")
+            _, cls_prob = ex2.extract("cls_prob")
+
+            # One candidate bucket per class, grown lazily.
+            num_class = cls_prob.w
+            while len(class_candidates) < num_class:
+                class_candidates.append([])
+
+            # find class id with highest score
+            label = 0
+            score = 0.0
+            for j in range(num_class):
+                class_score = cls_prob[j]
+                if class_score > score:
+                    label = j
+                    score = class_score
+
+            # ignore background or low score
+            if label == 0 or score <= self.confidence_thresh:
+                continue
+
+            # unscale to image size
+            x1 = roi[0] / scale
+            y1 = roi[1] / scale
+            x2 = roi[2] / scale
+            y2 = roi[3] / scale
+
+            pb_w = x2 - x1 + 1
+            pb_h = y2 - y1 + 1
+
+            # apply bbox regression; the deltas are read from offsets 4..7
+            dx = bbox_pred[4]
+            dy = bbox_pred[4 + 1]
+            dw = bbox_pred[4 + 2]
+            dh = bbox_pred[4 + 3]
+
+            cx = x1 + pb_w * 0.5
+            cy = y1 + pb_h * 0.5
+
+            obj_cx = cx + pb_w * dx
+            obj_cy = cy + pb_h * dy
+
+            obj_w = pb_w * np.exp(dw)
+            obj_h = pb_h * np.exp(dh)
+
+            obj_x1 = obj_cx - obj_w * 0.5
+            obj_y1 = obj_cy - obj_h * 0.5
+            obj_x2 = obj_cx + obj_w * 0.5
+            obj_y2 = obj_cy + obj_h * 0.5
+
+            # clip to the image
+            obj_x1 = np.maximum(np.minimum(obj_x1, max_x), 0.0)
+            obj_y1 = np.maximum(np.minimum(obj_y1, max_y), 0.0)
+            obj_x2 = np.maximum(np.minimum(obj_x2, max_x), 0.0)
+            obj_y2 = np.maximum(np.minimum(obj_y2, max_y), 0.0)
+
+            # append object
+            obj = Detect_Object()
+            obj.rect.x = obj_x1
+            obj.rect.y = obj_y1
+            obj.rect.w = obj_x2 - obj_x1 + 1
+            obj.rect.h = obj_y2 - obj_y1 + 1
+            obj.label = label
+            obj.prob = score
+
+            class_candidates[label].append(obj)
+
+        # post process: per-class NMS, then a global sort and cap
+        objects = []
+        for candidates in class_candidates:
+            if len(candidates) == 0:
+                continue
+
+            candidates.sort(key=lambda obj: obj.prob, reverse=True)
+
+            picked = self.nms_sorted_bboxes(candidates, self.nms_threshold)
+            objects.extend(candidates[z] for z in picked)
+
+        objects.sort(key=lambda obj: obj.prob, reverse=True)
+
+        return objects[: self.max_per_image]
+
+    def nms_sorted_bboxes(self, objects, nms_threshold):
+        """Greedy non-maximum suppression over an ordered list of detections.
+
+        Args:
+            objects: sequence of objects exposing ``rect.area()`` and
+                ``rect.intersection_area(other_rect)``; ``__call__`` passes
+                them sorted by descending probability.
+            nms_threshold: IoU value above which a candidate is suppressed.
+
+        Returns:
+            list of int: indices into ``objects`` of the kept entries, in
+            ascending order.
+        """
+        picked = []
+
+        n = len(objects)
+
+        areas = np.zeros((n,), dtype=np.float32)
+        for i in range(n):
+            areas[i] = objects[i].rect.area()
+
+        for i in range(n):
+            a = objects[i]
+
+            keep = True
+            for j in picked:
+                b = objects[j]
+
+                # intersection over union
+                inter_area = a.rect.intersection_area(b.rect)
+                union_area = areas[i] + areas[j] - inter_area
+
+                # Guard against a zero union (degenerate boxes) before
+                # dividing; such pairs are treated as non-overlapping.
+                if union_area > 0 and inter_area / union_area > nms_threshold:
+                    keep = False
+                    # One overlapping kept box is enough to reject this one.
+                    break
+
+            if keep:
+                picked.append(i)
+
+        return picked
--- a/3rdparty/ncnn/python/ncnn/model_zoo/shufflenetv2.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/shufflenetv2.py
@ -0,0 +1,75 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+
+
+class ShuffleNetV2:
+    """ShuffleNet V2 (x0.5) image classifier backed by ncnn.
+
+    Calling an instance with an image returns the softmax-normalised class
+    scores as a flat numpy array.
+    """
+
+    def __init__(self, target_size=224, num_threads=1, use_gpu=False):
+        """Store the settings and load the ``shufflenet_v2_x0.5`` model files.
+
+        Args:
+            target_size: width and height the input image is resized to.
+            num_threads: thread count set on the extractor.
+            use_gpu: value assigned to ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # No mean subtraction; every channel is scaled by 1/255.
+        self.mean_vals = []
+        self.norm_vals = [1 / 255.0] * 3
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe
+        # models can be downloaded from https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe/releases
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("shufflenet_v2_x0.5.param"))
+        self.net.load_model(get_model_file("shufflenet_v2_x0.5.bin"))
+
+    def __del__(self):
+        """Drop the reference to the ncnn network."""
+        self.net = None
+
+    def __call__(self, img):
+        """Classify ``img`` and return the per-class probabilities.
+
+        Args:
+            img: image array with ``img.shape[0]`` rows and ``img.shape[1]``
+                columns; the pixels are handed to ncnn as ``PIXEL_BGR``.
+
+        Returns:
+            numpy array built from the flattened, softmax-ed ``fc`` output.
+        """
+        src_h = img.shape[0]
+        src_w = img.shape[1]
+        side = self.target_size
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR, src_w, src_h, side, side
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        extractor = self.net.create_extractor()
+        extractor.set_num_threads(self.num_threads)
+        extractor.input("data", mat_in)
+
+        _, logits = extractor.extract("fc")
+
+        # The model ends at the fc layer, so softmax is applied by hand to
+        # turn the result into probabilities. Skip this step if your model
+        # already contains a softmax operation.
+        softmax_layer = ncnn.create_layer("Softmax")
+        layer_params = ncnn.ParamDict()
+        softmax_layer.load_param(layer_params)
+        softmax_layer.forward_inplace(logits, self.net.opt)
+
+        # Flatten to one dimension before converting to numpy.
+        flat = logits.reshape(logits.w * logits.h * logits.c)
+        return np.array(flat)
--- a/3rdparty/ncnn/python/ncnn/model_zoo/simplepose.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/simplepose.py
@ -0,0 +1,92 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import KeyPoint
+
+
+class SimplePose:
+    """Simple-baseline human pose estimator (gluon-cv export) on ncnn.
+
+    Calling an instance with an image returns one ``KeyPoint`` per channel
+    of the network output.
+    """
+
+    def __init__(
+        self, target_width=192, target_height=256, num_threads=1, use_gpu=False
+    ):
+        """Store the settings and load the ``pose`` model files.
+
+        Args:
+            target_width: width the input image is resized to.
+            target_height: height the input image is resized to.
+            num_threads: thread count set on the extractor.
+            use_gpu: value assigned to ``net.opt.use_vulkan_compute``.
+        """
+        self.target_width = target_width
+        self.target_height = target_height
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # Per-channel mean and 1/std, both expressed on the 0..255 scale.
+        self.mean_vals = [0.485 * 255.0, 0.456 * 255.0, 0.406 * 255.0]
+        self.norm_vals = [1 / 0.229 / 255.0, 1 / 0.224 / 255.0, 1 / 0.225 / 255.0]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # the simple baseline human pose estimation from gluon-cv
+        # https://gluon-cv.mxnet.io/build/examples_pose/demo_simple_pose.html
+        # mxnet model exported via
+        #      pose_net.hybridize()
+        #      pose_net.export('pose')
+        # then mxnet2ncnn
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("pose.param"))
+        self.net.load_model(get_model_file("pose.bin"))
+
+    def __del__(self):
+        """Drop the reference to the ncnn network."""
+        self.net = None
+
+    def __call__(self, img):
+        """Estimate keypoints for ``img``.
+
+        For every output channel the location with the largest value is
+        taken and scaled back to the coordinates of ``img``.
+
+        Args:
+            img: image array with ``img.shape[0]`` rows and ``img.shape[1]``
+                columns; the pixels are converted with ``PIXEL_BGR2RGB``.
+
+        Returns:
+            list of KeyPoint: one entry per output channel, in channel order.
+        """
+        src_h = img.shape[0]
+        src_w = img.shape[1]
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR2RGB,
+            src_w,
+            src_h,
+            self.target_width,
+            self.target_height,
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        extractor = self.net.create_extractor()
+        extractor.set_num_threads(self.num_threads)
+        extractor.input("data", mat_in)
+
+        _, heatmaps = extractor.extract("conv3_fwd")
+
+        map_w = heatmaps.w
+        map_h = heatmaps.h
+
+        keypoints = []
+        for channel_idx in range(heatmaps.c):
+            heatmap = heatmaps.channel(channel_idx)
+
+            # Strict '>' against a 0.0 start: a channel with no positive
+            # value reports (0, 0) with probability 0.0.
+            best_prob, best_x, best_y = 0.0, 0, 0
+            for y in range(map_h):
+                row = heatmap.row(y)
+                for x in range(map_w):
+                    value = row[x]
+                    if value > best_prob:
+                        best_prob, best_x, best_y = value, x, y
+
+            # Map heatmap coordinates back onto the source image.
+            keypoint = KeyPoint()
+            keypoint.p.x = best_x * src_w / float(map_w)
+            keypoint.p.y = best_y * src_h / float(map_h)
+            keypoint.prob = best_prob
+            keypoints.append(keypoint)
+
+        return keypoints
--- a/3rdparty/ncnn/python/ncnn/model_zoo/squeezenet.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/squeezenet.py
@ -0,0 +1,63 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+
+
+class SqueezeNet:
+    """SqueezeNet v1.1 image classifier backed by ncnn.
+
+    Calling an instance with an image returns the ``prob`` output blob as a
+    numpy array.
+    """
+
+    def __init__(self, target_size=227, num_threads=1, use_gpu=False):
+        """Store the settings and load the ``squeezenet_v1.1`` model files.
+
+        Args:
+            target_size: width and height the input image is resized to.
+            num_threads: thread count set on the extractor.
+            use_gpu: value assigned to ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # Per-channel means subtracted from the input; no scaling is applied.
+        self.mean_vals = [104.0, 117.0, 123.0]
+        self.norm_vals = []
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("squeezenet_v1.1.param"))
+        self.net.load_model(get_model_file("squeezenet_v1.1.bin"))
+
+    def __del__(self):
+        """Drop the reference to the ncnn network."""
+        self.net = None
+
+    def __call__(self, img):
+        """Classify ``img``.
+
+        Args:
+            img: image array with ``img.shape[0]`` rows and ``img.shape[1]``
+                columns; the pixels are handed to ncnn as ``PIXEL_BGR``.
+
+        Returns:
+            numpy array built from the network's ``prob`` output.
+        """
+        src_h = img.shape[0]
+        src_w = img.shape[1]
+        side = self.target_size
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR, src_w, src_h, side, side
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        extractor = self.net.create_extractor()
+        extractor.set_num_threads(self.num_threads)
+        extractor.input("data", mat_in)
+
+        _, probabilities = extractor.extract("prob")
+
+        return np.array(probabilities)
--- a/3rdparty/ncnn/python/ncnn/model_zoo/squeezenetssd.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/squeezenetssd.py
@ -0,0 +1,120 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class SqueezeNet_SSD:
+    """SqueezeNet-SSD (PASCAL VOC) object detector backed by ncnn.
+
+    Calling an instance with an image returns one ``Detect_Object`` per row
+    of the network's ``detection_out`` blob.
+    """
+
+    def __init__(self, target_size=300, num_threads=1, use_gpu=False):
+        """Store the settings and load the ``squeezenet_ssd_voc`` model files.
+
+        Args:
+            target_size: width and height the input image is resized to.
+            num_threads: thread count set on the extractor.
+            use_gpu: value assigned to ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # Per-channel means subtracted from the input; no scaling is applied.
+        self.mean_vals = [104.0, 117.0, 123.0]
+        self.norm_vals = []
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD
+        # squeezenet_ssd_voc_deploy.prototxt
+        # https://drive.google.com/open?id=0B3gersZ2cHIxdGpyZlZnbEQ5Snc
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("squeezenet_ssd_voc.param"))
+        self.net.load_model(get_model_file("squeezenet_ssd_voc.bin"))
+
+        # Class labels of the model, "background" first.
+        self.class_names = [
+            "background", "aeroplane", "bicycle", "bird", "boat",
+            "bottle", "bus", "car", "cat", "chair",
+            "cow", "diningtable", "dog", "horse", "motorbike",
+            "person", "pottedplant", "sheep", "sofa", "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        """Drop the reference to the ncnn network."""
+        self.net = None
+
+    def __call__(self, img):
+        """Detect objects in ``img``.
+
+        Each output row is read as ``[label, prob, x1, y1, x2, y2]``; the
+        coordinates are multiplied by the source image width/height.
+
+        Args:
+            img: image array with ``img.shape[0]`` rows and ``img.shape[1]``
+                columns; the pixels are handed to ncnn as ``PIXEL_BGR``.
+
+        Returns:
+            list of Detect_Object in the order produced by the network.
+        """
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+        side = self.target_size
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR, img_w, img_h, side, side
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        extractor = self.net.create_extractor()
+        extractor.set_num_threads(self.num_threads)
+        extractor.input("data", mat_in)
+
+        _, mat_out = extractor.extract("detection_out")
+
+        objects = []
+
+        # method 1, use ncnn.Mat.row to get the result, no memory copy
+        for row_idx in range(mat_out.h):
+            values = mat_out.row(row_idx)
+
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            # Top-left corner first; width/height are derived from it below.
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+
+            objects.append(obj)
+
+        """
+        #method 2, use ncnn.Mat->numpy.array to get the result, no memory copy too
+        out = np.array(mat_out)
+        for i in range(len(out)):
+            values = out[i]
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+            objects.append(obj)
+        """
+
+        return objects
--- a/3rdparty/ncnn/python/ncnn/model_zoo/yolact.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/yolact.py
@ -0,0 +1,347 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+from math import sqrt
+import numpy as np
+import cv2
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+from ..utils.functional import sigmoid, nms
+
+
+class Yolact:
+    """YOLACT (ResNet-50) instance-segmentation model backed by ncnn.
+
+    Calling an instance with an image returns ``(boxes, masks, classes,
+    scores)`` as numpy arrays; see ``__call__`` for details.
+    """
+
+    def __init__(
+        self,
+        target_size=550,
+        confidence_threshold=0.05,
+        nms_threshold=0.5,
+        keep_top_k=200,
+        num_threads=1,
+        use_gpu=False,
+    ):
+        """Store the settings, load the ``yolact`` model and build the priors.
+
+        Args:
+            target_size: width and height the input image is resized to.
+            confidence_threshold: a prior is kept only if its best
+                non-background score is strictly greater than this value.
+            nms_threshold: IoU threshold forwarded to ``nms``.
+            keep_top_k: maximum number of detections kept per class by
+                ``nms`` and overall by ``detect``.
+            num_threads: value assigned to ``net.opt.num_threads``.
+            use_gpu: value assigned to ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.confidence_threshold = confidence_threshold
+        self.nms_threshold = nms_threshold
+        self.keep_top_k = keep_top_k
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        self.mean_vals = [123.68, 116.78, 103.94]
+        self.norm_vals = [1.0 / 58.40, 1.0 / 57.12, 1.0 / 57.38]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+        self.net.opt.num_threads = self.num_threads
+
+        # original model converted from https://github.com/dbolya/yolact
+        # yolact_resnet50_54_800000.pth
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("yolact.param"))
+        self.net.load_model(get_model_file("yolact.bin"))
+
+        # Feature-map sizes and prior scales, one entry per prediction level.
+        self.conv_ws = [69, 35, 18, 9, 5]
+        self.conv_hs = [69, 35, 18, 9, 5]
+        self.aspect_ratios = [1, 0.5, 2]
+        self.scales = [24, 48, 96, 192, 384]
+
+        # Cache filled by make_priors(); last_img_size records the
+        # target size the cached priors were built for.
+        self.priors = None
+        self.last_img_size = None
+
+        self.make_priors()
+
+        self.class_names = [
+            "background",
+            "person",
+            "bicycle",
+            "car",
+            "motorcycle",
+            "airplane",
+            "bus",
+            "train",
+            "truck",
+            "boat",
+            "traffic light",
+            "fire hydrant",
+            "stop sign",
+            "parking meter",
+            "bench",
+            "bird",
+            "cat",
+            "dog",
+            "horse",
+            "sheep",
+            "cow",
+            "elephant",
+            "bear",
+            "zebra",
+            "giraffe",
+            "backpack",
+            "umbrella",
+            "handbag",
+            "tie",
+            "suitcase",
+            "frisbee",
+            "skis",
+            "snowboard",
+            "sports ball",
+            "kite",
+            "baseball bat",
+            "baseball glove",
+            "skateboard",
+            "surfboard",
+            "tennis racket",
+            "bottle",
+            "wine glass",
+            "cup",
+            "fork",
+            "knife",
+            "spoon",
+            "bowl",
+            "banana",
+            "apple",
+            "sandwich",
+            "orange",
+            "broccoli",
+            "carrot",
+            "hot dog",
+            "pizza",
+            "donut",
+            "cake",
+            "chair",
+            "couch",
+            "potted plant",
+            "bed",
+            "dining table",
+            "toilet",
+            "tv",
+            "laptop",
+            "mouse",
+            "remote",
+            "keyboard",
+            "cell phone",
+            "microwave",
+            "oven",
+            "toaster",
+            "sink",
+            "refrigerator",
+            "book",
+            "clock",
+            "vase",
+            "scissors",
+            "teddy bear",
+            "hair drier",
+            "toothbrush",
+        ]
+
+    def __del__(self):
+        """Drop the reference to the ncnn network."""
+        self.net = None
+
+    def __call__(self, img):
+        """Run detection and mask generation on ``img``.
+
+        Args:
+            img: image array with ``img.shape[0]`` rows and ``img.shape[1]``
+                columns; the pixels are converted with ``PIXEL_BGR2RGB``.
+
+        Returns:
+            tuple ``(boxes, masks, classes, scores)``:
+                boxes: ``[x, y, w, h]`` per detection, in image pixels.
+                masks: boolean array ``[num_dets, img_h, img_w]``.
+                classes: index into ``class_names`` per detection.
+                scores: confidence per detection.
+            When nothing is detected, ``masks`` has shape
+            ``(0, img_h, img_w)`` and the other arrays are empty.
+        """
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR2RGB,
+            img_w,
+            img_h,
+            self.target_size,
+            self.target_size,
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        ex = self.net.create_extractor()
+        ex.input("input.1", mat_in)
+
+        _, proto_data = ex.extract("619")  # 138x138 x 32
+        _, loc_data = ex.extract("816")  # 4 x 19248
+        _, mask_data = ex.extract("818")  # maskdim 32 x 19248
+        _, conf_data = ex.extract("820")  # 81 x 19248
+
+        proto_data = np.array(proto_data)
+        loc_data = np.array(loc_data)
+        mask_data = np.array(mask_data)
+        conf_data = np.array(conf_data)
+        prior_data = self.make_priors()
+
+        boxes, masks, classes, scores = self.detect(
+            conf_data, loc_data, prior_data, mask_data, img_w, img_h
+        )
+
+        # With no detections the matrix product and cv2.resize below would
+        # fail on the empty coefficient array, so return empty results.
+        if len(boxes) == 0:
+            empty_masks = np.zeros((0, img_h, img_w), dtype=bool)
+            return boxes, empty_masks, classes, scores
+
+        # generate mask: combine the prototypes with per-detection coefficients
+        masks = proto_data.transpose(1, 2, 0) @ masks.T
+        masks = sigmoid(masks)
+
+        # Scale masks up to the full image
+        masks = cv2.resize(masks, (img_w, img_h), interpolation=cv2.INTER_LINEAR)
+
+        # cv2.resize returns a 2-D array when there is exactly one detection;
+        # restore the trailing axis so the transpose below stays valid.
+        if masks.ndim == 2:
+            masks = masks[:, :, np.newaxis]
+
+        # transpose into the correct output shape [num_dets, img_h, img_w]
+        masks = masks.transpose(2, 0, 1)
+
+        masks = masks > 0.5
+
+        return boxes, masks, classes, scores
+
+    def make_priors(self):
+        """Build (or return the cached) prior boxes.
+
+        Priors are ``[cx, cy, w, h]`` rows relative to ``target_size``, where
+        ``(cx, cy)`` is the box centre. They are rebuilt only when
+        ``target_size`` differs from the one used for the cached array.
+
+        Returns:
+            numpy array of shape ``[num_priors, 4]``.
+        """
+        wanted_size = (self.target_size, self.target_size)
+        if self.last_img_size != wanted_size:
+            prior_data = []
+
+            for conv_w, conv_h, scale in zip(self.conv_ws, self.conv_hs, self.scales):
+                for i in range(conv_h):
+                    for j in range(conv_w):
+                        # +0.5 because priors are in center-size notation
+                        cx = (j + 0.5) / conv_w
+                        cy = (i + 0.5) / conv_h
+
+                        for ar in self.aspect_ratios:
+                            w = scale * sqrt(ar) / self.target_size
+                            # Backward compatibility with the original
+                            # YOLACT behaviour: every prior is square, so
+                            # the height is the width.
+                            h = w
+
+                            prior_data.extend((cx, cy, w, h))
+
+            self.priors = np.array(prior_data).reshape(-1, 4)
+            self.last_img_size = wanted_size
+
+        return self.priors
+
+    def decode(self, loc, priors, img_w, img_h):
+        """Decode box regressions against their priors into pixel boxes.
+
+        The centre is ``prior_xy + loc_xy * 0.1 * prior_wh`` and the size is
+        ``prior_wh * exp(loc_wh * 0.2)``, both relative to the image. Boxes
+        are then clipped to the unit square and scaled to the image size.
+
+        Args:
+            loc: predicted offsets, shape ``[num_priors, 4]``.
+            priors: prior boxes ``[cx, cy, w, h]``, shape ``[num_priors, 4]``.
+            img_w: width of the source image in pixels.
+            img_h: height of the source image in pixels.
+
+        Returns:
+            numpy array ``[num_priors, 4]`` of ``[x, y, w, h]`` where
+            ``(x, y)`` is the top-left corner in pixels and ``w``/``h`` are
+            the clipped extent in pixels plus one.
+        """
+        variances = [0.1, 0.2]
+
+        centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
+        sizes = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
+
+        # crop: the previous np.where(...) calls discarded their results, so
+        # nothing was clipped. Clip both corners to [0, 1] instead.
+        top_left = centers - sizes / 2
+        bottom_right = top_left + sizes
+        top_left = np.clip(top_left, 0.0, 1.0)
+        bottom_right = np.clip(bottom_right, 0.0, 1.0)
+
+        boxes = np.concatenate((top_left, bottom_right - top_left), 1)
+
+        # decode to img size
+        boxes[:, 0] *= img_w
+        boxes[:, 1] *= img_h
+        boxes[:, 2] = boxes[:, 2] * img_w + 1
+        boxes[:, 3] = boxes[:, 3] * img_h + 1
+
+        return boxes
+
+    def detect(self, conf_preds, loc_data, prior_data, mask_data, img_w, img_h):
+        """Threshold, decode and run per-class NMS on the raw outputs.
+
+        Only the best non-background class (class 0 is background) of each
+        prior is considered.
+
+        Args:
+            conf_preds: class scores, one row per prior, background first.
+            loc_data: box regressions, one row per prior.
+            prior_data: priors as returned by ``make_priors``.
+            mask_data: mask coefficients, one row per prior.
+            img_w: width of the source image in pixels.
+            img_h: height of the source image in pixels.
+
+        Returns:
+            tuple of numpy arrays ``(boxes, mask_coefficients, classes,
+            scores)`` with at most ``keep_top_k`` entries; ``classes`` are
+            indices into ``class_names`` (background excluded).
+        """
+        cur_scores = conf_preds[:, 1:]
+        num_class = cur_scores.shape[1]
+
+        classes = np.argmax(cur_scores, axis=1)
+        conf_scores = cur_scores[range(cur_scores.shape[0]), classes]
+
+        # filter by confidence_threshold
+        keep = conf_scores > self.confidence_threshold
+        conf_scores = conf_scores[keep]
+        classes = classes[keep]
+        loc_data = loc_data[keep, :]
+        prior_data = prior_data[keep, :]
+        masks = mask_data[keep, :]
+
+        # decode x, y, w, h
+        boxes = self.decode(loc_data, prior_data, img_w, img_h)
+
+        # nms for every class
+        boxes_result = []
+        masks_result = []
+        classes_result = []
+        conf_scores_result = []
+        for i in range(num_class):
+            # np.where returns a tuple of index arrays; test the index array
+            # itself, otherwise classes with no candidates are never skipped.
+            where = np.where(classes == i)[0]
+            if where.size == 0:
+                continue
+
+            boxes_tmp = boxes[where]
+            masks_tmp = masks[where]
+            classes_tmp = classes[where]
+            conf_scores_tmp = conf_scores[where]
+
+            indexes = nms(
+                boxes_tmp,
+                conf_scores_tmp,
+                iou_threshold=self.nms_threshold,
+                top_k=self.keep_top_k,
+            )
+
+            for index in indexes:
+                boxes_result.append(boxes_tmp[index])
+                masks_result.append(masks_tmp[index])
+                # +1 restores the offset removed by dropping the background
+                # column above.
+                classes_result.append(classes_tmp[index] + 1)
+                conf_scores_result.append(conf_scores_tmp[index])
+
+        # Convert before indexing: Python lists cannot be indexed with an
+        # index array.
+        boxes_result = np.array(boxes_result)
+        masks_result = np.array(masks_result)
+        classes_result = np.array(classes_result)
+        conf_scores_result = np.array(conf_scores_result)
+
+        # keep top k, highest scores first
+        if len(conf_scores_result) > self.keep_top_k:
+            order = np.argsort(-conf_scores_result)[: self.keep_top_k]
+
+            boxes_result = boxes_result[order]
+            masks_result = masks_result[order]
+            classes_result = classes_result[order]
+            conf_scores_result = conf_scores_result[order]
+
+        return (
+            boxes_result,
+            masks_result,
+            classes_result,
+            conf_scores_result,
+        )
--- a/3rdparty/ncnn/python/ncnn/model_zoo/yolov2.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/yolov2.py
@ -0,0 +1,121 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class MobileNet_YoloV2:
+    """MobileNet-YOLOv2 object detector backed by an ncnn network.
+
+    An instance is callable: give it an image and it returns a list of
+    ``Detect_Object`` entries, one per row of the network's detection output.
+    """
+
+    def __init__(self, target_size=416, num_threads=1, use_gpu=False):
+        """Create the ncnn network and load the MobileNet-YOLO model files.
+
+        Parameters
+        ----------
+        target_size : int
+            Width and height the input image is resized to in ``__call__``.
+        num_threads : int
+            Thread count handed to each extractor created in ``__call__``.
+        use_gpu : bool
+            Value stored into ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # __call__ issues two normalisation passes: the first passes only
+        # norm_vals, the second passes only mean_vals.
+        self.mean_vals = [1.0, 1.0, 1.0]
+        self.norm_vals = [0.007843, 0.007843, 0.007843]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # original pretrained model from https://github.com/eric612/MobileNet-YOLO
+        # https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy.prototxt
+        # https://github.com/eric612/MobileNet-YOLO/blob/master/models/yolov2/mobilenet_yolo_deploy_iter_80000.caffemodel
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        param_path = get_model_file("mobilenet_yolo.param")
+        self.net.load_param(param_path)
+        bin_path = get_model_file("mobilenet_yolo.bin")
+        self.net.load_model(bin_path)
+
+        self.class_names = [
+            "background", "aeroplane", "bicycle", "bird", "boat",
+            "bottle", "bus", "car", "cat", "chair",
+            "cow", "diningtable", "dog", "horse", "motorbike",
+            "person", "pottedplant", "sheep", "sofa", "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        # Drop the reference to the network when the wrapper is collected.
+        self.net = None
+
+    def __call__(self, img):
+        """Run detection on ``img`` and return a list of ``Detect_Object``.
+
+        ``img`` must expose ``shape`` with the height first and the width
+        second; its pixels are handed to ncnn as ``PIXEL_BGR`` data.
+        """
+        img_h, img_w = img.shape[0], img.shape[1]
+
+        side = self.target_size
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR, img_w, img_h, side, side
+        )
+        mat_in.substract_mean_normalize([], self.norm_vals)
+        mat_in.substract_mean_normalize(self.mean_vals, [])
+
+        extractor = self.net.create_extractor()
+        extractor.set_num_threads(self.num_threads)
+        extractor.input("data", mat_in)
+
+        # The status code returned next to the output blob is not inspected.
+        _, mat_out = extractor.extract("detection_out")
+
+        detections = []
+
+        # method 1: read every output row through ncnn.Mat.row.
+        # Row layout as consumed below: [label, prob, c2, c3, c4, c5]; c2/c4 are
+        # scaled by the image width and c3/c5 by the image height, and the box
+        # width/height are the differences between the scaled pairs.
+        for row_idx in range(mat_out.h):
+            row = mat_out.row(row_idx)
+
+            det = Detect_Object()
+            det.label = row[0]
+            det.prob = row[1]
+            det.rect.x = row[2] * img_w
+            det.rect.y = row[3] * img_h
+            det.rect.w = row[4] * img_w - det.rect.x
+            det.rect.h = row[5] * img_h - det.rect.y
+
+            detections.append(det)
+
+        """
+        #method 2, use ncnn.Mat->numpy.array to get the result, no memory copy too
+        out = np.array(mat_out)
+        for i in range(len(out)):
+            values = out[i]
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+            objects.append(obj)
+        """
+
+        return detections
--- a/3rdparty/ncnn/python/ncnn/model_zoo/yolov3.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/yolov3.py
@ -0,0 +1,119 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class MobileNetV2_YoloV3:
+    """MobileNetV2-YOLOv3 object detector backed by an ncnn network.
+
+    An instance is callable: give it an image and it returns a list of
+    ``Detect_Object`` entries, one per row of the network's detection output.
+    """
+
+    def __init__(self, target_size=352, num_threads=1, use_gpu=False):
+        """Create the ncnn network and load the MobileNetV2-YOLOv3 model files.
+
+        Parameters
+        ----------
+        target_size : int
+            Width and height the input image is resized to in ``__call__``.
+        num_threads : int
+            Thread count handed to each extractor created in ``__call__``.
+        use_gpu : bool
+            Value stored into ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # Both lists are passed together to substract_mean_normalize in __call__.
+        self.mean_vals = [127.5, 127.5, 127.5]
+        self.norm_vals = [0.007843, 0.007843, 0.007843]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+
+        # original pretrained model from https://github.com/eric612/MobileNet-YOLO
+        # param : https://drive.google.com/open?id=1V9oKHP6G6XvXZqhZbzNKL6FI_clRWdC-
+        # bin : https://drive.google.com/open?id=1DBcuFCr-856z3FRQznWL_S5h-Aj3RawA
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        param_path = get_model_file("mobilenetv2_yolov3.param")
+        self.net.load_param(param_path)
+        bin_path = get_model_file("mobilenetv2_yolov3.bin")
+        self.net.load_model(bin_path)
+
+        self.class_names = [
+            "background", "aeroplane", "bicycle", "bird", "boat",
+            "bottle", "bus", "car", "cat", "chair",
+            "cow", "diningtable", "dog", "horse", "motorbike",
+            "person", "pottedplant", "sheep", "sofa", "train",
+            "tvmonitor",
+        ]
+
+    def __del__(self):
+        # Drop the reference to the network when the wrapper is collected.
+        self.net = None
+
+    def __call__(self, img):
+        """Run detection on ``img`` and return a list of ``Detect_Object``.
+
+        ``img`` must expose ``shape`` with the height first and the width
+        second; its pixels are handed to ncnn as ``PIXEL_BGR`` data.
+        """
+        img_h, img_w = img.shape[0], img.shape[1]
+
+        side = self.target_size
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR, img_w, img_h, side, side
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        extractor = self.net.create_extractor()
+        extractor.set_num_threads(self.num_threads)
+        extractor.input("data", mat_in)
+
+        # The status code returned next to the output blob is not inspected.
+        _, mat_out = extractor.extract("detection_out")
+
+        detections = []
+
+        # method 1: read every output row through ncnn.Mat.row.
+        # Row layout as consumed below: [label, prob, c2, c3, c4, c5]; c2/c4 are
+        # scaled by the image width and c3/c5 by the image height, and the box
+        # width/height are the differences between the scaled pairs.
+        for row_idx in range(mat_out.h):
+            row = mat_out.row(row_idx)
+
+            det = Detect_Object()
+            det.label = row[0]
+            det.prob = row[1]
+            det.rect.x = row[2] * img_w
+            det.rect.y = row[3] * img_h
+            det.rect.w = row[4] * img_w - det.rect.x
+            det.rect.h = row[5] * img_h - det.rect.y
+
+            detections.append(det)
+
+        # NOTE(review): the string below is an unused snippet, not executed code;
+        # it writes obj.x/y/w/h whereas the loop above writes obj.rect.* --
+        # confirm against Detect_Object before reviving it.
+        """
+        #method 2, use ncnn.Mat->numpy.array to get the result, no memory copy too
+        out = np.array(mat_out)
+        for i in range(len(out)):
+            values = out[i]
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.x = values[2] * img_w
+            obj.y = values[3] * img_h
+            obj.w = values[4] * img_w - obj.x
+            obj.h = values[5] * img_h - obj.y
+            objects.append(obj)
+        """
+
+        return detections
--- a/3rdparty/ncnn/python/ncnn/model_zoo/yolov4.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/yolov4.py
@ -0,0 +1,190 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+
+
+class YoloV4_Base:
+    """Shared implementation of the YOLOv4 and YOLOv4-tiny ncnn detectors.
+
+    An instance is callable: give it an image and it returns a list of
+    ``Detect_Object`` entries, one per row of the network's ``output`` blob.
+    """
+
+    def __init__(self, tiny, target_size, num_threads=1, use_gpu=False):
+        """Create the ncnn network and load the selected YOLOv4 model files.
+
+        Parameters
+        ----------
+        tiny : bool
+            When true the ``yolov4-tiny-opt`` files are loaded, otherwise the
+            ``yolov4-opt`` files.
+        target_size : int
+            Width and height the input image is resized to in ``__call__``.
+        num_threads : int
+            Value stored into ``net.opt.num_threads``.
+        use_gpu : bool
+            Value stored into ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # Empty mean list: only the 1/255 scaling is passed to ncnn.
+        self.mean_vals = []
+        self.norm_vals = [1 / 255.0, 1 / 255.0, 1 / 255.0]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+        self.net.opt.num_threads = self.num_threads
+
+        # original pretrained model from https://github.com/AlexeyAB/darknet
+        # the ncnn model https://drive.google.com/drive/folders/1YzILvh0SKQPS_lrb33dmGNq7aVTKPWS0?usp=sharing
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        # Plain truthiness test instead of comparing against the True literal.
+        if tiny:
+            self.net.load_param(get_model_file("yolov4-tiny-opt.param"))
+            self.net.load_model(get_model_file("yolov4-tiny-opt.bin"))
+        else:
+            self.net.load_param(get_model_file("yolov4-opt.param"))
+            self.net.load_model(get_model_file("yolov4-opt.bin"))
+
+        self.class_names = [
+            "background",
+            "person", "bicycle", "car", "motorbike", "aeroplane",
+            "bus", "train", "truck", "boat", "traffic light",
+            "fire hydrant", "stop sign", "parking meter", "bench", "bird",
+            "cat", "dog", "horse", "sheep", "cow",
+            "elephant", "bear", "zebra", "giraffe", "backpack",
+            "umbrella", "handbag", "tie", "suitcase", "frisbee",
+            "skis", "snowboard", "sports ball", "kite", "baseball bat",
+            "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+            "wine glass", "cup", "fork", "knife", "spoon",
+            "bowl", "banana", "apple", "sandwich", "orange",
+            "broccoli", "carrot", "hot dog", "pizza", "donut",
+            "cake", "chair", "sofa", "pottedplant", "bed",
+            "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
+            "remote", "keyboard", "cell phone", "microwave", "oven",
+            "toaster", "sink", "refrigerator", "book", "clock",
+            "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
+        ]
+
+    def __del__(self):
+        # Drop the reference to the network when the wrapper is collected.
+        self.net = None
+
+    def __call__(self, img):
+        """Run detection on ``img`` and return a list of ``Detect_Object``.
+
+        ``img`` must expose ``shape`` with the height first and the width
+        second; its pixels are handed to ncnn with ``PIXEL_BGR2RGB``.
+        """
+        img_h = img.shape[0]
+        img_w = img.shape[1]
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img,
+            ncnn.Mat.PixelType.PIXEL_BGR2RGB,
+            img.shape[1],
+            img.shape[0],
+            self.target_size,
+            self.target_size,
+        )
+        mat_in.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        ex = self.net.create_extractor()
+        ex.input("data", mat_in)
+
+        # The status code returned next to the output blob is not inspected.
+        ret, mat_out = ex.extract("output")
+
+        objects = []
+
+        # method 1: read every output row through ncnn.Mat.row.
+        # Row layout as consumed below: [label, prob, c2, c3, c4, c5]; c2/c4 are
+        # scaled by the image width and c3/c5 by the image height, and the box
+        # width/height are the differences between the scaled pairs.
+        for i in range(mat_out.h):
+            values = mat_out.row(i)
+
+            obj = Detect_Object()
+            obj.label = values[0]
+            obj.prob = values[1]
+            obj.rect.x = values[2] * img_w
+            obj.rect.y = values[3] * img_h
+            obj.rect.w = values[4] * img_w - obj.rect.x
+            obj.rect.h = values[5] * img_h - obj.rect.y
+
+            objects.append(obj)
+
+        # method 2 (alternative noted by the original author, not executed):
+        # convert the blob with np.array(mat_out) and fill obj.label, obj.prob
+        # and obj.rect.x/y/w/h from each row exactly as above. numpy is not
+        # imported in this module, so it would have to be imported first.
+
+        return objects
+
+
+class YoloV4_Tiny(YoloV4_Base):
+    """YOLOv4-tiny detector: the base class with tiny weights at 416 input size."""
+
+    def __init__(self, **kwargs):
+        # Remaining keyword arguments are forwarded untouched to YoloV4_Base.
+        use_tiny_weights = True
+        input_size = 416
+        super(YoloV4_Tiny, self).__init__(use_tiny_weights, input_size, **kwargs)
+
+
+class YoloV4(YoloV4_Base):
+    """Full YOLOv4 detector: the base class with full weights at 608 input size."""
+
+    def __init__(self, **kwargs):
+        # Remaining keyword arguments are forwarded untouched to YoloV4_Base.
+        use_tiny_weights = False
+        input_size = 608
+        super(YoloV4, self).__init__(use_tiny_weights, input_size, **kwargs)
--- a/3rdparty/ncnn/python/ncnn/model_zoo/yolov5.py
+++ b/3rdparty/ncnn/python/ncnn/model_zoo/yolov5.py
@ -0,0 +1,366 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import time
+import numpy as np
+import ncnn
+from .model_store import get_model_file
+from ..utils.objects import Detect_Object
+from ..utils.functional import *
+
+
+class YoloV5Focus(ncnn.Layer):
+    """Custom ncnn layer that performs the YOLOv5 "Focus" slicing in numpy.
+
+    Every instance appends itself to ``yolov5FocusLayers`` on construction;
+    ``YoloV5Focus_layer_destroyer`` removes it from that list again.
+    """
+
+    # Instances created so far.
+    yolov5FocusLayers = []
+
+    def __init__(self):
+        ncnn.Layer.__init__(self)
+        self.one_blob_only = True
+
+        self.yolov5FocusLayers.append(self)
+
+    def forward(self, bottom_blob, top_blob, opt):
+        """Stack the four stride-2 sub-samplings of ``bottom_blob`` into ``top_blob``.
+
+        Returns 0 on success and -100 when ``top_blob`` is empty after the copy.
+        """
+        src = np.array(bottom_blob)
+        # (offset on the second-to-last axis, offset on the last axis) for each
+        # stride-2 view, in the order the views are joined along axis 0.
+        offsets = ((0, 0), (1, 0), (0, 1), (1, 1))
+        stacked = np.concatenate([src[..., a::2, b::2] for a, b in offsets])
+
+        top_blob.clone_from(ncnn.Mat(stacked), opt.blob_allocator)
+        return -100 if top_blob.empty() else 0
+
+
+def YoloV5Focus_layer_creator():
+    """Factory registered with ncnn: build and return a new ``YoloV5Focus`` layer."""
+    layer = YoloV5Focus()
+    return layer
+
+
+def YoloV5Focus_layer_destroyer(layer):
+    """Remove the first entry equal to ``layer`` from ``YoloV5Focus.yolov5FocusLayers``.
+
+    Does nothing when no entry matches.
+    """
+    registry = YoloV5Focus.yolov5FocusLayers
+    for position, candidate in enumerate(registry):
+        if candidate == layer:
+            del registry[position]
+            break
+
+
+class YoloV5s:
+    """YOLOv5s object detector backed by an ncnn network.
+
+    An instance is callable: the image is resized and padded, the three output
+    heads are decoded in numpy, and the surviving boxes are returned as a list
+    of ``Detect_Object`` mapped back to the input image's pixel coordinates.
+    """
+
+    def __init__(
+        self,
+        target_size=640,
+        prob_threshold=0.25,
+        nms_threshold=0.45,
+        num_threads=1,
+        use_gpu=False,
+    ):
+        """Create the ncnn network, register the Focus layer and load the model.
+
+        Parameters
+        ----------
+        target_size : int
+            Length the longer image side is resized to in ``__call__``.
+        prob_threshold : float
+            Confidence threshold passed to ``non_max_suppression``.
+        nms_threshold : float
+            IoU threshold passed to ``non_max_suppression``.
+        num_threads : int
+            Value stored into ``net.opt.num_threads``.
+        use_gpu : bool
+            Value stored into ``net.opt.use_vulkan_compute``.
+        """
+        self.target_size = target_size
+        self.prob_threshold = prob_threshold
+        self.nms_threshold = nms_threshold
+        self.num_threads = num_threads
+        self.use_gpu = use_gpu
+
+        # Empty mean list: only the 1/255 scaling is passed to ncnn.
+        self.mean_vals = []
+        self.norm_vals = [1 / 255.0, 1 / 255.0, 1 / 255.0]
+
+        self.net = ncnn.Net()
+        self.net.opt.use_vulkan_compute = self.use_gpu
+        self.net.opt.num_threads = self.num_threads
+
+        self.net.register_custom_layer(
+            "YoloV5Focus", YoloV5Focus_layer_creator, YoloV5Focus_layer_destroyer
+        )
+
+        # original pretrained model from https://github.com/ultralytics/yolov5
+        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
+        self.net.load_param(get_model_file("yolov5s.param"))
+        self.net.load_model(get_model_file("yolov5s.bin"))
+
+        # Per-head cell grids, strides and anchors, ordered stride 32, 16, 8 to
+        # match the order in which __call__ stacks the head outputs.
+        self.grid = [make_grid(10, 6), make_grid(20, 12), make_grid(40, 24)]
+        self.stride = np.array([32, 16, 8])
+        self.anchor_grid = np.array(
+            [
+                [116, 90, 156, 198, 373, 326],
+                [30, 61, 62, 45, 59, 119],
+                [10, 13, 16, 30, 33, 23],
+            ]
+        ).reshape((3, 1, 3, 1, 1, 2))
+
+        self.class_names = [
+            "person", "bicycle", "car", "motorcycle", "airplane",
+            "bus", "train", "truck", "boat", "traffic light",
+            "fire hydrant", "stop sign", "parking meter", "bench", "bird",
+            "cat", "dog", "horse", "sheep", "cow",
+            "elephant", "bear", "zebra", "giraffe", "backpack",
+            "umbrella", "handbag", "tie", "suitcase", "frisbee",
+            "skis", "snowboard", "sports ball", "kite", "baseball bat",
+            "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+            "wine glass", "cup", "fork", "knife", "spoon",
+            "bowl", "banana", "apple", "sandwich", "orange",
+            "broccoli", "carrot", "hot dog", "pizza", "donut",
+            "cake", "chair", "couch", "potted plant", "bed",
+            "dining table", "toilet", "tv", "laptop", "mouse",
+            "remote", "keyboard", "cell phone", "microwave", "oven",
+            "toaster", "sink", "refrigerator", "book", "clock",
+            "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
+        ]
+
+    def __del__(self):
+        # Drop the reference to the network when the wrapper is collected.
+        self.net = None
+
+    def __call__(self, img):
+        """Run detection on ``img`` and return a list of ``Detect_Object``.
+
+        ``img`` must expose ``shape`` with the height first and the width
+        second; its pixels are handed to ncnn with ``PIXEL_BGR2RGB``. An empty
+        list is returned when no box survives thresholding.
+        """
+        img_w = img.shape[1]
+        img_h = img.shape[0]
+
+        # Resize so the longer side equals target_size, keeping the aspect ratio.
+        w = img_w
+        h = img_h
+        scale = 1.0
+        if w > h:
+            scale = float(self.target_size) / w
+            w = self.target_size
+            h = int(h * scale)
+        else:
+            scale = float(self.target_size) / h
+            h = self.target_size
+            w = int(w * scale)
+
+        mat_in = ncnn.Mat.from_pixels_resize(
+            img, ncnn.Mat.PixelType.PIXEL_BGR2RGB, img_w, img_h, w, h
+        )
+        # pad each side up to the next multiple of 32
+        # yolov5/utils/datasets.py letterbox
+        wpad = (w + 31) // 32 * 32 - w
+        hpad = (h + 31) // 32 * 32 - h
+        pad_top = hpad // 2
+        pad_left = wpad // 2
+        mat_in_pad = ncnn.copy_make_border(
+            mat_in,
+            pad_top,
+            hpad - pad_top,
+            pad_left,
+            wpad - pad_left,
+            ncnn.BorderType.BORDER_CONSTANT,
+            114.0,
+        )
+
+        mat_in_pad.substract_mean_normalize(self.mean_vals, self.norm_vals)
+
+        ex = self.net.create_extractor()
+        ex.input("images", mat_in_pad)
+
+        # anchor setting from yolov5/models/yolov5s.yaml
+        ret1, mat_out1 = ex.extract("output")  # stride 8
+        ret2, mat_out2 = ex.extract("781")  # stride 16
+        ret3, mat_out3 = ex.extract("801")  # stride 32
+
+        pred = [np.array(mat_out3), np.array(mat_out2), np.array(mat_out1)]
+        z = []
+        for i in range(len(pred)):
+            # Recover the grid dimensions of this head from the padded input
+            # size and the number of cells in the head's output.
+            num_grid = pred[i].shape[1]
+            if mat_in_pad.w > mat_in_pad.h:
+                num_grid_x = mat_in_pad.w // self.stride[i]
+                num_grid_y = num_grid // num_grid_x
+            else:
+                num_grid_y = mat_in_pad.h // self.stride[i]
+                num_grid_x = num_grid // num_grid_y
+            # Cached grids are shaped (1, ny, nx, 2): axis 1 is the row count
+            # and axis 2 the column count. Rebuild only when the size changed.
+            if (
+                self.grid[i].shape[2] != num_grid_x
+                or self.grid[i].shape[1] != num_grid_y
+            ):
+                self.grid[i] = make_grid(num_grid_x, num_grid_y)
+
+            y = sigmoid(pred[i])
+            y = y.reshape(pred[i].shape[0], num_grid_y, num_grid_x, pred[i].shape[2])
+            y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[
+                i
+            ]  # xy
+            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
+            z.append(y.reshape(1, -1, y.shape[-1]))
+        pred = np.concatenate(z, 1)
+
+        result = self.non_max_suppression(
+            pred, self.prob_threshold, self.nms_threshold
+        )[0]
+        # non_max_suppression leaves None for an image without any detection.
+        if result is None:
+            return []
+
+        # Undo the letterbox: subtract exactly the padding that was added on the
+        # left/top (integer halves), then undo the resize.
+        objects = [
+            Detect_Object(
+                obj[5],
+                obj[4],
+                (obj[0] - pad_left) / scale,
+                (obj[1] - pad_top) / scale,
+                (obj[2] - obj[0]) / scale,
+                (obj[3] - obj[1]) / scale,
+            )
+            for obj in result
+        ]
+
+        return objects
+
+    def non_max_suppression(
+        self,
+        prediction,
+        conf_thres=0.1,
+        iou_thres=0.6,
+        merge=False,
+        classes=None,
+        agnostic=False,
+    ):
+        """Performs Non-Maximum Suppression (NMS) on inference results
+
+        Parameters:
+            prediction: array indexed as [image, box, (cx, cy, w, h, obj_conf,
+                per-class scores...)].
+            conf_thres: minimum objectness and minimum final confidence.
+            iou_thres: IoU threshold handed to ``nms``.
+            merge: when true, try to replace kept boxes by a score-weighted
+                mean of overlapping boxes (best effort, see note in the body).
+            classes: optional collection of class indices to keep.
+            agnostic: when true, boxes of different classes suppress each other.
+
+        Returns:
+            list with one entry per image: detections with shape nx6
+            (x1, y1, x2, y2, conf, cls), or None when nothing was kept.
+        """
+        nc = prediction[0].shape[1] - 5  # number of classes
+        xc = prediction[..., 4] > conf_thres  # candidates
+
+        # Settings
+        max_wh = 4096  # (pixels) per-class offset used to separate classes in NMS
+        max_det = 300  # maximum number of detections per image
+        time_limit = 10.0  # seconds to quit after
+        redundant = True  # require redundant detections
+        multi_label = nc > 1  # multiple labels per box
+
+        t = time.time()
+        output = [None] * prediction.shape[0]
+        for xi, x in enumerate(prediction):  # image index, image inference
+            # Boolean indexing copies, so the in-place scaling below does not
+            # touch the caller's array.
+            x = x[xc[xi]]  # confidence
+
+            # If none remain process next image
+            if not x.shape[0]:
+                continue
+
+            # Compute conf
+            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+            box = xywh2xyxy(x[:, :4])
+
+            # Detections matrix nx6 (xyxy, conf, cls)
+            if multi_label:
+                i, j = (x[:, 5:] > conf_thres).nonzero()
+                x = np.concatenate(
+                    (box[i], x[i, j + 5, None], j[:, None].astype(np.float32)), axis=1
+                )
+            else:  # best class only
+                cls_scores = x[:, 5:]
+                j = cls_scores.argmax(1)[:, None]
+                conf = cls_scores.max(1, keepdims=True)
+                x = np.concatenate((box, conf, j.astype(np.float32)), axis=1)[
+                    conf.reshape(-1) > conf_thres
+                ]
+
+            # Filter by class
+            if classes:
+                x = x[(x[:, 5:6] == np.array(classes)).any(1)]
+
+            # If none remain process next image
+            n = x.shape[0]  # number of boxes
+            if not n:
+                continue
+
+            # Batched NMS: shifting every box by class_index * max_wh keeps boxes
+            # of different classes from overlapping unless agnostic is set.
+            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+            i = nms(boxes, scores, iou_threshold=iou_thres)
+            if len(i) > max_det:  # limit detections
+                i = i[:max_det]
+            if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
+                # NOTE(review): box_iou is not defined or imported in the visible
+                # part of this module; if it is missing, the NameError is reported
+                # by the handler below and the unmerged boxes are kept.
+                try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+                    iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+                    weights = iou * scores[None]  # box weights
+                    x[i, :4] = np.matmul(weights, x[:, :4]) / weights.sum(
+                        1, keepdims=True
+                    )  # merged boxes
+                    if redundant:
+                        i = np.asarray(i)[iou.sum(1) > 1]  # require redundancy
+                except Exception:  # merging is best effort; keep the NMS result
+                    print(x, i, x.shape, np.shape(i))
+
+            output[xi] = x[i]
+            if (time.time() - t) > time_limit:
+                break  # time limit exceeded
+
+        return output
--- a/3rdparty/ncnn/python/ncnn/utils/init.py
+++ b/3rdparty/ncnn/python/ncnn/utils/init.py
@ -0,0 +1,17 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+from .download import download, check_sha1
+from .visual import *
+from .objects import *
--- a/3rdparty/ncnn/python/ncnn/utils/download.py
+++ b/3rdparty/ncnn/python/ncnn/utils/download.py
@ -0,0 +1,115 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+"""Download files with progress bar."""
+
+import os
+import hashlib
+import requests
+from tqdm import tqdm
+
+
+def check_sha1(filename, sha1_hash):
+    """Tell whether a file's SHA-1 digest agrees with an expected hash.
+
+    Only the first ``min(len(digest), len(sha1_hash))`` characters are
+    compared, so a shortened expected hash is accepted as a prefix.
+
+    Parameters
+    ----------
+    filename : str
+        Path to the file.
+    sha1_hash : str
+        Expected sha1 hash in hexadecimal digits.
+    Returns
+    -------
+    bool
+        Whether the file content matches the expected hash.
+    """
+    digest = hashlib.sha1()
+    with open(filename, "rb") as stream:
+        # Feed the file in 1 MiB pieces until read() returns an empty result.
+        for block in iter(lambda: stream.read(1048576), b""):
+            digest.update(block)
+
+    actual = digest.hexdigest()
+    compared = min(len(actual), len(sha1_hash))
+    return actual[:compared] == sha1_hash[:compared]
+
+
+def download(url, path=None, overwrite=False, sha1_hash=None):
+    """Download a given URL.
+
+    Nothing is fetched when the destination file already exists, unless
+    ``overwrite`` is set or ``sha1_hash`` is given and does not match it.
+
+    Parameters
+    ----------
+    url : str
+        URL to download
+    path : str, optional
+        Destination path to store downloaded file. By default stores to the
+        current directory with same name as in url.
+    overwrite : bool, optional
+        Whether to overwrite destination file if already exists.
+    sha1_hash : str, optional
+        Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
+        but doesn't match.
+    Returns
+    -------
+    str
+        The file path of the downloaded file.
+    Raises
+    ------
+    RuntimeError
+        If the server answers with a status code other than 200.
+    UserWarning
+        If ``sha1_hash`` is given and the downloaded content does not match it.
+    """
+    if path is None:
+        fname = url.split("/")[-1]
+    else:
+        path = os.path.expanduser(path)
+        if os.path.isdir(path):
+            # A directory was given: keep the file name from the URL.
+            fname = os.path.join(path, url.split("/")[-1])
+        else:
+            fname = path
+
+    if (
+        overwrite
+        or not os.path.exists(fname)
+        or (sha1_hash and not check_sha1(fname, sha1_hash))
+    ):
+        dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+
+        print("Downloading %s from %s..." % (fname, url))
+        r = requests.get(url, stream=True)
+        # A streamed response holds its connection until it is fully read or
+        # closed, so close it on every path, including errors.
+        try:
+            if r.status_code != 200:
+                raise RuntimeError("Failed downloading url %s" % url)
+            total_length = r.headers.get("content-length")
+            with open(fname, "wb") as f:
+                if total_length is None:  # no content length header
+                    for chunk in r.iter_content(chunk_size=1024):
+                        if chunk:  # filter out keep-alive new chunks
+                            f.write(chunk)
+                else:
+                    total_length = int(total_length)
+                    # One progress tick per 1 KiB chunk.
+                    for chunk in tqdm(
+                        r.iter_content(chunk_size=1024),
+                        total=int(total_length / 1024.0 + 0.5),
+                        unit="KB",
+                        unit_scale=False,
+                        dynamic_ncols=True,
+                    ):
+                        f.write(chunk)
+        finally:
+            r.close()
+
+        if sha1_hash and not check_sha1(fname, sha1_hash):
+            raise UserWarning(
+                "File {} is downloaded but the content hash does not match. "
+                "The repo may be outdated or download may be incomplete. "
+                'If the "repo_url" is overridden, consider switching to '
+                "the default repo.".format(fname)
+            )
+
+    return fname
--- a/3rdparty/ncnn/python/ncnn/utils/functional.py
+++ b/3rdparty/ncnn/python/ncnn/utils/functional.py
@ -0,0 +1,120 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+
+
+def xywh2xyxy(x):
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = np.zeros_like(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+    return y
+
+
+def xyxy2xywh(x):
+    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+    y = np.zeros_like(x)
+    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
+    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
+    y[:, 2] = x[:, 2] - x[:, 0]  # width
+    y[:, 3] = x[:, 3] - x[:, 1]  # height
+    return y
+
+
+def make_grid(nx=20, ny=20):
+    xv1, yv1 = np.meshgrid(np.arange(nx), np.arange(ny))
+    z1 = np.stack((xv1, yv1), 2).reshape((1, ny, nx, 2)).astype(np.float32)
+    return z1
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def softmax(x):
+    max_value = np.max(x, axis=-1)
+    x -= max_value.reshape((x.shape[0], 1))
+    x = np.exp(x)
+    sum_value = np.sum(x, axis=-1)
+    x /= sum_value.reshape((x.shape[0], 1))
+    return x
+
+
+def iou_of(boxes0, boxes1, eps=1e-5):
+    """Return intersection-over-union (Jaccard index) of boxes.
+
+    Args:
+        boxes0 (N, 4): ground truth boxes.
+        boxes1 (N or 1, 4): predicted boxes.
+        eps: a small number to avoid 0 as denominator.
+    Returns:
+        iou (N): IoU values.
+    """
+    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
+    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
+
+    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
+    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
+    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
+    return overlap_area / (area0 + area1 - overlap_area + eps)
+
+
+def area_of(left_top, right_bottom):
+    """Compute the areas of rectangles given two corners.
+
+    Args:
+        left_top (N, 2): left top corner.
+        right_bottom (N, 2): right bottom corner.
+
+    Returns:
+        area (N): return the area.
+    """
+    hw = np.clip(right_bottom - left_top, 0.0, None)
+    return hw[..., 0] * hw[..., 1]
+
+
+def nms(boxes, scores, iou_threshold, top_k=-1, candidate_size=200):
+    """
+
+    Args:
+        box_scores (N, 5): boxes in corner-form(x1, y1, x2, y2) and probabilities.
+        iou_threshold: intersection over union threshold.
+        top_k: keep top_k results. If k <= 0, keep all the results.
+        candidate_size: only consider the candidates with the highest scores.
+    Returns:
+         picked: a list of indexes of the kept boxes
+    """
+
+    picked = []
+    indexes = np.argsort(scores)
+    indexes = indexes[-candidate_size:]
+    while len(indexes) > 0:
+        current = indexes[-1]
+        picked.append(current)
+        if 0 < top_k == len(picked) or len(indexes) == 1:
+            break
+
+        current_box = boxes[current, :]
+        indexes = indexes[:-1]
+        rest_boxes = boxes[indexes, :]
+        iou = iou_of(
+            rest_boxes,
+            np.expand_dims(current_box, axis=0),
+        )
+        indexes = indexes[iou <= iou_threshold]
+
+    return picked
--- a/3rdparty/ncnn/python/ncnn/utils/objects.py
+++ b/3rdparty/ncnn/python/ncnn/utils/objects.py
@ -0,0 +1,59 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+
+
+class Point(object):
+    def __init__(self):
+        self.x = 0.0
+        self.y = 0.0
+
+
+class Rect(object):
+    def __init__(self, x=0, y=0, w=0, h=0):
+        self.x = x
+        self.y = y
+        self.w = w
+        self.h = h
+
+    def area(self):
+        return self.w * self.h
+
+    def intersection_area(self, b):
+        x1 = np.maximum(self.x, b.x)
+        y1 = np.maximum(self.y, b.y)
+        x2 = np.minimum(self.x + self.w, b.x + b.w)
+        y2 = np.minimum(self.y + self.h, b.y + b.h)
+        return np.abs(x1 - x2) * np.abs(y1 - y2)
+
+
+class Detect_Object(object):
+    def __init__(self, label=0, prob=0, x=0, y=0, w=0, h=0):
+        self.label = label
+        self.prob = prob
+        self.rect = Rect(x, y, w, h)
+
+
+class Face_Object(object):
+    def __init__(self):
+        self.prob = 0.0
+        self.rect = Rect()
+        self.landmark = []
+
+
+class KeyPoint(object):
+    def __init__(self):
+        self.p = Point()
+        self.prob = 0.0
--- a/3rdparty/ncnn/python/ncnn/utils/visual.py
+++ b/3rdparty/ncnn/python/ncnn/utils/visual.py
@ -0,0 +1,205 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import cv2
+from .objects import Detect_Object, Face_Object
+
+
+def draw_detection_objects(image, class_names, objects, min_prob=0.0):
+    for obj in objects:
+        if obj.prob < min_prob:
+            continue
+
+        print(
+            "%d = %.5f at %.2f %.2f %.2f x %.2f\n"
+            % (obj.label, obj.prob, obj.rect.x, obj.rect.y, obj.rect.w, obj.rect.h)
+        )
+
+        cv2.rectangle(
+            image,
+            (int(obj.rect.x), int(obj.rect.y)),
+            (int(obj.rect.x + obj.rect.w), int(obj.rect.y + obj.rect.h)),
+            (255, 0, 0),
+        )
+
+        text = "%s %.1f%%" % (class_names[int(obj.label)], obj.prob * 100)
+
+        label_size, baseLine = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+        x = obj.rect.x
+        y = obj.rect.y - label_size[1] - baseLine
+        if y < 0:
+            y = 0
+        if x + label_size[0] > image.shape[1]:
+            x = image.shape[1] - label_size[0]
+
+        cv2.rectangle(
+            image,
+            (int(x), int(y)),
+            (int(x + label_size[0]), int(y + label_size[1] + baseLine)),
+            (255, 255, 255),
+            -1,
+        )
+
+        cv2.putText(
+            image,
+            text,
+            (int(x), int(y + label_size[1])),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (0, 0, 0),
+        )
+
+    cv2.imshow("image", image)
+    cv2.waitKey(0)
+
+
+def print_topk(cls_scores, topk):
+    indexes = np.argsort(cls_scores)[::-1][0:topk]
+    scores = cls_scores[indexes]
+
+    for index, score in zip(indexes, scores):
+        print("%d=%f" % (index, score))
+
+
+def draw_faceobjects(image, faceobjects):
+    for obj in faceobjects:
+        print(
+            "%.5f at %.2f %.2f %.2f x %.2f"
+            % (obj.prob, obj.rect.x, obj.rect.y, obj.rect.w, obj.rect.h)
+        )
+
+        cv2.rectangle(
+            image,
+            (int(obj.rect.x), int(obj.rect.y)),
+            (int(obj.rect.x + obj.rect.w), int(obj.rect.y + obj.rect.h)),
+            (255, 0, 0),
+        )
+
+        cv2.circle(
+            image,
+            (int(obj.landmark[0].x), int(obj.landmark[0].y)),
+            2,
+            (0, 255, 255),
+            -1,
+        )
+        cv2.circle(
+            image,
+            (int(obj.landmark[1].x), int(obj.landmark[1].y)),
+            2,
+            (0, 255, 255),
+            -1,
+        )
+        cv2.circle(
+            image,
+            (int(obj.landmark[2].x), int(obj.landmark[2].y)),
+            2,
+            (0, 255, 255),
+            -1,
+        )
+        cv2.circle(
+            image,
+            (int(obj.landmark[3].x), int(obj.landmark[3].y)),
+            2,
+            (0, 255, 255),
+            -1,
+        )
+        cv2.circle(
+            image,
+            (int(obj.landmark[4].x), int(obj.landmark[4].y)),
+            2,
+            (0, 255, 255),
+            -1,
+        )
+
+        text = "%.1f%%" % (obj.prob * 100)
+
+        label_size, baseLine = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+        x = obj.rect.x
+        y = obj.rect.y - label_size[1] - baseLine
+        if y < 0:
+            y = 0
+        if x + label_size[0] > image.shape[1]:
+            x = image.shape[1] - label_size[0]
+
+        cv2.rectangle(
+            image,
+            (int(x), int(y)),
+            (int(x + label_size[0]), int(y + label_size[1] + baseLine)),
+            (255, 255, 255),
+            -1,
+        )
+
+        cv2.putText(
+            image,
+            text,
+            (int(x), int(y + label_size[1])),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (0, 0, 0),
+        )
+
+    cv2.imshow("image", image)
+    cv2.waitKey(0)
+
+
+def draw_pose(image, keypoints):
+    # draw bone
+    joint_pairs = [
+        (0, 1),
+        (1, 3),
+        (0, 2),
+        (2, 4),
+        (5, 6),
+        (5, 7),
+        (7, 9),
+        (6, 8),
+        (8, 10),
+        (5, 11),
+        (6, 12),
+        (11, 12),
+        (11, 13),
+        (12, 14),
+        (13, 15),
+        (14, 16),
+    ]
+
+    for i in range(16):
+        p1 = keypoints[joint_pairs[i][0]]
+        p2 = keypoints[joint_pairs[i][1]]
+
+        if p1.prob < 0.2 or p2.prob < 0.2:
+            continue
+
+        cv2.line(
+            image,
+            (int(p1.p.x), int(p1.p.y)),
+            (int(p2.p.x), int(p2.p.y)),
+            (255, 0, 0),
+            2,
+        )
+
+    # draw joint
+    for keypoint in keypoints:
+        print("%.2f %.2f = %.5f" % (keypoint.p.x, keypoint.p.y, keypoint.prob))
+
+        if keypoint.prob < 0.2:
+            continue
+
+        cv2.circle(image, (int(keypoint.p.x), int(keypoint.p.y)), 3, (0, 255, 0), -1)
+
+    cv2.imshow("image", image)
+    cv2.waitKey(0)
--- a/3rdparty/ncnn/python/requirements.txt
+++ b/3rdparty/ncnn/python/requirements.txt
@ -0,0 +1,5 @@
+numpy
+tqdm
+requests
+portalocker
+opencv-python
--- a/3rdparty/ncnn/python/setup.py.i
+++ b/3rdparty/ncnn/python/setup.py.i
@ -0,0 +1,48 @@
+import sys
+from setuptools import setup, find_packages
+
+try:
+    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
+
+    class bdist_wheel(_bdist_wheel):
+        def finalize_options(self):
+            _bdist_wheel.finalize_options(self)
+            self.root_is_pure = False
+
+
+except ImportError:
+    bdist_wheel = None
+
+if sys.version_info < (3, 0):
+    sys.exit("Sorry, Python < 3.0 is not supported")
+
+requirements = ["numpy", "tqdm", "requests", "portalocker", "opencv-python"]
+
+setup(
+    name="ncnn",
+    version="${PACKAGE_VERSION}",
+    author="nihui",
+    author_email="nihuini@tencent.com",
+    maintainer="caishanli",
+    maintainer_email="caishanli25@gmail.com",
+    description="ncnn is a high-performance neural network inference framework optimized for the mobile platform",
+    url="https://github.com/Tencent/ncnn",
+    classifiers=[
+        "Programming Language :: C++",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "License :: OSI Approved :: BSD License",
+        "Operating System :: OS Independent",
+    ],
+    license="BSD-3",
+    python_requires=">=3.5",
+    packages=find_packages(),
+    package_dir={"": "."},
+    package_data={"ncnn": ["ncnn${PYTHON_MODULE_PREFIX}${PYTHON_MODULE_EXTENSION}"]},
+    install_requires=requirements,
+    cmdclass={"bdist_wheel": bdist_wheel},
+)
--- a/3rdparty/ncnn/python/src/main.cpp
+++ b/3rdparty/ncnn/python/src/main.cpp
--- a/3rdparty/ncnn/python/src/pybind11_allocator.h
+++ b/3rdparty/ncnn/python/src/pybind11_allocator.h
@ -0,0 +1,137 @@
+/* Tencent is pleased to support the open source community by making ncnn available.
+ *
+ * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+#ifndef PYBIND11_NCNN_ALLOCATOR_H
+#define PYBIND11_NCNN_ALLOCATOR_H
+
+#include <allocator.h>
+
+// Trampoline that lets a Python subclass implement the ncnn::Allocator
+// virtual interface. Both methods are dispatched with the *_PURE macro, so a
+// Python override is required; no C++ fallback is called from here.
+// Uses the PYBIND11_OVERRIDE* names (as pybind11_layer.h does) instead of the
+// deprecated PYBIND11_OVERLOAD* aliases.
+template<class Base = ncnn::Allocator>
+class PyAllocator : public Base
+{
+public:
+    using Base::Base; // Inherit constructors
+    void* fastMalloc(size_t size) override
+    {
+        PYBIND11_OVERRIDE_PURE(void*, Base, fastMalloc, size);
+    }
+    void fastFree(void* ptr) override
+    {
+        PYBIND11_OVERRIDE_PURE(void, Base, fastFree, ptr);
+    }
+};
+
+// Trampoline for an allocator type `Other` that already implements
+// fastMalloc/fastFree: a Python override is used when one exists, otherwise
+// the call falls back to Other's own implementation.
+// Uses PYBIND11_OVERRIDE (as pybind11_layer.h does) instead of the deprecated
+// PYBIND11_OVERLOAD alias.
+template<class Other>
+class PyAllocatorOther : public PyAllocator<Other>
+{
+public:
+    using PyAllocator<Other>::PyAllocator;
+    void* fastMalloc(size_t size) override
+    {
+        PYBIND11_OVERRIDE(void*, Other, fastMalloc, size);
+    }
+    void fastFree(void* ptr) override
+    {
+        PYBIND11_OVERRIDE(void, Other, fastFree, ptr);
+    }
+};
+
+#if NCNN_VULKAN
+// Trampoline that lets a Python subclass implement the ncnn::VkAllocator
+// virtual interface. fastMalloc/fastFree must be overridden in Python
+// (*_PURE dispatch); clear/flush/invalidate fall back to Base when Python
+// does not override them.
+// Uses the PYBIND11_OVERRIDE* names (as pybind11_layer.h does) instead of the
+// deprecated PYBIND11_OVERLOAD* aliases.
+template<class Base = ncnn::VkAllocator>
+class PyVkAllocator : public Base
+{
+public:
+    using Base::Base; // Inherit constructors
+    void clear() override
+    {
+        // The trailing empty macro argument stands for "no call arguments".
+        PYBIND11_OVERRIDE(void, Base, clear, );
+    }
+    ncnn::VkBufferMemory* fastMalloc(size_t size) override
+    {
+        PYBIND11_OVERRIDE_PURE(ncnn::VkBufferMemory*, Base, fastMalloc, size);
+    }
+    void fastFree(ncnn::VkBufferMemory* ptr) override
+    {
+        PYBIND11_OVERRIDE_PURE(void, Base, fastFree, ptr);
+    }
+    int flush(ncnn::VkBufferMemory* ptr) override
+    {
+        PYBIND11_OVERRIDE(int, Base, flush, ptr);
+    }
+    int invalidate(ncnn::VkBufferMemory* ptr) override
+    {
+        PYBIND11_OVERRIDE(int, Base, invalidate, ptr);
+    }
+};
+
+// Trampoline for a Vulkan allocator type `Other` that already implements
+// clear/fastMalloc/fastFree: a Python override is used when one exists,
+// otherwise the call falls back to Other's own implementation. flush and
+// invalidate are inherited from PyVkAllocator<Other>.
+// Uses PYBIND11_OVERRIDE (as pybind11_layer.h does) instead of the deprecated
+// PYBIND11_OVERLOAD alias.
+template<class Other>
+class PyVkAllocatorOther : public PyVkAllocator<Other>
+{
+public:
+    using PyVkAllocator<Other>::PyVkAllocator;
+    void clear() override
+    {
+        // The trailing empty macro argument stands for "no call arguments".
+        PYBIND11_OVERRIDE(void, Other, clear, );
+    }
+    ncnn::VkBufferMemory* fastMalloc(size_t size) override
+    {
+        PYBIND11_OVERRIDE(ncnn::VkBufferMemory*, Other, fastMalloc, size);
+    }
+    void fastFree(ncnn::VkBufferMemory* ptr) override
+    {
+        PYBIND11_OVERRIDE(void, Other, fastFree, ptr);
+    }
+};
+
+// Trampoline that lets a Python subclass implement the image-allocation
+// interface of ncnn::VkBlobAllocator. fastMalloc/fastFree must be overridden
+// in Python (*_PURE dispatch); clear falls back to Base.
+// Uses the PYBIND11_OVERRIDE* names (as pybind11_layer.h does) instead of the
+// deprecated PYBIND11_OVERLOAD* aliases.
+template<class Base = ncnn::VkBlobAllocator>
+class PyVkBlobAllocator : public Base
+{
+public:
+    using Base::Base; // Inherit constructors
+    void clear() override
+    {
+        // The trailing empty macro argument stands for "no call arguments".
+        PYBIND11_OVERRIDE(void, Base, clear, );
+    }
+    ncnn::VkImageMemory* fastMalloc(int width, int height,
+                                    VkFormat format) override
+    {
+        PYBIND11_OVERRIDE_PURE(ncnn::VkImageMemory*, Base, fastMalloc, width,
+                               height, format);
+    }
+    void fastFree(ncnn::VkImageMemory* ptr) override
+    {
+        PYBIND11_OVERRIDE_PURE(void, Base, fastFree, ptr);
+    }
+};
+
+//template<class Other>
+//class PyVkImageAllocatorOther : public PyVkImageAllocator<Other>
+//{
+//public:
+//    using PyVkImageAllocator<Other>::PyVkImageAllocator;
+//    ncnn::VkImageMemory* fastMalloc(int width, int height,
+//                                    VkFormat format) override
+//    {
+//        PYBIND11_OVERLOAD(ncnn::VkImageMemory*, Other, fastMalloc, width, height,
+//                          format);
+//    }
+//    void fastFree(ncnn::VkImageMemory* ptr) override
+//    {
+//        PYBIND11_OVERLOAD(void, Other, fastFree, ptr);
+//    }
+//};
+#endif // NCNN_VULKAN
+
+#endif
--- a/3rdparty/ncnn/python/src/pybind11_bind.h
+++ b/3rdparty/ncnn/python/src/pybind11_bind.h
@ -0,0 +1,52 @@
+/* Tencent is pleased to support the open source community by making ncnn available.
+ *
+ * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+#ifndef PYBIND11_NCNN_BIND_H
+#define PYBIND11_NCNN_BIND_H
+
+#include <pybind11/functional.h>
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// virtual function pass by reference by https://github.com/pybind/pybind11/issues/2033
+// PYBIND11_OVERRIDE_REFERENCE_IMPL(ret_type, cname, name, ...)
+//   ret_type: C++ return type of the virtual function
+//   cname:    C++ class whose method is being overridden
+//   name:     Python-side method name (a string)
+//   ...:      arguments forwarded to the Python override
+// Acquires the GIL and looks up a Python override called `name` on `this`.
+// If one exists it is called with return_value_policy::reference applied to
+// the arguments, and the result is cast to ret_type and returned from the
+// enclosing function. If no override exists the macro falls through without
+// returning.
+// Do not add `//` comments inside the macro body: the line continuations
+// would pull the rest of the macro into the comment.
+#define PYBIND11_OVERRIDE_REFERENCE_IMPL(ret_type, cname, name, ...)                                 \
+    do                                                                                               \
+    {                                                                                                \
+        pybind11::gil_scoped_acquire gil;                                                            \
+        pybind11::function override = pybind11::get_override(static_cast<const cname*>(this), name); \
+        if (override)                                                                                \
+        {                                                                                            \
+            auto o = override.operator()<pybind11::return_value_policy::reference>(__VA_ARGS__);     \
+            if (pybind11::detail::cast_is_temporary_value_reference<ret_type>::value)                \
+            {                                                                                        \
+                static pybind11::detail::override_caster_t<ret_type> caster;                         \
+                return pybind11::detail::cast_ref<ret_type>(std::move(o), caster);                   \
+            }                                                                                        \
+            else                                                                                     \
+                return pybind11::detail::cast_safe<ret_type>(std::move(o));                          \
+        }                                                                                            \
+    } while (false)
+
+// PYBIND11_OVERRIDE_REFERENCE_NAME(ret_type, cname, name, fn, ...)
+// Tries the Python override called `name` first (via
+// PYBIND11_OVERRIDE_REFERENCE_IMPL); when none exists, returns the result of
+// the C++ implementation cname::fn(...).
+#define PYBIND11_OVERRIDE_REFERENCE_NAME(ret_type, cname, name, fn, ...)                                    \
+    do                                                                                                      \
+    {                                                                                                       \
+        PYBIND11_OVERRIDE_REFERENCE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \
+        return cname::fn(__VA_ARGS__);                                                                      \
+    } while (false)
+
+// PYBIND11_OVERRIDE_REFERENCE(ret_type, cname, fn, ...)
+// Convenience form of PYBIND11_OVERRIDE_REFERENCE_NAME for the common case
+// where the Python method has the same name as the C++ one: `fn` is
+// stringified (#fn) to obtain the Python name.
+#define PYBIND11_OVERRIDE_REFERENCE(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE_REFERENCE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#endif
--- a/3rdparty/ncnn/python/src/pybind11_datareader.h
+++ b/3rdparty/ncnn/python/src/pybind11_datareader.h
@ -0,0 +1,71 @@
+/* Tencent is pleased to support the open source community by making ncnn available.
+ *
+ * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+#ifndef PYBIND11_NCNN_DATAREADER_H
+#define PYBIND11_NCNN_DATAREADER_H
+
+#include <datareader.h>
+
+// DataReader that supplies no real data: scan() returns 0 without touching
+// its arguments, and read() zero-fills the destination buffer and reports the
+// full requested size as read.
+// The methods are marked `override`, like the other DataReader subclasses in
+// this header, so a signature drift in ncnn::DataReader becomes a compile
+// error instead of a silently unused method.
+class DataReaderFromEmpty : public ncnn::DataReader
+{
+public:
+#if NCNN_STRING
+    int scan(const char* /*format*/, void* /*p*/) const override
+    {
+        return 0;
+    }
+#endif // NCNN_STRING
+    size_t read(void* buf, size_t size) const override
+    {
+        memset(buf, 0, size);
+        return size;
+    }
+};
+
+// Trampoline that lets a Python subclass override the ncnn::DataReader
+// virtual interface; calls fall back to Base when Python does not override
+// the method.
+// Uses PYBIND11_OVERRIDE (as pybind11_layer.h does) instead of the deprecated
+// PYBIND11_OVERLOAD alias.
+template<class Base = ncnn::DataReader>
+class PyDataReader : public Base
+{
+public:
+    using Base::Base; // Inherit constructors
+#if NCNN_STRING
+    int scan(const char* format, void* p) const override
+    {
+        PYBIND11_OVERRIDE(int, Base, scan, format, p);
+    }
+#endif // NCNN_STRING
+    size_t read(void* buf, size_t size) const override
+    {
+        PYBIND11_OVERRIDE(size_t, Base, read, buf, size);
+    }
+};
+
+// Trampoline for a concrete DataReader type `Other`: a Python override is
+// used when one exists, otherwise the call falls back to Other's own
+// implementation.
+// Uses PYBIND11_OVERRIDE (as pybind11_layer.h does) instead of the deprecated
+// PYBIND11_OVERLOAD alias.
+template<class Other>
+class PyDataReaderOther : public PyDataReader<Other>
+{
+public:
+    using PyDataReader<Other>::PyDataReader;
+#if NCNN_STRING
+    int scan(const char* format, void* p) const override
+    {
+        PYBIND11_OVERRIDE(int, Other, scan, format, p);
+    }
+#endif // NCNN_STRING
+    size_t read(void* buf, size_t size) const override
+    {
+        PYBIND11_OVERRIDE(size_t, Other, read, buf, size);
+    }
+};
+
+#endif
--- a/3rdparty/ncnn/python/src/pybind11_layer.h
+++ b/3rdparty/ncnn/python/src/pybind11_layer.h
@ -0,0 +1,161 @@
+/* Tencent is pleased to support the open source community by making ncnn available.
+ *
+ * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+#ifndef PYBIND11_NCNN_LAYER_H
+#define PYBIND11_NCNN_LAYER_H
+
+#include <layer.h>
+#include "pybind11_bind.h"
+
+// Trampoline that lets a Python class derive from ncnn::Layer and override
+// its virtual methods. Every method first looks for a Python override and
+// otherwise calls the ncnn::Layer implementation.
+// NOTE(review): the methods are declared `virtual` without `override`, so the
+// compiler does not check that each signature still matches ncnn::Layer -
+// confirm against layer.h.
+class PyLayer : public ncnn::Layer
+{
+public:
+    // Setup / teardown hooks, dispatched with the stock PYBIND11_OVERRIDE.
+    virtual int load_param(const ncnn::ParamDict& pd)
+    {
+        PYBIND11_OVERRIDE(
+            int,
+            ncnn::Layer,
+            load_param,
+            pd);
+    }
+
+    virtual int load_model(const ncnn::ModelBin& mb)
+    {
+        PYBIND11_OVERRIDE(
+            int,
+            ncnn::Layer,
+            load_model,
+            mb);
+    }
+
+    virtual int create_pipeline(const ncnn::Option& opt)
+    {
+        PYBIND11_OVERRIDE(
+            int,
+            ncnn::Layer,
+            create_pipeline,
+            opt);
+    }
+
+    virtual int destroy_pipeline(const ncnn::Option& opt)
+    {
+        PYBIND11_OVERRIDE(
+            int,
+            ncnn::Layer,
+            destroy_pipeline,
+            opt);
+    }
+
+public:
+    // Inference entry points. These use PYBIND11_OVERRIDE_REFERENCE (see
+    // pybind11_bind.h) so the Python override receives the arguments by
+    // reference and can write its results into the output blobs.
+    // All overloads of `forward` dispatch to the same Python method name
+    // ("forward"); likewise for `forward_inplace`.
+    virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward,
+            bottom_blobs,
+            top_blobs,
+            opt);
+    }
+    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward,
+            bottom_blob,
+            top_blob,
+            opt);
+    }
+
+    virtual int forward_inplace(std::vector<ncnn::Mat>& bottom_top_blobs, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward_inplace,
+            bottom_top_blobs,
+            opt);
+    }
+    virtual int forward_inplace(ncnn::Mat& bottom_top_blob, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward_inplace,
+            bottom_top_blob,
+            opt);
+    }
+
+#if NCNN_VULKAN
+public:
+    // Vulkan variants, only compiled when NCNN_VULKAN is enabled.
+    virtual int upload_model(ncnn::VkTransfer& cmd, const ncnn::Option& opt)
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            upload_model,
+            cmd,
+            opt);
+    }
+
+public:
+    // Same Python method names as the CPU overloads above ("forward" and
+    // "forward_inplace"), with VkMat blobs and an extra VkCompute argument.
+    virtual int forward(const std::vector<ncnn::VkMat>& bottom_blobs, std::vector<ncnn::VkMat>& top_blobs, ncnn::VkCompute& cmd, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward,
+            bottom_blobs,
+            top_blobs,
+            cmd,
+            opt);
+    }
+    virtual int forward(const ncnn::VkMat& bottom_blob, ncnn::VkMat& top_blob, ncnn::VkCompute& cmd, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward,
+            bottom_blob,
+            top_blob,
+            cmd,
+            opt);
+    }
+
+    virtual int forward_inplace(std::vector<ncnn::VkMat>& bottom_top_blobs, ncnn::VkCompute& cmd, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward_inplace,
+            bottom_top_blobs,
+            cmd,
+            opt);
+    }
+    virtual int forward_inplace(ncnn::VkMat& bottom_top_blob, ncnn::VkCompute& cmd, const ncnn::Option& opt) const
+    {
+        PYBIND11_OVERRIDE_REFERENCE(
+            int,
+            ncnn::Layer,
+            forward_inplace,
+            bottom_top_blob,
+            cmd,
+            opt);
+    }
+#endif // NCNN_VULKAN
+};
+
+#endif
--- a/3rdparty/ncnn/python/src/pybind11_mat.h
+++ b/3rdparty/ncnn/python/src/pybind11_mat.h
@ -0,0 +1,44 @@
+/* Tencent is pleased to support the open source community by making ncnn available.
+ *
+ * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+#ifndef PYBIND11_NCNN_MAT_H
+#define PYBIND11_NCNN_MAT_H
+
+#include <string>
+
+#include <pybind11/pybind11.h>
+
+#include <mat.h>
+
+// Map the element size of an ncnn::Mat to a Python buffer-protocol format
+// string:
+//   4 bytes -> the format of float
+//   2 bytes -> "e" (half-precision float in the struct module's notation)
+//   1 byte  -> the format of int8_t
+// Any other element size yields an empty string.
+//
+// Declared `inline` because the function is defined in a header: without it,
+// including this header from more than one translation unit produces
+// duplicate-symbol link errors.
+inline std::string get_mat_format(const ncnn::Mat& m)
+{
+    if (m.elemsize == 4)
+    {
+        return pybind11::format_descriptor<float>::format();
+    }
+    if (m.elemsize == 2)
+    {
+        // see https://docs.python.org/3/library/struct.html#format-characters
+        return "e";
+    }
+    if (m.elemsize == 1)
+    {
+        return pybind11::format_descriptor<int8_t>::format();
+    }
+    return std::string();
+}
+
+#endif
--- a/3rdparty/ncnn/python/src/pybind11_modelbin.h
+++ b/3rdparty/ncnn/python/src/pybind11_modelbin.h
@ -0,0 +1,49 @@
+/* Tencent is pleased to support the open source community by making ncnn available.
+ *
+ * Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/BSD-3-Clause
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+#ifndef PYBIND11_NCNN_MODELBIN_H
+#define PYBIND11_NCNN_MODELBIN_H
+
+#include <modelbin.h>
+
+// Trampoline that lets a Python subclass implement ncnn::ModelBin. Only the
+// load(w, type) overload is routed to Python, and it must be overridden there
+// (*_PURE dispatch).
+// Uses PYBIND11_OVERRIDE_PURE (matching the PYBIND11_OVERRIDE family used in
+// pybind11_layer.h) instead of the deprecated PYBIND11_OVERLOAD_PURE alias.
+template<class Base = ncnn::ModelBin>
+class PyModelBin : public Base
+{
+public:
+    using Base::Base; // Inherit constructors
+    ncnn::Mat load(int w, int type) const override
+    {
+        PYBIND11_OVERRIDE_PURE(ncnn::Mat, Base, load, w, type);
+    }
+    //ncnn::Mat load(int w, int h, int type) const override {
+    //	PYBIND11_OVERLOAD(ncnn::Mat, Base, load, w, h, type);
+    //}
+    //ncnn::Mat load(int w, int h, int c, int type) const override {
+    //	PYBIND11_OVERLOAD(ncnn::Mat, Base, load, w, h, c, type);
+    //}
+};
+
+// Trampoline for a concrete ModelBin type `Other`: a Python override of
+// load(w, type) is used when one exists, otherwise the call falls back to
+// Other's own implementation.
+// Uses PYBIND11_OVERRIDE (as pybind11_layer.h does) instead of the deprecated
+// PYBIND11_OVERLOAD alias.
+template<class Other>
+class PyModelBinOther : public PyModelBin<Other>
+{
+public:
+    using PyModelBin<Other>::PyModelBin;
+    ncnn::Mat load(int w, int type) const override
+    {
+        PYBIND11_OVERRIDE(ncnn::Mat, Other, load, w, type);
+    }
+};
+
+#endif
--- a/3rdparty/ncnn/python/tests/benchmark.py
+++ b/3rdparty/ncnn/python/tests/benchmark.py
@ -0,0 +1,192 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import time
+import ncnn
+
+# Prefix prepended to "<model>.param" in benchmark(); being a relative path it
+# is resolved against the current working directory, not this script.
+param_root = "../benchmark/"
+
+# Untimed warm-up iterations, timed iterations, and whether benchmark() sleeps
+# before measuring. The __main__ section below may overwrite all three.
+g_warmup_loop_count = 8
+g_loop_count = 4
+g_enable_cooling_down = True
+
+# Vulkan device and allocators; they stay None unless __main__ selects a GPU.
+g_vkdev = None
+g_blob_vkallocator = None
+g_staging_vkallocator = None
+
+# CPU allocators shared by every benchmark() call (cleared at the start of each).
+g_blob_pool_allocator = ncnn.UnlockedPoolAllocator()
+g_workspace_pool_allocator = ncnn.PoolAllocator()
+
+
+def benchmark(comment, _in, opt):
+    _in.fill(0.01)
+
+    g_blob_pool_allocator.clear()
+    g_workspace_pool_allocator.clear()
+
+    if opt.use_vulkan_compute:
+        g_blob_vkallocator.clear()
+        g_staging_vkallocator.clear()
+
+    net = ncnn.Net()
+    net.opt = opt
+
+    if net.opt.use_vulkan_compute:
+        net.set_vulkan_device(g_vkdev)
+
+    net.load_param(param_root + comment + ".param")
+
+    dr = ncnn.DataReaderFromEmpty()
+    net.load_model(dr)
+
+    input_names = net.input_names()
+    output_names = net.output_names()
+
+    if g_enable_cooling_down:
+        time.sleep(10)
+
+    # warm up
+    for i in range(g_warmup_loop_count):
+        # test with statement
+        with net.create_extractor() as ex:
+            ex.input(input_names[0], _in)
+            ex.extract(output_names[0])
+
+    time_min = sys.float_info.max
+    time_max = -sys.float_info.max
+    time_avg = 0.0
+
+    for i in range(g_loop_count):
+        start = time.time()
+
+        # test net keep alive until ex freed
+        ex = net.create_extractor()
+        ex.input(input_names[0], _in)
+        ex.extract(output_names[0])
+
+        end = time.time()
+
+        timespan = end - start
+
+        time_min = timespan if timespan < time_min else time_min
+        time_max = timespan if timespan > time_max else time_max
+        time_avg += timespan
+
+    time_avg /= g_loop_count
+
+    print(
+        "%20s  min = %7.2f  max = %7.2f  avg = %7.2f"
+        % (comment, time_min * 1000, time_max * 1000, time_avg * 1000)
+    )
+
+
+if __name__ == "__main__":
+    loop_count = 4
+    num_threads = ncnn.get_cpu_count()
+    powersave = 0
+    gpu_device = -1
+    cooling_down = 1
+
+    argc = len(sys.argv)
+    if argc >= 2:
+        loop_count = int(sys.argv[1])
+    if argc >= 3:
+        num_threads = int(sys.argv[2])
+    if argc >= 4:
+        powersave = int(sys.argv[3])
+    if argc >= 5:
+        gpu_device = int(sys.argv[4])
+    if argc >= 6:
+        cooling_down = int(sys.argv[5])
+
+    use_vulkan_compute = gpu_device != -1
+
+    g_enable_cooling_down = cooling_down != 0
+
+    g_loop_count = loop_count
+
+    g_blob_pool_allocator.set_size_compare_ratio(0.0)
+    g_workspace_pool_allocator.set_size_compare_ratio(0.5)
+
+    if use_vulkan_compute:
+        g_warmup_loop_count = 10
+
+        g_vkdev = ncnn.get_gpu_device(gpu_device)
+
+        g_blob_vkallocator = ncnn.VkBlobAllocator(g_vkdev)
+        g_staging_vkallocator = ncnn.VkStagingAllocator(g_vkdev)
+
+    opt = ncnn.Option()
+    opt.lightmode = True
+    opt.num_threads = num_threads
+    opt.blob_allocator = g_blob_pool_allocator
+    opt.workspace_allocator = g_workspace_pool_allocator
+    if use_vulkan_compute:
+        opt.blob_vkallocator = g_blob_vkallocator
+        opt.workspace_vkallocator = g_blob_vkallocator
+        opt.staging_vkallocator = g_staging_vkallocator
+    opt.use_winograd_convolution = True
+    opt.use_sgemm_convolution = True
+    opt.use_int8_inference = True
+    opt.use_vulkan_compute = use_vulkan_compute
+    opt.use_fp16_packed = True
+    opt.use_fp16_storage = True
+    opt.use_fp16_arithmetic = True
+    opt.use_int8_storage = True
+    opt.use_int8_arithmetic = True
+    opt.use_packing_layout = True
+    opt.use_shader_pack8 = False
+    opt.use_image_storage = False
+
+    ncnn.set_cpu_powersave(powersave)
+    ncnn.set_omp_dynamic(0)
+    ncnn.set_omp_num_threads(num_threads)
+
+    print("loop_count =", loop_count)
+    print("num_threads =", num_threads)
+    print("powersave =", ncnn.get_cpu_powersave())
+    print("gpu_device =", gpu_device)
+    print("cooling_down =", g_enable_cooling_down)
+
+    benchmark("squeezenet", ncnn.Mat((227, 227, 3)), opt)
+    benchmark("squeezenet_int8", ncnn.Mat((227, 227, 3)), opt)
+    benchmark("mobilenet", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("mobilenet_int8", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("mobilenet_v2", ncnn.Mat((224, 224, 3)), opt)
+    # benchmark("mobilenet_v2_int8", ncnn.Mat(w=224, h=224, c=3), opt)
+    benchmark("mobilenet_v3", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("shufflenet", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("shufflenet_v2", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("mnasnet", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("proxylessnasnet", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("efficientnet_b0", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("regnety_400m", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("blazeface", ncnn.Mat((128, 128, 3)), opt)
+    benchmark("googlenet", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("googlenet_int8", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("resnet18", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("resnet18_int8", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("alexnet", ncnn.Mat((227, 227, 3)), opt)
+    benchmark("vgg16", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("vgg16_int8", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("resnet50", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("resnet50_int8", ncnn.Mat((224, 224, 3)), opt)
+    benchmark("squeezenet_ssd", ncnn.Mat((300, 300, 3)), opt)
+    benchmark("squeezenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
+    benchmark("mobilenet_ssd", ncnn.Mat((300, 300, 3)), opt)
+    benchmark("mobilenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
+    benchmark("mobilenet_yolo", ncnn.Mat((416, 416, 3)), opt)
+    benchmark("mobilenetv2_yolov3", ncnn.Mat((352, 352, 3)), opt)
+    benchmark("yolov4-tiny", ncnn.Mat((416, 416, 3)), opt)
--- a/3rdparty/ncnn/python/tests/custom_layer.param
+++ b/3rdparty/ncnn/python/tests/custom_layer.param
@ -0,0 +1,4 @@
+7767517
+2 2
+Input            data                             0 1 data
+CustomLayer      cl_fwd                           1 1 data output
--- a/3rdparty/ncnn/python/tests/test.param
+++ b/3rdparty/ncnn/python/tests/test.param
@ -0,0 +1,5 @@
+7767517
+3 3
+Input            data                             0 1 data
+Convolution      conv0_fwd                        1 1 data conv0_fwd 0=3 1=3 11=3 2=1 12=1 3=1 13=1 4=0 14=0 5=1 6=81
+InnerProduct     dense0_fwd                       1 1 conv0_fwd output 0=1 1=1 2=151875
--- a/3rdparty/ncnn/python/tests/test_allocator.py
+++ b/3rdparty/ncnn/python/tests/test_allocator.py
@ -0,0 +1,37 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+
+def test_pool_allocator():
+    pa = ncnn.PoolAllocator()
+    assert pa is not None
+    pa.set_size_compare_ratio(0.5)
+    buf = pa.fastMalloc(10 * 1024)
+    assert buf is not None
+    pa.fastFree(buf)
+    pa.clear()
+
+
+def test_unlocked_pool_allocator():
+    upa = ncnn.UnlockedPoolAllocator()
+    assert upa is not None
+    upa.set_size_compare_ratio(0.5)
+    buf = upa.fastMalloc(10 * 1024)
+    assert buf is not None
+    upa.fastFree(buf)
+    upa.clear()
--- a/3rdparty/ncnn/python/tests/test_blob.py
+++ b/3rdparty/ncnn/python/tests/test_blob.py
@ -0,0 +1,33 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+
+def test_blob():
+    blob = ncnn.Blob()
+
+    blob.name = "myblob"
+    assert blob.name == "myblob"
+
+    blob.producer = 0
+    assert blob.producer == 0
+
+    blob.consumer = 0
+    assert blob.consumer == 0
+
+    blob.shape = ncnn.Mat(1)
+    assert blob.shape.dims == 1 and blob.shape.w == 1
--- a/3rdparty/ncnn/python/tests/test_extractor.py
+++ b/3rdparty/ncnn/python/tests/test_extractor.py
@ -0,0 +1,86 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+alloctor = ncnn.PoolAllocator()
+
+
+def test_extractor():
+    with pytest.raises(TypeError, match="No constructor"):
+        ex = ncnn.Extractor()
+
+    dr = ncnn.DataReaderFromEmpty()
+
+    net = ncnn.Net()
+    net.load_param("tests/test.param")
+    net.load_model(dr)
+
+    in_mat = ncnn.Mat((227, 227, 3))
+    with net.create_extractor() as ex:
+        ex.set_light_mode(True)
+        ex.set_num_threads(2)
+
+        ex.set_blob_allocator(alloctor)
+        ex.set_workspace_allocator(alloctor)
+
+        ex.input("data", in_mat)
+        ret, out_mat = ex.extract("conv0_fwd")
+        assert (
+            ret == 0
+            and out_mat.dims == 3
+            and out_mat.w == 225
+            and out_mat.h == 225
+            and out_mat.c == 3
+        )
+
+        ret, out_mat = ex.extract("output")
+        assert ret == 0 and out_mat.dims == 1 and out_mat.w == 1
+
+
+def test_extractor_index():
+    with pytest.raises(TypeError, match="No constructor"):
+        ex = ncnn.Extractor()
+
+    dr = ncnn.DataReaderFromEmpty()
+
+    net = ncnn.Net()
+    net.load_param("tests/test.param")
+    net.load_model(dr)
+
+    in_mat = ncnn.Mat((227, 227, 3))
+    ex = net.create_extractor()
+    ex.set_light_mode(True)
+    ex.set_num_threads(2)
+
+    ex.set_blob_allocator(alloctor)
+    ex.set_workspace_allocator(alloctor)
+
+    ex.input(0, in_mat)
+    ret, out_mat = ex.extract(1)
+    assert (
+        ret == 0
+        and out_mat.dims == 3
+        and out_mat.w == 225
+        and out_mat.h == 225
+        and out_mat.c == 3
+    )
+
+    ret, out_mat = ex.extract(2)
+    assert ret == 0 and out_mat.dims == 1 and out_mat.w == 1
+
+    # not use with sentence, call clear manually to ensure ex destruct before net
+    ex.clear()
--- a/3rdparty/ncnn/python/tests/test_mat.py
+++ b/3rdparty/ncnn/python/tests/test_mat.py
@ -0,0 +1,762 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import sys
+import numpy as np
+import pytest
+
+import ncnn
+
+
+def test_mat_dims1():
+    mat = ncnn.Mat(1)
+    assert mat.dims == 1 and mat.w == 1
+    mat = ncnn.Mat(2, elemsize=4)
+    assert mat.dims == 1 and mat.w == 2 and mat.elemsize == 4
+    mat = ncnn.Mat(3, elemsize=4, elempack=1)
+    assert mat.dims == 1 and mat.w == 3 and mat.elemsize == 4 and mat.elempack == 1
+    mat = ncnn.Mat(4, elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 1
+        and mat.w == 4
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+    mat = ncnn.Mat((1,))
+    assert mat.dims == 1 and mat.w == 1
+    mat = ncnn.Mat((2,), elemsize=4)
+    assert mat.dims == 1 and mat.w == 2 and mat.elemsize == 4
+    mat = ncnn.Mat((3,), elemsize=4, elempack=1)
+    assert mat.dims == 1 and mat.w == 3 and mat.elemsize == 4 and mat.elempack == 1
+    mat = ncnn.Mat((4,), elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 1
+        and mat.w == 4
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+
+def test_mat_dims2():
+    mat = ncnn.Mat(1, 2)
+    assert mat.dims == 2 and mat.w == 1 and mat.h == 2
+    mat = ncnn.Mat(3, 4, elemsize=4)
+    assert mat.dims == 2 and mat.w == 3 and mat.h == 4 and mat.elemsize == 4
+    mat = ncnn.Mat(5, 6, elemsize=4, elempack=1)
+    assert (
+        mat.dims == 2
+        and mat.w == 5
+        and mat.h == 6
+        and mat.elemsize == 4
+        and mat.elempack == 1
+    )
+    mat = ncnn.Mat(7, 8, elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 2
+        and mat.w == 7
+        and mat.h == 8
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+    mat = ncnn.Mat((1, 2))
+    assert mat.dims == 2 and mat.w == 1 and mat.h == 2
+    mat = ncnn.Mat((3, 4), elemsize=4)
+    assert mat.dims == 2 and mat.w == 3 and mat.h == 4 and mat.elemsize == 4
+    mat = ncnn.Mat((5, 6), elemsize=4, elempack=1)
+    assert (
+        mat.dims == 2
+        and mat.w == 5
+        and mat.h == 6
+        and mat.elemsize == 4
+        and mat.elempack == 1
+    )
+    mat = ncnn.Mat((7, 8), elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 2
+        and mat.w == 7
+        and mat.h == 8
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+
+def test_mat_dims3():
+    mat = ncnn.Mat(1, 2, 3)
+    assert mat.dims == 3 and mat.w == 1 and mat.h == 2 and mat.c == 3
+    mat = ncnn.Mat(4, 5, 6, elemsize=4)
+    assert (
+        mat.dims == 3 and mat.w == 4 and mat.h == 5 and mat.c == 6 and mat.elemsize == 4
+    )
+    mat = ncnn.Mat(7, 8, 9, elemsize=4, elempack=1)
+    assert (
+        mat.dims == 3
+        and mat.w == 7
+        and mat.h == 8
+        and mat.c == 9
+        and mat.elemsize == 4
+        and mat.elempack == 1
+    )
+    mat = ncnn.Mat(10, 11, 12, elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 3
+        and mat.w == 10
+        and mat.h == 11
+        and mat.c == 12
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+    mat = ncnn.Mat((1, 2, 3))
+    assert mat.dims == 3 and mat.w == 1 and mat.h == 2 and mat.c == 3
+    mat = ncnn.Mat((4, 5, 6), elemsize=4)
+    assert (
+        mat.dims == 3 and mat.w == 4 and mat.h == 5 and mat.c == 6 and mat.elemsize == 4
+    )
+    mat = ncnn.Mat((7, 8, 9), elemsize=4, elempack=1)
+    assert (
+        mat.dims == 3
+        and mat.w == 7
+        and mat.h == 8
+        and mat.c == 9
+        and mat.elemsize == 4
+        and mat.elempack == 1
+    )
+    mat = ncnn.Mat((10, 11, 12), elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 3
+        and mat.w == 10
+        and mat.h == 11
+        and mat.c == 12
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+
+def test_mat_dims4():
+    mat = ncnn.Mat(1, 2, 3, 4)
+    assert mat.dims == 4 and mat.w == 1 and mat.h == 2 and mat.d == 3 and mat.c == 4
+    mat = ncnn.Mat(4, 5, 6, 7, elemsize=4)
+    assert (
+        mat.dims == 4 and mat.w == 4 and mat.h == 5 and mat.d == 6 and mat.c == 7 and mat.elemsize == 4
+    )
+    mat = ncnn.Mat(7, 8, 9, 10, elemsize=4, elempack=1)
+    assert (
+        mat.dims == 4
+        and mat.w == 7
+        and mat.h == 8
+        and mat.d == 9
+        and mat.c == 10
+        and mat.elemsize == 4
+        and mat.elempack == 1
+    )
+    mat = ncnn.Mat(10, 11, 12, 13, elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 4
+        and mat.w == 10
+        and mat.h == 11
+        and mat.d == 12
+        and mat.c == 13
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+    mat = ncnn.Mat((1, 2, 3, 4))
+    assert mat.dims == 4 and mat.w == 1 and mat.h == 2 and mat.d == 3 and mat.c == 4
+    mat = ncnn.Mat((4, 5, 6, 7), elemsize=4)
+    assert (
+        mat.dims == 4 and mat.w == 4 and mat.h == 5 and mat.d == 6 and mat.c == 7 and mat.elemsize == 4
+    )
+    mat = ncnn.Mat((7, 8, 9, 10), elemsize=4, elempack=1)
+    assert (
+        mat.dims == 4
+        and mat.w == 7
+        and mat.h == 8
+        and mat.d == 9
+        and mat.c == 10
+        and mat.elemsize == 4
+        and mat.elempack == 1
+    )
+    mat = ncnn.Mat((10, 11, 12, 13), elemsize=4, elempack=1, allocator=None)
+    assert (
+        mat.dims == 4
+        and mat.w == 10
+        and mat.h == 11
+        and mat.d == 12
+        and mat.c == 13
+        and mat.elemsize == 4
+        and mat.elempack == 1
+        and mat.allocator == None
+    )
+
+
+def test_numpy():
+    mat = ncnn.Mat(1)
+    array = np.array(mat)
+    assert mat.dims == array.ndim and mat.w == array.shape[0]
+    mat = ncnn.Mat(2, 3)
+    array = np.array(mat)
+    assert (
+        mat.dims == array.ndim and mat.w == array.shape[1] and mat.h == array.shape[0]
+    )
+    mat = ncnn.Mat(4, 5, 6)
+    array = np.array(mat)
+    assert (
+        mat.dims == array.ndim
+        and mat.w == array.shape[2]
+        and mat.h == array.shape[1]
+        and mat.c == array.shape[0]
+    )
+    mat = ncnn.Mat(7, 8, 9, 10)
+    array = np.array(mat)
+    assert (
+        mat.dims == array.ndim
+        and mat.w == array.shape[3]
+        and mat.h == array.shape[2]
+        and mat.d == array.shape[1]
+        and mat.c == array.shape[0]
+    )
+
+    mat = ncnn.Mat(1, elemsize=1)
+    array = np.array(mat)
+    assert array.dtype == np.int8
+    mat = ncnn.Mat(1, elemsize=2)
+    array = np.array(mat)
+    assert array.dtype == np.float16
+    # pybind11 def_buffer throw bug
+    # with pytest.raises(RuntimeError) as execinfo:
+    #     mat = ncnn.Mat(1, elemsize=3)
+    #     array = np.array(mat)
+    #     assert "convert ncnn.Mat to numpy.ndarray only elemsize 1, 2, 4 support now, but given 3" in str(
+    #         execinfo.value
+    #     )
+    assert array.dtype == np.float16
+    mat = ncnn.Mat(1, elemsize=4)
+    array = np.array(mat)
+    assert array.dtype == np.float32
+
+    mat = np.random.randint(0, 128, size=(12,)).astype(np.uint8)
+    array = np.array(mat)
+    assert (mat == array).all()
+    mat = np.random.rand(12).astype(np.float32)
+    array = np.array(mat)
+    assert (mat == array).all()
+    mat = np.random.randint(0, 128, size=(12, 11)).astype(np.uint8)
+    array = np.array(mat)
+    assert (mat == array).all()
+    mat = np.random.rand(12, 11).astype(np.float32)
+    array = np.array(mat)
+    assert (mat == array).all()
+    mat = np.random.randint(0, 256, size=(12, 11, 3)).astype(np.uint8)
+    array = np.array(mat)
+    assert (mat == array).all()
+    mat = np.random.rand(12, 11, 3).astype(np.float32)
+    array = np.array(mat)
+    assert (mat == array).all()
+    mat = np.random.randint(0, 256, size=(12, 11, 7, 3)).astype(np.uint8)
+    array = np.array(mat)
+    assert (mat == array).all()
+    mat = np.random.rand(12, 11, 7, 3).astype(np.float32)
+    array = np.array(mat)
+    assert (mat == array).all()
+
+
+def test_fill():
+    mat = ncnn.Mat(1)
+    mat.fill(1.0)
+    array = np.array(mat)
+    assert np.abs(array[0] - 1.0) < sys.float_info.min
+
+
+def test_clone():
+    mat1 = ncnn.Mat(1)
+    mat2 = mat1.clone()
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w
+
+    mat1 = ncnn.Mat(2, 3)
+    mat2 = mat1.clone()
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w and mat1.h == mat2.h
+
+    mat1 = ncnn.Mat(4, 5, 6)
+    mat2 = mat1.clone()
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.c == mat2.c
+    )
+
+    mat1 = ncnn.Mat(7, 8, 9, 10)
+    mat2 = mat1.clone()
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.d == mat2.d
+        and mat1.c == mat2.c
+    )
+
+    mat1 = ncnn.Mat((1,))
+    mat2 = mat1.clone()
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w
+
+    mat1 = ncnn.Mat((2, 3))
+    mat2 = mat1.clone()
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w and mat1.h == mat2.h
+
+    mat1 = ncnn.Mat((4, 5, 6))
+    mat2 = mat1.clone()
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.c == mat2.c
+    )
+
+    mat1 = ncnn.Mat((7, 8, 9, 10))
+    mat2 = mat1.clone()
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.d == mat2.d
+        and mat1.c == mat2.c
+    )
+
+
+def test_clone_from():
+    mat2 = ncnn.Mat()
+
+    mat1 = ncnn.Mat(1)
+    mat2.clone_from(mat1)
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w
+
+    mat1 = ncnn.Mat(2, 3)
+    mat2.clone_from(mat1)
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w and mat1.h == mat2.h
+
+    mat1 = ncnn.Mat(4, 5, 6)
+    mat2.clone_from(mat1)
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.c == mat2.c
+    )
+
+    mat1 = ncnn.Mat(7, 8, 9, 10)
+    mat2.clone_from(mat1)
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.d == mat2.d
+        and mat1.c == mat2.c
+    )
+
+    mat1 = ncnn.Mat((1,))
+    mat2.clone_from(mat1)
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w
+
+    mat1 = ncnn.Mat((2, 3))
+    mat2.clone_from(mat1)
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w and mat1.h == mat2.h
+
+    mat1 = ncnn.Mat((4, 5, 6))
+    mat2.clone_from(mat1)
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.c == mat2.c
+    )
+
+    mat1 = ncnn.Mat((7, 8, 9, 10))
+    mat2.clone_from(mat1)
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.d == mat2.d
+        and mat1.c == mat2.c
+    )
+
+
+def test_reshape():
+    mat1 = ncnn.Mat()
+    mat2 = mat1.reshape(1)
+    assert mat2.dims == 0
+    mat2 = mat1.reshape(1, 1)
+    assert mat2.dims == 0
+    mat2 = mat1.reshape(1, 1, 1)
+    assert mat2.dims == 0
+    mat2 = mat1.reshape(1, 1, 1, 1)
+    assert mat2.dims == 0
+
+    mat1 = ncnn.Mat(1)
+    mat2 = mat1.reshape(1, 1)
+    assert mat2.dims == 2 and mat2.w == 1 and mat2.h == 1
+    mat2 = mat1.reshape(1, 1, 1)
+    assert mat2.dims == 3 and mat2.w == 1 and mat2.h == 1 and mat2.c == 1
+    mat2 = mat1.reshape(1, 1, 1, 1)
+    assert mat2.dims == 4 and mat2.w == 1 and mat2.h == 1 and mat2.d == 1 and mat2.c == 1
+
+    mat1 = ncnn.Mat(1, 2)
+    mat2 = mat1.reshape(2)
+    assert mat2.dims == 1 and mat2.w == 2
+    mat2 = mat1.reshape(2, 1)
+    assert mat2.dims == 2 and mat2.w == 2 and mat2.h == 1
+    mat2 = mat1.reshape(2, 1, 1)
+    assert mat2.dims == 3 and mat2.w == 2 and mat2.h == 1 and mat2.c == 1
+    mat2 = mat1.reshape(2, 1, 1, 1)
+    assert mat2.dims == 4 and mat2.w == 2 and mat2.h == 1 and mat2.d == 1 and mat2.c == 1
+
+    mat1 = ncnn.Mat(1, 2, 3)
+    mat2 = mat1.reshape(6)
+    assert mat2.dims == 1 and mat2.w == 6
+    mat2 = mat1.reshape(2, 3)
+    assert mat2.dims == 2 and mat2.w == 2 and mat2.h == 3
+    mat2 = mat1.reshape(2, 3, 1)
+    assert mat2.dims == 3 and mat2.w == 2 and mat2.h == 3 and mat2.c == 1
+    mat2 = mat1.reshape(2, 1, 3, 1)
+    assert mat2.dims == 4 and mat2.w == 2 and mat2.h == 1 and mat2.d == 3 and mat2.c == 1
+
+    mat1 = ncnn.Mat((1,))
+    mat2 = mat1.reshape((1, 1))
+    assert mat2.dims == 2 and mat2.w == 1 and mat2.h == 1
+    mat2 = mat1.reshape((1, 1, 1))
+    assert mat2.dims == 3 and mat2.w == 1 and mat2.h == 1 and mat2.c == 1
+    mat2 = mat1.reshape((1, 1, 1, 1))
+    assert mat2.dims == 4 and mat2.w == 1 and mat2.h == 1 and mat2.d == 1 and mat2.c == 1
+
+    mat1 = ncnn.Mat((1, 2))
+    mat2 = mat1.reshape((2,))
+    assert mat2.dims == 1 and mat2.w == 2
+    mat2 = mat1.reshape((2, 1))
+    assert mat2.dims == 2 and mat2.w == 2 and mat2.h == 1
+    mat2 = mat1.reshape((2, 1, 1))
+    assert mat2.dims == 3 and mat2.w == 2 and mat2.h == 1 and mat2.c == 1
+    mat2 = mat1.reshape((2, 1, 1, 1))
+    assert mat2.dims == 4 and mat2.w == 2 and mat2.h == 1 and mat2.d == 1 and mat2.c == 1
+
+    mat1 = ncnn.Mat((1, 2, 3))
+    mat2 = mat1.reshape((6,))
+    assert mat2.dims == 1 and mat2.w == 6
+    mat2 = mat1.reshape((2, 3))
+    assert mat2.dims == 2 and mat2.w == 2 and mat2.h == 3 and mat2.c == 1
+    mat2 = mat1.reshape((2, 3, 1))
+    assert mat2.dims == 3 and mat2.w == 2 and mat2.h == 3 and mat2.c == 1
+    mat2 = mat1.reshape((2, 1, 3, 1))
+    assert mat2.dims == 4 and mat2.w == 2 and mat2.h == 1 and mat2.d == 3 and mat2.c == 1
+
+    with pytest.raises(RuntimeError) as execinfo:
+        mat1.reshape((1, 1, 1, 1, 1))
+    assert "shape must be 1, 2, 3 or 4 dims, not 5" in str(execinfo.value)
+
+
+def test_create():
+    mat = ncnn.Mat()
+    mat.create(1)
+    assert mat.dims == 1 and mat.w == 1
+    mat.create(2, 3)
+    assert mat.dims == 2 and mat.w == 2 and mat.h == 3
+    mat.create(4, 5, 6)
+    assert mat.dims == 3 and mat.w == 4 and mat.h == 5 and mat.c == 6
+    mat.create(7, 8, 9, 10)
+    assert mat.dims == 4 and mat.w == 7 and mat.h == 8 and mat.d == 9 and mat.c == 10
+
+    mat.create((1,))
+    assert mat.dims == 1 and mat.w == 1
+    mat.create((2, 3))
+    assert mat.dims == 2 and mat.w == 2 and mat.h == 3
+    mat.create((4, 5, 6))
+    assert mat.dims == 3 and mat.w == 4 and mat.h == 5 and mat.c == 6
+    mat.create((7, 8, 9, 10))
+    assert mat.dims == 4 and mat.w == 7 and mat.h == 8 and mat.d == 9 and mat.c == 10
+
+
+def test_create_like():
+    mat2 = ncnn.Mat()
+
+    mat1 = ncnn.Mat(1)
+    mat2.create_like(mat1)
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w
+    mat1 = ncnn.Mat(2, 3)
+    mat2.create_like(mat1)
+    assert mat1.dims == mat2.dims and mat1.w == mat2.w and mat1.h == mat2.h
+    mat1 = ncnn.Mat(4, 5, 6)
+    mat2.create_like(mat1)
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.c == mat2.c
+    )
+    mat1 = ncnn.Mat(7, 8, 9, 10)
+    mat2.create_like(mat1)
+    assert (
+        mat1.dims == mat2.dims
+        and mat1.w == mat2.w
+        and mat1.h == mat2.h
+        and mat1.d == mat2.d
+        and mat1.c == mat2.c
+    )
+
+
+def test_addref_release():
+    mat = ncnn.Mat(1)
+    assert mat.refcount == 1
+
+    mat.addref()
+    assert mat.refcount == 2
+
+    mat.release()
+    assert mat.refcount == None
+
+
+def test_empty():
+    mat = ncnn.Mat()
+    assert mat.empty() == True
+
+    mat = ncnn.Mat(1)
+    assert mat.empty() == False
+
+
+def test_total():
+    mat = ncnn.Mat(1)
+    assert mat.total() == 1
+    mat = ncnn.Mat(2, 3)
+    assert mat.total() == 2 * 3
+    mat = ncnn.Mat(4, 5, 6)
+    assert mat.total() == 4 * 5 * 6
+    mat = ncnn.Mat(7, 8, 9, 10)
+    assert mat.total() == 7 * 8 * 9 * 10
+
+
+def test_elembits():
+    mat = ncnn.Mat(1, elemsize=1, elempack=1)
+    assert mat.elembits() == 8
+    mat = ncnn.Mat(2, elemsize=2, elempack=1)
+    assert mat.elembits() == 16
+    mat = ncnn.Mat(3, elemsize=4, elempack=1)
+    assert mat.elembits() == 32
+
+
+def test_shape():
+    mat = ncnn.Mat(1)
+    shape = mat.shape()
+    assert shape.dims == 1 and shape.w == 1
+    mat = ncnn.Mat(2, 3)
+    shape = mat.shape()
+    assert shape.dims == 2 and shape.w == 2 and shape.h == 3
+    mat = ncnn.Mat(4, 5, 6)
+    shape = mat.shape()
+    assert shape.dims == 3 and shape.w == 4 and shape.h == 5 and shape.c == 6
+    mat = ncnn.Mat(7, 8, 9, 10)
+    shape = mat.shape()
+    assert shape.dims == 4 and shape.w == 7 and shape.h == 8 and shape.d == 9 and shape.c == 10
+
+
+def test_channel_depth_row():
+    mat = ncnn.Mat(2, 3, 4, 5)
+    mat.fill(6.0)
+    channel = mat.channel(1)
+    assert channel.dims == 3 and channel.w == 2 and channel.h == 3 and channel.c == 4
+
+    depth = channel.depth(1)
+    assert depth.dims == 2 and depth.w == 2 and depth.h == 3
+
+    row = depth.row(1)
+    assert len(row) == 2 and np.abs(row[0] - 6.0) < sys.float_info.min
+
+
+def test_channel_row():
+    mat = ncnn.Mat(2, 3, 4)
+    mat.fill(4.0)
+    channel = mat.channel(1)
+    assert channel.dims == 2 and channel.w == 2 and channel.h == 3 and channel.c == 1
+
+    row = channel.row(1)
+    assert len(row) == 2 and np.abs(row[0] - 4.0) < sys.float_info.min
+
+
+def test_channel_range():
+    mat = ncnn.Mat(1, 2, 3)
+    channel_range = mat.channel_range(0, 2)
+    assert (
+        channel_range.dims == 3
+        and channel_range.w == 1
+        and channel_range.h == 2
+        and channel_range.c == 2
+    )
+
+
+def test_depth_range():
+    mat = ncnn.Mat(1, 2, 3, 4)
+    depth_range = mat.channel(1).depth_range(1, 2)
+    assert (
+        depth_range.dims == 3
+        and depth_range.w == 1
+        and depth_range.h == 2
+        and depth_range.c == 2
+    )
+
+
+def test_row_range():
+    mat = ncnn.Mat(1, 2)
+    row_range = mat.row_range(0, 2)
+    assert row_range.dims == 2 and row_range.w == 1 and row_range.h == 2
+
+
+def test_range():
+    mat = ncnn.Mat(2)
+    range = mat.range(0, 2)
+    assert range.dims == 1 and range.w == 2
+
+
+def test_getitem_setitem():
+    """Read and write single Mat elements through ``[]`` indexing."""
+    mat = ncnn.Mat(2)
+    mat.fill(1)
+    # 1.0 and 2.0 are exactly representable as floats, so values are compared
+    # exactly. The former bound, sys.float_info.min (~2.2e-308), is the
+    # smallest normal double rather than a tolerance, so in practice it
+    # already amounted to an exact comparison.
+    assert mat[0] == 1.0 and mat[1] == 1.0
+
+    mat[0] = 2.0
+    # Only the element that was written may change.
+    assert mat[0] == 2.0 and mat[1] == 1.0
+
+
+def test_from_pixels():
+    """Convert random uint8 HWC images with ``Mat.from_pixels`` (PIXEL_RGB).
+
+    Runs once on a 400-pixel-wide buffer and once on a 500-pixel-wide buffer
+    read with ``stride=500 * 3``; both must give a 400x300x3 Mat whose sampled
+    values equal the source pixels.
+    """
+    rgb = ncnn.Mat.PixelType.PIXEL_RGB
+
+    def expect_match(image, converted):
+        # Shape of the result, then three sampled pixels (one per channel).
+        assert converted.dims == 3
+        assert converted.w == 400 and converted.h == 300 and converted.c == 3
+        assert image[0, 0, 0] == converted.channel(0).row(0)[0]
+        assert image[200, 150, 1] == converted.channel(1).row(200)[150]
+        assert image[299, 399, 2] == converted.channel(2).row(299)[399]
+
+    image = np.random.randint(0, 256, size=(300, 400, 3)).astype(np.uint8)  # hwc
+    expect_match(image, ncnn.Mat.from_pixels(image, rgb, 400, 300))  # chw
+
+    image = np.random.randint(0, 256, size=(300, 500, 3)).astype(np.uint8)  # hwc
+    expect_match(
+        image, ncnn.Mat.from_pixels(image, rgb, 400, 300, stride=500 * 3)
+    )  # chw
+
+
+def test_from_pixels_resize():
+    """Check ``Mat.from_pixels_resize`` with PIXEL_BGR2RGB, with and without stride.
+
+    When the target size differs from the source only the output shape is
+    checked; when it equals the source size, sampled pixels are also compared
+    with the channel order reversed (source channel 0 against Mat channel 2).
+    """
+    bgr2rgb = ncnn.Mat.PixelType.PIXEL_BGR2RGB
+
+    def random_image(width):
+        return np.random.randint(0, 256, size=(300, width, 3)).astype(np.uint8)  # hwc
+
+    def expect_shape(result, w, h):
+        assert result.dims == 3 and result.w == w and result.h == h and result.c == 3
+
+    def expect_swapped_pixels(image, result):
+        assert image[0, 0, 0] == result.channel(2).row(0)[0]
+        assert image[200, 150, 1] == result.channel(1).row(200)[150]
+        assert image[299, 399, 2] == result.channel(0).row(299)[399]
+
+    # No stride argument, downscale to 200x150.
+    image = random_image(400)
+    result = ncnn.Mat.from_pixels_resize(image, bgr2rgb, 400, 300, 200, 150)  # chw
+    expect_shape(result, 200, 150)
+
+    # No stride argument, target size equals source size.
+    image = random_image(400)
+    result = ncnn.Mat.from_pixels_resize(image, bgr2rgb, 400, 300, 400, 300)  # chw
+    expect_shape(result, 400, 300)
+    expect_swapped_pixels(image, result)
+
+    # 500-pixel-wide buffer with an explicit stride, downscale to 200x150.
+    image = random_image(500)
+    result = ncnn.Mat.from_pixels_resize(
+        image, bgr2rgb, 400, 300, 500 * 3, 200, 150
+    )  # chw
+    expect_shape(result, 200, 150)
+
+    # 500-pixel-wide buffer with an explicit stride, target equals source size.
+    image = random_image(500)
+    result = ncnn.Mat.from_pixels_resize(
+        image, bgr2rgb, 400, 300, 500 * 3, 400, 300
+    )  # chw
+    expect_shape(result, 400, 300)
+    expect_swapped_pixels(image, result)
+
+
+def test_from_pixels_roi():
+    """Check ``Mat.from_pixels_roi`` for a 200x150 region at offset (100, 75).
+
+    Runs once without a stride argument and once on a 500-pixel-wide buffer
+    with ``500 * 3`` passed as stride. Mat element (row r, column c) is
+    compared with source pixel (75 + r, 100 + c).
+    """
+    rgb = ncnn.Mat.PixelType.PIXEL_RGB
+
+    def expect_roi(image, region):
+        assert region.dims == 3
+        assert region.w == 200 and region.h == 150 and region.c == 3
+        assert image[75, 100, 0] == region.channel(0).row(0)[0]
+        assert image[150, 200, 1] == region.channel(1).row(75)[100]
+        assert image[224, 299, 2] == region.channel(2).row(149)[199]
+
+    image = np.random.randint(0, 256, size=(300, 400, 3)).astype(np.uint8)  # hwc
+    region = ncnn.Mat.from_pixels_roi(
+        image, rgb, 400, 300, 100, 75, 200, 150
+    )  # chw
+    expect_roi(image, region)
+
+    image = np.random.randint(0, 256, size=(300, 500, 3)).astype(np.uint8)  # hwc
+    region = ncnn.Mat.from_pixels_roi(
+        image, rgb, 400, 300, 500 * 3, 100, 75, 200, 150
+    )  # chw
+    expect_roi(image, region)
+
+
+def test_from_pixels_roi_resize():
+    """Check the output shape of ``Mat.from_pixels_roi_resize``.
+
+    A 200x150 region at offset (100, 75) is resized to 100x75, once without a
+    stride argument and once with ``500 * 3`` passed as stride.
+    """
+    rgb = ncnn.Mat.PixelType.PIXEL_RGB
+    roi_and_target = (100, 75, 200, 150, 100, 75)
+
+    def expect_shape(result):
+        assert result.dims == 3 and result.w == 100 and result.h == 75 and result.c == 3
+
+    image = np.random.randint(0, 256, size=(300, 400, 3)).astype(np.uint8)  # hwc
+    result = ncnn.Mat.from_pixels_roi_resize(
+        image, rgb, 400, 300, *roi_and_target
+    )  # chw
+    expect_shape(result)
+
+    image = np.random.randint(0, 256, size=(300, 500, 3)).astype(np.uint8)  # hwc
+    result = ncnn.Mat.from_pixels_roi_resize(
+        image, rgb, 400, 300, 500 * 3, *roi_and_target
+    )  # chw
+    expect_shape(result)
+
+
+def test_substract_mean_normalize():
+    """Check ``Mat.substract_mean_normalize`` with norm only, mean only, and both.
+
+    An empty list is passed for the side that is not applied. The first
+    element of channel 0 is compared with the expected value within 1e-5.
+    """
+    mean = 127.5
+    scale = 0.007843
+    mean_vals = [mean, mean, mean]
+    norm_vals = [scale, scale, scale]
+
+    image = np.random.randint(0, 256, size=(300, 400, 3)).astype(np.uint8)  # hwc
+    first = image[0, 0, 0]
+
+    def fresh_mat():
+        # Each case starts again from the unmodified pixel data.
+        return ncnn.Mat.from_pixels(image, ncnn.Mat.PixelType.PIXEL_RGB, 400, 300)  # chw
+
+    # Normalisation only.
+    mat = fresh_mat()
+    mat.substract_mean_normalize([], norm_vals)
+    assert np.abs(first * scale - mat.channel(0).row(0)[0]) < 1e-5
+
+    # Mean subtraction only.
+    mat = fresh_mat()
+    mat.substract_mean_normalize(mean_vals, [])
+    assert np.abs((first - mean) - mat.channel(0).row(0)[0]) < 1e-5
+
+    # Mean subtraction followed by normalisation.
+    mat = fresh_mat()
+    mat.substract_mean_normalize(mean_vals, norm_vals)
+    assert np.abs((first - mean) * scale - mat.channel(0).row(0)[0]) < 1e-5
--- a/3rdparty/ncnn/python/tests/test_net.py
+++ b/3rdparty/ncnn/python/tests/test_net.py
@ -0,0 +1,144 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import pytest
+
+import ncnn
+
+
+def test_net():
+    """Load ``tests/test.param`` into a Net, run one extraction, then clear it.
+
+    The Net and the extractor are both used as context managers. Model data
+    is read from a ``DataReaderFromEmpty``.
+    """
+    reader = ncnn.DataReaderFromEmpty()
+
+    with ncnn.Net() as net:
+        status = net.load_param("tests/test.param")
+        net.load_model(reader)
+        assert status == 0
+        assert len(net.blobs()) == 3
+        assert len(net.layers()) == 3
+
+        inputs = net.input_names()
+        outputs = net.output_names()
+        assert len(inputs) > 0
+        assert len(outputs) > 0
+
+        feed = ncnn.Mat((227, 227, 3))
+
+        with net.create_extractor() as extractor:
+            extractor.input("data", feed)
+            status, result = extractor.extract("output")
+
+        assert status == 0
+        assert result.dims == 1
+        assert result.w == 1
+
+        # After clear() the net must report no blobs and no layers.
+        net.clear()
+        assert len(net.blobs()) == 0
+        assert len(net.layers()) == 0
+
+
+def test_net_vulkan():
+    """Run the ``test_net`` scenario with ``opt.use_vulkan_compute`` enabled.
+
+    Skipped when the ncnn module does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    dr = ncnn.DataReaderFromEmpty()
+
+    net = ncnn.Net()
+    net.opt.use_vulkan_compute = True
+    ret = net.load_param("tests/test.param")
+    net.load_model(dr)
+    assert ret == 0 and len(net.blobs()) == 3 and len(net.layers()) == 3
+
+    in_mat = ncnn.Mat((227, 227, 3))
+
+    ex = net.create_extractor()
+    ex.input("data", in_mat)
+    ret, out_mat = ex.extract("output")
+
+    assert ret == 0 and out_mat.dims == 1 and out_mat.w == 1
+
+    # No context managers here, so extractor and net are cleared explicitly.
+    ex.clear()
+
+    net.clear()
+    assert len(net.blobs()) == 0 and len(net.layers()) == 0
+
+
+def test_custom_layer():
+    """Register a Python-defined layer with a Net and run it once.
+
+    The layer adds 1 to its input, so feeding a Mat filled with 1.0 is
+    expected to produce 2.0 at the "output" blob.
+    """
+
+    class CustomLayer(ncnn.Layer):
+        # Every constructed instance is appended here and removed again by
+        # the destroyer callback below.
+        # NOTE(review): presumably this keeps the Python objects referenced
+        # while ncnn uses the layer -- confirm against the binding code.
+        customLayers = []
+
+        def __init__(self):
+            ncnn.Layer.__init__(self)
+            self.one_blob_only = True
+
+            self.customLayers.append(self)
+
+        def forward(self, bottom_blob, top_blob, opt):
+            incremented = np.array(bottom_blob)
+            incremented += 1
+
+            top_blob.clone_from(ncnn.Mat(incremented), opt.blob_allocator)
+            # -100 signals that the output blob ended up empty.
+            return -100 if top_blob.empty() else 0
+
+    def CustomLayer_layer_creator():
+        return CustomLayer()
+
+    def CustomLayer_layer_destroyer(layer):
+        # Drop the first registry entry that compares equal to ``layer``.
+        for index, kept in enumerate(CustomLayer.customLayers):
+            if kept == layer:
+                del CustomLayer.customLayers[index]
+                break
+
+    reader = ncnn.DataReaderFromEmpty()
+
+    net = ncnn.Net()
+    net.register_custom_layer(
+        "CustomLayer", CustomLayer_layer_creator, CustomLayer_layer_destroyer
+    )
+    status = net.load_param("tests/custom_layer.param")
+    net.load_model(reader)
+    assert status == 0
+    assert len(net.blobs()) == 2
+    assert len(net.layers()) == 2
+
+    feed = ncnn.Mat(1)
+    feed.fill(1.0)
+
+    extractor = net.create_extractor()
+    extractor.input("data", feed)
+    status, result = extractor.extract("output")
+    assert status == 0
+    assert result.dims == 1
+    assert result.w == 1
+    assert result[0] == 2.0
+
+    extractor.clear()
+
+    net.clear()
+    assert len(net.blobs()) == 0
+    assert len(net.layers()) == 0
+
+
+def test_vulkan_device_index():
+    """Select a Net's Vulkan device by index.
+
+    A new Net reports no device; after ``set_vulkan_device(0)`` it must
+    report one. Skipped when the ncnn module does not expose
+    ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    net = ncnn.Net()
+    assert net.vulkan_device() is None
+
+    net.set_vulkan_device(0)
+    assert net.vulkan_device() is not None
+
+
+def test_vulkan_device_vkdev():
+    """Select a Net's Vulkan device by passing a device object.
+
+    The device comes from ``ncnn.get_gpu_device(0)``. Skipped when the ncnn
+    module does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    net = ncnn.Net()
+    assert net.vulkan_device() is None
+
+    vkdev = ncnn.get_gpu_device(0)
+    net.set_vulkan_device(vkdev)
+    assert net.vulkan_device() is not None
--- a/3rdparty/ncnn/python/tests/test_option.py
+++ b/3rdparty/ncnn/python/tests/test_option.py
@ -0,0 +1,139 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+
+def test_option():
+    """Exercise the read/write attributes of ``ncnn.Option``.
+
+    Each boolean switch is set to True and then False and read back after
+    every write. The thread count, the allocator slots and the OpenMP block
+    time have their initial value checked before being overwritten.
+    """
+    pool = ncnn.PoolAllocator()
+    opt = ncnn.Option()
+
+    def toggle(flag):
+        # Write True, then False, verifying the stored value after each write.
+        for state in (True, False):
+            setattr(opt, flag, state)
+            assert getattr(opt, flag) == state
+
+    toggle("lightmode")
+
+    assert opt.num_threads == ncnn.get_cpu_count()
+    opt.num_threads = 1
+    assert opt.num_threads == 1
+
+    # Both allocator slots start unset and accept the same pool allocator.
+    for slot in ("blob_allocator", "workspace_allocator"):
+        assert getattr(opt, slot) is None
+        setattr(opt, slot, pool)
+        assert getattr(opt, slot) == pool
+
+    assert opt.openmp_blocktime == 20
+    opt.openmp_blocktime = 40
+    assert opt.openmp_blocktime == 40
+
+    for flag in (
+        "use_winograd_convolution",
+        "use_sgemm_convolution",
+        "use_int8_inference",
+        "use_vulkan_compute",
+        "use_bf16_storage",
+        "use_fp16_packed",
+        "use_fp16_storage",
+        "use_fp16_arithmetic",
+        "use_int8_packed",
+        "use_int8_storage",
+        "use_int8_arithmetic",
+        "use_packing_layout",
+        "use_shader_pack8",
+        "use_subgroup_basic",
+        "use_subgroup_vote",
+        "use_subgroup_ballot",
+        "use_subgroup_shuffle",
+        "use_image_storage",
+        "use_tensor_storage",
+    ):
+        toggle(flag)
--- a/3rdparty/ncnn/python/tests/test_paramdict.py
+++ b/3rdparty/ncnn/python/tests/test_paramdict.py
@ -0,0 +1,33 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+
+def test_paramdict():
+    """Store an int, a float and a Mat in a ``ParamDict`` and read them back.
+
+    ``type(id)`` is expected to be 0 for an unset id and 2, 3 and 4 after
+    setting an int, a float and a Mat respectively.
+    """
+    params = ncnn.ParamDict()
+
+    # Unset id: type 0, and get() hands back the supplied default.
+    assert params.type(0) == 0
+    assert params.get(0, -1) == -1
+
+    params.set(1, 1)
+    assert params.type(1) == 2
+    assert params.get(1, -1) == 1
+
+    params.set(2, 2.0)
+    assert params.type(2) == 3
+    assert params.get(2, -2.0) == 2.0
+
+    stored = ncnn.Mat(1)
+    params.set(3, stored)
+    assert params.type(3) == 4
+    assert params.get(3, ncnn.Mat()).dims == stored.dims
--- a/3rdparty/ncnn/python/tests/test_vulkan_allocator.py
+++ b/3rdparty/ncnn/python/tests/test_vulkan_allocator.py
@ -0,0 +1,124 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+
+def test_vk_blob_allocator():
+    """Exercise ``ncnn.VkBlobAllocator`` on the device from ``get_gpu_device(0)``.
+
+    Checks the memory type indices, toggles ``mappable`` and ``coherent``,
+    then allocates and frees one buffer and one image. Skipped when the ncnn
+    module does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    vkdev = ncnn.get_gpu_device(0)
+    assert vkdev is not None
+    allocator = ncnn.VkBlobAllocator(vkdev)
+    assert allocator.buffer_memory_type_index >= 0
+    assert allocator.image_memory_type_index >= 0
+
+    # Flip each flag and confirm the new value is read back.
+    mappable = allocator.mappable
+    allocator.mappable = not mappable
+    assert allocator.mappable == (not mappable)
+
+    coherent = allocator.coherent
+    allocator.coherent = not coherent
+    assert allocator.coherent == (not coherent)
+
+    # One-argument form: buffer allocation.
+    bufmem = allocator.fastMalloc(10 * 1024)
+    assert bufmem is not None
+    allocator.fastFree(bufmem)
+
+    # Five-argument form: image allocation.
+    imgmem = allocator.fastMalloc(4, 4, 3, 4, 1)
+    assert imgmem is not None
+    allocator.fastFree(imgmem)
+
+
+def test_vk_weight_allocator():
+    """Exercise ``ncnn.VkWeightAllocator`` on the device from ``get_gpu_device(0)``.
+
+    Checks the memory type indices, toggles ``mappable`` and ``coherent``,
+    then allocates and frees one buffer and one image. Skipped when the ncnn
+    module does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    vkdev = ncnn.get_gpu_device(0)
+    assert vkdev is not None
+    allocator = ncnn.VkWeightAllocator(vkdev)
+    assert allocator.buffer_memory_type_index >= 0
+    assert allocator.image_memory_type_index >= 0
+
+    # Flip each flag and confirm the new value is read back.
+    mappable = allocator.mappable
+    allocator.mappable = not mappable
+    assert allocator.mappable == (not mappable)
+
+    coherent = allocator.coherent
+    allocator.coherent = not coherent
+    assert allocator.coherent == (not coherent)
+
+    # One-argument form: buffer allocation.
+    bufmem = allocator.fastMalloc(10 * 1024)
+    assert bufmem is not None
+    allocator.fastFree(bufmem)
+
+    # Five-argument form: image allocation.
+    imgmem = allocator.fastMalloc(4, 4, 3, 4, 1)
+    assert imgmem is not None
+    allocator.fastFree(imgmem)
+
+
+def test_vk_staging_allocator():
+    """Exercise ``ncnn.VkStagingAllocator`` on the device from ``get_gpu_device(0)``.
+
+    Checks the memory type indices, toggles ``mappable`` and ``coherent``,
+    then allocates and frees one buffer and one image. Skipped when the ncnn
+    module does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    vkdev = ncnn.get_gpu_device(0)
+    assert vkdev is not None
+    allocator = ncnn.VkStagingAllocator(vkdev)
+    assert allocator.buffer_memory_type_index >= 0
+    assert allocator.image_memory_type_index >= 0
+
+    # Flip each flag and confirm the new value is read back.
+    mappable = allocator.mappable
+    allocator.mappable = not mappable
+    assert allocator.mappable == (not mappable)
+
+    coherent = allocator.coherent
+    allocator.coherent = not coherent
+    assert allocator.coherent == (not coherent)
+
+    # One-argument form: buffer allocation.
+    bufmem = allocator.fastMalloc(10 * 1024)
+    assert bufmem is not None
+    allocator.fastFree(bufmem)
+
+    # Five-argument form: image allocation.
+    imgmem = allocator.fastMalloc(4, 4, 3, 4, 1)
+    assert imgmem is not None
+    allocator.fastFree(imgmem)
+
+
+def test_vk_weight_staging_allocator():
+    """Exercise ``ncnn.VkWeightStagingAllocator`` on the device from ``get_gpu_device(0)``.
+
+    Checks the memory type indices, toggles ``mappable`` and ``coherent``,
+    and allocates and frees one buffer. Unlike the other allocator tests in
+    this file, the image allocation is expected to return ``None``. Skipped
+    when the ncnn module does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    vkdev = ncnn.get_gpu_device(0)
+    assert vkdev is not None
+    allocator = ncnn.VkWeightStagingAllocator(vkdev)
+    assert allocator.buffer_memory_type_index >= 0
+    assert allocator.image_memory_type_index >= 0
+
+    # Flip each flag and confirm the new value is read back.
+    mappable = allocator.mappable
+    allocator.mappable = not mappable
+    assert allocator.mappable == (not mappable)
+
+    coherent = allocator.coherent
+    allocator.coherent = not coherent
+    assert allocator.coherent == (not coherent)
+
+    # One-argument form: buffer allocation.
+    bufmem = allocator.fastMalloc(10 * 1024)
+    assert bufmem is not None
+    allocator.fastFree(bufmem)
+
+    # Five-argument form: image allocation, expected to yield nothing here.
+    imgmem = allocator.fastMalloc(4, 4, 3, 4, 1)
+    assert imgmem is None
--- a/3rdparty/ncnn/python/tests/test_vulkan_device.py
+++ b/3rdparty/ncnn/python/tests/test_vulkan_device.py
@ -0,0 +1,55 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+
+def check_gpuinfo(gpuinfo):
+    """Assert that a GPU info object reports plausible values.
+
+    ``gpuinfo`` must provide the zero-argument methods ``api_version``,
+    ``driver_version``, ``vendor_id``, ``device_id``, ``pipeline_cache_uuid``
+    and ``type``. Raises ``AssertionError`` on the first failed check.
+    """
+    # Versions and identifiers must all be strictly positive.
+    assert gpuinfo.api_version() > 0
+    assert gpuinfo.driver_version() > 0
+    assert gpuinfo.vendor_id() > 0
+    assert gpuinfo.device_id() > 0
+
+    cache_uuid = gpuinfo.pipeline_cache_uuid()
+    assert cache_uuid is not None
+
+    device_type = gpuinfo.type()
+    assert device_type >= 0
+
+
+def test_gpu_api():
+    """Exercise the module-level GPU functions of ncnn.
+
+    Creates the GPU instance, checks the device count and default index,
+    validates the info for device 0 (both from ``get_gpu_info`` and from the
+    device object), then destroys the instance. Skipped when the ncnn module
+    does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    assert ncnn.create_gpu_instance() == 0
+    assert ncnn.get_gpu_count() > 0
+    assert ncnn.get_default_gpu_index() >= 0
+
+    gpuinfo = ncnn.get_gpu_info(0)
+    check_gpuinfo(gpuinfo)
+
+    vkdev = ncnn.get_gpu_device(0)
+    assert vkdev is not None
+    gpuinfo = vkdev.info()
+    check_gpuinfo(gpuinfo)
+
+    ncnn.destroy_gpu_instance()
+
+
+def test_vulkan_device():
+    """Construct ``ncnn.VulkanDevice(0)`` directly and validate its info.
+
+    Skipped when the ncnn module does not expose ``get_gpu_count``.
+    """
+    if not hasattr(ncnn, "get_gpu_count"):
+        # Report a skip; a bare return would be counted by pytest as a pass.
+        pytest.skip("ncnn module does not expose get_gpu_count")
+
+    vkdev = ncnn.VulkanDevice(0)
+    assert vkdev is not None
+    gpuinfo = vkdev.info()
+    check_gpuinfo(gpuinfo)