feat: 切换后端至PaddleOCR-NCNN,切换工程为CMake
1.项目后端整体迁移至PaddleOCR-NCNN算法,已通过基本的兼容性测试 2.工程改为使用CMake组织,后续为了更好地兼容第三方库,不再提供QMake工程 3.重整权利声明文件,重整代码工程,确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN,切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
This commit is contained in:
192
3rdparty/ncnn/python/tests/benchmark.py
vendored
Normal file
192
3rdparty/ncnn/python/tests/benchmark.py
vendored
Normal file
@ -0,0 +1,192 @@
# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
import sys
|
||||
import time
|
||||
import ncnn
|
||||
|
||||
param_root = "../benchmark/"
|
||||
|
||||
g_warmup_loop_count = 8
|
||||
g_loop_count = 4
|
||||
g_enable_cooling_down = True
|
||||
|
||||
g_vkdev = None
|
||||
g_blob_vkallocator = None
|
||||
g_staging_vkallocator = None
|
||||
|
||||
g_blob_pool_allocator = ncnn.UnlockedPoolAllocator()
|
||||
g_workspace_pool_allocator = ncnn.PoolAllocator()
|
||||
|
||||
|
||||
def benchmark(comment, _in, opt):
|
||||
_in.fill(0.01)
|
||||
|
||||
g_blob_pool_allocator.clear()
|
||||
g_workspace_pool_allocator.clear()
|
||||
|
||||
if opt.use_vulkan_compute:
|
||||
g_blob_vkallocator.clear()
|
||||
g_staging_vkallocator.clear()
|
||||
|
||||
net = ncnn.Net()
|
||||
net.opt = opt
|
||||
|
||||
if net.opt.use_vulkan_compute:
|
||||
net.set_vulkan_device(g_vkdev)
|
||||
|
||||
net.load_param(param_root + comment + ".param")
|
||||
|
||||
dr = ncnn.DataReaderFromEmpty()
|
||||
net.load_model(dr)
|
||||
|
||||
input_names = net.input_names()
|
||||
output_names = net.output_names()
|
||||
|
||||
if g_enable_cooling_down:
|
||||
time.sleep(10)
|
||||
|
||||
# warm up
|
||||
for i in range(g_warmup_loop_count):
|
||||
# test with statement
|
||||
with net.create_extractor() as ex:
|
||||
ex.input(input_names[0], _in)
|
||||
ex.extract(output_names[0])
|
||||
|
||||
time_min = sys.float_info.max
|
||||
time_max = -sys.float_info.max
|
||||
time_avg = 0.0
|
||||
|
||||
for i in range(g_loop_count):
|
||||
start = time.time()
|
||||
|
||||
# test net keep alive until ex freed
|
||||
ex = net.create_extractor()
|
||||
ex.input(input_names[0], _in)
|
||||
ex.extract(output_names[0])
|
||||
|
||||
end = time.time()
|
||||
|
||||
timespan = end - start
|
||||
|
||||
time_min = timespan if timespan < time_min else time_min
|
||||
time_max = timespan if timespan > time_max else time_max
|
||||
time_avg += timespan
|
||||
|
||||
time_avg /= g_loop_count
|
||||
|
||||
print(
|
||||
"%20s min = %7.2f max = %7.2f avg = %7.2f"
|
||||
% (comment, time_min * 1000, time_max * 1000, time_avg * 1000)
|
||||
)


if __name__ == "__main__":
|
||||
loop_count = 4
|
||||
num_threads = ncnn.get_cpu_count()
|
||||
powersave = 0
|
||||
gpu_device = -1
|
||||
cooling_down = 1
|
||||
|
||||
argc = len(sys.argv)
|
||||
if argc >= 2:
|
||||
loop_count = int(sys.argv[1])
|
||||
if argc >= 3:
|
||||
num_threads = int(sys.argv[2])
|
||||
if argc >= 4:
|
||||
powersave = int(sys.argv[3])
|
||||
if argc >= 5:
|
||||
gpu_device = int(sys.argv[4])
|
||||
if argc >= 6:
|
||||
cooling_down = int(sys.argv[5])
|
||||
|
||||
use_vulkan_compute = gpu_device != -1
|
||||
|
||||
g_enable_cooling_down = cooling_down != 0
|
||||
|
||||
g_loop_count = loop_count
|
||||
|
||||
g_blob_pool_allocator.set_size_compare_ratio(0.0)
|
||||
g_workspace_pool_allocator.set_size_compare_ratio(0.5)
|
||||
|
||||
if use_vulkan_compute:
|
||||
g_warmup_loop_count = 10
|
||||
|
||||
g_vkdev = ncnn.get_gpu_device(gpu_device)
|
||||
|
||||
g_blob_vkallocator = ncnn.VkBlobAllocator(g_vkdev)
|
||||
g_staging_vkallocator = ncnn.VkStagingAllocator(g_vkdev)
|
||||
|
||||
opt = ncnn.Option()
|
||||
opt.lightmode = True
|
||||
opt.num_threads = num_threads
|
||||
opt.blob_allocator = g_blob_pool_allocator
|
||||
opt.workspace_allocator = g_workspace_pool_allocator
|
||||
if use_vulkan_compute:
|
||||
opt.blob_vkallocator = g_blob_vkallocator
|
||||
opt.workspace_vkallocator = g_blob_vkallocator
|
||||
opt.staging_vkallocator = g_staging_vkallocator
|
||||
opt.use_winograd_convolution = True
|
||||
opt.use_sgemm_convolution = True
|
||||
opt.use_int8_inference = True
|
||||
opt.use_vulkan_compute = use_vulkan_compute
|
||||
opt.use_fp16_packed = True
|
||||
opt.use_fp16_storage = True
|
||||
opt.use_fp16_arithmetic = True
|
||||
opt.use_int8_storage = True
|
||||
opt.use_int8_arithmetic = True
|
||||
opt.use_packing_layout = True
|
||||
opt.use_shader_pack8 = False
|
||||
opt.use_image_storage = False
|
||||
|
||||
ncnn.set_cpu_powersave(powersave)
|
||||
ncnn.set_omp_dynamic(0)
|
||||
ncnn.set_omp_num_threads(num_threads)
|
||||
|
||||
print("loop_count =", loop_count)
|
||||
print("num_threads =", num_threads)
|
||||
print("powersave =", ncnn.get_cpu_powersave())
|
||||
print("gpu_device =", gpu_device)
|
||||
print("cooling_down =", g_enable_cooling_down)
|
||||
|
||||
benchmark("squeezenet", ncnn.Mat((227, 227, 3)), opt)
|
||||
benchmark("squeezenet_int8", ncnn.Mat((227, 227, 3)), opt)
|
||||
benchmark("mobilenet", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("mobilenet_int8", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("mobilenet_v2", ncnn.Mat((224, 224, 3)), opt)
|
||||
# benchmark("mobilenet_v2_int8", ncnn.Mat(w=224, h=224, c=3), opt)
|
||||
benchmark("mobilenet_v3", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("shufflenet", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("shufflenet_v2", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("mnasnet", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("proxylessnasnet", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("efficientnet_b0", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("regnety_400m", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("blazeface", ncnn.Mat((128, 128, 3)), opt)
|
||||
benchmark("googlenet", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("googlenet_int8", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("resnet18", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("resnet18_int8", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("alexnet", ncnn.Mat((227, 227, 3)), opt)
|
||||
benchmark("vgg16", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("vgg16_int8", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("resnet50", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("resnet50_int8", ncnn.Mat((224, 224, 3)), opt)
|
||||
benchmark("squeezenet_ssd", ncnn.Mat((300, 300, 3)), opt)
|
||||
benchmark("squeezenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
|
||||
benchmark("mobilenet_ssd", ncnn.Mat((300, 300, 3)), opt)
|
||||
benchmark("mobilenet_ssd_int8", ncnn.Mat((300, 300, 3)), opt)
|
||||
benchmark("mobilenet_yolo", ncnn.Mat((416, 416, 3)), opt)
|
||||
benchmark("mobilenetv2_yolov3", ncnn.Mat((352, 352, 3)), opt)
|
||||
benchmark("yolov4-tiny", ncnn.Mat((416, 416, 3)), opt)