feat: 切换后端至PaddleOCR-NCNN，切换工程为CMake

1.项目后端整体迁移至PaddleOCR-NCNN算法，已通过基本的兼容性测试 2.工程改为使用CMake组织，后续为了更好地兼容第三方库，不再提供QMake工程 3.重整权利声明文件，重整代码工程，确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN，切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
2022-05-10 09:54:44 +08:00
parent ecdd171c6f
commit 718c41634f
10018 changed files with 3593797 additions and 186748 deletions
--- a/3rdparty/opencv-4.5.4/samples/dnn/scene_text_recognition.cpp
+++ b/3rdparty/opencv-4.5.4/samples/dnn/scene_text_recognition.cpp
@ -0,0 +1,144 @@
+#include <iostream>
+#include <fstream>
+
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/dnn/dnn.hpp>
+
+using namespace cv;
+using namespace cv::dnn;
+
+String keys =
+        "{ help  h                          | | Print help message. }"
+        "{ inputImage i                     | | Path to an input image. Skip this argument to capture frames from a camera. }"
+        "{ modelPath mp                     | | Path to a binary .onnx file contains trained CRNN text recognition model. "
+            "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
+        "{ RGBInput rgb                     |0| 0: imread with flags=IMREAD_GRAYSCALE; 1: imread with flags=IMREAD_COLOR. }"
+        "{ evaluate e                       |false| false: predict with input images; true: evaluate on benchmarks. }"
+        "{ evalDataPath edp                 | | Path to benchmarks for evaluation. "
+            "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}"
+        "{ vocabularyPath vp                | alphabet_36.txt | Path to recognition vocabulary. "
+            "Download links are provided in doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown}";
+
+String convertForEval(String &input);
+
+int main(int argc, char** argv)
+{
+    // Parse arguments
+    CommandLineParser parser(argc, argv, keys);
+    parser.about("Use this script to run the PyTorch implementation of "
+                 "An End-to-End Trainable Neural Network for Image-based SequenceRecognition and Its Application to Scene Text Recognition "
+                 "(https://arxiv.org/abs/1507.05717)");
+    if (argc == 1 || parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    String modelPath = parser.get<String>("modelPath");
+    String vocPath = parser.get<String>("vocabularyPath");
+    int imreadRGB = parser.get<int>("RGBInput");
+
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 1;
+    }
+
+    // Load the network
+    CV_Assert(!modelPath.empty());
+    TextRecognitionModel recognizer(modelPath);
+
+    // Load vocabulary
+    CV_Assert(!vocPath.empty());
+    std::ifstream vocFile;
+    vocFile.open(samples::findFile(vocPath));
+    CV_Assert(vocFile.is_open());
+    String vocLine;
+    std::vector<String> vocabulary;
+    while (std::getline(vocFile, vocLine)) {
+        vocabulary.push_back(vocLine);
+    }
+    recognizer.setVocabulary(vocabulary);
+    recognizer.setDecodeType("CTC-greedy");
+
+    // Set parameters
+    double scale = 1.0 / 127.5;
+    Scalar mean = Scalar(127.5, 127.5, 127.5);
+    Size inputSize = Size(100, 32);
+    recognizer.setInputParams(scale, inputSize, mean);
+
+    if (parser.get<bool>("evaluate"))
+    {
+        // For evaluation
+        String evalDataPath = parser.get<String>("evalDataPath");
+        CV_Assert(!evalDataPath.empty());
+        String gtPath = evalDataPath + "/test_gts.txt";
+        std::ifstream evalGts;
+        evalGts.open(gtPath);
+        CV_Assert(evalGts.is_open());
+
+        String gtLine;
+        int cntRight=0, cntAll=0;
+        TickMeter timer;
+        timer.reset();
+
+        while (std::getline(evalGts, gtLine)) {
+            size_t splitLoc = gtLine.find_first_of(' ');
+            String imgPath = evalDataPath + '/' + gtLine.substr(0, splitLoc);
+            String gt = gtLine.substr(splitLoc+1);
+
+            // Inference
+            Mat frame = imread(samples::findFile(imgPath), imreadRGB);
+            CV_Assert(!frame.empty());
+            timer.start();
+            std::string recognitionResult = recognizer.recognize(frame);
+            timer.stop();
+
+            if (gt == convertForEval(recognitionResult))
+                cntRight++;
+
+            cntAll++;
+        }
+        std::cout << "Accuracy(%): " << (double)(cntRight) / (double)(cntAll) << std::endl;
+        std::cout << "Average Inference Time(ms): " << timer.getTimeMilli() / (double)(cntAll) << std::endl;
+    }
+    else
+    {
+        // Create a window
+        static const std::string winName = "Input Cropped Image";
+
+        // Open an image file
+        CV_Assert(parser.has("inputImage"));
+        Mat frame = imread(samples::findFile(parser.get<String>("inputImage")), imreadRGB);
+        CV_Assert(!frame.empty());
+
+        // Recognition
+        std::string recognitionResult = recognizer.recognize(frame);
+
+        imshow(winName, frame);
+        std::cout << "Predition: '" << recognitionResult << "'" << std::endl;
+        waitKey();
+    }
+
+    return 0;
+}
+
+// Convert the predictions to lower case, and remove other characters.
+// Only for Evaluation
+String convertForEval(String & input)
+{
+    String output;
+    for (uint i = 0; i < input.length(); i++){
+        char ch = input[i];
+        if ((int)ch >= 97 && (int)ch <= 122) {
+            output.push_back(ch);
+        } else if ((int)ch >= 65 && (int)ch <= 90) {
+            output.push_back((char)(ch + 32));
+        } else {
+            continue;
+        }
+    }
+
+    return output;
+}