feat: 切换后端至PaddleOCR-NCNN，切换工程为CMake

1.项目后端整体迁移至PaddleOCR-NCNN算法，已通过基本的兼容性测试 2.工程改为使用CMake组织，后续为了更好地兼容第三方库，不再提供QMake工程 3.重整权利声明文件，重整代码工程，确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN，切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
2022-05-10 09:54:44 +08:00
parent ecdd171c6f
commit 718c41634f
10018 changed files with 3593797 additions and 186748 deletions
--- a/3rdparty/opencv-4.5.4/samples/gpu/CMakeLists.txt
+++ b/3rdparty/opencv-4.5.4/samples/gpu/CMakeLists.txt
@ -0,0 +1,61 @@
+ocv_install_example_src(gpu *.cpp *.hpp CMakeLists.txt) # NOTE(review): presumably registers these sample sources for installation; the macro is defined elsewhere - confirm
+# Modules that every sample in this directory includes and links against.
+set(OPENCV_CUDA_SAMPLES_REQUIRED_DEPS
+  opencv_core
+  opencv_flann
+  opencv_imgproc
+  opencv_imgcodecs
+  opencv_videoio
+  opencv_highgui
+  opencv_ml
+  opencv_video
+  opencv_objdetect
+  opencv_features2d
+  opencv_calib3d
+  opencv_superres
+  opencv_cudaarithm
+  opencv_cudafilters
+  opencv_cudawarping
+  opencv_cudaimgproc
+  opencv_cudafeatures2d
+  opencv_cudaoptflow
+  opencv_cudabgsegm
+  opencv_cudastereo
+  opencv_cudaobjdetect)
+ocv_check_dependencies(${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) # NOTE(review): presumably the source of OCV_DEPENDENCIES_FOUND tested below - confirm
+# Stop processing this directory unless examples are enabled and the dependency check passed.
+if(NOT BUILD_EXAMPLES OR NOT OCV_DEPENDENCIES_FOUND)
+  return()
+endif()
+
+project(gpu_samples)
+if(HAVE_CUDA OR CUDA_FOUND)
+  add_definitions(-DHAVE_CUDA=1) # lets sample sources test HAVE_CUDA at compile time
+endif()
+if(COMMAND ocv_warnings_disable) # only call the helper when it has been defined
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wsuggest-override -Winconsistent-missing-override)
+endif()
+ocv_include_modules_recurse(${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})
+if(HAVE_opencv_xfeatures2d) # optional module: used only when available
+  ocv_include_modules_recurse(opencv_xfeatures2d)
+endif()
+if(HAVE_opencv_cudacodec) # optional module: used only when available
+  ocv_include_modules_recurse(opencv_cudacodec)
+endif()
+if(HAVE_CUDA)
+  ocv_include_directories(${CUDA_INCLUDE_DIRS})
+endif()
+if((CV_GCC OR CV_CLANG) AND NOT ENABLE_NOISY_WARNINGS)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function") # NOTE(review): only the C flags are extended although the sources globbed below are *.cpp - confirm whether CMAKE_CXX_FLAGS was intended
+endif()
+file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) # one sample per .cpp file in this directory
+foreach(sample_filename ${all_samples})
+  ocv_define_sample(tgt ${sample_filename} gpu) # NOTE(review): presumably stores the created target name in `tgt`, which the calls below use - confirm
+  ocv_target_link_libraries(${tgt} PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})
+  if(HAVE_opencv_xfeatures2d)
+    ocv_target_link_libraries(${tgt} PRIVATE opencv_xfeatures2d)
+  endif()
+  if(HAVE_opencv_cudacodec)
+    ocv_target_link_libraries(${tgt} PRIVATE opencv_cudacodec)
+  endif()
+endforeach()
--- a/3rdparty/opencv-4.5.4/samples/gpu/alpha_comp.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/alpha_comp.cpp
@ -0,0 +1,68 @@
+#include <iostream>
+
+#include "opencv2/core/opengl.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/cudaimgproc.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+/**
+ * Interactive alphaComp demo.
+ *
+ * Draws one square outline on each of two 4-channel images, uploads both to
+ * the GPU and shows the result of the currently selected compositing
+ * operation. SPACE advances to the next operation (wrapping around after
+ * ALPHA_PREMUL), ESC leaves the loop.
+ */
+int main()
+{
+    cout << "This program demonstrates using alphaComp" << endl
+         << "Press SPACE to change compositing operation" << endl
+         << "Press ESC to exit" << endl;
+
+    namedWindow("First Image", WINDOW_NORMAL);
+    namedWindow("Second Image", WINDOW_NORMAL);
+    namedWindow("Result", WINDOW_OPENGL);
+
+    setGlDevice();
+
+    // Host-side inputs: zero-filled canvases with a single outline each.
+    Mat first(640, 480, CV_8UC4, Scalar::all(0));
+    rectangle(first, Rect(50, 50, 200, 200), Scalar(0, 0, 255, 128), 30);
+
+    Mat second(640, 480, CV_8UC4, Scalar::all(0));
+    rectangle(second, Rect(100, 100, 200, 200), Scalar(255, 0, 0, 128), 30);
+
+    // Device-side copies plus the buffer receiving the composited output.
+    GpuMat d_first(first);
+    GpuMat d_second(second);
+    GpuMat d_blend;
+
+    imshow("First Image", first);
+    imshow("Second Image", second);
+
+    // Printable names, indexed by the operation value.
+    static const char* const kOpNames[] =
+    {
+        "ALPHA_OVER", "ALPHA_IN", "ALPHA_OUT", "ALPHA_ATOP", "ALPHA_XOR", "ALPHA_PLUS", "ALPHA_OVER_PREMUL", "ALPHA_IN_PREMUL", "ALPHA_OUT_PREMUL",
+        "ALPHA_ATOP_PREMUL", "ALPHA_XOR_PREMUL", "ALPHA_PLUS_PREMUL", "ALPHA_PREMUL"
+    };
+
+    const char kEsc = 27;
+    const char kSpace = 32;
+
+    int op = ALPHA_OVER;
+    while (true)
+    {
+        cout << kOpNames[op] << endl;
+
+        alphaComp(d_first, d_second, d_blend, op);
+        imshow("Result", d_blend);
+
+        const char pressed = static_cast<char>(waitKey());
+        if (pressed == kEsc)
+            break;
+
+        // Step to the next operation, wrapping back to the first one.
+        if (pressed == kSpace)
+            op = (op >= ALPHA_PREMUL) ? ALPHA_OVER : op + 1;
+    }
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/bgfg_segm.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/bgfg_segm.cpp
@ -0,0 +1,143 @@
+#include <iostream>
+#include <string>
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/cudabgsegm.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+/** Background subtraction algorithm chosen through the "method" command-line option. */
+enum Method
+{
+    MOG,  // chosen when the option value is "mog"
+    MOG2, // chosen when the option value is "mog2"
+};
+
+/**
+ * CUDA background/foreground segmentation demo.
+ *
+ * Reads frames from a camera or a video file, feeds them to the selected
+ * background subtractor (MOG or MOG2) and shows the input frame, the
+ * foreground mask, the masked foreground and, when available, the
+ * background image. ESC or the end of the stream ends the loop.
+ *
+ * @return 0 on normal termination or after printing help / parser errors,
+ *         -1 when the method is unknown, the source cannot be opened, or no
+ *         first frame can be read.
+ */
+int main(int argc, const char** argv)
+{
+    cv::CommandLineParser cmd(argc, argv,
+        "{ c camera |                    | use camera }"
+        "{ f file   | ../data/vtest.avi  | input video file }"
+        "{ m method | mog                | method (mog, mog2) }"
+        "{ h help   |                    | print help message }");
+
+    if (cmd.has("help") || !cmd.check())
+    {
+        cmd.printMessage();
+        cmd.printErrors();
+        return 0;
+    }
+
+    bool useCamera = cmd.has("camera");
+    string file = cmd.get<string>("file");
+    string method = cmd.get<string>("method");
+
+    if (method != "mog"
+        && method != "mog2")
+    {
+        cerr << "Incorrect method" << endl;
+        return -1;
+    }
+
+    // The value was validated above, so anything other than "mog" is "mog2".
+    const Method m = (method == "mog") ? MOG : MOG2;
+
+    VideoCapture cap;
+
+    if (useCamera)
+        cap.open(0);
+    else
+        cap.open(file);
+
+    if (!cap.isOpened())
+    {
+        cerr << "can not open camera or video file" << endl;
+        return -1;
+    }
+
+    Mat frame;
+    cap >> frame;
+
+    // An opened source can still deliver no frame; do not upload an empty
+    // image or hand it to the subtractor.
+    if (frame.empty())
+    {
+        cerr << "can not read the first frame" << endl;
+        return -1;
+    }
+
+    GpuMat d_frame(frame);
+
+    Ptr<BackgroundSubtractor> mog = cuda::createBackgroundSubtractorMOG();
+    Ptr<BackgroundSubtractor> mog2 = cuda::createBackgroundSubtractorMOG2();
+
+    GpuMat d_fgmask;
+    GpuMat d_fgimg;
+    GpuMat d_bgimg;
+
+    Mat fgmask;
+    Mat fgimg;
+    Mat bgimg;
+
+    // Feed the first frame to the model before entering the display loop.
+    switch (m)
+    {
+    case MOG:
+        mog->apply(d_frame, d_fgmask, 0.01);
+        break;
+
+    case MOG2:
+        mog2->apply(d_frame, d_fgmask);
+        break;
+    }
+
+    namedWindow("image", WINDOW_NORMAL);
+    namedWindow("foreground mask", WINDOW_NORMAL);
+    namedWindow("foreground image", WINDOW_NORMAL);
+    namedWindow("mean background image", WINDOW_NORMAL);
+
+    for(;;)
+    {
+        cap >> frame;
+        if (frame.empty())
+            break;
+        d_frame.upload(frame);
+
+        int64 start = cv::getTickCount();
+
+        //update the model
+        switch (m)
+        {
+        case MOG:
+            mog->apply(d_frame, d_fgmask, 0.01);
+            mog->getBackgroundImage(d_bgimg);
+            break;
+
+        case MOG2:
+            mog2->apply(d_frame, d_fgmask);
+            mog2->getBackgroundImage(d_bgimg);
+            break;
+        }
+
+        // Only the model update above is timed; upload and display are excluded.
+        double fps = cv::getTickFrequency() / (cv::getTickCount() - start);
+        std::cout << "FPS : " << fps << std::endl;
+
+        // Foreground image: the input frame copied through the mask onto black.
+        d_fgimg.create(d_frame.size(), d_frame.type());
+        d_fgimg.setTo(Scalar::all(0));
+        d_frame.copyTo(d_fgimg, d_fgmask);
+
+        d_fgmask.download(fgmask);
+        d_fgimg.download(fgimg);
+        if (!d_bgimg.empty())
+            d_bgimg.download(bgimg);
+
+        imshow("image", frame);
+        imshow("foreground mask", fgmask);
+        imshow("foreground image", fgimg);
+        if (!bgimg.empty())
+            imshow("mean background image", bgimg);
+
+        char key = (char)waitKey(30);
+        if (key == 27)
+            break;
+    }
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/cascadeclassifier.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/cascadeclassifier.cpp
@ -0,0 +1,316 @@
+// WARNING: this sample is under construction! Use it on your own risk.
+#if defined _MSC_VER && _MSC_VER >= 1400
+#pragma warning(disable : 4100)
+#endif
+
+
+#include <iostream>
+#include <iomanip>
+#include "opencv2/objdetect.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/cudaobjdetect.hpp"
+#include "opencv2/cudaimgproc.hpp"
+#include "opencv2/cudawarping.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+
+/** Prints the command-line usage followed by the OpenCV version string. */
+static void help()
+{
+    cout << "Usage: ./cascadeclassifier \n\t--cascade <cascade_file>\n\t(<image>|--video <video>|--camera <camera_id>)\n";
+    cout << "Using OpenCV version " << CV_VERSION << endl << endl;
+}
+
+
+/**
+ * Host-side preprocessing: produces a grayscale image and a scaled copy.
+ *
+ * @param src     input image; converted with COLOR_BGR2GRAY when it has 3 channels
+ * @param gray    receives the converted image, or is assigned `src` otherwise
+ * @param resized receives `gray` resized by `scale`, or is assigned `gray` when scale is 1
+ * @param scale   factor applied to both dimensions
+ */
+static void convertAndResize(const Mat& src, Mat& gray, Mat& resized, double scale)
+{
+    if (src.channels() != 3)
+        gray = src;
+    else
+        cv::cvtColor(src, gray, COLOR_BGR2GRAY);
+
+    // Unit scale: no resampling, just hand over the grayscale image.
+    if (scale == 1)
+    {
+        resized = gray;
+        return;
+    }
+
+    const Size target(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
+    cv::resize(gray, resized, target);
+}
+
+/**
+ * Device-side preprocessing: produces a grayscale image and a scaled copy.
+ *
+ * @param src     input image; converted with COLOR_BGR2GRAY when it has 3 channels
+ * @param gray    receives the converted image, or is assigned `src` otherwise
+ * @param resized receives `gray` resized by `scale`, or is assigned `gray` when scale is 1
+ * @param scale   factor applied to both dimensions
+ */
+static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, double scale)
+{
+    if (src.channels() != 3)
+        gray = src;
+    else
+        cv::cuda::cvtColor(src, gray, COLOR_BGR2GRAY);
+
+    // Unit scale: no resampling, just hand over the grayscale image.
+    if (scale == 1)
+    {
+        resized = gray;
+        return;
+    }
+
+    const Size target(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
+    cv::cuda::resize(gray, resized, target);
+}
+
+
+/**
+ * Draws one line of text near the left edge of `img`.
+ *
+ * The text is drawn twice at the same position: first in black with a
+ * thicker stroke, then in `fontColor`, so the coloured text gets a dark
+ * outline.
+ *
+ * @param img       image to draw on
+ * @param lineOffsY zero-based line index; sets the vertical position
+ * @param fontColor colour of the top stroke
+ * @param ss        text to draw
+ */
+static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
+{
+    const int face = FONT_HERSHEY_DUPLEX;
+    const double glyphScale = 0.8;
+    const int stroke = 2;
+    const int lineType = 16;
+
+    // Line height is derived from a fixed reference string, not from `ss`.
+    const Size ref = cv::getTextSize("T[]", face, glyphScale, stroke, 0);
+    const Point origin(1, 3 * ref.height * (lineOffsY + 1) / 2);
+
+    putText(img, ss, origin, face, glyphScale, Scalar(0,0,0), 5*stroke/2, lineType);
+    putText(img, ss, origin, face, glyphScale, fontColor, stroke, lineType);
+}
+
+
+/**
+ * Overlays the current sample state on `canvas`.
+ *
+ * Line 0 shows the FPS value, line 1 the canvas size and the active modes.
+ * After that comes either the full hotkey list (`bHelp` set) or a one-line
+ * hint on how to reveal it.
+ */
+static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps)
+{
+    const Scalar statusColor(255,0,0);
+    const Scalar hintColor(118,185,0);
+
+    ostringstream text;
+    text << "FPS = " << setprecision(1) << fixed << fps;
+    matPrint(canvas, 0, statusColor, text.str());
+
+    text.str("");
+    text << "[" << canvas.cols << "x" << canvas.rows << "], ";
+    text << (bGpu ? "GPU, " : "CPU, ");
+    text << (bLargestFace ? "OneFace, " : "MultiFace, ");
+    text << (bFilter ? "Filter:ON" : "Filter:OFF");
+    matPrint(canvas, 1, statusColor, text.str());
+
+    if (!bHelp)
+    {
+        matPrint(canvas, 2, hintColor, "H - toggle hotkeys help");
+        return;
+    }
+
+    // Full hotkey list, one entry per overlay line starting at line 2.
+    static const char* const kHotkeys[] =
+    {
+        "Space - switch GPU / CPU",
+        "M - switch OneFace / MultiFace",
+        "F - toggle rectangles Filter",
+        "H - toggle hotkeys help",
+        "1/Q - increase/decrease scale"
+    };
+    for (int i = 0; i < 5; ++i)
+    {
+        matPrint(canvas, 2 + i, hintColor, kHotkeys[i]);
+    }
+}
+
+
+/**
+ * Cascade classifier demo that can switch between the CUDA and the CPU
+ * detector at run time.
+ *
+ * Arguments: `--cascade <file>` plus one input source (an image path,
+ * `--video <file>` or `--camera <id>`). Hotkeys are listed by displayState().
+ *
+ * @return 0 when the loop ends normally, -1 on usage errors, when no CUDA
+ *         device is found, or when the cascade file cannot be loaded.
+ */
+int main(int argc, const char *argv[])
+{
+    if (argc == 1)
+    {
+        help();
+        return -1;
+    }
+
+    if (getCudaEnabledDeviceCount() == 0)
+    {
+        cerr << "No GPU found or the library is compiled without CUDA support" << endl;
+        return -1;
+    }
+
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
+
+    string cascadeName;
+    string inputName;
+    bool isInputImage = false;
+    bool isInputVideo = false;
+    bool isInputCamera = false;
+
+    for (int i = 1; i < argc; ++i)
+    {
+        const string arg = argv[i];
+
+        // These options consume the following argument. Without this check a
+        // trailing option would read argv[argc], which is a null pointer.
+        const bool takesValue = (arg == "--cascade" || arg == "--video" || arg == "--camera");
+        if (takesValue && i + 1 >= argc)
+        {
+            cerr << "ERROR: Missing value for " << arg << endl;
+            help();
+            return -1;
+        }
+
+        if (arg == "--cascade")
+            cascadeName = argv[++i];
+        else if (arg == "--video")
+        {
+            inputName = argv[++i];
+            isInputVideo = true;
+        }
+        else if (arg == "--camera")
+        {
+            inputName = argv[++i];
+            isInputCamera = true;
+        }
+        else if (arg == "--help")
+        {
+            help();
+            return -1;
+        }
+        else if (!isInputImage)
+        {
+            inputName = argv[i];
+            isInputImage = true;
+        }
+        else
+        {
+            cout << "Unknown key: " << argv[i] << endl;
+            return -1;
+        }
+    }
+
+    // With no source selected the main loop would never obtain a frame.
+    if (!isInputImage && !isInputVideo && !isInputCamera)
+    {
+        cerr << "ERROR: No input specified" << endl;
+        help();
+        return -1;
+    }
+
+    // Validate the cascade file with the CPU loader first, so a bad path is
+    // reported by the message below before the CUDA classifier is built.
+    cv::CascadeClassifier cascade_cpu;
+    if (!cascade_cpu.load(cascadeName))
+    {
+        cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl;
+        help();
+        return -1;
+    }
+
+    Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(cascadeName);
+
+    VideoCapture capture;
+    Mat image;
+
+    if (isInputImage)
+    {
+        image = imread(inputName);
+        CV_Assert(!image.empty());
+    }
+    else if (isInputVideo)
+    {
+        capture.open(inputName);
+        CV_Assert(capture.isOpened());
+    }
+    else
+    {
+        capture.open(atoi(inputName.c_str()));
+        CV_Assert(capture.isOpened());
+    }
+
+    namedWindow("result", 1);
+
+    Mat frame, frame_cpu, gray_cpu, resized_cpu, frameDisp;
+    vector<Rect> faces;
+
+    GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
+
+    /* parameters */
+    bool useGPU = true;
+    double scaleFactor = 1.0;
+    bool findLargestObject = false;
+    bool filterRects = true;
+    bool helpScreen = false;
+
+    for (;;)
+    {
+        if (isInputCamera || isInputVideo)
+        {
+            capture >> frame;
+            if (frame.empty())
+            {
+                break;
+            }
+        }
+
+        // A still image is reprocessed on every iteration; otherwise the
+        // frame that was just captured is used.
+        (image.empty() ? frame : image).copyTo(frame_cpu);
+        frame_gpu.upload(image.empty() ? frame : image);
+
+        convertAndResize(frame_gpu, gray_gpu, resized_gpu, scaleFactor);
+        convertAndResize(frame_cpu, gray_cpu, resized_cpu, scaleFactor);
+
+        TickMeter tm;
+        tm.start();
+
+        if (useGPU)
+        {
+            cascade_gpu->setFindLargestObject(findLargestObject);
+            cascade_gpu->setScaleFactor(1.2);
+            cascade_gpu->setMinNeighbors((filterRects || findLargestObject) ? 4 : 0);
+
+            cascade_gpu->detectMultiScale(resized_gpu, facesBuf_gpu);
+            cascade_gpu->convert(facesBuf_gpu, faces);
+        }
+        else
+        {
+            // The CPU path takes its minimum object size from the GPU classifier.
+            Size minSize = cascade_gpu->getClassifierSize();
+            cascade_cpu.detectMultiScale(resized_cpu, faces, 1.2,
+                                         (filterRects || findLargestObject) ? 4 : 0,
+                                         (findLargestObject ? CASCADE_FIND_BIGGEST_OBJECT : 0)
+                                            | CASCADE_SCALE_IMAGE,
+                                         minSize);
+        }
+
+        for (size_t i = 0; i < faces.size(); ++i)
+        {
+            rectangle(resized_cpu, faces[i], Scalar(255));
+        }
+
+        tm.stop();
+        double detectionTime = tm.getTimeMilli();
+        double fps = 1000 / detectionTime;
+
+        //print detections to console
+        cout << setfill(' ') << setprecision(2);
+        cout << setw(6) << fixed << fps << " FPS, " << faces.size() << " det";
+        if ((filterRects || findLargestObject) && !faces.empty())
+        {
+            for (size_t i = 0; i < faces.size(); ++i)
+            {
+                cout << ", [" << setw(4) << faces[i].x
+                     << ", " << setw(4) << faces[i].y
+                     << ", " << setw(4) << faces[i].width
+                     << ", " << setw(4) << faces[i].height << "]";
+            }
+        }
+        cout << endl;
+
+        cv::cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR);
+        displayState(frameDisp, helpScreen, useGPU, findLargestObject, filterRects, fps);
+        imshow("result", frameDisp);
+
+        char key = (char)waitKey(5);
+        if (key == 27)
+        {
+            break;
+        }
+
+        switch (key)
+        {
+        case ' ':
+            useGPU = !useGPU;
+            break;
+        case 'm':
+        case 'M':
+            findLargestObject = !findLargestObject;
+            break;
+        case 'f':
+        case 'F':
+            filterRects = !filterRects;
+            break;
+        case '1':
+            scaleFactor *= 1.05;
+            break;
+        case 'q':
+        case 'Q':
+            scaleFactor /= 1.05;
+            break;
+        case 'h':
+        case 'H':
+            helpScreen = !helpScreen;
+            break;
+        }
+    }
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/farneback_optical_flow.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/farneback_optical_flow.cpp
@ -0,0 +1,140 @@
+#include <iostream>
+#include <vector>
+#include <sstream>
+#include <cmath>
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/cudaoptflow.hpp"
+#include "opencv2/cudaarithm.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+/**
+ * Linearly maps `x` from the interval [a, b] onto [c, d].
+ *
+ * `x` is clamped to [a, b] before mapping. When the source interval is
+ * degenerate (a == b) the general formula would divide by zero, so the
+ * midpoint of [c, d] is returned instead.
+ *
+ * @param x value to map
+ * @param a lower bound of the source interval
+ * @param b upper bound of the source interval
+ * @param c output produced for x == a
+ * @param d output produced for x == b
+ * @return the mapped value
+ */
+template <typename T>
+inline T mapVal(T x, T a, T b, T c, T d)
+{
+    // Reached for an all-zero flow field, where the caller passes -0 and 0.
+    if (b == a)
+        return c + (d-c) / 2;
+
+    x = std::max(std::min(x, b), a);
+    return c + (d-c) * (x-a) / (b-a);
+}
+
+/**
+ * Renders a two-component flow field as a 3-channel 8-bit image.
+ *
+ * Both components are scaled by the largest absolute value found in either
+ * of them. Channel 0 is zero, channel 1 encodes -v and channel 2 encodes u.
+ *
+ * @param u   first flow component, read as float
+ * @param v   second flow component, read as float
+ * @param dst output image, (re)allocated as CV_8UC3 with the size of `u`
+ */
+static void colorizeFlow(const Mat &u, const Mat &v, Mat &dst)
+{
+    double uLo, uHi;
+    cv::minMaxLoc(u, &uLo, &uHi, 0, 0);
+    double vLo, vHi;
+    cv::minMaxLoc(v, &vLo, &vHi, 0, 0);
+
+    // Largest magnitude across both components sets the symmetric range.
+    const double uPeak = ::max(::abs(uLo), ::abs(uHi));
+    const double vPeak = ::max(::abs(vLo), ::abs(vHi));
+    const float dMax = static_cast<float>(::max(uPeak, vPeak));
+
+    dst.create(u.size(), CV_8UC3);
+    for (int row = 0; row < u.rows; ++row)
+    {
+        for (int col = 0; col < u.cols; ++col)
+        {
+            const int base = 3 * col;
+            const float vNeg = -v.at<float>(row, col);
+            const float uVal = u.at<float>(row, col);
+
+            dst.at<uchar>(row, base) = 0;
+            dst.at<uchar>(row, base + 1) = (uchar)mapVal(vNeg, -dMax, dMax, 0.f, 255.f);
+            dst.at<uchar>(row, base + 2) = (uchar)mapVal(uVal, -dMax, dMax, 0.f, 255.f);
+        }
+    }
+}
+/**
+ * Farneback dense optical flow demo.
+ *
+ * Loads two grayscale images, computes the flow between them in a loop
+ * (CUDA or CPU implementation, toggled with 'm') and shows a colourised
+ * flow image annotated with the current mode and two FPS figures.
+ * ESC ends the loop.
+ *
+ * Returns 0 on normal exit or after printing help / parser errors, and -1
+ * when an image path is empty or an image cannot be read.
+ */
+int main(int argc, char **argv)
+{
+    CommandLineParser cmd(argc, argv,
+            "{ l left  | ../data/basketball1.png | specify left image }"
+            "{ r right | ../data/basketball2.png | specify right image }"
+            "{ h help  | | print help message }");
+
+    cmd.about("Farneback's optical flow sample.");
+    if (cmd.has("help") || !cmd.check())
+    {
+        cmd.printMessage();
+        cmd.printErrors();
+        return 0;
+    }
+
+
+    string pathL = cmd.get<string>("left");
+    string pathR = cmd.get<string>("right");
+    if (pathL.empty()) cout << "Specify left image path\n";
+    if (pathR.empty()) cout << "Specify right image path\n";
+    if (pathL.empty() || pathR.empty()) return -1;
+
+    Mat frameL = imread(pathL, IMREAD_GRAYSCALE);
+    Mat frameR = imread(pathR, IMREAD_GRAYSCALE);
+    if (frameL.empty()) cout << "Can't open '" << pathL << "'\n";
+    if (frameR.empty()) cout << "Can't open '" << pathR << "'\n";
+    if (frameL.empty() || frameR.empty()) return -1;
+
+    GpuMat d_frameL(frameL), d_frameR(frameR);
+    GpuMat d_flow;
+    Ptr<cuda::FarnebackOpticalFlow> d_calc = cuda::FarnebackOpticalFlow::create();
+    Mat flowxy, flowx, flowy, image;
+
+    bool running = true, gpuMode = true;
+    int64 t, t0=0, t1=1, tc0, tc1; // t0/t1 start one tick apart, so the first "total FPS" value is not a real measurement
+
+    cout << "Use 'm' for CPU/GPU toggling\n";
+
+    while (running)
+    {
+        t = getTickCount(); // start of this iteration; becomes t0 at the bottom of the loop
+
+        if (gpuMode)
+        {
+            tc0 = getTickCount();
+            d_calc->calc(d_frameL, d_frameR, d_flow);
+            tc1 = getTickCount(); // tc0..tc1 brackets only the flow computation
+
+            GpuMat planes[2];
+            cuda::split(d_flow, planes);
+
+            planes[0].download(flowx);
+            planes[1].download(flowy);
+        }
+        else
+        {
+            tc0 = getTickCount();
+            calcOpticalFlowFarneback( // the CPU call takes its parameters from the CUDA object's getters
+                        frameL, frameR, flowxy, d_calc->getPyrScale(), d_calc->getNumLevels(), d_calc->getWinSize(),
+                        d_calc->getNumIters(), d_calc->getPolyN(), d_calc->getPolySigma(), d_calc->getFlags());
+            tc1 = getTickCount();
+
+            Mat planes[] = {flowx, flowy};
+            split(flowxy, planes);
+            flowx = planes[0]; flowy = planes[1];
+        }
+
+        colorizeFlow(flowx, flowy, image);
+
+        stringstream s;
+        s << "mode: " << (gpuMode?"GPU":"CPU");
+        putText(image, s.str(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2);
+
+        s.str("");
+        s << "opt. flow FPS: " << cvRound((getTickFrequency()/(tc1-tc0)));
+        putText(image, s.str(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2);
+
+        s.str("");
+        s << "total FPS: " << cvRound((getTickFrequency()/(t1-t0))); // t0/t1 were stored by the previous iteration
+        putText(image, s.str(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255,0,255), 2);
+
+        imshow("flow", image);
+
+        char ch = (char)waitKey(3);
+        if (ch == 27)
+            running = false;
+        else if (ch == 'm' || ch == 'M')
+            gpuMode = !gpuMode;
+
+        t0 = t; // keep this iteration's start and end for the next "total FPS" readout
+        t1 = getTickCount();
+    }
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/generalized_hough.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/generalized_hough.cpp
@ -0,0 +1,182 @@
+#include <vector>
+#include <iostream>
+#include <string>
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/cudaimgproc.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+
+/**
+ * Reads `name` as a grayscale image.
+ *
+ * If the image comes back empty, an error is written to stderr and the
+ * process terminates with exit(-1).
+ */
+static Mat loadImage(const string& name)
+{
+    Mat loaded = imread(name, IMREAD_GRAYSCALE);
+    if (!loaded.empty())
+        return loaded;
+
+    cerr << "Can't load image - " << name << endl;
+    exit(-1);
+}
+/**
+ * Generalized Hough transform demo.
+ *
+ * Searches `image` for occurrences of `template` with either the Ballard
+ * variant (default) or the Guil variant (`--full`), using the CPU or the
+ * CUDA implementation (`--gpu`). Prints the number of detections and the
+ * detection time, then shows the image with one red quadrilateral per hit.
+ *
+ * Returns 0 on success or after printing help, -1 when the command-line
+ * check fails. loadImage() ends the process itself if an image cannot be read.
+ */
+int main(int argc, const char* argv[])
+{
+    CommandLineParser cmd(argc, argv,
+        "{ image i        | ../data/pic1.png  | input image }"
+        "{ template t     | templ.png | template image }"
+        "{ full           |           | estimate scale and rotation }"
+        "{ gpu            |           | use gpu version }"
+        "{ minDist        | 100       | minimum distance between the centers of the detected objects }"
+        "{ levels         | 360       | R-Table levels }"
+        "{ votesThreshold | 30        | the accumulator threshold for the template centers at the detection stage. The smaller it is, the more false positions may be detected }"
+        "{ angleThresh    | 10000     | angle votes threshold }"
+        "{ scaleThresh    | 1000      | scale votes threshold }"
+        "{ posThresh      | 100       | position votes threshold }"
+        "{ dp             | 2         | inverse ratio of the accumulator resolution to the image resolution }"
+        "{ minScale       | 0.5       | minimal scale to detect }"
+        "{ maxScale       | 2         | maximal scale to detect }"
+        "{ scaleStep      | 0.05      | scale step }"
+        "{ minAngle       | 0         | minimal rotation angle to detect in degrees }"
+        "{ maxAngle       | 360       | maximal rotation angle to detect in degrees }"
+        "{ angleStep      | 1         | angle step in degrees }"
+        "{ maxBufSize     | 1000      | maximal size of inner buffers }"
+        "{ help h ?       |           | print help message }"
+    );
+
+    cmd.about("This program demonstrates arbitrary object finding with the Generalized Hough transform.");
+
+    if (cmd.has("help"))
+    {
+        cmd.printMessage();
+        return 0;
+    }
+
+    const string templName = cmd.get<string>("template");
+    const string imageName = cmd.get<string>("image");
+    const bool full = cmd.has("full");
+    const bool useGpu = cmd.has("gpu");
+    const double minDist = cmd.get<double>("minDist");
+    const int levels = cmd.get<int>("levels");
+    const int votesThreshold = cmd.get<int>("votesThreshold");
+    const int angleThresh = cmd.get<int>("angleThresh");
+    const int scaleThresh = cmd.get<int>("scaleThresh");
+    const int posThresh = cmd.get<int>("posThresh");
+    const double dp = cmd.get<double>("dp");
+    const double minScale = cmd.get<double>("minScale");
+    const double maxScale = cmd.get<double>("maxScale");
+    const double scaleStep = cmd.get<double>("scaleStep");
+    const double minAngle = cmd.get<double>("minAngle");
+    const double maxAngle = cmd.get<double>("maxAngle");
+    const double angleStep = cmd.get<double>("angleStep");
+    const int maxBufSize = cmd.get<int>("maxBufSize");
+
+    if (!cmd.check()) // checked only after every option above has been read
+    {
+        cmd.printErrors();
+        return -1;
+    }
+
+    Mat templ = loadImage(templName);
+    Mat image = loadImage(imageName);
+
+    Ptr<GeneralizedHough> alg; // holds whichever variant is configured below
+
+    if (!full)
+    {
+        Ptr<GeneralizedHoughBallard> ballard = useGpu ? cuda::createGeneralizedHoughBallard() : createGeneralizedHoughBallard();
+
+        ballard->setMinDist(minDist);
+        ballard->setLevels(levels);
+        ballard->setDp(dp);
+        ballard->setMaxBufferSize(maxBufSize);
+        ballard->setVotesThreshold(votesThreshold);
+
+        alg = ballard;
+    }
+    else
+    {
+        Ptr<GeneralizedHoughGuil> guil = useGpu ? cuda::createGeneralizedHoughGuil() : createGeneralizedHoughGuil();
+
+        guil->setMinDist(minDist);
+        guil->setLevels(levels);
+        guil->setDp(dp);
+        guil->setMaxBufferSize(maxBufSize);
+
+        guil->setMinAngle(minAngle);
+        guil->setMaxAngle(maxAngle);
+        guil->setAngleStep(angleStep);
+        guil->setAngleThresh(angleThresh);
+
+        guil->setMinScale(minScale);
+        guil->setMaxScale(maxScale);
+        guil->setScaleStep(scaleStep);
+        guil->setScaleThresh(scaleThresh);
+
+        guil->setPosThresh(posThresh);
+
+        alg = guil;
+    }
+
+    vector<Vec4f> position; // per detection: [0],[1] = centre, [2] = scale, [3] = angle (as read back below)
+    TickMeter tm;
+
+    if (useGpu)
+    {
+        cuda::GpuMat d_templ(templ);
+        cuda::GpuMat d_image(image);
+        cuda::GpuMat d_position;
+
+        alg->setTemplate(d_templ); // template setup is outside the timed region
+
+        tm.start();
+
+        alg->detect(d_image, d_position);
+        d_position.download(position); // the download is included in the measured time
+
+        tm.stop();
+    }
+    else
+    {
+        alg->setTemplate(templ); // template setup is outside the timed region
+
+        tm.start();
+
+        alg->detect(image, position);
+
+        tm.stop();
+    }
+
+    cout << "Found : " << position.size() << " objects" << endl;
+    cout << "Detection time : " << tm.getTimeMilli() << " ms" << endl;
+
+    Mat out;
+    cv::cvtColor(image, out, COLOR_GRAY2BGR); // colour copy so the outlines can be drawn in red
+
+    for (size_t i = 0; i < position.size(); ++i)
+    {
+        Point2f pos(position[i][0], position[i][1]);
+        float scale = position[i][2];
+        float angle = position[i][3];
+
+        RotatedRect rect;
+        rect.center = pos;
+        rect.size = Size2f(templ.cols * scale, templ.rows * scale); // template size scaled by the detected scale
+        rect.angle = angle;
+
+        Point2f pts[4];
+        rect.points(pts);
+
+        line(out, pts[0], pts[1], Scalar(0, 0, 255), 3);
+        line(out, pts[1], pts[2], Scalar(0, 0, 255), 3);
+        line(out, pts[2], pts[3], Scalar(0, 0, 255), 3);
+        line(out, pts[3], pts[0], Scalar(0, 0, 255), 3);
+    }
+
+    imshow("out", out);
+    waitKey();
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/hog.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/hog.cpp
@ -0,0 +1,552 @@
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <stdexcept>
+#include <opencv2/core/utility.hpp>
+#include "opencv2/cudaobjdetect.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/objdetect.hpp"
+#include "opencv2/imgproc.hpp"
+
+using namespace std;
+using namespace cv;
+
+// Set to true by printHelp(); main() checks it after argument parsing and exits when help was shown.
+bool help_showed = false;
+
+// Command-line configuration of the HOG sample.
+// Defaults are assigned by the constructor; read() overrides them from argv.
+class Args
+{
+public:
+    Args();
+    static Args read(int argc, char** argv);
+
+    // Frames source: a path (image, video or folder) plus flags telling how to open it.
+    string src;
+    bool src_is_folder;
+    bool src_is_video;
+    bool src_is_camera;
+    int camera_id;
+
+    // Optional SVM detector file (--svm).
+    bool svm_load;
+    string svm;
+
+    // Optional output video (--write_video, --dst_video, --dst_video_fps).
+    bool write_video;
+    string dst_video;
+    double dst_video_fps;
+
+    // Convert frames to gray before detection (--make_gray).
+    bool make_gray;
+
+    // Optional resize of the source frames (--resize_src, --width, --height).
+    bool resize_src;
+    int width;
+    int height;
+
+    // Multi-scale detection parameters (--scale, --nlevels, --gr_threshold).
+    double scale;
+    int nlevels;
+    int gr_threshold;
+
+    // Hit threshold; hit_threshold_auto is cleared once --hit_threshold is given explicitly.
+    double hit_threshold;
+    bool hit_threshold_auto;
+
+    // HOG geometry; the window height is derived as win_width * 2 where it is used.
+    int win_width;
+    int win_stride_width;
+    int win_stride_height;
+    int block_width;
+    int block_stride_width;
+    int block_stride_height;
+    int cell_width;
+    int nbins;
+
+    // Gamma correction switch (--gamma_correct).
+    bool gamma_corr;
+};
+
+
+// Interactive HOG detection application: keeps the parameters that can be
+// changed from the keyboard (see handleKey) and the FPS bookkeeping.
+class App
+{
+public:
+    // Copies the parsed arguments and prints the controls and the initial settings.
+    App(const Args& s);
+    // Opens the configured source and runs detection until `running` is cleared.
+    void run();
+
+    // Applies one key press (as obtained from waitKey) to the runtime settings.
+    void handleKey(char key);
+
+    // Timing of the HOG detection step only.
+    void hogWorkBegin();
+    void hogWorkEnd();
+    string hogWorkFps() const;
+
+    // Timing of a whole frame iteration.
+    void workBegin();
+    void workEnd();
+    string workFps() const;
+
+    // NOTE(review): no definition of message() is visible in this part of the file.
+    string message() const;
+
+private:
+    // NOTE(review): private and without a visible definition - presumably declared to forbid assignment; confirm.
+    App operator=(App&);
+
+    Args args;
+    bool running;
+
+    // Runtime-adjustable copies of the corresponding Args values.
+    bool use_gpu;
+    bool make_gray;
+    double scale;
+    int gr_threshold;
+    int nlevels;
+    double hit_threshold;
+    bool gamma_corr;
+
+    // Tick count at hogWorkBegin() and the rate computed by hogWorkEnd().
+    int64 hog_work_begin;
+    double hog_work_fps;
+
+    // Tick count at workBegin() and the rate computed by workEnd().
+    int64 work_begin;
+    double work_fps;
+};
+
+// Prints the command-line usage of the sample and records, via help_showed,
+// that help was displayed so that main() can stop after argument parsing.
+static void printHelp()
+{
+    cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"
+         << "\nUsage: hog\n"
+         << "  (<image>|--video <video>|--camera <camera_id>) # frames source\n"
+         << "  or\n"
+         << "  (--folder <folder_path>) # load images from folder\n"
+         << "  [--svm <file>] # load svm file\n"
+         << "  [--make_gray <true/false>] # convert image to gray one or not\n"
+         << "  [--resize_src <true/false>] # do resize of the source image or not\n"
+         << "  [--width <int>] # resized image width\n"
+         << "  [--height <int>] # resized image height\n"
+         << "  [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
+         << "  [--scale <double>] # HOG window scale factor\n"
+         << "  [--nlevels <int>] # max number of HOG window scales\n"
+         << "  [--win_width <int>] # width of the window\n"
+         << "  [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
+         << "  [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
+         << "  [--block_width <int>] # width of the block\n"
+         << "  [--block_stride_width <int>] # distance by OX axis between neighbour blocks\n"
+         << "  [--block_stride_height <int>] # distance by OY axis between neighbour blocks\n"
+         << "  [--cell_width <int>] # width of the cell\n"
+         << "  [--nbins <int>] # number of bins\n"
+         << "  [--gr_threshold <int>] # merging similar rects constant\n"
+         // Args::read() parses this option by comparing its value with "true".
+         << "  [--gamma_correct <true/false>] # do gamma correction or not\n"
+         << "  [--write_video <bool>] # write video or not\n"
+         << "  [--dst_video <path>] # output video path\n"
+         << "  [--dst_video_fps <double>] # output video fps\n";
+    help_showed = true;
+}
+
+// Entry point of the HOG sample.
+// Without arguments the usage is printed and camera 0 is used as the source;
+// otherwise the arguments are parsed and the program stops with -1 if help was shown.
+// Any exception is reported on stdout and turned into exit code 1.
+int main(int argc, char** argv)
+{
+    try
+    {
+        Args args;
+        if (argc >= 2)
+        {
+            args = Args::read(argc, argv);
+            if (help_showed)
+                return -1;
+        }
+        else
+        {
+            printHelp();
+            args.camera_id = 0;
+            args.src_is_camera = true;
+        }
+        App app(args);
+        app.run();
+    }
+    catch (const Exception& e)
+    {
+        cout << "error: "  << e.what() << endl;
+        return 1;
+    }
+    catch (const exception& e)
+    {
+        cout << "error: "  << e.what() << endl;
+        return 1;
+    }
+    catch(...)
+    {
+        cout << "unknown exception" << endl;
+        return 1;
+    }
+    return 0;
+}
+
+
+// Fills every option with its default value; the string members start empty.
+// The initializer order follows the member declaration order of the class.
+Args::Args()
+    : src_is_folder(false),
+      src_is_video(false),
+      src_is_camera(false),
+      camera_id(0),
+      svm_load(false),
+      write_video(false),
+      dst_video_fps(24.),
+      make_gray(false),
+      resize_src(false),
+      width(640),
+      height(480),
+      scale(1.05),
+      nlevels(13),
+      gr_threshold(8),
+      hit_threshold(1.4),
+      hit_threshold_auto(true),
+      win_width(48),
+      win_stride_width(8),
+      win_stride_height(8),
+      block_width(16),
+      block_stride_width(8),
+      block_stride_height(8),
+      cell_width(8),
+      nbins(9),
+      gamma_corr(true)
+{
+}
+
+
+// Returns the value that follows the option at argv[i] and advances i onto it.
+// Throws runtime_error when the option is the last argument, instead of reading argv[argc].
+static const char* nextArgValue(int argc, char** argv, int& i)
+{
+    if (i + 1 >= argc)
+        throw runtime_error(string("missing value for key: ") + argv[i]);
+    return argv[++i];
+}
+
+// Parses the command line into an Args object, starting from the defaults.
+//   argc, argv - arguments as received by main(); argv[0] is skipped.
+// Returns the parsed options.
+// Throws runtime_error for an option without a value and for an unknown key
+// (an unrecognized argument is taken as the source path only while src is empty).
+Args Args::read(int argc, char** argv)
+{
+    Args args;
+    for (int i = 1; i < argc; i++)
+    {
+        const string key(argv[i]);
+        if (key == "--make_gray") args.make_gray = (string(nextArgValue(argc, argv, i)) == "true");
+        else if (key == "--resize_src") args.resize_src = (string(nextArgValue(argc, argv, i)) == "true");
+        else if (key == "--width") args.width = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--height") args.height = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--hit_threshold")
+        {
+            args.hit_threshold = atof(nextArgValue(argc, argv, i));
+            // An explicit threshold disables the automatic choice made in App::App.
+            args.hit_threshold_auto = false;
+        }
+        else if (key == "--scale") args.scale = atof(nextArgValue(argc, argv, i));
+        else if (key == "--nlevels") args.nlevels = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--win_width") args.win_width = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--win_stride_width") args.win_stride_width = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--win_stride_height") args.win_stride_height = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--block_width") args.block_width = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--block_stride_width") args.block_stride_width = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--block_stride_height") args.block_stride_height = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--cell_width") args.cell_width = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--nbins") args.nbins = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--gr_threshold") args.gr_threshold = atoi(nextArgValue(argc, argv, i));
+        else if (key == "--gamma_correct") args.gamma_corr = (string(nextArgValue(argc, argv, i)) == "true");
+        else if (key == "--write_video") args.write_video = (string(nextArgValue(argc, argv, i)) == "true");
+        else if (key == "--dst_video") args.dst_video = nextArgValue(argc, argv, i);
+        else if (key == "--dst_video_fps") args.dst_video_fps = atof(nextArgValue(argc, argv, i));
+        else if (key == "--help") printHelp();
+        else if (key == "--video") { args.src = nextArgValue(argc, argv, i); args.src_is_video = true; }
+        else if (key == "--camera") { args.camera_id = atoi(nextArgValue(argc, argv, i)); args.src_is_camera = true; }
+        else if (key == "--folder") { args.src = nextArgValue(argc, argv, i); args.src_is_folder = true;}
+        else if (key == "--svm") { args.svm = nextArgValue(argc, argv, i); args.svm_load = true;}
+        else if (args.src.empty()) args.src = argv[i];
+        else throw runtime_error((string("unknown key: ") + argv[i]));
+    }
+    return args;
+}
+
+
+// Builds the application state from the parsed arguments.
+//   s - parsed command-line options; a copy is stored in `args`.
+// Prints the CUDA device info, the keyboard controls and the effective settings.
+App::App(const Args& s)
+{
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
+
+    args = s;
+    cout << "\nControls:\n"
+         << "\tESC - exit\n"
+         << "\tm - change mode GPU <-> CPU\n"
+         << "\tg - convert image to gray or not\n"
+         << "\t1/q - increase/decrease HOG scale\n"
+         << "\t2/w - increase/decrease levels count\n"
+         << "\t3/e - increase/decrease HOG group threshold\n"
+         << "\t4/r - increase/decrease hit threshold\n"
+         << endl;
+
+    // run() sets this flag itself; start from a defined value anyway.
+    running = false;
+
+    // run() displays workFps() before the first workEnd() has been called,
+    // so the timing state must not be left uninitialized.
+    hog_work_begin = 0;
+    hog_work_fps = 0.;
+    work_begin = 0;
+    work_fps = 0.;
+
+    use_gpu = true;
+    make_gray = args.make_gray;
+    scale = args.scale;
+    gr_threshold = args.gr_threshold;
+    nlevels = args.nlevels;
+
+    // Without an explicit --hit_threshold the value depends on the window width.
+    if (args.hit_threshold_auto)
+        args.hit_threshold = args.win_width == 48 ? 1.4 : 0.;
+    hit_threshold = args.hit_threshold;
+
+    gamma_corr = args.gamma_corr;
+
+    cout << "Scale: " << scale << endl;
+    if (args.resize_src)
+        cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
+    cout << "Group threshold: " << gr_threshold << endl;
+    cout << "Levels number: " << nlevels << endl;
+    cout << "Win size: (" << args.win_width << ", " << args.win_width*2 << ")\n";
+    cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
+    cout << "Block size: (" << args.block_width << ", " << args.block_width << ")\n";
+    cout << "Block stride: (" << args.block_stride_width << ", " << args.block_stride_height << ")\n";
+    cout << "Cell size: (" << args.cell_width << ", " << args.cell_width << ")\n";
+    cout << "Bins number: " << args.nbins << endl;
+    cout << "Hit threshold: " << hit_threshold << endl;
+    cout << "Gamma correction: " << gamma_corr << endl;
+    cout << endl;
+}
+
+
+// Main loop of the sample.
+// Creates the GPU and CPU HOG detectors, loads the SVM detector (from file or
+// the default people detector), then repeatedly opens the configured source and
+// processes its frames until `running` is cleared by handleKey().
+// Throws runtime_error when the model, the source or the video writer cannot be opened.
+void App::run()
+{
+    running = true;
+    cv::VideoWriter video_writer;
+
+    Size win_stride(args.win_stride_width, args.win_stride_height);
+    Size win_size(args.win_width, args.win_width * 2);
+    Size block_size(args.block_width, args.block_width);
+    Size block_stride(args.block_stride_width, args.block_stride_height);
+    Size cell_size(args.cell_width, args.cell_width);
+
+    cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, args.nbins);
+    cv::HOGDescriptor cpu_hog(win_size, block_size, block_stride, cell_size, args.nbins);
+
+    if(args.svm_load) {
+        std::vector<float> svm_model;
+        const std::string model_file_name = args.svm;
+        FileStorage ifs(model_file_name, FileStorage::READ);
+        if (ifs.isOpened()) {
+            ifs["svm_detector"] >> svm_model;
+        } else {
+            const std::string what =
+                    "could not load model for hog classifier from file: "
+                    + model_file_name;
+            throw std::runtime_error(what);
+        }
+
+        // check if the variables are initialized
+        if (svm_model.empty()) {
+            const std::string what =
+                    "HoG classifier: svm model could not be loaded from file"
+                    + model_file_name;
+            throw std::runtime_error(what);
+        }
+
+        gpu_hog->setSVMDetector(svm_model);
+        cpu_hog.setSVMDetector(svm_model);
+    } else {
+        // Create HOG descriptors and detectors here
+        Mat detector = gpu_hog->getDefaultPeopleDetector();
+
+        gpu_hog->setSVMDetector(detector);
+        cpu_hog.setSVMDetector(detector);
+    }
+
+    cout << "gpusvmDescriptorSize : " << gpu_hog->getDescriptorSize()
+         << endl;
+    cout << "cpusvmDescriptorSize : " << cpu_hog.getDescriptorSize()
+         << endl;
+
+    // Each pass of the outer loop (re)opens the source; when the frames run out
+    // the source is opened again until the user quits.
+    while (running)
+    {
+        VideoCapture vc;
+        Mat frame;
+        vector<String> filenames;
+
+        // Folder mode starts at index 1: index 0 is skipped on purpose
+        // (the original sample notes that entry 0 is .gitignore).
+        unsigned int count = 1;
+
+        if (args.src_is_video)
+        {
+            vc.open(args.src.c_str());
+            if (!vc.isOpened())
+                throw runtime_error(string("can't open video file: " + args.src));
+            vc >> frame;
+        }
+        else if (args.src_is_folder) {
+            String folder = args.src;
+            cout << folder << endl;
+            glob(folder, filenames);
+            // Guard the index: the folder may hold fewer than two entries.
+            if (count < filenames.size())
+                frame = imread(filenames[count]);
+            // Without a first frame the outer loop would spin forever with no
+            // key handling, so fail like the other source kinds do.
+            if (frame.empty())
+                throw runtime_error(string("can't load image from folder: " + args.src));
+        }
+        else if (args.src_is_camera)
+        {
+            vc.open(args.camera_id);
+            if (!vc.isOpened())
+            {
+                stringstream msg;
+                msg << "can't open camera: " << args.camera_id;
+                throw runtime_error(msg.str());
+            }
+            vc >> frame;
+        }
+        else
+        {
+            frame = imread(args.src);
+            if (frame.empty())
+                throw runtime_error(string("can't open image file: " + args.src));
+        }
+
+        Mat img_aux, img, img_to_show;
+        cuda::GpuMat gpu_img;
+
+        // Iterate over all frames
+        while (running && !frame.empty())
+        {
+            workBegin();
+
+            // Change format of the image
+            if (make_gray) cvtColor(frame, img_aux, COLOR_BGR2GRAY);
+            else if (use_gpu) cvtColor(frame, img_aux, COLOR_BGR2BGRA);
+            else frame.copyTo(img_aux);
+
+            // Resize image
+            if (args.resize_src) resize(img_aux, img, Size(args.width, args.height));
+            else img = img_aux;
+            img_to_show = img;
+
+            vector<Rect> found;
+
+            // Perform HOG classification
+            hogWorkBegin();
+            if (use_gpu)
+            {
+                gpu_img.upload(img);
+                gpu_hog->setNumLevels(nlevels);
+                gpu_hog->setHitThreshold(hit_threshold);
+                gpu_hog->setWinStride(win_stride);
+                gpu_hog->setScaleFactor(scale);
+                gpu_hog->setGroupThreshold(gr_threshold);
+                gpu_hog->detectMultiScale(gpu_img, found);
+            }
+            else
+            {
+                cpu_hog.nlevels = nlevels;
+                cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
+                                         Size(0, 0), scale, gr_threshold);
+            }
+            hogWorkEnd();
+
+            // Draw positive classified windows
+            for (size_t i = 0; i < found.size(); i++)
+            {
+                Rect r = found[i];
+                rectangle(img_to_show, r.tl(), r.br(), Scalar(0, 255, 0), 3);
+            }
+
+            if (use_gpu)
+                putText(img_to_show, "Mode: GPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+            else
+                putText(img_to_show, "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+            putText(img_to_show, "FPS HOG: " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+            // Shows the rate measured for the previous frame: workEnd() runs further below.
+            putText(img_to_show, "FPS total: " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+            imshow("opencv_gpu_hog", img_to_show);
+
+            // Fetch the next frame; an empty frame ends the inner loop.
+            if (args.src_is_video || args.src_is_camera) vc >> frame;
+            if (args.src_is_folder) {
+                count++;
+                if (count < filenames.size()) {
+                    frame = imread(filenames[count]);
+                } else {
+                    Mat empty;
+                    frame = empty;
+                }
+            }
+
+            workEnd();
+
+            if (args.write_video)
+            {
+                // The writer is opened lazily, once the output frame size is known.
+                if (!video_writer.isOpened())
+                {
+                    video_writer.open(args.dst_video, VideoWriter::fourcc('x','v','i','d'), args.dst_video_fps,
+                                      img_to_show.size(), true);
+                    if (!video_writer.isOpened())
+                        throw std::runtime_error("can't create video writer");
+                }
+
+                if (make_gray) cvtColor(img_to_show, img, COLOR_GRAY2BGR);
+                else cvtColor(img_to_show, img, COLOR_BGRA2BGR);
+
+                video_writer << img;
+            }
+
+            handleKey((char)waitKey(3));
+        }
+    }
+}
+
+
+// Reacts to one key press: ESC stops the main loop, the other keys toggle or
+// step a runtime parameter and print its new value. Unknown keys are ignored.
+void App::handleKey(char key)
+{
+    if (key == 27)
+    {
+        running = false;
+    }
+    else if (key == 'm' || key == 'M')
+    {
+        use_gpu = !use_gpu;
+        cout << "Switched to " << (use_gpu ? "CUDA" : "CPU") << " mode\n";
+    }
+    else if (key == 'g' || key == 'G')
+    {
+        make_gray = !make_gray;
+        cout << "Convert image to gray: " << (make_gray ? "YES" : "NO") << endl;
+    }
+    else if (key == '1')
+    {
+        scale *= 1.05;
+        cout << "Scale: " << scale << endl;
+    }
+    else if (key == 'q' || key == 'Q')
+    {
+        scale /= 1.05;
+        cout << "Scale: " << scale << endl;
+    }
+    else if (key == '2')
+    {
+        nlevels++;
+        cout << "Levels number: " << nlevels << endl;
+    }
+    else if (key == 'w' || key == 'W')
+    {
+        // Never drops below one level.
+        nlevels = max(nlevels - 1, 1);
+        cout << "Levels number: " << nlevels << endl;
+    }
+    else if (key == '3')
+    {
+        gr_threshold++;
+        cout << "Group threshold: " << gr_threshold << endl;
+    }
+    else if (key == 'e' || key == 'E')
+    {
+        // Clamped at zero.
+        gr_threshold = max(0, gr_threshold - 1);
+        cout << "Group threshold: " << gr_threshold << endl;
+    }
+    else if (key == '4')
+    {
+        hit_threshold+=0.25;
+        cout << "Hit threshold: " << hit_threshold << endl;
+    }
+    else if (key == 'r' || key == 'R')
+    {
+        // Clamped at zero.
+        hit_threshold = max(0.0, hit_threshold - 0.25);
+        cout << "Hit threshold: " << hit_threshold << endl;
+    }
+    else if (key == 'c' || key == 'C')
+    {
+        gamma_corr = !gamma_corr;
+        cout << "Gamma correction: " << gamma_corr << endl;
+    }
+}
+
+
+// Remembers the tick count at the start of the HOG detection step.
+inline void App::hogWorkBegin()
+{
+    hog_work_begin = getTickCount();
+}
+
+// Converts the ticks elapsed since hogWorkBegin() into a rate (tick frequency / ticks).
+inline void App::hogWorkEnd()
+{
+    const int64 elapsed_ticks = getTickCount() - hog_work_begin;
+    const double ticks_per_second = getTickFrequency();
+    hog_work_fps = ticks_per_second / elapsed_ticks;
+}
+
+// Returns the last HOG rate formatted with the default stream formatting.
+inline string App::hogWorkFps() const
+{
+    stringstream formatted;
+    formatted << hog_work_fps;
+    const string text = formatted.str();
+    return text;
+}
+
+
+// Remembers the tick count at the start of a frame iteration.
+inline void App::workBegin()
+{
+    work_begin = getTickCount();
+}
+
+// Converts the ticks elapsed since workBegin() into a rate (tick frequency / ticks).
+inline void App::workEnd()
+{
+    const int64 elapsed_ticks = getTickCount() - work_begin;
+    const double ticks_per_second = getTickFrequency();
+    work_fps = ticks_per_second / elapsed_ticks;
+}
+
+// Returns the last whole-frame rate formatted with the default stream formatting.
+inline string App::workFps() const
+{
+    stringstream formatted;
+    formatted << work_fps;
+    const string text = formatted.str();
+    return text;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/houghlines.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/houghlines.cpp
@ -0,0 +1,90 @@
+#include <cmath>
+#include <iostream>
+
+#include "opencv2/core.hpp"
+#include <opencv2/core/utility.hpp>
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/cudaimgproc.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+// Prints a short description of the sample and its command line.
+static void help()
+{
+    cout << "This program demonstrates line finding with the Hough transform." << endl
+         << "Usage:" << endl
+         << "./gpu-example-houghlines <image_name>, Default is ../data/pic1.png\n" << endl;
+}
+
+// Runs probabilistic Hough line detection on the CPU and on the GPU for the
+// same edge map, prints timing and the number of segments found, and shows both results.
+// Returns -1 when the input image cannot be read, 0 otherwise.
+int main(int argc, const char* argv[])
+{
+    // Input image: the first argument when given, otherwise the sample picture.
+    string filename("../data/pic1.png");
+    if (argc >= 2)
+        filename = argv[1];
+
+    Mat src = imread(filename, IMREAD_GRAYSCALE);
+    if (src.empty())
+    {
+        help();
+        cout << "can not open " << filename << endl;
+        return -1;
+    }
+
+    // Edge map shared by both detectors.
+    Mat edges;
+    cv::Canny(src, edges, 100, 200, 3);
+
+    // Colour canvases the detected segments are drawn on.
+    Mat canvas_cpu;
+    cv::cvtColor(edges, canvas_cpu, COLOR_GRAY2BGR);
+    Mat canvas_gpu = canvas_cpu.clone();
+
+    // CPU detection, timed.
+    vector<Vec4i> segments_cpu;
+    {
+        const int64 t0 = getTickCount();
+
+        cv::HoughLinesP(edges, segments_cpu, 1, CV_PI / 180, 50, 60, 5);
+
+        const double seconds = (getTickCount() - t0) / getTickFrequency();
+        cout << "CPU Time : " << seconds * 1000 << " ms" << endl;
+        cout << "CPU Found : " << segments_cpu.size() << endl;
+    }
+
+    for (vector<Vec4i>::const_iterator it = segments_cpu.begin(); it != segments_cpu.end(); ++it)
+    {
+        const Vec4i seg = *it;
+        line(canvas_cpu, Point(seg[0], seg[1]), Point(seg[2], seg[3]), Scalar(0, 0, 255), 3, LINE_AA);
+    }
+
+    // GPU detection, timed (the measurement includes creating the detector).
+    GpuMat d_edges(edges);
+    GpuMat d_segments;
+    {
+        const int64 t0 = getTickCount();
+
+        Ptr<cuda::HoughSegmentDetector> detector = cuda::createHoughSegmentDetector(1.0f, (float) (CV_PI / 180.0f), 50, 5);
+
+        detector->detect(d_edges, d_segments);
+
+        const double seconds = (getTickCount() - t0) / getTickFrequency();
+        cout << "GPU Time : " << seconds * 1000 << " ms" << endl;
+        cout << "GPU Found : " << d_segments.cols << endl;
+    }
+
+    // Download straight into the vector storage; skipped when nothing was found.
+    vector<Vec4i> segments_gpu;
+    if (!d_segments.empty())
+    {
+        segments_gpu.resize(d_segments.cols);
+        Mat host_view(1, d_segments.cols, CV_32SC4, &segments_gpu[0]);
+        d_segments.download(host_view);
+    }
+
+    for (vector<Vec4i>::const_iterator it = segments_gpu.begin(); it != segments_gpu.end(); ++it)
+    {
+        const Vec4i seg = *it;
+        line(canvas_gpu, Point(seg[0], seg[1]), Point(seg[2], seg[3]), Scalar(0, 0, 255), 3, LINE_AA);
+    }
+
+    imshow("source", src);
+    imshow("detected lines [CPU]", canvas_cpu);
+    imshow("detected lines [GPU]", canvas_gpu);
+    waitKey();
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/morphology.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/morphology.cpp
@ -0,0 +1,186 @@
+#include <iostream>
+
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/cudafilters.hpp"
+#include "opencv2/cudaimgproc.hpp"
+
+using namespace std;
+using namespace cv;
+
+// Interactive morphology demo: holds the source image on the GPU and the
+// state driven by the trackbars and hot keys.
+class App
+{
+public:
+    // Parses the command line, loads the image and uploads it to the GPU.
+    App(int argc, const char* argv[]);
+
+    // Creates the windows and trackbars and processes keys until ESC; returns 0.
+    int run();
+
+private:
+    // Prints the usage and the hot keys.
+    void help();
+
+    // Apply the filter selected by the corresponding trackbar position and show the result.
+    void OpenClose();
+    void ErodeDilate();
+
+    // Trackbar callbacks; the user-data pointer is the App instance.
+    static void OpenCloseCallback(int, void*);
+    static void ErodeDilateCallback(int, void*);
+
+    // Source image and the shared destination buffer of the filters.
+    cuda::GpuMat src, dst;
+
+    // Shape passed to getStructuringElement (MORPH_RECT / MORPH_ELLIPSE / MORPH_CROSS).
+    int element_shape;
+
+    // Trackbar positions are offset by max_iters: a position below it gives a negative iteration count.
+    int max_iters;
+    int open_close_pos;
+    int erode_dilate_pos;
+};
+
+// Initializes the demo state, handles "--help", loads the image (the single
+// argument, or the default sample) and uploads it to the GPU.
+// Exits the process with 0 after printing help and with -1 when the image cannot be read.
+App::App(int argc, const char* argv[])
+{
+    max_iters = 10;
+    erode_dilate_pos = max_iters;
+    open_close_pos = erode_dilate_pos;
+    element_shape = MORPH_RECT;
+
+    const bool has_single_arg = (argc == 2);
+
+    if (has_single_arg && String(argv[1]) == "--help")
+    {
+        help();
+        exit(0);
+    }
+
+    // Only an invocation with exactly one argument overrides the default image.
+    String filename("../data/baboon.jpg");
+    if (has_single_arg)
+        filename = argv[1];
+
+    Mat img = imread(filename);
+    if (img.empty())
+    {
+        cerr << "Can't open image " << filename.c_str() << endl;
+        exit(-1);
+    }
+
+    src.upload(img);
+    if (src.channels() == 3)
+    {
+        // Three-channel input is expanded to BGRA before filtering
+        // (the original sample notes the GPU path supports only 4-channel images).
+        cuda::GpuMat bgra;
+        cuda::cvtColor(src, bgra, COLOR_BGR2BGRA);
+        src = bgra;
+    }
+
+    help();
+
+    cuda::printShortCudaDeviceInfo(cuda::getDevice());
+}
+
+// Creates the two output windows with their trackbars, then redraws both
+// results after every key press. ESC returns 0; 'e', 'r', 'c' select a
+// structuring element shape and SPACE cycles through the three shapes.
+int App::run()
+{
+    // create windows for output images
+    namedWindow("Open/Close");
+    namedWindow("Erode/Dilate");
+
+    const int slider_max = max_iters * 2 + 1;
+    createTrackbar("iterations", "Open/Close", &open_close_pos, slider_max, OpenCloseCallback, this);
+    createTrackbar("iterations", "Erode/Dilate", &erode_dilate_pos, slider_max, ErodeDilateCallback, this);
+
+    while (true)
+    {
+        OpenClose();
+        ErodeDilate();
+
+        const char key = (char) waitKey();
+
+        if (key == 27)
+            return 0;
+
+        if (key == 'e')
+            element_shape = MORPH_ELLIPSE;
+        else if (key == 'r')
+            element_shape = MORPH_RECT;
+        else if (key == 'c')
+            element_shape = MORPH_CROSS;
+        else if (key == ' ')
+            element_shape = (element_shape + 1) % 3;
+    }
+}
+
+// Prints the purpose of the sample, its command line and the hot keys.
+void App::help()
+{
+    cout << "Show off image morphology: erosion, dialation, open and close \n"
+         << "Call: \n"
+         << "   gpu-example-morphology [image] \n"
+         << "This program also shows use of rect, ellipse and cross kernels \n" << endl;
+
+    cout << "Hot keys: \n"
+         << "\tESC - quit the program \n"
+         << "\tr - use rectangle structuring element \n"
+         << "\te - use elliptic structuring element \n"
+         << "\tc - use cross-shaped structuring element \n"
+         << "\tSPACE - loop through all the options \n" << endl;
+}
+
+// Applies a morphological open (negative offset) or close (zero or positive
+// offset) to the source image and shows the result in the "Open/Close" window.
+void App::OpenClose()
+{
+    // The trackbar position is offset by max_iters; its magnitude is the element radius.
+    const int offset = open_close_pos - max_iters;
+    const int radius = offset < 0 ? -offset : offset;
+    const int side = radius * 2 + 1;
+
+    Mat kernel = getStructuringElement(element_shape, Size(side, side), Point(radius, radius));
+
+    const int op = offset < 0 ? MORPH_OPEN : MORPH_CLOSE;
+    Ptr<cuda::Filter> filter = cuda::createMorphologyFilter(op, src.type(), kernel);
+    filter->apply(src, dst);
+
+    // Copy the result to the host for display.
+    Mat host_result(dst);
+    imshow("Open/Close", host_result);
+}
+
+// Applies an erosion (negative offset) or a dilation (zero or positive offset)
+// to the source image and shows the result in the "Erode/Dilate" window.
+void App::ErodeDilate()
+{
+    // The trackbar position is offset by max_iters; its magnitude is the element radius.
+    const int offset = erode_dilate_pos - max_iters;
+    const int radius = offset < 0 ? -offset : offset;
+    const int side = radius * 2 + 1;
+
+    Mat kernel = getStructuringElement(element_shape, Size(side, side), Point(radius, radius));
+
+    const int op = offset < 0 ? MORPH_ERODE : MORPH_DILATE;
+    Ptr<cuda::Filter> filter = cuda::createMorphologyFilter(op, src.type(), kernel);
+    filter->apply(src, dst);
+
+    // Copy the result to the host for display.
+    Mat host_result(dst);
+    imshow("Erode/Dilate", host_result);
+}
+
+// Trackbar callback: `data` is the App instance registered in run().
+void App::OpenCloseCallback(int, void* data)
+{
+    static_cast<App*>(data)->OpenClose();
+}
+
+// Trackbar callback: `data` is the App instance registered in run().
+void App::ErodeDilateCallback(int, void* data)
+{
+    static_cast<App*>(data)->ErodeDilate();
+}
+
+// Builds the demo application from the command line and returns the result of its main loop.
+int main(int argc, const char* argv[])
+{
+    App demo(argc, argv);
+    const int exit_code = demo.run();
+    return exit_code;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/multi.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/multi.cpp
@ -0,0 +1,95 @@
+/* This sample demonstrates the way you can perform independent tasks
+   on the different GPUs */
+
+// Disable some warnings which are caused by CUDA headers
+#if defined(_MSC_VER)
+#pragma warning(disable: 4201 4408 4100)
+#endif
+
+#include <iostream>
+#include "opencv2/core.hpp"
+#include "opencv2/cudaarithm.hpp"
+
+#if !defined(HAVE_CUDA)
+
+// Fallback entry point for builds without CUDA: only reports that CUDA is required.
+int main()
+{
+    const char* const notice =
+        "CUDA support is required (OpenCV CMake parameter 'WITH_CUDA' must be true).";
+    std::cout << notice << std::endl;
+    return 0;
+}
+
+#else
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+// Loop body for cv::parallel_for_: every index of the range is treated as a
+// device id and handed to the per-device overload.
+struct Worker : public cv::ParallelLoopBody
+{
+    void operator()(const Range& r) const CV_OVERRIDE
+    {
+        for (int device = r.start; device < r.end; ++device)
+        {
+            (*this)(device);
+        }
+    }
+    void operator()(int device_id) const;
+};
+
+// Checks that at least two CUDA devices are present and that the CUDA module
+// is built for every device, then runs Worker on devices 0 and 1 in parallel.
+// Returns -1 when a requirement is not met, 0 otherwise.
+int main()
+{
+    int num_devices = getCudaEnabledDeviceCount();
+    if (num_devices < 2)
+    {
+        std::cout << "Two or more GPUs are required\n";
+        return -1;
+    }
+    for (int i = 0; i < num_devices; ++i)
+    {
+        cv::cuda::printShortCudaDeviceInfo(i);
+
+        DeviceInfo dev_info(i);
+        if (!dev_info.isCompatible())
+        {
+            // Compute capability is printed as major.minor and the parenthesis is closed.
+            std::cout << "CUDA module isn't built for GPU #" << i << " ("
+                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << '.' << dev_info.minorVersion() << ")\n";
+            return -1;
+        }
+    }
+
+    // Execute calculation in two threads using two GPUs
+    cv::Range devices(0, 2);
+    cv::parallel_for_(devices, Worker(), devices.size());
+
+    return 0;
+}
+
+
+// Per-device task: selects the device, transposes the same random matrix on
+// the CPU and on the GPU and prints whether the two results agree.
+void Worker::operator()(int device_id) const
+{
+    setDevice(device_id);
+
+    // Random input filled from a generator seeded with 0.
+    Mat input(1000, 1000, CV_32F);
+    RNG generator(0);
+    generator.fill(input, RNG::UNIFORM, 0, 1);
+
+    // Reference result on the CPU.
+    Mat expected;
+    cv::transpose(input, expected);
+
+    // Same operation on the GPU.
+    GpuMat d_input(input);
+    GpuMat d_output;
+    cuda::transpose(d_input, d_output);
+
+    // Compare the downloaded GPU result with the reference.
+    const double max_abs_diff = cv::norm(expected - Mat(d_output), NORM_INF);
+    const bool passed = max_abs_diff < 1e-3;
+    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
+        << (passed ? "passed" : "FAILED") << endl;
+
+    // Deallocate data here, otherwise deallocation will be performed
+    // after context is extracted from the stack
+    d_input.release();
+    d_output.release();
+}
+
+#endif
--- a/3rdparty/opencv-4.5.4/samples/gpu/pyrlk_optical_flow.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/pyrlk_optical_flow.cpp
@ -0,0 +1,331 @@
+#include <iostream>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/utility.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/video.hpp>
+#include <opencv2/cudaoptflow.hpp>
+#include <opencv2/cudaimgproc.hpp>
+#include <opencv2/cudaarithm.hpp>
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+// Copies the contents of `d_mat` into `vec`, which is resized to d_mat.cols
+// elements. The download writes directly into the vector's storage through a
+// 1 x cols CV_32FC2 Mat header wrapping it.
+// NOTE(review): assumes d_mat is a single row of CV_32FC2 elements - confirm
+// against the producers of the matrices passed in.
+static void download(const GpuMat& d_mat, vector<Point2f>& vec)
+{
+    vec.resize(d_mat.cols);
+
+    // Nothing to copy. This also avoids forming the address of element 0 of
+    // an empty vector, which is undefined behaviour.
+    if (vec.empty())
+        return;
+
+    Mat mat(1, d_mat.cols, CV_32FC2, static_cast<void*>(vec.data()));
+    d_mat.download(mat);
+}
+
+// Copies the contents of `d_mat` into `vec`, which is resized to d_mat.cols
+// elements. The download writes directly into the vector's storage through a
+// 1 x cols CV_8UC1 Mat header wrapping it.
+// NOTE(review): assumes d_mat is a single row of CV_8UC1 elements - confirm
+// against the producers of the matrices passed in.
+static void download(const GpuMat& d_mat, vector<uchar>& vec)
+{
+    vec.resize(d_mat.cols);
+
+    // Nothing to copy. This also avoids forming the address of element 0 of
+    // an empty vector, which is undefined behaviour.
+    if (vec.empty())
+        return;
+
+    Mat mat(1, d_mat.cols, CV_8UC1, static_cast<void*>(vec.data()));
+    d_mat.download(mat);
+}
+
+// Draws one arrow on `frame` for every index whose status entry is non-zero.
+// The arrow starts at the previous position and is stretched to three times
+// the displacement towards the next position. Points are converted to integer
+// coordinates first; displacements shorter than one pixel are not drawn.
+static void drawArrows(Mat& frame, const vector<Point2f>& prevPts, const vector<Point2f>& nextPts, const vector<uchar>& status, Scalar line_color = Scalar(0, 0, 255))
+{
+    const int thickness = 1;
+    const size_t count = prevPts.size();
+
+    for (size_t idx = 0; idx < count; ++idx)
+    {
+        if (!status[idx])
+            continue;
+
+        Point tail = prevPts[idx];
+        Point head = nextPts[idx];
+
+        const double angle = atan2((double) tail.y - head.y, (double) tail.x - head.x);
+        const double length = sqrt( (double)(tail.y - head.y)*(tail.y - head.y) + (double)(tail.x - head.x)*(tail.x - head.x) );
+
+        if (length < 1.0)
+            continue;
+
+        // Lengthen the arrow by a factor of three.
+        head.x = (int) (tail.x - 3 * length * cos(angle));
+        head.y = (int) (tail.y - 3 * length * sin(angle));
+
+        // Main line of the arrow.
+        line(frame, tail, head, line_color, thickness);
+
+        // Two strokes of length 9 at +/- 45 degrees form the arrow head.
+        Point tip;
+        tip.x = (int) (head.x + 9 * cos(angle + CV_PI / 4));
+        tip.y = (int) (head.y + 9 * sin(angle + CV_PI / 4));
+        line(frame, tip, head, line_color, thickness);
+
+        tip.x = (int) (head.x + 9 * cos(angle - CV_PI / 4));
+        tip.y = (int) (head.y + 9 * sin(angle - CV_PI / 4));
+        line(frame, tip, head, line_color, thickness);
+    }
+}
+
+// Returns true when neither component of `u` is NaN and both components have
+// an absolute value below 1e9.
+inline bool isFlowCorrect(Point2f u)
+{
+    if (cvIsNaN(u.x) || cvIsNaN(u.y))
+        return false;
+
+    const bool x_in_range = fabs(u.x) < 1e9;
+    const bool y_in_range = fabs(u.y) < 1e9;
+    return x_in_range && y_in_range;
+}
+
+// Converts a flow vector (fx, fy) into a colour. The direction of the vector
+// selects a position on a 55-entry colour wheel (linearly interpolated
+// between neighbouring entries) and the length of the vector modifies the
+// result. Wheel component b is stored in pix[2 - b].
+static Vec3b computeColor(float fx, float fy)
+{
+    // relative lengths of color transitions:
+    // these are chosen based on perceptual similarity
+    // (e.g. one can distinguish more shades between red and yellow
+    //  than between yellow and green)
+    const int RY = 15;
+    const int YG = 6;
+    const int GC = 4;
+    const int CB = 11;
+    const int BM = 13;
+    const int MR = 6;
+    const int NCOLS = RY + YG + GC + CB + BM + MR;
+
+    static Vec3i colorWheel[NCOLS];
+    static bool wheelReady = false;
+
+    // The wheel is filled once, on the first call.
+    if (!wheelReady)
+    {
+        int slot = 0;
+
+        for (int i = 0; i < RY; ++i)
+            colorWheel[slot++] = Vec3i(255, 255 * i / RY, 0);
+
+        for (int i = 0; i < YG; ++i)
+            colorWheel[slot++] = Vec3i(255 - 255 * i / YG, 255, 0);
+
+        for (int i = 0; i < GC; ++i)
+            colorWheel[slot++] = Vec3i(0, 255, 255 * i / GC);
+
+        for (int i = 0; i < CB; ++i)
+            colorWheel[slot++] = Vec3i(0, 255 - 255 * i / CB, 255);
+
+        for (int i = 0; i < BM; ++i)
+            colorWheel[slot++] = Vec3i(255 * i / BM, 0, 255);
+
+        for (int i = 0; i < MR; ++i)
+            colorWheel[slot++] = Vec3i(255, 0, 255 - 255 * i / MR);
+
+        wheelReady = true;
+    }
+
+    const float rad = sqrt(fx * fx + fy * fy);
+    const float a = atan2(-fy, -fx) / (float)CV_PI;
+
+    // Fractional wheel position and the two entries surrounding it
+    const float fk = (a + 1.0f) / 2.0f * (NCOLS - 1);
+    const int lower = static_cast<int>(fk);
+    const int upper = (lower + 1) % NCOLS;
+    const float weight = fk - lower;
+
+    const bool inRange = rad <= 1;
+
+    Vec3b pix;
+
+    for (int b = 0; b < 3; b++)
+    {
+        const float col0 = colorWheel[lower][b] / 255.0f;
+        const float col1 = colorWheel[upper][b] / 255.0f;
+
+        float col = (1 - weight) * col0 + weight * col1;
+
+        if (inRange)
+            col = 1 - rad * (1 - col); // increase saturation with radius
+        else
+            col *= .75; // out of range
+
+        pix[2 - b] = static_cast<uchar>(255.0 * col);
+    }
+
+    return pix;
+}
+
+// Renders a dense flow field given as separate x and y planes into `dst`,
+// which is (re)created as a CV_8UC3 image of the same size and cleared to 0.
+// Each valid flow vector is divided by the motion range and coloured with
+// computeColor(); pixels with invalid flow stay 0.
+// When `maxmotion` is not positive the range is the largest valid vector
+// length found in the field, but never less than 1.
+static void drawOpticalFlow(const Mat_<float>& flowx, const Mat_<float>& flowy, Mat& dst, float maxmotion = -1)
+{
+    const int rows = flowx.rows;
+    const int cols = flowx.cols;
+
+    dst.create(flowx.size(), CV_8UC3);
+    dst.setTo(Scalar::all(0));
+
+    // determine motion range:
+    float maxrad = maxmotion;
+
+    if (maxmotion <= 0)
+    {
+        maxrad = 1;
+        for (int y = 0; y < rows; ++y)
+        {
+            for (int x = 0; x < cols; ++x)
+            {
+                const Point2f u(flowx(y, x), flowy(y, x));
+
+                if (isFlowCorrect(u))
+                {
+                    const float rad = sqrt(u.x * u.x + u.y * u.y);
+                    if (maxrad < rad)
+                        maxrad = rad;
+                }
+            }
+        }
+    }
+
+    for (int y = 0; y < rows; ++y)
+    {
+        for (int x = 0; x < cols; ++x)
+        {
+            const Point2f u(flowx(y, x), flowy(y, x));
+
+            if (!isFlowCorrect(u))
+                continue;
+
+            dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad);
+        }
+    }
+}
+
+// Splits the two-plane GPU flow field into its planes, copies them to host
+// memory, renders them with drawOpticalFlow() using a motion range of 10 and
+// shows the result in the window called `name`.
+static void showFlow(const char* name, const GpuMat& d_flow)
+{
+    GpuMat channels[2];
+    cuda::split(d_flow, channels);
+
+    // Host copies of the first (x) and second (y) plane
+    Mat horizontal(channels[0]);
+    Mat vertical(channels[1]);
+
+    Mat rendered;
+    drawOpticalFlow(horizontal, vertical, rendered, 10);
+
+    imshow(name, rendered);
+}
+
+// Limits x to the interval [a, b]: returns a when x is not greater than a,
+// b when x is not less than b, and x otherwise.
+template <typename T> inline T clamp (T x, T a, T b)
+{
+    return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a));
+}
+
+// Limits x to [a, b] and then maps it linearly from [a, b] onto [c, d].
+// Requires a != b (the result is divided by b - a).
+template <typename T> inline T mapValue(T x, T a, T b, T c, T d)
+{
+    // Qualified on purpose: this file has `using namespace std;`, so when
+    // compiled as C++17 an unqualified clamp(x, a, b) is ambiguous between
+    // the helper above and std::clamp as soon as mapValue is instantiated.
+    x = ::clamp(x, a, b);
+    return c + (d - c) * (x - a) / (b - a);
+}
+
+// Entry point of the PyrLK optical flow sample.
+// Reads two images, detects corners in the first one on the GPU and then
+// either tracks them into the second image (sparse flow, drawn as arrows) or
+// computes a dense flow field between the grayscale images (drawn as a colour
+// map). Blocks in waitKey(0) until a key is pressed.
+// Returns 0 on success or after printing the help, -1 on invalid input.
+int main(int argc, const char* argv[])
+{
+    const char* keys =
+        "{ h             help   |        | print help message }"
+        "{ l             left   | ../data/pic1.png       | specify left image }"
+        "{ r             right  | ../data/pic2.png       | specify right image }"
+        "{ flow                 | sparse | specify flow type [PyrLK] }"
+        "{ gray                 |        | use grayscale sources [PyrLK Sparse] }"
+        "{ win_size             | 21     | specify windows size [PyrLK] }"
+        "{ max_level            | 3      | specify max level [PyrLK] }"
+        "{ iters                | 30     | specify iterations count [PyrLK] }"
+        "{ points               | 4000   | specify points count [GoodFeatureToTrack] }"
+        "{ min_dist             | 0      | specify minimal distance between points [GoodFeatureToTrack] }";
+
+    CommandLineParser cmd(argc, argv, keys);
+
+    if (cmd.has("help") || !cmd.check())
+    {
+        cmd.printMessage();
+        cmd.printErrors();
+        return 0;
+    }
+
+    string fname0 = cmd.get<string>("left");
+    string fname1 = cmd.get<string>("right");
+
+    if (fname0.empty() || fname1.empty())
+    {
+        cerr << "Missing input file names" << endl;
+        return -1;
+    }
+
+    // Only the two literal values "sparse" and "dense" are accepted.
+    string flow_type = cmd.get<string>("flow");
+    bool is_sparse = true;
+    if (flow_type == "sparse")
+    {
+        is_sparse = true;
+    }
+    else if (flow_type == "dense")
+    {
+        is_sparse = false;
+    }
+    else
+    {
+        cerr << "please specify 'sparse' or 'dense' as flow type" << endl;
+        return -1;
+    }
+
+    bool useGray = cmd.has("gray");
+    int winSize = cmd.get<int>("win_size");
+    int maxLevel = cmd.get<int>("max_level");
+    int iters = cmd.get<int>("iters");
+    int points = cmd.get<int>("points");
+    double minDist = cmd.get<double>("min_dist");
+
+    Mat frame0 = imread(fname0);
+    Mat frame1 = imread(fname1);
+
+    if (frame0.empty() || frame1.empty())
+    {
+        cout << "Can't load input images" << endl;
+        return -1;
+    }
+
+    // Both flow variants operate on a pair of equally sized frames; reject a
+    // mismatch here with a readable message instead of letting it surface
+    // inside the optical flow call.
+    if (frame0.size() != frame1.size())
+    {
+        cerr << "Input images must have the same size" << endl;
+        return -1;
+    }
+
+    cout << "Image size : " << frame0.cols << " x " << frame0.rows << endl;
+    cout << "Points count : " << points << endl;
+
+    cout << endl;
+
+    Mat frame0Gray;
+    cv::cvtColor(frame0, frame0Gray, COLOR_BGR2GRAY);
+    Mat frame1Gray;
+    cv::cvtColor(frame1, frame1Gray, COLOR_BGR2GRAY);
+
+    // goodFeaturesToTrack
+    GpuMat d_frame0Gray(frame0Gray);
+    GpuMat d_prevPts;
+
+    Ptr<cuda::CornersDetector> detector = cuda::createGoodFeaturesToTrackDetector(d_frame0Gray.type(), points, 0.01, minDist);
+    detector->detect(d_frame0Gray, d_prevPts);
+
+    GpuMat d_frame0(frame0);
+    GpuMat d_frame1(frame1);
+    GpuMat d_frame1Gray(frame1Gray);
+    GpuMat d_nextPts;
+    GpuMat d_status;
+    GpuMat d_flow(frame0.size(), CV_32FC2);
+
+    if (is_sparse)
+    {
+        // Sparse: track the detected corners, on the grayscale or the colour
+        // frames depending on the "gray" switch
+        Ptr<cuda::SparsePyrLKOpticalFlow> d_pyrLK_sparse = cuda::SparsePyrLKOpticalFlow::create(
+            Size(winSize, winSize), maxLevel, iters);
+        d_pyrLK_sparse->calc(useGray ? d_frame0Gray : d_frame0, useGray ? d_frame1Gray : d_frame1, d_prevPts, d_nextPts, d_status);
+
+        // Draw arrows
+        vector<Point2f> prevPts(d_prevPts.cols);
+        download(d_prevPts, prevPts);
+
+        vector<Point2f> nextPts(d_nextPts.cols);
+        download(d_nextPts, nextPts);
+
+        vector<uchar> status(d_status.cols);
+        download(d_status, status);
+
+        namedWindow("PyrLK [Sparse]", WINDOW_NORMAL);
+        drawArrows(frame0, prevPts, nextPts, status, Scalar(255, 0, 0));
+        imshow("PyrLK [Sparse]", frame0);
+    }
+    else
+    {
+        // Dense: always computed on the grayscale frames
+        Ptr<cuda::DensePyrLKOpticalFlow> d_pyrLK_dense = cuda::DensePyrLKOpticalFlow::create(
+            Size(winSize, winSize), maxLevel, iters);
+        d_pyrLK_dense->calc(d_frame0Gray, d_frame1Gray, d_flow);
+
+        // Draw flows
+        namedWindow("PyrLK [Dense] Flow Field", WINDOW_NORMAL);
+        showFlow("PyrLK [Dense] Flow Field", d_flow);
+    }
+
+    waitKey(0);
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/stereo_match.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/stereo_match.cpp
@ -0,0 +1,382 @@
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <stdexcept>
+#include <opencv2/core/utility.hpp>
+#include "opencv2/cudastereo.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+
+using namespace cv;
+using namespace std;
+
+// Set to true by printHelp(); main() checks it after argument parsing and
+// exits instead of starting the application.
+bool help_showed = false;
+
+// Settings of the sample: the two input image paths, the selected matching
+// method and the number of disparity levels.
+struct Params
+{
+    Params();
+    static Params read(int argc, char** argv);
+
+    string left;
+    string right;
+
+    // Name of the selected method as text; empty for any other value.
+    string method_str() const
+    {
+        if (method == BM) return "BM";
+        if (method == BP) return "BP";
+        if (method == CSBP) return "CSBP";
+        return "";
+    }
+    enum {BM, BP, CSBP} method;
+    int ndisp; // Max disparity + 1
+};
+
+
+// Interactive application state: parameters, source images, their GPU
+// copies, the three stereo matchers and the timing of the last computation.
+struct App
+{
+    App(const Params& p);
+    void run();
+    void handleKey(char key);
+    void printParams() const;
+
+    // Remembers the tick count at the start of a measured section.
+    void workBegin() { work_begin = getTickCount(); }
+
+    // Stores the rate (sections per second) of the section started by workBegin().
+    void workEnd()
+    {
+        const int64 elapsed_ticks = getTickCount() - work_begin;
+        work_fps = getTickFrequency() / elapsed_ticks;
+    }
+
+    // Overlay text: method name followed by the last measured rate.
+    string text() const
+    {
+        stringstream label;
+        label << "(" << p.method_str() << ") FPS: ";
+        label << setiosflags(ios::left) << setprecision(4) << work_fps;
+        return label.str();
+    }
+private:
+    Params p;
+    bool running;
+
+    Mat left_src, right_src;
+    Mat left, right;
+    cuda::GpuMat d_left, d_right;
+
+    Ptr<cuda::StereoBM> bm;
+    Ptr<cuda::StereoBeliefPropagation> bp;
+    Ptr<cuda::StereoConstantSpaceBP> csbp;
+
+    int64 work_begin;
+    double work_fps;
+};
+
+// Prints the command-line usage and records in help_showed that it was shown.
+static void printHelp()
+{
+    cout << "Usage: stereo_match\n"
+            "\t--left <left_view> --right <right_view> # must be rectified\n"
+            "\t--method <stereo_match_method> # BM | BP | CSBP\n"
+            "\t--ndisp <number> # number of disparity levels\n";
+    help_showed = true;
+}
+
+// Entry point of the stereo matching sample.
+// Prints the usage and returns 1 when started without arguments, returns -1
+// when the help was requested or an error was reported, and 0 after a normal
+// run of the application.
+int main(int argc, char** argv)
+{
+    try
+    {
+        if (argc < 2)
+        {
+            printHelp();
+            return 1;
+        }
+        Params args = Params::read(argc, argv);
+        if (help_showed)
+            return -1;
+        App app(args);
+        app.run();
+    }
+    catch (const exception& e)
+    {
+        cout << "error: " << e.what() << endl;
+        // A failed run must not report success through the exit status.
+        return -1;
+    }
+    return 0;
+}
+
+
+// Defaults: method BM with 64 disparity levels; both paths start empty.
+Params::Params()
+    : method(BM), ndisp(64)
+{
+}
+
+
+// Builds a Params object from the command line.
+// Recognised options: --left <path>, --right <path>, --method <BM|BP|CSBP>,
+// --ndisp <number> and --help (prints the usage via printHelp()).
+// Throws runtime_error for an unknown option, an unknown method name, or an
+// option that is not followed by its value.
+Params Params::read(int argc, char** argv)
+{
+    Params p;
+
+    for (int i = 1; i < argc; i++)
+    {
+        const string key(argv[i]);
+
+        if (key == "--help")
+        {
+            printHelp();
+            continue;
+        }
+
+        const bool takes_value = key == "--left" || key == "--right" ||
+                                 key == "--method" || key == "--ndisp";
+        if (!takes_value)
+            throw runtime_error("unknown key: " + key);
+
+        // argv[argc] is a null pointer, so an option given as the very last
+        // argument has to be rejected before its value is read.
+        if (i + 1 >= argc)
+            throw runtime_error("missing value for key: " + key);
+        const string value(argv[++i]);
+
+        if (key == "--left") p.left = value;
+        else if (key == "--right") p.right = value;
+        else if (key == "--method")
+        {
+            if (value == "BM") p.method = BM;
+            else if (value == "BP") p.method = BP;
+            else if (value == "CSBP") p.method = CSBP;
+            else throw runtime_error("unknown stereo match method: " + value);
+        }
+        else p.ndisp = atoi(value.c_str());
+    }
+
+    return p;
+}
+
+
+// Stores the parameters, prints a short description of the current CUDA
+// device and lists the keyboard controls of the sample.
+App::App(const Params& params)
+    : p(params), running(false)
+{
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
+
+    cout << "stereo_match_gpu sample\n"
+            "\nControls:\n"
+            "\tesc - exit\n"
+            "\tp - print current parameters\n"
+            "\tg - convert source images into gray\n"
+            "\tm - change stereo match method\n"
+            "\ts - change Sobel prefiltering flag (for BM only)\n"
+            "\t1/q - increase/decrease maximum disparity\n"
+            "\t2/w - increase/decrease window size (for BM only)\n"
+            "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n"
+            "\t4/r - increase/decrease level count (for BP and CSBP only)\n";
+}
+
+
+// Main loop of the sample: loads both views, converts them to grayscale,
+// uploads them to the GPU, creates the three matchers and then repeatedly
+// computes and shows the disparity map with the currently selected method
+// until `running` is cleared by handleKey().
+// Throws runtime_error when one of the input images cannot be read.
+void App::run()
+{
+    // Load images
+    left_src = imread(p.left);
+    right_src = imread(p.right);
+    if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\"");
+    if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\"");
+    cvtColor(left_src, left, COLOR_BGR2GRAY);
+    cvtColor(right_src, right, COLOR_BGR2GRAY);
+    d_left.upload(left);
+    d_right.upload(right);
+
+    imshow("left", left);
+    imshow("right", right);
+
+    // Set common parameters
+    bm = cuda::createStereoBM(p.ndisp);
+    bp = cuda::createStereoBeliefPropagation(p.ndisp);
+    csbp = cv::cuda::createStereoConstantSpaceBP(p.ndisp);
+
+    // Prepare disparity map of specified type
+    Mat disp(left.size(), CV_8U);
+    cuda::GpuMat d_disp(left.size(), CV_8U);
+
+    cout << endl;
+    printParams();
+
+    running = true;
+    while (running)
+    {
+        // Only the matcher call is timed; download and display are not.
+        workBegin();
+        switch (p.method)
+        {
+        case Params::BM:
+            // If colour images are currently uploaded (see the 'g' key),
+            // switch back to grayscale before running BM.
+            if (d_left.channels() > 1 || d_right.channels() > 1)
+            {
+                cout << "BM doesn't support color images\n";
+                cvtColor(left_src, left, COLOR_BGR2GRAY);
+                cvtColor(right_src, right, COLOR_BGR2GRAY);
+                cout << "image_channels: " << left.channels() << endl;
+                d_left.upload(left);
+                d_right.upload(right);
+                imshow("left", left);
+                imshow("right", right);
+            }
+            bm->compute(d_left, d_right, d_disp);
+            break;
+        case Params::BP: bp->compute(d_left, d_right, d_disp); break;
+        case Params::CSBP: csbp->compute(d_left, d_right, d_disp); break;
+        }
+        workEnd();
+
+        // Show results
+        d_disp.download(disp);
+        putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
+        imshow("disparity", (Mat_<uchar>)disp);
+
+        // Wait up to 3 ms for a key and dispatch it
+        handleKey((char)waitKey(3));
+    }
+}
+
+
+// Prints the image size and channel count, the selected method, the number of
+// disparities and the settings that belong to the selected method.
+void App::printParams() const
+{
+    cout << "--- Parameters ---\n"
+         << "image_size: (" << left.cols << ", " << left.rows << ")\n"
+         << "image_channels: " << left.channels() << endl
+         << "method: " << p.method_str() << endl
+         << "ndisp: " << p.ndisp << endl;
+
+    if (p.method == Params::BM)
+    {
+        cout << "win_size: " << bm->getBlockSize() << endl
+             << "prefilter_sobel: " << bm->getPreFilterType() << endl;
+    }
+    else if (p.method == Params::BP)
+    {
+        cout << "iter_count: " << bp->getNumIters() << endl
+             << "level_count: " << bp->getNumLevels() << endl;
+    }
+    else if (p.method == Params::CSBP)
+    {
+        cout << "iter_count: " << csbp->getNumIters() << endl
+             << "level_count: " << csbp->getNumLevels() << endl;
+    }
+
+    cout << endl;
+}
+
+
+// Applies the action bound to `key` (the controls listed by the constructor):
+// esc stops the main loop, 'p' prints the parameters, 'g' toggles between
+// colour and grayscale sources, 'm' cycles the method, 's' toggles the BM
+// prefilter, and the 1/q, 2/w, 3/e, 4/r pairs increase/decrease a setting.
+// Letter keys are accepted in both cases; any other key is ignored.
+void App::handleKey(char key)
+{
+    switch (key)
+    {
+    case 27: // esc
+        running = false;
+        break;
+    case 'p': case 'P':
+        printParams();
+        break;
+    case 'g': case 'G':
+        // Colour sources are only selected when the images are currently
+        // grayscale and the method is not BM; otherwise convert to gray.
+        if (left.channels() == 1 && p.method != Params::BM)
+        {
+            left = left_src;
+            right = right_src;
+        }
+        else
+        {
+            cvtColor(left_src, left, COLOR_BGR2GRAY);
+            cvtColor(right_src, right, COLOR_BGR2GRAY);
+        }
+        d_left.upload(left);
+        d_right.upload(right);
+        cout << "image_channels: " << left.channels() << endl;
+        imshow("left", left);
+        imshow("right", right);
+        break;
+    case 'm': case 'M':
+        // Cycle BM -> BP -> CSBP -> BM
+        switch (p.method)
+        {
+        case Params::BM:
+            p.method = Params::BP;
+            break;
+        case Params::BP:
+            p.method = Params::CSBP;
+            break;
+        case Params::CSBP:
+            p.method = Params::BM;
+            break;
+        }
+        cout << "method: " << p.method_str() << endl;
+        break;
+    case 's': case 'S':
+        if (p.method == Params::BM)
+        {
+            // Toggle between prefilter type 0 and PREFILTER_XSOBEL
+            switch (bm->getPreFilterType())
+            {
+            case 0:
+                bm->setPreFilterType(cv::StereoBM::PREFILTER_XSOBEL);
+                break;
+            case cv::StereoBM::PREFILTER_XSOBEL:
+                bm->setPreFilterType(0);
+                break;
+            }
+            cout << "prefilter_sobel: " << bm->getPreFilterType() << endl;
+        }
+        break;
+    case '1':
+        // Step of 8; from the lower bound 1 (see 'q') go to 8 rather than 9
+        p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8;
+        cout << "ndisp: " << p.ndisp << endl;
+        bm->setNumDisparities(p.ndisp);
+        bp->setNumDisparities(p.ndisp);
+        csbp->setNumDisparities(p.ndisp);
+        break;
+    case 'q': case 'Q':
+        // Step of 8, never below 1
+        p.ndisp = max(p.ndisp - 8, 1);
+        cout << "ndisp: " << p.ndisp << endl;
+        bm->setNumDisparities(p.ndisp);
+        bp->setNumDisparities(p.ndisp);
+        csbp->setNumDisparities(p.ndisp);
+        break;
+    case '2':
+        if (p.method == Params::BM)
+        {
+            // Block size is capped at 51
+            bm->setBlockSize(min(bm->getBlockSize() + 1, 51));
+            cout << "win_size: " << bm->getBlockSize() << endl;
+        }
+        break;
+    case 'w': case 'W':
+        if (p.method == Params::BM)
+        {
+            // Block size is kept at 2 or more
+            bm->setBlockSize(max(bm->getBlockSize() - 1, 2));
+            cout << "win_size: " << bm->getBlockSize() << endl;
+        }
+        break;
+    case '3':
+        if (p.method == Params::BP)
+        {
+            bp->setNumIters(bp->getNumIters() + 1);
+            cout << "iter_count: " << bp->getNumIters() << endl;
+        }
+        else if (p.method == Params::CSBP)
+        {
+            csbp->setNumIters(csbp->getNumIters() + 1);
+            cout << "iter_count: " << csbp->getNumIters() << endl;
+        }
+        break;
+    case 'e': case 'E':
+        // Iteration count is kept at 1 or more
+        if (p.method == Params::BP)
+        {
+            bp->setNumIters(max(bp->getNumIters() - 1, 1));
+            cout << "iter_count: " << bp->getNumIters() << endl;
+        }
+        else if (p.method == Params::CSBP)
+        {
+            csbp->setNumIters(max(csbp->getNumIters() - 1, 1));
+            cout << "iter_count: " << csbp->getNumIters() << endl;
+        }
+        break;
+    case '4':
+        if (p.method == Params::BP)
+        {
+            bp->setNumLevels(bp->getNumLevels() + 1);
+            cout << "level_count: " << bp->getNumLevels() << endl;
+        }
+        else if (p.method == Params::CSBP)
+        {
+            csbp->setNumLevels(csbp->getNumLevels() + 1);
+            cout << "level_count: " << csbp->getNumLevels() << endl;
+        }
+        break;
+    case 'r': case 'R':
+        // Level count is kept at 1 or more
+        if (p.method == Params::BP)
+        {
+            bp->setNumLevels(max(bp->getNumLevels() - 1, 1));
+            cout << "level_count: " << bp->getNumLevels() << endl;
+        }
+        else if (p.method == Params::CSBP)
+        {
+            csbp->setNumLevels(max(csbp->getNumLevels() - 1, 1));
+            cout << "level_count: " << csbp->getNumLevels() << endl;
+        }
+        break;
+    }
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/stereo_multi.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/stereo_multi.cpp
@ -0,0 +1,498 @@
+// This sample demonstrates working on one piece of data using two GPUs.
+// It splits input into two parts and processes them separately on different GPUs.
+
+#ifdef _WIN32
+    #define NOMINMAX
+    #include <windows.h>
+#else
+    #include <pthread.h>
+    #include <unistd.h>
+#endif
+
+#include <iostream>
+#include <iomanip>
+
+#include "opencv2/core.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/cudastereo.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+///////////////////////////////////////////////////////////
+// Thread
+// OS-specific wrappers for multi-threading
+
+#ifdef _WIN32
+// Thread wrapper for Windows: the constructor starts a thread that calls
+// func(userData), wait() blocks until that thread has finished and the
+// destructor closes the thread handle.
+class Thread
+{
+    // Function and argument handed over to the new thread
+    struct UserData
+    {
+        void (*func)(void* userData);
+        void* param;
+    };
+
+    // Thread start routine: unpacks the UserData and invokes the callback.
+    static DWORD WINAPI WinThreadFunction(LPVOID lpParam)
+    {
+        const UserData* call = static_cast<UserData*>(lpParam);
+
+        (*call->func)(call->param);
+
+        return 0;
+    }
+
+    UserData userData_;
+    HANDLE thread_;
+    DWORD threadId_;
+
+public:
+    Thread(void (*func)(void* userData), void* userData)
+    {
+        // The callback data must be filled in before the thread is started,
+        // because the new thread reads it through the pointer passed below.
+        userData_.param = userData;
+        userData_.func = func;
+
+        thread_ = CreateThread(NULL,               // default security attributes
+                               0,                  // use default stack size
+                               WinThreadFunction,  // thread function name
+                               &userData_,         // argument to thread function
+                               0,                  // use default creation flags
+                               &threadId_);        // returns the thread identifier
+    }
+
+    ~Thread()
+    {
+        CloseHandle(thread_);
+    }
+
+    void wait()
+    {
+        WaitForSingleObject(thread_, INFINITE);
+    }
+};
+#else
+// Thread wrapper for POSIX systems: the constructor starts a thread that
+// calls func(userData) and wait() blocks until that thread has finished.
+// A thread that was never waited for is detached by the destructor.
+// The object must stay at the same address while the thread runs, because
+// the thread reads its callback data from a member of this object.
+class Thread
+{
+    // Function and argument handed over to the new thread
+    struct UserData
+    {
+        void (*func)(void* userData);
+        void* param;
+    };
+
+    // Thread start routine: unpacks the UserData and invokes the callback.
+    static void* PThreadFunction(void* lpParam)
+    {
+        UserData* userData = static_cast<UserData*>(lpParam);
+
+        userData->func(userData->param);
+
+        return 0;
+    }
+
+    pthread_t thread_;
+    UserData userData_;
+    // True while thread_ refers to a started thread that has not been joined.
+    bool joinable_;
+
+public:
+    Thread(void (*func)(void* userData), void* userData) : joinable_(false)
+    {
+        userData_.func = func;
+        userData_.param = userData;
+
+        // thread_ is only meaningful when creation succeeded.
+        joinable_ = (pthread_create(&thread_, NULL, PThreadFunction, &userData_) == 0);
+    }
+
+    ~Thread()
+    {
+        // Detaching a thread that was already joined (or never started) is
+        // undefined, so only detach while the thread is still joinable.
+        if (joinable_)
+            pthread_detach(thread_);
+    }
+
+    // Blocks until the thread has finished; further calls do nothing.
+    void wait()
+    {
+        if (joinable_)
+        {
+            pthread_join(thread_, NULL);
+            joinable_ = false;
+        }
+    }
+};
+#endif
+
+///////////////////////////////////////////////////////////
+// StereoSingleGpu
+// Run Stereo algorithm on single GPU
+
+// Runs cuda::StereoBM on one CUDA device, selected by its index.
+class StereoSingleGpu
+{
+public:
+    // deviceId: index of the CUDA device used by this object.
+    explicit StereoSingleGpu(int deviceId = 0);
+    ~StereoSingleGpu();
+
+    // Uploads both frames, computes the disparity on the device and copies
+    // the result into `disparity`.
+    void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
+
+private:
+    int deviceId_;
+    // Device-side buffers for the inputs and the result
+    GpuMat d_leftFrame;
+    GpuMat d_rightFrame;
+    GpuMat d_disparity;
+    Ptr<cuda::StereoBM> d_alg;
+};
+
+// Selects the device and creates the StereoBM instance (constructed with the
+// argument 256) while that device is current.
+StereoSingleGpu::StereoSingleGpu(int deviceId) : deviceId_(deviceId)
+{
+    cuda::setDevice(deviceId_);
+    d_alg = cuda::createStereoBM(256);
+}
+
+// Makes this object's device current and then explicitly releases the GPU
+// buffers and the algorithm instead of leaving that to the member destructors.
+StereoSingleGpu::~StereoSingleGpu()
+{
+    cuda::setDevice(deviceId_);
+    d_leftFrame.release();
+    d_rightFrame.release();
+    d_disparity.release();
+    d_alg.release();
+}
+
+// Computes the disparity of a stereo pair on this object's device:
+// upload both frames, run StereoBM, download the result into `disparity`.
+void StereoSingleGpu::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
+{
+    // Make this object's device current before touching its GPU buffers
+    cuda::setDevice(deviceId_);
+    d_leftFrame.upload(leftFrame);
+    d_rightFrame.upload(rightFrame);
+    d_alg->compute(d_leftFrame, d_rightFrame, d_disparity);
+    d_disparity.download(disparity);
+}
+
+///////////////////////////////////////////////////////////
+// StereoMultiGpuThread
+// Run Stereo algorithm on two GPUs using different host threads
+
+// Runs cuda::StereoBM on devices 0 and 1 at the same time, using one host
+// thread per device. Index i of every array member belongs to device i.
+class StereoMultiGpuThread
+{
+public:
+    StereoMultiGpuThread();
+    ~StereoMultiGpuThread();
+
+    // Splits the frames into an upper and a lower part, processes one part
+    // per device and writes the combined result into `disparity`.
+    void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
+
+private:
+    GpuMat d_leftFrames[2];
+    GpuMat d_rightFrames[2];
+    GpuMat d_disparities[2];
+    Ptr<cuda::StereoBM> d_algs[2];
+
+    // Everything one worker thread needs: the device index, the host-side
+    // input/output parts and pointers to that device's GPU buffers.
+    struct StereoLaunchData
+    {
+        int deviceId;
+        Mat leftFrame;
+        Mat rightFrame;
+        Mat disparity;
+        GpuMat* d_leftFrame;
+        GpuMat* d_rightFrame;
+        GpuMat* d_disparity;
+        Ptr<cuda::StereoBM> d_alg;
+    };
+
+    // Thread entry point; userData points to a StereoLaunchData.
+    static void launchGpuStereoAlg(void* userData);
+};
+
+// Creates one StereoBM instance per device, each while its device is current.
+StereoMultiGpuThread::StereoMultiGpuThread()
+{
+    for (int dev = 0; dev < 2; ++dev)
+    {
+        cuda::setDevice(dev);
+        d_algs[dev] = cuda::createStereoBM(256);
+    }
+}
+
+// Releases the GPU buffers and the algorithm of each device while that
+// device is current, device 0 first.
+StereoMultiGpuThread::~StereoMultiGpuThread()
+{
+    for (int dev = 0; dev < 2; ++dev)
+    {
+        cuda::setDevice(dev);
+        d_leftFrames[dev].release();
+        d_rightFrames[dev].release();
+        d_disparities[dev].release();
+        d_algs[dev].release();
+    }
+}
+
+// Computes the disparity of a stereo pair with two host threads, one per
+// device. `disparity` is created as a CV_8UC1 image of the input size; each
+// thread fills its own half of it.
+void StereoMultiGpuThread::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
+{
+    disparity.create(leftFrame.size(), CV_8UC1);
+
+    // Split input data into two parts, one for each GPU.
+    // A border of 32 rows is added to each part,
+    // because original algorithm doesn't calculate disparity on image borders.
+    // With such padding we will get output in the middle of final result.
+
+    const int border = 32;
+    const int totalRows = leftFrame.rows;
+    const int halfRows = totalRows / 2;
+
+    StereoLaunchData launchDatas[2];
+
+    // Upper half, extended downwards by the border
+    StereoLaunchData& upper = launchDatas[0];
+    upper.deviceId = 0;
+    upper.leftFrame = leftFrame.rowRange(0, halfRows + border);
+    upper.rightFrame = rightFrame.rowRange(0, rightFrame.rows / 2 + border);
+    upper.disparity = disparity.rowRange(0, halfRows);
+    upper.d_leftFrame = &d_leftFrames[0];
+    upper.d_rightFrame = &d_rightFrames[0];
+    upper.d_disparity = &d_disparities[0];
+    upper.d_alg = d_algs[0];
+
+    // Lower half, extended upwards by the border
+    StereoLaunchData& lower = launchDatas[1];
+    lower.deviceId = 1;
+    lower.leftFrame = leftFrame.rowRange(halfRows - border, totalRows);
+    lower.rightFrame = rightFrame.rowRange(halfRows - border, totalRows);
+    lower.disparity = disparity.rowRange(halfRows, totalRows);
+    lower.d_leftFrame = &d_leftFrames[1];
+    lower.d_rightFrame = &d_rightFrames[1];
+    lower.d_disparity = &d_disparities[1];
+    lower.d_alg = d_algs[1];
+
+    // Both threads are started before either of them is waited for
+    Thread thread0(launchGpuStereoAlg, &upper);
+    Thread thread1(launchGpuStereoAlg, &lower);
+
+    thread0.wait();
+    thread1.wait();
+}
+
+// Thread entry point: processes one StereoLaunchData on its device and
+// downloads the result without the 32 border rows (the last 32 rows for
+// device 0, the first 32 rows for the other device).
+void StereoMultiGpuThread::launchGpuStereoAlg(void* userData)
+{
+    StereoLaunchData& job = *static_cast<StereoLaunchData*>(userData);
+
+    cuda::setDevice(job.deviceId);
+    job.d_leftFrame->upload(job.leftFrame);
+    job.d_rightFrame->upload(job.rightFrame);
+    job.d_alg->compute(*job.d_leftFrame, *job.d_rightFrame, *job.d_disparity);
+
+    GpuMat& d_result = *job.d_disparity;
+    const bool isFirstDevice = job.deviceId == 0;
+    const int firstRow = isFirstDevice ? 0 : 32;
+    const int lastRow = isFirstDevice ? d_result.rows - 32 : d_result.rows;
+
+    d_result.rowRange(firstRow, lastRow).download(job.disparity);
+}
+
+///////////////////////////////////////////////////////////
+// StereoMultiGpuStream
+// Run Stereo algorithm on two GPUs from single host thread using async API
+
+// Runs cuda::StereoBM on devices 0 and 1 from a single host thread by issuing
+// the work of each device on its own stream. Index i of every array member
+// belongs to device i.
+class StereoMultiGpuStream
+{
+public:
+    StereoMultiGpuStream();
+    ~StereoMultiGpuStream();
+
+    // Splits the frames into an upper and a lower part, processes one part
+    // per device and writes the combined result into `disparity`.
+    void compute(const HostMem& leftFrame, const HostMem& rightFrame, HostMem& disparity);
+
+private:
+    GpuMat d_leftFrames[2];
+    GpuMat d_rightFrames[2];
+    GpuMat d_disparities[2];
+    Ptr<cuda::StereoBM> d_algs[2];
+    Ptr<Stream> streams[2];
+};
+
+// Creates one StereoBM instance and one stream per device, each while its
+// device is current.
+StereoMultiGpuStream::StereoMultiGpuStream()
+{
+    for (int dev = 0; dev < 2; ++dev)
+    {
+        cuda::setDevice(dev);
+        d_algs[dev] = cuda::createStereoBM(256);
+        streams[dev] = makePtr<Stream>();
+    }
+}
+
+// Releases the GPU buffers, the algorithm and the stream of each device while
+// that device is current, device 0 first.
+StereoMultiGpuStream::~StereoMultiGpuStream()
+{
+    for (int dev = 0; dev < 2; ++dev)
+    {
+        cuda::setDevice(dev);
+        d_leftFrames[dev].release();
+        d_rightFrames[dev].release();
+        d_disparities[dev].release();
+        d_algs[dev].release();
+        streams[dev].release();
+    }
+}
+
+// Computes the disparity of a stereo pair on two devices from one host
+// thread. The work of both devices is issued on their streams first and
+// completion is awaited only afterwards. `disparity` is created as a CV_8UC1
+// buffer of the input size; each device fills its own half of it.
+void StereoMultiGpuStream::compute(const HostMem& leftFrame, const HostMem& rightFrame, HostMem& disparity)
+{
+    disparity.create(leftFrame.size(), CV_8UC1);
+
+    // Split input data into two parts, one for each GPU.
+    // A border of 32 rows is added to each part,
+    // because original algorithm doesn't calculate disparity on image borders.
+    // With such padding we will get output in the middle of final result.
+
+    const int border = 32;
+    const int totalRows = leftFrame.rows;
+    const int halfRows = totalRows / 2;
+
+    Mat leftHost = leftFrame.createMatHeader();
+    Mat rightHost = rightFrame.createMatHeader();
+    Mat disparityHost = disparity.createMatHeader();
+    Mat upperResult = disparityHost.rowRange(0, halfRows);
+    Mat lowerResult = disparityHost.rowRange(halfRows, totalRows);
+
+    // Device 0: upper half plus border
+    Stream& stream0 = *streams[0];
+    cuda::setDevice(0);
+    d_leftFrames[0].upload(leftHost.rowRange(0, halfRows + border), stream0);
+    d_rightFrames[0].upload(rightHost.rowRange(0, halfRows + border), stream0);
+    d_algs[0]->compute(d_leftFrames[0], d_rightFrames[0], d_disparities[0], stream0);
+    d_disparities[0].rowRange(0, halfRows).download(upperResult, stream0);
+
+    // Device 1: lower half plus border
+    Stream& stream1 = *streams[1];
+    cuda::setDevice(1);
+    d_leftFrames[1].upload(leftHost.rowRange(halfRows - border, totalRows), stream1);
+    d_rightFrames[1].upload(rightHost.rowRange(halfRows - border, totalRows), stream1);
+    d_algs[1]->compute(d_leftFrames[1], d_rightFrames[1], d_disparities[1], stream1);
+    d_disparities[1].rowRange(border, d_disparities[1].rows).download(lowerResult, stream1);
+
+    // Wait for both streams, each with its device current
+    cuda::setDevice(0);
+    stream0.waitForCompletion();
+
+    cuda::setDevice(1);
+    stream1.waitForCompletion();
+}
+
+///////////////////////////////////////////////////////////
+// main
+
+// Benchmarks stereo disparity computation on a machine with two CUDA devices.
+//
+// Usage: stereo_multi <left_video> <right_video>
+//
+// For every pair of frames read from the two videos, four variants are timed:
+// two StereoSingleGpu instances (constructed with 0 and 1), StereoMultiGpuThread
+// and StereoMultiGpuStream. One table row with the timings in milliseconds is
+// printed per frame and the four disparity maps are displayed.
+//
+// Returns 0 when the videos end or ESC is pressed, -1 on any setup/input error.
+int main(int argc, char** argv)
+{
+    if (argc != 3)
+    {
+        cerr << "Usage: stereo_multi <left_video> <right_video>" << endl;
+        return -1;
+    }
+
+    // The sample is written for exactly two devices.
+    const int numDevices = getCudaEnabledDeviceCount();
+    if (numDevices != 2)
+    {
+        cerr << "Two GPUs are required" << endl;
+        return -1;
+    }
+
+    // Every device must be usable by the CUDA module OpenCV was built with.
+    for (int i = 0; i < numDevices; ++i)
+    {
+        DeviceInfo devInfo(i);
+        if (!devInfo.isCompatible())
+        {
+            // The closing ")" balances the "(" opened before the device name.
+            cerr << "CUDA module wasn't built for GPU #" << i << " ("
+                 << devInfo.name() << ", CC " << devInfo.majorVersion()
+                 << devInfo.minorVersion() << ")" << endl;
+            return -1;
+        }
+
+        printShortCudaDeviceInfo(i);
+    }
+
+    VideoCapture leftVideo(argv[1]);
+    VideoCapture rightVideo(argv[2]);
+
+    if (!leftVideo.isOpened())
+    {
+        cerr << "Can't open " << argv[1] << " video file" << endl;
+        return -1;
+    }
+
+    if (!rightVideo.isOpened())
+    {
+        cerr << "Can't open " << argv[2] << " video file" << endl;
+        return -1;
+    }
+
+    cout << endl;
+    cout << "This sample demonstrates working on one piece of data using two GPUs." << endl;
+    cout << "It splits input into two parts and processes them separately on different GPUs." << endl;
+    cout << endl;
+
+    // Color frames as read from the videos, and their grayscale versions.
+    // The grayscale frames live in HostMem because the multi-stream variant
+    // takes HostMem arguments; the other variants get Mat headers over them.
+    Mat leftFrame, rightFrame;
+    HostMem leftGrayFrame, rightGrayFrame;
+
+    StereoSingleGpu gpu0Alg(0);
+    StereoSingleGpu gpu1Alg(1);
+    StereoMultiGpuThread multiThreadAlg;
+    StereoMultiGpuStream multiStreamAlg;
+
+    // Raw disparity output of each variant.
+    Mat disparityGpu0;
+    Mat disparityGpu1;
+    Mat disparityMultiThread;
+    HostMem disparityMultiStream;
+
+    // Disparity maps resized for display.
+    Mat disparityGpu0Show;
+    Mat disparityGpu1Show;
+    Mat disparityMultiThreadShow;
+    Mat disparityMultiStreamShow;
+
+    TickMeter tm;
+
+    cout << "-------------------------------------------------------------------" << endl;
+    cout << "| Frame | GPU 0 ms | GPU 1 ms | Multi Thread ms | Multi Stream ms |" << endl;
+    cout << "-------------------------------------------------------------------" << endl;
+
+    for (int i = 0;; ++i)
+    {
+        leftVideo >> leftFrame;
+        rightVideo >> rightFrame;
+
+        // Stop as soon as either video runs out of frames.
+        if (leftFrame.empty() || rightFrame.empty())
+            break;
+
+        if (leftFrame.size() != rightFrame.size())
+        {
+            cerr << "Frames have different sizes" << endl;
+            return -1;
+        }
+
+        leftGrayFrame.create(leftFrame.size(), CV_8UC1);
+        rightGrayFrame.create(leftFrame.size(), CV_8UC1);
+
+        cvtColor(leftFrame, leftGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
+        cvtColor(rightFrame, rightGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
+
+        // Variant 1: first single-GPU instance.
+        tm.reset(); tm.start();
+        gpu0Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
+                        disparityGpu0);
+        tm.stop();
+
+        const double gpu0Time = tm.getTimeMilli();
+
+        // Variant 2: second single-GPU instance.
+        tm.reset(); tm.start();
+        gpu1Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
+                        disparityGpu1);
+        tm.stop();
+
+        const double gpu1Time = tm.getTimeMilli();
+
+        // Variant 3: multi-GPU, thread based.
+        tm.reset(); tm.start();
+        multiThreadAlg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
+                               disparityMultiThread);
+        tm.stop();
+
+        const double multiThreadTime = tm.getTimeMilli();
+
+        // Variant 4: multi-GPU, stream based (works directly on HostMem).
+        tm.reset(); tm.start();
+        multiStreamAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiStream);
+        tm.stop();
+
+        const double multiStreamTime = tm.getTimeMilli();
+
+        // One table row; column widths match the header printed above.
+        cout << "| " << setw(5) << i << " | "
+             << setw(8) << setprecision(1) << fixed << gpu0Time << " | "
+             << setw(8) << setprecision(1) << fixed << gpu1Time << " | "
+             << setw(15) << setprecision(1) << fixed << multiThreadTime << " | "
+             << setw(15) << setprecision(1) << fixed << multiStreamTime << " |" << endl;
+
+        resize(disparityGpu0, disparityGpu0Show, Size(1024, 768), 0, 0, INTER_AREA);
+        resize(disparityGpu1, disparityGpu1Show, Size(1024, 768), 0, 0, INTER_AREA);
+        resize(disparityMultiThread, disparityMultiThreadShow, Size(1024, 768), 0, 0, INTER_AREA);
+        resize(disparityMultiStream.createMatHeader(), disparityMultiStreamShow, Size(1024, 768), 0, 0, INTER_AREA);
+
+        imshow("disparityGpu0", disparityGpu0Show);
+        imshow("disparityGpu1", disparityGpu1Show);
+        imshow("disparityMultiThread", disparityMultiThreadShow);
+        imshow("disparityMultiStream", disparityMultiStreamShow);
+
+        // ESC (key code 27) ends the benchmark early.
+        const int key = waitKey(30) & 0xff;
+        if (key == 27)
+            break;
+    }
+
+    cout << "-------------------------------------------------------------------" << endl;
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/super_resolution.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/super_resolution.cpp
@ -0,0 +1,161 @@
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <ctype.h>
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/superres.hpp"
+#include "opencv2/superres/optical_flow.hpp"
+#include "opencv2/opencv_modules.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::superres;
+
+#define MEASURE_TIME(op) \
+    { \
+        TickMeter tm; \
+        tm.start(); \
+        op; \
+        tm.stop(); \
+        cout << tm.getTimeSec() << " sec" << endl; \
+    }
+
+// Maps an optical-flow algorithm name to a cv::superres flow object.
+//
+// name   - one of "farneback", "tvl1", "brox", "pyrlk".
+// useGpu - selects the *_CUDA factory for "farneback" and "tvl1"; "brox" and
+//          "pyrlk" always use their *_CUDA factory regardless of this flag.
+//
+// Returns an empty Ptr (after reporting on cerr) when the name is unknown.
+// NOTE: a "simple" option (createOptFlow_Simple) is not offered here.
+static Ptr<cv::superres::DenseOpticalFlowExt> createOptFlow(const string& name, bool useGpu)
+{
+    if (name == "farneback")
+    {
+        if (!useGpu)
+            return cv::superres::createOptFlow_Farneback();
+        return cv::superres::createOptFlow_Farneback_CUDA();
+    }
+
+    if (name == "tvl1")
+    {
+        if (!useGpu)
+            return cv::superres::createOptFlow_DualTVL1();
+        return cv::superres::createOptFlow_DualTVL1_CUDA();
+    }
+
+    if (name == "brox")
+        return cv::superres::createOptFlow_Brox_CUDA();
+
+    if (name == "pyrlk")
+        return cv::superres::createOptFlow_PyrLK_CUDA();
+
+    // No match: report the bad name and hand back an empty pointer.
+    cerr << "Incorrect Optical Flow algorithm - " << name << endl;
+    return Ptr<cv::superres::DenseOpticalFlowExt>();
+}
+
+// Super-resolution sample: upscales a video with the BTVL1 algorithm.
+//
+// Command-line options are described by the CommandLineParser key string
+// below. The program prints the help text and exits when --help is given or
+// no input video is named, returns EXIT_FAILURE when the optical-flow name is
+// not recognised, and otherwise shows (and optionally writes) each
+// super-resolved frame until the source is exhausted or a key is pressed.
+int main(int argc, const char* argv[])
+{
+    CommandLineParser cmd(argc, argv,
+        "{ v video      |           | Input video (mandatory)}"
+        "{ o output     |           | Output video }"
+        "{ s scale      | 4         | Scale factor }"
+        "{ i iterations | 180       | Iteration count }"
+        "{ t temporal   | 4         | Radius of the temporal search area }"
+        "{ f flow       | farneback | Optical flow algorithm (farneback, tvl1, brox, pyrlk) }"
+        "{ g gpu        | false     | CPU as default device, cuda for CUDA }"
+        "{ h help       | false     | Print help message }"
+    );
+
+    const string inputVideoName = cmd.get<string>("video");
+    if (cmd.get<bool>("help") || inputVideoName.empty())
+    {
+        cout << "This sample demonstrates Super Resolution algorithms for video sequence" << endl;
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+
+    const string outputVideoName = cmd.get<string>("output");
+    const int scale = cmd.get<int>("scale");
+    const int iterations = cmd.get<int>("iterations");
+    const int temporalAreaRadius = cmd.get<int>("temporal");
+    const string optFlow = cmd.get<string>("flow");
+    string gpuOption = cmd.get<string>("gpu");
+
+    // Lower-case the option so that "CUDA", "Cuda", ... are all accepted.
+    // tolower() is only defined for values representable as unsigned char
+    // (or EOF), hence the cast before the call.
+    for (string::size_type k = 0; k < gpuOption.size(); ++k)
+        gpuOption[k] = static_cast<char>(::tolower(static_cast<unsigned char>(gpuOption[k])));
+
+    bool useCuda = gpuOption.compare("cuda") == 0;
+    Ptr<SuperResolution> superRes;
+
+    if (useCuda)
+        superRes = createSuperResolution_BTVL1_CUDA();
+    else
+        superRes = createSuperResolution_BTVL1();
+
+    Ptr<cv::superres::DenseOpticalFlowExt> of = createOptFlow(optFlow, useCuda);
+
+    // createOptFlow() has already reported the unknown name on cerr.
+    if (of.empty())
+        return EXIT_FAILURE;
+    superRes->setOpticalFlow(of);
+
+    superRes->setScale(scale);
+    superRes->setIterations(iterations);
+    superRes->setTemporalAreaRadius(temporalAreaRadius);
+
+    Ptr<FrameSource> frameSource;
+    if (useCuda)
+    {
+        // Try to use gpu Video Decoding; reading one frame acts as a probe.
+        try
+        {
+            frameSource = createFrameSource_Video_CUDA(inputVideoName);
+            Mat frame;
+            frameSource->nextFrame(frame);
+        }
+        catch (const cv::Exception&)
+        {
+            // Drop the source so the generic one below is used instead.
+            frameSource.release();
+        }
+    }
+    if (!frameSource)
+        frameSource = createFrameSource_Video(inputVideoName);
+
+    // skip first frame, it is usually corrupted
+    {
+        Mat frame;
+        frameSource->nextFrame(frame);
+        cout << "Input           : " << inputVideoName << " " << frame.size() << endl;
+        cout << "Scale factor    : " << scale << endl;
+        cout << "Iterations      : " << iterations << endl;
+        cout << "Temporal radius : " << temporalAreaRadius << endl;
+        cout << "Optical Flow    : " << optFlow << endl;
+        cout << "Mode            : " << (useCuda ? "CUDA" : "CPU") << endl;
+    }
+
+    superRes->setInput(frameSource);
+
+    // Opened lazily, once the size of the first result frame is known.
+    VideoWriter writer;
+
+    for (int i = 0;; ++i)
+    {
+        cout << '[' << setw(3) << i << "] : " << flush;
+        Mat result;
+
+        MEASURE_TIME(superRes->nextFrame(result));
+
+        // An empty result marks the end of the input.
+        if (result.empty())
+            break;
+
+        imshow("Super Resolution", result);
+
+        // Any key pressed within the one-second wait stops the run.
+        if (waitKey(1000) > 0)
+            break;
+
+        if (!outputVideoName.empty())
+        {
+            if (!writer.isOpened())
+                writer.open(outputVideoName, VideoWriter::fourcc('X', 'V', 'I', 'D'), 25.0, result.size());
+            writer << result;
+        }
+    }
+
+    return 0;
+}
--- a/3rdparty/opencv-4.5.4/samples/gpu/surf_keypoint_matcher.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/surf_keypoint_matcher.cpp
@ -0,0 +1,96 @@
+#include <iostream>
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_XFEATURES2D
+
+#include "opencv2/core.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/cudafeatures2d.hpp"
+#include "opencv2/xfeatures2d/cuda.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::cuda;
+
+// Prints a short description of the sample followed by its command line.
+static void help()
+{
+    cout << "\nThis program demonstrates using SURF_CUDA features detector, descriptor extractor and BruteForceMatcher_CUDA" << endl
+         << "\nUsage:\n\tsurf_keypoint_matcher --left <image1> --right <image2>" << endl;
+}
+
+// SURF_CUDA keypoint matching sample.
+//
+// Expects `--left <image1> --right <image2>`. Both images are loaded as
+// grayscale, uploaded to the GPU, described with SURF_CUDA, matched with a
+// brute-force CUDA matcher and the matches are drawn in a window.
+//
+// Returns 0 after the result window is dismissed, -1 on bad arguments or
+// when an image cannot be read.
+int main(int argc, char* argv[])
+{
+    if (argc != 5)
+    {
+        help();
+        return -1;
+    }
+
+    GpuMat img1, img2;
+    for (int i = 1; i < argc; ++i)
+    {
+        const string arg(argv[i]);
+
+        if (arg == "--left" || arg == "--right")
+        {
+            // The option needs a value; without this check argv[++i] could be
+            // argv[argc], which is a null pointer.
+            if (i + 1 >= argc)
+            {
+                help();
+                return -1;
+            }
+
+            const char* path = argv[++i];
+
+            // Check the host image before uploading: imread() signals failure
+            // by returning an empty matrix.
+            Mat host = imread(path, IMREAD_GRAYSCALE);
+            if (host.empty())
+            {
+                cerr << "Can't read image " << path << endl;
+                return -1;
+            }
+
+            if (arg == "--left")
+                img1.upload(host);
+            else
+                img2.upload(host);
+        }
+        else if (arg == "--help")
+        {
+            help();
+            return -1;
+        }
+    }
+
+    // Both sides are mandatory; e.g. "--left a --left b" leaves img2 empty.
+    if (img1.empty() || img2.empty())
+    {
+        help();
+        return -1;
+    }
+
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
+
+    SURF_CUDA surf;
+
+    // detecting keypoints & computing descriptors
+    GpuMat keypoints1GPU, keypoints2GPU;
+    GpuMat descriptors1GPU, descriptors2GPU;
+    surf(img1, GpuMat(), keypoints1GPU, descriptors1GPU);
+    surf(img2, GpuMat(), keypoints2GPU, descriptors2GPU);
+
+    cout << "FOUND " << keypoints1GPU.cols << " keypoints on first image" << endl;
+    cout << "FOUND " << keypoints2GPU.cols << " keypoints on second image" << endl;
+
+    // matching descriptors
+    Ptr<cv::cuda::DescriptorMatcher> matcher = cv::cuda::DescriptorMatcher::createBFMatcher(surf.defaultNorm());
+    vector<DMatch> matches;
+    matcher->match(descriptors1GPU, descriptors2GPU, matches);
+
+    // downloading results
+    // (the descriptors are downloaded as well, although only the keypoints
+    // and matches are used for drawing below)
+    vector<KeyPoint> keypoints1, keypoints2;
+    vector<float> descriptors1, descriptors2;
+    surf.downloadKeypoints(keypoints1GPU, keypoints1);
+    surf.downloadKeypoints(keypoints2GPU, keypoints2);
+    surf.downloadDescriptors(descriptors1GPU, descriptors1);
+    surf.downloadDescriptors(descriptors2GPU, descriptors2);
+
+    // drawing the results
+    Mat img_matches;
+    drawMatches(Mat(img1), keypoints1, Mat(img2), keypoints2, matches, img_matches);
+
+    namedWindow("matches", 0);
+    imshow("matches", img_matches);
+    waitKey(0);
+
+    return 0;
+}
+
+#else
+
+// Stub entry point compiled when HAVE_OPENCV_XFEATURES2D is not defined:
+// reports the missing module on stderr and exits with status 0.
+int main()
+{
+    const char* const notice = "OpenCV was built without xfeatures2d module";
+    std::cerr << notice << std::endl;
+    return 0;
+}
+
+#endif
--- a/3rdparty/opencv-4.5.4/samples/gpu/video_reader.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/video_reader.cpp
@ -0,0 +1,95 @@
+#include <iostream>
+
+#include "opencv2/opencv_modules.hpp"
+
+#if defined(HAVE_OPENCV_CUDACODEC)
+
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <numeric>
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/opengl.hpp>
+#include <opencv2/cudacodec.hpp>
+#include <opencv2/highgui.hpp>
+
+// Compares video decoding through cv::VideoCapture and cv::cudacodec::VideoReader.
+//
+// Usage: video_reader <input video file>
+//
+// The file is played twice: first through VideoCapture (shown in the "CPU"
+// window), then through the cudacodec reader (shown in the "GPU" window).
+// The time spent fetching each frame is recorded, and the average per-frame
+// time, FPS and frame count of both passes are printed when each pass
+// produced at least one frame. Returns -1 on a wrong argument count,
+// 0 otherwise.
+int main(int argc, const char* argv[])
+{
+    if (argc != 2)
+    {
+        // Tell the user what is expected instead of exiting silently.
+        std::cerr << "Usage : video_reader <input video file>" << std::endl;
+        return -1;
+    }
+
+    const std::string fname(argv[1]);
+
+    cv::namedWindow("CPU", cv::WINDOW_NORMAL);
+    cv::namedWindow("GPU", cv::WINDOW_OPENGL);
+    cv::cuda::setGlDevice();
+
+    cv::Mat frame;
+    cv::VideoCapture reader(fname);
+
+    cv::cuda::GpuMat d_frame;
+    cv::Ptr<cv::cudacodec::VideoReader> d_reader = cv::cudacodec::createVideoReader(fname);
+
+    cv::TickMeter tm;
+    std::vector<double> cpu_times;
+    std::vector<double> gpu_times;
+
+    int gpu_frame_count=0, cpu_frame_count=0;
+
+    // Pass 1: VideoCapture. Only the read() call is timed, not the display.
+    for (;;)
+    {
+        tm.reset(); tm.start();
+        if (!reader.read(frame))
+            break;
+        tm.stop();
+        cpu_times.push_back(tm.getTimeMilli());
+        cpu_frame_count++;
+
+        cv::imshow("CPU", frame);
+
+        // Any key press ends this pass early.
+        if (cv::waitKey(3) > 0)
+            break;
+    }
+
+    // Pass 2: cudacodec reader. Only the nextFrame() call is timed.
+    for (;;)
+    {
+        tm.reset(); tm.start();
+        if (!d_reader->nextFrame(d_frame))
+            break;
+        tm.stop();
+        gpu_times.push_back(tm.getTimeMilli());
+        gpu_frame_count++;
+
+        cv::imshow("GPU", d_frame);
+
+        // Any key press ends this pass early.
+        if (cv::waitKey(3) > 0)
+            break;
+    }
+
+    // Averages are only meaningful when both passes produced samples.
+    if (!cpu_times.empty() && !gpu_times.empty())
+    {
+        std::cout << std::endl << "Results:" << std::endl;
+
+        std::sort(cpu_times.begin(), cpu_times.end());
+        std::sort(gpu_times.begin(), gpu_times.end());
+
+        double cpu_avg = std::accumulate(cpu_times.begin(), cpu_times.end(), 0.0) / cpu_times.size();
+        double gpu_avg = std::accumulate(gpu_times.begin(), gpu_times.end(), 0.0) / gpu_times.size();
+
+        std::cout << "CPU : Avg : " << cpu_avg << " ms FPS : " << 1000.0 / cpu_avg << " Frames " << cpu_frame_count << std::endl;
+        std::cout << "GPU : Avg : " << gpu_avg << " ms FPS : " << 1000.0 / gpu_avg << " Frames " << gpu_frame_count << std::endl;
+    }
+
+    return 0;
+}
+
+#else
+
+// Stub entry point compiled when HAVE_OPENCV_CUDACODEC is not defined:
+// prints a notice on stdout and exits with status 0.
+int main()
+{
+    const char* const notice = "OpenCV was built without CUDA Video decoding support\n";
+    std::cout << notice << std::endl;
+    return 0;
+}
+
+#endif
--- a/3rdparty/opencv-4.5.4/samples/gpu/video_writer.cpp
+++ b/3rdparty/opencv-4.5.4/samples/gpu/video_writer.cpp
@ -0,0 +1,112 @@
+#include <iostream>
+
+#include "opencv2/opencv_modules.hpp"
+
+#if defined(HAVE_OPENCV_CUDACODEC) && defined(_WIN32)
+
+#include <vector>
+#include <numeric>
+
+#include "opencv2/core.hpp"
+#include "opencv2/cudacodec.hpp"
+#include "opencv2/highgui.hpp"
+
+using namespace cv;
+// Compares video encoding through cv::VideoWriter and cv::cudacodec::VideoWriter.
+//
+// Usage: video_writer <input video file>
+//
+// Every frame of the input is written to "output_cpu.avi" (XVID fourcc) and
+// to "output_gpu.avi" (cudacodec writer); the time of each write call is
+// recorded and the average per-frame time and FPS of both writers are
+// printed at the end. Returns -1 on bad arguments or when the input or the
+// CPU writer cannot be opened, 0 otherwise.
+int main(int argc, const char* argv[])
+{
+    if (argc != 2)
+    {
+        std::cerr << "Usage : video_writer <input video file>" << std::endl;
+        return -1;
+    }
+
+    const double FPS = 25.0;
+
+    cv::VideoCapture reader(argv[1]);
+
+    if (!reader.isOpened())
+    {
+        std::cerr << "Can't open input video file" << std::endl;
+        return -1;
+    }
+
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
+
+    // Both writers are opened lazily, once the frame size is known.
+    cv::VideoWriter writer;
+    cv::Ptr<cv::cudacodec::VideoWriter> d_writer;
+
+    cv::Mat frame;
+    cv::cuda::GpuMat d_frame;
+
+    std::vector<double> cpu_times;
+    std::vector<double> gpu_times;
+    TickMeter tm;
+
+    for (int i = 1;; ++i)
+    {
+        std::cout << "Read " << i << " frame" << std::endl;
+
+        reader >> frame;
+
+        if (frame.empty())
+        {
+            std::cout << "Stop" << std::endl;
+            break;
+        }
+
+        if (!writer.isOpened())
+        {
+            std::cout << "Frame Size : " << frame.cols << "x" << frame.rows << std::endl;
+
+            std::cout << "Open CPU Writer" << std::endl;
+
+            if (!writer.open("output_cpu.avi", cv::VideoWriter::fourcc('X', 'V', 'I', 'D'), FPS, frame.size()))
+                return -1;
+        }
+
+        if (d_writer.empty())
+        {
+            std::cout << "Open CUDA Writer" << std::endl;
+
+            const cv::String outputFilename = "output_gpu.avi";
+            d_writer = cv::cudacodec::createVideoWriter(outputFilename, frame.size(), FPS);
+        }
+
+        // The upload happens outside the timed section.
+        d_frame.upload(frame);
+
+        std::cout << "Write " << i << " frame" << std::endl;
+
+        tm.reset(); tm.start();
+        writer.write(frame);
+        tm.stop();
+        cpu_times.push_back(tm.getTimeMilli());
+
+        tm.reset(); tm.start();
+        d_writer->write(d_frame);
+        tm.stop();
+        gpu_times.push_back(tm.getTimeMilli());
+    }
+
+    // With no frames written the averages would be 0/0; skip the report then.
+    if (!cpu_times.empty() && !gpu_times.empty())
+    {
+        std::cout << std::endl << "Results:" << std::endl;
+
+        std::sort(cpu_times.begin(), cpu_times.end());
+        std::sort(gpu_times.begin(), gpu_times.end());
+
+        double cpu_avg = std::accumulate(cpu_times.begin(), cpu_times.end(), 0.0) / cpu_times.size();
+        double gpu_avg = std::accumulate(gpu_times.begin(), gpu_times.end(), 0.0) / gpu_times.size();
+
+        std::cout << "CPU [XVID] : Avg : " << cpu_avg << " ms FPS : " << 1000.0 / cpu_avg << std::endl;
+        std::cout << "GPU [H264] : Avg : " << gpu_avg << " ms FPS : " << 1000.0 / gpu_avg << std::endl;
+    }
+
+    return 0;
+}
+
+#else
+
+// Stub entry point compiled when CUDA video encoding is unavailable
+// (HAVE_OPENCV_CUDACODEC undefined or not a _WIN32 build): prints a notice
+// on stdout and exits with status 0.
+int main()
+{
+    const char* const notice = "OpenCV was built without CUDA Video encoding support\n";
+    std::cout << notice << std::endl;
+    return 0;
+}
+
+#endif