2021-06-11 14:39:51 +08:00
|
|
|
|
/*
|
|
|
|
|
|
|
|
|
|
* Copyright (C) 2019 ~ 2019 Deepin Technology Co., Ltd.
|
|
|
|
|
|
|
|
|
|
*
|
|
|
|
|
|
|
|
|
|
* Author: wangcong <wangcong@uniontech.com>
|
|
|
|
|
|
|
|
|
|
*
|
|
|
|
|
|
|
|
|
|
* Maintainer: wangcong <wangcong@uniontech.com>
|
|
|
|
|
|
|
|
|
|
*
|
|
|
|
|
|
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
|
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
|
|
|
|
|
* any later version.
|
|
|
|
|
|
|
|
|
|
*
|
|
|
|
|
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
*
|
|
|
|
|
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
|
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
#ifndef TESSOCRUTILS_H
|
|
|
|
|
#define TESSOCRUTILS_H
|
|
|
|
|
// Project headers
|
|
|
|
|
|
|
|
|
|
// DTK classes
|
|
|
|
|
|
|
|
|
|
// Qt classes
|
|
|
|
|
#include <QDebug>
|
|
|
|
|
#include <QException>
|
|
|
|
|
#include <QFileInfo>
|
|
|
|
|
#include <QImage>
|
|
|
|
|
// Other libraries
|
|
|
|
|
#include <tesseract/baseapi.h>
|
|
|
|
|
#include <leptonica/allheaders.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Format of the text returned by a recognition request.
 *
 * Kept as an unscoped enum so that both qualified (ResultType::RESULT_STRING)
 * and unqualified (RESULT_STRING) uses continue to compile.
 */
enum ResultType {
    RESULT_STRING = 1,  // plain-text result
    RESULT_HTML = 2,    // HTML text
    // Unrecognized result type.
    // NOTE(review): the previous comment labeled this value "XML text", which
    // does not match the enumerator name - confirm the intended meaning.
    UNKNOWN_TYPE = -1
};
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Error codes reported by the recognition process.
 *
 * Kept as an unscoped enum so that both qualified (ErrorCode::UNKNOWN) and
 * unqualified uses continue to compile.
 */
enum ErrorCode {
    OK = 1,             // success
    UNKNOWN = -1,       // unknown error
    OCR_P_NULL = 101,   // file path is empty
    OCR_RT_NULL = 102,  // requested result type does not exist
    OCR_INI_F = 103,    // OCR third-party library failed to initialize
    OCR_LI_F = 104,     // OCR failed to load the image
    OCR_RI_F = 105      // OCR failed to recognize the image
};
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Language packs currently supported.
 */
enum Languages {
    UNKNOWN_LAN = -1,  // unknown language
    CHI_SIM = 1,       // Simplified Chinese
    CHI_TRA = 2,       // Traditional Chinese
    ENG = 3            // English
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief ocr识别的返回结果
|
|
|
|
|
*/
|
|
|
|
|
struct RecognitionResult{
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 识别是否成功
|
|
|
|
|
*/
|
|
|
|
|
bool flag;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 识别返回的消息,如果识别失败发生的错误通过此字段返回
|
|
|
|
|
*/
|
|
|
|
|
QString message;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 返回的错误码
|
|
|
|
|
*/
|
|
|
|
|
ErrorCode errorCode;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 返回字符串结果的类型
|
|
|
|
|
*/
|
|
|
|
|
ResultType resultType;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 返回的字符串结果
|
|
|
|
|
*/
|
|
|
|
|
QString result;
|
|
|
|
|
|
|
|
|
|
RecognitionResult(){
|
|
|
|
|
flag = false;
|
|
|
|
|
message.clear();
|
2021-06-15 13:22:21 +08:00
|
|
|
|
errorCode = ErrorCode::UNKNOWN;
|
|
|
|
|
resultType = ResultType::RESULT_STRING;
|
2021-06-11 14:39:51 +08:00
|
|
|
|
result.clear();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief ocr接口工具
|
2021-06-15 13:22:21 +08:00
|
|
|
|
* 使用方法: *
|
|
|
|
|
* 1.获取识别结果 TessOcrUtils::instance()->getRecogitionResult(t_image);
|
2021-06-11 14:39:51 +08:00
|
|
|
|
*/
|
|
|
|
|
class TessOcrUtils
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
TessOcrUtils();
|
2021-06-15 13:22:21 +08:00
|
|
|
|
~TessOcrUtils();
|
2021-06-11 14:39:51 +08:00
|
|
|
|
static TessOcrUtils *instance();
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 传入待识别图片的路径和想得到的返回结果类型,获取识别结果
|
|
|
|
|
* @param 图片路径
|
|
|
|
|
* @param 返回的字符串结果类型
|
|
|
|
|
* @return resultType类型的字符串结果
|
|
|
|
|
*/
|
2021-06-22 20:13:39 +08:00
|
|
|
|
RecognitionResult getRecogitionResult(const QString &imagePath,const ResultType &resultType);
|
2021-06-11 14:39:51 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 传入待识别图片的路径,获取纯字符串的识别结果
|
|
|
|
|
* @param 图片路径
|
|
|
|
|
* @return 识别的字符串结果
|
|
|
|
|
*/
|
2021-06-22 20:13:39 +08:00
|
|
|
|
RecognitionResult getRecogitionResult(const QString &imagePath);
|
2021-06-11 14:39:51 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 传入待识别图片和想得到的返回结果类型,获取识别结果
|
|
|
|
|
* @param 图片路径
|
|
|
|
|
* @param 返回的字符串结果类型
|
|
|
|
|
* @return resultType类型的字符串结果
|
|
|
|
|
*/
|
2021-06-22 20:13:39 +08:00
|
|
|
|
RecognitionResult getRecogitionResult(QImage *image,const ResultType &resultType);
|
2021-06-11 14:39:51 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 传入待识别图片,获取纯字符串的识别结果
|
|
|
|
|
* @param 图片路径
|
|
|
|
|
* @return 识别的字符串结果
|
|
|
|
|
*/
|
|
|
|
|
RecognitionResult getRecogitionResult(QImage *image);
|
|
|
|
|
|
2021-06-28 11:46:54 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief 是否正在识别中
|
|
|
|
|
* @return true:正在识别中 false:识别结束
|
|
|
|
|
*/
|
|
|
|
|
bool isRunning();
|
|
|
|
|
|
2021-06-15 13:22:21 +08:00
|
|
|
|
private :
|
2021-06-11 14:39:51 +08:00
|
|
|
|
|
2021-06-15 13:22:21 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief 设置当前使用的语言包
|
|
|
|
|
* @param 需要使用的语言包
|
|
|
|
|
* @return 是否成功
|
|
|
|
|
*/
|
|
|
|
|
//bool setLanguages(const QList<Languages> langs);
|
2021-06-11 14:39:51 +08:00
|
|
|
|
|
2021-06-15 13:22:21 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief 设置当前使用的语言包路径
|
|
|
|
|
* @param 语言包路径
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
bool setLanguagesPath(const QString langsPath);
|
2021-06-11 14:39:51 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 判断指定的结果类型是否存在
|
|
|
|
|
* @param 指定的类型
|
|
|
|
|
* @return 存在返回true
|
|
|
|
|
*/
|
|
|
|
|
bool isExistsResultType(ResultType resultType);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 判断指定的语言包类型是否存在
|
|
|
|
|
* @param 指定的类型
|
|
|
|
|
* @return 存在返回true
|
|
|
|
|
*/
|
|
|
|
|
bool isExistsLanguage(Languages lang);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 根据传入的枚举,获取相应的语言包字符串
|
|
|
|
|
* @param 语言包的枚举
|
|
|
|
|
* @return 语言包的字符串
|
|
|
|
|
*/
|
|
|
|
|
QString getLangStr(Languages lang);
|
|
|
|
|
|
2021-06-15 13:22:21 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief 获取系统当前的语言
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
Languages getSystemLang();
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 获取当前使用的语言包
|
|
|
|
|
* @param 需要使用的语言包
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
QString getLanguages();
|
|
|
|
|
|
2021-06-11 14:39:51 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief 获取纯字符串的识别结果
|
|
|
|
|
* @param 需识别的图片
|
|
|
|
|
* @param 返回的字符串结果类型
|
|
|
|
|
* @return 返回识别结果
|
|
|
|
|
*/
|
2021-06-28 21:17:18 +08:00
|
|
|
|
RecognitionResult getRecognizeResult(Pix * image,ResultType resultType);
|
2021-06-11 14:39:51 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 设置返回结果,内部使用
|
|
|
|
|
* @param 错误码
|
|
|
|
|
* @param 错误消息
|
|
|
|
|
* @param 结果类型
|
|
|
|
|
*/
|
|
|
|
|
void setResult(ErrorCode errCode, const QString errMessage,const ResultType resultType,RecognitionResult &result);
|
|
|
|
|
|
|
|
|
|
/**
|
2021-06-15 13:22:21 +08:00
|
|
|
|
* @brief 语言包路径
|
2021-06-11 14:39:51 +08:00
|
|
|
|
*/
|
|
|
|
|
QString m_sTessdataPath;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 语言包
|
|
|
|
|
*/
|
|
|
|
|
QString m_sLangs;
|
|
|
|
|
|
2021-06-28 11:46:54 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief 是否正在识别中
|
|
|
|
|
*/
|
|
|
|
|
bool m_isRunning;
|
|
|
|
|
|
2021-06-11 14:39:51 +08:00
|
|
|
|
static TessOcrUtils *m_tessOcrUtils;
|
|
|
|
|
|
2021-06-28 11:46:54 +08:00
|
|
|
|
static tesseract::TessBaseAPI *t_Tesseract;
|
2021-06-15 15:21:36 +08:00
|
|
|
|
|
2021-06-11 14:39:51 +08:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#endif // TESSOCRUTILS_H
|