diff --git a/assets/tesslangs/chi_sim_vert.traineddata b/assets/tesslangs/chi_sim_vert.traineddata new file mode 100644 index 0000000..2284498 Binary files /dev/null and b/assets/tesslangs/chi_sim_vert.traineddata differ diff --git a/assets/tesslangs/chi_tra_vert.traineddata b/assets/tesslangs/chi_tra_vert.traineddata new file mode 100644 index 0000000..930343b Binary files /dev/null and b/assets/tesslangs/chi_tra_vert.traineddata differ diff --git a/deepin-ocr.pro b/deepin-ocr.pro index 1248f1a..b6eb79f 100644 --- a/deepin-ocr.pro +++ b/deepin-ocr.pro @@ -62,7 +62,7 @@ dbus_service.files=./com.deepin.Ocr.service #Tesseract-ocr识别语言包 tesslangs.path=/usr/share/deepin-ocr/tesslangs -tesslangs.files=./assets/tesslangs/chi_sim.traineddata ./assets/tesslangs/chi_tra.traineddata ./assets/tesslangs/eng.traineddata +tesslangs.files=./assets/tesslangs/chi_sim.traineddata ./assets/tesslangs/chi_tra.traineddata ./assets/tesslangs/eng.traineddata ./assets/tesslangs/chi_sim_vert.traineddata ./assets/tesslangs/chi_tra_vert.traineddata INSTALLS += target dbus_service translations tesslangs diff --git a/src/tessocrutils/tessocrutils.cpp b/src/tessocrutils/tessocrutils.cpp index d0368c5..44b9a2f 100644 --- a/src/tessocrutils/tessocrutils.cpp +++ b/src/tessocrutils/tessocrutils.cpp @@ -39,12 +39,29 @@ */ #include "tessocrutils.h" +/** + * @brief 语言包路径 + */ +const QString TESSDATA_PATH = "/usr/share/deepin-ocr/tesslangs"; + +/** + * @brief TessOcrUtils::m_tessOcrUtils + */ +TessOcrUtils *TessOcrUtils::m_tessOcrUtils = nullptr; + TessOcrUtils::TessOcrUtils() { + //设置语言包路径 + setLanguagesPath(TESSDATA_PATH); + //设置当前默认使用的识别语言系统语言+英语 + m_sLangs = getLanguages(); +} + +TessOcrUtils::~TessOcrUtils() +{ } -TessOcrUtils *TessOcrUtils::m_tessOcrUtils = nullptr; TessOcrUtils *TessOcrUtils::instance() { @@ -54,11 +71,11 @@ TessOcrUtils *TessOcrUtils::instance() return m_tessOcrUtils; } - +//传入待识别图片的路径和想得到的返回结果类型,获取识别结果 RecognitionResult TessOcrUtils::getRecogitionResult(const QString imagePath,const ResultType resultType) { QString errorMessage = ""; - ErrorCode errorCode = ErrorCode::UKNOW; + ErrorCode errorCode = ErrorCode::UNKNOWN; RecognitionResult t_result; Pix *p_image; @@ -66,7 +83,7 @@ RecognitionResult TessOcrUtils::getRecogitionResult(const QString imagePath,cons { //errorMesage = "不能传递空的图片路径!"; errorMessage = "Can't pass an empty image path!"; - errorCode = ErrorCode::OCRPNULL; + errorCode = ErrorCode::OCR_P_NULL; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } @@ -77,7 +94,7 @@ RecognitionResult TessOcrUtils::getRecogitionResult(const QString imagePath,cons { //errorMesage = "图片不存在!imagePath: " + imagePath; errorMessage = "Image does not exist! imagePath: " + imagePath; - errorCode = ErrorCode::OCRPNULL; + errorCode = ErrorCode::OCR_P_NULL; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } @@ -91,7 +108,7 @@ RecognitionResult TessOcrUtils::getRecogitionResult(const QString imagePath,cons } catch (const std::logic_error &e) { //errorMesage = "加载图片失败!" + QString(qExc.what()); errorMessage = "Failed to load picture! " + QString(e.what()); - errorCode = ErrorCode::OCRLIF; + errorCode = ErrorCode::OCR_LI_F; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } @@ -99,22 +116,24 @@ RecognitionResult TessOcrUtils::getRecogitionResult(const QString imagePath,cons return getRecogitionResult(p_image,resultType); } +//传入待识别图片的路径,获取纯字符串的识别结果 RecognitionResult TessOcrUtils::getRecogitionResult(const QString imagePath) { - return getRecogitionResult(imagePath,ResultType::ResultString); + return getRecogitionResult(imagePath,ResultType::RESULT_STRING); } +//传入待识别图片和想得到的返回结果类型,获取识别结果 RecognitionResult TessOcrUtils::getRecogitionResult(QImage *image, const ResultType resultType) { QString errorMessage = ""; - ErrorCode errorCode = ErrorCode::UKNOW; + ErrorCode errorCode = ErrorCode::UNKNOWN; RecognitionResult t_result; if(image->isNull()) { //errorMesage = "不能传递空的图片路径!"; errorMessage = "Can't pass an empty image!"; - errorCode = ErrorCode::OCRPNULL; + errorCode = ErrorCode::OCR_P_NULL; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } @@ -137,17 +156,51 @@ RecognitionResult TessOcrUtils::getRecogitionResult(QImage *image, const ResultT //p_image->colormap->nalloc; //p_image->colormap->n; p_image->data = reinterpret_cast(image->bits()); - - //获取识别结果 return getRecogitionResult(p_image,resultType); } +//传入待识别图片,获取纯字符串的识别结果 RecognitionResult TessOcrUtils::getRecogitionResult(QImage *image) { - return getRecogitionResult(image,ResultType::ResultString); + return getRecogitionResult(image,ResultType::RESULT_STRING); } +//获取系统当前的语言 +Languages TessOcrUtils::getSystemLang() +{ + QLocale locale; + if(locale.language() == QLocale::Language::Chinese){ + //查询当前国别代码 + QLocale::Country t_countryId = locale.country(); + if(t_countryId == QLocale::Country::China){ + return Languages::CHI_SIM; + }else { + return Languages::CHI_TRA; + } + }else { + return Languages::ENG; + } +} + +//获取识别需要的使用的语言包 +QString TessOcrUtils::getLanguages() +{ + QString t_langs = ""; + + //当前系统语言 + Languages t_systemLang = getSystemLang(); + + if(t_systemLang == Languages::ENG) + { + t_langs=getLangStr(t_systemLang); + }else{ + t_langs=getLangStr(t_systemLang)+"+"+getLangStr(Languages::ENG); + } + return t_langs; +} + +//设置识别需要的使用的语言包的路径 bool TessOcrUtils::setLanguagesPath(const QString langsPath) { if(langsPath.isNull() || langsPath.isEmpty()) @@ -158,24 +211,22 @@ bool TessOcrUtils::setLanguagesPath(const QString langsPath) return true; } +//获取识别结果 RecognitionResult TessOcrUtils::getRecogitionResult(Pix * image,ResultType resultType) { QString errorMessage = ""; - ErrorCode errorCode = ErrorCode::UKNOW; + ErrorCode errorCode = ErrorCode::UNKNOWN; QString result = ""; RecognitionResult t_result; if(!isExistsResultType(resultType)) { //errorMesage = "结果类型不存在 resultType: " + QString::number(resultType); errorMessage = "The result type does not exist! resultType: " + QString::number(resultType); - errorCode = ErrorCode::OCRRTNULL; + errorCode = ErrorCode::OCR_RT_NULL; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } - tesseract::TessBaseAPI *t_Tesseract = new tesseract::TessBaseAPI(); - - try{ //实例化tesseract t_Tesseract = new tesseract::TessBaseAPI(); @@ -184,45 +235,40 @@ RecognitionResult TessOcrUtils::getRecogitionResult(Pix * image,ResultType resul { //errorMesage = "初始化 Tesseract 失败!" + QString::number(resultType); errorMessage = "Could not initialize tesseract! Tesseract couldn't load any languages!"; - errorCode = ErrorCode::OCRINIF; + errorCode = ErrorCode::OCR_INI_F; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } }catch(const std::logic_error &e){ //errorMesage = "初始化 Tesseract 失败!Tesseract 内部错误!" + QString(qExc.what()); errorMessage = "Could not initialize tesseract! Tesseract internal error! " + QString(e.what()); - errorCode = ErrorCode::OCRINIF; + errorCode = ErrorCode::OCR_INI_F; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } - //为Tesseract设置待识别图片 t_Tesseract->SetImage(image); try { //识别图片 switch (resultType) { - case ResultType::ResultXML: - //result = QString(t_Tesseract->GetAltoText(0)); - break; - case ResultType::ResultHTML: + case ResultType::RESULT_HTML: result = QString(t_Tesseract->GetHOCRText(0)); break; - case ResultType::ResultString: + case ResultType::RESULT_STRING: result = QString(t_Tesseract->GetUTF8Text()); break; } } catch (const std::logic_error &e) { //errorMesage = "识别图片失败!" + QString(qExc.what()); errorMessage = "Image recognition failed! " + QString(e.what()); - errorCode = ErrorCode::OCRLIF; + errorCode = ErrorCode::OCR_LI_F; setResult(errorCode,errorMessage,resultType,t_result); return t_result; } t_Tesseract->End(); delete t_Tesseract; pixDestroy(&image); - t_result.flag = true; t_result.message = errorMessage; t_result.errorCode = ErrorCode::OK; @@ -231,9 +277,9 @@ RecognitionResult TessOcrUtils::getRecogitionResult(Pix * image,ResultType resul return t_result; } +//设置返回结果,内部使用 void TessOcrUtils::setResult(ErrorCode errCode, const QString errMessage,const ResultType resultType,RecognitionResult &result) { - result.flag = false; result.message = errMessage; result.errorCode = errCode; @@ -241,34 +287,32 @@ void TessOcrUtils::setResult(ErrorCode errCode, const QString errMessage,const R result.result = ""; } -bool TessOcrUtils::setLanguages(const QList langs) -{ - QString t_langs = ""; - if(langs.isEmpty()) - { - return false; - } - - int i; - for ( i = 0;i langs) +//{ +// QString t_langs = ""; +// if(langs.isEmpty()) +// { +// return false; +// } +// int i; +// for ( i = 0;isetLanguagesPath(langPath); - * 2.设置用来识别的语言包 TessOcrUtils::instance()->setLanguages(langs); - * 3.获取识别结果 TessOcrUtils::instance()->getRecogitionResult(t_image); + * 使用方法: * + * 1.获取识别结果 TessOcrUtils::instance()->getRecogitionResult(t_image); */ class TessOcrUtils { public: TessOcrUtils(); + ~TessOcrUtils(); static TessOcrUtils *instance(); /** @@ -166,23 +166,22 @@ public: */ RecognitionResult getRecogitionResult(QImage *image); - - /** - * @brief 设置当前使用的语言包 - * @param 需要使用的语言包 - * @return 是否成功 - */ - bool setLanguages(const QList langs); - - /** - * @brief 设置当前使用的语言包路径 - * @param 语言包路径 - * @return - */ - bool setLanguagesPath(const QString langsPath); - private : + /** + * @brief 设置当前使用的语言包 + * @param 需要使用的语言包 + * @return 是否成功 + */ + //bool setLanguages(const QList langs); + + /** + * @brief 设置当前使用的语言包路径 + * @param 语言包路径 + * @return + */ + bool setLanguagesPath(const QString langsPath); + /** * @brief 判断指定的结果类型是否存在 * @param 指定的类型 @@ -204,6 +203,19 @@ private : */ QString getLangStr(Languages lang); + /** + * @brief 获取系统当前的语言 + * @return + */ + Languages getSystemLang(); + + /** + * @brief 获取当前使用的语言包 + * @param 需要使用的语言包 + * @return + */ + QString getLanguages(); + /** * @brief 获取纯字符串的识别结果 * @param 需识别的图片 @@ -220,9 +232,8 @@ private : */ void setResult(ErrorCode errCode, const QString errMessage,const ResultType resultType,RecognitionResult &result); - /** - * @brief 语言包路径m_image + * @brief 语言包路径 */ QString m_sTessdataPath;