OCRTesseract.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. //#if !UNITY_WSA_10_0
  2. //using OpenCVForUnity.CoreModule;
  3. //using OpenCVForUnity.UtilsModule;
  4. //using System;
  5. //using System.Collections.Generic;
  6. //using System.Runtime.InteropServices;
  7. //namespace OpenCVForUnity.TextModule
  8. //{
  9. // // C++: class OCRTesseract
  10. // /**
  11. // * OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++.
  12. // *
  13. // * Notice that it is compiled only when tesseract-ocr is correctly installed.
  14. // *
  15. // * <b>Note:</b>
  16. // * <ul>
  17. // * <li>
  18. // * (C++) An example of OCRTesseract recognition combined with scene text detection can be found
  19. // * at the end_to_end_recognition demo:
  20. // * &lt;https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp&gt;
  21. // * <ul>
  22. // * <li>
  23. // * (C++) Another example of OCRTesseract recognition combined with scene text detection can be
  24. // * found at the webcam_demo:
  25. // * &lt;https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp&gt;
  26. // * </li>
  27. // * </ul>
  28. // * </li>
  29. // * </ul>
  30. // */
  31. // public class OCRTesseract : BaseOCR
  32. // {
  33. // protected override void Dispose(bool disposing)
  34. // {
  35. // try
  36. // {
  37. // if (disposing)
  38. // {
  39. // }
  40. // if (IsEnabledDispose)
  41. // {
  42. // if (nativeObj != IntPtr.Zero)
  43. // text_OCRTesseract_delete(nativeObj);
  44. // nativeObj = IntPtr.Zero;
  45. // }
  46. // }
  47. // finally
  48. // {
  49. // base.Dispose(disposing);
  50. // }
  51. // }
  52. // protected internal OCRTesseract(IntPtr addr) : base(addr) { }
  53. // // internal usage only
  54. // public static new OCRTesseract __fromPtr__(IntPtr addr) { return new OCRTesseract(addr); }
  55. // //
  56. // // C++: String cv::text::OCRTesseract::run(Mat image, int min_confidence, int component_level = 0)
  57. // //
  58. // /**
  59. // * Recognize text using the tesseract-ocr API.
  60. // *
  61. // * Takes an image as input and returns the recognized text as the return value.
  62. // *
  63. // * The underlying tesseract-ocr interface can also provide the Rects for individual text
  64. // * elements found (e.g. words or text lines), the recognized strings of those elements and
  65. // * the confidence values for their recognition; this overload has no parameters for those
  66. // * outputs and returns only the text.
  67. // *
  68. // * param image Input image CV_8UC1 or CV_8UC3
  69. // * param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
  70. // * param min_confidence automatically generated
  71. // * return the recognized text
  72. // */
  73. // public string run(Mat image, int min_confidence, int component_level)
  74. // {
  75. // ThrowIfDisposed();
  76. // if (image != null) image.ThrowIfDisposed();
  77. // string retVal = Marshal.PtrToStringAnsi(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_run_10(nativeObj, image.nativeObj, min_confidence, component_level)));
  78. // return retVal;
  79. // }
  80. // /**
  81. // * Recognize text using the tesseract-ocr API.
  82. // *
  83. // * Takes an image as input and returns the recognized text as the return value.
  84. // *
  85. // * The underlying tesseract-ocr interface can also provide the Rects for individual text
  86. // * elements found (e.g. words or text lines), the recognized strings of those elements and
  87. // * the confidence values for their recognition; this overload has no parameters for those
  88. // * outputs and returns only the text.
  89. // *
  90. // * param image Input image CV_8UC1 or CV_8UC3
  91. // * param min_confidence automatically generated
  92. // * return the recognized text
  93. // */
  94. // public string run(Mat image, int min_confidence)
  95. // {
  96. // ThrowIfDisposed();
  97. // if (image != null) image.ThrowIfDisposed();
  98. // string retVal = Marshal.PtrToStringAnsi(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_run_11(nativeObj, image.nativeObj, min_confidence)));
  99. // return retVal;
  100. // }
  101. // //
  102. // // C++: String cv::text::OCRTesseract::run(Mat image, Mat mask, int min_confidence, int component_level = 0)
  103. // //
  104. // public string run(Mat image, Mat mask, int min_confidence, int component_level)
  105. // {
  106. // ThrowIfDisposed();
  107. // if (image != null) image.ThrowIfDisposed();
  108. // if (mask != null) mask.ThrowIfDisposed();
  109. // string retVal = Marshal.PtrToStringAnsi(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_run_12(nativeObj, image.nativeObj, mask.nativeObj, min_confidence, component_level)));
  110. // return retVal;
  111. // }
  112. // public string run(Mat image, Mat mask, int min_confidence)
  113. // {
  114. // ThrowIfDisposed();
  115. // if (image != null) image.ThrowIfDisposed();
  116. // if (mask != null) mask.ThrowIfDisposed();
  117. // string retVal = Marshal.PtrToStringAnsi(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_run_13(nativeObj, image.nativeObj, mask.nativeObj, min_confidence)));
  118. // return retVal;
  119. // }
  120. // //
  121. // // C++: void cv::text::OCRTesseract::setWhiteList(String char_whitelist)
  122. // //
  123. // public void setWhiteList(string char_whitelist)
  124. // {
  125. // ThrowIfDisposed();
  126. // text_OCRTesseract_setWhiteList_10(nativeObj, char_whitelist);
  127. // }
  128. // //
  129. // // C++: static Ptr_OCRTesseract cv::text::OCRTesseract::create(c_string datapath = 0, c_string language = 0, c_string char_whitelist = 0, int oem = OEM_DEFAULT, int psmode = PSM_AUTO)
  130. // //
  131. // /**
  132. // * Creates an instance of the OCRTesseract class. Initializes Tesseract.
  133. // *
  134. // * param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
  135. // * system's default directory.
  136. // * param language an ISO 639-3 code or NULL will default to "eng".
  137. // * param char_whitelist specifies the list of characters used for recognition. NULL defaults to ""
  138. // * (All characters will be used for recognition).
  139. // * param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
  140. // * tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
  141. // * values.
  142. // * param psmode tesseract-ocr offers different Page Segmentation Modes (PSM) tesseract::PSM_AUTO
  143. // * (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
  144. // * possible values.
  145. // *
  146. // * <b>Note:</b> The char_whitelist default is changed after OpenCV 4.7.0/3.19.0 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
  147. // * return automatically generated
  148. // */
  149. // public static OCRTesseract create(string datapath, string language, string char_whitelist, int oem, int psmode)
  150. // {
  151. // return OCRTesseract.__fromPtr__(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_create_10(datapath, language, char_whitelist, oem, psmode)));
  152. // }
  153. // /**
  154. // * Creates an instance of the OCRTesseract class. Initializes Tesseract.
  155. // *
  156. // * param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
  157. // * system's default directory.
  158. // * param language an ISO 639-3 code or NULL will default to "eng".
  159. // * param char_whitelist specifies the list of characters used for recognition. NULL defaults to ""
  160. // * (All characters will be used for recognition).
  161. // * param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
  162. // * tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
  163. // * values.
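  164. // * The Page Segmentation Mode (PSM) is not a parameter of this overload; the default tesseract::PSM_AUTO
  165. // * (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other possible values.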
  166. // *
  167. // * <b>Note:</b> The char_whitelist default is changed after OpenCV 4.7.0/3.19.0 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
  168. // * return automatically generated
  169. // */
  170. // public static OCRTesseract create(string datapath, string language, string char_whitelist, int oem)
  171. // {
  172. // return OCRTesseract.__fromPtr__(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_create_11(datapath, language, char_whitelist, oem)));
  173. // }
  174. // /**
  175. // * Creates an instance of the OCRTesseract class. Initializes Tesseract.
  176. // *
  177. // * param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
  178. // * system's default directory.
  179. // * param language an ISO 639-3 code or NULL will default to "eng".
  180. // * param char_whitelist specifies the list of characters used for recognition. NULL defaults to ""
  181. // * (All characters will be used for recognition).
  182. // * The OCR Engine Mode and Page Segmentation Mode are not parameters of this overload; the
  183. // * defaults tesseract::OEM_DEFAULT and tesseract::PSM_AUTO
  184. // * (fully automatic layout analysis) are used. See the tesseract-ocr API documentation for other
  185. // * possible values.
  186. // *
  187. // * <b>Note:</b> The char_whitelist default is changed after OpenCV 4.7.0/3.19.0 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
  188. // * return automatically generated
  189. // */
  190. // public static OCRTesseract create(string datapath, string language, string char_whitelist)
  191. // {
  192. // return OCRTesseract.__fromPtr__(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_create_12(datapath, language, char_whitelist)));
  193. // }
  194. // /**
  195. // * Creates an instance of the OCRTesseract class. Initializes Tesseract.
  196. // *
  197. // * param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
  198. // * system's default directory.
  199. // * param language an ISO 639-3 code or NULL will default to "eng".
  200. // * The char_whitelist is not a parameter of this overload; its default "" applies
  201. // * (All characters will be used for recognition). The OCR Engine Mode and Page Segmentation
  202. // * Mode are not parameters either; the defaults tesseract::OEM_DEFAULT and tesseract::PSM_AUTO
  203. // * (fully automatic layout analysis) are used. See the tesseract-ocr API documentation for other
  204. // * possible values.
  205. // *
  206. // * <b>Note:</b> The char_whitelist default is changed after OpenCV 4.7.0/3.19.0 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
  207. // * return automatically generated
  208. // */
  209. // public static OCRTesseract create(string datapath, string language)
  210. // {
  211. // return OCRTesseract.__fromPtr__(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_create_13(datapath, language)));
  212. // }
  213. // /**
  214. // * Creates an instance of the OCRTesseract class. Initializes Tesseract.
  215. // *
  216. // * param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
  217. // * system's default directory.
  218. // * The language and char_whitelist are not parameters of this overload; their defaults apply
  219. // * ("eng", and "" so that all characters will be used for recognition). The OCR Engine Mode
  220. // * and Page Segmentation Mode are not parameters either; the defaults tesseract::OEM_DEFAULT and
  221. // * tesseract::PSM_AUTO (fully automatic layout analysis) are used. See the tesseract-ocr API
  222. // * documentation for other possible values.
  223. // *
  224. // * <b>Note:</b> The char_whitelist default is changed after OpenCV 4.7.0/3.19.0 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
  225. // * return automatically generated
  226. // */
  227. // public static OCRTesseract create(string datapath)
  228. // {
  229. // return OCRTesseract.__fromPtr__(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_create_14(datapath)));
  230. // }
  231. // /**
  232. // * Creates an instance of the OCRTesseract class. Initializes Tesseract.
  233. // *
  234. // * This overload takes no parameters: the system's default directory is used for tessdata,
  235. // * the language defaults to "eng" and the char_whitelist to "" (All characters will be used
  236. // * for recognition). The OCR Engine Mode and Page Segmentation Mode defaults,
  237. // * tesseract::OEM_DEFAULT and tesseract::PSM_AUTO (fully automatic layout analysis), are
  238. // * used. See the tesseract-ocr API documentation for other possible
  239. // * values.
  240. // *
  241. // * <b>Note:</b> The char_whitelist default is changed after OpenCV 4.7.0/3.19.0 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
  242. // * return automatically generated
  243. // */
  244. // public static OCRTesseract create()
  245. // {
  246. // return OCRTesseract.__fromPtr__(DisposableObject.ThrowIfNullIntPtr(text_OCRTesseract_create_15()));
  247. // }
  248. //#if (UNITY_IOS || UNITY_WEBGL) && !UNITY_EDITOR
  249. // const string LIBNAME = "__Internal";
  250. //#else
  251. // const string LIBNAME = "opencvforunity";
  252. //#endif
  253. // // C++: String cv::text::OCRTesseract::run(Mat image, int min_confidence, int component_level = 0)
  254. // [DllImport(LIBNAME)]
  255. // private static extern IntPtr text_OCRTesseract_run_10(IntPtr nativeObj, IntPtr image_nativeObj, int min_confidence, int component_level);
  256. // [DllImport(LIBNAME)]
  257. // private static extern IntPtr text_OCRTesseract_run_11(IntPtr nativeObj, IntPtr image_nativeObj, int min_confidence);
  258. // // C++: String cv::text::OCRTesseract::run(Mat image, Mat mask, int min_confidence, int component_level = 0)
  259. // [DllImport(LIBNAME)]
  260. // private static extern IntPtr text_OCRTesseract_run_12(IntPtr nativeObj, IntPtr image_nativeObj, IntPtr mask_nativeObj, int min_confidence, int component_level);
  261. // [DllImport(LIBNAME)]
  262. // private static extern IntPtr text_OCRTesseract_run_13(IntPtr nativeObj, IntPtr image_nativeObj, IntPtr mask_nativeObj, int min_confidence);
  263. // // C++: void cv::text::OCRTesseract::setWhiteList(String char_whitelist)
  264. // [DllImport(LIBNAME)]
  265. // private static extern void text_OCRTesseract_setWhiteList_10(IntPtr nativeObj, string char_whitelist);
  266. // // C++: static Ptr_OCRTesseract cv::text::OCRTesseract::create(c_string datapath = 0, c_string language = 0, c_string char_whitelist = 0, int oem = OEM_DEFAULT, int psmode = PSM_AUTO)
  267. // [DllImport(LIBNAME)]
  268. // private static extern IntPtr text_OCRTesseract_create_10(string datapath, string language, string char_whitelist, int oem, int psmode);
  269. // [DllImport(LIBNAME)]
  270. // private static extern IntPtr text_OCRTesseract_create_11(string datapath, string language, string char_whitelist, int oem);
  271. // [DllImport(LIBNAME)]
  272. // private static extern IntPtr text_OCRTesseract_create_12(string datapath, string language, string char_whitelist);
  273. // [DllImport(LIBNAME)]
  274. // private static extern IntPtr text_OCRTesseract_create_13(string datapath, string language);
  275. // [DllImport(LIBNAME)]
  276. // private static extern IntPtr text_OCRTesseract_create_14(string datapath);
  277. // [DllImport(LIBNAME)]
  278. // private static extern IntPtr text_OCRTesseract_create_15();
  279. // // native delete entry point for the wrapped object; called from Dispose
  280. // [DllImport(LIBNAME)]
  281. // private static extern void text_OCRTesseract_delete(IntPtr nativeObj);
  282. // }
  283. //}
  284. //#endif