TextRecognitionExample.cs

#if !UNITY_WSA_10_0
using UnityEngine;
using UnityEngine.SceneManagement;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Xml;
using OpenCVForUnity.CoreModule;
using OpenCVForUnity.ImgcodecsModule;
using OpenCVForUnity.TextModule;
using OpenCVForUnity.ImgprocModule;
using OpenCVForUnity.UnityUtils;

namespace OpenCVForUnityExample
{
    /// <summary>
    /// Text Recognition Example
    /// A demo script of the Extremal Region Filter algorithm described in: Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012.
    /// Referring to https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.py.
    /// </summary>
    public class TextRecognitionExample : MonoBehaviour
    {
        /// <summary>
        /// IMAGE_FILENAME
        /// </summary>
        protected static readonly string IMAGE_FILENAME = "OpenCVForUnity/text/test_text.jpg";

        /// <summary>
        /// The image filepath.
        /// </summary>
        string image_filepath;

        /// <summary>
        /// TRAINED_CLASSIFIER_NM_1_FILENAME
        /// </summary>
        protected static readonly string TRAINED_CLASSIFIER_NM_1_FILENAME = "OpenCVForUnity/text/trained_classifierNM1.xml";

        /// <summary>
        /// The trained classifierNM1 filepath.
        /// </summary>
        string trained_classifierNM1_filepath;

        /// <summary>
        /// TRAINED_CLASSIFIER_NM_2_FILENAME
        /// </summary>
        protected static readonly string TRAINED_CLASSIFIER_NM_2_FILENAME = "OpenCVForUnity/text/trained_classifierNM2.xml";

        /// <summary>
        /// The trained classifierNM2 filepath.
        /// </summary>
        string trained_classifierNM2_filepath;

        /// <summary>
        /// OCRHMM_TRANSITIONS_TABLE_FILENAME
        /// </summary>
        protected static readonly string OCRHMM_TRANSITIONS_TABLE_FILENAME = "OpenCVForUnity/text/OCRHMM_transitions_table.xml";

        /// <summary>
        /// The OCRHMM transitions table filepath.
        /// </summary>
        string OCRHMM_transitions_table_filepath;

        /// <summary>
        /// OCRHMM_KNN_MODEL_FILENAME
        /// </summary>
        /// https://stackoverflow.com/questions/4666098/why-does-android-aapt-remove-gz-file-extension-of-assets
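        // On Android the uncompressed .xml model is used: as the link above notes, aapt strips the
        // ".gz" extension from packaged assets, so the gzipped file cannot be resolved reliably there.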
#if UNITY_ANDROID && !UNITY_EDITOR
        protected static readonly string OCRHMM_KNN_MODEL_FILENAME = "OpenCVForUnity/text/OCRHMM_knn_model_data.xml";
#else
        protected static readonly string OCRHMM_KNN_MODEL_FILENAME = "OpenCVForUnity/text/OCRHMM_knn_model_data.xml.gz";
#endif

        /// <summary>
        /// The OCRHMM knn model data filepath.
        /// </summary>
        string OCRHMM_knn_model_data_filepath;

#if UNITY_WEBGL
        IEnumerator getFilePath_Coroutine;
#endif

        // Use this for initialization
        void Start()
        {
#if UNITY_WEBGL
            getFilePath_Coroutine = GetFilePath();
            StartCoroutine(getFilePath_Coroutine);
#else
            image_filepath = Utils.getFilePath(IMAGE_FILENAME);
            trained_classifierNM1_filepath = Utils.getFilePath(TRAINED_CLASSIFIER_NM_1_FILENAME);
            trained_classifierNM2_filepath = Utils.getFilePath(TRAINED_CLASSIFIER_NM_2_FILENAME);
            OCRHMM_transitions_table_filepath = Utils.getFilePath(OCRHMM_TRANSITIONS_TABLE_FILENAME);
            OCRHMM_knn_model_data_filepath = Utils.getFilePath(OCRHMM_KNN_MODEL_FILENAME);
            Run();
#endif
        }
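
        // On WebGL, files under StreamingAssets are fetched over the web and cannot be read
        // synchronously, so the image and model paths are resolved asynchronously in a coroutine
        // before Run() is called.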
#if UNITY_WEBGL
        private IEnumerator GetFilePath()
        {
            var getFilePathAsync_0_Coroutine = Utils.getFilePathAsync(IMAGE_FILENAME, (result) => {
                image_filepath = result;
            });
            yield return getFilePathAsync_0_Coroutine;

            var getFilePathAsync_1_Coroutine = Utils.getFilePathAsync(TRAINED_CLASSIFIER_NM_1_FILENAME, (result) => {
                trained_classifierNM1_filepath = result;
            });
            yield return getFilePathAsync_1_Coroutine;

            var getFilePathAsync_2_Coroutine = Utils.getFilePathAsync(TRAINED_CLASSIFIER_NM_2_FILENAME, (result) => {
                trained_classifierNM2_filepath = result;
            });
            yield return getFilePathAsync_2_Coroutine;

            var getFilePathAsync_3_Coroutine = Utils.getFilePathAsync(OCRHMM_TRANSITIONS_TABLE_FILENAME, (result) => {
                OCRHMM_transitions_table_filepath = result;
            });
            yield return getFilePathAsync_3_Coroutine;

            var getFilePathAsync_4_Coroutine = Utils.getFilePathAsync(OCRHMM_KNN_MODEL_FILENAME, (result) => {
                OCRHMM_knn_model_data_filepath = result;
            });
            yield return getFilePathAsync_4_Coroutine;

            getFilePath_Coroutine = null;

            Run();
        }
#endif

        private void Run()
        {
            // If true, error logs from the native OpenCV side are displayed on the Unity Editor Console.
            Utils.setDebugMode(true);

            Mat frame = Imgcodecs.imread(image_filepath);
            if (frame.empty())
            {
                Debug.LogError(IMAGE_FILENAME + " is not loaded. Please move from “OpenCVForUnity/StreamingAssets/OpenCVForUnity/” to “Assets/StreamingAssets/OpenCVForUnity/” folder.");
            }

            if (string.IsNullOrEmpty(trained_classifierNM1_filepath) || string.IsNullOrEmpty(trained_classifierNM2_filepath))
            {
                Debug.LogError(TRAINED_CLASSIFIER_NM_1_FILENAME + " or " + TRAINED_CLASSIFIER_NM_2_FILENAME + " is not loaded. Please move from “OpenCVForUnity/StreamingAssets/OpenCVForUnity/” to “Assets/StreamingAssets/OpenCVForUnity/” folder.");
            }

            if (string.IsNullOrEmpty(OCRHMM_transitions_table_filepath) || string.IsNullOrEmpty(OCRHMM_knn_model_data_filepath))
            {
                Debug.LogError(OCRHMM_TRANSITIONS_TABLE_FILENAME + " or " + OCRHMM_KNN_MODEL_FILENAME + " is not loaded. Please move from “OpenCVForUnity/StreamingAssets/OpenCVForUnity/” to “Assets/StreamingAssets/OpenCVForUnity/” folder.");
            }

            Mat binaryMat = new Mat();
            Mat maskMat = new Mat();
            List<MatOfPoint> regions = new List<MatOfPoint>();
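
            // Two-stage Extremal Region (ER) cascade from Neumann & Matas: NM1 filters candidate
            // regions using incrementally computable features (thresholdDelta = 8, minArea = 0.00015,
            // maxArea = 0.13, minProbability = 0.2, non-max suppression enabled,
            // minProbabilityDiff = 0.1); NM2 re-scores the survivors (minProbability = 0.5).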
            ERFilter er_filter1 = Text.createERFilterNM1(trained_classifierNM1_filepath, 8, 0.00015f, 0.13f, 0.2f, true, 0.1f);
            ERFilter er_filter2 = Text.createERFilterNM2(trained_classifierNM2_filepath, 0.5f);
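
            // HMM-based OCR decoder setup: the 62x62 transition matrix matches the 62-character
            // vocabulary below (a-z, A-Z, 0-9). The table is parsed manually from the FileStorage XML
            // in GetTransitionProbabilitiesData() instead of the C++ FileStorage calls kept below for
            // reference, and the emission matrix is left as the identity.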
            Mat transition_p = new Mat(62, 62, CvType.CV_64FC1);
            //string filename = "OCRHMM_transitions_table.xml";
            //FileStorage fs(filename, FileStorage::READ);
            //fs["transition_probabilities"] >> transition_p;
            //fs.release();

            // Load TransitionProbabilitiesData.
            transition_p.put(0, 0, GetTransitionProbabilitiesData(OCRHMM_transitions_table_filepath));

            Mat emission_p = Mat.eye(62, 62, CvType.CV_64FC1);
            string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
            OCRHMMDecoder decoder = OCRHMMDecoder.create(
                OCRHMM_knn_model_data_filepath,
                voc, transition_p, emission_p);

            // Text Detection
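            // The input is converted to RGB, grayscaled, and binarized with Otsu's threshold; the
            // absdiff against 255 then inverts the binary image so dark text becomes white in
            // maskMat, which is later cropped per detection and fed to the OCR decoder.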
            Imgproc.cvtColor(frame, frame, Imgproc.COLOR_BGR2RGB);
            Imgproc.cvtColor(frame, binaryMat, Imgproc.COLOR_RGB2GRAY);
            Imgproc.threshold(binaryMat, binaryMat, 0, 255, Imgproc.THRESH_BINARY | Imgproc.THRESH_OTSU);
            Core.absdiff(binaryMat, new Scalar(255), maskMat);
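
            // Run the ER cascade over the binarized image to collect candidate character regions.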
            Text.detectRegions(binaryMat, er_filter1, er_filter2, regions);
            Debug.Log("regions.Count " + regions.Count);

            MatOfRect groups_rects = new MatOfRect();
            List<OpenCVForUnity.CoreModule.Rect> rects = new List<OpenCVForUnity.CoreModule.Rect>();
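
            // Group the character candidates into word-level bounding rectangles; the individual
            // region mats are no longer needed afterwards, so they are disposed immediately.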
            Text.erGrouping(frame, binaryMat, regions, groups_rects);

            for (int i = 0; i < regions.Count; i++)
            {
                regions[i].Dispose();
            }
            regions.Clear();

            rects.AddRange(groups_rects.toList());
            groups_rects.Dispose();

            // Text Recognition (OCR)
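            // Each grouped rectangle is cropped out of the inverted mask and padded with a 15 px
            // black border before being passed to the OCR decoder.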
            List<Mat> detections = new List<Mat>();

            for (int i = 0; i < rects.Count; i++)
            {
                Mat group_img = new Mat();
                maskMat.submat(rects[i]).copyTo(group_img);
                Core.copyMakeBorder(group_img, group_img, 15, 15, 15, 15, Core.BORDER_CONSTANT, new Scalar(0));
                detections.Add(group_img);
            }
            Debug.Log("detections.Count " + detections.Count);

            // Visualization
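            // Draw each group rectangle on the frame and run the HMM decoder on the corresponding
            // padded crop; the second argument of decoder.run() (0 here) is the minimum confidence,
            // so every decoded string is accepted and overlaid at the rectangle origin.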
            for (int i = 0; i < rects.Count; i++)
            {
                Imgproc.rectangle(frame, new Point(rects[i].x, rects[i].y), new Point(rects[i].x + rects[i].width, rects[i].y + rects[i].height), new Scalar(255, 0, 0), 2);
                Imgproc.rectangle(frame, new Point(rects[i].x, rects[i].y), new Point(rects[i].x + rects[i].width, rects[i].y + rects[i].height), new Scalar(255, 255, 255), 1);

                string output = decoder.run(detections[i], 0);
                if (!string.IsNullOrEmpty(output))
                {
                    Debug.Log("output " + output);
                    Imgproc.putText(frame, output, new Point(rects[i].x, rects[i].y), Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 255), 1, Imgproc.LINE_AA, false);
                }
            }

            Texture2D texture = new Texture2D(frame.cols(), frame.rows(), TextureFormat.RGBA32, false);
            Utils.matToTexture2D(frame, texture);

            //Texture2D texture = new Texture2D (detections [0].cols (), detections [0].rows (), TextureFormat.RGBA32, false);
            //Utils.matToTexture2D (detections [0], texture);

            gameObject.GetComponent<Renderer>().material.mainTexture = texture;

            for (int i = 0; i < detections.Count; i++)
            {
                detections[i].Dispose();
            }
            binaryMat.Dispose();
            maskMat.Dispose();

            Utils.setDebugMode(false);
        }

        // Update is called once per frame
        void Update()
        {
        }

        /// <summary>
        /// Gets the transition probabilities data.
        /// </summary>
        /// <returns>The transition probabilities data.</returns>
        /// <param name="filePath">File path.</param>
        double[] GetTransitionProbabilitiesData(string filePath)
        {
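            // The transitions table is an OpenCV FileStorage XML file; since the FileStorage class is
            // not available here, the <data> element is read directly with System.Xml and split into
            // the 62 x 62 = 3844 row-major double values expected by transition_p.put().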
            XmlDocument xmlDoc = new XmlDocument();
            xmlDoc.Load(filePath);
            XmlNode dataNode = xmlDoc.GetElementsByTagName("data").Item(0);
            //Debug.Log ("dataNode.InnerText " + dataNode.InnerText);

            string[] dataString = dataNode.InnerText.Split(new string[] {
                " ",
                "\r\n", "\n"
            }, StringSplitOptions.RemoveEmptyEntries);
            //Debug.Log ("dataString.Length " + dataString.Length);

            double[] data = new double[dataString.Length];
            for (int i = 0; i < data.Length; i++)
            {
                try
                {
                    // Parse with the invariant culture so values such as "1.0e-02" are read the same
                    // way regardless of the system locale.
                    data[i] = Convert.ToDouble(dataString[i], CultureInfo.InvariantCulture);
                }
                catch (FormatException)
                {
                    Debug.Log("Unable to convert '" + dataString[i] + "' to a Double.");
                }
                catch (OverflowException)
                {
                    Debug.Log("'" + dataString[i] + "' is outside the range of a Double.");
                }
            }
            return data;
        }

        /// <summary>
        /// Raises the destroy event.
        /// </summary>
        void OnDestroy()
        {
#if UNITY_WEBGL
            if (getFilePath_Coroutine != null)
            {
                StopCoroutine(getFilePath_Coroutine);
                ((IDisposable)getFilePath_Coroutine).Dispose();
            }
#endif
        }

        /// <summary>
        /// Raises the back button click event.
        /// </summary>
        public void OnBackButtonClick()
        {
            SceneManager.LoadScene("OpenCVForUnityExample");
        }
    }
}
#endif