// TextRecognitionExample.cs
  1. #if !UNITY_WSA_10_0
  2. using UnityEngine;
  3. using UnityEngine.SceneManagement;
  4. using System;
  5. using System.Collections;
  6. using System.Collections.Generic;
  7. using System.Xml;
  8. using OpenCVForUnity.CoreModule;
  9. using OpenCVForUnity.ImgcodecsModule;
  10. using OpenCVForUnity.TextModule;
  11. using OpenCVForUnity.ImgprocModule;
  12. using OpenCVForUnity.UnityUtils;
  13. namespace OpenCVForUnityExample
  14. {
  /// <summary>
  /// Text Recognition Example
  /// A demo script of the Extremal Region Filter algorithm described in: Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012.
  /// Referring to https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.py.
  /// Text groups found in a sample image are decoded with an OCRHMMDecoder, and the annotated
  /// image is assigned to the main texture of this GameObject's Renderer.
  /// </summary>
  public class TextRecognitionExample : MonoBehaviour
  {
      /// <summary>
      /// Characters the OCR decoder may emit. Its length fixes the size of the
      /// transition and emission probability tables.
      /// </summary>
      const string Vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

      /// <summary>
      /// Path of the sample image that is searched for text.
      /// </summary>
      string scenetext01_jpg_filepath;

      /// <summary>
      /// Path of the classifier file passed to Text.createERFilterNM1.
      /// </summary>
      string trained_classifierNM1_xml_filepath;

      /// <summary>
      /// Path of the classifier file passed to Text.createERFilterNM2.
      /// </summary>
      string trained_classifierNM2_xml_filepath;

      /// <summary>
      /// Path of the XML file holding the character transition probabilities.
      /// </summary>
      string OCRHMM_transitions_table_xml_filepath;

      /// <summary>
      /// Path of the model file passed to OCRHMMDecoder.create.
      /// </summary>
      string OCRHMM_knn_model_data_xml_gz_filepath;

      #if UNITY_WEBGL && !UNITY_EDITOR
      /// <summary>
      /// Running file path lookup coroutine; null once it has finished.
      /// </summary>
      IEnumerator getFilePath_Coroutine;
      #endif

      // Use this for initialization
      void Start ()
      {
          #if UNITY_WEBGL && !UNITY_EDITOR
          // On WebGL the paths are resolved asynchronously; Run () is called by the coroutine.
          getFilePath_Coroutine = GetFilePath ();
          StartCoroutine (getFilePath_Coroutine);
          #else
          scenetext01_jpg_filepath = Utils.getFilePath ("text/test_text.jpg");
          trained_classifierNM1_xml_filepath = Utils.getFilePath ("text/trained_classifierNM1.xml");
          trained_classifierNM2_xml_filepath = Utils.getFilePath ("text/trained_classifierNM2.xml");
          OCRHMM_transitions_table_xml_filepath = Utils.getFilePath ("text/OCRHMM_transitions_table.xml");
          #if UNITY_ANDROID && !UNITY_EDITOR
          // Android players load the model under its ".xml" name instead of ".xml.gz".
          OCRHMM_knn_model_data_xml_gz_filepath = Utils.getFilePath ("text/OCRHMM_knn_model_data.xml");
          #else
          OCRHMM_knn_model_data_xml_gz_filepath = Utils.getFilePath ("text/OCRHMM_knn_model_data.xml.gz");
          #endif
          Run ();
          #endif
      }

      #if UNITY_WEBGL && !UNITY_EDITOR
      /// <summary>
      /// Resolves every required file path one after another, then starts the example.
      /// </summary>
      private IEnumerator GetFilePath ()
      {
          var getFilePathAsync_0_Coroutine = Utils.getFilePathAsync ("text/test_text.jpg", (result) => {
              scenetext01_jpg_filepath = result;
          });
          yield return getFilePathAsync_0_Coroutine;

          var getFilePathAsync_1_Coroutine = Utils.getFilePathAsync ("text/trained_classifierNM1.xml", (result) => {
              trained_classifierNM1_xml_filepath = result;
          });
          yield return getFilePathAsync_1_Coroutine;

          var getFilePathAsync_2_Coroutine = Utils.getFilePathAsync ("text/trained_classifierNM2.xml", (result) => {
              trained_classifierNM2_xml_filepath = result;
          });
          yield return getFilePathAsync_2_Coroutine;

          var getFilePathAsync_3_Coroutine = Utils.getFilePathAsync ("text/OCRHMM_transitions_table.xml", (result) => {
              OCRHMM_transitions_table_xml_filepath = result;
          });
          yield return getFilePathAsync_3_Coroutine;

          var getFilePathAsync_4_Coroutine = Utils.getFilePathAsync ("text/OCRHMM_knn_model_data.xml.gz", (result) => {
              OCRHMM_knn_model_data_xml_gz_filepath = result;
          });
          yield return getFilePathAsync_4_Coroutine;

          getFilePath_Coroutine = null;
          Run ();
      }
      #endif

      /// <summary>
      /// Runs text detection and recognition on the sample image and shows the result.
      /// </summary>
      private void Run ()
      {
          //if true, The error log of the Native side OpenCV will be displayed on the Unity Editor Console.
          Utils.setDebugMode (true);
          try {
              DetectAndRecognizeText ();
          } finally {
              // Switch debug mode back off even when the run aborts early or throws.
              Utils.setDebugMode (false);
          }
      }

      /// <summary>
      /// Loads the image, finds text groups, decodes each group and assigns the annotated
      /// image to the Renderer's main texture. Every native object created here is released
      /// before the method returns.
      /// </summary>
      private void DetectAndRecognizeText ()
      {
          using (Mat frame = Imgcodecs.imread (scenetext01_jpg_filepath))
          using (Mat binaryMat = new Mat ())
          using (Mat maskMat = new Mat ()) {
              if (frame.empty ()) {
                  Debug.LogError ("text/test_text.jpg is not loaded. Please copy from “OpenCVForUnity/StreamingAssets/text/” to “Assets/StreamingAssets/” folder. ");
                  // Nothing below can work on an empty image, so stop here.
                  return;
              }

              //Text Detection
              Imgproc.cvtColor (frame, frame, Imgproc.COLOR_BGR2RGB);
              Imgproc.cvtColor (frame, binaryMat, Imgproc.COLOR_RGB2GRAY);
              Imgproc.threshold (binaryMat, binaryMat, 0, 255, Imgproc.THRESH_BINARY | Imgproc.THRESH_OTSU);
              // maskMat is the inverted binary image (|binary - 255|); the OCR crops are cut from it.
              Core.absdiff (binaryMat, new Scalar (255), maskMat);

              List<OpenCVForUnity.CoreModule.Rect> rects = DetectTextGroups (frame, binaryMat);

              //Text Recognition (OCR)
              List<Mat> detections = CropDetections (maskMat, rects);
              try {
                  Debug.Log ("detections.Count " + detections.Count);
                  AnnotateDetections (frame, rects, detections);

                  Texture2D texture = new Texture2D (frame.cols (), frame.rows (), TextureFormat.RGBA32, false);
                  Utils.matToTexture2D (frame, texture);
                  gameObject.GetComponent<Renderer> ().material.mainTexture = texture;
              } finally {
                  for (int i = 0; i < detections.Count; i++) {
                      detections [i].Dispose ();
                  }
                  detections.Clear ();
              }
          }
      }

      /// <summary>
      /// Runs the two-stage ER filter on the binary image and groups the resulting regions.
      /// </summary>
      /// <returns>Bounding rectangles of the detected text groups.</returns>
      /// <param name="frame">Colour image passed to Text.erGrouping.</param>
      /// <param name="binaryMat">Thresholded single-channel image used for region detection.</param>
      private List<OpenCVForUnity.CoreModule.Rect> DetectTextGroups (Mat frame, Mat binaryMat)
      {
          List<OpenCVForUnity.CoreModule.Rect> rects = new List<OpenCVForUnity.CoreModule.Rect> ();
          List<MatOfPoint> regions = new List<MatOfPoint> ();

          using (ERFilter er_filter1 = Text.createERFilterNM1 (trained_classifierNM1_xml_filepath, 8, 0.00015f, 0.13f, 0.2f, true, 0.1f))
          using (ERFilter er_filter2 = Text.createERFilterNM2 (trained_classifierNM2_xml_filepath, 0.5f))
          using (MatOfRect groups_rects = new MatOfRect ()) {
              try {
                  Text.detectRegions (binaryMat, er_filter1, er_filter2, regions);
                  Debug.Log ("regions.Count " + regions.Count);

                  Text.erGrouping (frame, binaryMat, regions, groups_rects);
                  rects.AddRange (groups_rects.toList ());
              } finally {
                  // The region point sets are only needed for grouping; release them right away.
                  for (int i = 0; i < regions.Count; i++) {
                      regions [i].Dispose ();
                  }
                  regions.Clear ();
              }
          }

          return rects;
      }

      /// <summary>
      /// Copies each text group out of the mask image and pads it with a 15 pixel black border.
      /// </summary>
      /// <returns>One padded crop per rectangle, in the same order. The caller must dispose them.</returns>
      /// <param name="maskMat">Mask image the crops are taken from.</param>
      /// <param name="rects">Bounding rectangles of the text groups.</param>
      private List<Mat> CropDetections (Mat maskMat, List<OpenCVForUnity.CoreModule.Rect> rects)
      {
          List<Mat> detections = new List<Mat> (rects.Count);
          for (int i = 0; i < rects.Count; i++) {
              Mat group_img = new Mat ();
              // submat creates a separate Mat object that has to be released on its own.
              using (Mat roi = maskMat.submat (rects [i])) {
                  roi.copyTo (group_img);
              }
              Core.copyMakeBorder (group_img, group_img, 15, 15, 15, 15, Core.BORDER_CONSTANT, new Scalar (0));
              detections.Add (group_img);
          }
          return detections;
      }

      /// <summary>
      /// Draws a frame around every text group and writes the decoded text next to it.
      /// </summary>
      /// <param name="frame">Image that is drawn on.</param>
      /// <param name="rects">Bounding rectangles of the text groups.</param>
      /// <param name="detections">Cropped group images, index-aligned with <paramref name="rects"/>.</param>
      private void AnnotateDetections (Mat frame, List<OpenCVForUnity.CoreModule.Rect> rects, List<Mat> detections)
      {
          int classCount = Vocabulary.Length;

          // string filename = "OCRHMM_transitions_table.xml";
          // FileStorage fs(filename, FileStorage::READ);
          // fs["transition_probabilities"] >> transition_p;
          // fs.release();
          //Load TransitionProbabilitiesData.
          double[] transitionData = GetTransitionProbabilitiesData (OCRHMM_transitions_table_xml_filepath);
          if (transitionData.Length != classCount * classCount) {
              Debug.LogWarning ("Expected " + (classCount * classCount) + " transition probabilities but read " + transitionData.Length + ".");
          }

          // The probability tables stay alive until the decoder has finished using them.
          using (Mat transition_p = new Mat (classCount, classCount, CvType.CV_64FC1))
          using (Mat emission_p = Mat.eye (classCount, classCount, CvType.CV_64FC1)) {
              transition_p.put (0, 0, transitionData);

              using (OCRHMMDecoder decoder = OCRHMMDecoder.create (
                         OCRHMM_knn_model_data_xml_gz_filepath,
                         Vocabulary, transition_p, emission_p)) {
                  //#Visualization
                  for (int i = 0; i < rects.Count; i++) {
                      OpenCVForUnity.CoreModule.Rect rect = rects [i];
                      Point topLeft = new Point (rect.x, rect.y);
                      Point bottomRight = new Point (rect.x + rect.width, rect.y + rect.height);

                      Imgproc.rectangle (frame, topLeft, bottomRight, new Scalar (255, 0, 0), 2);
                      Imgproc.rectangle (frame, topLeft, bottomRight, new Scalar (255, 255, 255), 1);

                      string output = decoder.run (detections [i], 0);
                      if (!string.IsNullOrEmpty (output)) {
                          Debug.Log ("output " + output);
                          Imgproc.putText (frame, output, topLeft, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar (0, 0, 255), 1, Imgproc.LINE_AA, false);
                      }
                  }
              }
          }
      }

      /// <summary>
      /// Gets the transition probabilities data.
      /// Reads the text of the first "data" element of the XML file and parses it as a
      /// whitespace-separated list of numbers. Values that cannot be parsed are logged and left at 0.
      /// </summary>
      /// <returns>The transition probabilities data.</returns>
      /// <param name="filePath">File path.</param>
      /// <exception cref="XmlException">The file contains no "data" element.</exception>
      double[] GetTransitionProbabilitiesData (string filePath)
      {
          XmlDocument xmlDoc = new XmlDocument ();
          xmlDoc.Load (filePath);

          XmlNode dataNode = xmlDoc.GetElementsByTagName ("data").Item (0);
          if (dataNode == null) {
              throw new XmlException ("No <data> element found in '" + filePath + "'.");
          }

          string[] dataString = dataNode.InnerText.Split (new char[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

          double[] data = new double[dataString.Length];
          for (int i = 0; i < data.Length; i++) {
              try {
                  // The file always uses '.' as decimal separator, so parsing must not depend
                  // on the culture of the device the example runs on.
                  data [i] = Convert.ToDouble (dataString [i], System.Globalization.CultureInfo.InvariantCulture);
              } catch (FormatException) {
                  Debug.LogWarning ("Unable to convert '{" + dataString [i] + "}' to a Double.");
              } catch (OverflowException) {
                  Debug.LogWarning ("'{" + dataString [i] + "}' is outside the range of a Double.");
              }
          }
          return data;
      }

      /// <summary>
      /// Raises the destroy event.
      /// </summary>
      void OnDestroy ()
      {
          #if UNITY_WEBGL && !UNITY_EDITOR
          // Stop a file path lookup that is still running when the object goes away.
          if (getFilePath_Coroutine != null) {
              StopCoroutine (getFilePath_Coroutine);
              ((IDisposable)getFilePath_Coroutine).Dispose ();
              getFilePath_Coroutine = null;
          }
          #endif
      }

      /// <summary>
      /// Raises the back button click event.
      /// </summary>
      public void OnBackButtonClick ()
      {
          SceneManager.LoadScene ("OpenCVForUnityExample");
      }
  }
  203. }
  204. #endif