DnnObjectDetectionWebCamTextureExample.cs 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. #if !UNITY_WSA_10_0
  2. using System;
  3. using System.Collections;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using UnityEngine;
  7. using UnityEngine.UI;
  8. using UnityEngine.SceneManagement;
  9. using OpenCVForUnity.CoreModule;
  10. using OpenCVForUnity.DnnModule;
  11. using OpenCVForUnity.ImgprocModule;
  12. using OpenCVForUnity.UnityUtils;
  13. using OpenCVForUnity.UnityUtils.Helper;
  14. namespace OpenCVForUnityExample
  15. {
  16. /// <summary>
  17. /// Dnn ObjectDetection Example
  18. /// Referring to https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.cpp.
  19. /// </summary>
  20. [RequireComponent (typeof(WebCamTextureToMatHelper))]
  21. public class DnnObjectDetectionWebCamTextureExample : MonoBehaviour
  22. {
  // ---------------------------------------------------------------------
  // Settings exposed in the Unity inspector.
  // ---------------------------------------------------------------------

  /// <summary>File name of the trained weights, resolved below "dnn/".</summary>
  [Tooltip("Path to a binary file of model contains trained weights. It could be a file with extensions .caffemodel (Caffe), .pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet).")]
  public string model;

  /// <summary>File name of the network configuration, resolved below "dnn/".</summary>
  [Tooltip("Path to a text file of model contains network configuration. It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet).")]
  public string config;

  /// <summary>File name of the class-name list (one name per line), resolved below "dnn/".</summary>
  [Tooltip("Optional path to a text file with names of classes to label detected objects.")]
  public string classes;

  /// <summary>Class names used when <see cref="classes"/> is empty.</summary>
  [Tooltip("Optional list of classes to label detected objects.")]
  public List<string> classesList;

  /// <summary>Detections with a confidence at or below this value are ignored.</summary>
  [Tooltip("Confidence threshold.")]
  public float confThreshold;

  /// <summary>Threshold handed to <c>Dnn.NMSBoxes</c>.</summary>
  [Tooltip("Non-maximum suppression threshold.")]
  public float nmsThreshold;

  /// <summary>Scale factor handed to <c>Dnn.blobFromImage</c>.</summary>
  [Tooltip("Preprocess input image by multiplying on a scale factor.")]
  public float scale;

  /// <summary>Mean value handed to <c>Dnn.blobFromImage</c>.</summary>
  [Tooltip("Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces.")]
  public Scalar mean;

  /// <summary>Swap-red/blue flag handed to <c>Dnn.blobFromImage</c>.</summary>
  [Tooltip("Indicate that model works with RGB input images instead BGR ones.")]
  public bool swapRB;

  /// <summary>Network input width; values of 0 or less fall back to the frame width.</summary>
  [Tooltip("Preprocess input image by resizing to a specific width.")]
  public int inpWidth;

  /// <summary>Network input height; values of 0 or less fall back to the frame height.</summary>
  [Tooltip("Preprocess input image by resizing to a specific height.")]
  public int inpHeight;

  // ---------------------------------------------------------------------
  // Runtime state.
  // ---------------------------------------------------------------------

  /// <summary>Texture the processed frame is copied into for display.</summary>
  private Texture2D texture;

  /// <summary>Helper that delivers webcam frames as Mat objects.</summary>
  private WebCamTextureToMatHelper webCamTextureToMatHelper;

  /// <summary>Scratch Mat holding the BGR conversion of the current frame.</summary>
  private Mat bgrMat;

  /// <summary>The loaded network; stays null when the model files could not be resolved.</summary>
  private Net net;

  /// <summary>Optional on-screen FPS/info monitor.</summary>
  private FpsMonitor fpsMonitor;

  /// <summary>Class labels used when drawing detections; may be null.</summary>
  private List<string> classNames;

  /// <summary>Names of the network's unconnected output layers.</summary>
  private List<string> outBlobNames;

  /// <summary>Types of the network's unconnected output layers.</summary>
  private List<string> outBlobTypes;

  /// <summary>Resolved path of the class-name file.</summary>
  private string classes_filepath;

  /// <summary>Resolved path of the network configuration file.</summary>
  private string config_filepath;

  /// <summary>Resolved path of the network weights file.</summary>
  private string model_filepath;

  #if UNITY_WEBGL && !UNITY_EDITOR
  /// <summary>Running file-path coroutine, kept so it can be stopped in OnDestroy.</summary>
  private IEnumerator getFilePath_Coroutine;
  #endif
  /// <summary>
  /// Caches the sibling components, resolves the configured model files below "dnn/"
  /// and then starts the example via <c>Run</c> (directly, or from the coroutine on WebGL).
  /// </summary>
  void Start()
  {
      fpsMonitor = GetComponent<FpsMonitor>();
      webCamTextureToMatHelper = gameObject.GetComponent<WebCamTextureToMatHelper>();

  #if UNITY_WEBGL && !UNITY_EDITOR
      getFilePath_Coroutine = GetFilePath();
      StartCoroutine(getFilePath_Coroutine);
  #else
      // Only resolve names that were actually configured, exactly like the WebGL
      // coroutine does; an empty name would otherwise be looked up as the bare
      // "dnn/" directory instead of being treated as "not set".
      if (!string.IsNullOrEmpty(classes))
      {
          classes_filepath = Utils.getFilePath("dnn/" + classes);
      }
      if (!string.IsNullOrEmpty(config))
      {
          config_filepath = Utils.getFilePath("dnn/" + config);
      }
      if (!string.IsNullOrEmpty(model))
      {
          model_filepath = Utils.getFilePath("dnn/" + model);
      }

      Run();
  #endif
  }
  89. #if UNITY_WEBGL && !UNITY_EDITOR
  /// <summary>
  /// Coroutine that asynchronously resolves the class-name, configuration and
  /// weights files (each only when its name is set), then calls <c>Run</c>.
  /// </summary>
  /// <returns>The enumerator driving the coroutine.</returns>
  private IEnumerator GetFilePath()
  {
      if (!string.IsNullOrEmpty(classes))
      {
          yield return Utils.getFilePathAsync("dnn/" + classes, path => classes_filepath = path);
      }

      if (!string.IsNullOrEmpty(config))
      {
          yield return Utils.getFilePathAsync("dnn/" + config, path => config_filepath = path);
      }

      if (!string.IsNullOrEmpty(model))
      {
          yield return Utils.getFilePathAsync("dnn/" + model, path => model_filepath = path);
      }

      // All lookups finished: clear the handle so OnDestroy does not try to stop it.
      getFilePath_Coroutine = null;

      Run();
  }
  113. #endif
  /// <summary>
  /// Loads the class names, creates the network from the resolved model files and
  /// initializes the webcam helper. When the model files are missing an error is
  /// logged and <c>net</c> stays null; the webcam is still started.
  /// </summary>
  void Run()
  {
      // If true, the error log of the native side OpenCV is displayed on the Unity Editor Console.
      Utils.setDebugMode(true);

      if (!string.IsNullOrEmpty(classes))
      {
          classNames = readClassNames(classes_filepath);
          if (classNames == null)
          {
              Debug.LogError(classes_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");
          }
      }
      else if (classesList != null && classesList.Count > 0)
      {
          // classesList is null when the component was created from script rather
          // than deserialized by the inspector, hence the explicit null check.
          classNames = classesList;
      }

      if (string.IsNullOrEmpty(config_filepath) || string.IsNullOrEmpty(model_filepath))
      {
          Debug.LogError(config_filepath + " or " + model_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");
      }
      else
      {
          //! [Initialize network]
          net = Dnn.readNet(model_filepath, config_filepath);
          //! [Initialize network]

          // Cache the output layer names/types once; they are needed on every frame.
          outBlobNames = getOutputsNames(net);
          outBlobTypes = getOutputsTypes(net);
      }

  #if UNITY_ANDROID && !UNITY_EDITOR
      // Avoids the front camera low light issue that occurs in only some Android devices (e.g. Google Pixel, Pixel2).
      webCamTextureToMatHelper.avoidAndroidFrontCameraLowLightIssue = true;
  #endif
      webCamTextureToMatHelper.Initialize();
  }
  /// <summary>
  /// Called when the webcam helper has been initialized. Creates the display
  /// texture and the BGR scratch Mat in the size of the camera frame, scales
  /// this object to that size and fits the main camera's orthographic size to it.
  /// </summary>
  public void OnWebCamTextureToMatHelperInitialized()
  {
      Debug.Log("OnWebCamTextureToMatHelperInitialized");

      Mat frameMat = webCamTextureToMatHelper.GetMat();
      int frameCols = frameMat.cols();
      int frameRows = frameMat.rows();

      texture = new Texture2D(frameCols, frameRows, TextureFormat.RGBA32, false);
      gameObject.GetComponent<Renderer>().material.mainTexture = texture;
      gameObject.transform.localScale = new Vector3(frameCols, frameRows, 1);

      Debug.Log("Screen.width " + Screen.width + " Screen.height " + Screen.height + " Screen.orientation " + Screen.orientation);

      if (fpsMonitor != null)
      {
          fpsMonitor.Add("width", frameMat.width().ToString());
          fpsMonitor.Add("height", frameMat.height().ToString());
          fpsMonitor.Add("orientation", Screen.orientation.ToString());
      }

      float width = frameMat.width();
      float height = frameMat.height();
      float widthScale = (float)Screen.width / width;
      float heightScale = (float)Screen.height / height;

      // Choose the orthographic size from whichever screen dimension limits the frame.
      Camera.main.orthographicSize = widthScale < heightScale
          ? (width * (float)Screen.height / (float)Screen.width) / 2
          : height / 2;

      bgrMat = new Mat(frameRows, frameCols, CvType.CV_8UC3);
  }
  /// <summary>
  /// Called when the webcam helper has been disposed. Releases the BGR scratch Mat
  /// and the display texture and clears both references.
  /// </summary>
  public void OnWebCamTextureToMatHelperDisposed()
  {
      Debug.Log("OnWebCamTextureToMatHelperDisposed");

      if (bgrMat != null)
      {
          bgrMat.Dispose();
          // Drop the reference as well (as is done for the texture) so a disposed
          // Mat can never be reused or disposed a second time.
          bgrMat = null;
      }

      if (texture != null)
      {
          Texture2D.Destroy(texture);
          texture = null;
      }
  }
  /// <summary>
  /// Called when the webcam helper reports an error; the error code is written to the log.
  /// </summary>
  /// <param name="errorCode">Error code reported by the helper.</param>
  public void OnWebCamTextureToMatHelperErrorOccurred(WebCamTextureToMatHelper.ErrorCode errorCode)
  {
      string message = "OnWebCamTextureToMatHelperErrorOccurred " + errorCode;
      Debug.Log(message);
  }
  /// <summary>
  /// Called once per frame. When the webcam delivered a new frame, runs the network
  /// on it, draws the detections onto the RGBA frame and copies the frame to the
  /// display texture. When no network is loaded a hint is drawn instead.
  /// </summary>
  void Update()
  {
      if (!webCamTextureToMatHelper.IsPlaying() || !webCamTextureToMatHelper.DidUpdateThisFrame())
      {
          return;
      }

      Mat rgbaMat = webCamTextureToMatHelper.GetMat();

      if (net == null)
      {
          Imgproc.putText(rgbaMat, "model file is not loaded.", new Point(5, rgbaMat.rows() - 30), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar(255, 255, 255, 255), 2, Imgproc.LINE_AA, false);
          Imgproc.putText(rgbaMat, "Please read console message.", new Point(5, rgbaMat.rows() - 10), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar(255, 255, 255, 255), 2, Imgproc.LINE_AA, false);
      }
      else
      {
          Imgproc.cvtColor(rgbaMat, bgrMat, Imgproc.COLOR_RGBA2BGR);

          // Network input size; non-positive settings fall back to the frame size.
          Size inpSize = new Size(
              inpWidth > 0 ? inpWidth : bgrMat.cols(),
              inpHeight > 0 ? inpHeight : bgrMat.rows());

          Mat blob = null;
          Mat imInfo = null;
          TickMeter tm = new TickMeter();
          List<Mat> outs = new List<Mat>();

          try
          {
              // Create a 4D blob from the frame.
              blob = Dnn.blobFromImage(bgrMat, scale, inpSize, mean, swapRB, false);

              // Run the model.
              net.setInput(blob);

              if (net.getLayer(new DictValue(0)).outputNameToIndex("im_info") != -1)
              {
                  // Faster-RCNN or R-FCN: the network expects an additional "im_info" input.
                  // bgrMat is not read again this frame (results are drawn on rgbaMat),
                  // so it is not resized here; resizing it in place only forced a
                  // reallocation on the next cvtColor.
                  imInfo = new Mat(1, 3, CvType.CV_32FC1);
                  imInfo.put(0, 0, new float[] {
                      (float)inpSize.height,
                      (float)inpSize.width,
                      1.6f
                  });
                  net.setInput(imInfo, "im_info");
              }

              tm.start();
              net.forward(outs, outBlobNames);
              tm.stop();
              // Debug.Log ("Inference time, ms: " + tm.getTimeMilli ());

              postprocess(rgbaMat, outs, net);
          }
          finally
          {
              // Release every native object created for this frame, also when an
              // exception interrupted the processing above.
              for (int i = 0; i < outs.Count; i++)
              {
                  if (outs[i] != null)
                  {
                      outs[i].Dispose();
                  }
              }
              if (imInfo != null)
              {
                  imInfo.Dispose();
              }
              if (blob != null)
              {
                  blob.Dispose();
              }
              tm.Dispose();
          }
      }

      Utils.fastMatToTexture2D(rgbaMat, texture);
  }
  /// <summary>
  /// Called when the component is destroyed. Disposes the webcam helper and the
  /// network, switches the OpenCV debug mode off and, on WebGL, stops a file-path
  /// coroutine that is still running.
  /// </summary>
  void OnDestroy()
  {
      // The helper is only assigned in Start, which does not run for a component
      // that was never enabled; guard against that instead of throwing here.
      if (webCamTextureToMatHelper != null)
      {
          webCamTextureToMatHelper.Dispose();
      }

      if (net != null)
      {
          net.Dispose();
          net = null;
      }

      Utils.setDebugMode(false);

  #if UNITY_WEBGL && !UNITY_EDITOR
      if (getFilePath_Coroutine != null)
      {
          StopCoroutine(getFilePath_Coroutine);
          ((IDisposable)getFilePath_Coroutine).Dispose();
          getFilePath_Coroutine = null;
      }
  #endif
  }
  /// <summary>
  /// Handler of the back button: loads the example's menu scene.
  /// </summary>
  public void OnBackButtonClick()
  {
      const string menuSceneName = "OpenCVForUnityExample";
      SceneManager.LoadScene(menuSceneName);
  }
  /// <summary>
  /// Handler of the play button: forwards to the webcam helper's <c>Play</c>.
  /// </summary>
  public void OnPlayButtonClick()
  {
      this.webCamTextureToMatHelper.Play();
  }
  /// <summary>
  /// Handler of the pause button: forwards to the webcam helper's <c>Pause</c>.
  /// </summary>
  public void OnPauseButtonClick()
  {
      this.webCamTextureToMatHelper.Pause();
  }
  /// <summary>
  /// Handler of the stop button: forwards to the webcam helper's <c>Stop</c>.
  /// </summary>
  public void OnStopButtonClick()
  {
      this.webCamTextureToMatHelper.Stop();
  }
  /// <summary>
  /// Handler of the change-camera button: requests the camera facing opposite to
  /// the one the webcam helper currently reports.
  /// </summary>
  public void OnChangeCameraButtonClick()
  {
      bool requestFrontFacing = !webCamTextureToMatHelper.IsFrontFacing();
      webCamTextureToMatHelper.requestedIsFrontFacing = requestFrontFacing;
  }
  /// <summary>
  /// Reads the class names from a text file, one name per line. Lines are kept
  /// as they are (including empty ones) so that the line index stays the class id.
  /// </summary>
  /// <returns>The class names, or null when the file could not be read (the error is logged).</returns>
  /// <param name="filename">Path of the class-name file.</param>
  private List<string> readClassNames(string filename)
  {
      if (string.IsNullOrEmpty(filename))
      {
          // Happens when the file could not be resolved; report it clearly
          // instead of relying on the StreamReader's argument exception.
          Debug.LogError("The class names file path is empty.");
          return null;
      }

      List<string> names = new List<string>();

      try
      {
          // 'using' closes the reader on every path, including exceptions.
          using (System.IO.StreamReader reader = new System.IO.StreamReader(filename, System.Text.Encoding.Default))
          {
              string line;
              while ((line = reader.ReadLine()) != null)
              {
                  names.Add(line);
              }
          }
      }
      catch (System.Exception ex)
      {
          // Best effort: the caller treats null as "class names not available".
          Debug.LogError(ex.Message);
          return null;
      }

      return names;
  }
  /// <summary>
  /// Converts the raw network outputs into boxes, filters them by confidence and
  /// non-maximum suppression and draws the surviving detections onto the frame.
  /// The Mats in <paramref name="outs"/> are neither replaced nor disposed here;
  /// the caller keeps ownership of them.
  /// </summary>
  /// <param name="frame">Frame the detections are drawn on; its size is used to scale the boxes.</param>
  /// <param name="outs">Output blobs of the forward pass.</param>
  /// <param name="net">Network that produced the outputs.</param>
  private void postprocess(Mat frame, List<Mat> outs, Net net)
  {
      List<int> classIdsList = new List<int>();
      List<float> confidencesList = new List<float>();
      List<OpenCVForUnity.CoreModule.Rect> boxesList = new List<OpenCVForUnity.CoreModule.Rect>();

      // Type of the first output layer; empty when no output types are known.
      string outLayerType = (outBlobTypes != null && outBlobTypes.Count > 0) ? outBlobTypes[0] : string.Empty;
      bool hasImInfo = net.getLayer(new DictValue(0)).outputNameToIndex("im_info") != -1;

      if (hasImInfo || outLayerType == "DetectionOutput")
      {
          // Faster-RCNN / R-FCN (im_info input) and DetectionOutput share one layout:
          // a blob of shape 1x1xNx7 where every detection is
          // [batchId, classId, confidence, left, top, right, bottom].
          // NOTE(review): both cases scale the coordinates by the frame size, as the
          // original code did; confirm that this is right for the im_info models.
          if (outs.Count == 1)
          {
              collectDetectionOutput(outs[0], frame, classIdsList, confidencesList, boxesList);
          }
      }
      else if (outLayerType == "Region")
      {
          for (int i = 0; i < outs.Count; ++i)
          {
              collectRegionOutput(outs[i], frame, classIdsList, confidencesList, boxesList);
          }
      }
      else
      {
          Debug.Log("Unknown output layer type: " + outLayerType);
      }

      if (boxesList.Count == 0)
      {
          // Nothing passed the confidence threshold; skip the native NMS call.
          return;
      }

      MatOfRect boxes = new MatOfRect();
      MatOfFloat confidences = new MatOfFloat();
      MatOfInt indices = new MatOfInt();
      try
      {
          boxes.fromList(boxesList);
          confidences.fromList(confidencesList);
          Dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);

          for (int i = 0; i < indices.total(); ++i)
          {
              int idx = (int)indices.get(i, 0)[0];
              OpenCVForUnity.CoreModule.Rect box = boxesList[idx];
              drawPred(classIdsList[idx], confidencesList[idx], box.x, box.y,
                  box.x + box.width, box.y + box.height, frame);
          }
      }
      finally
      {
          indices.Dispose();
          boxes.Dispose();
          confidences.Dispose();
      }
  }

  /// <summary>
  /// Collects the detections of a 1x1xNx7 output blob whose confidence exceeds the threshold.
  /// </summary>
  private void collectDetectionOutput(Mat output, Mat frame, List<int> classIds, List<float> confidences, List<OpenCVForUnity.CoreModule.Rect> boxes)
  {
      int detectionCount = (int)output.total() / 7;
      if (detectionCount <= 0)
      {
          return;
      }

      // Work on a local reshaped header and dispose it here. Storing it back into
      // the caller's list (as was done before) dropped the original Mat wrapper
      // without ever disposing it.
      Mat detections = output.reshape(1, detectionCount);
      try
      {
          int frameCols = frame.cols();
          int frameRows = frame.rows();
          float[] data = new float[7];

          for (int i = 0; i < detections.rows(); i++)
          {
              detections.get(i, 0, data);

              float confidence = data[2];
              if (confidence > confThreshold)
              {
                  int classId = (int)data[1];
                  int left = (int)(data[3] * frameCols);
                  int top = (int)(data[4] * frameRows);
                  int right = (int)(data[5] * frameCols);
                  int bottom = (int)(data[6] * frameRows);

                  classIds.Add(classId);
                  confidences.Add(confidence);
                  boxes.Add(new OpenCVForUnity.CoreModule.Rect(left, top, right - left + 1, bottom - top + 1));
              }
          }
      }
      finally
      {
          detections.Dispose();
      }
  }

  /// <summary>
  /// Collects the detections of one "Region" output blob whose best class score exceeds the threshold.
  /// </summary>
  private void collectRegionOutput(Mat output, Mat frame, List<int> classIds, List<float> confidences, List<OpenCVForUnity.CoreModule.Rect> boxes)
  {
      // Each row is one candidate: columns 0-3 are [center_x, center_y, width, height]
      // relative to the frame, and the class scores start at column 5. Column 4 is
      // skipped (presumably the objectness score; confirm against the model).
      int classCount = output.cols() - 5;
      if (classCount <= 0)
      {
          // No class scores in this blob; nothing to pick a maximum from.
          return;
      }

      int frameCols = frame.cols();
      int frameRows = frame.rows();
      float[] positionData = new float[5];
      float[] confidenceData = new float[classCount];

      for (int p = 0; p < output.rows(); p++)
      {
          output.get(p, 0, positionData);
          output.get(p, 5, confidenceData);

          // Index of the best class score. A plain loop replaces the former LINQ
          // query (which allocated per row); as before, the later index wins on ties.
          int maxIdx = 0;
          for (int c = 1; c < classCount; c++)
          {
              if (!(confidenceData[maxIdx] > confidenceData[c]))
              {
                  maxIdx = c;
              }
          }

          float confidence = confidenceData[maxIdx];
          if (confidence > confThreshold)
          {
              int centerX = (int)(positionData[0] * frameCols);
              int centerY = (int)(positionData[1] * frameRows);
              int width = (int)(positionData[2] * frameCols);
              int height = (int)(positionData[3] * frameRows);
              int left = centerX - width / 2;
              int top = centerY - height / 2;

              classIds.Add(maxIdx);
              confidences.Add(confidence);
              boxes.Add(new OpenCVForUnity.CoreModule.Rect(left, top, width, height));
          }
      }
  }
  /// <summary>
  /// Draws one detection: a green box plus a caption on a white background. The
  /// caption is the confidence, prefixed with the class name when one is known
  /// for <paramref name="classId"/>.
  /// </summary>
  /// <param name="classId">Class identifier of the detection.</param>
  /// <param name="conf">Confidence of the detection.</param>
  /// <param name="left">Left edge of the box.</param>
  /// <param name="top">Top edge of the box.</param>
  /// <param name="right">Right edge of the box.</param>
  /// <param name="bottom">Bottom edge of the box.</param>
  /// <param name="frame">Frame to draw on.</param>
  private void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat frame)
  {
      Point boxTopLeft = new Point(left, top);
      Point boxBottomRight = new Point(right, bottom);
      Imgproc.rectangle(frame, boxTopLeft, boxBottomRight, new Scalar(0, 255, 0, 255), 2);

      string caption = conf.ToString();
      bool hasClassName = classNames != null && classNames.Count != 0 && classId < (int)classNames.Count;
      if (hasClassName)
      {
          caption = classNames[classId] + ": " + caption;
      }

      int[] baseLine = new int[1];
      Size captionSize = Imgproc.getTextSize(caption, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);

      // Keep the caption inside the frame when the box touches the upper border.
      int captionTop = Mathf.Max(top, (int)captionSize.height);

      Point backgroundTopLeft = new Point(left, captionTop - captionSize.height);
      Point backgroundBottomRight = new Point(left + captionSize.width, captionTop + baseLine[0]);
      Imgproc.rectangle(frame, backgroundTopLeft, backgroundBottomRight, Scalar.all(255), Core.FILLED);

      Imgproc.putText(frame, caption, new Point(left, captionTop), Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0, 255));
  }
  /// <summary>
  /// Collects the names of the network's unconnected output layers.
  /// </summary>
  /// <returns>The layer names, in the order reported by the network.</returns>
  /// <param name="net">Network to query.</param>
  private List<string> getOutputsNames(Net net)
  {
      List<string> layerNames = new List<string>();
      MatOfInt unconnected = net.getUnconnectedOutLayers();

      int index = 0;
      while (index < unconnected.total())
      {
          int layerId = (int)unconnected.get(index, 0)[0];
          var layer = net.getLayer(new DictValue(layerId));
          layerNames.Add(layer.get_name());
          ++index;
      }

      unconnected.Dispose();
      return layerNames;
  }
  /// <summary>
  /// Collects the types of the network's unconnected output layers.
  /// </summary>
  /// <returns>The layer types, in the order reported by the network.</returns>
  /// <param name="net">Network to query.</param>
  private List<string> getOutputsTypes(Net net)
  {
      List<string> layerTypes = new List<string>();
      MatOfInt unconnected = net.getUnconnectedOutLayers();

      int index = 0;
      while (index < unconnected.total())
      {
          int layerId = (int)unconnected.get(index, 0)[0];
          var layer = net.getLayer(new DictValue(layerId));
          layerTypes.Add(layer.get_type());
          ++index;
      }

      unconnected.Dispose();
      return layerTypes;
  }
  479. }
  480. }
  481. #endif