DnnObjectDetectionExample.cs

#if !UNITY_WSA_10_0
using UnityEngine;
using UnityEngine.SceneManagement;
using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;
using OpenCVForUnity.CoreModule;
using OpenCVForUnity.DnnModule;
using OpenCVForUnity.ImgprocModule;
using OpenCVForUnity.UnityUtils;
using OpenCVForUnity.ImgcodecsModule;

namespace OpenCVForUnityExample
{
    /// <summary>
    /// Dnn ObjectDetection Example
    /// Referring to https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.cpp.
    /// </summary>
    public class DnnObjectDetectionExample : MonoBehaviour
    {
        [TooltipAttribute ("Path to input image.")]
        public string input;

        [TooltipAttribute ("Path to a binary file of the model that contains the trained weights. It could be a file with extensions .caffemodel (Caffe), .pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet).")]
        public string model;

        [TooltipAttribute ("Path to a text file of the model that contains the network configuration. It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet).")]
        public string config;

        [TooltipAttribute ("Optional path to a text file with names of classes to label detected objects.")]
        public string classes;

        [TooltipAttribute ("Optional list of classes to label detected objects.")]
        public List<string> classesList;

        [TooltipAttribute ("Confidence threshold.")]
        public float confThreshold;

        [TooltipAttribute ("Non-maximum suppression threshold.")]
        public float nmsThreshold;

        [TooltipAttribute ("Preprocess input image by multiplying by a scale factor.")]
        public float scale;

        [TooltipAttribute ("Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces.")]
        public Scalar mean;

        [TooltipAttribute ("Indicates that the model works with RGB input images instead of BGR ones.")]
        public bool swapRB;

        [TooltipAttribute ("Preprocess input image by resizing to a specific width.")]
        public int inpWidth;

        [TooltipAttribute ("Preprocess input image by resizing to a specific height.")]
        public int inpHeight;
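
        // Example parameter presets from the original sample (commented out):
        // YOLOv3-tiny, MobileNet-SSD and the ResNet SSD face detector.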
        //yolov3
        // string input = "004545.jpg";
        // public string input = "person.jpg";
        // public string model = "yolov3-tiny.weights";
        // public string config = "yolov3-tiny.cfg";
        // // string model = "yolov2-tiny.weights";
        // // string config = "yolov2-tiny.cfg";
        // public string classes = "coco.names";
        //
        // public float confThreshold = 0.24f;
        // public float nmsThreshold = 0.24f;
        // public float scale = 1f / 255f;
        // public Scalar mean = new Scalar (0, 0, 0);
        // public bool swapRB = false;
        // public int inpWidth = 416;
        // public int inpHeight = 416;
        //
        // List<string> classNames;

        // //MobileNetSSD
        // string input = "004545.jpg";
        // // string input = "person.jpg";
        // string model = "MobileNetSSD_deploy.caffemodel";
        // string config = "MobileNetSSD_deploy.prototxt";
        // string classes;
        // // string classes = "coco.names";
        //
        // float confThreshold = 0.2f;
        // float nmsThreshold = 0.2f;
        // float scale = 2f / 255f;
        // Scalar mean = new Scalar (127.5, 127.5, 127.5);
        // bool swapRB = false;
        // int inpWidth = 300;
        // int inpHeight = 300;
        //
        // List<string> classNames = new List<string>(new string[]{"background",
        //     "aeroplane", "bicycle", "bird", "boat",
        //     "bottle", "bus", "car", "cat", "chair",
        //     "cow", "diningtable", "dog", "horse",
        //     "motorbike", "person", "pottedplant",
        //     "sheep", "sofa", "train", "tvmonitor"
        // });
        // // List<string> classNames;

        // //ResnetSSDFaceDetection
        // string input = "grace_hopper_227.png";
        // // string input = "person.jpg";
        // string model = "res10_300x300_ssd_iter_140000.caffemodel";
        // string config = "deploy.prototxt";
        // // string model = "yolov2-tiny.weights";
        // // string config = "yolov2-tiny.cfg";
        // string classes;
        //
        // float confThreshold = 0.5f;
        // float nmsThreshold = 0.5f;
        // float scale = 1f;
        // Scalar mean = new Scalar (104, 177, 123);
        // bool swapRB = false;
        // int inpWidth = 300;
        // int inpHeight = 300;
        //
        // List<string> classNames;

        List<string> classNames;
        List<string> outBlobNames;
        List<string> outBlobTypes;

        string classes_filepath;
        string input_filepath;
        string config_filepath;
        string model_filepath;

#if UNITY_WEBGL && !UNITY_EDITOR
        IEnumerator getFilePath_Coroutine;
#endif

        // Use this for initialization
        void Start ()
        {
#if UNITY_WEBGL && !UNITY_EDITOR
            getFilePath_Coroutine = GetFilePath ();
            StartCoroutine (getFilePath_Coroutine);
#else
            classes_filepath = Utils.getFilePath ("dnn/" + classes);
            input_filepath = Utils.getFilePath ("dnn/" + input);
            config_filepath = Utils.getFilePath ("dnn/" + config);
            model_filepath = Utils.getFilePath ("dnn/" + model);
            Run ();
#endif
        }
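
        // In WebGL builds the files under "StreamingAssets/dnn/" are fetched asynchronously by
        // the coroutine below; on other platforms Utils.getFilePath resolves them synchronously.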
#if UNITY_WEBGL && !UNITY_EDITOR
        private IEnumerator GetFilePath ()
        {
            if (!string.IsNullOrEmpty (classes)) {
                var getFilePathAsync_0_Coroutine = Utils.getFilePathAsync ("dnn/" + classes, (result) => {
                    classes_filepath = result;
                });
                yield return getFilePathAsync_0_Coroutine;
            }

            if (!string.IsNullOrEmpty (input)) {
                var getFilePathAsync_1_Coroutine = Utils.getFilePathAsync ("dnn/" + input, (result) => {
                    input_filepath = result;
                });
                yield return getFilePathAsync_1_Coroutine;
            }

            if (!string.IsNullOrEmpty (config)) {
                var getFilePathAsync_2_Coroutine = Utils.getFilePathAsync ("dnn/" + config, (result) => {
                    config_filepath = result;
                });
                yield return getFilePathAsync_2_Coroutine;
            }

            if (!string.IsNullOrEmpty (model)) {
                var getFilePathAsync_3_Coroutine = Utils.getFilePathAsync ("dnn/" + model, (result) => {
                    model_filepath = result;
                });
                yield return getFilePathAsync_3_Coroutine;
            }

            getFilePath_Coroutine = null;

            Run ();
        }
#endif

        void Run ()
        {
            // If true, error logs from the native OpenCV side are displayed in the Unity Editor console.
            Utils.setDebugMode (true);

            if (!string.IsNullOrEmpty (classes)) {
                classNames = readClassNames (classes_filepath);
#if !UNITY_WSA_10_0
                if (classNames == null) {
                    Debug.LogError (classes_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");
                }
#endif
            } else if (classesList.Count > 0) {
                classNames = classesList;
            }

            Mat img = Imgcodecs.imread (input_filepath);
#if !UNITY_WSA_10_0
            if (img.empty ()) {
                Debug.LogError (input_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");
                img = new Mat (424, 640, CvType.CV_8UC3, new Scalar (0, 0, 0));
            }
#endif

            // Adjust Quad.transform.localScale.
            gameObject.transform.localScale = new Vector3 (img.width (), img.height (), 1);
            Debug.Log ("Screen.width " + Screen.width + " Screen.height " + Screen.height + " Screen.orientation " + Screen.orientation);

            float imageWidth = img.width ();
            float imageHeight = img.height ();
            float widthScale = (float)Screen.width / imageWidth;
            float heightScale = (float)Screen.height / imageHeight;
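
            // Fit the image inside the screen while preserving its aspect ratio: when the image
            // is relatively wider than the screen, fit by width; otherwise fit by height.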
            if (widthScale < heightScale) {
                Camera.main.orthographicSize = (imageWidth * (float)Screen.height / (float)Screen.width) / 2;
            } else {
                Camera.main.orthographicSize = imageHeight / 2;
            }

            Net net = null;

            if (string.IsNullOrEmpty (config_filepath) || string.IsNullOrEmpty (model_filepath)) {
                Debug.LogError (config_filepath + " or " + model_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");
            } else {
                //! [Initialize network]
                net = Dnn.readNet (model_filepath, config_filepath);
                //! [Initialize network]
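                // Dnn.readNet infers the framework (Caffe, TensorFlow, Darknet, Torch, ...)
                // from the extensions of the model and config files.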
            }

            if (net == null) {
                Imgproc.putText (img, "model file is not loaded.", new Point (5, img.rows () - 30), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar (255, 255, 255), 2, Imgproc.LINE_AA, false);
                Imgproc.putText (img, "Please read console message.", new Point (5, img.rows () - 10), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar (255, 255, 255), 2, Imgproc.LINE_AA, false);
            } else {
                outBlobNames = getOutputsNames (net);
                // for (int i = 0; i < outBlobNames.Count; i++) {
                //     Debug.Log ("names [" + i + "] " + outBlobNames [i]);
                // }

                outBlobTypes = getOutputsTypes (net);
                // for (int i = 0; i < outBlobTypes.Count; i++) {
                //     Debug.Log ("types [" + i + "] " + outBlobTypes [i]);
                // }

                // Create a 4D blob from a frame.
                Size inpSize = new Size (inpWidth > 0 ? inpWidth : img.cols (),
                    inpHeight > 0 ? inpHeight : img.rows ());
                Mat blob = Dnn.blobFromImage (img, scale, inpSize, mean, swapRB, false);
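                // blobFromImage resizes the image to inpSize, subtracts `mean`, scales by `scale`
                // and optionally swaps the R and B channels, producing a 1 x 3 x inpHeight x inpWidth
                // (NCHW) float blob.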

                // Run a model.
                net.setInput (blob);
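
                // Faster R-CNN and R-FCN models take a second input named "im_info" with the input
                // height, width and a scale factor, so the frame is resized to the network input
                // size and that information is fed in as well.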
                if (net.getLayer (new DictValue (0)).outputNameToIndex ("im_info") != -1) {  // Faster-RCNN or R-FCN
                    Imgproc.resize (img, img, inpSize);
                    Mat imInfo = new Mat (1, 3, CvType.CV_32FC1);
                    imInfo.put (0, 0, new float[] {
                        (float)inpSize.height,
                        (float)inpSize.width,
                        1.6f
                    });
                    net.setInput (imInfo, "im_info");
                }

                TickMeter tm = new TickMeter ();
                tm.start ();

                List<Mat> outs = new List<Mat> ();
                net.forward (outs, outBlobNames);

                tm.stop ();
                Debug.Log ("Inference time, ms: " + tm.getTimeMilli ());

                postprocess (img, outs, net);

                for (int i = 0; i < outs.Count; i++) {
                    outs [i].Dispose ();
                }
                blob.Dispose ();
                net.Dispose ();
            }

            Imgproc.cvtColor (img, img, Imgproc.COLOR_BGR2RGB);

            Texture2D texture = new Texture2D (img.cols (), img.rows (), TextureFormat.RGBA32, false);
            Utils.matToTexture2D (img, texture);
            gameObject.GetComponent<Renderer> ().material.mainTexture = texture;

            Utils.setDebugMode (false);
        }

        // Update is called once per frame
        void Update ()
        {
        }

        /// <summary>
        /// Raises the disable event.
        /// </summary>
        void OnDisable ()
        {
#if UNITY_WEBGL && !UNITY_EDITOR
            if (getFilePath_Coroutine != null) {
                StopCoroutine (getFilePath_Coroutine);
                ((IDisposable)getFilePath_Coroutine).Dispose ();
            }
#endif
        }

        /// <summary>
        /// Raises the back button click event.
        /// </summary>
        public void OnBackButtonClick ()
        {
            SceneManager.LoadScene ("OpenCVForUnityExample");
        }

        /// <summary>
        /// Reads the class names.
        /// </summary>
        /// <returns>The class names.</returns>
        /// <param name="filename">Filename.</param>
        private List<string> readClassNames (string filename)
        {
            List<string> classNames = new List<string> ();

            System.IO.StreamReader cReader = null;
            try {
                cReader = new System.IO.StreamReader (filename, System.Text.Encoding.Default);

                while (cReader.Peek () >= 0) {
                    string name = cReader.ReadLine ();
                    classNames.Add (name);
                }
            } catch (System.Exception ex) {
                Debug.LogError (ex.Message);
                return null;
            } finally {
                if (cReader != null)
                    cReader.Close ();
            }

            return classNames;
        }

        /// <summary>
        /// Postprocesses the network outputs: extracts detections, applies non-maximum suppression
        /// and draws the results on the frame.
        /// </summary>
        /// <param name="frame">Frame.</param>
        /// <param name="outs">Output blobs.</param>
        /// <param name="net">Net.</param>
        private void postprocess (Mat frame, List<Mat> outs, Net net)
        {
            string outLayerType = outBlobTypes [0];

            List<int> classIdsList = new List<int> ();
            List<float> confidencesList = new List<float> ();
            List<OpenCVForUnity.CoreModule.Rect> boxesList = new List<OpenCVForUnity.CoreModule.Rect> ();

            if (net.getLayer (new DictValue (0)).outputNameToIndex ("im_info") != -1) {  // Faster-RCNN or R-FCN
                // Network produces output blob with a shape 1x1xNx7 where N is the number of
                // detections and each detection is a vector of values
                // [batchId, classId, confidence, left, top, right, bottom]
                if (outs.Count == 1) {
                    outs [0] = outs [0].reshape (1, (int)outs [0].total () / 7);
                    // Debug.Log ("outs[i].ToString() " + outs [0].ToString ());

                    float[] data = new float[7];
                    for (int i = 0; i < outs [0].rows (); i++) {
                        outs [0].get (i, 0, data);

                        float confidence = data [2];
                        if (confidence > confThreshold) {
                            int class_id = (int)(data [1]);

                            int left = (int)(data [3] * frame.cols ());
                            int top = (int)(data [4] * frame.rows ());
                            int right = (int)(data [5] * frame.cols ());
                            int bottom = (int)(data [6] * frame.rows ());
                            int width = right - left + 1;
                            int height = bottom - top + 1;

                            classIdsList.Add ((int)(class_id) - 0);
                            confidencesList.Add ((float)confidence);
                            boxesList.Add (new OpenCVForUnity.CoreModule.Rect (left, top, width, height));
                        }
                    }
                }
            } else if (outLayerType == "DetectionOutput") {
                // Network produces output blob with a shape 1x1xNx7 where N is the number of
                // detections and each detection is a vector of values
                // [batchId, classId, confidence, left, top, right, bottom]
                if (outs.Count == 1) {
                    outs [0] = outs [0].reshape (1, (int)outs [0].total () / 7);
                    // Debug.Log ("outs[i].ToString() " + outs [0].ToString ());

                    float[] data = new float[7];
                    for (int i = 0; i < outs [0].rows (); i++) {
                        outs [0].get (i, 0, data);

                        float confidence = data [2];
                        if (confidence > confThreshold) {
                            int class_id = (int)(data [1]);

                            int left = (int)(data [3] * frame.cols ());
                            int top = (int)(data [4] * frame.rows ());
                            int right = (int)(data [5] * frame.cols ());
                            int bottom = (int)(data [6] * frame.rows ());
                            int width = right - left + 1;
                            int height = bottom - top + 1;

                            classIdsList.Add ((int)(class_id) - 0);
                            confidencesList.Add ((float)confidence);
                            boxesList.Add (new OpenCVForUnity.CoreModule.Rect (left, top, width, height));
                        }
                    }
                }
            } else if (outLayerType == "Region") {
                for (int i = 0; i < outs.Count; ++i) {
                    // Network produces output blob with a shape NxC where N is the number of
                    // detected objects and C is the number of classes + 5, where the first 5
                    // numbers are [center_x, center_y, width, height, objectness].
                    // Debug.Log ("outs[i].ToString() " + outs [i].ToString ());

                    float[] positionData = new float[5];
                    float[] confidenceData = new float[outs [i].cols () - 5];
                    for (int p = 0; p < outs [i].rows (); p++) {
                        outs [i].get (p, 0, positionData);
                        outs [i].get (p, 5, confidenceData);

                        // Pick the class with the highest score for this detection.
                        int maxIdx = confidenceData.Select ((val, idx) => new { V = val, I = idx }).Aggregate ((max, working) => (max.V > working.V) ? max : working).I;
                        float confidence = confidenceData [maxIdx];
                        if (confidence > confThreshold) {
                            int centerX = (int)(positionData [0] * frame.cols ());
                            int centerY = (int)(positionData [1] * frame.rows ());
                            int width = (int)(positionData [2] * frame.cols ());
                            int height = (int)(positionData [3] * frame.rows ());
                            int left = centerX - width / 2;
                            int top = centerY - height / 2;

                            classIdsList.Add (maxIdx);
                            confidencesList.Add ((float)confidence);
                            boxesList.Add (new OpenCVForUnity.CoreModule.Rect (left, top, width, height));
                        }
                    }
                }
            } else {
                Debug.Log ("Unknown output layer type: " + outLayerType);
            }

            MatOfRect boxes = new MatOfRect ();
            boxes.fromList (boxesList);

            MatOfFloat confidences = new MatOfFloat ();
            confidences.fromList (confidencesList);

            MatOfInt indices = new MatOfInt ();
            Dnn.NMSBoxes (boxes, confidences, confThreshold, nmsThreshold, indices);
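            // Non-maximum suppression: among boxes that overlap by more than nmsThreshold, only
            // the one with the highest confidence is kept; `indices` lists the surviving boxes.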

            // Debug.Log ("indices.dump () " + indices.dump ());
            // Debug.Log ("indices.ToString () " + indices.ToString ());

            for (int i = 0; i < indices.total (); ++i) {
                int idx = (int)indices.get (i, 0) [0];
                OpenCVForUnity.CoreModule.Rect box = boxesList [idx];
                drawPred (classIdsList [idx], confidencesList [idx], box.x, box.y,
                    box.x + box.width, box.y + box.height, frame);
            }

            indices.Dispose ();
            boxes.Dispose ();
            confidences.Dispose ();
        }

        /// <summary>
        /// Draws the predicted bounding box and class label on the frame.
        /// </summary>
        /// <param name="classId">Class identifier.</param>
        /// <param name="conf">Confidence.</param>
        /// <param name="left">Left.</param>
        /// <param name="top">Top.</param>
        /// <param name="right">Right.</param>
        /// <param name="bottom">Bottom.</param>
        /// <param name="frame">Frame.</param>
        private void drawPred (int classId, float conf, int left, int top, int right, int bottom, Mat frame)
        {
            Imgproc.rectangle (frame, new Point (left, top), new Point (right, bottom), new Scalar (0, 255, 0, 255), 2);

            string label = conf.ToString ();
            if (classNames != null && classNames.Count != 0) {
                if (classId < (int)classNames.Count) {
                    label = classNames [classId] + ": " + label;
                }
            }

            // Draw the label on a filled background just above the box.
            int[] baseLine = new int[1];
            Size labelSize = Imgproc.getTextSize (label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);

            top = Mathf.Max (top, (int)labelSize.height);
            Imgproc.rectangle (frame, new Point (left, top - labelSize.height),
                new Point (left + labelSize.width, top + baseLine [0]), Scalar.all (255), Core.FILLED);
            Imgproc.putText (frame, label, new Point (left, top), Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar (0, 0, 0, 255));
        }

        /// <summary>
        /// Gets the names of the network's output layers.
        /// </summary>
        /// <returns>The output layer names.</returns>
        /// <param name="net">Net.</param>
        private List<string> getOutputsNames (Net net)
        {
            List<string> names = new List<string> ();

            MatOfInt outLayers = net.getUnconnectedOutLayers ();
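            // getUnconnectedOutLayers returns the indices of layers whose outputs are not consumed
            // by any other layer, i.e. the network's output layers.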
            for (int i = 0; i < outLayers.total (); ++i) {
                names.Add (net.getLayer (new DictValue ((int)outLayers.get (i, 0) [0])).get_name ());
            }
            outLayers.Dispose ();

            return names;
        }

        /// <summary>
        /// Gets the types of the network's output layers.
        /// </summary>
        /// <returns>The output layer types.</returns>
        /// <param name="net">Net.</param>
        private List<string> getOutputsTypes (Net net)
        {
            List<string> types = new List<string> ();

            MatOfInt outLayers = net.getUnconnectedOutLayers ();
            for (int i = 0; i < outLayers.total (); ++i) {
                types.Add (net.getLayer (new DictValue ((int)outLayers.get (i, 0) [0])).get_type ());
            }
            outLayers.Dispose ();

            return types;
        }
    }
}
#endif