#if !UNITY_WSA_10_0

using UnityEngine;
using UnityEngine.SceneManagement;
using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;
using OpenCVForUnity.CoreModule;
using OpenCVForUnity.DnnModule;
using OpenCVForUnity.ImgprocModule;
using OpenCVForUnity.UnityUtils;
using OpenCVForUnity.ImgcodecsModule;

namespace OpenCVForUnityExample
{
    /// <summary>
    /// Dnn ObjectDetection Example
    /// Referring to https://github.com/opencv/opencv/blob/master/samples/dnn/object_detection.cpp.
    /// </summary>
    /// <remarks>
    /// Loads one image and one network from "dnn/" (resolved through Utils.getFilePath),
    /// runs a single forward pass, draws the detections that survive non-maximum
    /// suppression onto the image and shows the result on this GameObject's material.
    /// </remarks>
    public class DnnObjectDetectionExample : MonoBehaviour
    {
        [TooltipAttribute ("Path to input image.")]
        public string input;

        [TooltipAttribute ("Path to a binary file of model contains trained weights. It could be a file with extensions .caffemodel (Caffe), .pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet).")]
        public string model;

        [TooltipAttribute ("Path to a text file of model contains network configuration. It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet).")]
        public string config;

        [TooltipAttribute ("Optional path to a text file with names of classes to label detected objects.")]
        public string classes;

        [TooltipAttribute ("Optional list of classes to label detected objects.")]
        public List<string> classesList;

        [TooltipAttribute ("Confidence threshold.")]
        public float confThreshold;

        [TooltipAttribute ("Non-maximum suppression threshold.")]
        public float nmsThreshold;

        [TooltipAttribute ("Preprocess input image by multiplying on a scale factor.")]
        public float scale;

        [TooltipAttribute ("Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces.")]
        public Scalar mean;

        [TooltipAttribute ("Indicate that model works with RGB input images instead BGR ones.")]
        public bool swapRB;

        [TooltipAttribute ("Preprocess input image by resizing to a specific width.")]
        public int inpWidth;

        [TooltipAttribute ("Preprocess input image by resizing to a specific height.")]
        public int inpHeight;

        // ---------------------------------------------------------------------------------
        // Reference settings for the fields above (kept from the original sample).
        //
        // yolov3
        //   input         = "person.jpg"            (alternative: "004545.jpg")
        //   model         = "yolov3-tiny.weights"   (alternative: "yolov2-tiny.weights")
        //   config        = "yolov3-tiny.cfg"       (alternative: "yolov2-tiny.cfg")
        //   classes       = "coco.names"
        //   confThreshold = 0.24f
        //   nmsThreshold  = 0.24f
        //   scale         = 1f / 255f
        //   mean          = new Scalar (0, 0, 0)
        //   swapRB        = false
        //   inpWidth      = 416
        //   inpHeight     = 416
        //
        // MobileNetSSD
        //   input         = "004545.jpg"            (alternative: "person.jpg")
        //   model         = "MobileNetSSD_deploy.caffemodel"
        //   config        = "MobileNetSSD_deploy.prototxt"
        //   classes       = (unset)                 (alternative: "coco.names")
        //   confThreshold = 0.2f
        //   nmsThreshold  = 0.2f
        //   scale         = 2f / 255f
        //   mean          = new Scalar (127.5, 127.5, 127.5)
        //   swapRB        = false
        //   inpWidth      = 300
        //   inpHeight     = 300
        //   classNames    = "background",
        //                   "aeroplane", "bicycle", "bird", "boat",
        //                   "bottle", "bus", "car", "cat", "chair",
        //                   "cow", "diningtable", "dog", "horse",
        //                   "motorbike", "person", "pottedplant",
        //                   "sheep", "sofa", "train", "tvmonitor"
        //
        // ResnetSSDFaceDetection
        //   input         = "grace_hopper_227.png"  (alternative: "person.jpg")
        //   model         = "res10_300x300_ssd_iter_140000.caffemodel"  (alternative: "yolov2-tiny.weights")
        //   config        = "deploy.prototxt"                           (alternative: "yolov2-tiny.cfg")
        //   classes       = (unset)
        //   confThreshold = 0.5f
        //   nmsThreshold  = 0.5f
        //   scale         = 1f
        //   mean          = new Scalar (104, 177, 123)
        //   swapRB        = false
        //   inpWidth      = 300
        //   inpHeight     = 300
        // ---------------------------------------------------------------------------------

        /// <summary>
        /// Labels used by drawPred; stays null when neither a classes file nor classesList is supplied.
        /// </summary>
        List<string> classNames;

        /// <summary>
        /// Names of the network's unconnected output layers (filled in before inference).
        /// </summary>
        List<string> outBlobNames;

        /// <summary>
        /// Types of the network's unconnected output layers, parallel to outBlobNames.
        /// </summary>
        List<string> outBlobTypes;

        string classes_filepath;
        string input_filepath;
        string config_filepath;
        string model_filepath;

#if UNITY_WEBGL && !UNITY_EDITOR
        IEnumerator getFilePath_Coroutine;
#endif

        // Use this for initialization
        void Start ()
        {
#if UNITY_WEBGL && !UNITY_EDITOR
            getFilePath_Coroutine = GetFilePath ();
            StartCoroutine (getFilePath_Coroutine);
#else
            classes_filepath = Utils.getFilePath ("dnn/" + classes);
            input_filepath = Utils.getFilePath ("dnn/" + input);
            config_filepath = Utils.getFilePath ("dnn/" + config);
            model_filepath = Utils.getFilePath ("dnn/" + model);
            Run ();
#endif
        }

#if UNITY_WEBGL && !UNITY_EDITOR
        /// <summary>
        /// Resolves every configured file path asynchronously (one after another), then calls Run.
        /// Paths whose field is empty are skipped and stay null.
        /// </summary>
        private IEnumerator GetFilePath ()
        {
            if (!string.IsNullOrEmpty (classes))
            {
                var classesRequest = Utils.getFilePathAsync ("dnn/" + classes, (result) =>
                {
                    classes_filepath = result;
                });
                yield return classesRequest;
            }

            if (!string.IsNullOrEmpty (input))
            {
                var inputRequest = Utils.getFilePathAsync ("dnn/" + input, (result) =>
                {
                    input_filepath = result;
                });
                yield return inputRequest;
            }

            if (!string.IsNullOrEmpty (config))
            {
                var configRequest = Utils.getFilePathAsync ("dnn/" + config, (result) =>
                {
                    config_filepath = result;
                });
                yield return configRequest;
            }

            if (!string.IsNullOrEmpty (model))
            {
                var modelRequest = Utils.getFilePathAsync ("dnn/" + model, (result) =>
                {
                    model_filepath = result;
                });
                yield return modelRequest;
            }

            // Cleared so OnDisable does not try to stop a coroutine that already finished.
            getFilePath_Coroutine = null;

            Run ();
        }
#endif

        /// <summary>
        /// Loads the labels, the image and the network, runs detection once and displays the result.
        /// </summary>
        void Run ()
        {
            //if true, The error log of the Native side OpenCV will be displayed on the Unity Editor Console.
            Utils.setDebugMode (true);

            Mat img = null;
            try
            {
                if (!string.IsNullOrEmpty (classes))
                {
                    classNames = readClassNames (classes_filepath);
                    if (classNames == null)
                    {
                        Debug.LogError (classes_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");
                    }
                }
                else if (classesList != null && classesList.Count > 0)
                {
                    classNames = classesList;
                }

                img = Imgcodecs.imread (input_filepath);
                if (img.empty ())
                {
                    Debug.LogError (input_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");

                    // Fall back to a black placeholder so the rest of the pipeline still has a frame.
                    img.Dispose ();
                    img = new Mat (424, 640, CvType.CV_8UC3, new Scalar (0, 0, 0));
                }

                fitQuadAndCamera (img);

                Net net = null;
                if (string.IsNullOrEmpty (config_filepath) || string.IsNullOrEmpty (model_filepath))
                {
                    Debug.LogError (config_filepath + " or " + model_filepath + " is not loaded. Please see \"StreamingAssets/dnn/setup_dnn_module.pdf\". ");
                }
                else
                {
                    //! [Initialize network]
                    net = Dnn.readNet (model_filepath, config_filepath);
                    //! [Initialize network]
                }

                if (net == null)
                {
                    Imgproc.putText (img, "model file is not loaded.", new Point (5, img.rows () - 30), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar (255, 255, 255), 2, Imgproc.LINE_AA, false);
                    Imgproc.putText (img, "Please read console message.", new Point (5, img.rows () - 10), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar (255, 255, 255), 2, Imgproc.LINE_AA, false);
                }
                else
                {
                    try
                    {
                        detect (img, net);
                    }
                    finally
                    {
                        net.Dispose ();
                    }
                }

                Imgproc.cvtColor (img, img, Imgproc.COLOR_BGR2RGB);

                Texture2D texture = new Texture2D (img.cols (), img.rows (), TextureFormat.RGBA32, false);

                // NOTE(review): matToTexture2D is expected to copy the pixels into the texture,
                // which is what makes releasing img below safe - confirm against the OpenCVForUnity docs.
                Utils.matToTexture2D (img, texture);

                gameObject.GetComponent<Renderer> ().material.mainTexture = texture;
            }
            finally
            {
                if (img != null)
                {
                    img.Dispose ();
                }

                // Always restore the debug mode, even when something above threw.
                Utils.setDebugMode (false);
            }
        }

        /// <summary>
        /// Scales this GameObject to the image size and sizes the main orthographic camera so the
        /// whole image is visible.
        /// </summary>
        /// <param name="img">Image whose width and height drive the layout.</param>
        private void fitQuadAndCamera (Mat img)
        {
            //Adust Quad.transform.localScale.
            gameObject.transform.localScale = new Vector3 (img.width (), img.height (), 1);
            Debug.Log ("Screen.width " + Screen.width + " Screen.height " + Screen.height + " Screen.orientation " + Screen.orientation);

            Camera mainCamera = Camera.main;
            if (mainCamera == null)
            {
                // Without a main camera there is nothing to size; the texture is still produced.
                Debug.LogWarning ("Camera.main is null; skipping orthographicSize adjustment.");
                return;
            }

            float imageWidth = img.width ();
            float imageHeight = img.height ();

            float widthScale = (float)Screen.width / imageWidth;
            float heightScale = (float)Screen.height / imageHeight;
            if (widthScale < heightScale)
            {
                mainCamera.orthographicSize = (imageWidth * (float)Screen.height / (float)Screen.width) / 2;
            }
            else
            {
                mainCamera.orthographicSize = imageHeight / 2;
            }
        }

        /// <summary>
        /// Runs one forward pass of <paramref name="net"/> on <paramref name="img"/> and draws the detections.
        /// Every Mat created here is released before returning, including on failure.
        /// </summary>
        /// <param name="img">Input frame; drawn on in place (and resized in place for "im_info" networks).</param>
        /// <param name="net">Loaded network. Not disposed here.</param>
        private void detect (Mat img, Net net)
        {
            outBlobNames = getOutputsNames (net);
            outBlobTypes = getOutputsTypes (net);

            // Create a 4D blob from a frame. Non-positive inpWidth/inpHeight fall back to the image size.
            Size inpSize = new Size (inpWidth > 0 ? inpWidth : img.cols (), inpHeight > 0 ? inpHeight : img.rows ());

            // A null mean would fail inside blobFromImage; treat it as "subtract nothing".
            Scalar meanValue = mean ?? new Scalar (0, 0, 0);

            Mat blob = Dnn.blobFromImage (img, scale, inpSize, meanValue, swapRB, false);
            Mat imInfo = null;
            List<Mat> outs = new List<Mat> ();
            try
            {
                // Run a model.
                net.setInput (blob);

                if (hasImInfoInput (net))
                {
                    // Faster-RCNN or R-FCN
                    Imgproc.resize (img, img, inpSize);
                    imInfo = new Mat (1, 3, CvType.CV_32FC1);
                    // Values are [height, width, 1.6f]; the constant is taken over unchanged from the sample.
                    imInfo.put (0, 0, new float[] {
                        (float)inpSize.height,
                        (float)inpSize.width,
                        1.6f
                    });
                    net.setInput (imInfo, "im_info");
                }

                TickMeter tm = new TickMeter ();
                tm.start ();

                net.forward (outs, outBlobNames);

                tm.stop ();
                Debug.Log ("Inference time, ms: " + tm.getTimeMilli ());

                postprocess (img, outs, net);
            }
            finally
            {
                for (int i = 0; i < outs.Count; i++)
                {
                    outs [i].Dispose ();
                }

                if (imInfo != null)
                {
                    imInfo.Dispose ();
                }

                blob.Dispose ();
            }
        }

        /// <summary>
        /// Tells whether layer 0 of the network exposes an "im_info" output, which the sample
        /// treats as the marker for Faster-RCNN / R-FCN models.
        /// </summary>
        /// <param name="net">Net.</param>
        /// <returns>True when outputNameToIndex ("im_info") is not -1.</returns>
        private bool hasImInfoInput (Net net)
        {
            return net.getLayer (new DictValue (0)).outputNameToIndex ("im_info") != -1;
        }

        /// <summary>
        /// Raises the disable event.
        /// </summary>
        void OnDisable ()
        {
#if UNITY_WEBGL && !UNITY_EDITOR
            if (getFilePath_Coroutine != null)
            {
                StopCoroutine (getFilePath_Coroutine);
                ((IDisposable)getFilePath_Coroutine).Dispose ();
            }
#endif
        }

        /// <summary>
        /// Raises the back button click event.
        /// </summary>
        public void OnBackButtonClick ()
        {
            SceneManager.LoadScene ("OpenCVForUnityExample");
        }

        /// <summary>
        /// Reads the class names, one per line.
        /// </summary>
        /// <returns>The class names, or null when the file could not be read (the error is logged).</returns>
        /// <param name="filename">Filename.</param>
        private List<string> readClassNames (string filename)
        {
            List<string> names = new List<string> ();

            try
            {
                using (System.IO.StreamReader reader = new System.IO.StreamReader (filename, System.Text.Encoding.Default))
                {
                    string line;
                    while ((line = reader.ReadLine ()) != null)
                    {
                        names.Add (line);
                    }
                }
            }
            catch (System.Exception ex)
            {
                Debug.LogError (ex.Message);
                return null;
            }

            return names;
        }

        /// <summary>
        /// Postprocess the specified frame, outs and net: decodes the raw outputs into boxes,
        /// applies non-maximum suppression and draws the surviving detections.
        /// </summary>
        /// <param name="frame">Frame to draw on.</param>
        /// <param name="outs">Outs. Not modified and not disposed here.</param>
        /// <param name="net">Net.</param>
        private void postprocess (Mat frame, List<Mat> outs, Net net)
        {
            if (outBlobTypes == null || outBlobTypes.Count == 0)
            {
                Debug.LogWarning ("The network reports no output layers; nothing to postprocess.");
                return;
            }

            string outLayerType = outBlobTypes [0];

            List<int> classIdsList = new List<int> ();
            List<float> confidencesList = new List<float> ();
            List<OpenCVForUnity.CoreModule.Rect> boxesList = new List<OpenCVForUnity.CoreModule.Rect> ();

            if (hasImInfoInput (net) || outLayerType == "DetectionOutput")
            {
                // Faster-RCNN / R-FCN and "DetectionOutput" networks are decoded identically.
                if (outs.Count == 1)
                {
                    collectDetectionOutput (frame, outs [0], classIdsList, confidencesList, boxesList);
                }
            }
            else if (outLayerType == "Region")
            {
                for (int i = 0; i < outs.Count; ++i)
                {
                    collectRegionOutput (frame, outs [i], classIdsList, confidencesList, boxesList);
                }
            }
            else
            {
                Debug.Log ("Unknown output layer type: " + outLayerType);
            }

            if (boxesList.Count == 0)
            {
                // Nothing passed the confidence threshold, so there is nothing to suppress or draw.
                return;
            }

            MatOfRect boxes = new MatOfRect ();
            MatOfFloat confidences = new MatOfFloat ();
            MatOfInt indices = new MatOfInt ();
            try
            {
                boxes.fromList (boxesList);
                confidences.fromList (confidencesList);

                Dnn.NMSBoxes (boxes, confidences, confThreshold, nmsThreshold, indices);

                for (int i = 0; i < indices.total (); ++i)
                {
                    int idx = (int)indices.get (i, 0) [0];
                    OpenCVForUnity.CoreModule.Rect box = boxesList [idx];
                    drawPred (classIdsList [idx], confidencesList [idx], box.x, box.y,
                        box.x + box.width, box.y + box.height, frame);
                }
            }
            finally
            {
                indices.Dispose ();
                boxes.Dispose ();
                confidences.Dispose ();
            }
        }

        /// <summary>
        /// Decodes an output blob with a shape 1x1xNx7 where N is a number of detections and
        /// every detection is a vector of values
        /// [batchId, classId, confidence, left, top, right, bottom].
        /// Box coordinates are multiplied by the frame size before being stored.
        /// </summary>
        /// <param name="frame">Frame the coordinates are scaled to.</param>
        /// <param name="output">Raw network output. Left untouched.</param>
        /// <param name="classIdsList">Receives the class id of every accepted detection.</param>
        /// <param name="confidencesList">Receives the confidence of every accepted detection.</param>
        /// <param name="boxesList">Receives the box of every accepted detection.</param>
        private void collectDetectionOutput (Mat frame, Mat output, List<int> classIdsList, List<float> confidencesList, List<OpenCVForUnity.CoreModule.Rect> boxesList)
        {
            int detectionCount = (int)(output.total () / 7);
            if (detectionCount <= 0)
            {
                return;
            }

            // reshape returns a new Mat header; keep it local so it can be released here
            // instead of overwriting (and leaking) the caller's Mat.
            Mat detections = output.reshape (1, detectionCount);
            try
            {
                float[] data = new float[7];

                for (int i = 0; i < detections.rows (); i++)
                {
                    detections.get (i, 0, data);

                    float confidence = data [2];
                    if (!(confidence > confThreshold))
                    {
                        continue;
                    }

                    int classId = (int)(data [1]);

                    int left = (int)(data [3] * frame.cols ());
                    int top = (int)(data [4] * frame.rows ());
                    int right = (int)(data [5] * frame.cols ());
                    int bottom = (int)(data [6] * frame.rows ());
                    int width = right - left + 1;
                    int height = bottom - top + 1;

                    classIdsList.Add (classId);
                    confidencesList.Add (confidence);
                    boxesList.Add (new OpenCVForUnity.CoreModule.Rect (left, top, width, height));
                }
            }
            finally
            {
                detections.Dispose ();
            }
        }

        /// <summary>
        /// Decodes a "Region" output blob: one row per candidate, where the first values are
        /// [center_x, center_y, width, height] and the values from column 5 on are read as
        /// per-class scores.
        /// </summary>
        /// <param name="frame">Frame the coordinates are scaled to.</param>
        /// <param name="output">Raw network output.</param>
        /// <param name="classIdsList">Receives the class id of every accepted detection.</param>
        /// <param name="confidencesList">Receives the confidence of every accepted detection.</param>
        /// <param name="boxesList">Receives the box of every accepted detection.</param>
        private void collectRegionOutput (Mat frame, Mat output, List<int> classIdsList, List<float> confidencesList, List<OpenCVForUnity.CoreModule.Rect> boxesList)
        {
            int classCount = output.cols () - 5;
            if (classCount <= 0)
            {
                // Too few columns to hold any class score.
                return;
            }

            float[] positionData = new float[5];
            float[] confidenceData = new float[classCount];

            for (int p = 0; p < output.rows (); p++)
            {
                output.get (p, 0, positionData);
                output.get (p, 5, confidenceData);

                // Arg-max over the class scores; on ties the later index wins.
                int maxIdx = 0;
                for (int c = 1; c < classCount; c++)
                {
                    if (!(confidenceData [maxIdx] > confidenceData [c]))
                    {
                        maxIdx = c;
                    }
                }

                float confidence = confidenceData [maxIdx];
                if (!(confidence > confThreshold))
                {
                    continue;
                }

                int centerX = (int)(positionData [0] * frame.cols ());
                int centerY = (int)(positionData [1] * frame.rows ());
                int width = (int)(positionData [2] * frame.cols ());
                int height = (int)(positionData [3] * frame.rows ());
                int left = centerX - width / 2;
                int top = centerY - height / 2;

                classIdsList.Add (maxIdx);
                confidencesList.Add (confidence);
                boxesList.Add (new OpenCVForUnity.CoreModule.Rect (left, top, width, height));
            }
        }

        /// <summary>
        /// Draws the pred: a green box plus a label ("name: confidence", or just the confidence
        /// when no name is known for the class id) on a white background.
        /// </summary>
        /// <param name="classId">Class identifier.</param>
        /// <param name="conf">Conf.</param>
        /// <param name="left">Left.</param>
        /// <param name="top">Top.</param>
        /// <param name="right">Right.</param>
        /// <param name="bottom">Bottom.</param>
        /// <param name="frame">Frame.</param>
        private void drawPred (int classId, float conf, int left, int top, int right, int bottom, Mat frame)
        {
            Imgproc.rectangle (frame, new Point (left, top), new Point (right, bottom), new Scalar (0, 255, 0, 255), 2);

            string label = conf.ToString ();
            if (classNames != null && classId >= 0 && classId < classNames.Count)
            {
                label = classNames [classId] + ": " + label;
            }

            int[] baseLine = new int[1];
            Size labelSize = Imgproc.getTextSize (label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);

            // Keep the label inside the image when the box touches the top edge.
            top = Mathf.Max (top, (int)labelSize.height);
            Imgproc.rectangle (frame, new Point (left, top - labelSize.height),
                new Point (left + labelSize.width, top + baseLine [0]), Scalar.all (255), Core.FILLED);
            Imgproc.putText (frame, label, new Point (left, top), Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar (0, 0, 0, 255));
        }

        /// <summary>
        /// Gets the outputs names.
        /// </summary>
        /// <returns>The names of the network's unconnected output layers.</returns>
        /// <param name="net">Net.</param>
        private List<string> getOutputsNames (Net net)
        {
            List<string> names = new List<string> ();

            MatOfInt outLayers = net.getUnconnectedOutLayers ();
            try
            {
                for (int i = 0; i < outLayers.total (); ++i)
                {
                    int layerId = (int)outLayers.get (i, 0) [0];
                    names.Add (net.getLayer (new DictValue (layerId)).get_name ());
                }
            }
            finally
            {
                outLayers.Dispose ();
            }

            return names;
        }

        /// <summary>
        /// Gets the outputs types.
        /// </summary>
        /// <returns>The types of the network's unconnected output layers.</returns>
        /// <param name="net">Net.</param>
        private List<string> getOutputsTypes (Net net)
        {
            List<string> types = new List<string> ();

            MatOfInt outLayers = net.getUnconnectedOutLayers ();
            try
            {
                for (int i = 0; i < outLayers.total (); ++i)
                {
                    int layerId = (int)outLayers.get (i, 0) [0];
                    types.Add (net.getLayer (new DictValue (layerId)).get_type ());
                }
            }
            finally
            {
                outLayers.Dispose ();
            }

            return types;
        }
    }
}
#endif