D3D12GraphicsDevice.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. #include "pch.h"
  2. #include <third_party/libyuv/include/libyuv.h>
  3. #include "D3D12Constants.h"
  4. #include "D3D12GraphicsDevice.h"
  5. #include "D3D12Texture2D.h"
  6. #include "GraphicsDevice/Cuda/GpuMemoryBufferCudaHandle.h"
  7. #include "GraphicsDevice/D3D11/D3D11Texture2D.h"
  8. #include "GraphicsDevice/GraphicsUtility.h"
  9. #include "NvCodecUtils.h"
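  // Silence clang's "language extension token" diagnostic (presumably raised by Microsoft-specific
  // keywords inside the Windows SDK macros used in this file, e.g. IID_PPV_ARGS -- confirm).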
  11. #pragma clang diagnostic ignored "-Wlanguage-extension-token"
  12. using namespace Microsoft::WRL;
  13. namespace unity
  14. {
  15. namespace webrtc
  16. {
  17. //---------------------------------------------------------------------------------------------------------------------
  18. D3D12GraphicsDevice::D3D12GraphicsDevice(
  19. ID3D12Device* nativeDevice,
  20. IUnityGraphicsD3D12v5* unityInterface,
  21. UnityGfxRenderer renderer,
  22. ProfilerMarkerFactory* profiler)
  23. : IGraphicsDevice(renderer, profiler)
  24. , m_d3d12Device(nativeDevice)
  25. , m_d3d12CommandQueue(unityInterface->GetCommandQueue())
  26. , m_d3d11Device(nullptr)
  27. , m_d3d11Context(nullptr)
  28. , m_copyResourceFence(nullptr)
  29. , m_copyResourceEventHandle(nullptr)
  30. {
  31. }
  32. //---------------------------------------------------------------------------------------------------------------------
  33. D3D12GraphicsDevice::D3D12GraphicsDevice(
  34. ID3D12Device* nativeDevice,
  35. ID3D12CommandQueue* commandQueue,
  36. UnityGfxRenderer renderer,
  37. ProfilerMarkerFactory* profiler)
  38. : IGraphicsDevice(renderer, profiler)
  39. , m_d3d12Device(nativeDevice)
  40. , m_d3d12CommandQueue(commandQueue)
  41. , m_d3d11Device(nullptr)
  42. , m_d3d11Context(nullptr)
  43. , m_copyResourceFence(nullptr)
  44. , m_copyResourceEventHandle(nullptr)
  45. {
  46. }
  47. //---------------------------------------------------------------------------------------------------------------------
  48. D3D12GraphicsDevice::~D3D12GraphicsDevice() { }
  49. //---------------------------------------------------------------------------------------------------------------------
  50. bool D3D12GraphicsDevice::InitV()
  51. {
  52. ID3D11Device* legacyDevice;
  53. ID3D11DeviceContext* legacyContext;
  54. HRESULT hr = D3D11CreateDevice(
  55. nullptr,
  56. D3D_DRIVER_TYPE_HARDWARE,
  57. nullptr,
  58. 0,
  59. nullptr,
  60. 0,
  61. D3D11_SDK_VERSION,
  62. &legacyDevice,
  63. nullptr,
  64. &legacyContext);
  65. if (FAILED(hr))
  66. {
  67. RTC_LOG(LS_ERROR) << "D3D11CreateDevice is failed. " << HrToString(hr);
  68. return false;
  69. }
  70. hr = legacyDevice->QueryInterface(IID_PPV_ARGS(&m_d3d11Device));
  71. if (FAILED(hr))
  72. {
  73. RTC_LOG(LS_ERROR) << "ID3D11DeviceContext::QueryInterface is failed. " << HrToString(hr);
  74. return false;
  75. }
  76. legacyDevice->GetImmediateContext(&legacyContext);
  77. hr = legacyContext->QueryInterface(IID_PPV_ARGS(&m_d3d11Context));
  78. if (FAILED(hr))
  79. {
  80. RTC_LOG(LS_ERROR) << "ID3D11DeviceContext::QueryInterface is failed. " << HrToString(hr);
  81. return false;
  82. }
  83. hr = m_d3d12Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator));
  84. if (FAILED(hr))
  85. {
  86. RTC_LOG(LS_ERROR) << "ID3D12Device::CreateCommandAllocator is failed. " << HrToString(hr);
  87. return false;
  88. }
  89. hr = m_d3d12Device->CreateCommandList(
  90. 0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator, nullptr, IID_PPV_ARGS(&m_commandList));
  91. if (FAILED(hr))
  92. {
  93. RTC_LOG(LS_ERROR) << "ID3D12Device::CreateCommandList is failed. " << HrToString(hr);
  94. return false;
  95. }
  96. // Command lists are created in the recording state, but there is nothing
  97. // to record yet. The main loop expects it to be closed, so close it now.
  98. hr = m_commandList->Close();
  99. if (FAILED(hr))
  100. {
  101. RTC_LOG(LS_ERROR) << "ID3D12GraphicsCommandList::Close is failed. " << HrToString(hr);
  102. return false;
  103. }
  104. hr = m_d3d12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_copyResourceFence));
  105. if (FAILED(hr))
  106. {
  107. RTC_LOG(LS_ERROR) << "ID3D12Device::CreateFence is failed. " << HrToString(hr);
  108. return false;
  109. }
  110. m_copyResourceEventHandle = CreateEvent(nullptr, FALSE, FALSE, nullptr);
  111. if (m_copyResourceEventHandle == nullptr)
  112. {
  113. hr = HRESULT_FROM_WIN32(GetLastError());
  114. RTC_LOG(LS_ERROR) << "CreateEvent is failed. " << HrToString(hr);
  115. return false;
  116. }
  117. m_isCudaSupport = CUDA_SUCCESS == m_cudaContext.Init(m_d3d12Device.Get());
  118. return true;
  119. }
  120. //---------------------------------------------------------------------------------------------------------------------
  121. void D3D12GraphicsDevice::ShutdownV()
  122. {
  123. m_cudaContext.Shutdown();
  124. SAFE_CLOSE_HANDLE(m_copyResourceEventHandle)
  125. }
  126. //---------------------------------------------------------------------------------------------------------------------
  127. ITexture2D*
  128. D3D12GraphicsDevice::CreateDefaultTextureV(uint32_t w, uint32_t h, UnityRenderingExtTextureFormat textureFormat)
  129. {
  130. return CreateSharedD3D12Texture(w, h);
  131. }
  132. //---------------------------------------------------------------------------------------------------------------------
  133. bool D3D12GraphicsDevice::CopyResourceV(ITexture2D* dest, ITexture2D* src)
  134. {
  135. //[Note-sin: 2020-2-19] This function is currently not required by RenderStreaming. Delete?
  136. return true;
  137. }
  138. //---------------------------------------------------------------------------------------------------------------------
  139. bool D3D12GraphicsDevice::CopyResourceFromNativeV(ITexture2D* baseDest, void* nativeTexturePtr)
  140. {
  141. D3D12Texture2D* dest = reinterpret_cast<D3D12Texture2D*>(baseDest);
  142. assert(nullptr != dest);
  143. if (nullptr == dest)
  144. return false;
  145. ID3D12Resource* nativeDest = reinterpret_cast<ID3D12Resource*>(dest->GetNativeTexturePtrV());
  146. ID3D12Resource* nativeSrc = reinterpret_cast<ID3D12Resource*>(nativeTexturePtr);
  147. if (nativeSrc == nativeDest)
  148. return false;
  149. if (nativeSrc == nullptr || nativeDest == nullptr)
  150. return false;
  151. ThrowIfFailed(m_commandAllocator->Reset());
  152. ThrowIfFailed(m_commandList->Reset(m_commandAllocator, nullptr));
  153. m_commandList->CopyResource(nativeDest, nativeSrc);
  154. // for CPU accessible texture
  155. ID3D12Resource* readbackResource = dest->GetReadbackResource();
  156. const D3D12ResourceFootprint* resFP = dest->GetNativeTextureFootprint();
  157. if (nullptr != readbackResource)
  158. {
  159. // Change dest state, copy, change dest state back
  160. Barrier(nativeDest, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
  161. D3D12_TEXTURE_COPY_LOCATION td, ts;
  162. td.pResource = readbackResource;
  163. td.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
  164. td.PlacedFootprint = resFP->Footprint;
  165. ts.pResource = nativeDest;
  166. ts.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
  167. ts.SubresourceIndex = 0;
  168. m_commandList->CopyTextureRegion(&td, 0, 0, 0, &ts, nullptr);
  169. Barrier(nativeDest, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COPY_DEST);
  170. }
  171. ThrowIfFailed(m_commandList->Close());
  172. ID3D12CommandList* cmdList[] = { m_commandList };
  173. m_d3d12CommandQueue->ExecuteCommandLists(_countof(cmdList), cmdList);
  174. WaitForFence(m_copyResourceFence.Get(), m_copyResourceEventHandle, &m_copyResourceFenceValue);
  175. return true;
  176. }
  177. //---------------------------------------------------------------------------------------------------------------------
  178. D3D12Texture2D* D3D12GraphicsDevice::CreateSharedD3D12Texture(uint32_t w, uint32_t h)
  179. {
  180. //[Note-sin: 2019-10-30] Taken from RaytracedHardShadow
  181. // note: sharing textures with d3d11 requires some flags and restrictions:
  182. // - MipLevels must be 1
  183. // - D3D12_HEAP_FLAG_SHARED for heap flags
  184. // - D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET and D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS for resource
  185. // flags
  186. D3D12_RESOURCE_DESC desc {};
  187. desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  188. desc.Alignment = 0;
  189. desc.Width = w;
  190. desc.Height = h;
  191. desc.DepthOrArraySize = 1;
  192. desc.MipLevels = 1;
  193. desc.Format =
  194. DXGI_FORMAT_B8G8R8A8_UNORM; // We only support this format which has 4 bytes -> DX12_BYTES_PER_PIXEL
  195. desc.SampleDesc.Count = 1;
  196. desc.SampleDesc.Quality = 0;
  197. desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  198. desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
  199. desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
  200. const D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_SHARED;
  201. const D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COPY_DEST;
  202. ID3D12Resource* resource = nullptr;
  203. HRESULT result = m_d3d12Device->CreateCommittedResource(
  204. &D3D12_DEFAULT_HEAP_PROPS, flags, &desc, initialState, nullptr, IID_PPV_ARGS(&resource));
  205. if (result != S_OK)
  206. {
  207. RTC_LOG(LS_INFO) << "CreateCommittedResource failed. error:" << result;
  208. return nullptr;
  209. }
  210. HANDLE handle = nullptr;
  211. result = m_d3d12Device->CreateSharedHandle(resource, nullptr, GENERIC_ALL, nullptr, &handle);
  212. if (result != S_OK)
  213. {
  214. RTC_LOG(LS_INFO) << "CreateSharedHandle failed. error:" << result;
  215. return nullptr;
  216. }
  217. // ID3D11Device::OpenSharedHandle() doesn't accept handles created by d3d12.
  218. // OpenSharedResource1() is needed.
  219. ID3D11Texture2D* sharedTex = nullptr;
  220. result = m_d3d11Device->OpenSharedResource1(handle, IID_PPV_ARGS(&sharedTex));
  221. if (result != S_OK)
  222. {
  223. RTC_LOG(LS_INFO) << "OpenSharedResource1 failed. error:" << result;
  224. return nullptr;
  225. }
  226. return new D3D12Texture2D(w, h, resource, handle, sharedTex);
  227. }
  228. //----------------------------------------------------------------------------------------------------------------------
  229. void D3D12GraphicsDevice::WaitForFence(ID3D12Fence* fence, HANDLE handle, uint64_t* fenceValue)
  230. {
  231. ThrowIfFailed(m_d3d12CommandQueue->Signal(fence, *fenceValue));
  232. ThrowIfFailed(fence->SetEventOnCompletion(*fenceValue, handle));
  233. WaitForSingleObject(handle, INFINITE);
  234. ++(*fenceValue);
  235. }
  236. //----------------------------------------------------------------------------------------------------------------------
  237. void D3D12GraphicsDevice::Barrier(
  238. ID3D12Resource* res,
  239. const D3D12_RESOURCE_STATES stateBefore,
  240. const D3D12_RESOURCE_STATES stateAfter,
  241. const UINT subresource)
  242. {
  243. D3D12_RESOURCE_BARRIER barrier;
  244. barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
  245. barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  246. barrier.Transition.pResource = res;
  247. barrier.Transition.StateBefore = stateBefore;
  248. barrier.Transition.StateAfter = stateAfter;
  249. barrier.Transition.Subresource = subresource;
  250. m_commandList->ResourceBarrier(1, &barrier);
  251. }
  252. //----------------------------------------------------------------------------------------------------------------------
  253. ITexture2D*
  254. D3D12GraphicsDevice::CreateCPUReadTextureV(uint32_t w, uint32_t h, UnityRenderingExtTextureFormat textureFormat)
  255. {
  256. D3D12Texture2D* tex = CreateSharedD3D12Texture(w, h);
  257. const HRESULT hr = tex->CreateReadbackResource(m_d3d12Device.Get());
  258. if (FAILED(hr))
  259. {
  260. delete tex;
  261. return nullptr;
  262. }
  263. return tex;
  264. }
  265. //----------------------------------------------------------------------------------------------------------------------
  266. rtc::scoped_refptr<webrtc::I420Buffer> D3D12GraphicsDevice::ConvertRGBToI420(ITexture2D* baseTex)
  267. {
  268. D3D12Texture2D* tex = reinterpret_cast<D3D12Texture2D*>(baseTex);
  269. assert(nullptr != tex);
  270. if (nullptr == tex)
  271. return nullptr;
  272. ID3D12Resource* readbackResource = tex->GetReadbackResource();
  273. assert(nullptr != readbackResource);
  274. if (nullptr == readbackResource) // the texture has to be prepared for CPU access
  275. return nullptr;
  276. const int width = static_cast<int>(tex->GetWidth());
  277. const int height = static_cast<int>(tex->GetHeight());
  278. const D3D12ResourceFootprint* footprint = tex->GetNativeTextureFootprint();
  279. const int rowPitch = static_cast<int>(footprint->Footprint.Footprint.RowPitch);
  280. // Map to read from CPU
  281. uint8* data {};
  282. const HRESULT hr = readbackResource->Map(0, nullptr, reinterpret_cast<void**>(&data));
  283. assert(hr == S_OK);
  284. if (hr != S_OK)
  285. {
  286. return nullptr;
  287. }
  288. // RGBA -> I420
  289. rtc::scoped_refptr<webrtc::I420Buffer> i420_buffer = webrtc::I420Buffer::Create(width, height);
  290. libyuv::ARGBToI420(
  291. static_cast<uint8_t*>(data),
  292. rowPitch,
  293. i420_buffer->MutableDataY(),
  294. i420_buffer->StrideY(),
  295. i420_buffer->MutableDataU(),
  296. i420_buffer->StrideU(),
  297. i420_buffer->MutableDataV(),
  298. i420_buffer->StrideV(),
  299. width,
  300. height);
  301. D3D12_RANGE emptyRange { 0, 0 };
  302. readbackResource->Unmap(0, &emptyRange);
  303. return i420_buffer;
  304. }
  305. std::unique_ptr<GpuMemoryBufferHandle> D3D12GraphicsDevice::Map(ITexture2D* texture)
  306. {
  307. if (!IsCudaSupport())
  308. return nullptr;
  309. D3D12Texture2D* d3d12Texure = static_cast<D3D12Texture2D*>(texture);
  310. // set context on the thread.
  311. cuCtxPushCurrent(GetCUcontext());
  312. HANDLE sharedHandle = d3d12Texure->GetHandle();
  313. if (!sharedHandle)
  314. {
  315. RTC_LOG(LS_ERROR) << "cannot get shared handle";
  316. throw;
  317. }
  318. size_t width = d3d12Texure->GetWidth();
  319. size_t height = d3d12Texure->GetHeight();
  320. D3D12_RESOURCE_DESC desc = d3d12Texure->GetDesc();
  321. D3D12_RESOURCE_ALLOCATION_INFO d3d12ResourceAllocationInfo;
  322. d3d12ResourceAllocationInfo = m_d3d12Device->GetResourceAllocationInfo(0, 1, &desc);
  323. size_t actualSize = d3d12ResourceAllocationInfo.SizeInBytes;
  324. CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc = {};
  325. memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE;
  326. memDesc.handle.win32.handle = static_cast<void*>(sharedHandle);
  327. memDesc.size = actualSize;
  328. memDesc.flags = CUDA_EXTERNAL_MEMORY_DEDICATED;
  329. CUresult result;
  330. CUexternalMemory externalMemory = {};
  331. result = cuImportExternalMemory(&externalMemory, &memDesc);
  332. if (result != CUDA_SUCCESS)
  333. {
  334. RTC_LOG(LS_ERROR) << "cuImportExternalMemory error";
  335. throw;
  336. }
  337. CUDA_ARRAY3D_DESCRIPTOR arrayDesc = {};
  338. arrayDesc.Width = width;
  339. arrayDesc.Height = height;
  340. arrayDesc.Depth = 0; /* CUDA 2D arrays are defined to have depth 0 */
  341. arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT32;
  342. arrayDesc.NumChannels = 1;
  343. arrayDesc.Flags = CUDA_ARRAY3D_SURFACE_LDST | CUDA_ARRAY3D_COLOR_ATTACHMENT;
  344. CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = {};
  345. mipmapArrayDesc.arrayDesc = arrayDesc;
  346. mipmapArrayDesc.numLevels = 1;
  347. CUmipmappedArray mipmappedArray;
  348. result = cuExternalMemoryGetMappedMipmappedArray(&mipmappedArray, externalMemory, &mipmapArrayDesc);
  349. if (result != CUDA_SUCCESS)
  350. {
  351. RTC_LOG(LS_ERROR) << "cuExternalMemoryGetMappedMipmappedArray error";
  352. throw;
  353. }
  354. CUarray array;
  355. result = cuMipmappedArrayGetLevel(&array, mipmappedArray, 0);
  356. if (result != CUDA_SUCCESS)
  357. {
  358. RTC_LOG(LS_ERROR) << "cuMipmappedArrayGetLevel error";
  359. throw;
  360. }
  361. cuCtxPopCurrent(nullptr);
  362. std::unique_ptr<GpuMemoryBufferCudaHandle> handle = std::make_unique<GpuMemoryBufferCudaHandle>();
  363. handle->context = GetCUcontext();
  364. handle->mappedArray = array;
  365. handle->externalMemory = externalMemory;
  366. return std::move(handle);
  367. }
  368. } // end namespace webrtc
  369. } // end namespace unity