NvEncoderCudaWithCUarray.cpp 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. #include "pch.h"
  2. #include "NvEncoder/NvEncoder.h"
  3. #include "NvEncoder/NvEncoderCuda.h"
  4. #include "NvEncoderCudaWithCUarray.h"
  5. namespace unity
  6. {
  7. namespace webrtc
  8. {
  9. static CUresult CreateCUarray(CUarray* pDstArray, uint32_t width, uint32_t height, CUarray_format format, int numChannels)
  10. {
  11. CUDA_ARRAY3D_DESCRIPTOR arrayDesc = CUDA_ARRAY3D_DESCRIPTOR();
  12. arrayDesc.Width = width;
  13. arrayDesc.Height = height;
  14. arrayDesc.Depth = 0; /* CUDA 2D arrays are defined to have depth 0 */
  15. arrayDesc.Format = format;
  16. arrayDesc.NumChannels = static_cast<uint32_t>(numChannels);
  17. arrayDesc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
  18. return cuArray3DCreate(pDstArray, &arrayDesc);
  19. }
  20. NvEncoderCudaWithCUarray::NvEncoderCudaWithCUarray(
  21. CUcontext cuContext,
  22. uint32_t nWidth,
  23. uint32_t nHeight,
  24. NV_ENC_BUFFER_FORMAT eBufferFormat,
  25. uint32_t nExtraOutputDelay,
  26. bool bMotionEstimationOnly,
  27. bool bOutputInVideoMemory)
  28. : NvEncoder(
  29. NV_ENC_DEVICE_TYPE_CUDA,
  30. cuContext,
  31. nWidth,
  32. nHeight,
  33. eBufferFormat,
  34. nExtraOutputDelay,
  35. bMotionEstimationOnly,
  36. bOutputInVideoMemory)
  37. , m_cuContext(cuContext)
  38. {
  39. if (!m_hEncoder)
  40. {
  41. NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_INVALID_DEVICE);
  42. }
  43. if (!m_cuContext)
  44. {
  45. NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE);
  46. }
  47. }
  48. NvEncoderCudaWithCUarray::~NvEncoderCudaWithCUarray() { ReleaseCudaResources(); }
  49. void NvEncoderCudaWithCUarray::AllocateInputBuffers(int32_t numInputBuffers)
  50. {
  51. if (!IsHWEncoderInitialized())
  52. {
  53. NVENC_THROW_ERROR("Encoder intialization failed", NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
  54. }
  55. // for MEOnly mode we need to allocate seperate set of buffers for reference frame
  56. int numCount = m_bMotionEstimationOnly ? 2 : 1;
  57. for (int count = 0; count < numCount; count++)
  58. {
  59. CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
  60. std::vector<void*> inputFrames;
  61. for (int i = 0; i < numInputBuffers; i++)
  62. {
  63. CUarray frame;
  64. CUDA_DRVAPI_CALL(
  65. CreateCUarray(&frame, GetMaxEncodeWidth(), GetMaxEncodeHeight(), CU_AD_FORMAT_UNSIGNED_INT32, 1));
  66. inputFrames.push_back(static_cast<void*>(frame));
  67. }
  68. CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
  69. int encodeWidth = static_cast<int>(GetMaxEncodeWidth());
  70. int encodeHeight = static_cast<int>(GetMaxEncodeHeight());
  71. int widthInBytes = static_cast<int>(GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()));
  72. RegisterInputResources(
  73. inputFrames,
  74. NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY,
  75. encodeWidth,
  76. encodeHeight,
  77. widthInBytes,
  78. GetPixelFormat(),
  79. (count == 1) ? true : false);
  80. }
  81. }
  82. void NvEncoderCudaWithCUarray::ReleaseInputBuffers() { ReleaseCudaResources(); }
  83. void NvEncoderCudaWithCUarray::ReleaseCudaResources()
  84. {
  85. if (!m_hEncoder)
  86. {
  87. return;
  88. }
  89. if (!m_cuContext)
  90. {
  91. return;
  92. }
  93. UnregisterInputResources();
  94. cuCtxPushCurrent(m_cuContext);
  95. for (uint32_t i = 0; i < m_vInputFrames.size(); ++i)
  96. {
  97. if (m_vInputFrames[i].inputPtr)
  98. {
  99. cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
  100. }
  101. }
  102. m_vInputFrames.clear();
  103. for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i)
  104. {
  105. if (m_vReferenceFrames[i].inputPtr)
  106. {
  107. cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
  108. }
  109. }
  110. m_vReferenceFrames.clear();
  111. cuCtxPopCurrent(nullptr);
  112. m_cuContext = nullptr;
  113. }
  114. void NvEncoderCudaWithCUarray::CopyToDeviceFrame(
  115. CUcontext device,
  116. void* pSrcArray,
  117. uint32_t nSrcPitch,
  118. CUarray pDstArray,
  119. uint32_t dstPitch,
  120. int width,
  121. int height,
  122. CUmemorytype srcMemoryType,
  123. NV_ENC_BUFFER_FORMAT pixelFormat,
  124. const uint32_t dstChromaOffsets[],
  125. uint32_t numChromaPlanes,
  126. bool bUnAlignedDeviceCopy,
  127. CUstream stream)
  128. {
  129. if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_ARRAY)
  130. {
  131. NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM);
  132. }
  133. CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
  134. uint32_t srcPitch = nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, static_cast<uint32_t>(width));
  135. CUDA_MEMCPY2D m = CUDA_MEMCPY2D();
  136. m.srcMemoryType = srcMemoryType;
  137. if (srcMemoryType == CU_MEMORYTYPE_HOST)
  138. {
  139. m.srcHost = pSrcArray;
  140. }
  141. else
  142. {
  143. m.srcArray = static_cast<CUarray>(pSrcArray);
  144. }
  145. m.srcPitch = srcPitch;
  146. m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  147. m.dstArray = pDstArray;
  148. m.dstPitch = dstPitch;
  149. m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, static_cast<uint32_t>(width));
  150. m.Height = static_cast<size_t>(height);
  151. if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_ARRAY)
  152. {
  153. CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
  154. }
  155. else
  156. {
  157. CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream));
  158. }
  159. std::vector<uint32_t> srcChromaOffsets;
  160. NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, static_cast<uint32_t>(height), srcChromaOffsets);
  161. uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, static_cast<uint32_t>(height));
  162. uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch);
  163. uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
  164. uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, static_cast<uint32_t>(width));
  165. for (uint32_t i = 0; i < numChromaPlanes; ++i)
  166. {
  167. if (chromaHeight)
  168. {
  169. if (srcMemoryType == CU_MEMORYTYPE_HOST)
  170. {
  171. m.srcHost = (static_cast<uint8_t*>(pSrcArray) + srcChromaOffsets[i]);
  172. }
  173. else
  174. {
  175. m.srcArray = (CUarray)(static_cast<uint8_t*>(pSrcArray) + srcChromaOffsets[i]);
  176. }
  177. m.srcPitch = srcChromaPitch;
  178. m.dstArray = (CUarray)((uint8_t*)pDstArray + dstChromaOffsets[i]);
  179. m.dstPitch = destChromaPitch;
  180. m.WidthInBytes = chromaWidthInBytes;
  181. m.Height = chromaHeight;
  182. if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_ARRAY)
  183. {
  184. CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
  185. }
  186. else
  187. {
  188. CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream));
  189. }
  190. }
  191. }
  192. CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
  193. }
  194. } // end namespace webrtc
  195. } // end namespace unity