| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2016-02-24 15:23:16 +00:00
										 |  |  |  * This file is part of FFmpeg. | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2016-02-24 15:23:16 +00:00
										 |  |  |  * FFmpeg is free software; you can redistribute it and/or | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |  * modify it under the terms of the GNU Lesser General Public | 
					
						
							|  |  |  |  * License as published by the Free Software Foundation; either | 
					
						
							|  |  |  |  * version 2.1 of the License, or (at your option) any later version. | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2016-02-24 15:23:16 +00:00
										 |  |  |  * FFmpeg is distributed in the hope that it will be useful, | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
					
						
							|  |  |  |  * Lesser General Public License for more details. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * You should have received a copy of the GNU Lesser General Public | 
					
						
							| 
									
										
										
										
											2016-02-24 15:23:16 +00:00
										 |  |  |  * License along with FFmpeg; if not, write to the Free Software | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "buffer.h"
 | 
					
						
							|  |  |  | #include "common.h"
 | 
					
						
							|  |  |  | #include "hwcontext.h"
 | 
					
						
							|  |  |  | #include "hwcontext_internal.h"
 | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  | #include "hwcontext_cuda_internal.h"
 | 
					
						
							| 
									
										
										
										
											2019-08-28 21:58:10 +01:00
										 |  |  | #if CONFIG_VULKAN
 | 
					
						
							|  |  |  | #include "hwcontext_vulkan.h"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  | #include "cuda_check.h"
 | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | #include "mem.h"
 | 
					
						
							|  |  |  | #include "pixdesc.h"
 | 
					
						
							|  |  |  | #include "pixfmt.h"
 | 
					
						
							| 
									
										
										
										
											2018-05-09 21:18:15 +02:00
										 |  |  | #include "imgutils.h"
 | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-01 18:57:44 +02:00
										 |  |  | #define CUDA_FRAME_ALIGNMENT 256
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | typedef struct CUDAFramesContext { | 
					
						
							|  |  |  |     int shift_width, shift_height; | 
					
						
							|  |  |  | } CUDAFramesContext; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const enum AVPixelFormat supported_formats[] = { | 
					
						
							|  |  |  |     AV_PIX_FMT_NV12, | 
					
						
							|  |  |  |     AV_PIX_FMT_YUV420P, | 
					
						
							|  |  |  |     AV_PIX_FMT_YUV444P, | 
					
						
							| 
									
										
										
										
											2016-11-22 08:18:31 -08:00
										 |  |  |     AV_PIX_FMT_P010, | 
					
						
							|  |  |  |     AV_PIX_FMT_P016, | 
					
						
							| 
									
										
										
										
											2017-07-18 16:35:53 +05:30
										 |  |  |     AV_PIX_FMT_YUV444P16, | 
					
						
							| 
									
										
										
										
											2018-05-09 18:59:05 +02:00
										 |  |  |     AV_PIX_FMT_0RGB32, | 
					
						
							|  |  |  |     AV_PIX_FMT_0BGR32, | 
					
						
							| 
									
										
										
										
											2019-08-28 21:58:10 +01:00
										 |  |  | #if CONFIG_VULKAN
 | 
					
						
							|  |  |  |     AV_PIX_FMT_VULKAN, | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  | #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-16 16:43:13 +01:00
										 |  |  | static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, | 
					
						
							|  |  |  |                                        const void *hwconfig, | 
					
						
							|  |  |  |                                        AVHWFramesConstraints *constraints) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1, | 
					
						
							|  |  |  |                                                     sizeof(*constraints->valid_sw_formats)); | 
					
						
							|  |  |  |     if (!constraints->valid_sw_formats) | 
					
						
							|  |  |  |         return AVERROR(ENOMEM); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) | 
					
						
							|  |  |  |         constraints->valid_sw_formats[i] = supported_formats[i]; | 
					
						
							|  |  |  |     constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats)); | 
					
						
							|  |  |  |     if (!constraints->valid_hw_formats) | 
					
						
							|  |  |  |         return AVERROR(ENOMEM); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA; | 
					
						
							|  |  |  |     constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | static void cuda_buffer_free(void *opaque, uint8_t *data) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     AVHWFramesContext        *ctx = opaque; | 
					
						
							|  |  |  |     AVHWDeviceContext *device_ctx = ctx->device_ctx; | 
					
						
							|  |  |  |     AVCUDADeviceContext    *hwctx = device_ctx->hwctx; | 
					
						
							|  |  |  |     CudaFunctions             *cu = hwctx->internal->cuda_dl; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     CUcontext dummy; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     CHECK_CU(cu->cuMemFree((CUdeviceptr)data)); | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static AVBufferRef *cuda_pool_alloc(void *opaque, int size) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     AVHWFramesContext        *ctx = opaque; | 
					
						
							|  |  |  |     AVHWDeviceContext *device_ctx = ctx->device_ctx; | 
					
						
							|  |  |  |     AVCUDADeviceContext    *hwctx = device_ctx->hwctx; | 
					
						
							|  |  |  |     CudaFunctions             *cu = hwctx->internal->cuda_dl; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     AVBufferRef *ret = NULL; | 
					
						
							|  |  |  |     CUcontext dummy = NULL; | 
					
						
							|  |  |  |     CUdeviceptr data; | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     int err; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | 
					
						
							|  |  |  |     if (err < 0) | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |         return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     err = CHECK_CU(cu->cuMemAlloc(&data, size)); | 
					
						
							|  |  |  |     if (err < 0) | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |         goto fail; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); | 
					
						
							|  |  |  |     if (!ret) { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |         CHECK_CU(cu->cuMemFree(data)); | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |         goto fail; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | fail: | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |     return ret; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cuda_frames_init(AVHWFramesContext *ctx) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     CUDAFramesContext *priv = ctx->internal->priv; | 
					
						
							|  |  |  |     int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { | 
					
						
							|  |  |  |         if (ctx->sw_format == supported_formats[i]) | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (i == FF_ARRAY_ELEMS(supported_formats)) { | 
					
						
							|  |  |  |         av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n", | 
					
						
							|  |  |  |                av_get_pix_fmt_name(ctx->sw_format)); | 
					
						
							|  |  |  |         return AVERROR(ENOSYS); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!ctx->pool) { | 
					
						
							| 
									
										
										
										
											2018-05-09 21:18:15 +02:00
										 |  |  |         int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT); | 
					
						
							|  |  |  |         if (size < 0) | 
					
						
							|  |  |  |             return size; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL); | 
					
						
							|  |  |  |         if (!ctx->internal->pool_internal) | 
					
						
							|  |  |  |             return AVERROR(ENOMEM); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-05-09 21:18:15 +02:00
										 |  |  |     int res; | 
					
						
							| 
									
										
										
										
											2016-10-01 18:57:44 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |     frame->buf[0] = av_buffer_pool_get(ctx->pool); | 
					
						
							|  |  |  |     if (!frame->buf[0]) | 
					
						
							|  |  |  |         return AVERROR(ENOMEM); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-09 21:18:15 +02:00
										 |  |  |     res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data, | 
					
						
							|  |  |  |                                ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT); | 
					
						
							|  |  |  |     if (res < 0) | 
					
						
							|  |  |  |         return res; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // YUV420P is a special case.
 | 
					
						
							| 
									
										
										
										
											2018-05-14 23:24:43 +02:00
										 |  |  |     // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them, also chroma is half-aligned
 | 
					
						
							| 
									
										
										
										
											2018-05-09 21:18:15 +02:00
										 |  |  |     if (ctx->sw_format == AV_PIX_FMT_YUV420P) { | 
					
						
							| 
									
										
										
										
											2018-05-14 23:24:43 +02:00
										 |  |  |         frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2; | 
					
						
							|  |  |  |         frame->data[2]     = frame->data[1]; | 
					
						
							|  |  |  |         frame->data[1]     = frame->data[2] + frame->linesize[2] * ctx->height / 2; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     frame->format = AV_PIX_FMT_CUDA; | 
					
						
							|  |  |  |     frame->width  = ctx->width; | 
					
						
							|  |  |  |     frame->height = ctx->height; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cuda_transfer_get_formats(AVHWFramesContext *ctx, | 
					
						
							|  |  |  |                                      enum AVHWFrameTransferDirection dir, | 
					
						
							|  |  |  |                                      enum AVPixelFormat **formats) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     enum AVPixelFormat *fmts; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fmts = av_malloc_array(2, sizeof(*fmts)); | 
					
						
							|  |  |  |     if (!fmts) | 
					
						
							|  |  |  |         return AVERROR(ENOMEM); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     fmts[0] = ctx->sw_format; | 
					
						
							|  |  |  |     fmts[1] = AV_PIX_FMT_NONE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     *formats = fmts; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, | 
					
						
							|  |  |  |                                    const AVFrame *src) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     CUDAFramesContext       *priv = ctx->internal->priv; | 
					
						
							|  |  |  |     AVHWDeviceContext *device_ctx = ctx->device_ctx; | 
					
						
							|  |  |  |     AVCUDADeviceContext    *hwctx = device_ctx->hwctx; | 
					
						
							|  |  |  |     CudaFunctions             *cu = hwctx->internal->cuda_dl; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     CUcontext dummy; | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     int i, ret; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-28 21:58:10 +01:00
										 |  |  |     /* We don't support transfers to HW devices. */ | 
					
						
							|  |  |  |     if (dst->hw_frames_ctx) | 
					
						
							|  |  |  |         return AVERROR(ENOSYS); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | 
					
						
							|  |  |  |     if (ret < 0) | 
					
						
							|  |  |  |         return ret; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { | 
					
						
							|  |  |  |         CUDA_MEMCPY2D cpy = { | 
					
						
							|  |  |  |             .srcMemoryType = CU_MEMORYTYPE_DEVICE, | 
					
						
							|  |  |  |             .dstMemoryType = CU_MEMORYTYPE_HOST, | 
					
						
							|  |  |  |             .srcDevice     = (CUdeviceptr)src->data[i], | 
					
						
							|  |  |  |             .dstHost       = dst->data[i], | 
					
						
							|  |  |  |             .srcPitch      = src->linesize[i], | 
					
						
							|  |  |  |             .dstPitch      = dst->linesize[i], | 
					
						
							|  |  |  |             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]), | 
					
						
							|  |  |  |             .Height        = src->height >> (i ? priv->shift_height : 0), | 
					
						
							|  |  |  |         }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto exit; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream)); | 
					
						
							|  |  |  |     if (ret < 0) | 
					
						
							|  |  |  |         goto exit; | 
					
						
							| 
									
										
										
										
											2018-05-08 12:12:01 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  | exit: | 
					
						
							|  |  |  |     CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, | 
					
						
							|  |  |  |                                  const AVFrame *src) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     CUDAFramesContext       *priv = ctx->internal->priv; | 
					
						
							|  |  |  |     AVHWDeviceContext *device_ctx = ctx->device_ctx; | 
					
						
							|  |  |  |     AVCUDADeviceContext    *hwctx = device_ctx->hwctx; | 
					
						
							|  |  |  |     CudaFunctions             *cu = hwctx->internal->cuda_dl; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     CUcontext dummy; | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     int i, ret; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-28 21:58:10 +01:00
										 |  |  |     /* We don't support transfers from HW devices. */ | 
					
						
							|  |  |  |     if (src->hw_frames_ctx) | 
					
						
							|  |  |  |         return AVERROR(ENOSYS); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); | 
					
						
							|  |  |  |     if (ret < 0) | 
					
						
							|  |  |  |         return ret; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { | 
					
						
							|  |  |  |         CUDA_MEMCPY2D cpy = { | 
					
						
							|  |  |  |             .srcMemoryType = CU_MEMORYTYPE_HOST, | 
					
						
							|  |  |  |             .dstMemoryType = CU_MEMORYTYPE_DEVICE, | 
					
						
							|  |  |  |             .srcHost       = src->data[i], | 
					
						
							|  |  |  |             .dstDevice     = (CUdeviceptr)dst->data[i], | 
					
						
							|  |  |  |             .srcPitch      = src->linesize[i], | 
					
						
							|  |  |  |             .dstPitch      = dst->linesize[i], | 
					
						
							|  |  |  |             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]), | 
					
						
							|  |  |  |             .Height        = src->height >> (i ? priv->shift_height : 0), | 
					
						
							|  |  |  |         }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |         ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto exit; | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  | exit: | 
					
						
							|  |  |  |     CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  | static void cuda_device_uninit(AVHWDeviceContext *device_ctx) | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     AVCUDADeviceContext *hwctx = device_ctx->hwctx; | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (hwctx->internal) { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |         CudaFunctions *cu = hwctx->internal->cuda_dl; | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |         if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  |             if (hwctx->internal->flags & AV_CUDA_USE_PRIMARY_CONTEXT) | 
					
						
							|  |  |  |                 CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->internal->cuda_device)); | 
					
						
							|  |  |  |             else | 
					
						
							|  |  |  |                 CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |             hwctx->cuda_ctx = NULL; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |         cuda_free_functions(&hwctx->internal->cuda_dl); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     av_freep(&hwctx->internal); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cuda_device_init(AVHWDeviceContext *ctx) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     AVCUDADeviceContext *hwctx = ctx->hwctx; | 
					
						
							|  |  |  |     int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!hwctx->internal) { | 
					
						
							|  |  |  |         hwctx->internal = av_mallocz(sizeof(*hwctx->internal)); | 
					
						
							|  |  |  |         if (!hwctx->internal) | 
					
						
							|  |  |  |             return AVERROR(ENOMEM); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!hwctx->internal->cuda_dl) { | 
					
						
							| 
									
										
										
										
											2017-11-18 17:16:14 +00:00
										 |  |  |         ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx); | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |         if (ret < 0) { | 
					
						
							|  |  |  |             av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n"); | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | error: | 
					
						
							|  |  |  |     cuda_device_uninit(ctx); | 
					
						
							|  |  |  |     return ret; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  | static int cuda_device_create(AVHWDeviceContext *device_ctx, | 
					
						
							|  |  |  |                               const char *device, | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  |                               AVDictionary *opts, int flags) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     AVCUDADeviceContext *hwctx = device_ctx->hwctx; | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |     CudaFunctions *cu; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  |     CUcontext dummy; | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  |     int ret, dev_active = 0, device_idx = 0; | 
					
						
							|  |  |  |     unsigned int dev_flags = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (device) | 
					
						
							|  |  |  |         device_idx = strtol(device, NULL, 0); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     if (cuda_device_init(device_ctx) < 0) | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |         goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cu = hwctx->internal->cuda_dl; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     ret = CHECK_CU(cu->cuInit(0)); | 
					
						
							|  |  |  |     if (ret < 0) | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |         goto error; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  |     ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx)); | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     if (ret < 0) | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |         goto error; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  |     hwctx->internal->flags = flags; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  |     if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							| 
									
										
										
										
											2018-05-07 15:01:22 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  |         if (dev_active && dev_flags != desired_flags) { | 
					
						
							|  |  |  |             av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  |         } else if (dev_flags != desired_flags) { | 
					
						
							|  |  |  |             ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags)); | 
					
						
							|  |  |  |             if (ret < 0) | 
					
						
							|  |  |  |                 goto error; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |     hwctx->internal->is_allocated = 1; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-18 15:35:49 +02:00
										 |  |  |     // Setting stream to NULL will make functions automatically use the default CUstream
 | 
					
						
							|  |  |  |     hwctx->stream = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  |     return 0; | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | error: | 
					
						
							| 
									
										
										
										
											2018-11-10 22:47:28 -08:00
										 |  |  |     cuda_device_uninit(device_ctx); | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |     return AVERROR_UNKNOWN; | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-28 21:58:10 +01:00
										 |  |  | static int cuda_device_derive(AVHWDeviceContext *device_ctx, | 
					
						
							|  |  |  |                               AVHWDeviceContext *src_ctx, | 
					
						
							|  |  |  |                               int flags) { | 
					
						
							|  |  |  |     AVCUDADeviceContext *hwctx = device_ctx->hwctx; | 
					
						
							|  |  |  |     CudaFunctions *cu; | 
					
						
							|  |  |  |     const char *src_uuid = NULL; | 
					
						
							|  |  |  |     CUcontext dummy; | 
					
						
							|  |  |  |     int ret, i, device_count, dev_active = 0; | 
					
						
							|  |  |  |     unsigned int dev_flags = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if CONFIG_VULKAN
 | 
					
						
							|  |  |  |     VkPhysicalDeviceIDProperties vk_idp = { | 
					
						
							|  |  |  |         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES, | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     switch (src_ctx->type) { | 
					
						
							|  |  |  | #if CONFIG_VULKAN
 | 
					
						
							|  |  |  |     case AV_HWDEVICE_TYPE_VULKAN: { | 
					
						
							|  |  |  |         AVVulkanDeviceContext *vkctx = src_ctx->hwctx; | 
					
						
							|  |  |  |         VkPhysicalDeviceProperties2 vk_dev_props = { | 
					
						
							|  |  |  |             .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, | 
					
						
							|  |  |  |             .pNext = &vk_idp, | 
					
						
							|  |  |  |         }; | 
					
						
							|  |  |  |         vkGetPhysicalDeviceProperties2(vkctx->phys_dev, &vk_dev_props); | 
					
						
							|  |  |  |         src_uuid = vk_idp.deviceUUID; | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |         return AVERROR(ENOSYS); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!src_uuid) { | 
					
						
							|  |  |  |         av_log(device_ctx, AV_LOG_ERROR, | 
					
						
							|  |  |  |                "Failed to get UUID of source device.\n"); | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (cuda_device_init(device_ctx) < 0) | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cu = hwctx->internal->cuda_dl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ret = CHECK_CU(cu->cuInit(0)); | 
					
						
							|  |  |  |     if (ret < 0) | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ret = CHECK_CU(cu->cuDeviceGetCount(&device_count)); | 
					
						
							|  |  |  |     if (ret < 0) | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     hwctx->internal->cuda_device = -1; | 
					
						
							|  |  |  |     for (i = 0; i < device_count; i++) { | 
					
						
							|  |  |  |         CUdevice dev; | 
					
						
							|  |  |  |         CUuuid uuid; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuDeviceGet(&dev, i)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) { | 
					
						
							|  |  |  |             hwctx->internal->cuda_device = dev; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (hwctx->internal->cuda_device == -1) { | 
					
						
							|  |  |  |         av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n"); | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     hwctx->internal->flags = flags; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (dev_active && dev_flags != desired_flags) { | 
					
						
							|  |  |  |             av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  |         } else if (dev_flags != desired_flags) { | 
					
						
							|  |  |  |             ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags)); | 
					
						
							|  |  |  |             if (ret < 0) | 
					
						
							|  |  |  |                 goto error; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device)); | 
					
						
							|  |  |  |         if (ret < 0) | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         CHECK_CU(cu->cuCtxPopCurrent(&dummy)); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     hwctx->internal->is_allocated = 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Setting stream to NULL will make functions automatically use the default CUstream
 | 
					
						
							|  |  |  |     hwctx->stream = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | error: | 
					
						
							|  |  |  |     cuda_device_uninit(device_ctx); | 
					
						
							|  |  |  |     return AVERROR_UNKNOWN; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  | const HWContextType ff_hwcontext_type_cuda = { | 
					
						
							|  |  |  |     .type                 = AV_HWDEVICE_TYPE_CUDA, | 
					
						
							|  |  |  |     .name                 = "CUDA", | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     .device_hwctx_size    = sizeof(AVCUDADeviceContext), | 
					
						
							|  |  |  |     .frames_priv_size     = sizeof(CUDAFramesContext), | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-05-19 19:19:20 +02:00
										 |  |  |     .device_create        = cuda_device_create, | 
					
						
							| 
									
										
										
										
											2019-08-28 21:58:10 +01:00
										 |  |  |     .device_derive        = cuda_device_derive, | 
					
						
							| 
									
										
										
										
											2016-10-10 12:52:40 +02:00
										 |  |  |     .device_init          = cuda_device_init, | 
					
						
							|  |  |  |     .device_uninit        = cuda_device_uninit, | 
					
						
							| 
									
										
										
										
											2017-01-16 16:43:13 +01:00
										 |  |  |     .frames_get_constraints = cuda_frames_get_constraints, | 
					
						
							| 
									
										
										
										
											2016-01-13 14:25:58 +01:00
										 |  |  |     .frames_init          = cuda_frames_init, | 
					
						
							|  |  |  |     .frames_get_buffer    = cuda_get_buffer, | 
					
						
							|  |  |  |     .transfer_get_formats = cuda_transfer_get_formats, | 
					
						
							|  |  |  |     .transfer_data_to     = cuda_transfer_data_to, | 
					
						
							|  |  |  |     .transfer_data_from   = cuda_transfer_data_from, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE }, | 
					
						
							|  |  |  | }; |