libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance

Fix: #7706. After commit 5fdcf85bbf, vaapi encoder's performance decrease. The reason is that vaRenderPicture() and vaSyncBuffer() are called at the same time (vaRenderPicture() always followed by a vaSyncBuffer()). Now I changed them to be called in a asynchronous way, which will make better use of hardware. Async_depth is added to increase encoder's performance. The frames that are sent to hardware are stored in a fifo. Encoder will sync output after async fifo is full. Signed-off-by: Wenbin Chen <wenbin.chen@intel.com> Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
2025-12-08 06:09:50 +00:00 · 2022-02-18 11:07:47 +08:00 · 2022-02-18 11:07:47 +08:00 · d165ce22a4
commit d165ce22a4
parent e0ff869930
3 changed files with 71 additions and 15 deletions
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@ -965,8 +965,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
    if (!pic && ctx->end_of_stream) {
        --b_counter;
        pic = ctx->pic_end;
-        if (pic->encode_issued)
+        if (pic->encode_complete)
            return AVERROR_EOF;
+        else if (pic->encode_issued)
+            return AVERROR(EAGAIN);
    }

    if (!pic) {
@ -1137,7 +1139,8 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
        if (ctx->input_order == ctx->decode_delay)
            ctx->dts_pts_diff = pic->pts - ctx->first_pts;
        if (ctx->output_delay > 0)
-            ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
+            ctx->ts_ring[ctx->input_order %
+                        (3 * ctx->output_delay + ctx->async_depth)] = pic->pts;

        pic->display_order = ctx->input_order;
        ++ctx->input_order;
@ -1191,18 +1194,47 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
            return AVERROR(EAGAIN);
    }

-    pic = NULL;
-    err = vaapi_encode_pick_next(avctx, &pic);
-    if (err < 0)
-        return err;
-    av_assert0(pic);
+    if (ctx->has_sync_buffer_func) {
+        pic = NULL;

-    pic->encode_order = ctx->encode_order++;
+        if (av_fifo_can_write(ctx->encode_fifo)) {
+            err = vaapi_encode_pick_next(avctx, &pic);
+            if (!err) {
+                av_assert0(pic);
+                pic->encode_order = ctx->encode_order +
+                    av_fifo_can_read(ctx->encode_fifo);
+                err = vaapi_encode_issue(avctx, pic);
+                if (err < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+                    return err;
+                }
+                av_fifo_write(ctx->encode_fifo, &pic, 1);
+            }
+        }

-    err = vaapi_encode_issue(avctx, pic);
-    if (err < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
-        return err;
+        if (!av_fifo_can_read(ctx->encode_fifo))
+            return err;
+
+        // More frames can be buffered
+        if (av_fifo_can_write(ctx->encode_fifo) && !ctx->end_of_stream)
+            return AVERROR(EAGAIN);
+
+        av_fifo_read(ctx->encode_fifo, &pic, 1);
+        ctx->encode_order = pic->encode_order + 1;
+    } else {
+        pic = NULL;
+        err = vaapi_encode_pick_next(avctx, &pic);
+        if (err < 0)
+            return err;
+        av_assert0(pic);
+
+        pic->encode_order = ctx->encode_order++;
+
+        err = vaapi_encode_issue(avctx, pic);
+        if (err < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+            return err;
+        }
    }

    err = vaapi_encode_output(avctx, pic, pkt);
@ -1220,7 +1252,7 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
            pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
    } else {
        pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
-                                (3 * ctx->output_delay)];
+                                (3 * ctx->output_delay + ctx->async_depth)];
    }
    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n",
           pkt->pts, pkt->dts);
@ -2541,6 +2573,11 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
    vas = vaSyncBuffer(ctx->hwctx->display, VA_INVALID_ID, 0);
    if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
        ctx->has_sync_buffer_func = 1;
+        ctx->encode_fifo = av_fifo_alloc2(ctx->async_depth,
+                                          sizeof(VAAPIEncodePicture *),
+                                          0);
+        if (!ctx->encode_fifo)
+            return AVERROR(ENOMEM);
    }
 #endif

@ -2581,6 +2618,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)

    av_freep(&ctx->codec_sequence_params);
    av_freep(&ctx->codec_picture_params);
+    av_fifo_freep2(&ctx->encode_fifo);

    av_buffer_unref(&ctx->recon_frames_ref);
    av_buffer_unref(&ctx->input_frames_ref);