VP8: much faster DC transform handling

A lot of the time the DC block is empty: don't do the WHT in this case. A lot of the rest of the time, there's only one coefficient: make a special DC-only transform for that case. When the block is empty, don't incorrectly mark luma DCT blocks as having DC coefficients. Originally committed as revision 24670 to svn://svn.ffmpeg.org/ffmpeg/trunk
2025-12-08 06:09:50 +00:00 · 2010-08-02 20:57:03 +00:00 · 2010-08-02 20:57:03 +00:00 · f311208cf1
commit f311208cf1
parent c934562c12
3 changed files with 29 additions and 8 deletions
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -51,13 +51,25 @@ static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
        dc[i*4+2] = 0;
        dc[i*4+3] = 0;

-        *block[i][0] = (t0 + t1) >> 3;
-        *block[i][1] = (t3 + t2) >> 3;
-        *block[i][2] = (t0 - t1) >> 3;
-        *block[i][3] = (t3 - t2) >> 3;
+        block[i][0][0] = (t0 + t1) >> 3;
+        block[i][1][0] = (t3 + t2) >> 3;
+        block[i][2][0] = (t0 - t1) >> 3;
+        block[i][3][0] = (t3 - t2) >> 3;
    }
 }

+static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
+{   /* DC-only variant of the luma DC transform: reads dc[0] alone and gives every sub-block the same value. */
+    int i, val = (dc[0] + 3) >> 3;   /* add 3, then shift right by 3 (same >> 3 scaling as the full transform) */
+    dc[0] = 0;                       /* clear the coefficient that was just consumed; other dc[] entries are not touched */
+
+    for (i = 0; i < 4; i++) {        /* each pass stores val into element 0 of block[i][0..3] */
+        block[i][0][0] = val;
+        block[i][1][0] = val;
+        block[i][2][0] = val;
+        block[i][3][0] = val;
+    }
+}

 #define MUL_20091(a) ((((a)*20091) >> 16) + (a))
 #define MUL_35468(a)  (((a)*35468) >> 16)
@@ -480,6 +492,7 @@ VP8_BILINEAR(4)
 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
 {
    dsp->vp8_luma_dc_wht    = vp8_luma_dc_wht_c;
+    dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
    dsp->vp8_idct_add       = vp8_idct_add_c;
    dsp->vp8_idct_dc_add    = vp8_idct_dc_add_c;
    dsp->vp8_idct_dc_add4y  = vp8_idct_dc_add4y_c;