mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-30 21:21:10 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			792 lines
		
	
	
	
		
			28 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			792 lines
		
	
	
	
		
			28 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /********************************************************************
 | |
|  *                                                                  *
 | |
|  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 | |
|  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 | |
|  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 | |
|  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 | |
|  *                                                                  *
 | |
|  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
 | |
|  * by the Xiph.Org Foundation http://www.xiph.org/                  *
 | |
|  *                                                                  *
 | |
|  ********************************************************************
 | |
| 
 | |
|   function:
 | |
|   last mod: $Id$
 | |
| 
 | |
|  ********************************************************************/
 | |
| #include <stdlib.h>
 | |
| #include <limits.h>
 | |
| #include <string.h>
 | |
| #include "encint.h"
 | |
| 
 | |
| 
 | |
| 
 | |
/*Scratch state shared by the candidate generators and the motion search for
   a single macro block.*/
typedef struct oc_mcenc_ctx oc_mcenc_ctx;

struct oc_mcenc_ctx{
  /*Candidate motion vectors as (x,y) pairs.
    Entry 0 holds the median predictor; set A follows it, then set B.*/
  int candidates[13][2];
  /*Index of the first set B candidate (one past the end of set A).*/
  int setb0;
  /*Number of valid entries in candidates once set B has been appended.*/
  int ncandidates;
};
 | |
| 
 | |
| 
 | |
| 
 | |
/*Luma SAD at or below which the median predictor is accepted without
   examining any further candidates.*/
#define OC_YSAD_THRESH1            (256)
/*Right shift applied to the second luma SAD threshold to obtain the fraction
   of itself that is added back when inflating it (4 => +1/16).*/
#define OC_YSAD_THRESH2_SCALE_BITS (4)
/*Constant added to the second luma SAD threshold when inflating it.*/
#define OC_YSAD_THRESH2_OFFSET     (64)
 | |
| 
 | |
/*Horizontal displacement of each site in the 3x3 square search pattern.
  Sites are numbered in raster order, so site 4 is the centre.*/
static const int OC_SQUARE_DX[9]={
  -1,0,1,
  -1,0,1,
  -1,0,1
};
/*Vertical displacement of each site in the 3x3 square search pattern.*/
static const int OC_SQUARE_DY[9]={
  -1,-1,-1,
   0, 0, 0,
   1, 1, 1
};
/*Number of valid entries in each row of OC_SQUARE_SITES.
  Rows are indexed by a mask of boundary flags:
   1: dx is at the low edge of the search range (no site with dx-1).
   2: dx is at the high edge of the search range (no site with dx+1).
   4: dy is at the low edge of the search range (no site with dy-1).
   8: dy is at the high edge of the search range (no site with dy+1).
  Masks 3 and 7 would need dx on both edges at once, so they have no sites.*/
static const int OC_SQUARE_NSITES[11]={
  8,5,5,0,
  5,3,3,0,
  5,3,3
};
/*The sites to visit for each boundary mask.
  Only the first OC_SQUARE_NSITES[mask] entries of a row are meaningful; the
   centre site (4) is never listed.*/
static const int OC_SQUARE_SITES[11][8]={
  /*Mask  0: dx interior,       dy interior.*/
  {0,1,2,3,5,6,7,8},
  /*Mask  1: dx at low edge,    dy interior.*/
  {1,2,5,7,8},
  /*Mask  2: dx at high edge,   dy interior.*/
  {0,1,3,6,7},
  /*Mask  3: dx at both edges (unused placeholder).*/
  {-1},
  /*Mask  4: dx interior,       dy at low edge.*/
  {3,5,6,7,8},
  /*Mask  5: dx at low edge,    dy at low edge.*/
  {5,7,8},
  /*Mask  6: dx at high edge,   dy at low edge.*/
  {3,6,7},
  /*Mask  7: dx at both edges (unused placeholder).*/
  {-1},
  /*Mask  8: dx interior,       dy at high edge.*/
  {0,1,2,3,5},
  /*Mask  9: dx at low edge,    dy at high edge.*/
  {1,2,5},
  /*Mask 10: dx at high edge,   dy at high edge.*/
  {0,1,3}
};
 | |
| 
 | |
| 
 | |
| static void oc_mcenc_find_candidates_a(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 | |
|  oc_mv _accum,int _mbi,int _frame){
 | |
|   oc_mb_enc_info *embs;
 | |
|   int             accum_x;
 | |
|   int             accum_y;
 | |
|   int             a[3][2];
 | |
|   int             ncandidates;
 | |
|   unsigned        nmbi;
 | |
|   int             i;
 | |
|   embs=_enc->mb_info;
 | |
|   /*Skip a position to store the median predictor in.*/
 | |
|   ncandidates=1;
 | |
|   if(embs[_mbi].ncneighbors>0){
 | |
|     /*Fill in the first part of set A: the vectors from adjacent blocks.*/
 | |
|     for(i=0;i<embs[_mbi].ncneighbors;i++){
 | |
|       nmbi=embs[_mbi].cneighbors[i];
 | |
|       _mcenc->candidates[ncandidates][0]=
 | |
|        OC_MV_X(embs[nmbi].analysis_mv[0][_frame]);
 | |
|       _mcenc->candidates[ncandidates][1]=
 | |
|        OC_MV_Y(embs[nmbi].analysis_mv[0][_frame]);
 | |
|       ncandidates++;
 | |
|     }
 | |
|   }
 | |
|   accum_x=OC_MV_X(_accum);
 | |
|   accum_y=OC_MV_Y(_accum);
 | |
|   /*Add a few additional vectors to set A: the vectors used in the previous
 | |
|      frames and the (0,0) vector.*/
 | |
|   _mcenc->candidates[ncandidates][0]=accum_x;
 | |
|   _mcenc->candidates[ncandidates][1]=accum_y;
 | |
|   ncandidates++;
 | |
|   _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
 | |
|    OC_MV_X(embs[_mbi].analysis_mv[1][_frame])+accum_x,31);
 | |
|   _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
 | |
|    OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])+accum_y,31);
 | |
|   ncandidates++;
 | |
|   _mcenc->candidates[ncandidates][0]=0;
 | |
|   _mcenc->candidates[ncandidates][1]=0;
 | |
|   ncandidates++;
 | |
|   /*Use the first three vectors of set A to find our best predictor: their
 | |
|      median.*/
 | |
|   memcpy(a,_mcenc->candidates+1,sizeof(a));
 | |
|   OC_SORT2I(a[0][0],a[1][0]);
 | |
|   OC_SORT2I(a[0][1],a[1][1]);
 | |
|   OC_SORT2I(a[1][0],a[2][0]);
 | |
|   OC_SORT2I(a[1][1],a[2][1]);
 | |
|   OC_SORT2I(a[0][0],a[1][0]);
 | |
|   OC_SORT2I(a[0][1],a[1][1]);
 | |
|   _mcenc->candidates[0][0]=a[1][0];
 | |
|   _mcenc->candidates[0][1]=a[1][1];
 | |
|   _mcenc->setb0=ncandidates;
 | |
| }
 | |
| 
 | |
| static void oc_mcenc_find_candidates_b(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 | |
|  oc_mv _accum,int _mbi,int _frame){
 | |
|   oc_mb_enc_info *embs;
 | |
|   int             accum_x;
 | |
|   int             accum_y;
 | |
|   int             ncandidates;
 | |
|   embs=_enc->mb_info;
 | |
|   accum_x=OC_MV_X(_accum);
 | |
|   accum_y=OC_MV_Y(_accum);
 | |
|   /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/
 | |
|   ncandidates=_mcenc->setb0;
 | |
|   /*Use only the current block. Using more did not appear to be helpful
 | |
|     with the current selection logic due to escaping the local search too
 | |
|     quickly.*/
 | |
|   _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
 | |
|    2*OC_MV_X(embs[_mbi].analysis_mv[1][_frame])
 | |
|    -OC_MV_X(embs[_mbi].analysis_mv[2][_frame])+accum_x,31);
 | |
|   _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
 | |
|    2*OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])
 | |
|    -OC_MV_Y(embs[_mbi].analysis_mv[2][_frame])+accum_y,31);
 | |
|   ncandidates++;
 | |
|   _mcenc->ncandidates=ncandidates;
 | |
| }
 | |
| 
 | |
| static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
 | |
|  const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 | |
|  int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 | |
|  const unsigned char *_ref,int _ystride,unsigned _best_err){
 | |
|   unsigned err;
 | |
|   int      bi;
 | |
|   err=0;
 | |
|   for(bi=0;bi<4;bi++){
 | |
|     ptrdiff_t frag_offs;
 | |
|     frag_offs=_frag_buf_offs[_fragis[bi]];
 | |
|     err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
 | |
|      _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
 | |
|   }
 | |
|   return err;
 | |
| }
 | |
| 
 | |
| static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
 | |
|  const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
 | |
|  int _mvoffset0,int _mvoffset1,const unsigned char *_src,
 | |
|  const unsigned char *_ref,int _ystride,unsigned _best_err){
 | |
|   unsigned err;
 | |
|   int      dc;
 | |
|   int      bi;
 | |
|   err=0;
 | |
|   for(bi=0;bi<4;bi++){
 | |
|     ptrdiff_t frag_offs;
 | |
|     frag_offs=_frag_buf_offs[_fragis[bi]];
 | |
|     err+=oc_enc_frag_satd2(_enc,&dc,_src+frag_offs,
 | |
|      _ref+frag_offs+_mvoffset0,_ref+frag_offs+_mvoffset1,_ystride);
 | |
|     err+=abs(dc);
 | |
|   }
 | |
|   return err;
 | |
| }
 | |
| 
 | |
| static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 | |
|  const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 | |
|  const unsigned char *_src,const unsigned char *_ref,int _ystride,
 | |
|  unsigned _block_err[4]){
 | |
|   unsigned err;
 | |
|   int      mvoffset;
 | |
|   int      bi;
 | |
|   mvoffset=_dx+_dy*_ystride;
 | |
|   err=0;
 | |
|   for(bi=0;bi<4;bi++){
 | |
|     ptrdiff_t frag_offs;
 | |
|     unsigned  block_err;
 | |
|     frag_offs=_frag_buf_offs[_fragis[bi]];
 | |
|     block_err=oc_enc_frag_sad(_enc,
 | |
|      _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
 | |
|     _block_err[bi]=block_err;
 | |
|     err+=block_err;
 | |
|   }
 | |
|   return err;
 | |
| }
 | |
| 
 | |
| static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
 | |
|  const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
 | |
|  const unsigned char *_src,const unsigned char *_ref,int _ystride){
 | |
|   int mvoffset;
 | |
|   int err;
 | |
|   int bi;
 | |
|   mvoffset=_dx+_dy*_ystride;
 | |
|   err=0;
 | |
|   for(bi=0;bi<4;bi++){
 | |
|     ptrdiff_t frag_offs;
 | |
|     int       dc;
 | |
|     frag_offs=_frag_buf_offs[_fragis[bi]];
 | |
|     if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
 | |
|       err+=oc_enc_frag_satd(_enc,&dc,
 | |
|        _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
 | |
|       err+=abs(dc);
 | |
|     }
 | |
|     else{
 | |
|       err+=oc_enc_frag_sad(_enc,
 | |
|        _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
 | |
|     }
 | |
|   }
 | |
|   return err;
 | |
| }
 | |
| 
 | |
| static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
 | |
|  ptrdiff_t _frag_offs,int _dx,int _dy,
 | |
|  const unsigned char *_src,const unsigned char *_ref,int _ystride){
 | |
|   unsigned err;
 | |
|   int      dc;
 | |
|   err=oc_enc_frag_satd(_enc,&dc,
 | |
|    _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride);
 | |
|   return err+abs(dc);
 | |
| }
 | |
| 
 | |
| /*Perform a motion vector search for this macro block against a single
 | |
|    reference frame.
 | |
|   As a bonus, individual block motion vectors are computed as well, as much of
 | |
|    the work can be shared.
 | |
|   The actual motion vector is stored in the appropriate place in the
 | |
|    oc_mb_enc_info structure.
 | |
|   _accum:      Drop frame/golden MV accumulators.
 | |
|   _mbi:        The macro block index.
 | |
|   _frame:      The frame to use for SATD calculations and refinement,
 | |
|                 either OC_FRAME_PREV or OC_FRAME_GOLD.
 | |
|   _frame_full: The frame to perform the 1px search on, one of OC_FRAME_PREV,
 | |
|                 OC_FRAME_GOLD, OC_FRAME_PREV_ORIG, or OC_FRAME_GOLD_ORIG.*/
 | |
| void oc_mcenc_search_frame(oc_enc_ctx *_enc,oc_mv _accum,int _mbi,int _frame,
 | |
|  int _frame_full){
 | |
|   /*Note: Traditionally this search is done using a rate-distortion objective
 | |
|      function of the form D+lambda*R.
 | |
|     However, xiphmont tested this and found it produced a small degredation,
 | |
|      while requiring extra computation.
 | |
|     This is most likely due to Theora's peculiar MV encoding scheme: MVs are
 | |
|      not coded relative to a predictor, and the only truly cheap way to use a
 | |
|      MV is in the LAST or LAST2 MB modes, which are not being considered here.
 | |
|     Therefore if we use the MV found here, it's only because both LAST and
 | |
|      LAST2 performed poorly, and therefore the MB is not likely to be uniform
 | |
|      or suffer from the aperture problem.
 | |
|     Furthermore we would like to re-use the MV found here for as many MBs as
 | |
|      possible, so picking a slightly sub-optimal vector to save a bit or two
 | |
|      may cause increased degredation in many blocks to come.
 | |
|     We could artificially reduce lambda to compensate, but it's faster to just
 | |
|      disable it entirely, and use D (the distortion) as the sole criterion.*/
 | |
|   oc_mcenc_ctx         mcenc;
 | |
|   const ptrdiff_t     *frag_buf_offs;
 | |
|   const ptrdiff_t     *fragis;
 | |
|   const unsigned char *src;
 | |
|   const unsigned char *ref;
 | |
|   const unsigned char *satd_ref;
 | |
|   int                  ystride;
 | |
|   oc_mb_enc_info      *embs;
 | |
|   ogg_int32_t          hit_cache[31];
 | |
|   ogg_int32_t          hitbit;
 | |
|   unsigned             best_block_err[4];
 | |
|   unsigned             block_err[4];
 | |
|   unsigned             best_err;
 | |
|   int                  best_vec[2];
 | |
|   int                  best_block_vec[4][2];
 | |
|   int                  candx;
 | |
|   int                  candy;
 | |
|   int                  bi;
 | |
|   embs=_enc->mb_info;
 | |
|   /*Find some candidate motion vectors.*/
 | |
|   oc_mcenc_find_candidates_a(_enc,&mcenc,_accum,_mbi,_frame);
 | |
|   /*Clear the cache of locations we've examined.*/
 | |
|   memset(hit_cache,0,sizeof(hit_cache));
 | |
|   /*Start with the median predictor.*/
 | |
|   candx=OC_DIV2(mcenc.candidates[0][0]);
 | |
|   candy=OC_DIV2(mcenc.candidates[0][1]);
 | |
|   hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
 | |
|   frag_buf_offs=_enc->state.frag_buf_offs;
 | |
|   fragis=_enc->state.mb_maps[_mbi][0];
 | |
|   src=_enc->state.ref_frame_data[OC_FRAME_IO];
 | |
|   ref=_enc->state.ref_frame_data[_frame_full];
 | |
|   satd_ref=_enc->state.ref_frame_data[_frame];
 | |
|   ystride=_enc->state.ref_ystride[0];
 | |
|   /*TODO: customize error function for speed/(quality+size) tradeoff.*/
 | |
|   best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
 | |
|    frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
 | |
|   best_vec[0]=candx;
 | |
|   best_vec[1]=candy;
 | |
|   if(_frame==OC_FRAME_PREV){
 | |
|     for(bi=0;bi<4;bi++){
 | |
|       best_block_err[bi]=block_err[bi];
 | |
|       best_block_vec[bi][0]=candx;
 | |
|       best_block_vec[bi][1]=candy;
 | |
|     }
 | |
|   }
 | |
|   /*If this predictor fails, move on to set A.*/
 | |
|   if(best_err>OC_YSAD_THRESH1){
 | |
|     unsigned err;
 | |
|     unsigned t2;
 | |
|     int      ncs;
 | |
|     int      ci;
 | |
|     /*Compute the early termination threshold for set A.*/
 | |
|     t2=embs[_mbi].error[_frame];
 | |
|     ncs=OC_MINI(3,embs[_mbi].ncneighbors);
 | |
|     for(ci=0;ci<ncs;ci++){
 | |
|       t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
 | |
|     }
 | |
|     t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
 | |
|     /*Examine the candidates in set A.*/
 | |
|     for(ci=1;ci<mcenc.setb0;ci++){
 | |
|       candx=OC_DIV2(mcenc.candidates[ci][0]);
 | |
|       candy=OC_DIV2(mcenc.candidates[ci][1]);
 | |
|       /*If we've already examined this vector, then we would be using it if it
 | |
|          was better than what we are using.*/
 | |
|       hitbit=(ogg_int32_t)1<<candx+15;
 | |
|       if(hit_cache[candy+15]&hitbit)continue;
 | |
|       hit_cache[candy+15]|=hitbit;
 | |
|       err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
 | |
|        frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
 | |
|       if(err<best_err){
 | |
|         best_err=err;
 | |
|         best_vec[0]=candx;
 | |
|         best_vec[1]=candy;
 | |
|       }
 | |
|       if(_frame==OC_FRAME_PREV){
 | |
|         for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
 | |
|           best_block_err[bi]=block_err[bi];
 | |
|           best_block_vec[bi][0]=candx;
 | |
|           best_block_vec[bi][1]=candy;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|     if(best_err>t2){
 | |
|       oc_mcenc_find_candidates_b(_enc,&mcenc,_accum,_mbi,_frame);
 | |
|       /*Examine the candidates in set B.*/
 | |
|       for(;ci<mcenc.ncandidates;ci++){
 | |
|         candx=OC_DIV2(mcenc.candidates[ci][0]);
 | |
|         candy=OC_DIV2(mcenc.candidates[ci][1]);
 | |
|         hitbit=(ogg_int32_t)1<<candx+15;
 | |
|         if(hit_cache[candy+15]&hitbit)continue;
 | |
|         hit_cache[candy+15]|=hitbit;
 | |
|         err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
 | |
|          frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
 | |
|         if(err<best_err){
 | |
|           best_err=err;
 | |
|           best_vec[0]=candx;
 | |
|           best_vec[1]=candy;
 | |
|         }
 | |
|         if(_frame==OC_FRAME_PREV){
 | |
|           for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
 | |
|             best_block_err[bi]=block_err[bi];
 | |
|             best_block_vec[bi][0]=candx;
 | |
|             best_block_vec[bi][1]=candy;
 | |
|           }
 | |
|         }
 | |
|       }
 | |
|       /*Use the same threshold for set B as in set A.*/
 | |
|       if(best_err>t2){
 | |
|         int best_site;
 | |
|         int nsites;
 | |
|         int sitei;
 | |
|         int site;
 | |
|         int b;
 | |
|         /*Square pattern search.*/
 | |
|         for(;;){
 | |
|           best_site=4;
 | |
|           /*Compose the bit flags for boundary conditions.*/
 | |
|           b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
 | |
|            OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
 | |
|           nsites=OC_SQUARE_NSITES[b];
 | |
|           for(sitei=0;sitei<nsites;sitei++){
 | |
|             site=OC_SQUARE_SITES[b][sitei];
 | |
|             candx=best_vec[0]+OC_SQUARE_DX[site];
 | |
|             candy=best_vec[1]+OC_SQUARE_DY[site];
 | |
|             hitbit=(ogg_int32_t)1<<candx+15;
 | |
|             if(hit_cache[candy+15]&hitbit)continue;
 | |
|             hit_cache[candy+15]|=hitbit;
 | |
|             err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
 | |
|              frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
 | |
|             if(err<best_err){
 | |
|               best_err=err;
 | |
|               best_site=site;
 | |
|             }
 | |
|             if(_frame==OC_FRAME_PREV){
 | |
|               for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
 | |
|                 best_block_err[bi]=block_err[bi];
 | |
|                 best_block_vec[bi][0]=candx;
 | |
|                 best_block_vec[bi][1]=candy;
 | |
|               }
 | |
|             }
 | |
|           }
 | |
|           if(best_site==4)break;
 | |
|           best_vec[0]+=OC_SQUARE_DX[best_site];
 | |
|           best_vec[1]+=OC_SQUARE_DY[best_site];
 | |
|         }
 | |
|         /*Final 4-MV search.*/
 | |
|         /*Simply use 1/4 of the macro block set A and B threshold as the
 | |
|            individual block threshold.*/
 | |
|         if(_frame==OC_FRAME_PREV){
 | |
|           t2>>=2;
 | |
|           for(bi=0;bi<4;bi++){
 | |
|             if(best_block_err[bi]>t2){
 | |
|               /*Square pattern search.
 | |
|                 We do this in a slightly interesting manner.
 | |
|                 We continue to check the SAD of all four blocks in the
 | |
|                  macro block.
 | |
|                 This gives us two things:
 | |
|                  1) We can continue to use the hit_cache to avoid duplicate
 | |
|                      checks.
 | |
|                     Otherwise we could continue to read it, but not write to it
 | |
|                      without saving and restoring it for each block.
 | |
|                     Note that we could still eliminate a large number of
 | |
|                      duplicate checks by taking into account the site we came
 | |
|                      from when choosing the site list.
 | |
|                     We can still do that to avoid extra hit_cache queries, and
 | |
|                      it might even be a speed win.
 | |
|                  2) It gives us a slightly better chance of escaping local
 | |
|                      minima.
 | |
|                     We would not be here if we weren't doing a fairly bad job
 | |
|                      in finding a good vector, and checking these vectors can
 | |
|                      save us from 100 to several thousand points off our SAD 1
 | |
|                      in 15 times.
 | |
|                 TODO: Is this a good idea?
 | |
|                 Who knows.
 | |
|                 It needs more testing.*/
 | |
|               for(;;){
 | |
|                 int bestx;
 | |
|                 int besty;
 | |
|                 int bj;
 | |
|                 bestx=best_block_vec[bi][0];
 | |
|                 besty=best_block_vec[bi][1];
 | |
|                 /*Compose the bit flags for boundary conditions.*/
 | |
|                 b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
 | |
|                  OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
 | |
|                 nsites=OC_SQUARE_NSITES[b];
 | |
|                 for(sitei=0;sitei<nsites;sitei++){
 | |
|                   site=OC_SQUARE_SITES[b][sitei];
 | |
|                   candx=bestx+OC_SQUARE_DX[site];
 | |
|                   candy=besty+OC_SQUARE_DY[site];
 | |
|                   hitbit=(ogg_int32_t)1<<candx+15;
 | |
|                   if(hit_cache[candy+15]&hitbit)continue;
 | |
|                   hit_cache[candy+15]|=hitbit;
 | |
|                   err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
 | |
|                    frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
 | |
|                   if(err<best_err){
 | |
|                     best_err=err;
 | |
|                     best_vec[0]=candx;
 | |
|                     best_vec[1]=candy;
 | |
|                   }
 | |
|                   for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
 | |
|                     best_block_err[bj]=block_err[bj];
 | |
|                     best_block_vec[bj][0]=candx;
 | |
|                     best_block_vec[bj][1]=candy;
 | |
|                   }
 | |
|                 }
 | |
|                 if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
 | |
|                   break;
 | |
|                 }
 | |
|               }
 | |
|             }
 | |
|           }
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
 | |
|   candx=best_vec[0];
 | |
|   candy=best_vec[1];
 | |
|   embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
 | |
|    frag_buf_offs,fragis,candx,candy,src,satd_ref,ystride);
 | |
|   embs[_mbi].analysis_mv[0][_frame]=OC_MV(candx<<1,candy<<1);
 | |
|   if(_frame==OC_FRAME_PREV&&_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
 | |
|     for(bi=0;bi<4;bi++){
 | |
|       candx=best_block_vec[bi][0];
 | |
|       candy=best_block_vec[bi][1];
 | |
|       embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
 | |
|        frag_buf_offs[fragis[bi]],candx,candy,src,satd_ref,ystride);
 | |
|       embs[_mbi].block_mv[bi]=OC_MV(candx<<1,candy<<1);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
 | |
|   oc_mv2 *mvs;
 | |
|   oc_mv   accum_p;
 | |
|   oc_mv   accum_g;
 | |
|   oc_mv   mv2_p;
 | |
|   mvs=_enc->mb_info[_mbi].analysis_mv;
 | |
|   if(_enc->prevframe_dropped)accum_p=mvs[0][OC_FRAME_PREV];
 | |
|   else accum_p=0;
 | |
|   accum_g=mvs[2][OC_FRAME_GOLD];
 | |
|   /*Move the motion vector predictors back a frame.*/
 | |
|   mv2_p=mvs[2][OC_FRAME_PREV];
 | |
|   mvs[2][OC_FRAME_GOLD]=mvs[1][OC_FRAME_GOLD];
 | |
|   mvs[2][OC_FRAME_PREV]=mvs[1][OC_FRAME_PREV];
 | |
|   mvs[1][OC_FRAME_GOLD]=mvs[0][OC_FRAME_GOLD];
 | |
|   mvs[1][OC_FRAME_PREV]=OC_MV_SUB(mvs[0][OC_FRAME_PREV],mv2_p);
 | |
|   /*Search the last frame.*/
 | |
|   oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV,OC_FRAME_PREV_ORIG);
 | |
|   mvs[2][OC_FRAME_PREV]=accum_p;
 | |
|   /*GOLDEN MVs are different from PREV MVs in that they're each absolute
 | |
|      offsets from some frame in the past rather than relative offsets from the
 | |
|      frame before.
 | |
|     For predictor calculation to make sense, we need them to be in the same
 | |
|      form as PREV MVs.*/
 | |
|   mvs[1][OC_FRAME_GOLD]=OC_MV_SUB(mvs[1][OC_FRAME_GOLD],mvs[2][OC_FRAME_GOLD]);
 | |
|   mvs[2][OC_FRAME_GOLD]=OC_MV_SUB(mvs[2][OC_FRAME_GOLD],accum_g);
 | |
|   /*Search the golden frame.*/
 | |
|   oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD,OC_FRAME_GOLD_ORIG);
 | |
|   /*Put GOLDEN MVs back into absolute offset form.
 | |
|     The newest MV is already an absolute offset.*/
 | |
|   mvs[2][OC_FRAME_GOLD]=OC_MV_ADD(mvs[2][OC_FRAME_GOLD],accum_g);
 | |
|   mvs[1][OC_FRAME_GOLD]=OC_MV_ADD(mvs[1][OC_FRAME_GOLD],mvs[2][OC_FRAME_GOLD]);
 | |
| }
 | |
| 
 | |
#if 0
/*Disabled SAD-only variant of oc_mcenc_ysatd_halfpel_mbrefine().
  Tries the eight positions surrounding a full-pel vector at twice the
   resolution and keeps whichever beats _best_err.
  _mbi:      The macro block index.
  _vec:      On entry, the full-pel vector.
             On exit, twice that vector plus the offset of the best site (the
              centre if nothing improved).
  _best_err: The error of the full-pel vector.
  _frame:    The reference frame to refine against.
  Return: The best error found.*/
static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
 int _vec[2],int _best_err,int _frame){
  const unsigned char *src;
  const unsigned char *ref;
  const ptrdiff_t     *frag_buf_offs;
  const ptrdiff_t     *fragis;
  int                  offset_y[9];
  int                  ystride;
  int                  mvoffset_base;
  int                  best_site;
  int                  sitei;
  int                  err;
  src=_enc->state.ref_frame_data[OC_FRAME_IO];
  /*This used to index with the undeclared name _framei.*/
  ref=_enc->state.ref_frame_data[_frame];
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  ystride=_enc->state.ref_ystride[0];
  mvoffset_base=_vec[0]+_vec[1]*ystride;
  /*Row displacement for each site; the centre (site 4) is never visited.*/
  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
  offset_y[3]=offset_y[4]=offset_y[5]=0;
  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
  best_site=4;
  for(sitei=0;sitei<8;sitei++){
    int site;
    int xmask;
    int ymask;
    int dx;
    int dy;
    int mvoffset0;
    int mvoffset1;
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to calling
       oc_state_get_mv_offsets() for the vector
       ((_vec[0]<<1)+dx,(_vec[1]<<1)+dy) on the luma plane.
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.*/
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
    err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
    if(err<_best_err){
      _best_err=err;
      best_site=site;
    }
  }
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
  return _best_err;
}
#endif
 | |
| 
 | |
| static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
 | |
|  int _mbi,int _vec[2],unsigned _best_err,int _frame){
 | |
|   const unsigned char *src;
 | |
|   const unsigned char *ref;
 | |
|   const ptrdiff_t     *frag_buf_offs;
 | |
|   const ptrdiff_t     *fragis;
 | |
|   int                  offset_y[9];
 | |
|   int                  ystride;
 | |
|   int                  mvoffset_base;
 | |
|   int                  best_site;
 | |
|   int                  sitei;
 | |
|   int                  err;
 | |
|   src=_enc->state.ref_frame_data[OC_FRAME_IO];
 | |
|   ref=_enc->state.ref_frame_data[_frame];
 | |
|   frag_buf_offs=_enc->state.frag_buf_offs;
 | |
|   fragis=_enc->state.mb_maps[_mbi][0];
 | |
|   ystride=_enc->state.ref_ystride[0];
 | |
|   mvoffset_base=_vec[0]+_vec[1]*ystride;
 | |
|   offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
 | |
|   offset_y[3]=offset_y[5]=0;
 | |
|   offset_y[6]=offset_y[7]=offset_y[8]=ystride;
 | |
|   best_site=4;
 | |
|   for(sitei=0;sitei<8;sitei++){
 | |
|     int site;
 | |
|     int xmask;
 | |
|     int ymask;
 | |
|     int dx;
 | |
|     int dy;
 | |
|     int mvoffset0;
 | |
|     int mvoffset1;
 | |
|     site=OC_SQUARE_SITES[0][sitei];
 | |
|     dx=OC_SQUARE_DX[site];
 | |
|     dy=OC_SQUARE_DY[site];
 | |
|     /*The following code SHOULD be equivalent to
 | |
|         oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
 | |
|          (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
 | |
|       However, it should also be much faster, as it involves no multiplies and
 | |
|        doesn't have to handle chroma vectors.*/
 | |
|     xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
 | |
|     ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
 | |
|     mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
 | |
|     mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
 | |
|     if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
 | |
|       err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis,
 | |
|        mvoffset0,mvoffset1,src,ref,ystride,_best_err);
 | |
|     }
 | |
|     else{
 | |
|       err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
 | |
|            mvoffset0,mvoffset1,src,ref,ystride,_best_err);
 | |
|     }
 | |
|     if(err<_best_err){
 | |
|       _best_err=err;
 | |
|       best_site=site;
 | |
|     }
 | |
|   }
 | |
|   _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
 | |
|   _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
 | |
|   return _best_err;
 | |
| }
 | |
| 
 | |
| void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
 | |
|   oc_mb_enc_info *embs;
 | |
|   int             vec[2];
 | |
|   embs=_enc->mb_info;
 | |
|   vec[0]=OC_DIV2(OC_MV_X(embs[_mbi].analysis_mv[0][_frame]));
 | |
|   vec[1]=OC_DIV2(OC_MV_Y(embs[_mbi].analysis_mv[0][_frame]));
 | |
|   embs[_mbi].satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
 | |
|    _mbi,vec,embs[_mbi].satd[_frame],_frame);
 | |
|   embs[_mbi].analysis_mv[0][_frame]=OC_MV(vec[0],vec[1]);
 | |
| }
 | |
| 
 | |
#if 0
/*Disabled SAD-only variant of oc_mcenc_ysatd_halfpel_brefine().
  Tries the eight positions surrounding a block's full-pel vector at twice the
   resolution and keeps whichever beats _best_err.
  _vec:      On entry, the full-pel vector.
             On exit, twice that vector plus the offset of the best site (the
              centre if nothing improved).
  _src:      The source block.
  _ref:      The co-located reference block.
  _ystride:  Line stride of both planes.
  _offset_y: Row displacement for each site of the square pattern.
  _best_err: The error of the full-pel vector.
  Return: The best error found.*/
static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
 int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
 int _offset_y[9],unsigned _best_err){
  int mvoffset_base;
  int best_site;
  int sitei;
  mvoffset_base=_vec[0]+_vec[1]*_ystride;
  best_site=4;
  for(sitei=0;sitei<8;sitei++){
    unsigned err;
    int      site;
    int      xmask;
    int      ymask;
    int      dx;
    int      dy;
    int      mvoffset0;
    int      mvoffset1;
    site=OC_SQUARE_SITES[0][sitei];
    dx=OC_SQUARE_DX[site];
    dy=OC_SQUARE_DY[site];
    /*The following code SHOULD be equivalent to calling
       oc_state_get_mv_offsets() for the vector
       ((_vec[0]<<1)+dx,(_vec[1]<<1)+dy) on the luma plane.
      However, it should also be much faster, as it involves no multiplies and
       doesn't have to handle chroma vectors.*/
    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
    /*This used to pass the undeclared name ystride as the stride.*/
    err=oc_enc_frag_sad2_thresh(_enc,_src,
     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
    if(err<_best_err){
      _best_err=err;
      best_site=site;
    }
  }
  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
  return _best_err;
}
#endif
 | |
| 
 | |
| static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
 | |
|  int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
 | |
|  int _offset_y[9],unsigned _best_err){
 | |
|   int mvoffset_base;
 | |
|   int best_site;
 | |
|   int sitei;
 | |
|   mvoffset_base=_vec[0]+_vec[1]*_ystride;
 | |
|   best_site=4;
 | |
|   for(sitei=0;sitei<8;sitei++){
 | |
|     unsigned err;
 | |
|     int      dc;
 | |
|     int      site;
 | |
|     int      xmask;
 | |
|     int      ymask;
 | |
|     int      dx;
 | |
|     int      dy;
 | |
|     int      mvoffset0;
 | |
|     int      mvoffset1;
 | |
|     site=OC_SQUARE_SITES[0][sitei];
 | |
|     dx=OC_SQUARE_DX[site];
 | |
|     dy=OC_SQUARE_DY[site];
 | |
|     /*The following code SHOULD be equivalent to
 | |
|         oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0,
 | |
|          (_vec[0]<<1)+dx,(_vec[1]<<1)+dy);
 | |
|       However, it should also be much faster, as it involves no multiplies and
 | |
|        doesn't have to handle chroma vectors.*/
 | |
|     xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
 | |
|     ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
 | |
|     mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
 | |
|     mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
 | |
|     err=oc_enc_frag_satd2(_enc,&dc,_src,
 | |
|      _ref+mvoffset0,_ref+mvoffset1,_ystride);
 | |
|     err+=abs(dc);
 | |
|     if(err<_best_err){
 | |
|       _best_err=err;
 | |
|       best_site=site;
 | |
|     }
 | |
|   }
 | |
|   _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
 | |
|   _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
 | |
|   return _best_err;
 | |
| }
 | |
| 
 | |
| void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
 | |
|   oc_mb_enc_info      *embs;
 | |
|   const ptrdiff_t     *frag_buf_offs;
 | |
|   const ptrdiff_t     *fragis;
 | |
|   const unsigned char *src;
 | |
|   const unsigned char *ref;
 | |
|   int                  offset_y[9];
 | |
|   int                  ystride;
 | |
|   int                  bi;
 | |
|   ystride=_enc->state.ref_ystride[0];
 | |
|   frag_buf_offs=_enc->state.frag_buf_offs;
 | |
|   fragis=_enc->state.mb_maps[_mbi][0];
 | |
|   src=_enc->state.ref_frame_data[OC_FRAME_IO];
 | |
|   ref=_enc->state.ref_frame_data[OC_FRAME_PREV];
 | |
|   offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
 | |
|   offset_y[3]=offset_y[5]=0;
 | |
|   offset_y[6]=offset_y[7]=offset_y[8]=ystride;
 | |
|   embs=_enc->mb_info;
 | |
|   for(bi=0;bi<4;bi++){
 | |
|     ptrdiff_t frag_offs;
 | |
|     int       vec[2];
 | |
|     frag_offs=frag_buf_offs[fragis[bi]];
 | |
|     vec[0]=OC_DIV2(OC_MV_X(embs[_mbi].block_mv[bi]));
 | |
|     vec[1]=OC_DIV2(OC_MV_Y(embs[_mbi].block_mv[bi]));
 | |
|     embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,vec,
 | |
|      src+frag_offs,ref+frag_offs,ystride,offset_y,embs[_mbi].block_satd[bi]);
 | |
|     embs[_mbi].ref_mv[bi]=OC_MV(vec[0],vec[1]);
 | |
|   }
 | |
| }
 | 
