Linux-DVB-DVBT-TS
view release on metacpan or search on metacpan
clib/libmpeg2/motion_comp_mmx.c view on Meta::CPAN
#include "mmx.h"
/*
 * Integer tags naming the two extended instruction sets (MMXEXT = 0,
 * 3DNow! = 1).
 * NOTE(review): presumably used to select which byte-averaging instruction
 * the shared routines emit; the use sites are not visible in this excerpt,
 * so confirm before relying on the specific values.
 */
#define CPU_MMXEXT 0
#define CPU_3DNOW 1
/* MMX code - needs a rewrite */
/*
 * Motion Compensation frequently needs to average values using the
 * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction
 * to compute this, but it's been left out of classic MMX.
 *
 * We need to be careful of overflows when doing this computation.
 * Rather than unpacking data to 16-bits, which reduces parallelism,
 * we use the following formulas:
 *
 * (x+y)>>1 == (x&y)+((x^y)>>1)
 * (x+y+1)>>1 == (x|y)-((x^y)>>1)
 */
/*
 * Rounding / masking constants for the packed-byte arithmetic.
 *
 * mask1:  0xfe replicated in each of the 8 bytes.
 *         NOTE(review): presumably ANDed with (x^y) to clear each byte's low
 *         bit before the 64-bit-wide shift right by one, so that no bit
 *         leaks into the neighbouring byte - confirm at the use sites.
 * round4: 0x0002 replicated in each of the four 16-bit words.
 *         NOTE(review): presumably the "+2" rounding term for a four-sample
 *         average, (a+b+c+d+2)>>2, computed in 16-bit lanes - confirm at
 *         the use sites.
 */
static mmx_t mask1 = {0xfefefefefefefefeLL};
static mmx_t round4 = {0x0002000200020002LL};
/*
clib/libmpeg2/motion_comp_vis.c view on Meta::CPAN
#ifdef ARCH_SPARC
#include <inttypes.h>
#include "mpeg2.h"
#include "attributes.h"
#include "mpeg2_internal.h"
#include "vis.h"
/* The trick used in some of this file is the formula from the MMX
 * motion comp code, which is:
 *
 * (x+y+1)>>1 == (x|y)-((x^y)>>1)
 *
 * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
 * We avoid overflows by masking before we do the shift, and we
 * implement the shift by multiplying by 1/2 using mul8x16. So in
 * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
 * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
 * the value 0x80808080 is in f8):
( run in 0.292 second using v1.01-cache-2.11-cpan-33209edd8b4 )