139 #define AANSCALE_BITS 12
142 #define NB_ITS_SPEED 50000
147 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
148 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
149 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
150 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
151 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
152 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
153 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
154 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
164 for (i = 0; i < 64; i++) {
165 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
176 memset(block, 0, 64 *
sizeof(*block));
180 for (i = 0; i < 64; i++)
184 for (i = 0; i < 64; i++)
190 for (i = 0; i < j; i++)
195 block[63] = (block[0] & 1) ^ 1;
205 for (i = 0; i < 64; i++)
208 for (i = 0; i < 64; i++)
211 for (i = 0; i < 64; i++)
214 for (i = 0; i < 64; i++)
215 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
217 for (i = 0; i < 64; i++)
227 int64_t err2, ti, ti1, it1, err_sum = 0;
228 int64_t sysErr[64], sysErrMax = 0;
230 int blockSumErrMax = 0, blockSumErr;
239 for (i = 0; i < 64; i++)
241 for (it = 0; it <
NB_ITS; it++) {
249 for (i = 0; i < 64; i++) {
258 for (i = 0; i < 64; i++) {
265 sysErr[i] +=
block[i] - block1[i];
267 if (abs(
block[i]) > maxout)
268 maxout = abs(
block[i]);
270 if (blockSumErrMax < blockSumErr)
271 blockSumErrMax = blockSumErr;
273 for (i = 0; i < 64; i++)
274 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
276 for (i = 0; i < 64; i++) {
279 printf(
"%7d ", (
int) sysErr[i]);
283 omse = (double) err2 / NB_ITS / 64;
284 ome = (double) err_sum / NB_ITS / 64;
286 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
288 printf(
"%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
289 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
290 omse, ome, (
double) sysErrMax / NB_ITS,
291 maxout, blockSumErrMax);
312 }
while (ti1 < 1000000);
315 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
316 (
double) it1 * 1000.0 / (
double) ti1);
327 static double c8[8][8];
328 static double c4[4][4];
329 double block1[64], block2[64], block3[64];
336 for (i = 0; i < 8; i++) {
338 for (j = 0; j < 8; j++) {
339 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
340 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
341 sum += c8[i][j] * c8[i][j];
345 for (i = 0; i < 4; i++) {
347 for (j = 0; j < 4; j++) {
348 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
349 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
350 sum += c4[i][j] * c4[i][j];
357 for (i = 0; i < 4; i++) {
358 for (j = 0; j < 8; j++) {
359 block1[8 * (2 * i) + j] =
360 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
361 block1[8 * (2 * i + 1) + j] =
362 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
367 for (i = 0; i < 8; i++) {
368 for (j = 0; j < 8; j++) {
370 for (k = 0; k < 8; k++)
371 sum += c8[k][j] * block1[8 * i + k];
372 block2[8 * i + j] = sum;
377 for (i = 0; i < 8; i++) {
378 for (j = 0; j < 4; j++) {
381 for (k = 0; k < 4; k++)
382 sum += c4[k][j] * block2[8 * (2 * k) + i];
383 block3[8 * (2 * j) + i] = sum;
387 for (k = 0; k < 4; k++)
388 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
389 block3[8 * (2 * j + 1) + i] = sum;
394 for (i = 0; i < 8; i++) {
395 for (j = 0; j < 8; j++) {
396 v = block3[8 * i + j];
398 else if (v > 255) v = 255;
399 dest[i * linesize + j] = (int)
rint(v);
405 void (*idct248_put)(
uint8_t *dest,
int line_size,
409 int it, i, it1, ti, ti1, err_max, v;
417 for (it = 0; it <
NB_ITS; it++) {
419 for (i = 0; i < 64; i++)
423 for (i = 0; i < 64; i++)
427 for (i = 0; i < 64; i++)
431 for (i = 0; i < 64; i++) {
439 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
448 for (i = 0; i < 64; i++)
454 }
while (ti1 < 1000000);
457 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
458 (
double) it1 * 1000.0 / (
double) ti1);
463 printf(
"dct-test [-i] [<test-number>]\n"
464 "test-number 0 -> test with random matrixes\n"
465 " 1 -> test with random sparse matrixes\n"
466 " 2 -> do 3. test from mpeg4 std\n"
467 "-i test IDCT implementations\n"
468 "-4 test IDCT248 implementations\n"
476 int main(
int argc,
char **argv)
478 int test_idct = 0, test_248_dct = 0;
490 c =
getopt(argc, argv,
"ih4t");
511 test = atoi(argv[
optind]);
513 printf(
"Libav DCT/IDCT test\n");
518 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
519 for (i = 0; algos[i].
name; i++)
521 err |=
dct_error(&algos[i], test, test_idct, speed);
static double rint(double x)
void ff_fdct_altivec(DCTELEM *block)
static const struct algo idct_tab[]
static uint8_t img_dest[64]
#define AV_CPU_FLAG_ALTIVEC
void ff_simple_idct_8(DCTELEM *block)
void ff_idct_xvid_sse2(short *block)
int main(int argc, char **argv)
#define AV_CPU_FLAG_MMXEXT
void ff_j_rev_dct(DCTELEM *data)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
void ff_fdct_mmx(DCTELEM *block)
const uint16_t ff_aanscales[64]
static int init(AVCodecParserContext *s)
#define AV_CPU_FLAG_ARMV5TE
void ff_fdct_sse2(DCTELEM *block)
void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
void ff_faanidct(DCTELEM block[64])
static const struct algo fdct_tab[]
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
static short idct_simple_mmx_perm[64]
void ff_simple_idct_arm(DCTELEM *data)
void ff_simple_idct_axp(DCTELEM *data)
void ff_fdct_ifast(DCTELEM *data)
static DCTELEM block1[64]
static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng)
void ff_simple_idct_armv5te(DCTELEM *data)
static short idct_mmx_perm[64]
void ff_j_rev_dct_arm(DCTELEM *data)
int64_t av_gettime(void)
Get the current time in microseconds.
static void idct_mmx_init(void)
void ff_bfin_idct(DCTELEM *block)
void ff_faandct(DCTELEM *data)
static void test(const char *base, const char *rel)
static void(WINAPI *cond_broadcast)(pthread_cond_t *cond)
static const uint8_t idct_sse2_row_perm[8]
static int getopt(int argc, char *argv[], char *opts)
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
void ff_simple_idct_armv6(DCTELEM *data)
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT This is a reference implementation...
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
void ff_bfin_fdct(DCTELEM *block)
static const uint16_t scale[4]
#define DECLARE_ALIGNED(n, t, v)
AAN (Arai Agui Nakajima) (I)DCT tables.
header for Xvid IDCT functions
void ff_simple_idct_neon(DCTELEM *data)
static uint8_t img_dest1[64]
common internal and external API header
void ff_jpeg_fdct_islow_8(DCTELEM *data)
void(* func)(DCTELEM *block)
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, int line_size, int16_t *block), int speed)
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT This is a reference implementation...
#define AV_CPU_FLAG_ARMV6
static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
void ff_fdct_mmxext(DCTELEM *block)
static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
enum algo::formattag format
void ff_idct_xvid_mmx(short *block)
void ff_idct_xvid_mmxext(short *block)