path: root/core/multimedia/opieplayer/libmad/fixed.h
Diffstat (limited to 'core/multimedia/opieplayer/libmad/fixed.h')
-rw-r--r--  core/multimedia/opieplayer/libmad/fixed.h | 111
1 file changed, 67 insertions(+), 44 deletions(-)
diff --git a/core/multimedia/opieplayer/libmad/fixed.h b/core/multimedia/opieplayer/libmad/fixed.h
index c9b98ca..baa7dc5 100644
--- a/core/multimedia/opieplayer/libmad/fixed.h
+++ b/core/multimedia/opieplayer/libmad/fixed.h
@@ -1,6 +1,6 @@
 /*
  * libmad - MPEG audio decoder library
- * Copyright (C) 2000-2001 Robert Leslie
+ * Copyright (C) 2000-2004 Underbit Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -208,6 +208,21 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
             : "cc"); \
        __result; \
     })
+# elif defined(OPT_INTEL)
+/*
+ * Alternate Intel scaling that may or may not perform better.
+ */
+# define mad_f_scale64(hi, lo) \
+    ({ mad_fixed_t __result; \
+       asm ("shrl %3,%1\n\t" \
+            "shll %4,%2\n\t" \
+            "orl  %2,%1" \
+            : "=rm" (__result) \
+            : "0" (lo), "r" (hi), \
+              "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS) \
+            : "cc"); \
+       __result; \
+    })
 # else
 # define mad_f_scale64(hi, lo) \
     ({ mad_fixed_t __result; \
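The added OPT_INTEL variant performs the 64-bit scale with a shift/shift/or
sequence instead of the single double-shift used by the default path below
it. In portable C the operation is simply "take the 64-bit value hi:lo and
shift it right by MAD_F_SCALEBITS, keeping 32 bits". A minimal sketch, where
the helper name and stdint types are this note's assumptions and
MAD_F_SCALEBITS is taken at its usual value of 28:

    #include <stdint.h>

    #define SCALEBITS 28  /* MAD_F_SCALEBITS == MAD_F_FRACBITS here */

    /* hi:lo is a 64-bit product; keep bits SCALEBITS..SCALEBITS+31 */
    static inline int32_t scale64_sketch(int32_t hi, uint32_t lo)
    {
      return (int32_t) (((uint32_t) hi << (32 - SCALEBITS)) |
                        (lo >> SCALEBITS));
    }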
@@ -232,12 +247,8 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
  */
 # if 1
 /*
- * There's a bug somewhere, possibly in the compiler, that sometimes makes
- * this necessary instead of the default implementation via MAD_F_MLX and
- * mad_f_scale64. It may be related to the use (or lack) of
- * -finline-functions and/or -fstrength-reduce.
- *
- * This is also apparently faster than MAD_F_MLX/mad_f_scale64.
+ * This is faster than the default implementation via MAD_F_MLX() and
+ * mad_f_scale64().
  */
 # define mad_f_mul(x, y) \
     ({ mad_fixed64hi_t __hi; \
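Both the removed and the new comment describe the same fast path: forming the
full 64-bit product and renormalizing it in one asm block. The reference
semantics of mad_f_mul() are a truncating Q28 multiply; a plain-C sketch
assuming a 64-bit intermediate type (the names here are this sketch's own):

    #include <stdint.h>

    #define FRACBITS 28  /* MAD_F_FRACBITS in the default configuration */

    /* reference semantics: ((int64_t) x * y) >> 28, truncated */
    static inline int32_t f_mul_sketch(int32_t x, int32_t y)
    {
      return (int32_t) (((int64_t) x * (int64_t) y) >> FRACBITS);
    }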
@@ -275,7 +286,7 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
     ({ mad_fixed_t __result; \
        asm ("movs %0, %1, lsr %3\n\t" \
             "adc  %0, %0, %2, lsl %4" \
-            : "=r" (__result) \
+            : "=&r" (__result) \
             : "r" (lo), "r" (hi), \
               "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS) \
             : "cc"); \
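The constraint fix to "=&r" marks __result as early-clobber: the movs writes
%0 before %2 (hi) is read by the adc, so the compiler must not allocate them
to the same register. The pair also rounds to nearest, because movs leaves
the last bit shifted out of lo in the carry flag and adc folds it back in. A
portable-C sketch of the rounded scale (helper name, types, and the 28-bit
scale value are assumptions):

    #include <stdint.h>

    #define SCALEBITS 28

    static inline int32_t scale64_round_sketch(int32_t hi, uint32_t lo)
    {
      uint32_t carry = (lo >> (SCALEBITS - 1)) & 1;  /* bit shifted out */
      return (int32_t) (((uint32_t) hi << (32 - SCALEBITS)) +
                        (lo >> SCALEBITS) + carry);
    }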
@@ -343,58 +354,69 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 # elif defined(FPM_PPC)
 
 /*
- * This PowerPC version is tuned for the 4xx embedded processors. It is
- * effectively a tuned version of FPM_64BIT. It is a little faster and just
- * as accurate. The disposition of the least significant bit depends on
- * OPT_ACCURACY via mad_f_scale64().
+ * This PowerPC version is fast and accurate; the disposition of the least
+ * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  */
 # define MAD_F_MLX(hi, lo, x, y) \
-    asm ("mulhw %1, %2, %3\n\t" \
-         "mullw %0, %2, %3" \
-         : "=&r" (lo), "=&r" (hi) \
-         : "%r" (x), "r" (y))
+    do { \
+      asm ("mullw %0,%1,%2" \
+           : "=r" (lo) \
+           : "%r" (x), "r" (y)); \
+      asm ("mulhw %0,%1,%2" \
+           : "=r" (hi) \
+           : "%r" (x), "r" (y)); \
+    } \
+    while (0)
 
-# define MAD_F_MLA(hi, lo, x, y) \
+# if defined(OPT_ACCURACY)
+/*
+ * This gives best accuracy but is not very fast.
+ */
+#  define MAD_F_MLA(hi, lo, x, y) \
     ({ mad_fixed64hi_t __hi; \
        mad_fixed64lo_t __lo; \
        MAD_F_MLX(__hi, __lo, (x), (y)); \
-       asm ("addc %0, %2, %3\n\t" \
-            "adde %1, %4, %5" \
+       asm ("addc %0,%2,%3\n\t" \
+            "adde %1,%4,%5" \
             : "=r" (lo), "=r" (hi) \
-            : "%r" (__lo), "0" (lo), "%r" (__hi), "1" (hi)); \
+            : "%r" (lo), "r" (__lo), \
+              "%r" (hi), "r" (__hi) \
+            : "xer"); \
     })
+# endif
 
 # if defined(OPT_ACCURACY)
 /*
- * This is accurate and ~2 - 2.5 times slower than the unrounded version.
- *
- * The __volatile__ improves the generated code by another 5% (fewer spills
- * to memory); eventually they should be removed.
+ * This is slower than the truncating version below it.
  */
 # define mad_f_scale64(hi, lo) \
-    ({ mad_fixed_t __result; \
-       mad_fixed64hi_t __hi_; \
-       mad_fixed64lo_t __lo_; \
-       asm __volatile__ ("addc %0, %2, %4\n\t" \
-                         "addze %1, %3" \
-                         : "=r" (__lo_), "=r" (__hi_) \
-                         : "r" (lo), "r" (hi), "r" (1 << (MAD_F_SCALEBITS - 1))); \
-       asm __volatile__ ("rlwinm %0, %2,32-%3,0,%3-1\n\t" \
-                         "rlwimi %0, %1,32-%3,%3,31" \
-                         : "=&r" (__result) \
-                         : "r" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)); \
-       __result; \
+    ({ mad_fixed_t __result, __round; \
+       asm ("rotrwi %0,%1,%2" \
+            : "=r" (__result) \
+            : "r" (lo), "i" (MAD_F_SCALEBITS)); \
+       asm ("extrwi %0,%1,1,0" \
+            : "=r" (__round) \
+            : "r" (__result)); \
+       asm ("insrwi %0,%1,%2,0" \
+            : "+r" (__result) \
+            : "r" (hi), "i" (MAD_F_SCALEBITS)); \
+       asm ("add %0,%1,%2" \
+            : "=r" (__result) \
+            : "%r" (__result), "r" (__round)); \
+       __result; \
     })
 # else
 # define mad_f_scale64(hi, lo) \
     ({ mad_fixed_t __result; \
-       asm ("rlwinm %0, %2,32-%3,0,%3-1\n\t" \
-            "rlwimi %0, %1,32-%3,%3,31" \
+       asm ("rotrwi %0,%1,%2" \
             : "=r" (__result) \
-            : "r" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)); \
-       __result; \
+            : "r" (lo), "i" (MAD_F_SCALEBITS)); \
+       asm ("insrwi %0,%1,%2,0" \
+            : "+r" (__result) \
+            : "r" (hi), "i" (MAD_F_SCALEBITS)); \
+       __result; \
     })
-# endif /* OPT_ACCURACY */
+# endif
 
 # define MAD_F_SCALEBITS MAD_F_FRACBITS
 
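The rewritten PPC scaling drops the rlwinm/rlwimi pair in favor of the
simplified mnemonics rotrwi/extrwi/insrwi: rotate lo right by
MAD_F_SCALEBITS, optionally extract the rotated-out bit for rounding under
OPT_ACCURACY, then insert the low-order bits of hi into the top of the
result. A portable-C sketch covering both variants (helper name and types
are assumptions):

    #include <stdint.h>

    #define SCALEBITS 28

    static inline int32_t ppc_scale64_sketch(int32_t hi, uint32_t lo,
                                             int round_nearest)
    {
      /* rotrwi: rotate lo right by SCALEBITS */
      uint32_t r = (lo >> SCALEBITS) | (lo << (32 - SCALEBITS));
      /* extrwi ...,1,0: the MSB after rotation is the bit shifted out */
      uint32_t round = r >> 31;
      /* insrwi ...,SCALEBITS,0: replace the top SCALEBITS bits with hi */
      r = (r & (0xffffffffu >> SCALEBITS)) |
          ((uint32_t) hi << (32 - SCALEBITS));
      return (int32_t) (round_nearest ? r + round : r);
    }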
@@ -428,8 +450,8 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 
 # if !defined(mad_f_mul)
 # define mad_f_mul(x, y) \
-    ({ mad_fixed64hi_t __hi; \
-       mad_fixed64lo_t __lo; \
+    ({ register mad_fixed64hi_t __hi; \
+       register mad_fixed64lo_t __lo; \
        MAD_F_MLX(__hi, __lo, (x), (y)); \
        mad_f_scale64(__hi, __lo); \
     })
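This generic fallback composes the two primitives: MAD_F_MLX() forms the
full 64-bit product and mad_f_scale64() renormalizes it to Q28. A usage
sketch built on the MAD_F() literal macro this header defines; the constants
assume the default 28-bit fraction:

    mad_fixed_t half  = MAD_F(0x08000000);       /* 0.5  in Q28 */
    mad_fixed_t third = MAD_F(0x05555555);       /* ~1/3 in Q28 */
    mad_fixed_t sixth = mad_f_mul(half, third);  /* ~MAD_F(0x02aaaaaa) */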
@@ -469,8 +491,9 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 # define MAD_F_SCALEBITS MAD_F_FRACBITS
 # endif
 
-/* miscellaneous C routines */
+/* C routines */
 
 mad_fixed_t mad_f_abs(mad_fixed_t);
+mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t);
 
 # endif
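mad_f_div() joins mad_f_abs() in the list of C routines; it divides one
fixed-point value by another, yielding a fixed-point quotient. A usage
sketch, assuming MAD_F_FRACBITS == 28 and an in-range result:

    mad_fixed_t a = MAD_F(0x06000000);  /* 0.375  */
    mad_fixed_t b = MAD_F(0x03000000);  /* 0.1875 */
    mad_fixed_t q = mad_f_div(a, b);    /* 2.0 == MAD_F(0x20000000) */
    mad_fixed_t m = mad_f_abs(-a);      /* 0.375  */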