1 files changed, 67 insertions, 44 deletions
diff --git a/core/multimedia/opieplayer/libmad/fixed.h b/core/multimedia/opieplayer/libmad/fixed.h
index c9b98ca..baa7dc5 100644
--- a/core/multimedia/opieplayer/libmad/fixed.h
+++ b/core/multimedia/opieplayer/libmad/fixed.h
@@ -1,19 +1,19 @@
 /*
  * libmad - MPEG audio decoder library
- * Copyright (C) 2000-2001 Robert Leslie
+ * Copyright (C) 2000-2004 Underbit Technologies, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  * $Id$
@@ -195,62 +195,73 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 #    define mad_f_scale64(hi, lo)  \
     ({ mad_fixed64hi_t __hi_;  \
        mad_fixed64lo_t __lo_;  \
        mad_fixed_t __result;  \
        asm ("addl %4,%2\n\t"  \
 	    "adcl %5,%3"  \
 	    : "=rm" (__lo_), "=rm" (__hi_)  \
 	    : "0" (lo), "1" (hi),  \
 	      "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
 	    : "cc");  \
        asm ("shrdl %3,%2,%1"  \
 	    : "=rm" (__result)  \
 	    : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
 	    : "cc");  \
        __result;  \
     })
+#   elif defined(OPT_INTEL)
+/*
+ * Alternate Intel scaling, not always faster; shll modifies a copy of hi.
+ */
+#    define mad_f_scale64(hi, lo)  \
+    ({ mad_fixed_t __result;  \
+       mad_fixed64hi_t __hi_ = (hi);  \
+       asm ("shrl %3,%0\n\t"  \
+	    "shll %4,%1\n\t"  \
+	    "orl %1,%0"  \
+	    : "=rm" (__result), "+r" (__hi_)  \
+	    : "0" (lo), "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS)  \
+	    : "cc");  \
+       __result;  \
+    })
 #   else
 #    define mad_f_scale64(hi, lo)  \
     ({ mad_fixed_t __result;  \
        asm ("shrdl %3,%2,%1"  \
 	    : "=rm" (__result)  \
 	    : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
 	    : "cc");  \
        __result;  \
     })
 #   endif  /* OPT_ACCURACY */
 
 #   define MAD_F_SCALEBITS  MAD_F_FRACBITS
 #  endif
 
 /* --- ARM ----------------------------------------------------------------- */
 
 # elif defined(FPM_ARM)
 
 /* 
  * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
  * least significant bit is properly rounded at no CPU cycle cost!
  */
 # if 1
 /*
- * There's a bug somewhere, possibly in the compiler, that sometimes makes
- * this necessary instead of the default implementation via MAD_F_MLX and
- * mad_f_scale64. It may be related to the use (or lack) of
- * -finline-functions and/or -fstrength-reduce.
- *
- * This is also apparently faster than MAD_F_MLX/mad_f_scale64.
+ * This is faster than the default implementation via MAD_F_MLX() and
+ * mad_f_scale64().
  */
 #  define mad_f_mul(x, y)  \
     ({ mad_fixed64hi_t __hi;  \
        mad_fixed64lo_t __lo;  \
        mad_fixed_t __result;  \
        asm ("smull	%0, %1, %3, %4\n\t"  \
 	    "movs	%0, %0, lsr %5\n\t"  \
 	    "adc	%2, %0, %1, lsl %6"  \
 	    : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
 	    : "%r" (x), "r" (y),  \
 	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
 	    : "cc");  \
        __result;  \
     })
 # endif
 
@@ -262,33 +273,33 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 #  define MAD_F_MLA(hi, lo, x, y)  \
     asm ("smlal	%0, %1, %2, %3"  \
 	 : "+r" (lo), "+r" (hi)  \
 	 : "%r" (x), "r" (y))
 
 #  define MAD_F_MLN(hi, lo)  \
     asm ("rsbs	%0, %2, #0\n\t"  \
 	 "rsc	%1, %3, #0"  \
 	 : "=r" (lo), "=r" (hi)  \
 	 : "0" (lo), "1" (hi)  \
 	 : "cc")
 
 #  define mad_f_scale64(hi, lo)  \
     ({ mad_fixed_t __result;  \
        asm ("movs	%0, %1, lsr %3\n\t"  \
 	    "adc	%0, %0, %2, lsl %4"  \
-	    : "=r" (__result)  \
+	    : "=&r" (__result)  \
 	    : "r" (lo), "r" (hi),  \
 	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
 	    : "cc");  \
        __result;  \
     })
 
 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 
 /* --- MIPS ---------------------------------------------------------------- */
 
 # elif defined(FPM_MIPS)
 
 /*
  * This MIPS version is fast and accurate; the disposition of the least
  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  */
@@ -330,84 +341,95 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 
 /*
  * This SPARC V8 version is fast and accurate; the disposition of the least
  * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  */
 #  define MAD_F_MLX(hi, lo, x, y)  \
     asm ("smul %2, %3, %0\n\t"  \
 	 "rd %%y, %1"  \
 	 : "=r" (lo), "=r" (hi)  \
 	 : "%r" (x), "rI" (y))
 
 /* --- PowerPC ------------------------------------------------------------- */
 
 # elif defined(FPM_PPC)
 
 /*
- * This PowerPC version is tuned for the 4xx embedded processors. It is
- * effectively a tuned version of FPM_64BIT. It is a little faster and just
- * as accurate. The disposition of the least significant bit depends on
- * OPT_ACCURACY via mad_f_scale64().
+ * This PowerPC version is fast and accurate; the disposition of the least
+ * significant bit depends on OPT_ACCURACY via mad_f_scale64().
  */
 #  define MAD_F_MLX(hi, lo, x, y)  \
-    asm ("mulhw %1, %2, %3\n\t"  \
-	 "mullw %0, %2, %3"  \
-	 : "=&r" (lo), "=&r" (hi)  \
-	 : "%r" (x), "r" (y))
+    do {  \
+      asm ("mullw %0,%1,%2"  \
+	   : "=r" (lo)  \
+	   : "%r" (x), "r" (y));  \
+      asm ("mulhw %0,%1,%2"  \
+	   : "=r" (hi)  \
+	   : "%r" (x), "r" (y));  \
+    }  \
+    while (0)
 
-#  define MAD_F_MLA(hi, lo, x, y)  \
+#  if defined(OPT_ACCURACY)
+/*
+ * Accurate but slow; lo/hi are tied so addc cannot clobber adde's inputs.
+ */
+#   define MAD_F_MLA(hi, lo, x, y)  \
     ({ mad_fixed64hi_t __hi;  \
        mad_fixed64lo_t __lo;  \
        MAD_F_MLX(__hi, __lo, (x), (y));  \
-       asm ("addc %0, %2, %3\n\t"  \
-	    "adde %1, %4, %5"  \
+       asm ("addc %0,%2,%3\n\t"  \
+	    "adde %1,%4,%5"  \
 	    : "=r" (lo), "=r" (hi)  \
-	    : "%r" (__lo), "0" (lo), "%r" (__hi), "1" (hi));  \
+	    : "0" (lo), "r" (__lo),  \
+	      "1" (hi), "r" (__hi)  \
+	    : "xer");  \
     })
+#  endif
 
 #  if defined(OPT_ACCURACY)
 /*
- * This is accurate and ~2 - 2.5 times slower than the unrounded version.
- *
- * The __volatile__ improves the generated code by another 5% (fewer spills
- * to memory); eventually they should be removed.
+ * This is slower than the truncating version below it.
  */
 #   define mad_f_scale64(hi, lo)  \
-    ({ mad_fixed_t __result;  \
-       mad_fixed64hi_t __hi_;  \
-       mad_fixed64lo_t __lo_;  \
-       asm __volatile__ ("addc %0, %2, %4\n\t"  \
-			 "addze %1, %3"  \
-	    : "=r" (__lo_), "=r" (__hi_)  \
-	    : "r" (lo), "r" (hi), "r" (1 << (MAD_F_SCALEBITS - 1)));  \
-       asm __volatile__ ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
-			 "rlwimi %0, %1,32-%3,%3,31"  \
-	    : "=&r" (__result)  \
-	    : "r" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS));  \
-	    __result;  \
+    ({ mad_fixed_t __result, __round;  \
+       asm ("rotrwi %0,%1,%2"  \
+	    : "=r" (__result)  \
+	    : "r" (lo), "i" (MAD_F_SCALEBITS));  \
+       asm ("extrwi %0,%1,1,0"  \
+	    : "=r" (__round)  \
+	    : "r" (__result));  \
+       asm ("insrwi %0,%1,%2,0"  \
+	    : "+r" (__result)  \
+	    : "r" (hi), "i" (MAD_F_SCALEBITS));  \
+       asm ("add %0,%1,%2"  \
+	    : "=r" (__result)  \
+	    : "%r" (__result), "r" (__round));  \
+       __result;  \
     })
 #  else
 #   define mad_f_scale64(hi, lo)  \
     ({ mad_fixed_t __result;  \
-       asm ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
-	    "rlwimi %0, %1,32-%3,%3,31"  \
+       asm ("rotrwi %0,%1,%2"  \
 	    : "=r" (__result)  \
-	    : "r" (lo), "r" (hi), "I" (MAD_F_SCALEBITS));  \
-	    __result;  \
+	    : "r" (lo), "i" (MAD_F_SCALEBITS));  \
+       asm ("insrwi %0,%1,%2,0"  \
+	    : "+r" (__result)  \
+	    : "r" (hi), "i" (MAD_F_SCALEBITS));  \
+       __result;  \
     })
-#  endif  /* OPT_ACCURACY */
+#  endif
 
 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 
 /* --- Default ------------------------------------------------------------- */
 
 # elif defined(FPM_DEFAULT)
 
 /*
  * This version is the most portable but it loses significant accuracy.
  * Furthermore, accuracy is biased against the second argument, so care
  * should be taken when ordering operands.
  *
  * The scale factors are constant as this is not used with SSO.
  *
  * Pre-rounding is required to stay within the limits of compliance.
  */
@@ -415,34 +437,34 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 #   define mad_f_mul(x, y)	(((x) >> 12) * ((y) >> 16))
 #  else
 #   define mad_f_mul(x, y)	((((x) + (1L << 11)) >> 12) *  \
 				 (((y) + (1L << 15)) >> 16))
 #  endif
 
 /* ------------------------------------------------------------------------- */
 
 # else
 #  error "no FPM selected"
 # endif
 
 /* default implementations */
 
 # if !defined(mad_f_mul)
 #  define mad_f_mul(x, y)  \
-    ({ mad_fixed64hi_t __hi;  \
-       mad_fixed64lo_t __lo;  \
+    ({ register mad_fixed64hi_t __hi;  \
+       register mad_fixed64lo_t __lo;  \
        MAD_F_MLX(__hi, __lo, (x), (y));  \
        mad_f_scale64(__hi, __lo);  \
     })
 # endif
 
 # if !defined(MAD_F_MLA)
 #  define MAD_F_ML0(hi, lo, x, y)	((lo)  = mad_f_mul((x), (y)))
 #  define MAD_F_MLA(hi, lo, x, y)	((lo) += mad_f_mul((x), (y)))
 #  define MAD_F_MLN(hi, lo)		((lo)  = -(lo))
 #  define MAD_F_MLZ(hi, lo)		((void) (hi), (mad_fixed_t) (lo))
 # endif
 
 # if !defined(MAD_F_ML0)
 #  define MAD_F_ML0(hi, lo, x, y)	MAD_F_MLX((hi), (lo), (x), (y))
 # endif
 
@@ -456,21 +478,22 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 
 # if !defined(mad_f_scale64)
 #  if defined(OPT_ACCURACY)
 #   define mad_f_scale64(hi, lo)  \
     ((((mad_fixed_t)  \
        (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
 	((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
 #  else
 #   define mad_f_scale64(hi, lo)  \
     ((mad_fixed_t)  \
      (((hi) << (32 - MAD_F_SCALEBITS)) |  \
       ((lo) >> MAD_F_SCALEBITS)))
 #  endif
 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 # endif
 
-/* miscellaneous C routines */
+/* C routines */
 
 mad_fixed_t mad_f_abs(mad_fixed_t);
+mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t);
 
 # endif