1 files changed, 67 insertions, 44 deletions
diff --git a/core/multimedia/opieplayer/libmad/fixed.h b/core/multimedia/opieplayer/libmad/fixed.h
index c9b98ca..baa7dc5 100644
--- a/core/multimedia/opieplayer/libmad/fixed.h
+++ b/core/multimedia/opieplayer/libmad/fixed.h
@@ -1,27 +1,27 @@
 /*
 * libmad - MPEG audio decoder library
- * Copyright (C) 2000-2001 Robert Leslie
+ * Copyright (C) 2000-2004 Underbit Technologies, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * $Id$
 */
 # ifndef LIBMAD_FIXED_H
 # define LIBMAD_FIXED_H
 # if SIZEOF_INT >= 4
 typedef   signed int mad_fixed_t;
@@ -187,116 +187,127 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
            : "cc");  \
    })
 #   endif  /* OPT_ACCURACY */
 #   if defined(OPT_ACCURACY)
 /*
 * Surprisingly, this is faster than SHRD followed by ADC.
 */
 #    define mad_f_scale64(hi, lo)  \
    ({ mad_fixed64hi_t __hi_;  \
       mad_fixed64lo_t __lo_;  \
       mad_fixed_t __result;  \
       asm ("addl %4,%2\n\t"  \
            "adcl %5,%3"  \
            : "=rm" (__lo_), "=rm" (__hi_)  \
            : "0" (lo), "1" (hi),  \
              "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
            : "cc");  \
       asm ("shrdl %3,%2,%1"  \
            : "=rm" (__result)  \
            : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })
+#   elif defined(OPT_INTEL)
+/*
+ * Alternate Intel scaling that may or may not perform better.
+ *
+ * The low word is shifted right by MAD_F_SCALEBITS, the high word is
+ * shifted left by (32 - MAD_F_SCALEBITS), and the two are OR'ed together;
+ * no rounding is applied. The high word is shifted in place, so it is
+ * copied into a local read-write operand rather than being passed as an
+ * input-only operand, which the asm is not allowed to modify.
+ */
+#    define mad_f_scale64(hi, lo)  \
+    ({ mad_fixed64hi_t __hi_ = (hi);  \
+       mad_fixed_t __result;  \
+       asm ("shrl %3,%0\n\t"  \
+            "shll %4,%1\n\t"  \
+            "orl %1,%0"  \
+            : "=rm" (__result), "+r" (__hi_)  \
+            : "0" (lo),  \
+              "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS)  \
+            : "cc");  \
+       __result;  \
+    })
 #   else
 #    define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("shrdl %3,%2,%1"  \
            : "=rm" (__result)  \
            : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })
 #   endif  /* OPT_ACCURACY */
 #   define MAD_F_SCALEBITS  MAD_F_FRACBITS
 #  endif
 /* --- ARM ----------------------------------------------------------------- */
 # elif defined(FPM_ARM)
 /* 
 * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
 * least significant bit is properly rounded at no CPU cycle cost!
 */
 # if 1
 /*
- * There's a bug somewhere, possibly in the compiler, that sometimes makes
+ * This is faster than the default implementation via MAD_F_MLX() and
- * this necessary instead of the default implementation via MAD_F_MLX and
+ * mad_f_scale64().
- * mad_f_scale64. It may be related to the use (or lack) of
- * -finline-functions and/or -fstrength-reduce.
- *
- * This is also apparently faster than MAD_F_MLX/mad_f_scale64.
 */
 #  define mad_f_mul(x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       mad_fixed_t __result;  \
       asm ("smull %0, %1, %3, %4\n\t"  \
            "movs %0, %0, lsr %5\n\t"  \
            "adc %2, %0, %1, lsl %6"  \
            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
            : "%r" (x), "r" (y),  \
              "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })
 # endif
 #  define MAD_F_MLX(hi, lo, x, y)  \
    asm ("smull %0, %1, %2, %3"  \
         : "=&r" (lo), "=&r" (hi)  \
         : "%r" (x), "r" (y))
 #  define MAD_F_MLA(hi, lo, x, y)  \
    asm ("smlal %0, %1, %2, %3"  \
         : "+r" (lo), "+r" (hi)  \
         : "%r" (x), "r" (y))
 #  define MAD_F_MLN(hi, lo)  \
    asm ("rsbs %0, %2, #0\n\t"  \
         "rsc %1, %3, #0"  \
         : "=r" (lo), "=r" (hi)  \
         : "0" (lo), "1" (hi)  \
         : "cc")
 #  define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("movs %0, %1, lsr %3\n\t"  \
            "adc %0, %0, %2, lsl %4"  \
-            : "=r" (__result)  \
+            : "=&r" (__result)  \
            : "r" (lo), "r" (hi),  \
              "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
            : "cc");  \
       __result;  \
    })
 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 /* --- MIPS ---------------------------------------------------------------- */
 # elif defined(FPM_MIPS)
 /*
 * This MIPS version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
 #  define MAD_F_MLX(hi, lo, x, y)  \
    asm ("mult %2,%3"  \
         : "=l" (lo), "=h" (hi)  \
         : "%r" (x), "r" (y))
 # if defined(HAVE_MADD_ASM)
 #  define MAD_F_MLA(hi, lo, x, y)  \
            asm ("madd%2,%3"  \
@@ -322,155 +333,167 @@ mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
 #  define mad_f_scale64(hi, lo)  \
    ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 # endif
 /* --- SPARC --------------------------------------------------------------- */
 # elif defined(FPM_SPARC)
 /*
 * This SPARC V8 version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
 #  define MAD_F_MLX(hi, lo, x, y)  \
    asm ("smul %2, %3, %0\n\t"  \
         "rd %%y, %1"  \
         : "=r" (lo), "=r" (hi)  \
         : "%r" (x), "rI" (y))
 /* --- PowerPC ------------------------------------------------------------- */
 # elif defined(FPM_PPC)
 /*
- * This PowerPC version is tuned for the 4xx embedded processors. It is
+ * This PowerPC version is fast and accurate; the disposition of the least
- * effectively a tuned version of FPM_64BIT. It is a little faster and just
+ * significant bit depends on OPT_ACCURACY via mad_f_scale64().
- * as accurate. The disposition of the least significant bit depends on
- * OPT_ACCURACY via mad_f_scale64().
+ *
+ * MAD_F_MLX() names x and y in each of its two asm statements (one writes
+ * lo, the other writes hi), so an argument with side effects would be
+ * evaluated twice.
 */
 #  define MAD_F_MLX(hi, lo, x, y)  \
-    asm ("mulhw %1, %2, %3\n\t"  \
+    do {  \
-         "mullw %0, %2, %3"  \
+      asm ("mullw %0,%1,%2"  \
-         : "=&r" (lo), "=&r" (hi)  \
+           : "=r" (lo)  \
-         : "%r" (x), "r" (y))
+           : "%r" (x), "r" (y));  \
+      asm ("mulhw %0,%1,%2"  \
+           : "=r" (hi)  \
+           : "%r" (x), "r" (y));  \
+    }  \
+    while (0)
-#  define MAD_F_MLA(hi, lo, x, y)  \
+#  if defined(OPT_ACCURACY)
+/*
+ * This gives best accuracy but is not very fast.
+ *
+ * The product of x and y is formed in local __hi/__lo via MAD_F_MLX() and
+ * then added into hi/lo with an addc/adde pair. addc writes the lo result
+ * before adde reads the high-word inputs, so the lo output is marked
+ * earlyclobber ("=&r") to keep it out of the registers holding hi and __hi.
+ */
+#   define MAD_F_MLA(hi, lo, x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
-       asm ("addc %0, %2, %3\n\t"  \
+       asm ("addc %0,%2,%3\n\t"  \
-            "adde %1, %4, %5"  \
+            "adde %1,%4,%5"  \
-            : "=r" (lo), "=r" (hi)  \
+            : "=&r" (lo), "=r" (hi)  \
-            : "%r" (__lo), "0" (lo), "%r" (__hi), "1" (hi));  \
+            : "%r" (lo), "r" (__lo),  \
+              "%r" (hi), "r" (__hi)  \
+            : "xer");  \
    })
+#  endif
 #  if defined(OPT_ACCURACY)
 /*
- * This is accurate and ~2 - 2.5 times slower than the unrounded version.
+ * This is slower than the truncating version below it.
- *
- * The __volatile__ improves the generated code by another 5% (fewer spills
- * to memory); eventually they should be removed.
+ *
+ * Four asm statements run in order: lo is rotated by MAD_F_SCALEBITS into
+ * __result (rotrwi), one bit of that rotated value is extracted into
+ * __round (extrwi), hi is inserted into __result (insrwi), and __round is
+ * added to __result. The truncating version performs only the rotrwi and
+ * insrwi steps.
 */
 #   define mad_f_scale64(hi, lo)  \
-    ({ mad_fixed_t __result;  \
+    ({ mad_fixed_t __result, __round;  \
-       mad_fixed64hi_t __hi_;  \
+       asm ("rotrwi %0,%1,%2"  \
-       mad_fixed64lo_t __lo_;  \
+            : "=r" (__result)  \
-       asm __volatile__ ("addc %0, %2, %4\n\t"  \
+            : "r" (lo), "i" (MAD_F_SCALEBITS));  \
-                         "addze %1, %3"  \
+       asm ("extrwi %0,%1,1,0"  \
-            : "=r" (__lo_), "=r" (__hi_)  \
+            : "=r" (__round)  \
-            : "r" (lo), "r" (hi), "r" (1 << (MAD_F_SCALEBITS - 1)));  \
+            : "r" (__result));  \
-       asm __volatile__ ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
+       asm ("insrwi %0,%1,%2,0"  \
-                         "rlwimi %0, %1,32-%3,%3,31"  \
+            : "+r" (__result)  \
-            : "=&r" (__result)  \
+            : "r" (hi), "i" (MAD_F_SCALEBITS));  \
-            : "r" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS));  \
+       asm ("add %0,%1,%2"  \
-            __result;  \
+            : "=r" (__result)  \
+            : "%r" (__result), "r" (__round));  \
+       __result;  \
    })
 #  else
 #   define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
-       asm ("rlwinm %0, %2,32-%3,0,%3-1\n\t"  \
+       asm ("rotrwi %0,%1,%2"  \
-            "rlwimi %0, %1,32-%3,%3,31"  \
            : "=r" (__result)  \
-            : "r" (lo), "r" (hi), "I" (MAD_F_SCALEBITS));  \
+            : "r" (lo), "i" (MAD_F_SCALEBITS));  \
-            __result;  \
+       asm ("insrwi %0,%1,%2,0"  \
+            : "+r" (__result)  \
+            : "r" (hi), "i" (MAD_F_SCALEBITS));  \
+       __result;  \
    })
-#  endif  /* OPT_ACCURACY */
+#  endif
 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 /* --- Default ------------------------------------------------------------- */
 # elif defined(FPM_DEFAULT)
 /*
 * This version is the most portable but it loses significant accuracy.
 * Furthermore, accuracy is biased against the second argument, so care
 * should be taken when ordering operands.
 *
 * The scale factors are constant as this is not used with SSO.
 *
 * Pre-rounding is required to stay within the limits of compliance.
 */
 #  if defined(OPT_SPEED)
 #   define mad_f_mul(x, y)  (((x) >> 12) * ((y) >> 16))
 #  else
 #   define mad_f_mul(x, y)  ((((x) + (1L << 11)) >> 12) *  \
                              (((y) + (1L << 15)) >> 16))
 #  endif
 /* ------------------------------------------------------------------------- */
 # else
 #  error "no FPM selected"
 # endif
 /* default implementations */
 # if !defined(mad_f_mul)
 #  define mad_f_mul(x, y)  \
-    ({ mad_fixed64hi_t __hi;  \
+    ({ register mad_fixed64hi_t __hi;  /* set by MAD_F_MLX() below */  \
-       mad_fixed64lo_t __lo;  \
+       register mad_fixed64lo_t __lo;  /* set by MAD_F_MLX() below */  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
       mad_f_scale64(__hi, __lo);  \
    })
 # endif
 # if !defined(MAD_F_MLA)
 #  define MAD_F_ML0(hi, lo, x, y)  ((lo)  = mad_f_mul((x), (y)))
 #  define MAD_F_MLA(hi, lo, x, y)  ((lo) += mad_f_mul((x), (y)))
 #  define MAD_F_MLN(hi, lo)        ((lo)  = -(lo))
 #  define MAD_F_MLZ(hi, lo)        ((void) (hi), (mad_fixed_t) (lo))
 # endif
 # if !defined(MAD_F_ML0)
 #  define MAD_F_ML0(hi, lo, x, y)  MAD_F_MLX((hi), (lo), (x), (y))
 # endif
 # if !defined(MAD_F_MLN)
 #  define MAD_F_MLN(hi, lo)        ((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
 # endif
 # if !defined(MAD_F_MLZ)
 #  define MAD_F_MLZ(hi, lo)        mad_f_scale64((hi), (lo))
 # endif
 # if !defined(mad_f_scale64)
 #  if defined(OPT_ACCURACY)
 #   define mad_f_scale64(hi, lo)  \
    ((((mad_fixed_t)  \
       (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
        ((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
 #  else
 #   define mad_f_scale64(hi, lo)  \
    ((mad_fixed_t)  \
     (((hi) << (32 - MAD_F_SCALEBITS)) |  \
      ((lo) >> MAD_F_SCALEBITS)))
 #  endif
 #  define MAD_F_SCALEBITS  MAD_F_FRACBITS
 # endif
-/* miscellaneous C routines */
+/* C routines */
 mad_fixed_t mad_f_abs(mad_fixed_t);
+mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t);
 # endif