1 files changed, 214 insertions, 18 deletions
diff --git a/core/multimedia/opieplayer/libmad/layer3.c b/core/multimedia/opieplayer/libmad/layer3.c
index 03f13fe..3c5dd9e 100644
--- a/core/multimedia/opieplayer/libmad/layer3.c
+++ b/core/multimedia/opieplayer/libmad/layer3.c
@@ -1,15 +1,15 @@
 /*
 * libmad - MPEG audio decoder library
- * Copyright (C) 2000-2001 Robert Leslie
+ * Copyright (C) 2000-2004 Underbit Technologies, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
@@ -279,26 +279,26 @@ unsigned char const sfb_8000_long[] = {
 # define sfb_11025_short  sfb_12000_short
 static
 unsigned char const sfb_8000_short[] = {
   8,  8,  8,  8,  8,  8,  8,  8,  8, 12, 12, 12, 16,
  16, 16, 20, 20, 20, 24, 24, 24, 28, 28, 28, 36, 36,
  36,  2,  2,  2,  2,  2,  2,  2,  2,  2, 26, 26, 26
 };
 # define sfb_12000_mixed  sfb_16000_mixed
 # define sfb_11025_mixed  sfb_12000_mixed
-/* the 8000 Hz short block scalefactor bands do not break after the first 36
+/* the 8000 Hz short block scalefactor bands do not break after
-   frequency lines, so this is probably wrong */
+   the first 36 frequency lines, so this is probably wrong */
 static
 unsigned char const sfb_8000_mixed[] = {
  /* long */  12, 12, 12,
  /* short */  4,  4,  4,  8,  8,  8, 12, 12, 12, 16, 16, 16,
              20, 20, 20, 24, 24, 24, 28, 28, 28, 36, 36, 36,
               2,  2,  2,  2,  2,  2,  2,  2,  2, 26, 26, 26
 };
 static
 struct {
  unsigned char const *l;
  unsigned char const *s;
@@ -373,25 +373,25 @@ mad_fixed_t const cs[8] = {
 static
 mad_fixed_t const ca[8] = {
  -MAD_F(0x083b5fe7) /* -0.514495755 */, -MAD_F(0x078c36d2) /* -0.471731969 */,
  -MAD_F(0x05039814) /* -0.313377454 */, -MAD_F(0x02e91dd1) /* -0.181913200 */,
  -MAD_F(0x0183603a) /* -0.094574193 */, -MAD_F(0x00a7cb87) /* -0.040965583 */,
  -MAD_F(0x003a2847) /* -0.014198569 */, -MAD_F(0x000f27b4) /* -0.003699975 */
 };
 /*
 * IMDCT coefficients for short blocks
 * derived from section 2.4.3.4.10.2 of ISO/IEC 11172-3
 *
- * imdct_s[i/even][k] = cos((PI / 24) * (2 * (i / 2) + 7) * (2 * k + 1))
+ * imdct_s[i/even][k] = cos((PI / 24) * (2 *       (i / 2) + 7) * (2 * k + 1))
 * imdct_s[i /odd][k] = cos((PI / 24) * (2 * (6 + (i-1)/2) + 7) * (2 * k + 1))
 */
 static
 mad_fixed_t const imdct_s[6][6] = {
 # include "imdct_s.dat"
 };
 # if !defined(ASO_IMDCT)
 /*
 * windowing coefficients for long blocks
 * derived from section 2.4.3.4.10.3 of ISO/IEC 11172-3
 *
@@ -452,25 +452,25 @@ mad_fixed_t const is_table[7] = {
  MAD_F(0x05db3d74) /* 0.366025404 */,
  MAD_F(0x08000000) /* 0.500000000 */,
  MAD_F(0x0a24c28c) /* 0.633974596 */,
  MAD_F(0x0c9e69d1) /* 0.788675135 */,
  MAD_F(0x10000000) /* 1.000000000 */
 };
 /*
 * coefficients for LSF intensity stereo processing
 * derived from section 2.4.3.2 of ISO/IEC 13818-3
 *
 * is_lsf_table[0][i] = (1 / sqrt(sqrt(2)))^(i + 1)
- * is_lsf_table[1][i] = (1 / sqrt(2))^(i + 1)
+ * is_lsf_table[1][i] = (1 /      sqrt(2)) ^(i + 1)
 */
 static
 mad_fixed_t const is_lsf_table[2][15] = {
  {
    MAD_F(0x0d744fcd) /* 0.840896415 */,
    MAD_F(0x0b504f33) /* 0.707106781 */,
    MAD_F(0x09837f05) /* 0.594603558 */,
    MAD_F(0x08000000) /* 0.500000000 */,
    MAD_F(0x06ba27e6) /* 0.420448208 */,
    MAD_F(0x05a8279a) /* 0.353553391 */,
    MAD_F(0x04c1bf83) /* 0.297301779 */,
    MAD_F(0x04000000) /* 0.250000000 */,
@@ -1566,24 +1566,211 @@ void III_aliasreduce(mad_fixed_t xr[576], int lines)
        xr[     i] = MAD_F_MLZ(hi, lo);
 # if defined(ASO_ZEROCHECK)
      }
 # endif
    }
  }
 }
 # if defined(ASO_IMDCT)
 void III_imdct_l(mad_fixed_t const [18], mad_fixed_t [36], unsigned int);
 # else
+#  if 1
+static
+void fastsdct(mad_fixed_t const x[9], mad_fixed_t y[18])
+{
+  mad_fixed_t a0,  a1,  a2,  a3,  a4,  a5,  a6,  a7,  a8,  a9,  a10, a11, a12;
+  mad_fixed_t a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25;
+  mad_fixed_t m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7;
+  enum {
+    c0 =  MAD_F(0x1f838b8d),  /* 2 * cos( 1 * PI / 18) */
+    c1 =  MAD_F(0x1bb67ae8),  /* 2 * cos( 3 * PI / 18) */
+    c2 =  MAD_F(0x18836fa3),  /* 2 * cos( 4 * PI / 18) */
+    c3 =  MAD_F(0x1491b752),  /* 2 * cos( 5 * PI / 18) */
+    c4 =  MAD_F(0x0af1d43a),  /* 2 * cos( 7 * PI / 18) */
+    c5 =  MAD_F(0x058e86a0),  /* 2 * cos( 8 * PI / 18) */
+    c6 = -MAD_F(0x1e11f642)   /* 2 * cos(16 * PI / 18) */
+  };
+  a0 = x[3] + x[5];
+  a1 = x[3] - x[5];
+  a2 = x[6] + x[2];
+  a3 = x[6] - x[2];
+  a4 = x[1] + x[7];
+  a5 = x[1] - x[7];
+  a6 = x[8] + x[0];
+  a7 = x[8] - x[0];
+  a8  = a0  + a2;
+  a9  = a0  - a2;
+  a10 = a0  - a6;
+  a11 = a2  - a6;
+  a12 = a8  + a6;
+  a13 = a1  - a3;
+  a14 = a13 + a7;
+  a15 = a3  + a7;
+  a16 = a1  - a7;
+  a17 = a1  + a3;
+  m0 = mad_f_mul(a17, -c3);
+  m1 = mad_f_mul(a16, -c0);
+  m2 = mad_f_mul(a15, -c4);
+  m3 = mad_f_mul(a14, -c1);
+  m4 = mad_f_mul(a5,  -c1);
+  m5 = mad_f_mul(a11, -c6);
+  m6 = mad_f_mul(a10, -c5);
+  m7 = mad_f_mul(a9,  -c2);
+  a18 =     x[4] + a4;
+  a19 = 2 * x[4] - a4;
+  a20 = a19 + m5;
+  a21 = a19 - m5;
+  a22 = a19 + m6;
+  a23 = m4  + m2;
+  a24 = m4  - m2;
+  a25 = m4  + m1;
+  /* output to every other slot for convenience */
+  y[ 0] = a18 + a12;
+  y[ 2] = m0  - a25;
+  y[ 4] = m7  - a20;
+  y[ 6] = m3;
+  y[ 8] = a21 - m6;
+  y[10] = a24 - m1;
+  y[12] = a12 - 2 * a18;
+  y[14] = a23 + m0;
+  y[16] = a22 + m7;
+}
+static inline
+void sdctII(mad_fixed_t const x[18], mad_fixed_t X[18])
+{
+  mad_fixed_t tmp[9];
+  int i;
+  /* scale[i] = 2 * cos(PI * (2 * i + 1) / (2 * 18)) */
+  static mad_fixed_t const scale[9] = {
+    MAD_F(0x1fe0d3b4), MAD_F(0x1ee8dd47), MAD_F(0x1d007930),
+    MAD_F(0x1a367e59), MAD_F(0x16a09e66), MAD_F(0x125abcf8),
+    MAD_F(0x0d8616bc), MAD_F(0x08483ee1), MAD_F(0x02c9fad7)
+  };
+  /* divide the 18-point SDCT-II into two 9-point SDCT-IIs */
+  /* even input butterfly */
+  for (i = 0; i < 9; i += 3) {
+    tmp[i + 0] = x[i + 0] + x[18 - (i + 0) - 1];
+    tmp[i + 1] = x[i + 1] + x[18 - (i + 1) - 1];
+    tmp[i + 2] = x[i + 2] + x[18 - (i + 2) - 1];
+  }
+  fastsdct(tmp, &X[0]);
+  /* odd input butterfly and scaling */
+  for (i = 0; i < 9; i += 3) {
+    tmp[i + 0] = mad_f_mul(x[i + 0] - x[18 - (i + 0) - 1], scale[i + 0]);
+    tmp[i + 1] = mad_f_mul(x[i + 1] - x[18 - (i + 1) - 1], scale[i + 1]);
+    tmp[i + 2] = mad_f_mul(x[i + 2] - x[18 - (i + 2) - 1], scale[i + 2]);
+  }
+  fastsdct(tmp, &X[1]);
+  /* output accumulation */
+  for (i = 3; i < 18; i += 8) {
+    X[i + 0] -= X[(i + 0) - 2];
+    X[i + 2] -= X[(i + 2) - 2];
+    X[i + 4] -= X[(i + 4) - 2];
+    X[i + 6] -= X[(i + 6) - 2];
+  }
+}
+static inline
+void dctIV(mad_fixed_t const y[18], mad_fixed_t X[18])
+{
+  mad_fixed_t tmp[18];
+  int i;
+  /* scale[i] = 2 * cos(PI * (2 * i + 1) / (4 * 18)) */
+  static mad_fixed_t const scale[18] = {
+    MAD_F(0x1ff833fa), MAD_F(0x1fb9ea93), MAD_F(0x1f3dd120),
+    MAD_F(0x1e84d969), MAD_F(0x1d906bcf), MAD_F(0x1c62648b),
+    MAD_F(0x1afd100f), MAD_F(0x1963268b), MAD_F(0x1797c6a4),
+    MAD_F(0x159e6f5b), MAD_F(0x137af940), MAD_F(0x11318ef3),
+    MAD_F(0x0ec6a507), MAD_F(0x0c3ef153), MAD_F(0x099f61c5),
+    MAD_F(0x06ed12c5), MAD_F(0x042d4544), MAD_F(0x0165547c)
+  };
+  /* scaling */
+  for (i = 0; i < 18; i += 3) {
+    tmp[i + 0] = mad_f_mul(y[i + 0], scale[i + 0]);
+    tmp[i + 1] = mad_f_mul(y[i + 1], scale[i + 1]);
+    tmp[i + 2] = mad_f_mul(y[i + 2], scale[i + 2]);
+  }
+  /* SDCT-II */
+  sdctII(tmp, X);
+  /* scale reduction and output accumulation */
+  X[0] /= 2;
+  for (i = 1; i < 17; i += 4) {
+    X[i + 0] = X[i + 0] / 2 - X[(i + 0) - 1];
+    X[i + 1] = X[i + 1] / 2 - X[(i + 1) - 1];
+    X[i + 2] = X[i + 2] / 2 - X[(i + 2) - 1];
+    X[i + 3] = X[i + 3] / 2 - X[(i + 3) - 1];
+  }
+  X[17] = X[17] / 2 - X[16];
+}
+/*
+         * NAME:imdct36
+         * DESCRIPTION:perform X[18]->x[36] IMDCT using Szu-Wei Lee's fast algorithm
+ */
+static inline
+void imdct36(mad_fixed_t const x[18], mad_fixed_t y[36])
+{
+  mad_fixed_t tmp[18];
+  int i;
+  /* DCT-IV */
+  dctIV(x, tmp);
+  /* convert 18-point DCT-IV to 36-point IMDCT */
+  for (i =  0; i <  9; i += 3) {
+    y[i + 0] =  tmp[9 + (i + 0)];
+    y[i + 1] =  tmp[9 + (i + 1)];
+    y[i + 2] =  tmp[9 + (i + 2)];
+  }
+  for (i =  9; i < 27; i += 3) {
+    y[i + 0] = -tmp[36 - (9 + (i + 0)) - 1];
+    y[i + 1] = -tmp[36 - (9 + (i + 1)) - 1];
+    y[i + 2] = -tmp[36 - (9 + (i + 2)) - 1];
+  }
+  for (i = 27; i < 36; i += 3) {
+    y[i + 0] = -tmp[(i + 0) - 27];
+    y[i + 1] = -tmp[(i + 1) - 27];
+    y[i + 2] = -tmp[(i + 2) - 27];
+  }
+}
+#  else
 /*
         * NAME:imdct36
         * DESCRIPTION:perform X[18]->x[36] IMDCT
 */
 static inline
 void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
 {
  mad_fixed_t t0, t1, t2,  t3,  t4,  t5,  t6,  t7;
  mad_fixed_t t8, t9, t10, t11, t12, t13, t14, t15;
  register mad_fixed64hi_t hi;
  register mad_fixed64lo_t lo;
@@ -1856,24 +2043,25 @@ void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
  MAD_F_MLA(hi, lo, X[5],  -MAD_F(0x0e313245));
  MAD_F_MLA(hi, lo, X[6],  -MAD_F(0x0d7e8807));
  MAD_F_MLA(hi, lo, X[8],  -MAD_F(0x0bcbe352));
  MAD_F_MLA(hi, lo, X[9],  -MAD_F(0x0acf37ad));
  MAD_F_MLA(hi, lo, X[11], -MAD_F(0x0898c779));
  MAD_F_MLA(hi, lo, X[12], -MAD_F(0x07635284));
  MAD_F_MLA(hi, lo, X[14], -MAD_F(0x04cfb0e2));
  MAD_F_MLA(hi, lo, X[15], -MAD_F(0x03768962));
  MAD_F_MLA(hi, lo, X[17], -MAD_F(0x00b2aa3e));
  x[26] = x[27] = MAD_F_MLZ(hi, lo) + t5;
 }
+#  endif
 /*
         * NAME:III_imdct_l()
         * DESCRIPTION:perform IMDCT and windowing for long blocks
 */
 static
 void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36],
                 unsigned int block_type)
 {
  unsigned int i;
  /* IMDCT */
@@ -1920,35 +2108,43 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36],
    for (i = 0; i < 36; i += 4) {
      z[i + 0] = mad_f_mul(z[i + 0], window_l[i + 0]);
      z[i + 1] = mad_f_mul(z[i + 1], window_l[i + 1]);
      z[i + 2] = mad_f_mul(z[i + 2], window_l[i + 2]);
      z[i + 3] = mad_f_mul(z[i + 3], window_l[i + 3]);
    }
 # else
    for (i =  0; i < 36; ++i) z[i] = mad_f_mul(z[i], window_l[i]);
 # endif
    break;
  case 1:  /* start block */
-    for (i =  0; i < 18; ++i) z[i] = mad_f_mul(z[i], window_l[i]);
+    for (i =  0; i < 18; i += 3) {
+      z[i + 0] = mad_f_mul(z[i + 0], window_l[i + 0]);
+      z[i + 1] = mad_f_mul(z[i + 1], window_l[i + 1]);
+      z[i + 2] = mad_f_mul(z[i + 2], window_l[i + 2]);
+    }
    /*  (i = 18; i < 24; ++i) z[i] unchanged */
    for (i = 24; i < 30; ++i) z[i] = mad_f_mul(z[i], window_s[i - 18]);
    for (i = 30; i < 36; ++i) z[i] = 0;
    break;
  case 3:  /* stop block */
    for (i =  0; i <  6; ++i) z[i] = 0;
    for (i =  6; i < 12; ++i) z[i] = mad_f_mul(z[i], window_s[i - 6]);
    /*  (i = 12; i < 18; ++i) z[i] unchanged */
-    for (i = 18; i < 36; ++i) z[i] = mad_f_mul(z[i], window_l[i]);
+    for (i = 18; i < 36; i += 3) {
+      z[i + 0] = mad_f_mul(z[i + 0], window_l[i + 0]);
+      z[i + 1] = mad_f_mul(z[i + 1], window_l[i + 1]);
+      z[i + 2] = mad_f_mul(z[i + 2], window_l[i + 2]);
+    }
    break;
  }
 }
 # endif  /* ASO_IMDCT */
 /*
         * NAME:III_imdct_s()
         * DESCRIPTION:perform IMDCT and windowing for short blocks
 */
 static
 void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36])
 {
@@ -2033,49 +2229,49 @@ void III_overlap(mad_fixed_t const output[36], mad_fixed_t overlap[18],
                 mad_fixed_t sample[18][32], unsigned int sb)
 {
  unsigned int i;
 # if defined(ASO_INTERLEAVE2)
  {
    register mad_fixed_t tmp1, tmp2;
    tmp1 = overlap[0];
    tmp2 = overlap[1];
    for (i = 0; i < 16; i += 2) {
-      sample[i + 0][sb] = output[i + 0] + tmp1;
+      sample[i + 0][sb] = output[i + 0 +  0] + tmp1;
      overlap[i + 0]    = output[i + 0 + 18];
      tmp1 = overlap[i + 2];
-      sample[i + 1][sb] = output[i + 1] + tmp2;
+      sample[i + 1][sb] = output[i + 1 +  0] + tmp2;
      overlap[i + 1]    = output[i + 1 + 18];
      tmp2 = overlap[i + 3];
    }
-    sample[16][sb] = output[16] + tmp1;
+    sample[16][sb] = output[16 +  0] + tmp1;
    overlap[16]    = output[16 + 18];
-    sample[17][sb] = output[17] + tmp2;
+    sample[17][sb] = output[17 +  0] + tmp2;
    overlap[17]    = output[17 + 18];
  }
 # elif 0
  for (i = 0; i < 18; i += 2) {
-    sample[i + 0][sb] = output[i + 0] + overlap[i + 0];
+    sample[i + 0][sb] = output[i + 0 +  0] + overlap[i + 0];
    overlap[i + 0]    = output[i + 0 + 18];
-    sample[i + 1][sb] = output[i + 1] + overlap[i + 1];
+    sample[i + 1][sb] = output[i + 1 +  0] + overlap[i + 1];
    overlap[i + 1]    = output[i + 1 + 18];
  }
 # else
  for (i = 0; i < 18; ++i) {
-    sample[i][sb] = output[i] + overlap[i];
+    sample[i][sb] = output[i +  0] + overlap[i];
    overlap[i]    = output[i + 18];
  }
 # endif
 }
 /*
         * NAME:III_overlap_z()
         * DESCRIPTION:perform "overlap-add" of zero IMDCT outputs
 */
 static inline
 void III_overlap_z(mad_fixed_t overlap[18],
                   mad_fixed_t sample[18][32], unsigned int sb)
@@ -2445,30 +2641,30 @@ int mad_layer_III(struct mad_stream *stream, struct mad_frame *frame)
  }
  frame_free = frame_space - frame_used;
  /* decode main_data */
  if (result == 0) {
    error = III_decode(&ptr, frame, &si, nch);
    if (error) {
      stream->error = error;
      result = -1;
    }
-  }
-  /* designate ancillary bits */
+    /* designate ancillary bits */
-  stream->anc_ptr    = ptr;
+    stream->anc_ptr    = ptr;
-  stream->anc_bitlen = md_len * CHAR_BIT - data_bitlen;
+    stream->anc_bitlen = md_len * CHAR_BIT - data_bitlen;
+  }
 # if 0 && defined(DEBUG)
  fprintf(stderr,
          "main_data_begin:%u, md_len:%u, frame_free:%u, "
          "data_bitlen:%u, anc_bitlen: %u\n",
          si.main_data_begin, md_len, frame_free,
          data_bitlen, stream->anc_bitlen);
 # endif
  /* preload main_data buffer with up to 511 bytes for next frame(s) */
  if (frame_free >= next_md_begin) {