author | zecke <zecke> | 2003-12-06 11:38:18 (UTC) |
---|---|---|
committer | zecke <zecke> | 2003-12-06 11:38:18 (UTC) |
commit | faaeb2cd28b47d79e9644e770622d141d315c195 (patch) (side-by-side diff) | |
tree | 1d3b08bb7c786ba9e39b33ad9f59bc27a64d7f31 | |
parent | 65ae9068fd4e489ecd7ac4d44d312951894f240a (diff) | |
download | opie-faaeb2cd28b47d79e9644e770622d141d315c195.zip opie-faaeb2cd28b47d79e9644e770622d141d315c195.tar.gz opie-faaeb2cd28b47d79e9644e770622d141d315c195.tar.bz2 |
Multiline strings moved from deprecated to illegal ... patch for it
by Wim Delvaux
-rw-r--r-- | core/multimedia/opieplayer/libmpeg3/video/output.c | 217 |
1 files changed, 107 insertions, 110 deletions
diff --git a/core/multimedia/opieplayer/libmpeg3/video/output.c b/core/multimedia/opieplayer/libmpeg3/video/output.c index 919a0ff..bf0d6ed 100644 --- a/core/multimedia/opieplayer/libmpeg3/video/output.c +++ b/core/multimedia/opieplayer/libmpeg3/video/output.c @@ -29,550 +29,547 @@ static unsigned char mpeg3_601_to_rgb[256]; /* b = (int)(*y + 1.732 * (*cb - 128)); */ #ifdef HAVE_MMX inline void mpeg3video_rgb16_mmx(unsigned char *lum, unsigned char *cr, unsigned char *cb, unsigned char *out, int rows, int cols, int mod) { unsigned short *row1; int x; unsigned char *y; int col1; row1 = (unsigned short *)out; col1 = cols + mod; mod += cols + mod; mod *= 2; y = lum + cols * rows; x = 0; __asm__ __volatile__( ".align 8\n" "1:\n" "movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */ "pxor %%mm7, %%mm7\n" "movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */ "punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */ "punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */ "psubw mpeg3_MMX_80w, %%mm0\n" "psubw mpeg3_MMX_80w, %%mm1\n" "movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */ "movq %%mm1, %%mm3\n" /* Cr */ "pmullw mpeg3_MMX_Ugrncoeff, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */ "movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */ "pmullw mpeg3_MMX_Ublucoeff, %%mm0\n" /* Cb2blue */ "pand mpeg3_MMX_00FFw, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */ "pmullw mpeg3_MMX_Vgrncoeff, %%mm3\n" /* Cr2green */ "movq (%2), %%mm7\n" /* L2 */ "pmullw mpeg3_MMX_Vredcoeff, %%mm1\n" /* Cr2red */ "psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */ "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum1 */ "paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */ "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum2 */ "movq %%mm6, %%mm4\n" /* lum1 */ "paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */ "movq %%mm4, %%mm5\n" /* lum1 */ "paddw %%mm1, %%mm4\n" /* lum1 +red 00 R6 00 R4 00 R2 00 R0 */ "paddw %%mm2, %%mm5\n" /* lum1 +green 00 G6 00 G4 00 G2 00 G0 */ "psraw $6, %%mm4\n" /* R1 0 .. 
64 */ "movq %%mm7, %%mm3\n" /* lum2 00 L7 00 L5 00 L3 00 L1 */ "psraw $6, %%mm5\n" /* G1 - .. + */ "paddw %%mm0, %%mm7\n" /* Lum2 +blue 00 B7 00 B5 00 B3 00 B1 */ "psraw $6, %%mm6\n" /* B1 0 .. 64 */ "packuswb %%mm4, %%mm4\n" /* R1 R1 */ "packuswb %%mm5, %%mm5\n" /* G1 G1 */ "packuswb %%mm6, %%mm6\n" /* B1 B1 */ "punpcklbw %%mm4, %%mm4\n" "punpcklbw %%mm5, %%mm5\n" "pand mpeg3_MMX_redmask, %%mm4\n" "psllw $3, %%mm5\n" /* GREEN 1 */ "punpcklbw %%mm6, %%mm6\n" "pand mpeg3_MMX_grnmask, %%mm5\n" "pand mpeg3_MMX_redmask, %%mm6\n" "por %%mm5, %%mm4\n" /* */ "psrlw $11, %%mm6\n" /* BLUE 1 */ "movq %%mm3, %%mm5\n" /* lum2 */ "paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */ "paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */ "psraw $6, %%mm3\n" /* R2 */ "por %%mm6, %%mm4\n" /* MM4 */ "psraw $6, %%mm5\n" /* G2 */ "movq (%2, %3), %%mm6\n" /* L3 */ "psraw $6, %%mm7\n" "packuswb %%mm3, %%mm3\n" "packuswb %%mm5, %%mm5\n" "packuswb %%mm7, %%mm7\n" "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */ "punpcklbw %%mm3, %%mm3\n" "punpcklbw %%mm5, %%mm5\n" "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */ "punpcklbw %%mm7, %%mm7\n" "psllw $3, %%mm5\n" /* GREEN 2 */ "pand mpeg3_MMX_redmask, %%mm7\n" "pand mpeg3_MMX_redmask, %%mm3\n" "psrlw $11, %%mm7\n" /* BLUE 2 */ "pand mpeg3_MMX_grnmask, %%mm5\n" "por %%mm7, %%mm3\n" "movq (%2,%3), %%mm7\n" /* L4 */ "por %%mm5, %%mm3\n" /* */ "psrlw $8, %%mm7\n" /* L4 */ "movq %%mm4, %%mm5\n" "punpcklwd %%mm3, %%mm4\n" "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */ "punpckhwd %%mm3, %%mm5\n" "movq %%mm4, (%4)\n" "movq %%mm5, 8(%4)\n" "movq %%mm6, %%mm4\n" /* Lum3 */ "paddw %%mm0, %%mm6\n" /* Lum3 +blue */ "movq %%mm4, %%mm5\n" /* Lum3 */ "paddw %%mm1, %%mm4\n" /* Lum3 +red */ "paddw %%mm2, %%mm5\n" /* Lum3 +green */ "psraw $6, %%mm4\n" "movq %%mm7, %%mm3\n" /* Lum4 */ "psraw $6, %%mm5\n" "paddw %%mm0, %%mm7\n" /* Lum4 +blue */ "psraw $6, %%mm6\n" /* Lum3 +blue */ "movq %%mm3, %%mm0\n" /* Lum4 */ "packuswb %%mm4, %%mm4\n" "paddw %%mm1, 
%%mm3\n" /* Lum4 +red */ "packuswb %%mm5, %%mm5\n" "paddw %%mm2, %%mm0\n" /* Lum4 +green */ "packuswb %%mm6, %%mm6\n" "punpcklbw %%mm4, %%mm4\n" "punpcklbw %%mm5, %%mm5\n" "punpcklbw %%mm6, %%mm6\n" "psllw $3, %%mm5\n" /* GREEN 3 */ "pand mpeg3_MMX_redmask, %%mm4\n" "psraw $6, %%mm3\n" /* psr 6 */ "psraw $6, %%mm0\n" "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */ "pand mpeg3_MMX_grnmask, %%mm5\n" "psrlw $11, %%mm6\n" /* BLUE 3 */ "por %%mm5, %%mm4\n" "psraw $6, %%mm7\n" "por %%mm6, %%mm4\n" "packuswb %%mm3, %%mm3\n" "packuswb %%mm0, %%mm0\n" "packuswb %%mm7, %%mm7\n" "punpcklbw %%mm3, %%mm3\n" "punpcklbw %%mm0, %%mm0\n" "punpcklbw %%mm7, %%mm7\n" "pand mpeg3_MMX_redmask, %%mm3\n" "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */ "psllw $3, %%mm0\n" /* GREEN 4 */ "psrlw $11, %%mm7\n" "pand mpeg3_MMX_grnmask, %%mm0\n" "por %%mm7, %%mm3\n" "addl $8, %6\n" "por %%mm0, %%mm3\n" "movq %%mm4, %%mm5\n" "punpcklwd %%mm3, %%mm4\n" "punpckhwd %%mm3, %%mm5\n" "movq %%mm4, (%4,%5,2)\n" "movq %%mm5, 8(%4,%5,2)\n" "addl $8, %2\n" "addl $4, %0\n" "addl $4, %1\n" "cmpl %3, %6\n" "leal 16(%4), %4\n" "jl 1b\n" "addl %3, %2\n" /* lum += cols */ "addl %7, %4\n" /* row1 += mod */ "movl $0, %6\n" "cmpl %8, %2\n" "jl 1b\n" : : "r" (cr), "r" (cb), "r" (lum), "r" (cols), "r" (row1) , "r" (col1), "m" (x), "m" (mod), "m" (y) ); } static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; inline void mpeg3_bgra32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) { -asm(" + +asm( /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ /* for bgr24. 
*/ - movd (%0), %%mm0; /* Load y 0x00000000000000yy */ - movd (%1), %%mm1; /* Load u 0x00000000000000cr */ - movq %%mm0, %%mm3; /* Copy y to temp */ - psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ - movd (%2), %%mm2; /* Load v 0x00000000000000cb */ - psllq $16, %%mm3; /* Shift y */ - movq %%mm1, %%mm4; /* Copy u to temp */ - por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ - psllq $16, %%mm4; /* Shift u */ - movq %%mm2, %%mm5; /* Copy v to temp */ - psllq $16, %%mm3; /* Shift y */ - por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ - psllq $16, %%mm5; /* Shift v */ - por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ - por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ + "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ + "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ + "movq %%mm0, %%mm3;" /* Copy y to temp */ + "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ + "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ + "psllq $16, %%mm3;" /* Shift y */ + "movq %%mm1, %%mm4;" /* Copy u to temp */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ + "psllq $16, %%mm4;" /* Shift u */ + "movq %%mm2, %%mm5;" /* Copy v to temp */ + "psllq $16, %%mm3;" /* Shift y */ + "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ + "psllq $16, %%mm5;" /* Shift v */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ + "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ - psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ - pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ - psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ - psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ - pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ + "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 
0x000000uu00uu0000 */ + "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ + "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ + "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ + "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ - paddsw %%mm1, %%mm0; /* Add u to result */ - paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ - psraw $6, %%mm0; /* Demote precision */ - packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ - movd %%mm0, (%3); /* Store output */ - " + "paddsw %%mm1, %%mm0;" /* Add u to result */ + "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ + "psraw $6, %%mm0;" /* Demote precision */ + "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ + "movd %%mm0, (%3);" /* Store output */ : : "r" (&y), "r" (&u), "r" (&v), "r" (output)); } inline void mpeg3_601_bgra32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) { -asm(" +asm( /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ /* for bgr24. 
*/ - movd (%0), %%mm0; /* Load y 0x00000000000000yy */ - psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ - movd (%1), %%mm1; /* Load u 0x00000000000000cr */ - movq %%mm0, %%mm3; /* Copy y to temp */ - psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ - movd (%2), %%mm2; /* Load v 0x00000000000000cb */ - psllq $16, %%mm3; /* Shift y */ - movq %%mm1, %%mm4; /* Copy u to temp */ - por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ - psllq $16, %%mm4; /* Shift u */ - movq %%mm2, %%mm5; /* Copy v to temp */ - psllq $16, %%mm3; /* Shift y */ - por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ - psllq $16, %%mm5; /* Shift v */ - por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ - por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ + "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ + "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */ + "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ + "movq %%mm0, %%mm3;" /* Copy y to temp */ + "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ + "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ + "psllq $16, %%mm3;" /* Shift y */ + "movq %%mm1, %%mm4;" /* Copy u to temp */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ + "psllq $16, %%mm4;" /* Shift u */ + "movq %%mm2, %%mm5;" /* Copy v to temp */ + "psllq $16, %%mm3;" /* Shift y */ + "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ + "psllq $16, %%mm5;" /* Shift v */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ + "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ - pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ - psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ - pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ - psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ - pmullw 
mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ + "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale and shift y coeffs */ + "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */ + "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ + "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ + "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ - paddsw %%mm1, %%mm0; /* Add u to result */ - paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ - psraw $6, %%mm0; /* Demote precision */ - packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ - movd %%mm0, (%3); /* Store output */ - " + "paddsw %%mm1, %%mm0;" /* Add u to result */ + "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ + "psraw $6, %%mm0;" /* Demote precision */ + "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ + "movd %%mm0, (%3);" /* Store output */ : : "r" (&y), "r" (&u), "r" (&v), "r" (output)); } static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; inline void mpeg3_rgba32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) { -asm(" +asm( /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ /* for rgb24. 
*/ - movd (%0), %%mm0; /* Load y 0x00000000000000yy */ - movd (%1), %%mm1; /* Load v 0x00000000000000vv */ - movq %%mm0, %%mm3; /* Copy y to temp */ - psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ - movd (%2), %%mm2; /* Load u 0x00000000000000uu */ - psllq $16, %%mm3; /* Shift y */ - movq %%mm1, %%mm4; /* Copy v to temp */ - por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ - psllq $16, %%mm4; /* Shift v */ - movq %%mm2, %%mm5; /* Copy u to temp */ - psllq $16, %%mm3; /* Shift y */ - por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ - psllq $16, %%mm5; /* Shift u */ - por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ - por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ + "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ + "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */ + "movq %%mm0, %%mm3;" /* Copy y to temp */ + "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */ + "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */ + "psllq $16, %%mm3;" /* Shift y */ + "movq %%mm1, %%mm4;" /* Copy v to temp */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ + "psllq $16, %%mm4;" /* Shift v */ + "movq %%mm2, %%mm5;" /* Copy u to temp */ + "psllq $16, %%mm3;" /* Shift y */ + "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */ + "psllq $16, %%mm5;" /* Shift u */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ + "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ - psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ - pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ - psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ - psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ - pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ + "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 
from v 0x000000vv00vv0000 */ + "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ + "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ + "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */ + "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ - paddsw %%mm1, %%mm0; /* Add v to result */ - paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ - psraw $6, %%mm0; /* Demote precision */ - packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ - movd %%mm0, (%3); /* Store output */ - " + "paddsw %%mm1, %%mm0;" /* Add v to result */ + "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */ + "psraw $6, %%mm0;" /* Demote precision */ + "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */ + "movd %%mm0, (%3);" /* Store output */ : : "r" (&y), "r" (&v), "r" (&u), "r" (output)); } inline void mpeg3_601_rgba32_mmx(unsigned long y, unsigned long u, unsigned long v, unsigned long *output) { -asm(" +asm( /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ /* for rgb24. 
*/ - movd (%0), %%mm0; /* Load y 0x00000000000000yy */ - psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ - movd (%1), %%mm1; /* Load v 0x00000000000000vv */ - movq %%mm0, %%mm3; /* Copy y to temp */ - psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ - movd (%2), %%mm2; /* Load u 0x00000000000000uu */ - psllq $16, %%mm3; /* Shift y */ - movq %%mm1, %%mm4; /* Copy v to temp */ - por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ - psllq $16, %%mm4; /* Shift v */ - movq %%mm2, %%mm5; /* Copy u to temp */ - psllq $16, %%mm3; /* Shift y */ - por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ - psllq $16, %%mm5; /* Shift u */ - por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ - por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ + "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ + "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */ + "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */ + "movq %%mm0, %%mm3;" /* Copy y to temp */ + "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */ + "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */ + "psllq $16, %%mm3;" /* Shift y */ + "movq %%mm1, %%mm4;" /* Copy v to temp */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ + "psllq $16, %%mm4;" /* Shift v */ + "movq %%mm2, %%mm5;" /* Copy u to temp */ + "psllq $16, %%mm3;" /* Shift y */ + "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */ + "psllq $16, %%mm5;" /* Shift u */ + "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ + "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */ /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ - pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */ - psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ - pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ - psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ - 
pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ + "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale y coeffs */ + "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */ + "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ + "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */ + "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ - paddsw %%mm1, %%mm0; /* Add v to result */ - paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ - psraw $6, %%mm0; /* Demote precision */ - packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ - movd %%mm0, (%3); /* Store output */ - " + "paddsw %%mm1, %%mm0;" /* Add v to result */ + "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */ + "psraw $6, %%mm0;" /* Demote precision */ + "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */ + "movd %%mm0, (%3);" /* Store output */ : : "r" (&y), "r" (&v), "r" (&u), "r" (output)); } #endif #define DITHER_ROW_HEAD \ for(h = 0; h < video->out_h; h++) \ { \ y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \ cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 2); \ cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \ data = output_rows[h]; #define DITHER_ROW_TAIL \ } #define DITHER_SCALE_HEAD \ for(w = 0; w < video->out_w; w++) \ { \ uv_subscript = video->x_table[w] / 2; \ y_l = y_in[video->x_table[w]]; \ y_l <<= 16; \ r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; #define DITHER_SCALE_601_HEAD \ for(w = 0; w < video->out_w; w++) \ { \ 
uv_subscript = video->x_table[w] / 2; \ y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \ y_l <<= 16; \ r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; #define DITHER_SCALE_TAIL \ } #define DITHER_MMX_SCALE_HEAD \ for(w = 0; w < video->out_w; w++) \ { \ uv_subscript = video->x_table[w] / 2; #define DITHER_MMX_SCALE_TAIL \ data += step; \ } #define DITHER_MMX_HEAD \ for(w = 0; w < video->out_w; w += 2) \ { #define DITHER_MMX_TAIL \ data += step; \ cr_in++; \ cb_in++; \ } #define DITHER_HEAD \ for(w = 0; w < video->horizontal_size; w++) \ { \ y_l = *y_in++; \ y_l <<= 16; \ r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; #define DITHER_601_HEAD \ for(w = 0; w < video->horizontal_size; w++) \ { \ y_l = mpeg3_601_to_rgb[*y_in++]; \ y_l <<= 16; \ r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; #define DITHER_TAIL \ if(w & 1) \ { \ cr_in++; \ cb_in++; \ } \ } #define STORE_PIXEL_BGR888 \ *data++ = CLIP(b_l); \ *data++ = CLIP(g_l); \ *data++ = CLIP(r_l); #define STORE_PIXEL_BGRA8888 \ *data++ = CLIP(b_l); \ *data++ = CLIP(g_l); \ *data++ = CLIP(r_l); \ *data++ = 0; #define STORE_PIXEL_RGB565 \ *((unsigned short*)data)++ = \ ((CLIP(r_l) & 0xf8) << 8) | \ ((CLIP(g_l) & 0xfc) << 3) | \ ((CLIP(b_l) & 0xf8) >> 3); #define STORE_PIXEL_RGB888 \ *data++ = CLIP(r_l); \ *data++ = CLIP(g_l); \ *data++ = CLIP(b_l); #define STORE_PIXEL_RGBA8888 \ *data++ = CLIP(r_l); \ *data++ = CLIP(g_l); \ *data++ = CLIP(b_l); \ *data++ = 0; #define STORE_PIXEL_RGBA16161616 \ *data_s++ = CLIP(r_l); \ *data_s++ = CLIP(g_l); \ *data_s++ = CLIP(b_l); \ *data_s++ = 0; /* Only good for YUV 4:2:0 */ int 
mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) { int h = 0; register unsigned char *y_in, *cb_in, *cr_in; long y_l, r_l, b_l, g_l; register unsigned char *data; register int uv_subscript, step, w = -1; #ifdef HAVE_MMX /* =================================== MMX ===================================== */ if(video->have_mmx && video->out_w == video->horizontal_size && video->out_h == video->vertical_size && video->in_w == video->out_w && video->in_h == video->out_h && video->in_x == 0 && video->in_y == 0 && (video->color_model == MPEG3_RGB565 || video->color_model == MPEG3_601_RGB565)) { /* Unscaled 16 bit */ mpeg3video_rgb16_mmx(src[0], src[2], src[1], output_rows[0], video->out_h, video->out_w, (output_rows[1] - output_rows[0]) / 2 - video->out_w); } else if(video->have_mmx && (video->color_model == MPEG3_BGRA8888 || video->color_model == MPEG3_BGR888 || /* video->color_model == MPEG3_RGB888 || */ video->color_model == MPEG3_RGBA8888 || video->color_model == MPEG3_601_BGR888 || video->color_model == MPEG3_601_BGRA8888 || video->color_model == MPEG3_601_RGB888 || video->color_model == MPEG3_601_RGBA8888)) { /* Original MMX */ if(video->color_model == MPEG3_BGRA8888 || video->color_model == MPEG3_RGBA8888 || video->color_model == MPEG3_601_BGRA8888 || video->color_model == MPEG3_601_RGBA8888) step = 4; else if(video->color_model == MPEG3_BGR888 || video->color_model == MPEG3_RGB888 || video->color_model == MPEG3_601_BGR888 || video->color_model == MPEG3_601_RGB888) step = 3; DITHER_ROW_HEAD /* Transfer row with scaling */ if(video->out_w != video->horizontal_size) { switch(video->color_model) { case MPEG3_BGRA8888: case MPEG3_BGR888: DITHER_MMX_SCALE_HEAD mpeg3_bgra32_mmx(y_in[video->x_table[w]], cr_in[uv_subscript], cb_in[uv_subscript], (unsigned long*)data); DITHER_MMX_SCALE_TAIL break; case MPEG3_601_BGRA8888: case MPEG3_601_BGR888: DITHER_MMX_SCALE_HEAD |