author | zecke <zecke> | 2003-12-06 11:38:18 (UTC) |
---|---|---|
committer | zecke <zecke> | 2003-12-06 11:38:18 (UTC) |
commit | faaeb2cd28b47d79e9644e770622d141d315c195 (patch) (unidiff) | |
tree | 1d3b08bb7c786ba9e39b33ad9f59bc27a64d7f31 | |
parent | 65ae9068fd4e489ecd7ac4d44d312951894f240a (diff) | |
download | opie-faaeb2cd28b47d79e9644e770622d141d315c195.zip opie-faaeb2cd28b47d79e9644e770622d141d315c195.tar.gz opie-faaeb2cd28b47d79e9644e770622d141d315c195.tar.bz2 |
Multiline strings moved from deprecated to illegal ... patch for it
by Wim Delvaux
-rw-r--r-- | core/multimedia/opieplayer/libmpeg3/video/output.c | 217 |
1 files changed, 107 insertions, 110 deletions
diff --git a/core/multimedia/opieplayer/libmpeg3/video/output.c b/core/multimedia/opieplayer/libmpeg3/video/output.c index 919a0ff..bf0d6ed 100644 --- a/core/multimedia/opieplayer/libmpeg3/video/output.c +++ b/core/multimedia/opieplayer/libmpeg3/video/output.c | |||
@@ -1,898 +1,895 @@ | |||
1 | #include "../libmpeg3.h" | 1 | #include "../libmpeg3.h" |
2 | #include "../mpeg3protos.h" | 2 | #include "../mpeg3protos.h" |
3 | #include "mpeg3video.h" | 3 | #include "mpeg3video.h" |
4 | #include <string.h> | 4 | #include <string.h> |
5 | 5 | ||
6 | static LONGLONG mpeg3_MMX_0 = 0L; | 6 | static LONGLONG mpeg3_MMX_0 = 0L; |
7 | static unsigned long mpeg3_MMX_10w[] = {0x00100010, 0x00100010}; /*dd 00010 0010h, 000100010h */ | 7 | static unsigned long mpeg3_MMX_10w[] = {0x00100010, 0x00100010}; /*dd 00010 0010h, 000100010h */ |
8 | static unsigned long mpeg3_MMX_80w[] = {0x00800080, 0x00800080}; /*dd 00080 0080h, 000800080h */ | 8 | static unsigned long mpeg3_MMX_80w[] = {0x00800080, 0x00800080}; /*dd 00080 0080h, 000800080h */ |
9 | 9 | ||
10 | static unsigned long mpeg3_MMX_00FFw[] = {0x00ff00ff, 0x00ff00ff}; /*dd 000FF 00FFh, 000FF00FFh */ | 10 | static unsigned long mpeg3_MMX_00FFw[] = {0x00ff00ff, 0x00ff00ff}; /*dd 000FF 00FFh, 000FF00FFh */ |
11 | 11 | ||
12 | static unsigned short mpeg3_MMX_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81}; /*dd 00081 0081h, 000810081h */ | 12 | static unsigned short mpeg3_MMX_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81}; /*dd 00081 0081h, 000810081h */ |
13 | static unsigned short mpeg3_MMX_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66}; /*dd 00066 0066h, 000660066h */ | 13 | static unsigned short mpeg3_MMX_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66}; /*dd 00066 0066h, 000660066h */ |
14 | 14 | ||
15 | static unsigned short mpeg3_MMX_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; /*dd 0FFE7 FFE7h, 0FFE7FFE7h */ | 15 | static unsigned short mpeg3_MMX_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; /*dd 0FFE7 FFE7h, 0FFE7FFE7h */ |
16 | static unsigned short mpeg3_MMX_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; /*dd 0FFCC FFCCh, 0FFCCFFCCh */ | 16 | static unsigned short mpeg3_MMX_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; /*dd 0FFCC FFCCh, 0FFCCFFCCh */ |
17 | 17 | ||
18 | static unsigned short mpeg3_MMX_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a}; /*dd 0004A 004Ah, 0004A004Ah */ | 18 | static unsigned short mpeg3_MMX_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a}; /*dd 0004A 004Ah, 0004A004Ah */ |
19 | 19 | ||
20 | static unsigned short mpeg3_MMX_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800}; /*dd 07c00 7c00h, 07c007c00h */ | 20 | static unsigned short mpeg3_MMX_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800}; /*dd 07c00 7c00h, 07c007c00h */ |
21 | 21 | ||
22 | static unsigned short mpeg3_MMX_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0}; /*dd 003e0 03e0h, 003e003e0h */ | 22 | static unsigned short mpeg3_MMX_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0}; /*dd 003e0 03e0h, 003e003e0h */ |
23 | 23 | ||
24 | static unsigned char mpeg3_601_to_rgb[256]; | 24 | static unsigned char mpeg3_601_to_rgb[256]; |
25 | 25 | ||
26 | /* Algorithm */ | 26 | /* Algorithm */ |
27 | /* r = (int)(*y + 1.371 * (*cr - 128)); */ | 27 | /* r = (int)(*y + 1.371 * (*cr - 128)); */ |
28 | /* g = (int)(*y - 0.698 * (*cr - 128) - 0.336 * (*cb - 128)); */ | 28 | /* g = (int)(*y - 0.698 * (*cr - 128) - 0.336 * (*cb - 128)); */ |
29 | /* b = (int)(*y + 1.732 * (*cb - 128)); */ | 29 | /* b = (int)(*y + 1.732 * (*cb - 128)); */ |
30 | 30 | ||
31 | #ifdef HAVE_MMX | 31 | #ifdef HAVE_MMX |
32 | inline void mpeg3video_rgb16_mmx(unsigned char *lum, | 32 | inline void mpeg3video_rgb16_mmx(unsigned char *lum, |
33 | unsigned char *cr, | 33 | unsigned char *cr, |
34 | unsigned char *cb, | 34 | unsigned char *cb, |
35 | unsigned char *out, | 35 | unsigned char *out, |
36 | int rows, | 36 | int rows, |
37 | int cols, | 37 | int cols, |
38 | int mod) | 38 | int mod) |
39 | { | 39 | { |
40 | unsigned short *row1; | 40 | unsigned short *row1; |
41 | int x; | 41 | int x; |
42 | unsigned char *y; | 42 | unsigned char *y; |
43 | int col1; | 43 | int col1; |
44 | 44 | ||
45 | row1 = (unsigned short *)out; | 45 | row1 = (unsigned short *)out; |
46 | col1 = cols + mod; | 46 | col1 = cols + mod; |
47 | mod += cols + mod; | 47 | mod += cols + mod; |
48 | mod *= 2; | 48 | mod *= 2; |
49 | y = lum + cols * rows; | 49 | y = lum + cols * rows; |
50 | x = 0; | 50 | x = 0; |
51 | 51 | ||
52 | __asm__ __volatile__( | 52 | __asm__ __volatile__( |
53 | ".align 8\n" | 53 | ".align 8\n" |
54 | "1:\n" | 54 | "1:\n" |
55 | "movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */ | 55 | "movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */ |
56 | "pxor %%mm7, %%mm7\n" | 56 | "pxor %%mm7, %%mm7\n" |
57 | "movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */ | 57 | "movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */ |
58 | "punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */ | 58 | "punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */ |
59 | "punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */ | 59 | "punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */ |
60 | 60 | ||
61 | "psubw mpeg3_MMX_80w, %%mm0\n" | 61 | "psubw mpeg3_MMX_80w, %%mm0\n" |
62 | "psubw mpeg3_MMX_80w, %%mm1\n" | 62 | "psubw mpeg3_MMX_80w, %%mm1\n" |
63 | "movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */ | 63 | "movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */ |
64 | "movq %%mm1, %%mm3\n" /* Cr */ | 64 | "movq %%mm1, %%mm3\n" /* Cr */ |
65 | "pmullw mpeg3_MMX_Ugrncoeff, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */ | 65 | "pmullw mpeg3_MMX_Ugrncoeff, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */ |
66 | "movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */ | 66 | "movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */ |
67 | "pmullw mpeg3_MMX_Ublucoeff, %%mm0\n" /* Cb2blue */ | 67 | "pmullw mpeg3_MMX_Ublucoeff, %%mm0\n" /* Cb2blue */ |
68 | "pand mpeg3_MMX_00FFw, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */ | 68 | "pand mpeg3_MMX_00FFw, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */ |
69 | "pmullw mpeg3_MMX_Vgrncoeff, %%mm3\n" /* Cr2green */ | 69 | "pmullw mpeg3_MMX_Vgrncoeff, %%mm3\n" /* Cr2green */ |
70 | "movq (%2), %%mm7\n" /* L2 */ | 70 | "movq (%2), %%mm7\n" /* L2 */ |
71 | "pmullw mpeg3_MMX_Vredcoeff, %%mm1\n" /* Cr2red */ | 71 | "pmullw mpeg3_MMX_Vredcoeff, %%mm1\n" /* Cr2red */ |
72 | "psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */ | 72 | "psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */ |
73 | "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum1 */ | 73 | "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum1 */ |
74 | "paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */ | 74 | "paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */ |
75 | "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum2 */ | 75 | "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum2 */ |
76 | 76 | ||
77 | "movq %%mm6, %%mm4\n" /* lum1 */ | 77 | "movq %%mm6, %%mm4\n" /* lum1 */ |
78 | "paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */ | 78 | "paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */ |
79 | "movq %%mm4, %%mm5\n" /* lum1 */ | 79 | "movq %%mm4, %%mm5\n" /* lum1 */ |
80 | "paddw %%mm1, %%mm4\n" /* lum1 +red 00 R6 00 R4 00 R2 00 R0 */ | 80 | "paddw %%mm1, %%mm4\n" /* lum1 +red 00 R6 00 R4 00 R2 00 R0 */ |
81 | "paddw %%mm2, %%mm5\n" /* lum1 +green 00 G6 00 G4 00 G2 00 G0 */ | 81 | "paddw %%mm2, %%mm5\n" /* lum1 +green 00 G6 00 G4 00 G2 00 G0 */ |
82 | "psraw $6, %%mm4\n" /* R1 0 .. 64 */ | 82 | "psraw $6, %%mm4\n" /* R1 0 .. 64 */ |
83 | "movq %%mm7, %%mm3\n" /* lum2 00 L7 00 L5 00 L3 00 L1 */ | 83 | "movq %%mm7, %%mm3\n" /* lum2 00 L7 00 L5 00 L3 00 L1 */ |
84 | "psraw $6, %%mm5\n" /* G1 - .. + */ | 84 | "psraw $6, %%mm5\n" /* G1 - .. + */ |
85 | "paddw %%mm0, %%mm7\n" /* Lum2 +blue 00 B7 00 B5 00 B3 00 B1 */ | 85 | "paddw %%mm0, %%mm7\n" /* Lum2 +blue 00 B7 00 B5 00 B3 00 B1 */ |
86 | "psraw $6, %%mm6\n" /* B1 0 .. 64 */ | 86 | "psraw $6, %%mm6\n" /* B1 0 .. 64 */ |
87 | "packuswb %%mm4, %%mm4\n" /* R1 R1 */ | 87 | "packuswb %%mm4, %%mm4\n" /* R1 R1 */ |
88 | "packuswb %%mm5, %%mm5\n" /* G1 G1 */ | 88 | "packuswb %%mm5, %%mm5\n" /* G1 G1 */ |
89 | "packuswb %%mm6, %%mm6\n" /* B1 B1 */ | 89 | "packuswb %%mm6, %%mm6\n" /* B1 B1 */ |
90 | "punpcklbw %%mm4, %%mm4\n" | 90 | "punpcklbw %%mm4, %%mm4\n" |
91 | "punpcklbw %%mm5, %%mm5\n" | 91 | "punpcklbw %%mm5, %%mm5\n" |
92 | 92 | ||
93 | "pand mpeg3_MMX_redmask, %%mm4\n" | 93 | "pand mpeg3_MMX_redmask, %%mm4\n" |
94 | "psllw $3, %%mm5\n" /* GREEN 1 */ | 94 | "psllw $3, %%mm5\n" /* GREEN 1 */ |
95 | "punpcklbw %%mm6, %%mm6\n" | 95 | "punpcklbw %%mm6, %%mm6\n" |
96 | "pand mpeg3_MMX_grnmask, %%mm5\n" | 96 | "pand mpeg3_MMX_grnmask, %%mm5\n" |
97 | "pand mpeg3_MMX_redmask, %%mm6\n" | 97 | "pand mpeg3_MMX_redmask, %%mm6\n" |
98 | "por %%mm5, %%mm4\n" /* */ | 98 | "por %%mm5, %%mm4\n" /* */ |
99 | "psrlw $11, %%mm6\n" /* BLUE 1 */ | 99 | "psrlw $11, %%mm6\n" /* BLUE 1 */ |
100 | "movq %%mm3, %%mm5\n" /* lum2 */ | 100 | "movq %%mm3, %%mm5\n" /* lum2 */ |
101 | "paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */ | 101 | "paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */ |
102 | "paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */ | 102 | "paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */ |
103 | "psraw $6, %%mm3\n" /* R2 */ | 103 | "psraw $6, %%mm3\n" /* R2 */ |
104 | "por %%mm6, %%mm4\n" /* MM4 */ | 104 | "por %%mm6, %%mm4\n" /* MM4 */ |
105 | "psraw $6, %%mm5\n" /* G2 */ | 105 | "psraw $6, %%mm5\n" /* G2 */ |
106 | "movq (%2, %3), %%mm6\n" /* L3 */ | 106 | "movq (%2, %3), %%mm6\n" /* L3 */ |
107 | "psraw $6, %%mm7\n" | 107 | "psraw $6, %%mm7\n" |
108 | "packuswb %%mm3, %%mm3\n" | 108 | "packuswb %%mm3, %%mm3\n" |
109 | "packuswb %%mm5, %%mm5\n" | 109 | "packuswb %%mm5, %%mm5\n" |
110 | "packuswb %%mm7, %%mm7\n" | 110 | "packuswb %%mm7, %%mm7\n" |
111 | "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */ | 111 | "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */ |
112 | "punpcklbw %%mm3, %%mm3\n" | 112 | "punpcklbw %%mm3, %%mm3\n" |
113 | "punpcklbw %%mm5, %%mm5\n" | 113 | "punpcklbw %%mm5, %%mm5\n" |
114 | "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */ | 114 | "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */ |
115 | "punpcklbw %%mm7, %%mm7\n" | 115 | "punpcklbw %%mm7, %%mm7\n" |
116 | "psllw $3, %%mm5\n" /* GREEN 2 */ | 116 | "psllw $3, %%mm5\n" /* GREEN 2 */ |
117 | "pand mpeg3_MMX_redmask, %%mm7\n" | 117 | "pand mpeg3_MMX_redmask, %%mm7\n" |
118 | "pand mpeg3_MMX_redmask, %%mm3\n" | 118 | "pand mpeg3_MMX_redmask, %%mm3\n" |
119 | "psrlw $11, %%mm7\n" /* BLUE 2 */ | 119 | "psrlw $11, %%mm7\n" /* BLUE 2 */ |
120 | "pand mpeg3_MMX_grnmask, %%mm5\n" | 120 | "pand mpeg3_MMX_grnmask, %%mm5\n" |
121 | "por %%mm7, %%mm3\n" | 121 | "por %%mm7, %%mm3\n" |
122 | "movq (%2,%3), %%mm7\n" /* L4 */ | 122 | "movq (%2,%3), %%mm7\n" /* L4 */ |
123 | "por %%mm5, %%mm3\n" /* */ | 123 | "por %%mm5, %%mm3\n" /* */ |
124 | "psrlw $8, %%mm7\n" /* L4 */ | 124 | "psrlw $8, %%mm7\n" /* L4 */ |
125 | "movq %%mm4, %%mm5\n" | 125 | "movq %%mm4, %%mm5\n" |
126 | "punpcklwd %%mm3, %%mm4\n" | 126 | "punpcklwd %%mm3, %%mm4\n" |
127 | "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */ | 127 | "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */ |
128 | "punpckhwd %%mm3, %%mm5\n" | 128 | "punpckhwd %%mm3, %%mm5\n" |
129 | 129 | ||
130 | "movq %%mm4, (%4)\n" | 130 | "movq %%mm4, (%4)\n" |
131 | "movq %%mm5, 8(%4)\n" | 131 | "movq %%mm5, 8(%4)\n" |
132 | 132 | ||
133 | "movq %%mm6, %%mm4\n" /* Lum3 */ | 133 | "movq %%mm6, %%mm4\n" /* Lum3 */ |
134 | "paddw %%mm0, %%mm6\n" /* Lum3 +blue */ | 134 | "paddw %%mm0, %%mm6\n" /* Lum3 +blue */ |
135 | 135 | ||
136 | "movq %%mm4, %%mm5\n" /* Lum3 */ | 136 | "movq %%mm4, %%mm5\n" /* Lum3 */ |
137 | "paddw %%mm1, %%mm4\n" /* Lum3 +red */ | 137 | "paddw %%mm1, %%mm4\n" /* Lum3 +red */ |
138 | "paddw %%mm2, %%mm5\n" /* Lum3 +green */ | 138 | "paddw %%mm2, %%mm5\n" /* Lum3 +green */ |
139 | "psraw $6, %%mm4\n" | 139 | "psraw $6, %%mm4\n" |
140 | "movq %%mm7, %%mm3\n"/* Lum4 */ | 140 | "movq %%mm7, %%mm3\n"/* Lum4 */ |
141 | "psraw $6, %%mm5\n" | 141 | "psraw $6, %%mm5\n" |
142 | "paddw %%mm0, %%mm7\n" /* Lum4 +blue */ | 142 | "paddw %%mm0, %%mm7\n" /* Lum4 +blue */ |
143 | "psraw $6, %%mm6\n" /* Lum3 +blue */ | 143 | "psraw $6, %%mm6\n" /* Lum3 +blue */ |
144 | "movq %%mm3, %%mm0\n" /* Lum4 */ | 144 | "movq %%mm3, %%mm0\n" /* Lum4 */ |
145 | "packuswb %%mm4, %%mm4\n" | 145 | "packuswb %%mm4, %%mm4\n" |
146 | "paddw %%mm1, %%mm3\n" /* Lum4 +red */ | 146 | "paddw %%mm1, %%mm3\n" /* Lum4 +red */ |
147 | "packuswb %%mm5, %%mm5\n" | 147 | "packuswb %%mm5, %%mm5\n" |
148 | "paddw %%mm2, %%mm0\n" /* Lum4 +green */ | 148 | "paddw %%mm2, %%mm0\n" /* Lum4 +green */ |
149 | "packuswb %%mm6, %%mm6\n" | 149 | "packuswb %%mm6, %%mm6\n" |
150 | "punpcklbw %%mm4, %%mm4\n" | 150 | "punpcklbw %%mm4, %%mm4\n" |
151 | "punpcklbw %%mm5, %%mm5\n" | 151 | "punpcklbw %%mm5, %%mm5\n" |
152 | "punpcklbw %%mm6, %%mm6\n" | 152 | "punpcklbw %%mm6, %%mm6\n" |
153 | "psllw $3, %%mm5\n" /* GREEN 3 */ | 153 | "psllw $3, %%mm5\n" /* GREEN 3 */ |
154 | "pand mpeg3_MMX_redmask, %%mm4\n" | 154 | "pand mpeg3_MMX_redmask, %%mm4\n" |
155 | "psraw $6, %%mm3\n" /* psr 6 */ | 155 | "psraw $6, %%mm3\n" /* psr 6 */ |
156 | "psraw $6, %%mm0\n" | 156 | "psraw $6, %%mm0\n" |
157 | "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */ | 157 | "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */ |
158 | "pand mpeg3_MMX_grnmask, %%mm5\n" | 158 | "pand mpeg3_MMX_grnmask, %%mm5\n" |
159 | "psrlw $11, %%mm6\n" /* BLUE 3 */ | 159 | "psrlw $11, %%mm6\n" /* BLUE 3 */ |
160 | "por %%mm5, %%mm4\n" | 160 | "por %%mm5, %%mm4\n" |
161 | "psraw $6, %%mm7\n" | 161 | "psraw $6, %%mm7\n" |
162 | "por %%mm6, %%mm4\n" | 162 | "por %%mm6, %%mm4\n" |
163 | "packuswb %%mm3, %%mm3\n" | 163 | "packuswb %%mm3, %%mm3\n" |
164 | "packuswb %%mm0, %%mm0\n" | 164 | "packuswb %%mm0, %%mm0\n" |
165 | "packuswb %%mm7, %%mm7\n" | 165 | "packuswb %%mm7, %%mm7\n" |
166 | "punpcklbw %%mm3, %%mm3\n" | 166 | "punpcklbw %%mm3, %%mm3\n" |
167 | "punpcklbw %%mm0, %%mm0\n" | 167 | "punpcklbw %%mm0, %%mm0\n" |
168 | "punpcklbw %%mm7, %%mm7\n" | 168 | "punpcklbw %%mm7, %%mm7\n" |
169 | "pand mpeg3_MMX_redmask, %%mm3\n" | 169 | "pand mpeg3_MMX_redmask, %%mm3\n" |
170 | "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */ | 170 | "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */ |
171 | "psllw $3, %%mm0\n" /* GREEN 4 */ | 171 | "psllw $3, %%mm0\n" /* GREEN 4 */ |
172 | "psrlw $11, %%mm7\n" | 172 | "psrlw $11, %%mm7\n" |
173 | "pand mpeg3_MMX_grnmask, %%mm0\n" | 173 | "pand mpeg3_MMX_grnmask, %%mm0\n" |
174 | "por %%mm7, %%mm3\n" | 174 | "por %%mm7, %%mm3\n" |
175 | "addl $8, %6\n" | 175 | "addl $8, %6\n" |
176 | "por %%mm0, %%mm3\n" | 176 | "por %%mm0, %%mm3\n" |
177 | 177 | ||
178 | "movq %%mm4, %%mm5\n" | 178 | "movq %%mm4, %%mm5\n" |
179 | 179 | ||
180 | "punpcklwd %%mm3, %%mm4\n" | 180 | "punpcklwd %%mm3, %%mm4\n" |
181 | "punpckhwd %%mm3, %%mm5\n" | 181 | "punpckhwd %%mm3, %%mm5\n" |
182 | 182 | ||
183 | "movq %%mm4, (%4,%5,2)\n" | 183 | "movq %%mm4, (%4,%5,2)\n" |
184 | "movq %%mm5, 8(%4,%5,2)\n" | 184 | "movq %%mm5, 8(%4,%5,2)\n" |
185 | 185 | ||
186 | "addl $8, %2\n" | 186 | "addl $8, %2\n" |
187 | "addl $4, %0\n" | 187 | "addl $4, %0\n" |
188 | "addl $4, %1\n" | 188 | "addl $4, %1\n" |
189 | "cmpl %3, %6\n" | 189 | "cmpl %3, %6\n" |
190 | "leal 16(%4), %4\n" | 190 | "leal 16(%4), %4\n" |
191 | "jl 1b\n" | 191 | "jl 1b\n" |
192 | "addl %3, %2\n" /* lum += cols */ | 192 | "addl %3, %2\n" /* lum += cols */ |
193 | "addl %7, %4\n" /* row1 += mod */ | 193 | "addl %7, %4\n" /* row1 += mod */ |
194 | "movl $0, %6\n" | 194 | "movl $0, %6\n" |
195 | "cmpl %8, %2\n" | 195 | "cmpl %8, %2\n" |
196 | "jl 1b\n" | 196 | "jl 1b\n" |
197 | : : "r" (cr), | 197 | : : "r" (cr), |
198 | "r" (cb), | 198 | "r" (cb), |
199 | "r" (lum), | 199 | "r" (lum), |
200 | "r" (cols), | 200 | "r" (cols), |
201 | "r" (row1) , | 201 | "r" (row1) , |
202 | "r" (col1), | 202 | "r" (col1), |
203 | "m" (x), | 203 | "m" (x), |
204 | "m" (mod), | 204 | "m" (mod), |
205 | "m" (y) | 205 | "m" (y) |
206 | ); | 206 | ); |
207 | } | 207 | } |
208 | 208 | ||
209 | static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; | 209 | static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; |
210 | static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; | 210 | static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; |
211 | static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; | 211 | static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; |
212 | static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; | 212 | static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; |
213 | static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; | 213 | static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; |
214 | static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; | 214 | static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; |
215 | 215 | ||
216 | inline void mpeg3_bgra32_mmx(unsigned long y, | 216 | inline void mpeg3_bgra32_mmx(unsigned long y, |
217 | unsigned long u, | 217 | unsigned long u, |
218 | unsigned long v, | 218 | unsigned long v, |
219 | unsigned long *output) | 219 | unsigned long *output) |
220 | { | 220 | { |
221 | asm(" | 221 | |
222 | asm( | ||
222 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ | 223 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
223 | /* for bgr24. */ | 224 | /* for bgr24. */ |
224 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 225 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
225 | movd (%1), %%mm1; /* Load u 0x00000000000000cr */ | 226 | "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ |
226 | movq %%mm0, %%mm3; /* Copy y to temp */ | 227 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
227 | psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ | 228 | "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ |
228 | movd (%2), %%mm2; /* Load v 0x00000000000000cb */ | 229 | "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ |
229 | psllq $16, %%mm3; /* Shift y */ | 230 | "psllq $16, %%mm3;" /* Shift y */ |
230 | movq %%mm1, %%mm4; /* Copy u to temp */ | 231 | "movq %%mm1, %%mm4;" /* Copy u to temp */ |
231 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 232 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
232 | psllq $16, %%mm4; /* Shift u */ | 233 | "psllq $16, %%mm4;" /* Shift u */ |
233 | movq %%mm2, %%mm5; /* Copy v to temp */ | 234 | "movq %%mm2, %%mm5;" /* Copy v to temp */ |
234 | psllq $16, %%mm3; /* Shift y */ | 235 | "psllq $16, %%mm3;" /* Shift y */ |
235 | por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ | 236 | "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ |
236 | psllq $16, %%mm5; /* Shift v */ | 237 | "psllq $16, %%mm5;" /* Shift v */ |
237 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 238 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
238 | por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ | 239 | "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ |
239 | 240 | ||
240 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ | 241 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
241 | psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ | 242 | "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */ |
242 | pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ | 243 | "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
243 | psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ | 244 | "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
244 | psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ | 245 | "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ |
245 | pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ | 246 | "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
246 | 247 | ||
247 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ | 248 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
248 | paddsw %%mm1, %%mm0; /* Add u to result */ | 249 | "paddsw %%mm1, %%mm0;" /* Add u to result */ |
249 | paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ | 250 | "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ |
250 | psraw $6, %%mm0; /* Demote precision */ | 251 | "psraw $6, %%mm0;" /* Demote precision */ |
251 | packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ | 252 | "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ |
252 | movd %%mm0, (%3); /* Store output */ | 253 | "movd %%mm0, (%3);" /* Store output */ |
253 | " | ||
254 | : | 254 | : |
255 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); | 255 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
256 | } | 256 | } |
257 | 257 | ||
258 | inline void mpeg3_601_bgra32_mmx(unsigned long y, | 258 | inline void mpeg3_601_bgra32_mmx(unsigned long y, |
259 | unsigned long u, | 259 | unsigned long u, |
260 | unsigned long v, | 260 | unsigned long v, |
261 | unsigned long *output) | 261 | unsigned long *output) |
262 | { | 262 | { |
263 | asm(" | 263 | asm( |
264 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ | 264 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
265 | /* for bgr24. */ | 265 | /* for bgr24. */ |
266 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 266 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
267 | psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ | 267 | "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */ |
268 | movd (%1), %%mm1; /* Load u 0x00000000000000cr */ | 268 | "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ |
269 | movq %%mm0, %%mm3; /* Copy y to temp */ | 269 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
270 | psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ | 270 | "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ |
271 | movd (%2), %%mm2; /* Load v 0x00000000000000cb */ | 271 | "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ |
272 | psllq $16, %%mm3; /* Shift y */ | 272 | "psllq $16, %%mm3;" /* Shift y */ |
273 | movq %%mm1, %%mm4; /* Copy u to temp */ | 273 | "movq %%mm1, %%mm4;" /* Copy u to temp */ |
274 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 274 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
275 | psllq $16, %%mm4; /* Shift u */ | 275 | "psllq $16, %%mm4;" /* Shift u */ |
276 | movq %%mm2, %%mm5; /* Copy v to temp */ | 276 | "movq %%mm2, %%mm5;" /* Copy v to temp */ |
277 | psllq $16, %%mm3; /* Shift y */ | 277 | "psllq $16, %%mm3;" /* Shift y */ |
278 | por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ | 278 | "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ |
279 | psllq $16, %%mm5; /* Shift v */ | 279 | "psllq $16, %%mm5;" /* Shift v */ |
280 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 280 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
281 | por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ | 281 | "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ |
282 | 282 | ||
283 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ | 283 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
284 | pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ | 284 | "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale and shift y coeffs */ |
285 | psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ | 285 | "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */ |
286 | pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ | 286 | "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
287 | psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ | 287 | "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ |
288 | pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ | 288 | "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
289 | 289 | ||
290 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ | 290 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
291 | paddsw %%mm1, %%mm0; /* Add u to result */ | 291 | "paddsw %%mm1, %%mm0;" /* Add u to result */ |
292 | paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ | 292 | "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ |
293 | psraw $6, %%mm0; /* Demote precision */ | 293 | "psraw $6, %%mm0;" /* Demote precision */ |
294 | packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ | 294 | "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ |
295 | movd %%mm0, (%3); /* Store output */ | 295 | "movd %%mm0, (%3);" /* Store output */ |
296 | " | ||
297 | : | 296 | : |
298 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); | 297 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
299 | } | 298 | } |
300 | 299 | ||
301 | static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; | 300 | static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; |
302 | static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; | 301 | static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; |
303 | static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; | 302 | static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; |
304 | static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; | 303 | static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; |
305 | 304 | ||
306 | inline void mpeg3_rgba32_mmx(unsigned long y, | 305 | inline void mpeg3_rgba32_mmx(unsigned long y, |
307 | unsigned long u, | 306 | unsigned long u, |
308 | unsigned long v, | 307 | unsigned long v, |
309 | unsigned long *output) | 308 | unsigned long *output) |
310 | { | 309 | { |
311 | asm(" | 310 | asm( |
312 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ | 311 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ |
313 | /* for rgb24. */ | 312 | /* for rgb24. */ |
314 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 313 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
315 | movd (%1), %%mm1; /* Load v 0x00000000000000vv */ | 314 | "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */ |
316 | movq %%mm0, %%mm3; /* Copy y to temp */ | 315 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
317 | psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ | 316 | "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */ |
318 | movd (%2), %%mm2; /* Load u 0x00000000000000uu */ | 317 | "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */ |
319 | psllq $16, %%mm3; /* Shift y */ | 318 | "psllq $16, %%mm3;" /* Shift y */ |
320 | movq %%mm1, %%mm4; /* Copy v to temp */ | 319 | "movq %%mm1, %%mm4;" /* Copy v to temp */ |
321 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 320 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
322 | psllq $16, %%mm4; /* Shift v */ | 321 | "psllq $16, %%mm4;" /* Shift v */ |
323 | movq %%mm2, %%mm5; /* Copy u to temp */ | 322 | "movq %%mm2, %%mm5;" /* Copy u to temp */ |
324 | psllq $16, %%mm3; /* Shift y */ | 323 | "psllq $16, %%mm3;" /* Shift y */ |
325 | por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ | 324 | "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */ |
326 | psllq $16, %%mm5; /* Shift u */ | 325 | "psllq $16, %%mm5;" /* Shift u */ |
327 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 326 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
328 | por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ | 327 | "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */ |
329 | 328 | ||
330 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ | 329 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ |
331 | psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ | 330 | "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */ |
332 | pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ | 331 | "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ |
333 | psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ | 332 | "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
334 | psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ | 333 | "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */ |
335 | pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ | 334 | "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ |
336 | 335 | ||
337 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ | 336 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ |
338 | paddsw %%mm1, %%mm0; /* Add v to result */ | 337 | "paddsw %%mm1, %%mm0;" /* Add v to result */ |
339 | paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ | 338 | "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */ |
340 | psraw $6, %%mm0; /* Demote precision */ | 339 | "psraw $6, %%mm0;" /* Demote precision */ |
341 | packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ | 340 | "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */ |
342 | movd %%mm0, (%3); /* Store output */ | 341 | "movd %%mm0, (%3);" /* Store output */ |
343 | " | ||
344 | : | 342 | : |
345 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); | 343 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
346 | } | 344 | } |
347 | 345 | ||
/*
 * Convert one Y/U/V sample triple to a packed pixel with MMX and store it
 * through `output`.
 *
 * y, u, v  - sample values; only their low 32 bits are loaded (movd).
 * output   - destination; exactly 32 bits are written (movd), laid out as
 *            0x00bbggrr, so the same routine serves 24 bit output when the
 *            caller advances by 3 bytes.
 *
 * The "601" variant first subtracts mpeg3_MMX_601_Y_DIFF from y and scales
 * it by mpeg3_MMX_601_Y_COEF before adding the chroma contributions.
 *
 * The asm reads y/u/v and writes *output through plain pointer operands, so
 * a "memory" clobber is required to make the compiler flush the arguments to
 * their stack slots beforehand and to treat *output as modified afterwards.
 * __asm__ __volatile__ is used (as for the emms at the end of
 * mpeg3video_ditherframe) so the statement also compiles in strict ISO modes.
 *
 * NOTE(review): mm0-mm5 are overwritten and are not listed as clobbers, and
 * no emms is issued here; the caller is expected to issue emms afterwards.
 */
inline void mpeg3_601_rgba32_mmx(unsigned long y, 
		unsigned long u, 
		unsigned long v, 
		unsigned long *output)
{
	__asm__ __volatile__(
/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
/* for rgb24. */
		"movd (%0), %%mm0;"                    /* Load y   0x00000000000000yy */
		"psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;"  /* Subtract 16 from y */
		"movd (%1), %%mm1;"                    /* Load v   0x00000000000000vv */
		"movq %%mm0, %%mm3;"                   /* Copy y to temp */
		"psllq $16, %%mm1;"                    /* Shift v  0x0000000000vv0000 */
		"movd (%2), %%mm2;"                    /* Load u   0x00000000000000uu */
		"psllq $16, %%mm3;"                    /* Shift y */
		"movq %%mm1, %%mm4;"                   /* Copy v to temp */
		"por %%mm3, %%mm0;"                    /* Overlay new y byte 0x0000000000yy00yy */
		"psllq $16, %%mm4;"                    /* Shift v */
		"movq %%mm2, %%mm5;"                   /* Copy u to temp */
		"psllq $16, %%mm3;"                    /* Shift y */
		"por %%mm4, %%mm1;"                    /* Overlay new v byte 0x000000vv00vv0000 */
		"psllq $16, %%mm5;"                    /* Shift u */
		"por %%mm3, %%mm0;"                    /* Overlay new y byte 0x000000yy00yy00yy */
		"por %%mm5, %%mm2;"                    /* Overlay new u byte 0x0000000000uu00uu */

/* mm0: 0x000000yy00yy00yy     mm1: 0x000000vv00vv0000     mm2: 0x0000000000uu00uu */
		"pmullw mpeg3_MMX_601_Y_COEF, %%mm0;"  /* Scale y coeffs */
		"psubw mpeg3_MMX_V_80_RGB, %%mm1;"     /* Subtract 128 from v 0x000000vv00vv0000 */
		"pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;"  /* Multiply v coeffs 0x0000vvvvvvvv0000 */
		"psubw mpeg3_MMX_U_80_RGB, %%mm2;"     /* Subtract 128 from u 0x0000000000uu00uu */
		"pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;"  /* Multiply u coeffs 0x0000uuuuuuuuuuuu */

/* mm0: 0x000000yy00yy00yy     mm1: 0x0000vvvvvvvv0000     mm2: 0x00000000uuuuuuuu */
		"paddsw %%mm1, %%mm0;"                 /* Add v to result */
		"paddsw %%mm2, %%mm0;"                 /* Add u to result 0x0000bbbbggggrrrr */
		"psraw $6, %%mm0;"                     /* Demote precision */
		"packuswb %%mm0, %%mm0;"               /* Pack into RGBA 0x0000000000bbggrr */
		"movd %%mm0, (%3);"                    /* Store output */
	:
	: "r" (&y), "r" (&v), "r" (&u), "r" (output)
	: "memory");
}
390 | 387 | ||
391 | #endif | 388 | #endif |
392 | 389 | ||
/*
 * Opens the loop over output rows (closed by DITHER_ROW_TAIL).
 *
 * For every output row h it points y_in / cb_in / cr_in at the first source
 * sample of the row selected by video->y_table[h] + video->in_y, offset by
 * the horizontal crop video->in_x, and points data at output_rows[h].
 *
 * The chroma rows are addressed with (row >> 1) * chrom_width and the chroma
 * samples are later indexed with x / 2, so the horizontal crop offset is
 * in_x >> 1 for BOTH chroma planes.  (Cb previously used in_x >> 2, which
 * made Cb and Cr disagree whenever in_x was non-zero.)
 *
 * Expects in scope: video, src, output_rows, h, y_in, cb_in, cr_in, data.
 */
#define DITHER_ROW_HEAD \
	for(h = 0; h < video->out_h; h++) \
	{ \
		y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \
		cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \
		cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \
		data = output_rows[h];

/* Closes the row loop opened by DITHER_ROW_HEAD. */
#define DITHER_ROW_TAIL \
	}
403 | 400 | ||
/*
 * Opens the per-pixel loop for a horizontally scaled row (closed by
 * DITHER_SCALE_TAIL).
 *
 * For each output column w the source column is video->x_table[w]; the
 * chroma index uv_subscript is half of that.  The luma sample is shifted
 * left by 16, the per-channel table contributions are added and the sum is
 * shifted back right by 16, leaving unclipped r_l / g_l / b_l for the
 * STORE_PIXEL_* macro that follows.
 *
 * Expects in scope: video, w, uv_subscript, y_in, cb_in, cr_in,
 * y_l, r_l, g_l, b_l.
 */
#define DITHER_SCALE_HEAD \
	for(w = 0; w < video->out_w; w++) \
	{ \
		uv_subscript = video->x_table[w] / 2; \
		y_l = y_in[video->x_table[w]]; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
		g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
		b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;

/*
 * Same as DITHER_SCALE_HEAD, except that the luma sample is first mapped
 * through the mpeg3_601_to_rgb lookup table.
 */
#define DITHER_SCALE_601_HEAD \
	for(w = 0; w < video->out_w; w++) \
	{ \
		uv_subscript = video->x_table[w] / 2; \
		y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
		g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
		b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;

/* Closes the loop opened by DITHER_SCALE_HEAD or DITHER_SCALE_601_HEAD. */
#define DITHER_SCALE_TAIL \
	}
426 | 423 | ||
/*
 * Opens the per-pixel loop for a scaled row on the MMX path.  Only the
 * chroma index uv_subscript is computed here; the colour conversion itself
 * is done by the MMX routine called in the loop body.
 */
#define DITHER_MMX_SCALE_HEAD \
	for(w = 0; w < video->out_w; w++) \
	{ \
		uv_subscript = video->x_table[w] / 2;

/*
 * Closes DITHER_MMX_SCALE_HEAD: advances the output pointer by `step`
 * bytes (one pixel) and ends the loop.
 */
#define DITHER_MMX_SCALE_TAIL \
		data += step; \
	}

/*
 * Opens the per-pixel-pair loop for an unscaled row on the MMX path.
 * w advances by 2 per iteration; the loop body is expected to convert two
 * pixels that share one chroma sample pair.
 */
#define DITHER_MMX_HEAD \
	for(w = 0; w < video->out_w; w += 2) \
	{

/*
 * Closes DITHER_MMX_HEAD: advances the output pointer by `step` bytes and
 * moves both chroma pointers to the next sample.
 */
#define DITHER_MMX_TAIL \
		data += step; \
		cr_in++; \
		cb_in++; \
	}
445 | 442 | ||
/*
 * Opens the per-pixel loop for an unscaled row (closed by DITHER_TAIL).
 *
 * Consumes one luma sample per iteration through *y_in++ and reads the
 * current chroma samples through *cr_in / *cb_in without advancing them.
 * The luma sample is shifted left by 16, the table contributions are added
 * and the sum is shifted back right by 16, leaving unclipped r_l / g_l /
 * b_l for the STORE_PIXEL_* macro that follows.
 */
#define DITHER_HEAD \
	for(w = 0; w < video->horizontal_size; w++) \
	{ \
		y_l = *y_in++; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
		g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
		b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;

/*
 * Same as DITHER_HEAD, except that the luma sample is first mapped through
 * the mpeg3_601_to_rgb lookup table.
 */
#define DITHER_601_HEAD \
	for(w = 0; w < video->horizontal_size; w++) \
	{ \
		y_l = mpeg3_601_to_rgb[*y_in++]; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
		g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
		b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;

/*
 * Closes DITHER_HEAD / DITHER_601_HEAD.  The chroma pointers advance only
 * after odd columns, so each chroma sample pair is used for two
 * consecutive luma samples.
 */
#define DITHER_TAIL \
		if(w & 1) \
		{ \
			cr_in++; \
			cb_in++; \
		} \
	}
471 | 468 | ||
472 | 469 | ||
/*
 * Write the current pixel as three bytes in B, G, R order and advance
 * `data` past it.  Each channel is passed through CLIP() first.
 */
#define STORE_PIXEL_BGR888 \
	data[0] = CLIP(b_l); \
	data[1] = CLIP(g_l); \
	data[2] = CLIP(r_l); \
	data += 3;

/*
 * Write the current pixel as four bytes in B, G, R order followed by a
 * zero byte, and advance `data` past it.
 */
#define STORE_PIXEL_BGRA8888 \
	data[0] = CLIP(b_l); \
	data[1] = CLIP(g_l); \
	data[2] = CLIP(r_l); \
	data[3] = 0; \
	data += 4;
483 | 480 | ||
/*
 * Pack the current pixel into one 16 bit word - top 5 bits of red in bits
 * 15..11, top 6 bits of green in bits 10..5, top 5 bits of blue in bits
 * 4..0 - store it at `data` and advance `data` by the size of that word.
 *
 * The store and the pointer advance are separate statements: incrementing
 * the result of a cast ("*((unsigned short*)data)++") is not valid ISO C
 * and is rejected by current compilers.
 */
#define STORE_PIXEL_RGB565 \
	*(unsigned short*)data = \
		((CLIP(r_l) & 0xf8) << 8) | \
		((CLIP(g_l) & 0xfc) << 3) | \
		((CLIP(b_l) & 0xf8) >> 3); \
	data += sizeof(unsigned short);
489 | 486 | ||
/*
 * Write the current pixel as three bytes in R, G, B order and advance
 * `data` past it.  Each channel is passed through CLIP() first.
 */
#define STORE_PIXEL_RGB888 \
	data[0] = CLIP(r_l); \
	data[1] = CLIP(g_l); \
	data[2] = CLIP(b_l); \
	data += 3;

/*
 * Write the current pixel as four bytes in R, G, B order followed by a
 * zero byte, and advance `data` past it.
 */
#define STORE_PIXEL_RGBA8888 \
	data[0] = CLIP(r_l); \
	data[1] = CLIP(g_l); \
	data[2] = CLIP(b_l); \
	data[3] = 0; \
	data += 4;

/*
 * Write the current pixel as four 16 bit words (R, G, B, then zero) through
 * `data_s` and advance `data_s` past it.  The channel values are the
 * CLIP()ed results and are stored without further scaling.
 */
#define STORE_PIXEL_RGBA16161616 \
	data_s[0] = CLIP(r_l); \
	data_s[1] = CLIP(g_l); \
	data_s[2] = CLIP(b_l); \
	data_s[3] = 0; \
	data_s += 4;
506 | 503 | ||
507 | 504 | ||
508 | 505 | ||
509 | /* Only good for YUV 4:2:0 */ | 506 | /* Only good for YUV 4:2:0 */ |
510 | int mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) | 507 | int mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) |
511 | { | 508 | { |
512 | int h = 0; | 509 | int h = 0; |
513 | register unsigned char *y_in, *cb_in, *cr_in; | 510 | register unsigned char *y_in, *cb_in, *cr_in; |
514 | long y_l, r_l, b_l, g_l; | 511 | long y_l, r_l, b_l, g_l; |
515 | register unsigned char *data; | 512 | register unsigned char *data; |
516 | register int uv_subscript, step, w = -1; | 513 | register int uv_subscript, step, w = -1; |
517 | 514 | ||
518 | #ifdef HAVE_MMX | 515 | #ifdef HAVE_MMX |
519 | /* =================================== MMX ===================================== */ | 516 | /* =================================== MMX ===================================== */ |
520 | if(video->have_mmx && | 517 | if(video->have_mmx && |
521 | video->out_w == video->horizontal_size && | 518 | video->out_w == video->horizontal_size && |
522 | video->out_h == video->vertical_size && | 519 | video->out_h == video->vertical_size && |
523 | video->in_w == video->out_w && | 520 | video->in_w == video->out_w && |
524 | video->in_h == video->out_h && | 521 | video->in_h == video->out_h && |
525 | video->in_x == 0 && | 522 | video->in_x == 0 && |
526 | video->in_y == 0 && | 523 | video->in_y == 0 && |
527 | (video->color_model == MPEG3_RGB565 || video->color_model == MPEG3_601_RGB565)) | 524 | (video->color_model == MPEG3_RGB565 || video->color_model == MPEG3_601_RGB565)) |
528 | { | 525 | { |
529 | /* Unscaled 16 bit */ | 526 | /* Unscaled 16 bit */ |
530 | mpeg3video_rgb16_mmx(src[0], | 527 | mpeg3video_rgb16_mmx(src[0], |
531 | src[2], | 528 | src[2], |
532 | src[1], | 529 | src[1], |
533 | output_rows[0], | 530 | output_rows[0], |
534 | video->out_h, | 531 | video->out_h, |
535 | video->out_w, | 532 | video->out_w, |
536 | (output_rows[1] - output_rows[0]) / 2 - video->out_w); | 533 | (output_rows[1] - output_rows[0]) / 2 - video->out_w); |
537 | } | 534 | } |
538 | else | 535 | else |
539 | if(video->have_mmx && | 536 | if(video->have_mmx && |
540 | (video->color_model == MPEG3_BGRA8888 || | 537 | (video->color_model == MPEG3_BGRA8888 || |
541 | video->color_model == MPEG3_BGR888 || | 538 | video->color_model == MPEG3_BGR888 || |
542 | /* video->color_model == MPEG3_RGB888 || */ | 539 | /* video->color_model == MPEG3_RGB888 || */ |
543 | video->color_model == MPEG3_RGBA8888 || | 540 | video->color_model == MPEG3_RGBA8888 || |
544 | video->color_model == MPEG3_601_BGR888 || | 541 | video->color_model == MPEG3_601_BGR888 || |
545 | video->color_model == MPEG3_601_BGRA8888 || | 542 | video->color_model == MPEG3_601_BGRA8888 || |
546 | video->color_model == MPEG3_601_RGB888 || | 543 | video->color_model == MPEG3_601_RGB888 || |
547 | video->color_model == MPEG3_601_RGBA8888)) | 544 | video->color_model == MPEG3_601_RGBA8888)) |
548 | { | 545 | { |
549 | /* Original MMX */ | 546 | /* Original MMX */ |
550 | if(video->color_model == MPEG3_BGRA8888 || | 547 | if(video->color_model == MPEG3_BGRA8888 || |
551 | video->color_model == MPEG3_RGBA8888 || | 548 | video->color_model == MPEG3_RGBA8888 || |
552 | video->color_model == MPEG3_601_BGRA8888 || | 549 | video->color_model == MPEG3_601_BGRA8888 || |
553 | video->color_model == MPEG3_601_RGBA8888) step = 4; | 550 | video->color_model == MPEG3_601_RGBA8888) step = 4; |
554 | else | 551 | else |
555 | if(video->color_model == MPEG3_BGR888 || | 552 | if(video->color_model == MPEG3_BGR888 || |
556 | video->color_model == MPEG3_RGB888 || | 553 | video->color_model == MPEG3_RGB888 || |
557 | video->color_model == MPEG3_601_BGR888 || | 554 | video->color_model == MPEG3_601_BGR888 || |
558 | video->color_model == MPEG3_601_RGB888) step = 3; | 555 | video->color_model == MPEG3_601_RGB888) step = 3; |
559 | 556 | ||
560 | DITHER_ROW_HEAD | 557 | DITHER_ROW_HEAD |
561 | /* Transfer row with scaling */ | 558 | /* Transfer row with scaling */ |
562 | if(video->out_w != video->horizontal_size) | 559 | if(video->out_w != video->horizontal_size) |
563 | { | 560 | { |
564 | switch(video->color_model) | 561 | switch(video->color_model) |
565 | { | 562 | { |
566 | case MPEG3_BGRA8888: | 563 | case MPEG3_BGRA8888: |
567 | case MPEG3_BGR888: | 564 | case MPEG3_BGR888: |
568 | DITHER_MMX_SCALE_HEAD | 565 | DITHER_MMX_SCALE_HEAD |
569 | mpeg3_bgra32_mmx(y_in[video->x_table[w]], | 566 | mpeg3_bgra32_mmx(y_in[video->x_table[w]], |
570 | cr_in[uv_subscript], | 567 | cr_in[uv_subscript], |
571 | cb_in[uv_subscript], | 568 | cb_in[uv_subscript], |
572 | (unsigned long*)data); | 569 | (unsigned long*)data); |
573 | DITHER_MMX_SCALE_TAIL | 570 | DITHER_MMX_SCALE_TAIL |
574 | break; | 571 | break; |
575 | 572 | ||
576 | case MPEG3_601_BGRA8888: | 573 | case MPEG3_601_BGRA8888: |
577 | case MPEG3_601_BGR888: | 574 | case MPEG3_601_BGR888: |
578 | DITHER_MMX_SCALE_HEAD | 575 | DITHER_MMX_SCALE_HEAD |
579 | mpeg3_601_bgra32_mmx(y_in[video->x_table[w]], | 576 | mpeg3_601_bgra32_mmx(y_in[video->x_table[w]], |
580 | cr_in[uv_subscript], | 577 | cr_in[uv_subscript], |
581 | cb_in[uv_subscript], | 578 | cb_in[uv_subscript], |
582 | (unsigned long*)data); | 579 | (unsigned long*)data); |
583 | DITHER_MMX_SCALE_TAIL | 580 | DITHER_MMX_SCALE_TAIL |
584 | break; | 581 | break; |
585 | 582 | ||
586 | case MPEG3_RGBA8888: | 583 | case MPEG3_RGBA8888: |
587 | case MPEG3_RGB888: | 584 | case MPEG3_RGB888: |
588 | DITHER_MMX_SCALE_HEAD | 585 | DITHER_MMX_SCALE_HEAD |
589 | mpeg3_rgba32_mmx(y_in[video->x_table[w]], | 586 | mpeg3_rgba32_mmx(y_in[video->x_table[w]], |
590 | cr_in[uv_subscript], | 587 | cr_in[uv_subscript], |
591 | cb_in[uv_subscript], | 588 | cb_in[uv_subscript], |
592 | (unsigned long*)data); | 589 | (unsigned long*)data); |
593 | DITHER_MMX_SCALE_TAIL | 590 | DITHER_MMX_SCALE_TAIL |
594 | break; | 591 | break; |
595 | 592 | ||
596 | case MPEG3_601_RGBA8888: | 593 | case MPEG3_601_RGBA8888: |
597 | case MPEG3_601_RGB888: | 594 | case MPEG3_601_RGB888: |
598 | DITHER_MMX_SCALE_HEAD | 595 | DITHER_MMX_SCALE_HEAD |
599 | mpeg3_601_rgba32_mmx(y_in[video->x_table[w]], | 596 | mpeg3_601_rgba32_mmx(y_in[video->x_table[w]], |
600 | cr_in[uv_subscript], | 597 | cr_in[uv_subscript], |
601 | cb_in[uv_subscript], | 598 | cb_in[uv_subscript], |
602 | (unsigned long*)data); | 599 | (unsigned long*)data); |
603 | DITHER_MMX_SCALE_TAIL | 600 | DITHER_MMX_SCALE_TAIL |
604 | break; | 601 | break; |
605 | } | 602 | } |
606 | } | 603 | } |
607 | else | 604 | else |
608 | /* Transfer row unscaled */ | 605 | /* Transfer row unscaled */ |
609 | { | 606 | { |
610 | switch(video->color_model) | 607 | switch(video->color_model) |
611 | { | 608 | { |
612 | /* MMX byte swap 24 and 32 bit */ | 609 | /* MMX byte swap 24 and 32 bit */ |
613 | case MPEG3_BGRA8888: | 610 | case MPEG3_BGRA8888: |
614 | case MPEG3_BGR888: | 611 | case MPEG3_BGR888: |
615 | DITHER_MMX_HEAD | 612 | DITHER_MMX_HEAD |
616 | mpeg3_bgra32_mmx(*y_in++, | 613 | mpeg3_bgra32_mmx(*y_in++, |
617 | *cr_in, | 614 | *cr_in, |
618 | *cb_in, | 615 | *cb_in, |
619 | (unsigned long*)data); | 616 | (unsigned long*)data); |
620 | data += step; | 617 | data += step; |
621 | mpeg3_bgra32_mmx(*y_in++, | 618 | mpeg3_bgra32_mmx(*y_in++, |
622 | *cr_in, | 619 | *cr_in, |
623 | *cb_in, | 620 | *cb_in, |
624 | (unsigned long*)data); | 621 | (unsigned long*)data); |
625 | DITHER_MMX_TAIL | 622 | DITHER_MMX_TAIL |
626 | break; | 623 | break; |
627 | 624 | ||
628 | /* MMX 601 byte swap 24 and 32 bit */ | 625 | /* MMX 601 byte swap 24 and 32 bit */ |
629 | case MPEG3_601_BGRA8888: | 626 | case MPEG3_601_BGRA8888: |
630 | case MPEG3_601_BGR888: | 627 | case MPEG3_601_BGR888: |
631 | DITHER_MMX_HEAD | 628 | DITHER_MMX_HEAD |
632 | mpeg3_601_bgra32_mmx(*y_in++, | 629 | mpeg3_601_bgra32_mmx(*y_in++, |
633 | *cr_in, | 630 | *cr_in, |
634 | *cb_in, | 631 | *cb_in, |
635 | (unsigned long*)data); | 632 | (unsigned long*)data); |
636 | data += step; | 633 | data += step; |
637 | mpeg3_601_bgra32_mmx(*y_in++, | 634 | mpeg3_601_bgra32_mmx(*y_in++, |
638 | *cr_in, | 635 | *cr_in, |
639 | *cb_in, | 636 | *cb_in, |
640 | (unsigned long*)data); | 637 | (unsigned long*)data); |
641 | DITHER_MMX_TAIL | 638 | DITHER_MMX_TAIL |
642 | break; | 639 | break; |
643 | 640 | ||
644 | /* MMX 24 and 32 bit no byte swap */ | 641 | /* MMX 24 and 32 bit no byte swap */ |
645 | case MPEG3_RGBA8888: | 642 | case MPEG3_RGBA8888: |
646 | case MPEG3_RGB888: | 643 | case MPEG3_RGB888: |
647 | DITHER_MMX_HEAD | 644 | DITHER_MMX_HEAD |
648 | mpeg3_rgba32_mmx(*y_in++, | 645 | mpeg3_rgba32_mmx(*y_in++, |
649 | *cr_in, | 646 | *cr_in, |
650 | *cb_in, | 647 | *cb_in, |
651 | (unsigned long*)data); | 648 | (unsigned long*)data); |
652 | data += step; | 649 | data += step; |
653 | mpeg3_rgba32_mmx(*y_in++, | 650 | mpeg3_rgba32_mmx(*y_in++, |
654 | *cr_in, | 651 | *cr_in, |
655 | *cb_in, | 652 | *cb_in, |
656 | (unsigned long*)data); | 653 | (unsigned long*)data); |
657 | DITHER_MMX_TAIL | 654 | DITHER_MMX_TAIL |
658 | break; | 655 | break; |
659 | 656 | ||
660 | /* MMX 601 24 and 32 bit no byte swap */ | 657 | /* MMX 601 24 and 32 bit no byte swap */ |
661 | case MPEG3_601_RGBA8888: | 658 | case MPEG3_601_RGBA8888: |
662 | case MPEG3_601_RGB888: | 659 | case MPEG3_601_RGB888: |
663 | DITHER_MMX_HEAD | 660 | DITHER_MMX_HEAD |
664 | mpeg3_601_rgba32_mmx(*y_in++, | 661 | mpeg3_601_rgba32_mmx(*y_in++, |
665 | *cr_in, | 662 | *cr_in, |
666 | *cb_in, | 663 | *cb_in, |
667 | (unsigned long*)data); | 664 | (unsigned long*)data); |
668 | data += step; | 665 | data += step; |
669 | mpeg3_601_rgba32_mmx(*y_in++, | 666 | mpeg3_601_rgba32_mmx(*y_in++, |
670 | *cr_in, | 667 | *cr_in, |
671 | *cb_in, | 668 | *cb_in, |
672 | (unsigned long*)data); | 669 | (unsigned long*)data); |
673 | DITHER_MMX_TAIL | 670 | DITHER_MMX_TAIL |
674 | break; | 671 | break; |
675 | } | 672 | } |
676 | } | 673 | } |
677 | DITHER_ROW_TAIL | 674 | DITHER_ROW_TAIL |
678 | } | 675 | } |
679 | else | 676 | else |
680 | #endif | 677 | #endif |
681 | /* ================================== NO MMX ==================================== */ | 678 | /* ================================== NO MMX ==================================== */ |
682 | { | 679 | { |
683 | DITHER_ROW_HEAD | 680 | DITHER_ROW_HEAD |
684 | /* Transfer row with scaling */ | 681 | /* Transfer row with scaling */ |
685 | if(video->out_w != video->horizontal_size) | 682 | if(video->out_w != video->horizontal_size) |
686 | { | 683 | { |
687 | switch(video->color_model) | 684 | switch(video->color_model) |
688 | { | 685 | { |
689 | case MPEG3_BGR888: | 686 | case MPEG3_BGR888: |
690 | DITHER_SCALE_HEAD | 687 | DITHER_SCALE_HEAD |
691 | STORE_PIXEL_BGR888 | 688 | STORE_PIXEL_BGR888 |
692 | DITHER_SCALE_TAIL | 689 | DITHER_SCALE_TAIL |
693 | break; | 690 | break; |
694 | case MPEG3_BGRA8888: | 691 | case MPEG3_BGRA8888: |
695 | DITHER_SCALE_HEAD | 692 | DITHER_SCALE_HEAD |
696 | STORE_PIXEL_BGRA8888 | 693 | STORE_PIXEL_BGRA8888 |
697 | DITHER_SCALE_TAIL | 694 | DITHER_SCALE_TAIL |
698 | break; | 695 | break; |
699 | case MPEG3_RGB565: | 696 | case MPEG3_RGB565: |
700 | DITHER_SCALE_HEAD | 697 | DITHER_SCALE_HEAD |
701 | STORE_PIXEL_RGB565 | 698 | STORE_PIXEL_RGB565 |
702 | DITHER_SCALE_TAIL | 699 | DITHER_SCALE_TAIL |
703 | break; | 700 | break; |
704 | case MPEG3_RGB888: | 701 | case MPEG3_RGB888: |
705 | DITHER_SCALE_HEAD | 702 | DITHER_SCALE_HEAD |
706 | STORE_PIXEL_RGB888 | 703 | STORE_PIXEL_RGB888 |
707 | DITHER_SCALE_TAIL | 704 | DITHER_SCALE_TAIL |
708 | break; | 705 | break; |
709 | case MPEG3_RGBA8888: | 706 | case MPEG3_RGBA8888: |
710 | DITHER_SCALE_HEAD | 707 | DITHER_SCALE_HEAD |
711 | STORE_PIXEL_RGBA8888 | 708 | STORE_PIXEL_RGBA8888 |
712 | DITHER_SCALE_TAIL | 709 | DITHER_SCALE_TAIL |
713 | break; | 710 | break; |
714 | case MPEG3_601_BGR888: | 711 | case MPEG3_601_BGR888: |
715 | DITHER_SCALE_601_HEAD | 712 | DITHER_SCALE_601_HEAD |
716 | STORE_PIXEL_BGR888 | 713 | STORE_PIXEL_BGR888 |
717 | DITHER_SCALE_TAIL | 714 | DITHER_SCALE_TAIL |
718 | break; | 715 | break; |
719 | case MPEG3_601_BGRA8888: | 716 | case MPEG3_601_BGRA8888: |
720 | DITHER_SCALE_601_HEAD | 717 | DITHER_SCALE_601_HEAD |
721 | STORE_PIXEL_BGRA8888 | 718 | STORE_PIXEL_BGRA8888 |
722 | DITHER_SCALE_TAIL | 719 | DITHER_SCALE_TAIL |
723 | break; | 720 | break; |
724 | case MPEG3_601_RGB565: | 721 | case MPEG3_601_RGB565: |
725 | DITHER_SCALE_601_HEAD | 722 | DITHER_SCALE_601_HEAD |
726 | STORE_PIXEL_RGB565 | 723 | STORE_PIXEL_RGB565 |
727 | DITHER_SCALE_TAIL | 724 | DITHER_SCALE_TAIL |
728 | break; | 725 | break; |
729 | case MPEG3_601_RGB888: | 726 | case MPEG3_601_RGB888: |
730 | DITHER_SCALE_601_HEAD | 727 | DITHER_SCALE_601_HEAD |
731 | STORE_PIXEL_RGB888 | 728 | STORE_PIXEL_RGB888 |
732 | DITHER_SCALE_TAIL | 729 | DITHER_SCALE_TAIL |
733 | break; | 730 | break; |
734 | case MPEG3_601_RGBA8888: | 731 | case MPEG3_601_RGBA8888: |
735 | DITHER_SCALE_601_HEAD | 732 | DITHER_SCALE_601_HEAD |
736 | STORE_PIXEL_RGBA8888 | 733 | STORE_PIXEL_RGBA8888 |
737 | DITHER_SCALE_TAIL | 734 | DITHER_SCALE_TAIL |
738 | break; | 735 | break; |
739 | case MPEG3_RGBA16161616: | 736 | case MPEG3_RGBA16161616: |
740 | { | 737 | { |
741 | register unsigned short *data_s = (unsigned short*)data; | 738 | register unsigned short *data_s = (unsigned short*)data; |
742 | DITHER_SCALE_HEAD | 739 | DITHER_SCALE_HEAD |
743 | STORE_PIXEL_RGBA16161616 | 740 | STORE_PIXEL_RGBA16161616 |
744 | DITHER_SCALE_TAIL | 741 | DITHER_SCALE_TAIL |
745 | } | 742 | } |
746 | break; | 743 | break; |
747 | } | 744 | } |
748 | } | 745 | } |
749 | else | 746 | else |
750 | { | 747 | { |
751 | /* Transfer row unscaled */ | 748 | /* Transfer row unscaled */ |
752 | switch(video->color_model) | 749 | switch(video->color_model) |
753 | { | 750 | { |
754 | case MPEG3_BGR888: | 751 | case MPEG3_BGR888: |
755 | DITHER_HEAD | 752 | DITHER_HEAD |
756 | STORE_PIXEL_BGR888 | 753 | STORE_PIXEL_BGR888 |
757 | DITHER_TAIL | 754 | DITHER_TAIL |
758 | break; | 755 | break; |
759 | case MPEG3_BGRA8888: | 756 | case MPEG3_BGRA8888: |
760 | DITHER_HEAD | 757 | DITHER_HEAD |
761 | STORE_PIXEL_BGRA8888 | 758 | STORE_PIXEL_BGRA8888 |
762 | DITHER_TAIL | 759 | DITHER_TAIL |
763 | break; | 760 | break; |
764 | case MPEG3_RGB565: | 761 | case MPEG3_RGB565: |
765 | DITHER_HEAD | 762 | DITHER_HEAD |
766 | STORE_PIXEL_RGB565 | 763 | STORE_PIXEL_RGB565 |
767 | DITHER_TAIL | 764 | DITHER_TAIL |
768 | break; | 765 | break; |
769 | case MPEG3_RGB888: | 766 | case MPEG3_RGB888: |
770 | DITHER_HEAD | 767 | DITHER_HEAD |
771 | STORE_PIXEL_RGB888 | 768 | STORE_PIXEL_RGB888 |
772 | DITHER_TAIL | 769 | DITHER_TAIL |
773 | break; | 770 | break; |
774 | case MPEG3_RGBA8888: | 771 | case MPEG3_RGBA8888: |
775 | DITHER_HEAD | 772 | DITHER_HEAD |
776 | STORE_PIXEL_RGBA8888 | 773 | STORE_PIXEL_RGBA8888 |
777 | DITHER_TAIL | 774 | DITHER_TAIL |
778 | break; | 775 | break; |
779 | case MPEG3_601_BGR888: | 776 | case MPEG3_601_BGR888: |
780 | DITHER_601_HEAD | 777 | DITHER_601_HEAD |
781 | STORE_PIXEL_BGR888 | 778 | STORE_PIXEL_BGR888 |
782 | DITHER_TAIL | 779 | DITHER_TAIL |
783 | break; | 780 | break; |
784 | case MPEG3_601_BGRA8888: | 781 | case MPEG3_601_BGRA8888: |
785 | DITHER_601_HEAD | 782 | DITHER_601_HEAD |
786 | STORE_PIXEL_RGB565 | 783 | STORE_PIXEL_RGB565 |
787 | DITHER_TAIL | 784 | DITHER_TAIL |
788 | break; | 785 | break; |
789 | case MPEG3_601_RGB565: | 786 | case MPEG3_601_RGB565: |
790 | DITHER_601_HEAD | 787 | DITHER_601_HEAD |
791 | STORE_PIXEL_RGB565 | 788 | STORE_PIXEL_RGB565 |
792 | DITHER_TAIL | 789 | DITHER_TAIL |
793 | break; | 790 | break; |
794 | case MPEG3_601_RGB888: | 791 | case MPEG3_601_RGB888: |
795 | DITHER_601_HEAD | 792 | DITHER_601_HEAD |
796 | STORE_PIXEL_RGB888 | 793 | STORE_PIXEL_RGB888 |
797 | DITHER_TAIL | 794 | DITHER_TAIL |
798 | break; | 795 | break; |
799 | case MPEG3_601_RGBA8888: | 796 | case MPEG3_601_RGBA8888: |
800 | DITHER_601_HEAD | 797 | DITHER_601_HEAD |
801 | STORE_PIXEL_RGBA8888 | 798 | STORE_PIXEL_RGBA8888 |
802 | DITHER_TAIL | 799 | DITHER_TAIL |
803 | break; | 800 | break; |
804 | case MPEG3_RGBA16161616: | 801 | case MPEG3_RGBA16161616: |
805 | { | 802 | { |
806 | register unsigned short *data_s = (unsigned short*)data; | 803 | register unsigned short *data_s = (unsigned short*)data; |
807 | DITHER_HEAD | 804 | DITHER_HEAD |
808 | STORE_PIXEL_RGBA16161616 | 805 | STORE_PIXEL_RGBA16161616 |
809 | DITHER_TAIL | 806 | DITHER_TAIL |
810 | } | 807 | } |
811 | break; | 808 | break; |
812 | } | 809 | } |
813 | } | 810 | } |
814 | DITHER_ROW_TAIL | 811 | DITHER_ROW_TAIL |
815 | } /* End of non-MMX */ | 812 | } /* End of non-MMX */ |
816 | 813 | ||
817 | #ifdef HAVE_MMX | 814 | #ifdef HAVE_MMX |
818 | if(video->have_mmx) | 815 | if(video->have_mmx) |
819 | __asm__ __volatile__ ("emms"); | 816 | __asm__ __volatile__ ("emms"); |
820 | #endif | 817 | #endif |
821 | return 0; | 818 | return 0; |
822 | } | 819 | } |
823 | 820 | ||
824 | int mpeg3video_ditherframe444(mpeg3video_t *video, unsigned char *src[]) | 821 | int mpeg3video_ditherframe444(mpeg3video_t *video, unsigned char *src[]) |
825 | { | 822 | { |
826 | return 0; | 823 | return 0; |
827 | } | 824 | } |
828 | 825 | ||
829 | int mpeg3video_dithertop(mpeg3video_t *video, unsigned char *src[]) | 826 | int mpeg3video_dithertop(mpeg3video_t *video, unsigned char *src[]) |
830 | { | 827 | { |
831 | return mpeg3video_ditherframe(video, src, video->output_rows); | 828 | return mpeg3video_ditherframe(video, src, video->output_rows); |
832 | } | 829 | } |
833 | 830 | ||
834 | int mpeg3video_dithertop444(mpeg3video_t *video, unsigned char *src[]) | 831 | int mpeg3video_dithertop444(mpeg3video_t *video, unsigned char *src[]) |
835 | { | 832 | { |
836 | return 0; | 833 | return 0; |
837 | } | 834 | } |
838 | 835 | ||
839 | int mpeg3video_ditherbot(mpeg3video_t *video, unsigned char *src[]) | 836 | int mpeg3video_ditherbot(mpeg3video_t *video, unsigned char *src[]) |
840 | { | 837 | { |
841 | return 0; | 838 | return 0; |
842 | } | 839 | } |
843 | 840 | ||
844 | int mpeg3video_ditherbot444(mpeg3video_t *video, unsigned char *src[]) | 841 | int mpeg3video_ditherbot444(mpeg3video_t *video, unsigned char *src[]) |
845 | { | 842 | { |
846 | return 0; | 843 | return 0; |
847 | } | 844 | } |
848 | 845 | ||
/*
 * Copy `len` bytes from `input` to `output`.
 *
 * output - destination buffer, at least `len` bytes.
 * input  - source buffer, at least `len` bytes; must not overlap `output`
 *          (the copy is done with memcpy).
 * len    - number of bytes to copy.  Zero or negative lengths copy nothing;
 *          a negative value is rejected here because it would otherwise be
 *          converted to a huge unsigned size.
 */
void memcpy_fast(unsigned char *output, unsigned char *input, long len)
{
	if(len <= 0) return;

	memcpy(output, input, (size_t)len);
}
874 | 871 | ||
875 | int mpeg3video_init_output() | 872 | int mpeg3video_init_output() |
876 | { | 873 | { |
877 | int i, value; | 874 | int i, value; |
878 | for(i = 0; i < 256; i++) | 875 | for(i = 0; i < 256; i++) |
879 | { | 876 | { |
880 | value = (int)(1.1644 * i - 255 * 0.0627 + 0.5); | 877 | value = (int)(1.1644 * i - 255 * 0.0627 + 0.5); |
881 | if(value < 0) value = 0; | 878 | if(value < 0) value = 0; |
882 | else | 879 | else |
883 | if(value > 255) value = 255; | 880 | if(value > 255) value = 255; |
884 | mpeg3_601_to_rgb[i] = value; | 881 | mpeg3_601_to_rgb[i] = value; |
885 | } | 882 | } |
886 | return 0; | 883 | return 0; |
887 | } | 884 | } |
888 | 885 | ||
889 | int mpeg3video_present_frame(mpeg3video_t *video) | 886 | int mpeg3video_present_frame(mpeg3video_t *video) |
890 | { | 887 | { |
891 | int i, j, k, l; | 888 | int i, j, k, l; |
892 | unsigned char **src = video->output_src; | 889 | unsigned char **src = video->output_src; |
893 | 890 | ||
894 | /* Copy YUV buffers */ | 891 | /* Copy YUV buffers */ |
895 | if(video->want_yvu) | 892 | if(video->want_yvu) |
896 | { | 893 | { |
897 | long size[2]; | 894 | long size[2]; |
898 | long offset[2]; | 895 | long offset[2]; |