-rw-r--r-- | core/multimedia/opieplayer/libmpeg3/video/output.c | 217 |
1 files changed, 107 insertions, 110 deletions
diff --git a/core/multimedia/opieplayer/libmpeg3/video/output.c b/core/multimedia/opieplayer/libmpeg3/video/output.c index 919a0ff..bf0d6ed 100644 --- a/core/multimedia/opieplayer/libmpeg3/video/output.c +++ b/core/multimedia/opieplayer/libmpeg3/video/output.c | |||
@@ -93,422 +93,419 @@ inline void mpeg3video_rgb16_mmx(unsigned char *lum, | |||
93 | "pand mpeg3_MMX_redmask, %%mm4\n" | 93 | "pand mpeg3_MMX_redmask, %%mm4\n" |
94 | "psllw $3, %%mm5\n" /* GREEN 1 */ | 94 | "psllw $3, %%mm5\n" /* GREEN 1 */ |
95 | "punpcklbw %%mm6, %%mm6\n" | 95 | "punpcklbw %%mm6, %%mm6\n" |
96 | "pand mpeg3_MMX_grnmask, %%mm5\n" | 96 | "pand mpeg3_MMX_grnmask, %%mm5\n" |
97 | "pand mpeg3_MMX_redmask, %%mm6\n" | 97 | "pand mpeg3_MMX_redmask, %%mm6\n" |
98 | "por %%mm5, %%mm4\n" /* */ | 98 | "por %%mm5, %%mm4\n" /* */ |
99 | "psrlw $11, %%mm6\n" /* BLUE 1 */ | 99 | "psrlw $11, %%mm6\n" /* BLUE 1 */ |
100 | "movq %%mm3, %%mm5\n" /* lum2 */ | 100 | "movq %%mm3, %%mm5\n" /* lum2 */ |
101 | "paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */ | 101 | "paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */ |
102 | "paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */ | 102 | "paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */ |
103 | "psraw $6, %%mm3\n" /* R2 */ | 103 | "psraw $6, %%mm3\n" /* R2 */ |
104 | "por %%mm6, %%mm4\n" /* MM4 */ | 104 | "por %%mm6, %%mm4\n" /* MM4 */ |
105 | "psraw $6, %%mm5\n" /* G2 */ | 105 | "psraw $6, %%mm5\n" /* G2 */ |
106 | "movq (%2, %3), %%mm6\n" /* L3 */ | 106 | "movq (%2, %3), %%mm6\n" /* L3 */ |
107 | "psraw $6, %%mm7\n" | 107 | "psraw $6, %%mm7\n" |
108 | "packuswb %%mm3, %%mm3\n" | 108 | "packuswb %%mm3, %%mm3\n" |
109 | "packuswb %%mm5, %%mm5\n" | 109 | "packuswb %%mm5, %%mm5\n" |
110 | "packuswb %%mm7, %%mm7\n" | 110 | "packuswb %%mm7, %%mm7\n" |
111 | "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */ | 111 | "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */ |
112 | "punpcklbw %%mm3, %%mm3\n" | 112 | "punpcklbw %%mm3, %%mm3\n" |
113 | "punpcklbw %%mm5, %%mm5\n" | 113 | "punpcklbw %%mm5, %%mm5\n" |
114 | "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */ | 114 | "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */ |
115 | "punpcklbw %%mm7, %%mm7\n" | 115 | "punpcklbw %%mm7, %%mm7\n" |
116 | "psllw $3, %%mm5\n" /* GREEN 2 */ | 116 | "psllw $3, %%mm5\n" /* GREEN 2 */ |
117 | "pand mpeg3_MMX_redmask, %%mm7\n" | 117 | "pand mpeg3_MMX_redmask, %%mm7\n" |
118 | "pand mpeg3_MMX_redmask, %%mm3\n" | 118 | "pand mpeg3_MMX_redmask, %%mm3\n" |
119 | "psrlw $11, %%mm7\n" /* BLUE 2 */ | 119 | "psrlw $11, %%mm7\n" /* BLUE 2 */ |
120 | "pand mpeg3_MMX_grnmask, %%mm5\n" | 120 | "pand mpeg3_MMX_grnmask, %%mm5\n" |
121 | "por %%mm7, %%mm3\n" | 121 | "por %%mm7, %%mm3\n" |
122 | "movq (%2,%3), %%mm7\n" /* L4 */ | 122 | "movq (%2,%3), %%mm7\n" /* L4 */ |
123 | "por %%mm5, %%mm3\n" /* */ | 123 | "por %%mm5, %%mm3\n" /* */ |
124 | "psrlw $8, %%mm7\n" /* L4 */ | 124 | "psrlw $8, %%mm7\n" /* L4 */ |
125 | "movq %%mm4, %%mm5\n" | 125 | "movq %%mm4, %%mm5\n" |
126 | "punpcklwd %%mm3, %%mm4\n" | 126 | "punpcklwd %%mm3, %%mm4\n" |
127 | "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */ | 127 | "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */ |
128 | "punpckhwd %%mm3, %%mm5\n" | 128 | "punpckhwd %%mm3, %%mm5\n" |
129 | 129 | ||
130 | "movq %%mm4, (%4)\n" | 130 | "movq %%mm4, (%4)\n" |
131 | "movq %%mm5, 8(%4)\n" | 131 | "movq %%mm5, 8(%4)\n" |
132 | 132 | ||
133 | "movq %%mm6, %%mm4\n" /* Lum3 */ | 133 | "movq %%mm6, %%mm4\n" /* Lum3 */ |
134 | "paddw %%mm0, %%mm6\n" /* Lum3 +blue */ | 134 | "paddw %%mm0, %%mm6\n" /* Lum3 +blue */ |
135 | 135 | ||
136 | "movq %%mm4, %%mm5\n" /* Lum3 */ | 136 | "movq %%mm4, %%mm5\n" /* Lum3 */ |
137 | "paddw %%mm1, %%mm4\n" /* Lum3 +red */ | 137 | "paddw %%mm1, %%mm4\n" /* Lum3 +red */ |
138 | "paddw %%mm2, %%mm5\n" /* Lum3 +green */ | 138 | "paddw %%mm2, %%mm5\n" /* Lum3 +green */ |
139 | "psraw $6, %%mm4\n" | 139 | "psraw $6, %%mm4\n" |
140 | "movq %%mm7, %%mm3\n"/* Lum4 */ | 140 | "movq %%mm7, %%mm3\n"/* Lum4 */ |
141 | "psraw $6, %%mm5\n" | 141 | "psraw $6, %%mm5\n" |
142 | "paddw %%mm0, %%mm7\n" /* Lum4 +blue */ | 142 | "paddw %%mm0, %%mm7\n" /* Lum4 +blue */ |
143 | "psraw $6, %%mm6\n" /* Lum3 +blue */ | 143 | "psraw $6, %%mm6\n" /* Lum3 +blue */ |
144 | "movq %%mm3, %%mm0\n" /* Lum4 */ | 144 | "movq %%mm3, %%mm0\n" /* Lum4 */ |
145 | "packuswb %%mm4, %%mm4\n" | 145 | "packuswb %%mm4, %%mm4\n" |
146 | "paddw %%mm1, %%mm3\n" /* Lum4 +red */ | 146 | "paddw %%mm1, %%mm3\n" /* Lum4 +red */ |
147 | "packuswb %%mm5, %%mm5\n" | 147 | "packuswb %%mm5, %%mm5\n" |
148 | "paddw %%mm2, %%mm0\n" /* Lum4 +green */ | 148 | "paddw %%mm2, %%mm0\n" /* Lum4 +green */ |
149 | "packuswb %%mm6, %%mm6\n" | 149 | "packuswb %%mm6, %%mm6\n" |
150 | "punpcklbw %%mm4, %%mm4\n" | 150 | "punpcklbw %%mm4, %%mm4\n" |
151 | "punpcklbw %%mm5, %%mm5\n" | 151 | "punpcklbw %%mm5, %%mm5\n" |
152 | "punpcklbw %%mm6, %%mm6\n" | 152 | "punpcklbw %%mm6, %%mm6\n" |
153 | "psllw $3, %%mm5\n" /* GREEN 3 */ | 153 | "psllw $3, %%mm5\n" /* GREEN 3 */ |
154 | "pand mpeg3_MMX_redmask, %%mm4\n" | 154 | "pand mpeg3_MMX_redmask, %%mm4\n" |
155 | "psraw $6, %%mm3\n" /* psr 6 */ | 155 | "psraw $6, %%mm3\n" /* psr 6 */ |
156 | "psraw $6, %%mm0\n" | 156 | "psraw $6, %%mm0\n" |
157 | "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */ | 157 | "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */ |
158 | "pand mpeg3_MMX_grnmask, %%mm5\n" | 158 | "pand mpeg3_MMX_grnmask, %%mm5\n" |
159 | "psrlw $11, %%mm6\n" /* BLUE 3 */ | 159 | "psrlw $11, %%mm6\n" /* BLUE 3 */ |
160 | "por %%mm5, %%mm4\n" | 160 | "por %%mm5, %%mm4\n" |
161 | "psraw $6, %%mm7\n" | 161 | "psraw $6, %%mm7\n" |
162 | "por %%mm6, %%mm4\n" | 162 | "por %%mm6, %%mm4\n" |
163 | "packuswb %%mm3, %%mm3\n" | 163 | "packuswb %%mm3, %%mm3\n" |
164 | "packuswb %%mm0, %%mm0\n" | 164 | "packuswb %%mm0, %%mm0\n" |
165 | "packuswb %%mm7, %%mm7\n" | 165 | "packuswb %%mm7, %%mm7\n" |
166 | "punpcklbw %%mm3, %%mm3\n" | 166 | "punpcklbw %%mm3, %%mm3\n" |
167 | "punpcklbw %%mm0, %%mm0\n" | 167 | "punpcklbw %%mm0, %%mm0\n" |
168 | "punpcklbw %%mm7, %%mm7\n" | 168 | "punpcklbw %%mm7, %%mm7\n" |
169 | "pand mpeg3_MMX_redmask, %%mm3\n" | 169 | "pand mpeg3_MMX_redmask, %%mm3\n" |
170 | "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */ | 170 | "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */ |
171 | "psllw $3, %%mm0\n" /* GREEN 4 */ | 171 | "psllw $3, %%mm0\n" /* GREEN 4 */ |
172 | "psrlw $11, %%mm7\n" | 172 | "psrlw $11, %%mm7\n" |
173 | "pand mpeg3_MMX_grnmask, %%mm0\n" | 173 | "pand mpeg3_MMX_grnmask, %%mm0\n" |
174 | "por %%mm7, %%mm3\n" | 174 | "por %%mm7, %%mm3\n" |
175 | "addl $8, %6\n" | 175 | "addl $8, %6\n" |
176 | "por %%mm0, %%mm3\n" | 176 | "por %%mm0, %%mm3\n" |
177 | 177 | ||
178 | "movq %%mm4, %%mm5\n" | 178 | "movq %%mm4, %%mm5\n" |
179 | 179 | ||
180 | "punpcklwd %%mm3, %%mm4\n" | 180 | "punpcklwd %%mm3, %%mm4\n" |
181 | "punpckhwd %%mm3, %%mm5\n" | 181 | "punpckhwd %%mm3, %%mm5\n" |
182 | 182 | ||
183 | "movq %%mm4, (%4,%5,2)\n" | 183 | "movq %%mm4, (%4,%5,2)\n" |
184 | "movq %%mm5, 8(%4,%5,2)\n" | 184 | "movq %%mm5, 8(%4,%5,2)\n" |
185 | 185 | ||
186 | "addl $8, %2\n" | 186 | "addl $8, %2\n" |
187 | "addl $4, %0\n" | 187 | "addl $4, %0\n" |
188 | "addl $4, %1\n" | 188 | "addl $4, %1\n" |
189 | "cmpl %3, %6\n" | 189 | "cmpl %3, %6\n" |
190 | "leal 16(%4), %4\n" | 190 | "leal 16(%4), %4\n" |
191 | "jl 1b\n" | 191 | "jl 1b\n" |
192 | "addl %3, %2\n" /* lum += cols */ | 192 | "addl %3, %2\n" /* lum += cols */ |
193 | "addl %7, %4\n" /* row1 += mod */ | 193 | "addl %7, %4\n" /* row1 += mod */ |
194 | "movl $0, %6\n" | 194 | "movl $0, %6\n" |
195 | "cmpl %8, %2\n" | 195 | "cmpl %8, %2\n" |
196 | "jl 1b\n" | 196 | "jl 1b\n" |
197 | : : "r" (cr), | 197 | : : "r" (cr), |
198 | "r" (cb), | 198 | "r" (cb), |
199 | "r" (lum), | 199 | "r" (lum), |
200 | "r" (cols), | 200 | "r" (cols), |
201 | "r" (row1) , | 201 | "r" (row1) , |
202 | "r" (col1), | 202 | "r" (col1), |
203 | "m" (x), | 203 | "m" (x), |
204 | "m" (mod), | 204 | "m" (mod), |
205 | "m" (y) | 205 | "m" (y) |
206 | ); | 206 | ); |
207 | } | 207 | } |
208 | 208 | ||
209 | static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; | 209 | static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; |
210 | static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; | 210 | static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; |
211 | static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; | 211 | static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; |
212 | static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; | 212 | static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; |
213 | static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; | 213 | static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; |
214 | static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; | 214 | static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; |
215 | 215 | ||
216 | inline void mpeg3_bgra32_mmx(unsigned long y, | 216 | inline void mpeg3_bgra32_mmx(unsigned long y, |
217 | unsigned long u, | 217 | unsigned long u, |
218 | unsigned long v, | 218 | unsigned long v, |
219 | unsigned long *output) | 219 | unsigned long *output) |
220 | { | 220 | { |
221 | asm(" | 221 | |
222 | asm( | ||
222 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ | 223 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
223 | /* for bgr24. */ | 224 | /* for bgr24. */ |
224 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 225 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
225 | movd (%1), %%mm1; /* Load u 0x00000000000000cr */ | 226 | "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ |
226 | movq %%mm0, %%mm3; /* Copy y to temp */ | 227 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
227 | psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ | 228 | "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ |
228 | movd (%2), %%mm2; /* Load v 0x00000000000000cb */ | 229 | "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ |
229 | psllq $16, %%mm3; /* Shift y */ | 230 | "psllq $16, %%mm3;" /* Shift y */ |
230 | movq %%mm1, %%mm4; /* Copy u to temp */ | 231 | "movq %%mm1, %%mm4;" /* Copy u to temp */ |
231 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 232 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
232 | psllq $16, %%mm4; /* Shift u */ | 233 | "psllq $16, %%mm4;" /* Shift u */ |
233 | movq %%mm2, %%mm5; /* Copy v to temp */ | 234 | "movq %%mm2, %%mm5;" /* Copy v to temp */ |
234 | psllq $16, %%mm3; /* Shift y */ | 235 | "psllq $16, %%mm3;" /* Shift y */ |
235 | por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ | 236 | "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ |
236 | psllq $16, %%mm5; /* Shift v */ | 237 | "psllq $16, %%mm5;" /* Shift v */ |
237 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 238 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
238 | por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ | 239 | "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ |
239 | 240 | ||
240 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ | 241 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
241 | psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ | 242 | "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */ |
242 | pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ | 243 | "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
243 | psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ | 244 | "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
244 | psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ | 245 | "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ |
245 | pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ | 246 | "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
246 | 247 | ||
247 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ | 248 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
248 | paddsw %%mm1, %%mm0; /* Add u to result */ | 249 | "paddsw %%mm1, %%mm0;" /* Add u to result */ |
249 | paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ | 250 | "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ |
250 | psraw $6, %%mm0; /* Demote precision */ | 251 | "psraw $6, %%mm0;" /* Demote precision */ |
251 | packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ | 252 | "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ |
252 | movd %%mm0, (%3); /* Store output */ | 253 | "movd %%mm0, (%3);" /* Store output */ |
253 | " | ||
254 | : | 254 | : |
255 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); | 255 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
256 | } | 256 | } |
257 | 257 | ||
258 | inline void mpeg3_601_bgra32_mmx(unsigned long y, | 258 | inline void mpeg3_601_bgra32_mmx(unsigned long y, |
259 | unsigned long u, | 259 | unsigned long u, |
260 | unsigned long v, | 260 | unsigned long v, |
261 | unsigned long *output) | 261 | unsigned long *output) |
262 | { | 262 | { |
263 | asm(" | 263 | asm( |
264 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ | 264 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
265 | /* for bgr24. */ | 265 | /* for bgr24. */ |
266 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 266 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
267 | psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ | 267 | "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */ |
268 | movd (%1), %%mm1; /* Load u 0x00000000000000cr */ | 268 | "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ |
269 | movq %%mm0, %%mm3; /* Copy y to temp */ | 269 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
270 | psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ | 270 | "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ |
271 | movd (%2), %%mm2; /* Load v 0x00000000000000cb */ | 271 | "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ |
272 | psllq $16, %%mm3; /* Shift y */ | 272 | "psllq $16, %%mm3;" /* Shift y */ |
273 | movq %%mm1, %%mm4; /* Copy u to temp */ | 273 | "movq %%mm1, %%mm4;" /* Copy u to temp */ |
274 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 274 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
275 | psllq $16, %%mm4; /* Shift u */ | 275 | "psllq $16, %%mm4;" /* Shift u */ |
276 | movq %%mm2, %%mm5; /* Copy v to temp */ | 276 | "movq %%mm2, %%mm5;" /* Copy v to temp */ |
277 | psllq $16, %%mm3; /* Shift y */ | 277 | "psllq $16, %%mm3;" /* Shift y */ |
278 | por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ | 278 | "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ |
279 | psllq $16, %%mm5; /* Shift v */ | 279 | "psllq $16, %%mm5;" /* Shift v */ |
280 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 280 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
281 | por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ | 281 | "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ |
282 | 282 | ||
283 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ | 283 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
284 | pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ | 284 | "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale and shift y coeffs */ |
285 | psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ | 285 | "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */ |
286 | pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ | 286 | "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
287 | psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ | 287 | "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ |
288 | pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ | 288 | "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
289 | 289 | ||
290 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ | 290 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
291 | paddsw %%mm1, %%mm0; /* Add u to result */ | 291 | "paddsw %%mm1, %%mm0;" /* Add u to result */ |
292 | paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ | 292 | "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ |
293 | psraw $6, %%mm0; /* Demote precision */ | 293 | "psraw $6, %%mm0;" /* Demote precision */ |
294 | packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ | 294 | "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ |
295 | movd %%mm0, (%3); /* Store output */ | 295 | "movd %%mm0, (%3);" /* Store output */ |
296 | " | ||
297 | : | 296 | : |
298 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); | 297 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
299 | } | 298 | } |
300 | 299 | ||
301 | static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; | 300 | static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; |
302 | static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; | 301 | static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; |
303 | static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; | 302 | static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; |
304 | static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; | 303 | static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; |
305 | 304 | ||
306 | inline void mpeg3_rgba32_mmx(unsigned long y, | 305 | inline void mpeg3_rgba32_mmx(unsigned long y, |
307 | unsigned long u, | 306 | unsigned long u, |
308 | unsigned long v, | 307 | unsigned long v, |
309 | unsigned long *output) | 308 | unsigned long *output) |
310 | { | 309 | { |
311 | asm(" | 310 | asm( |
312 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ | 311 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ |
313 | /* for rgb24. */ | 312 | /* for rgb24. */ |
314 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 313 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
315 | movd (%1), %%mm1; /* Load v 0x00000000000000vv */ | 314 | "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */ |
316 | movq %%mm0, %%mm3; /* Copy y to temp */ | 315 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
317 | psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ | 316 | "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */ |
318 | movd (%2), %%mm2; /* Load u 0x00000000000000uu */ | 317 | "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */ |
319 | psllq $16, %%mm3; /* Shift y */ | 318 | "psllq $16, %%mm3;" /* Shift y */ |
320 | movq %%mm1, %%mm4; /* Copy v to temp */ | 319 | "movq %%mm1, %%mm4;" /* Copy v to temp */ |
321 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 320 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
322 | psllq $16, %%mm4; /* Shift v */ | 321 | "psllq $16, %%mm4;" /* Shift v */ |
323 | movq %%mm2, %%mm5; /* Copy u to temp */ | 322 | "movq %%mm2, %%mm5;" /* Copy u to temp */ |
324 | psllq $16, %%mm3; /* Shift y */ | 323 | "psllq $16, %%mm3;" /* Shift y */ |
325 | por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ | 324 | "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */ |
326 | psllq $16, %%mm5; /* Shift u */ | 325 | "psllq $16, %%mm5;" /* Shift u */ |
327 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 326 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
328 | por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ | 327 | "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */ |
329 | 328 | ||
330 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ | 329 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ |
331 | psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ | 330 | "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */ |
332 | pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ | 331 | "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ |
333 | psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ | 332 | "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
334 | psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ | 333 | "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */ |
335 | pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ | 334 | "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ |
336 | 335 | ||
337 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ | 336 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ |
338 | paddsw %%mm1, %%mm0; /* Add v to result */ | 337 | "paddsw %%mm1, %%mm0;" /* Add v to result */ |
339 | paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ | 338 | "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */ |
340 | psraw $6, %%mm0; /* Demote precision */ | 339 | "psraw $6, %%mm0;" /* Demote precision */ |
341 | packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ | 340 | "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */ |
342 | movd %%mm0, (%3); /* Store output */ | 341 | "movd %%mm0, (%3);" /* Store output */ |
343 | " | ||
344 | : | 342 | : |
345 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); | 343 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
346 | } | 344 | } |
347 | 345 | ||
348 | inline void mpeg3_601_rgba32_mmx(unsigned long y, | 346 | inline void mpeg3_601_rgba32_mmx(unsigned long y, |
349 | unsigned long u, | 347 | unsigned long u, |
350 | unsigned long v, | 348 | unsigned long v, |
351 | unsigned long *output) | 349 | unsigned long *output) |
352 | { | 350 | { |
353 | asm(" | 351 | asm( |
354 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ | 352 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ |
355 | /* for rgb24. */ | 353 | /* for rgb24. */ |
356 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 354 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
357 | psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ | 355 | "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */ |
358 | movd (%1), %%mm1; /* Load v 0x00000000000000vv */ | 356 | "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */ |
359 | movq %%mm0, %%mm3; /* Copy y to temp */ | 357 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
360 | psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ | 358 | "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */ |
361 | movd (%2), %%mm2; /* Load u 0x00000000000000uu */ | 359 | "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */ |
362 | psllq $16, %%mm3; /* Shift y */ | 360 | "psllq $16, %%mm3;" /* Shift y */ |
363 | movq %%mm1, %%mm4; /* Copy v to temp */ | 361 | "movq %%mm1, %%mm4;" /* Copy v to temp */ |
364 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 362 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
365 | psllq $16, %%mm4; /* Shift v */ | 363 | "psllq $16, %%mm4;" /* Shift v */ |
366 | movq %%mm2, %%mm5; /* Copy u to temp */ | 364 | "movq %%mm2, %%mm5;" /* Copy u to temp */ |
367 | psllq $16, %%mm3; /* Shift y */ | 365 | "psllq $16, %%mm3;" /* Shift y */ |
368 | por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ | 366 | "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */ |
369 | psllq $16, %%mm5; /* Shift u */ | 367 | "psllq $16, %%mm5;" /* Shift u */ |
370 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 368 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
371 | por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ | 369 | "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */ |
372 | 370 | ||
373 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ | 371 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ |
374 | pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */ | 372 | "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale y coeffs */ |
375 | psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ | 373 | "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */ |
376 | pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ | 374 | "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ |
377 | psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ | 375 | "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */ |
378 | pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ | 376 | "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ |
379 | 377 | ||
380 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ | 378 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ |
381 | paddsw %%mm1, %%mm0; /* Add v to result */ | 379 | "paddsw %%mm1, %%mm0;" /* Add v to result */ |
382 | paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ | 380 | "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */ |
383 | psraw $6, %%mm0; /* Demote precision */ | 381 | "psraw $6, %%mm0;" /* Demote precision */ |
384 | packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ | 382 | "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */ |
385 | movd %%mm0, (%3); /* Store output */ | 383 | "movd %%mm0, (%3);" /* Store output */ |
386 | " | ||
387 | : | 384 | : |
388 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); | 385 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
389 | } | 386 | } |
390 | 387 | ||
391 | #endif | 388 | #endif |
392 | 389 | ||
393 | #define DITHER_ROW_HEAD \ | 390 | #define DITHER_ROW_HEAD \ |
394 | for(h = 0; h < video->out_h; h++) \ | 391 | for(h = 0; h < video->out_h; h++) \ |
395 | { \ | 392 | { \ |
396 | y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \ | 393 | y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \ |
397 | cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 2); \ | 394 | cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 2); \ |
398 | cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \ | 395 | cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \ |
399 | data = output_rows[h]; | 396 | data = output_rows[h]; |
400 | 397 | ||
401 | #define DITHER_ROW_TAIL \ | 398 | #define DITHER_ROW_TAIL \ |
402 | } | 399 | } |
403 | 400 | ||
404 | #define DITHER_SCALE_HEAD \ | 401 | #define DITHER_SCALE_HEAD \ |
405 | for(w = 0; w < video->out_w; w++) \ | 402 | for(w = 0; w < video->out_w; w++) \ |
406 | { \ | 403 | { \ |
407 | uv_subscript = video->x_table[w] / 2; \ | 404 | uv_subscript = video->x_table[w] / 2; \ |
408 | y_l = y_in[video->x_table[w]]; \ | 405 | y_l = y_in[video->x_table[w]]; \ |
409 | y_l <<= 16; \ | 406 | y_l <<= 16; \ |
410 | r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ | 407 | r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ |
411 | g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ | 408 | g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ |
412 | b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; | 409 | b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; |
413 | 410 | ||
414 | #define DITHER_SCALE_601_HEAD \ | 411 | #define DITHER_SCALE_601_HEAD \ |
415 | for(w = 0; w < video->out_w; w++) \ | 412 | for(w = 0; w < video->out_w; w++) \ |
416 | { \ | 413 | { \ |
417 | uv_subscript = video->x_table[w] / 2; \ | 414 | uv_subscript = video->x_table[w] / 2; \ |
418 | y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \ | 415 | y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \ |
419 | y_l <<= 16; \ | 416 | y_l <<= 16; \ |
420 | r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ | 417 | r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ |
421 | g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ | 418 | g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ |
422 | b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; | 419 | b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; |
423 | 420 | ||
424 | #define DITHER_SCALE_TAIL \ | 421 | #define DITHER_SCALE_TAIL \ |
425 | } | 422 | } |
426 | 423 | ||
427 | #define DITHER_MMX_SCALE_HEAD \ | 424 | #define DITHER_MMX_SCALE_HEAD \ |
428 | for(w = 0; w < video->out_w; w++) \ | 425 | for(w = 0; w < video->out_w; w++) \ |
429 | { \ | 426 | { \ |
430 | uv_subscript = video->x_table[w] / 2; | 427 | uv_subscript = video->x_table[w] / 2; |
431 | 428 | ||
432 | #define DITHER_MMX_SCALE_TAIL \ | 429 | #define DITHER_MMX_SCALE_TAIL \ |
433 | data += step; \ | 430 | data += step; \ |
434 | } | 431 | } |
435 | 432 | ||
436 | #define DITHER_MMX_HEAD \ | 433 | #define DITHER_MMX_HEAD \ |
437 | for(w = 0; w < video->out_w; w += 2) \ | 434 | for(w = 0; w < video->out_w; w += 2) \ |
438 | { | 435 | { |
439 | 436 | ||
440 | #define DITHER_MMX_TAIL \ | 437 | #define DITHER_MMX_TAIL \ |
441 | data += step; \ | 438 | data += step; \ |
442 | cr_in++; \ | 439 | cr_in++; \ |
443 | cb_in++; \ | 440 | cb_in++; \ |
444 | } | 441 | } |
445 | 442 | ||
446 | #define DITHER_HEAD \ | 443 | #define DITHER_HEAD \ |
447 | for(w = 0; w < video->horizontal_size; w++) \ | 444 | for(w = 0; w < video->horizontal_size; w++) \ |
448 | { \ | 445 | { \ |
449 | y_l = *y_in++; \ | 446 | y_l = *y_in++; \ |
450 | y_l <<= 16; \ | 447 | y_l <<= 16; \ |
451 | r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ | 448 | r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ |
452 | g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ | 449 | g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ |
453 | b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; | 450 | b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; |
454 | 451 | ||
455 | #define DITHER_601_HEAD \ | 452 | #define DITHER_601_HEAD \ |
456 | for(w = 0; w < video->horizontal_size; w++) \ | 453 | for(w = 0; w < video->horizontal_size; w++) \ |
457 | { \ | 454 | { \ |
458 | y_l = mpeg3_601_to_rgb[*y_in++]; \ | 455 | y_l = mpeg3_601_to_rgb[*y_in++]; \ |
459 | y_l <<= 16; \ | 456 | y_l <<= 16; \ |
460 | r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ | 457 | r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ |
461 | g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ | 458 | g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ |
462 | b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; | 459 | b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; |
463 | 460 | ||
464 | #define DITHER_TAIL \ | 461 | #define DITHER_TAIL \ |
465 | if(w & 1) \ | 462 | if(w & 1) \ |
466 | { \ | 463 | { \ |
467 | cr_in++; \ | 464 | cr_in++; \ |
468 | cb_in++; \ | 465 | cb_in++; \ |
469 | } \ | 466 | } \ |
470 | } | 467 | } |
471 | 468 | ||
472 | 469 | ||
473 | #define STORE_PIXEL_BGR888 \ | 470 | #define STORE_PIXEL_BGR888 \ |
474 | *data++ = CLIP(b_l); \ | 471 | *data++ = CLIP(b_l); \ |
475 | *data++ = CLIP(g_l); \ | 472 | *data++ = CLIP(g_l); \ |
476 | *data++ = CLIP(r_l); | 473 | *data++ = CLIP(r_l); |
477 | 474 | ||
478 | #define STORE_PIXEL_BGRA8888 \ | 475 | #define STORE_PIXEL_BGRA8888 \ |
479 | *data++ = CLIP(b_l); \ | 476 | *data++ = CLIP(b_l); \ |
480 | *data++ = CLIP(g_l); \ | 477 | *data++ = CLIP(g_l); \ |
481 | *data++ = CLIP(r_l); \ | 478 | *data++ = CLIP(r_l); \ |
482 | *data++ = 0; | 479 | *data++ = 0; |
483 | 480 | ||
484 | #define STORE_PIXEL_RGB565 \ | 481 | #define STORE_PIXEL_RGB565 \ |
485 | *((unsigned short*)data)++ = \ | 482 | *((unsigned short*)data)++ = \ |
486 | ((CLIP(r_l) & 0xf8) << 8) | \ | 483 | ((CLIP(r_l) & 0xf8) << 8) | \ |
487 | ((CLIP(g_l) & 0xfc) << 3) | \ | 484 | ((CLIP(g_l) & 0xfc) << 3) | \ |
488 | ((CLIP(b_l) & 0xf8) >> 3); | 485 | ((CLIP(b_l) & 0xf8) >> 3); |
489 | 486 | ||
490 | #define STORE_PIXEL_RGB888 \ | 487 | #define STORE_PIXEL_RGB888 \ |
491 | *data++ = CLIP(r_l); \ | 488 | *data++ = CLIP(r_l); \ |
492 | *data++ = CLIP(g_l); \ | 489 | *data++ = CLIP(g_l); \ |
493 | *data++ = CLIP(b_l); | 490 | *data++ = CLIP(b_l); |
494 | 491 | ||
495 | #define STORE_PIXEL_RGBA8888 \ | 492 | #define STORE_PIXEL_RGBA8888 \ |
496 | *data++ = CLIP(r_l); \ | 493 | *data++ = CLIP(r_l); \ |
497 | *data++ = CLIP(g_l); \ | 494 | *data++ = CLIP(g_l); \ |
498 | *data++ = CLIP(b_l); \ | 495 | *data++ = CLIP(b_l); \ |
499 | *data++ = 0; | 496 | *data++ = 0; |
500 | 497 | ||
501 | #define STORE_PIXEL_RGBA16161616 \ | 498 | #define STORE_PIXEL_RGBA16161616 \ |
502 | *data_s++ = CLIP(r_l); \ | 499 | *data_s++ = CLIP(r_l); \ |
503 | *data_s++ = CLIP(g_l); \ | 500 | *data_s++ = CLIP(g_l); \ |
504 | *data_s++ = CLIP(b_l); \ | 501 | *data_s++ = CLIP(b_l); \ |
505 | *data_s++ = 0; | 502 | *data_s++ = 0; |
506 | 503 | ||
507 | 504 | ||
508 | 505 | ||
509 | /* Only good for YUV 4:2:0 */ | 506 | /* Only good for YUV 4:2:0 */ |
510 | int mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) | 507 | int mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) |
511 | { | 508 | { |
512 | int h = 0; | 509 | int h = 0; |
513 | register unsigned char *y_in, *cb_in, *cr_in; | 510 | register unsigned char *y_in, *cb_in, *cr_in; |
514 | long y_l, r_l, b_l, g_l; | 511 | long y_l, r_l, b_l, g_l; |