summary | refs | log | tree | commit | diff
Unidiff
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- core/multimedia/opieplayer/libmpeg3/video/output.c | 217
1 files changed, 107 insertions, 110 deletions
diff --git a/core/multimedia/opieplayer/libmpeg3/video/output.c b/core/multimedia/opieplayer/libmpeg3/video/output.c
index 919a0ff..bf0d6ed 100644
--- a/core/multimedia/opieplayer/libmpeg3/video/output.c
+++ b/core/multimedia/opieplayer/libmpeg3/video/output.c
@@ -197,214 +197,211 @@ inline void mpeg3video_rgb16_mmx(unsigned char *lum,
197 : : "r" (cr), 197 : : "r" (cr),
198 "r" (cb), 198 "r" (cb),
199 "r" (lum), 199 "r" (lum),
200 "r" (cols), 200 "r" (cols),
201 "r" (row1) , 201 "r" (row1) ,
202 "r" (col1), 202 "r" (col1),
203 "m" (x), 203 "m" (x),
204 "m" (mod), 204 "m" (mod),
205 "m" (y) 205 "m" (y)
206 ); 206 );
207} 207}
208 208
209static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; 209static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL;
210static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; 210static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL;
211static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; 211static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL;
212static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; 212static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL;
213static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; 213static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL;
214static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; 214static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL;
215 215
216inline void mpeg3_bgra32_mmx(unsigned long y, 216inline void mpeg3_bgra32_mmx(unsigned long y,
217 unsigned long u, 217 unsigned long u,
218 unsigned long v, 218 unsigned long v,
219 unsigned long *output) 219 unsigned long *output)
220{ 220{
221asm(" 221
222asm(
222/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ 223/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
223/* for bgr24. */ 224/* for bgr24. */
224 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 225 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
225 movd (%1), %%mm1; /* Load u 0x00000000000000cr */ 226 "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */
226 movq %%mm0, %%mm3; /* Copy y to temp */ 227 "movq %%mm0, %%mm3;" /* Copy y to temp */
227 psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ 228 "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */
228 movd (%2), %%mm2; /* Load v 0x00000000000000cb */ 229 "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */
229 psllq $16, %%mm3; /* Shift y */ 230 "psllq $16, %%mm3;" /* Shift y */
230 movq %%mm1, %%mm4; /* Copy u to temp */ 231 "movq %%mm1, %%mm4;" /* Copy u to temp */
231 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 232 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
232 psllq $16, %%mm4; /* Shift u */ 233 "psllq $16, %%mm4;" /* Shift u */
233 movq %%mm2, %%mm5; /* Copy v to temp */ 234 "movq %%mm2, %%mm5;" /* Copy v to temp */
234 psllq $16, %%mm3; /* Shift y */ 235 "psllq $16, %%mm3;" /* Shift y */
235 por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ 236 "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */
236 psllq $16, %%mm5; /* Shift v */ 237 "psllq $16, %%mm5;" /* Shift v */
237 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 238 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
238 por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ 239 "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */
239 240
240/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ 241/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
241 psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ 242 "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */
242 pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ 243 "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
243 psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ 244 "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
244 psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ 245 "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */
245 pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ 246 "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
246 247
247/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ 248/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
248 paddsw %%mm1, %%mm0; /* Add u to result */ 249 "paddsw %%mm1, %%mm0;" /* Add u to result */
249 paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ 250 "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */
250 psraw $6, %%mm0; /* Demote precision */ 251 "psraw $6, %%mm0;" /* Demote precision */
251 packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ 252 "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */
252 movd %%mm0, (%3); /* Store output */ 253 "movd %%mm0, (%3);" /* Store output */
253 "
254: 254:
255: "r" (&y), "r" (&u), "r" (&v), "r" (output)); 255: "r" (&y), "r" (&u), "r" (&v), "r" (output));
256} 256}
257 257
258inline void mpeg3_601_bgra32_mmx(unsigned long y, 258inline void mpeg3_601_bgra32_mmx(unsigned long y,
259 unsigned long u, 259 unsigned long u,
260 unsigned long v, 260 unsigned long v,
261 unsigned long *output) 261 unsigned long *output)
262{ 262{
263asm(" 263asm(
264/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ 264/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
265/* for bgr24. */ 265/* for bgr24. */
266 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 266 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
267 psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ 267 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */
268 movd (%1), %%mm1; /* Load u 0x00000000000000cr */ 268 "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */
269 movq %%mm0, %%mm3; /* Copy y to temp */ 269 "movq %%mm0, %%mm3;" /* Copy y to temp */
270 psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ 270 "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */
271 movd (%2), %%mm2; /* Load v 0x00000000000000cb */ 271 "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */
272 psllq $16, %%mm3; /* Shift y */ 272 "psllq $16, %%mm3;" /* Shift y */
273 movq %%mm1, %%mm4; /* Copy u to temp */ 273 "movq %%mm1, %%mm4;" /* Copy u to temp */
274 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 274 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
275 psllq $16, %%mm4; /* Shift u */ 275 "psllq $16, %%mm4;" /* Shift u */
276 movq %%mm2, %%mm5; /* Copy v to temp */ 276 "movq %%mm2, %%mm5;" /* Copy v to temp */
277 psllq $16, %%mm3; /* Shift y */ 277 "psllq $16, %%mm3;" /* Shift y */
278 por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ 278 "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */
279 psllq $16, %%mm5; /* Shift v */ 279 "psllq $16, %%mm5;" /* Shift v */
280 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 280 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
281 por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ 281 "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */
282 282
283/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ 283/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
284 pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ 284 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale and shift y coeffs */
285 psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ 285 "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */
286 pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ 286 "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
287 psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ 287 "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */
288 pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ 288 "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
289 289
290/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ 290/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
291 paddsw %%mm1, %%mm0; /* Add u to result */ 291 "paddsw %%mm1, %%mm0;" /* Add u to result */
292 paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ 292 "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */
293 psraw $6, %%mm0; /* Demote precision */ 293 "psraw $6, %%mm0;" /* Demote precision */
294 packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ 294 "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */
295 movd %%mm0, (%3); /* Store output */ 295 "movd %%mm0, (%3);" /* Store output */
296 "
297: 296:
298: "r" (&y), "r" (&u), "r" (&v), "r" (output)); 297: "r" (&y), "r" (&u), "r" (&v), "r" (output));
299} 298}
300 299
301static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; 300static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL;
302static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; 301static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL;
303static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; 302static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL;
304static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; 303static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL;
305 304
306inline void mpeg3_rgba32_mmx(unsigned long y, 305inline void mpeg3_rgba32_mmx(unsigned long y,
307 unsigned long u, 306 unsigned long u,
308 unsigned long v, 307 unsigned long v,
309 unsigned long *output) 308 unsigned long *output)
310{ 309{
311asm(" 310asm(
312/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ 311/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
313/* for rgb24. */ 312/* for rgb24. */
314 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 313 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
315 movd (%1), %%mm1; /* Load v 0x00000000000000vv */ 314 "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */
316 movq %%mm0, %%mm3; /* Copy y to temp */ 315 "movq %%mm0, %%mm3;" /* Copy y to temp */
317 psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ 316 "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */
318 movd (%2), %%mm2; /* Load u 0x00000000000000uu */ 317 "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */
319 psllq $16, %%mm3; /* Shift y */ 318 "psllq $16, %%mm3;" /* Shift y */
320 movq %%mm1, %%mm4; /* Copy v to temp */ 319 "movq %%mm1, %%mm4;" /* Copy v to temp */
321 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 320 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
322 psllq $16, %%mm4; /* Shift v */ 321 "psllq $16, %%mm4;" /* Shift v */
323 movq %%mm2, %%mm5; /* Copy u to temp */ 322 "movq %%mm2, %%mm5;" /* Copy u to temp */
324 psllq $16, %%mm3; /* Shift y */ 323 "psllq $16, %%mm3;" /* Shift y */
325 por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ 324 "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */
326 psllq $16, %%mm5; /* Shift u */ 325 "psllq $16, %%mm5;" /* Shift u */
327 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 326 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
328 por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ 327 "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */
329 328
330/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ 329/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
331 psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ 330 "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */
332 pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ 331 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
333 psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ 332 "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
334 psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ 333 "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */
335 pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ 334 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
336 335
337/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ 336/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
338 paddsw %%mm1, %%mm0; /* Add v to result */ 337 "paddsw %%mm1, %%mm0;" /* Add v to result */
339 paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ 338 "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */
340 psraw $6, %%mm0; /* Demote precision */ 339 "psraw $6, %%mm0;" /* Demote precision */
341 packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ 340 "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */
342 movd %%mm0, (%3); /* Store output */ 341 "movd %%mm0, (%3);" /* Store output */
343 "
344: 342:
345: "r" (&y), "r" (&v), "r" (&u), "r" (output)); 343: "r" (&y), "r" (&v), "r" (&u), "r" (output));
346} 344}
347 345
348inline void mpeg3_601_rgba32_mmx(unsigned long y, 346inline void mpeg3_601_rgba32_mmx(unsigned long y,
349 unsigned long u, 347 unsigned long u,
350 unsigned long v, 348 unsigned long v,
351 unsigned long *output) 349 unsigned long *output)
352{ 350{
353asm(" 351asm(
354/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ 352/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
355/* for rgb24. */ 353/* for rgb24. */
356 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 354 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
357 psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ 355 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */
358 movd (%1), %%mm1; /* Load v 0x00000000000000vv */ 356 "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */
359 movq %%mm0, %%mm3; /* Copy y to temp */ 357 "movq %%mm0, %%mm3;" /* Copy y to temp */
360 psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ 358 "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */
361 movd (%2), %%mm2; /* Load u 0x00000000000000uu */ 359 "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */
362 psllq $16, %%mm3; /* Shift y */ 360 "psllq $16, %%mm3;" /* Shift y */
363 movq %%mm1, %%mm4; /* Copy v to temp */ 361 "movq %%mm1, %%mm4;" /* Copy v to temp */
364 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 362 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
365 psllq $16, %%mm4; /* Shift v */ 363 "psllq $16, %%mm4;" /* Shift v */
366 movq %%mm2, %%mm5; /* Copy u to temp */ 364 "movq %%mm2, %%mm5;" /* Copy u to temp */
367 psllq $16, %%mm3; /* Shift y */ 365 "psllq $16, %%mm3;" /* Shift y */
368 por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ 366 "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */
369 psllq $16, %%mm5; /* Shift u */ 367 "psllq $16, %%mm5;" /* Shift u */
370 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 368 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
371 por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ 369 "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */
372 370
373/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ 371/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
374 pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */ 372 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale y coeffs */
375 psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ 373 "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */
376 pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ 374 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
377 psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ 375 "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */
378 pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ 376 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
379 377
380/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ 378/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
381 paddsw %%mm1, %%mm0; /* Add v to result */ 379 "paddsw %%mm1, %%mm0;" /* Add v to result */
382 paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ 380 "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */
383 psraw $6, %%mm0; /* Demote precision */ 381 "psraw $6, %%mm0;" /* Demote precision */
384 packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ 382 "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */
385 movd %%mm0, (%3); /* Store output */ 383 "movd %%mm0, (%3);" /* Store output */
386 "
387: 384:
388: "r" (&y), "r" (&v), "r" (&u), "r" (output)); 385: "r" (&y), "r" (&v), "r" (&u), "r" (output));
389} 386}
390 387
391#endif 388#endif
392 389
393#define DITHER_ROW_HEAD \ 390#define DITHER_ROW_HEAD \
394 for(h = 0; h < video->out_h; h++) \ 391 for(h = 0; h < video->out_h; h++) \
395 { \ 392 { \
396 y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \ 393 y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \
397 cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 2); \ 394 cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 2); \
398 cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \ 395 cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \
399 data = output_rows[h]; 396 data = output_rows[h];
400 397
401#define DITHER_ROW_TAIL \ 398#define DITHER_ROW_TAIL \
402 } 399 }
403 400
404#define DITHER_SCALE_HEAD \ 401#define DITHER_SCALE_HEAD \
405 for(w = 0; w < video->out_w; w++) \ 402 for(w = 0; w < video->out_w; w++) \
406 { \ 403 { \
407 uv_subscript = video->x_table[w] / 2; \ 404 uv_subscript = video->x_table[w] / 2; \
408 y_l = y_in[video->x_table[w]]; \ 405 y_l = y_in[video->x_table[w]]; \
409 y_l <<= 16; \ 406 y_l <<= 16; \
410 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ 407 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \