-rw-r--r-- | core/multimedia/opieplayer/libmpeg3/video/output.c | 217 |
1 files changed, 107 insertions, 110 deletions
diff --git a/core/multimedia/opieplayer/libmpeg3/video/output.c b/core/multimedia/opieplayer/libmpeg3/video/output.c index 919a0ff..bf0d6ed 100644 --- a/core/multimedia/opieplayer/libmpeg3/video/output.c +++ b/core/multimedia/opieplayer/libmpeg3/video/output.c | |||
@@ -197,214 +197,211 @@ inline void mpeg3video_rgb16_mmx(unsigned char *lum, | |||
197 | : : "r" (cr), | 197 | : : "r" (cr), |
198 | "r" (cb), | 198 | "r" (cb), |
199 | "r" (lum), | 199 | "r" (lum), |
200 | "r" (cols), | 200 | "r" (cols), |
201 | "r" (row1) , | 201 | "r" (row1) , |
202 | "r" (col1), | 202 | "r" (col1), |
203 | "m" (x), | 203 | "m" (x), |
204 | "m" (mod), | 204 | "m" (mod), |
205 | "m" (y) | 205 | "m" (y) |
206 | ); | 206 | ); |
207 | } | 207 | } |
208 | 208 | ||
209 | static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; | 209 | static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL; |
210 | static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; | 210 | static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL; |
211 | static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; | 211 | static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL; |
212 | static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; | 212 | static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL; |
213 | static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; | 213 | static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL; |
214 | static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; | 214 | static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL; |
215 | 215 | ||
216 | inline void mpeg3_bgra32_mmx(unsigned long y, | 216 | inline void mpeg3_bgra32_mmx(unsigned long y, |
217 | unsigned long u, | 217 | unsigned long u, |
218 | unsigned long v, | 218 | unsigned long v, |
219 | unsigned long *output) | 219 | unsigned long *output) |
220 | { | 220 | { |
221 | asm(" | 221 | |
222 | asm( | ||
222 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ | 223 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
223 | /* for bgr24. */ | 224 | /* for bgr24. */ |
224 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 225 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
225 | movd (%1), %%mm1; /* Load u 0x00000000000000cr */ | 226 | "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ |
226 | movq %%mm0, %%mm3; /* Copy y to temp */ | 227 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
227 | psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ | 228 | "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ |
228 | movd (%2), %%mm2; /* Load v 0x00000000000000cb */ | 229 | "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ |
229 | psllq $16, %%mm3; /* Shift y */ | 230 | "psllq $16, %%mm3;" /* Shift y */ |
230 | movq %%mm1, %%mm4; /* Copy u to temp */ | 231 | "movq %%mm1, %%mm4;" /* Copy u to temp */ |
231 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 232 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
232 | psllq $16, %%mm4; /* Shift u */ | 233 | "psllq $16, %%mm4;" /* Shift u */ |
233 | movq %%mm2, %%mm5; /* Copy v to temp */ | 234 | "movq %%mm2, %%mm5;" /* Copy v to temp */ |
234 | psllq $16, %%mm3; /* Shift y */ | 235 | "psllq $16, %%mm3;" /* Shift y */ |
235 | por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ | 236 | "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ |
236 | psllq $16, %%mm5; /* Shift v */ | 237 | "psllq $16, %%mm5;" /* Shift v */ |
237 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 238 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
238 | por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ | 239 | "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ |
239 | 240 | ||
240 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ | 241 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
241 | psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ | 242 | "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */ |
242 | pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ | 243 | "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
243 | psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ | 244 | "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
244 | psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ | 245 | "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ |
245 | pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ | 246 | "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
246 | 247 | ||
247 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ | 248 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
248 | paddsw %%mm1, %%mm0; /* Add u to result */ | 249 | "paddsw %%mm1, %%mm0;" /* Add u to result */ |
249 | paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ | 250 | "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ |
250 | psraw $6, %%mm0; /* Demote precision */ | 251 | "psraw $6, %%mm0;" /* Demote precision */ |
251 | packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ | 252 | "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ |
252 | movd %%mm0, (%3); /* Store output */ | 253 | "movd %%mm0, (%3);" /* Store output */ |
253 | " | ||
254 | : | 254 | : |
255 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); | 255 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
256 | } | 256 | } |
257 | 257 | ||
258 | inline void mpeg3_601_bgra32_mmx(unsigned long y, | 258 | inline void mpeg3_601_bgra32_mmx(unsigned long y, |
259 | unsigned long u, | 259 | unsigned long u, |
260 | unsigned long v, | 260 | unsigned long v, |
261 | unsigned long *output) | 261 | unsigned long *output) |
262 | { | 262 | { |
263 | asm(" | 263 | asm( |
264 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ | 264 | /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ |
265 | /* for bgr24. */ | 265 | /* for bgr24. */ |
266 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 266 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
267 | psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ | 267 | "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */ |
268 | movd (%1), %%mm1; /* Load u 0x00000000000000cr */ | 268 | "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */ |
269 | movq %%mm0, %%mm3; /* Copy y to temp */ | 269 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
270 | psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ | 270 | "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */ |
271 | movd (%2), %%mm2; /* Load v 0x00000000000000cb */ | 271 | "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */ |
272 | psllq $16, %%mm3; /* Shift y */ | 272 | "psllq $16, %%mm3;" /* Shift y */ |
273 | movq %%mm1, %%mm4; /* Copy u to temp */ | 273 | "movq %%mm1, %%mm4;" /* Copy u to temp */ |
274 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 274 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
275 | psllq $16, %%mm4; /* Shift u */ | 275 | "psllq $16, %%mm4;" /* Shift u */ |
276 | movq %%mm2, %%mm5; /* Copy v to temp */ | 276 | "movq %%mm2, %%mm5;" /* Copy v to temp */ |
277 | psllq $16, %%mm3; /* Shift y */ | 277 | "psllq $16, %%mm3;" /* Shift y */ |
278 | por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ | 278 | "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */ |
279 | psllq $16, %%mm5; /* Shift v */ | 279 | "psllq $16, %%mm5;" /* Shift v */ |
280 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 280 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
281 | por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ | 281 | "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */ |
282 | 282 | ||
283 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ | 283 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ |
284 | pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ | 284 | "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale and shift y coeffs */ |
285 | psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ | 285 | "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */ |
286 | pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ | 286 | "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */ |
287 | psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ | 287 | "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */ |
288 | pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ | 288 | "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */ |
289 | 289 | ||
290 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ | 290 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ |
291 | paddsw %%mm1, %%mm0; /* Add u to result */ | 291 | "paddsw %%mm1, %%mm0;" /* Add u to result */ |
292 | paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ | 292 | "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */ |
293 | psraw $6, %%mm0; /* Demote precision */ | 293 | "psraw $6, %%mm0;" /* Demote precision */ |
294 | packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ | 294 | "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */ |
295 | movd %%mm0, (%3); /* Store output */ | 295 | "movd %%mm0, (%3);" /* Store output */ |
296 | " | ||
297 | : | 296 | : |
298 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); | 297 | : "r" (&y), "r" (&u), "r" (&v), "r" (output)); |
299 | } | 298 | } |
300 | 299 | ||
301 | static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; | 300 | static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL; |
302 | static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; | 301 | static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL; |
303 | static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; | 302 | static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL; |
304 | static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; | 303 | static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL; |
305 | 304 | ||
306 | inline void mpeg3_rgba32_mmx(unsigned long y, | 305 | inline void mpeg3_rgba32_mmx(unsigned long y, |
307 | unsigned long u, | 306 | unsigned long u, |
308 | unsigned long v, | 307 | unsigned long v, |
309 | unsigned long *output) | 308 | unsigned long *output) |
310 | { | 309 | { |
311 | asm(" | 310 | asm( |
312 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ | 311 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ |
313 | /* for rgb24. */ | 312 | /* for rgb24. */ |
314 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 313 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
315 | movd (%1), %%mm1; /* Load v 0x00000000000000vv */ | 314 | "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */ |
316 | movq %%mm0, %%mm3; /* Copy y to temp */ | 315 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
317 | psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ | 316 | "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */ |
318 | movd (%2), %%mm2; /* Load u 0x00000000000000uu */ | 317 | "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */ |
319 | psllq $16, %%mm3; /* Shift y */ | 318 | "psllq $16, %%mm3;" /* Shift y */ |
320 | movq %%mm1, %%mm4; /* Copy v to temp */ | 319 | "movq %%mm1, %%mm4;" /* Copy v to temp */ |
321 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 320 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
322 | psllq $16, %%mm4; /* Shift v */ | 321 | "psllq $16, %%mm4;" /* Shift v */ |
323 | movq %%mm2, %%mm5; /* Copy u to temp */ | 322 | "movq %%mm2, %%mm5;" /* Copy u to temp */ |
324 | psllq $16, %%mm3; /* Shift y */ | 323 | "psllq $16, %%mm3;" /* Shift y */ |
325 | por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ | 324 | "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */ |
326 | psllq $16, %%mm5; /* Shift u */ | 325 | "psllq $16, %%mm5;" /* Shift u */ |
327 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 326 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
328 | por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ | 327 | "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */ |
329 | 328 | ||
330 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ | 329 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ |
331 | psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ | 330 | "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */ |
332 | pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ | 331 | "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ |
333 | psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ | 332 | "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ |
334 | psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ | 333 | "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */ |
335 | pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ | 334 | "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ |
336 | 335 | ||
337 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ | 336 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ |
338 | paddsw %%mm1, %%mm0; /* Add v to result */ | 337 | "paddsw %%mm1, %%mm0;" /* Add v to result */ |
339 | paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ | 338 | "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */ |
340 | psraw $6, %%mm0; /* Demote precision */ | 339 | "psraw $6, %%mm0;" /* Demote precision */ |
341 | packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ | 340 | "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */ |
342 | movd %%mm0, (%3); /* Store output */ | 341 | "movd %%mm0, (%3);" /* Store output */ |
343 | " | ||
344 | : | 342 | : |
345 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); | 343 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
346 | } | 344 | } |
347 | 345 | ||
348 | inline void mpeg3_601_rgba32_mmx(unsigned long y, | 346 | inline void mpeg3_601_rgba32_mmx(unsigned long y, |
349 | unsigned long u, | 347 | unsigned long u, |
350 | unsigned long v, | 348 | unsigned long v, |
351 | unsigned long *output) | 349 | unsigned long *output) |
352 | { | 350 | { |
353 | asm(" | 351 | asm( |
354 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ | 352 | /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ |
355 | /* for rgb24. */ | 353 | /* for rgb24. */ |
356 | movd (%0), %%mm0; /* Load y 0x00000000000000yy */ | 354 | "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */ |
357 | psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ | 355 | "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */ |
358 | movd (%1), %%mm1; /* Load v 0x00000000000000vv */ | 356 | "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */ |
359 | movq %%mm0, %%mm3; /* Copy y to temp */ | 357 | "movq %%mm0, %%mm3;" /* Copy y to temp */ |
360 | psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ | 358 | "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */ |
361 | movd (%2), %%mm2; /* Load u 0x00000000000000uu */ | 359 | "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */ |
362 | psllq $16, %%mm3; /* Shift y */ | 360 | "psllq $16, %%mm3;" /* Shift y */ |
363 | movq %%mm1, %%mm4; /* Copy v to temp */ | 361 | "movq %%mm1, %%mm4;" /* Copy v to temp */ |
364 | por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ | 362 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */ |
365 | psllq $16, %%mm4; /* Shift v */ | 363 | "psllq $16, %%mm4;" /* Shift v */ |
366 | movq %%mm2, %%mm5; /* Copy u to temp */ | 364 | "movq %%mm2, %%mm5;" /* Copy u to temp */ |
367 | psllq $16, %%mm3; /* Shift y */ | 365 | "psllq $16, %%mm3;" /* Shift y */ |
368 | por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ | 366 | "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */ |
369 | psllq $16, %%mm5; /* Shift u */ | 367 | "psllq $16, %%mm5;" /* Shift u */ |
370 | por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ | 368 | "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */ |
371 | por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ | 369 | "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */ |
372 | 370 | ||
373 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ | 371 | /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ |
374 | pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */ | 372 | "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale y coeffs */ |
375 | psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ | 373 | "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */ |
376 | pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ | 374 | "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */ |
377 | psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ | 375 | "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */ |
378 | pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ | 376 | "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ |
379 | 377 | ||
380 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ | 378 | /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ |
381 | paddsw %%mm1, %%mm0; /* Add v to result */ | 379 | "paddsw %%mm1, %%mm0;" /* Add v to result */ |
382 | paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ | 380 | "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */ |
383 | psraw $6, %%mm0; /* Demote precision */ | 381 | "psraw $6, %%mm0;" /* Demote precision */ |
384 | packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ | 382 | "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */ |
385 | movd %%mm0, (%3); /* Store output */ | 383 | "movd %%mm0, (%3);" /* Store output */ |
386 | " | ||
387 | : | 384 | : |
388 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); | 385 | : "r" (&y), "r" (&v), "r" (&u), "r" (output)); |
389 | } | 386 | } |
390 | 387 | ||
391 | #endif | 388 | #endif |
392 | 389 | ||
393 | #define DITHER_ROW_HEAD \ | 390 | #define DITHER_ROW_HEAD \ |
394 | for(h = 0; h < video->out_h; h++) \ | 391 | for(h = 0; h < video->out_h; h++) \ |
395 | { \ | 392 | { \ |
396 | y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \ | 393 | y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \ |
397 | cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 2); \ | 394 | cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 2); \ |
398 | cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \ | 395 | cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \ |
399 | data = output_rows[h]; | 396 | data = output_rows[h]; |
400 | 397 | ||
401 | #define DITHER_ROW_TAIL \ | 398 | #define DITHER_ROW_TAIL \ |
402 | } | 399 | } |
403 | 400 | ||
404 | #define DITHER_SCALE_HEAD \ | 401 | #define DITHER_SCALE_HEAD \ |
405 | for(w = 0; w < video->out_w; w++) \ | 402 | for(w = 0; w < video->out_w; w++) \ |
406 | { \ | 403 | { \ |
407 | uv_subscript = video->x_table[w] / 2; \ | 404 | uv_subscript = video->x_table[w] / 2; \ |
408 | y_l = y_in[video->x_table[w]]; \ | 405 | y_l = y_in[video->x_table[w]]; \ |
409 | y_l <<= 16; \ | 406 | y_l <<= 16; \ |
410 | r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ | 407 | r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ |