author | zecke <zecke> | 2002-07-11 22:48:38 (UTC) |
---|---|---|
committer | zecke <zecke> | 2002-07-11 22:48:38 (UTC) |
commit | a7b8ef4096c17ba5e0ff96e9292a291390831e69 (patch) (unidiff) | |
tree | 94efad118c336e4d8c00137c7be5b31852c8296d /noncore/multimedia/opieplayer2/yuv2rgb_mmx.c | |
parent | d2c7228d2efe6f47ef5b7ec04157fcead775be74 (diff) | |
download | opie-a7b8ef4096c17ba5e0ff96e9292a291390831e69.zip opie-a7b8ef4096c17ba5e0ff96e9292a291390831e69.tar.gz opie-a7b8ef4096c17ba5e0ff96e9292a291390831e69.tar.bz2 |
add files
Diffstat (limited to 'noncore/multimedia/opieplayer2/yuv2rgb_mmx.c') (more/less context) (ignore whitespace changes)
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_mmx.c | 1047 |
1 files changed, 1047 insertions, 0 deletions
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c b/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c new file mode 100644 index 0000000..f092e6f --- a/dev/null +++ b/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c | |||
@@ -0,0 +1,1047 @@ | |||
1 | /* | ||
2 | * yuv2rgb_mmx.c | ||
3 | * Copyright (C) 2000-2001 Silicon Integrated System Corp. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Author: Olie Lho <ollie@sis.com.tw> | ||
7 | * | ||
8 | * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | ||
9 | * | ||
10 | * mpeg2dec is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * mpeg2dec is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | |||
26 | #ifdef ARCH_X86 | ||
27 | |||
28 | #include <stdio.h> | ||
29 | #include <stdlib.h> | ||
30 | #include <string.h> | ||
31 | #include <inttypes.h> | ||
32 | |||
33 | #include "yuv2rgb.h" | ||
34 | #include "xineutils.h" | ||
35 | |||
/* Values passed as the `cpu` argument of the conversion helpers below. */
#define CPU_MMXEXT 0
#define CPU_MMX 1

/*
 * CPU_MMXEXT/CPU_MMX adaptation layer.
 *
 * Stores MMX register `src` to `dest`.  The expansion reads a variable
 * named `cpu` from the enclosing scope: when it equals CPU_MMXEXT the store
 * is done with movntq_r2m, otherwise with movq_r2m.
 */
#define movntq(src, dest)               \
  do {                                  \
    if (cpu != CPU_MMXEXT) {            \
      movq_r2m (src, dest);             \
    } else {                            \
      movntq_r2m (src, dest);           \
    }                                   \
  } while (0)
48 | |||
49 | static mmx_t mmx_subYw = {0x1010101010101010}; | ||
50 | static mmx_t mmx_addYw = {0x0000000000000000}; | ||
51 | |||
52 | void mmx_yuv2rgb_set_gamma(int gamma) | ||
53 | { | ||
54 | int a,s,i; | ||
55 | |||
56 | if( gamma <= 16 ) { | ||
57 | a = 0; | ||
58 | s = 16 - gamma; | ||
59 | } else { | ||
60 | a = gamma - 16; | ||
61 | s = 0; | ||
62 | } | ||
63 | |||
64 | for( i = 0; i < 8; i++ ) { | ||
65 | *((unsigned char *)&mmx_subYw + i) = s; | ||
66 | *((unsigned char *)&mmx_addYw + i) = a; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
71 | { | ||
72 | static mmx_t mmx_80w = {0x0080008000800080}; | ||
73 | static mmx_t mmx_U_green = {0xf37df37df37df37d}; | ||
74 | static mmx_t mmx_U_blue = {0x4093409340934093}; | ||
75 | static mmx_t mmx_V_red = {0x3312331233123312}; | ||
76 | static mmx_t mmx_V_green = {0xe5fce5fce5fce5fc}; | ||
77 | static mmx_t mmx_00ffw = {0x00ff00ff00ff00ff}; | ||
78 | static mmx_t mmx_Y_coeff = {0x253f253f253f253f}; | ||
79 | |||
80 | movq_m2r (*py, mm6); // mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 | ||
81 | pxor_r2r (mm4, mm4); // mm4 = 0 | ||
82 | |||
83 | psubusb_m2r (mmx_subYw, mm6);// Y -= 16 | ||
84 | paddusb_m2r (mmx_addYw, mm6); | ||
85 | |||
86 | movd_m2r (*pu, mm0); // mm0 = 00 00 00 00 u3 u2 u1 u0 | ||
87 | movq_r2r (mm6, mm7); // mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 | ||
88 | |||
89 | pand_m2r (mmx_00ffw, mm6); // mm6 = Y6 Y4 Y2 Y0 | ||
90 | psrlw_i2r (8, mm7); // mm7 = Y7 Y5 Y3 Y1 | ||
91 | |||
92 | movd_m2r (*pv, mm1); // mm1 = 00 00 00 00 v3 v2 v1 v0 | ||
93 | psllw_i2r (3, mm6); // promote precision | ||
94 | |||
95 | pmulhw_m2r (mmx_Y_coeff, mm6);// mm6 = luma_rgb even | ||
96 | psllw_i2r (3, mm7); // promote precision | ||
97 | |||
98 | punpcklbw_r2r (mm4, mm0); // mm0 = u3 u2 u1 u0 | ||
99 | |||
100 | psubsw_m2r (mmx_80w, mm0); // u -= 128 | ||
101 | punpcklbw_r2r (mm4, mm1); // mm1 = v3 v2 v1 v0 | ||
102 | |||
103 | pmulhw_m2r (mmx_Y_coeff, mm7);// mm7 = luma_rgb odd | ||
104 | psllw_i2r (3, mm0); // promote precision | ||
105 | |||
106 | psubsw_m2r (mmx_80w, mm1); // v -= 128 | ||
107 | movq_r2r (mm0, mm2); // mm2 = u3 u2 u1 u0 | ||
108 | |||
109 | psllw_i2r (3, mm1); // promote precision | ||
110 | |||
111 | movq_r2r (mm1, mm4); // mm4 = v3 v2 v1 v0 | ||
112 | |||
113 | pmulhw_m2r (mmx_U_blue, mm0);// mm0 = chroma_b | ||
114 | |||
115 | |||
116 | // slot | ||
117 | |||
118 | |||
119 | // slot | ||
120 | |||
121 | |||
122 | pmulhw_m2r (mmx_V_red, mm1);// mm1 = chroma_r | ||
123 | movq_r2r (mm0, mm3); // mm3 = chroma_b | ||
124 | |||
125 | paddsw_r2r (mm6, mm0); // mm0 = B6 B4 B2 B0 | ||
126 | paddsw_r2r (mm7, mm3); // mm3 = B7 B5 B3 B1 | ||
127 | |||
128 | packuswb_r2r (mm0, mm0); // saturate to 0-255 | ||
129 | |||
130 | |||
131 | pmulhw_m2r (mmx_U_green, mm2);// mm2 = u * u_green | ||
132 | |||
133 | |||
134 | packuswb_r2r (mm3, mm3); // saturate to 0-255 | ||
135 | |||
136 | |||
137 | punpcklbw_r2r (mm3, mm0); // mm0 = B7 B6 B5 B4 B3 B2 B1 B0 | ||
138 | |||
139 | |||
140 | pmulhw_m2r (mmx_V_green, mm4);// mm4 = v * v_green | ||
141 | |||
142 | |||
143 | // slot | ||
144 | |||
145 | |||
146 | // slot | ||
147 | |||
148 | |||
149 | paddsw_r2r (mm4, mm2); // mm2 = chroma_g | ||
150 | movq_r2r (mm2, mm5); // mm5 = chroma_g | ||
151 | |||
152 | |||
153 | movq_r2r (mm1, mm4); // mm4 = chroma_r | ||
154 | paddsw_r2r (mm6, mm2); // mm2 = G6 G4 G2 G0 | ||
155 | |||
156 | |||
157 | packuswb_r2r (mm2, mm2); // saturate to 0-255 | ||
158 | paddsw_r2r (mm6, mm1); // mm1 = R6 R4 R2 R0 | ||
159 | |||
160 | packuswb_r2r (mm1, mm1); // saturate to 0-255 | ||
161 | paddsw_r2r (mm7, mm4); // mm4 = R7 R5 R3 R1 | ||
162 | |||
163 | packuswb_r2r (mm4, mm4); // saturate to 0-255 | ||
164 | paddsw_r2r (mm7, mm5); // mm5 = G7 G5 G3 G1 | ||
165 | |||
166 | |||
167 | packuswb_r2r (mm5, mm5); // saturate to 0-255 | ||
168 | |||
169 | |||
170 | punpcklbw_r2r (mm4, mm1); // mm1 = R7 R6 R5 R4 R3 R2 R1 R0 | ||
171 | |||
172 | |||
173 | punpcklbw_r2r (mm5, mm2); // mm2 = G7 G6 G5 G4 G3 G2 G1 G0 | ||
174 | } | ||
175 | |||
176 | // basic opt | ||
177 | static inline void mmx_unpack_16rgb (uint8_t * image, int cpu) | ||
178 | { | ||
179 | static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8}; | ||
180 | static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfc}; | ||
181 | static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8}; | ||
182 | |||
183 | /* | ||
184 | * convert RGB plane to RGB 16 bits | ||
185 | * mm0 -> B, mm1 -> R, mm2 -> G | ||
186 | * mm4 -> GB, mm5 -> AR pixel 4-7 | ||
187 | * mm6 -> GB, mm7 -> AR pixel 0-3 | ||
188 | */ | ||
189 | |||
190 | pand_m2r (mmx_bluemask, mm0);// mm0 = b7b6b5b4b3______ | ||
191 | pxor_r2r (mm4, mm4); // mm4 = 0 | ||
192 | |||
193 | pand_m2r (mmx_greenmask, mm2);// mm2 = g7g6g5g4g3g2____ | ||
194 | psrlq_i2r (3, mm0); // mm0 = ______b7b6b5b4b3 | ||
195 | |||
196 | movq_r2r (mm2, mm7); // mm7 = g7g6g5g4g3g2____ | ||
197 | movq_r2r (mm0, mm5); // mm5 = ______b7b6b5b4b3 | ||
198 | |||
199 | pand_m2r (mmx_redmask, mm1);// mm1 = r7r6r5r4r3______ | ||
200 | punpcklbw_r2r (mm4, mm2); | ||
201 | |||
202 | punpcklbw_r2r (mm1, mm0); | ||
203 | |||
204 | psllq_i2r (3, mm2); | ||
205 | |||
206 | punpckhbw_r2r (mm4, mm7); | ||
207 | por_r2r (mm2, mm0); | ||
208 | |||
209 | psllq_i2r (3, mm7); | ||
210 | |||
211 | movntq (mm0, *image); | ||
212 | punpckhbw_r2r (mm1, mm5); | ||
213 | |||
214 | por_r2r (mm7, mm5); | ||
215 | |||
216 | // U | ||
217 | // V | ||
218 | |||
219 | movntq (mm5, *(image+8)); | ||
220 | } | ||
221 | |||
222 | static inline void mmx_unpack_15rgb (uint8_t * image, int cpu) | ||
223 | { | ||
224 | static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8}; | ||
225 | static mmx_t mmx_greenmask = {0xf8f8f8f8f8f8f8f8}; | ||
226 | static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8}; | ||
227 | |||
228 | /* | ||
229 | * convert RGB plane to RGB 15 bits | ||
230 | * mm0 -> B, mm1 -> R, mm2 -> G | ||
231 | * mm4 -> GB, mm5 -> AR pixel 4-7 | ||
232 | * mm6 -> GB, mm7 -> AR pixel 0-3 | ||
233 | */ | ||
234 | |||
235 | pand_m2r (mmx_bluemask, mm0);// mm0 = b7b6b5b4b3______ | ||
236 | pxor_r2r (mm4, mm4); // mm4 = 0 | ||
237 | |||
238 | pand_m2r (mmx_greenmask, mm2);// mm2 = g7g6g5g4g3g2____ | ||
239 | psrlq_i2r (3, mm0); // mm0 = ______b7b6b5b4b3 | ||
240 | |||
241 | movq_r2r (mm2, mm7); // mm7 = g7g6g5g4g3g2____ | ||
242 | movq_r2r (mm0, mm5); // mm5 = ______b7b6b5b4b3 | ||
243 | |||
244 | pand_m2r (mmx_redmask, mm1);// mm1 = r7r6r5r4r3______ | ||
245 | punpcklbw_r2r (mm4, mm2); | ||
246 | |||
247 | psrlq_i2r (1, mm1); | ||
248 | punpcklbw_r2r (mm1, mm0); | ||
249 | |||
250 | psllq_i2r (2, mm2); | ||
251 | |||
252 | punpckhbw_r2r (mm4, mm7); | ||
253 | por_r2r (mm2, mm0); | ||
254 | |||
255 | psllq_i2r (2, mm7); | ||
256 | |||
257 | movntq (mm0, *image); | ||
258 | punpckhbw_r2r (mm1, mm5); | ||
259 | |||
260 | por_r2r (mm7, mm5); | ||
261 | |||
262 | // U | ||
263 | // V | ||
264 | |||
265 | movntq (mm5, *(image+8)); | ||
266 | } | ||
267 | |||
268 | static inline void mmx_unpack_32rgb (uint8_t * image, int cpu) | ||
269 | { | ||
270 | /* | ||
271 | * convert RGB plane to RGB packed format, | ||
272 | * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, | ||
273 | * mm4 -> GB, mm5 -> AR pixel 4-7, | ||
274 | * mm6 -> GB, mm7 -> AR pixel 0-3 | ||
275 | */ | ||
276 | |||
277 | pxor_r2r (mm3, mm3); | ||
278 | movq_r2r (mm0, mm6); | ||
279 | |||
280 | punpcklbw_r2r (mm2, mm6); | ||
281 | movq_r2r (mm1, mm7); | ||
282 | |||
283 | punpcklbw_r2r (mm3, mm7); | ||
284 | movq_r2r (mm0, mm4); | ||
285 | |||
286 | punpcklwd_r2r (mm7, mm6); | ||
287 | movq_r2r (mm1, mm5); | ||
288 | |||
289 | /* scheduling: this is hopeless */ | ||
290 | movntq (mm6, *image); | ||
291 | movq_r2r (mm0, mm6); | ||
292 | punpcklbw_r2r (mm2, mm6); | ||
293 | punpckhwd_r2r (mm7, mm6); | ||
294 | movntq (mm6, *(image+8)); | ||
295 | punpckhbw_r2r (mm2, mm4); | ||
296 | punpckhbw_r2r (mm3, mm5); | ||
297 | punpcklwd_r2r (mm5, mm4); | ||
298 | movntq (mm4, *(image+16)); | ||
299 | movq_r2r (mm0, mm4); | ||
300 | punpckhbw_r2r (mm2, mm4); | ||
301 | punpckhwd_r2r (mm5, mm4); | ||
302 | movntq (mm4, *(image+24)); | ||
303 | } | ||
304 | |||
305 | static inline void mmx_unpack_32bgr (uint8_t * image, int cpu) | ||
306 | { | ||
307 | /* | ||
308 | * convert RGB plane to RGB packed format, | ||
309 | * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, | ||
310 | * mm4 -> GB, mm5 -> AR pixel 4-7, | ||
311 | * mm6 -> GB, mm7 -> AR pixel 0-3 | ||
312 | */ | ||
313 | |||
314 | pxor_r2r (mm3, mm3); | ||
315 | movq_r2r (mm1, mm6); | ||
316 | |||
317 | punpcklbw_r2r (mm2, mm6); | ||
318 | movq_r2r (mm0, mm7); | ||
319 | |||
320 | punpcklbw_r2r (mm3, mm7); | ||
321 | movq_r2r (mm1, mm4); | ||
322 | |||
323 | punpcklwd_r2r (mm7, mm6); | ||
324 | movq_r2r (mm0, mm5); | ||
325 | |||
326 | /* scheduling: this is hopeless */ | ||
327 | movntq (mm6, *image); | ||
328 | movq_r2r (mm0, mm6); | ||
329 | punpcklbw_r2r (mm2, mm6); | ||
330 | punpckhwd_r2r (mm7, mm6); | ||
331 | movntq (mm6, *(image+8)); | ||
332 | punpckhbw_r2r (mm2, mm4); | ||
333 | punpckhbw_r2r (mm3, mm5); | ||
334 | punpcklwd_r2r (mm5, mm4); | ||
335 | movntq (mm4, *(image+16)); | ||
336 | movq_r2r (mm0, mm4); | ||
337 | punpckhbw_r2r (mm2, mm4); | ||
338 | punpckhwd_r2r (mm5, mm4); | ||
339 | movntq (mm4, *(image+24)); | ||
340 | } | ||
341 | |||
342 | static inline void mmx_unpack_24rgb (uint8_t * image, int cpu) | ||
343 | { | ||
344 | /* | ||
345 | * convert RGB plane to RGB packed format, | ||
346 | * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, | ||
347 | * mm4 -> GB, mm5 -> AR pixel 4-7, | ||
348 | * mm6 -> GB, mm7 -> AR pixel 0-3 | ||
349 | */ | ||
350 | |||
351 | pxor_r2r (mm3, mm3); | ||
352 | movq_r2r (mm0, mm6); | ||
353 | |||
354 | punpcklbw_r2r (mm2, mm6); | ||
355 | movq_r2r (mm1, mm7); | ||
356 | |||
357 | punpcklbw_r2r (mm3, mm7); | ||
358 | movq_r2r (mm0, mm4); | ||
359 | |||
360 | punpcklwd_r2r (mm7, mm6); | ||
361 | movq_r2r (mm1, mm5); | ||
362 | |||
363 | /* scheduling: this is hopeless */ | ||
364 | movntq (mm6, *image); | ||
365 | movq_r2r (mm0, mm6); | ||
366 | punpcklbw_r2r (mm2, mm6); | ||
367 | punpckhwd_r2r (mm7, mm6); | ||
368 | movntq (mm6, *(image+8)); | ||
369 | punpckhbw_r2r (mm2, mm4); | ||
370 | punpckhbw_r2r (mm3, mm5); | ||
371 | punpcklwd_r2r (mm5, mm4); | ||
372 | movntq (mm4, *(image+16)); | ||
373 | } | ||
374 | |||
375 | static inline void yuv420_rgb16 (yuv2rgb_t *this, | ||
376 | uint8_t * image, | ||
377 | uint8_t * py, uint8_t * pu, uint8_t * pv, | ||
378 | int cpu) | ||
379 | { | ||
380 | int i; | ||
381 | int rgb_stride = this->rgb_stride; | ||
382 | int y_stride = this->y_stride; | ||
383 | int uv_stride = this->uv_stride; | ||
384 | int width = this->source_width; | ||
385 | int height = this->source_height; | ||
386 | int dst_height = this->dest_height; | ||
387 | uint8_t *img; | ||
388 | |||
389 | width >>= 3; | ||
390 | |||
391 | if (!this->do_scale) { | ||
392 | y_stride -= 8 * width; | ||
393 | uv_stride -= 4 * width; | ||
394 | |||
395 | do { | ||
396 | |||
397 | i = width; img = image; | ||
398 | do { | ||
399 | mmx_yuv2rgb (py, pu, pv); | ||
400 | mmx_unpack_16rgb (img, cpu); | ||
401 | py += 8; | ||
402 | pu += 4; | ||
403 | pv += 4; | ||
404 | img += 16; | ||
405 | } while (--i); | ||
406 | |||
407 | py += y_stride; | ||
408 | image += rgb_stride; | ||
409 | if (height & 1) { | ||
410 | pu += uv_stride; | ||
411 | pv += uv_stride; | ||
412 | } else { | ||
413 | pu -= 4 * width; | ||
414 | pv -= 4 * width; | ||
415 | } | ||
416 | } while (--height); | ||
417 | |||
418 | } else { | ||
419 | |||
420 | scale_line_func_t scale_line = this->scale_line; | ||
421 | uint8_t *y_buf, *u_buf, *v_buf; | ||
422 | int dy = 0; | ||
423 | |||
424 | scale_line (pu, this->u_buffer, | ||
425 | this->dest_width >> 1, this->step_dx); | ||
426 | scale_line (pv, this->v_buffer, | ||
427 | this->dest_width >> 1, this->step_dx); | ||
428 | scale_line (py, this->y_buffer, | ||
429 | this->dest_width, this->step_dx); | ||
430 | for (height = 0;; ) { | ||
431 | |||
432 | y_buf = this->y_buffer; | ||
433 | u_buf = this->u_buffer; | ||
434 | v_buf = this->v_buffer; | ||
435 | |||
436 | i = this->dest_width >> 3; img = image; | ||
437 | do { | ||
438 | /* printf ("i : %d\n",i); */ | ||
439 | |||
440 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
441 | mmx_unpack_16rgb (img, cpu); | ||
442 | y_buf += 8; | ||
443 | u_buf += 4; | ||
444 | v_buf += 4; | ||
445 | img += 16; | ||
446 | } while (--i); | ||
447 | |||
448 | dy += this->step_dy; | ||
449 | image += rgb_stride; | ||
450 | |||
451 | while (--dst_height > 0 && dy < 32768) { | ||
452 | |||
453 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*2); | ||
454 | |||
455 | dy += this->step_dy; | ||
456 | image += rgb_stride; | ||
457 | } | ||
458 | |||
459 | if (dst_height <= 0) | ||
460 | break; | ||
461 | |||
462 | do { | ||
463 | dy -= 32768; | ||
464 | |||
465 | py += y_stride; | ||
466 | |||
467 | scale_line (py, this->y_buffer, | ||
468 | this->dest_width, this->step_dx); | ||
469 | |||
470 | if (height & 1) { | ||
471 | pu += uv_stride; | ||
472 | pv += uv_stride; | ||
473 | |||
474 | scale_line (pu, this->u_buffer, | ||
475 | this->dest_width >> 1, this->step_dx); | ||
476 | scale_line (pv, this->v_buffer, | ||
477 | this->dest_width >> 1, this->step_dx); | ||
478 | |||
479 | } | ||
480 | height++; | ||
481 | } while( dy>=32768); | ||
482 | } | ||
483 | } | ||
484 | } | ||
485 | |||
486 | static inline void yuv420_rgb15 (yuv2rgb_t *this, | ||
487 | uint8_t * image, | ||
488 | uint8_t * py, uint8_t * pu, uint8_t * pv, | ||
489 | int cpu) | ||
490 | { | ||
491 | int i; | ||
492 | int rgb_stride = this->rgb_stride; | ||
493 | int y_stride = this->y_stride; | ||
494 | int uv_stride = this->uv_stride; | ||
495 | int width = this->source_width; | ||
496 | int height = this->source_height; | ||
497 | int dst_height = this->dest_height; | ||
498 | uint8_t *img; | ||
499 | |||
500 | width >>= 3; | ||
501 | |||
502 | if (!this->do_scale) { | ||
503 | y_stride -= 8 * width; | ||
504 | uv_stride -= 4 * width; | ||
505 | |||
506 | do { | ||
507 | |||
508 | i = width; img = image; | ||
509 | do { | ||
510 | mmx_yuv2rgb (py, pu, pv); | ||
511 | mmx_unpack_15rgb (img, cpu); | ||
512 | py += 8; | ||
513 | pu += 4; | ||
514 | pv += 4; | ||
515 | img += 16; | ||
516 | } while (--i); | ||
517 | |||
518 | py += y_stride; | ||
519 | image += rgb_stride; | ||
520 | if (height & 1) { | ||
521 | pu += uv_stride; | ||
522 | pv += uv_stride; | ||
523 | } else { | ||
524 | pu -= 4 * width; | ||
525 | pv -= 4 * width; | ||
526 | } | ||
527 | } while (--height); | ||
528 | |||
529 | } else { | ||
530 | |||
531 | scale_line_func_t scale_line = this->scale_line; | ||
532 | uint8_t *y_buf, *u_buf, *v_buf; | ||
533 | int dy = 0; | ||
534 | |||
535 | scale_line (pu, this->u_buffer, | ||
536 | this->dest_width >> 1, this->step_dx); | ||
537 | scale_line (pv, this->v_buffer, | ||
538 | this->dest_width >> 1, this->step_dx); | ||
539 | scale_line (py, this->y_buffer, | ||
540 | this->dest_width, this->step_dx); | ||
541 | for (height = 0;; ) { | ||
542 | |||
543 | y_buf = this->y_buffer; | ||
544 | u_buf = this->u_buffer; | ||
545 | v_buf = this->v_buffer; | ||
546 | |||
547 | i = this->dest_width >> 3; img = image; | ||
548 | do { | ||
549 | /* printf ("i : %d\n",i); */ | ||
550 | |||
551 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
552 | mmx_unpack_15rgb (img, cpu); | ||
553 | y_buf += 8; | ||
554 | u_buf += 4; | ||
555 | v_buf += 4; | ||
556 | img += 16; | ||
557 | } while (--i); | ||
558 | |||
559 | dy += this->step_dy; | ||
560 | image += rgb_stride; | ||
561 | |||
562 | while (--dst_height > 0 && dy < 32768) { | ||
563 | |||
564 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*2); | ||
565 | |||
566 | dy += this->step_dy; | ||
567 | image += rgb_stride; | ||
568 | } | ||
569 | |||
570 | if (dst_height <= 0) | ||
571 | break; | ||
572 | |||
573 | do { | ||
574 | dy -= 32768; | ||
575 | py += y_stride; | ||
576 | |||
577 | scale_line (py, this->y_buffer, | ||
578 | this->dest_width, this->step_dx); | ||
579 | |||
580 | if (height & 1) { | ||
581 | pu += uv_stride; | ||
582 | pv += uv_stride; | ||
583 | |||
584 | scale_line (pu, this->u_buffer, | ||
585 | this->dest_width >> 1, this->step_dx); | ||
586 | scale_line (pv, this->v_buffer, | ||
587 | this->dest_width >> 1, this->step_dx); | ||
588 | |||
589 | } | ||
590 | height++; | ||
591 | } while( dy>=32768 ); | ||
592 | } | ||
593 | } | ||
594 | } | ||
595 | |||
596 | static inline void yuv420_rgb24 (yuv2rgb_t *this, | ||
597 | uint8_t * image, uint8_t * py, | ||
598 | uint8_t * pu, uint8_t * pv, int cpu) | ||
599 | { | ||
600 | int i; | ||
601 | int rgb_stride = this->rgb_stride; | ||
602 | int y_stride = this->y_stride; | ||
603 | int uv_stride = this->uv_stride; | ||
604 | int width = this->source_width; | ||
605 | int height = this->source_height; | ||
606 | int dst_height = this->dest_height; | ||
607 | uint8_t *img; | ||
608 | |||
609 | /* rgb_stride -= 4 * this->dest_width; */ | ||
610 | width >>= 3; | ||
611 | |||
612 | if (!this->do_scale) { | ||
613 | y_stride -= 8 * width; | ||
614 | uv_stride -= 4 * width; | ||
615 | |||
616 | do { | ||
617 | i = width; img = image; | ||
618 | do { | ||
619 | mmx_yuv2rgb (py, pu, pv); | ||
620 | mmx_unpack_24rgb (img, cpu); | ||
621 | py += 8; | ||
622 | pu += 4; | ||
623 | pv += 4; | ||
624 | img += 24; | ||
625 | } while (--i); | ||
626 | |||
627 | py += y_stride; | ||
628 | image += rgb_stride; | ||
629 | if (height & 1) { | ||
630 | pu += uv_stride; | ||
631 | pv += uv_stride; | ||
632 | } else { | ||
633 | pu -= 4 * width; | ||
634 | pv -= 4 * width; | ||
635 | } | ||
636 | } while (--height); | ||
637 | } else { | ||
638 | |||
639 | scale_line_func_t scale_line = this->scale_line; | ||
640 | uint8_t *y_buf, *u_buf, *v_buf; | ||
641 | int dy = 0; | ||
642 | |||
643 | scale_line (pu, this->u_buffer, | ||
644 | this->dest_width >> 1, this->step_dx); | ||
645 | scale_line (pv, this->v_buffer, | ||
646 | this->dest_width >> 1, this->step_dx); | ||
647 | scale_line (py, this->y_buffer, | ||
648 | this->dest_width, this->step_dx); | ||
649 | |||
650 | for (height = 0;; ) { | ||
651 | |||
652 | y_buf = this->y_buffer; | ||
653 | u_buf = this->u_buffer; | ||
654 | v_buf = this->v_buffer; | ||
655 | |||
656 | |||
657 | i = this->dest_width >> 3; img=image; | ||
658 | do { | ||
659 | /* printf ("i : %d\n",i); */ | ||
660 | |||
661 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
662 | mmx_unpack_24rgb (img, cpu); | ||
663 | y_buf += 8; | ||
664 | u_buf += 4; | ||
665 | v_buf += 4; | ||
666 | img += 24; | ||
667 | } while (--i); | ||
668 | |||
669 | dy += this->step_dy; | ||
670 | image += rgb_stride; | ||
671 | |||
672 | while (--dst_height > 0 && dy < 32768) { | ||
673 | |||
674 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*3); | ||
675 | |||
676 | dy += this->step_dy; | ||
677 | image += rgb_stride; | ||
678 | } | ||
679 | |||
680 | if (dst_height <= 0) | ||
681 | break; | ||
682 | |||
683 | do { | ||
684 | dy -= 32768; | ||
685 | py += y_stride; | ||
686 | |||
687 | scale_line (py, this->y_buffer, | ||
688 | this->dest_width, this->step_dx); | ||
689 | |||
690 | if (height & 1) { | ||
691 | pu += uv_stride; | ||
692 | pv += uv_stride; | ||
693 | |||
694 | scale_line (pu, this->u_buffer, | ||
695 | this->dest_width >> 1, this->step_dx); | ||
696 | scale_line (pv, this->v_buffer, | ||
697 | this->dest_width >> 1, this->step_dx); | ||
698 | } | ||
699 | height++; | ||
700 | } while( dy>=32768 ); | ||
701 | |||
702 | } | ||
703 | |||
704 | } | ||
705 | } | ||
706 | |||
707 | static inline void yuv420_argb32 (yuv2rgb_t *this, | ||
708 | uint8_t * image, uint8_t * py, | ||
709 | uint8_t * pu, uint8_t * pv, int cpu) | ||
710 | { | ||
711 | int i; | ||
712 | int rgb_stride = this->rgb_stride; | ||
713 | int y_stride = this->y_stride; | ||
714 | int uv_stride = this->uv_stride; | ||
715 | int width = this->source_width; | ||
716 | int height = this->source_height; | ||
717 | int dst_height = this->dest_height; | ||
718 | uint8_t *img; | ||
719 | |||
720 | /* rgb_stride -= 4 * this->dest_width; */ | ||
721 | width >>= 3; | ||
722 | |||
723 | if (!this->do_scale) { | ||
724 | y_stride -= 8 * width; | ||
725 | uv_stride -= 4 * width; | ||
726 | |||
727 | do { | ||
728 | i = width; img = image; | ||
729 | do { | ||
730 | mmx_yuv2rgb (py, pu, pv); | ||
731 | mmx_unpack_32rgb (img, cpu); | ||
732 | py += 8; | ||
733 | pu += 4; | ||
734 | pv += 4; | ||
735 | img += 32; | ||
736 | } while (--i); | ||
737 | |||
738 | py += y_stride; | ||
739 | image += rgb_stride; | ||
740 | if (height & 1) { | ||
741 | pu += uv_stride; | ||
742 | pv += uv_stride; | ||
743 | } else { | ||
744 | pu -= 4 * width; | ||
745 | pv -= 4 * width; | ||
746 | } | ||
747 | } while (--height); | ||
748 | } else { | ||
749 | |||
750 | scale_line_func_t scale_line = this->scale_line; | ||
751 | uint8_t *y_buf, *u_buf, *v_buf; | ||
752 | int dy = 0; | ||
753 | |||
754 | scale_line (pu, this->u_buffer, | ||
755 | this->dest_width >> 1, this->step_dx); | ||
756 | scale_line (pv, this->v_buffer, | ||
757 | this->dest_width >> 1, this->step_dx); | ||
758 | scale_line (py, this->y_buffer, | ||
759 | this->dest_width, this->step_dx); | ||
760 | |||
761 | for (height = 0;; ) { | ||
762 | |||
763 | y_buf = this->y_buffer; | ||
764 | u_buf = this->u_buffer; | ||
765 | v_buf = this->v_buffer; | ||
766 | |||
767 | |||
768 | i = this->dest_width >> 3; img=image; | ||
769 | do { | ||
770 | /* printf ("i : %d\n",i); */ | ||
771 | |||
772 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
773 | mmx_unpack_32rgb (img, cpu); | ||
774 | y_buf += 8; | ||
775 | u_buf += 4; | ||
776 | v_buf += 4; | ||
777 | img += 32; | ||
778 | } while (--i); | ||
779 | |||
780 | dy += this->step_dy; | ||
781 | image += rgb_stride; | ||
782 | |||
783 | while (--dst_height > 0 && dy < 32768) { | ||
784 | |||
785 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*4); | ||
786 | |||
787 | dy += this->step_dy; | ||
788 | image += rgb_stride; | ||
789 | } | ||
790 | |||
791 | if (dst_height <= 0) | ||
792 | break; | ||
793 | |||
794 | do { | ||
795 | dy -= 32768; | ||
796 | py += y_stride; | ||
797 | |||
798 | scale_line (py, this->y_buffer, | ||
799 | this->dest_width, this->step_dx); | ||
800 | |||
801 | if (height & 1) { | ||
802 | pu += uv_stride; | ||
803 | pv += uv_stride; | ||
804 | |||
805 | scale_line (pu, this->u_buffer, | ||
806 | this->dest_width >> 1, this->step_dx); | ||
807 | scale_line (pv, this->v_buffer, | ||
808 | this->dest_width >> 1, this->step_dx); | ||
809 | } | ||
810 | height++; | ||
811 | } while( dy>=32768 ); | ||
812 | } | ||
813 | |||
814 | } | ||
815 | } | ||
816 | |||
817 | static inline void yuv420_abgr32 (yuv2rgb_t *this, | ||
818 | uint8_t * image, uint8_t * py, | ||
819 | uint8_t * pu, uint8_t * pv, int cpu) | ||
820 | { | ||
821 | int i; | ||
822 | int rgb_stride = this->rgb_stride; | ||
823 | int y_stride = this->y_stride; | ||
824 | int uv_stride = this->uv_stride; | ||
825 | int width = this->source_width; | ||
826 | int height = this->source_height; | ||
827 | int dst_height = this->dest_height; | ||
828 | uint8_t *img; | ||
829 | |||
830 | /* rgb_stride -= 4 * this->dest_width; */ | ||
831 | width >>= 3; | ||
832 | |||
833 | if (!this->do_scale) { | ||
834 | y_stride -= 8 * width; | ||
835 | uv_stride -= 4 * width; | ||
836 | |||
837 | do { | ||
838 | i = width; img = image; | ||
839 | do { | ||
840 | mmx_yuv2rgb (py, pu, pv); | ||
841 | mmx_unpack_32bgr (img, cpu); | ||
842 | py += 8; | ||
843 | pu += 4; | ||
844 | pv += 4; | ||
845 | img += 32; | ||
846 | } while (--i); | ||
847 | |||
848 | py += y_stride; | ||
849 | image += rgb_stride; | ||
850 | if (height & 1) { | ||
851 | pu += uv_stride; | ||
852 | pv += uv_stride; | ||
853 | } else { | ||
854 | pu -= 4 * width; | ||
855 | pv -= 4 * width; | ||
856 | } | ||
857 | } while (--height); | ||
858 | } else { | ||
859 | |||
860 | scale_line_func_t scale_line = this->scale_line; | ||
861 | uint8_t *y_buf, *u_buf, *v_buf; | ||
862 | int dy = 0; | ||
863 | |||
864 | scale_line (pu, this->u_buffer, | ||
865 | this->dest_width >> 1, this->step_dx); | ||
866 | scale_line (pv, this->v_buffer, | ||
867 | this->dest_width >> 1, this->step_dx); | ||
868 | scale_line (py, this->y_buffer, | ||
869 | this->dest_width, this->step_dx); | ||
870 | |||
871 | for (height = 0;; ) { | ||
872 | |||
873 | y_buf = this->y_buffer; | ||
874 | u_buf = this->u_buffer; | ||
875 | v_buf = this->v_buffer; | ||
876 | |||
877 | |||
878 | i = this->dest_width >> 3; img=image; | ||
879 | do { | ||
880 | /* printf ("i : %d\n",i); */ | ||
881 | |||
882 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
883 | mmx_unpack_32bgr (img, cpu); | ||
884 | y_buf += 8; | ||
885 | u_buf += 4; | ||
886 | v_buf += 4; | ||
887 | img += 32; | ||
888 | } while (--i); | ||
889 | |||
890 | dy += this->step_dy; | ||
891 | image += rgb_stride; | ||
892 | |||
893 | while (--dst_height > 0 && dy < 32768) { | ||
894 | |||
895 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*4); | ||
896 | |||
897 | dy += this->step_dy; | ||
898 | image += rgb_stride; | ||
899 | } | ||
900 | |||
901 | if (dst_height <= 0) | ||
902 | break; | ||
903 | |||
904 | do { | ||
905 | dy -= 32768; | ||
906 | py += y_stride; | ||
907 | |||
908 | scale_line (py, this->y_buffer, | ||
909 | this->dest_width, this->step_dx); | ||
910 | |||
911 | if (height & 1) { | ||
912 | pu += uv_stride; | ||
913 | pv += uv_stride; | ||
914 | |||
915 | scale_line (pu, this->u_buffer, | ||
916 | this->dest_width >> 1, this->step_dx); | ||
917 | scale_line (pv, this->v_buffer, | ||
918 | this->dest_width >> 1, this->step_dx); | ||
919 | } | ||
920 | height++; | ||
921 | } while( dy>=32768 ); | ||
922 | |||
923 | } | ||
924 | |||
925 | } | ||
926 | } | ||
927 | |||
928 | static void mmxext_rgb15 (yuv2rgb_t *this, uint8_t * image, | ||
929 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
930 | { | ||
931 | yuv420_rgb15 (this, image, py, pu, pv, CPU_MMXEXT); | ||
932 | emms();/* re-initialize x86 FPU after MMX use */ | ||
933 | } | ||
934 | |||
935 | static void mmxext_rgb16 (yuv2rgb_t *this, uint8_t * image, | ||
936 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
937 | { | ||
938 | yuv420_rgb16 (this, image, py, pu, pv, CPU_MMXEXT); | ||
939 | emms();/* re-initialize x86 FPU after MMX use */ | ||
940 | } | ||
941 | |||
942 | static void mmxext_rgb24 (yuv2rgb_t *this, uint8_t * image, | ||
943 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
944 | { | ||
945 | yuv420_rgb24 (this, image, py, pu, pv, CPU_MMXEXT); | ||
946 | emms();/* re-initialize x86 FPU after MMX use */ | ||
947 | } | ||
948 | |||
949 | static void mmxext_argb32 (yuv2rgb_t *this, uint8_t * image, | ||
950 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
951 | { | ||
952 | yuv420_argb32 (this, image, py, pu, pv, CPU_MMXEXT); | ||
953 | emms();/* re-initialize x86 FPU after MMX use */ | ||
954 | } | ||
955 | |||
956 | static void mmxext_abgr32 (yuv2rgb_t *this, uint8_t * image, | ||
957 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
958 | { | ||
959 | yuv420_abgr32 (this, image, py, pu, pv, CPU_MMXEXT); | ||
960 | emms();/* re-initialize x86 FPU after MMX use */ | ||
961 | } | ||
962 | |||
963 | static void mmx_rgb15 (yuv2rgb_t *this, uint8_t * image, | ||
964 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
965 | { | ||
966 | yuv420_rgb15 (this, image, py, pu, pv, CPU_MMX); | ||
967 | emms();/* re-initialize x86 FPU after MMX use */ | ||
968 | } | ||
969 | |||
970 | static void mmx_rgb16 (yuv2rgb_t *this, uint8_t * image, | ||
971 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
972 | { | ||
973 | yuv420_rgb16 (this, image, py, pu, pv, CPU_MMX); | ||
974 | emms();/* re-initialize x86 FPU after MMX use */ | ||
975 | } | ||
976 | |||
977 | static void mmx_rgb24 (yuv2rgb_t *this, uint8_t * image, | ||
978 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
979 | { | ||
980 | yuv420_rgb24 (this, image, py, pu, pv, CPU_MMX); | ||
981 | emms();/* re-initialize x86 FPU after MMX use */ | ||
982 | } | ||
983 | |||
984 | static void mmx_argb32 (yuv2rgb_t *this, uint8_t * image, | ||
985 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
986 | { | ||
987 | yuv420_argb32 (this, image, py, pu, pv, CPU_MMX); | ||
988 | emms();/* re-initialize x86 FPU after MMX use */ | ||
989 | } | ||
990 | |||
991 | static void mmx_abgr32 (yuv2rgb_t *this, uint8_t * image, | ||
992 | uint8_t * py, uint8_t * pu, uint8_t * pv) | ||
993 | { | ||
994 | yuv420_abgr32 (this, image, py, pu, pv, CPU_MMX); | ||
995 | emms();/* re-initialize x86 FPU after MMX use */ | ||
996 | } | ||
997 | |||
998 | void yuv2rgb_init_mmxext (yuv2rgb_factory_t *this) { | ||
999 | |||
1000 | if (this->swapped) | ||
1001 | return; /*no swapped pixel output upto now*/ | ||
1002 | |||
1003 | switch (this->mode) { | ||
1004 | case MODE_15_RGB: | ||
1005 | this->yuv2rgb_fun = mmxext_rgb15; | ||
1006 | break; | ||
1007 | case MODE_16_RGB: | ||
1008 | this->yuv2rgb_fun = mmxext_rgb16; | ||
1009 | break; | ||
1010 | case MODE_24_RGB: | ||
1011 | this->yuv2rgb_fun = mmxext_rgb24; | ||
1012 | break; | ||
1013 | case MODE_32_RGB: | ||
1014 | this->yuv2rgb_fun = mmxext_argb32; | ||
1015 | break; | ||
1016 | case MODE_32_BGR: | ||
1017 | this->yuv2rgb_fun = mmxext_abgr32; | ||
1018 | break; | ||
1019 | } | ||
1020 | } | ||
1021 | |||
1022 | void yuv2rgb_init_mmx (yuv2rgb_factory_t *this) { | ||
1023 | |||
1024 | if (this->swapped) | ||
1025 | return; /*no swapped pixel output upto now*/ | ||
1026 | |||
1027 | switch (this->mode) { | ||
1028 | case MODE_15_RGB: | ||
1029 | this->yuv2rgb_fun = mmx_rgb15; | ||
1030 | break; | ||
1031 | case MODE_16_RGB: | ||
1032 | this->yuv2rgb_fun = mmx_rgb16; | ||
1033 | break; | ||
1034 | case MODE_24_RGB: | ||
1035 | this->yuv2rgb_fun = mmx_rgb24; | ||
1036 | break; | ||
1037 | case MODE_32_RGB: | ||
1038 | this->yuv2rgb_fun = mmx_argb32; | ||
1039 | break; | ||
1040 | case MODE_32_BGR: | ||
1041 | this->yuv2rgb_fun = mmx_abgr32; | ||
1042 | break; | ||
1043 | } | ||
1044 | } | ||
1045 | |||
1046 | |||
1047 | #endif | ||