summaryrefslogtreecommitdiff
Unidiff
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--noncore/multimedia/opieplayer2/alphablend.c753
-rw-r--r--noncore/multimedia/opieplayer2/alphablend.h57
-rw-r--r--noncore/multimedia/opieplayer2/yuv2rgb.c3160
-rw-r--r--noncore/multimedia/opieplayer2/yuv2rgb.h151
-rw-r--r--noncore/multimedia/opieplayer2/yuv2rgb_mlib.c313
-rw-r--r--noncore/multimedia/opieplayer2/yuv2rgb_mmx.c1047
6 files changed, 5481 insertions, 0 deletions
diff --git a/noncore/multimedia/opieplayer2/alphablend.c b/noncore/multimedia/opieplayer2/alphablend.c
new file mode 100644
index 0000000..57f6013
--- a/dev/null
+++ b/noncore/multimedia/opieplayer2/alphablend.c
@@ -0,0 +1,753 @@
1//TOAST_SPU will define ALL spu entries - no matter the transparency
2//#define TOAST_SPU
3/* #define PRIV_CLUT */
4/* Currently only blend_yuv(..) works */
5/*
6 *
7 * Copyright (C) James Courtier-Dutton James@superbug.demon.co.uk - July 2001
8 *
9 * Copyright (C) 2000 Thomas Mirlacher
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * The author may be reached as <dent@linuxvideo.org>
26 *
27 *------------------------------------------------------------
28 *
29 */
30
31/*
32#define LOG_BLEND_YUV
33*/
34
35#include <string.h>
36#include <stdlib.h>
37#include <stdio.h>
38#include <inttypes.h>
39
40#include <xine/video_out.h>
41#include "alphablend.h"
42
43
/*
 * Blend one masked colour channel of a packed pixel.
 *   dst, src - packed pixel values; only the bits selected by mask are used
 *   mask     - bit mask of the channel
 *   o        - opacity, 0 (keep dst) .. 0xf (take src)
 * The result is already masked, so the channels can be OR-ed together.
 * Every argument is parenthesized so that expressions may be passed safely;
 * note that arguments are still evaluated more than once.
 */
#define BLEND_COLOR(dst, src, mask, o) \
  (((((src) & (mask)) * (o) + (((dst) & (mask)) * (0x0f - (o)))) / 0xf) & (mask))

/*
 * Blend a single byte sample: (src * o + dst * (15 - o)) / 15,
 * with o in the range 0 (keep dst) .. 0xf (take src).
 */
#define BLEND_BYTE(dst, src, o) ((((src) * (o)) + ((dst) * (0xf - (o)))) / 0xf)
47
/*
 * Blend the 16-bit colour clr with opacity o (0..0xf) into len consecutive
 * 16-bit pixels starting at mem.  The three channels selected by the masks
 * 0xf800, 0x07e0 and 0x001f are blended independently and recombined.
 */
static void mem_blend16(uint16_t *mem, uint16_t clr, uint8_t o, int len) {
  int i;

  for (i = 0; i < len; i++) {
    uint16_t cur = mem[i];
    uint16_t hi  = BLEND_COLOR(cur, clr, 0xf800, o);
    uint16_t mid = BLEND_COLOR(cur, clr, 0x07e0, o);
    uint16_t lo  = BLEND_COLOR(cur, clr, 0x001f, o);

    mem[i] = hi | mid | lo;
  }
}
58
/*
 * Blend the three byte values r, g, b (written in that order) with
 * opacity o (0..0xf) into len consecutive 3-byte pixels starting at mem.
 */
static void mem_blend24(uint8_t *mem, uint8_t r, uint8_t g, uint8_t b,
                        uint8_t o, int len) {
  int px;

  for (px = 0; px < len; px++, mem += 3) {
    mem[0] = BLEND_BYTE(mem[0], r, o);
    mem[1] = BLEND_BYTE(mem[1], g, o);
    mem[2] = BLEND_BYTE(mem[2], b, o);
  }
}
71
/*
 * Blend the three byte values r, g, b (written in that order) with
 * opacity o (0..0xf) into len consecutive 4-byte pixels starting at mem.
 * The fourth byte of every pixel is left untouched.
 */
static void mem_blend24_32(uint8_t *mem, uint8_t r, uint8_t g, uint8_t b,
                           uint8_t o, int len) {
  int px;

  for (px = 0; px < len; px++, mem += 4) {
    mem[0] = BLEND_BYTE(mem[0], r, o);
    mem[1] = BLEND_BYTE(mem[1], g, o);
    mem[2] = BLEND_BYTE(mem[2], b, o);
  }
}
84
/*
 * Blend the 4-byte pattern src[0..3] with opacity o (0..0xf) into len
 * consecutive 4-byte groups starting at mem.
 */
static void mem_blend32(uint8_t *mem, uint8_t *src, uint8_t o, int len) {
  int group;

  for (group = 0; group < len; group++, mem += 4) {
    mem[0] = BLEND_BYTE(mem[0], src[0], o);
    mem[1] = BLEND_BYTE(mem[1], src[1], o);
    mem[2] = BLEND_BYTE(mem[2], src[2], o);
    mem[3] = BLEND_BYTE(mem[3], src[3], o);
  }
}
98
99
100/*
101 * Some macros for fixed point arithmetic.
102 *
103 * The blend_rgb* routines perform rle image scaling using
104 * scale factors that are expressed as integers scaled with
105 * a factor of 2**16.
106 *
107 * INT_TO_SCALED()/SCALED_TO_INT() converts from integer
108 * to scaled fixed point and back.
109 */
/* Fixed point helpers: values are integers scaled by 2**SCALE_SHIFT. */
#define SCALE_SHIFT       16
#define SCALE_FACTOR      (1<<SCALE_SHIFT)
#define INT_TO_SCALED(i)  ((i) << SCALE_SHIFT)   /* integer -> fixed point */
#define SCALED_TO_INT(sc) ((sc) >> SCALE_SHIFT)  /* fixed point -> integer */
114
115
116static rle_elem_t *
117rle_img_advance_line(rle_elem_t *rle, rle_elem_t *rle_limit, int w)
118{
119 int x;
120
121 for (x = 0; x < w && rle < rle_limit; ) {
122 x += rle->len;
123 rle++;
124 }
125 return rle;
126}
127
128
/*
 * Blend an RLE-encoded overlay into a 16 bit-per-pixel image.
 *
 * img                   - destination pixels, accessed as uint16_t.
 * img_overl             - overlay: position (x, y), size (width, height),
 *                         RLE data (rle, num_rle), clip rectangle
 *                         (clip_left/right/top/bottom) and the
 *                         clip_color / clip_trans lookup tables.
 * img_width, img_height - size of img in pixels; img_width is also used
 *                         as the row pitch.
 * dst_width, dst_height - size of the coordinate space in which the
 *                         overlay position and extent are expressed.
 *
 * The overlay is scaled horizontally by img_width/dst_width and stepped
 * vertically by dst_height/img_height overlay rows per image row, both
 * as integers scaled by 2**SCALE_SHIFT.  Only runs inside the clip
 * rectangle whose clip_trans entry is non-zero are drawn.
 *
 * NOTE(review): img_height and dst_width are used as divisors without a
 * zero check - confirm callers never pass 0.
 */
129void blend_rgb16 (uint8_t * img, vo_overlay_t * img_overl,
130 int img_width, int img_height,
131 int dst_width, int dst_height)
132{
133 uint8_t *trans;
134 clut_t* clut = (clut_t*) img_overl->clip_color;
135
136 int src_width = img_overl->width;
137 int src_height = img_overl->height;
138 rle_elem_t *rle = img_overl->rle;
139 rle_elem_t *rle_limit = rle + img_overl->num_rle;
140 int x, y, x1_scaled, x2_scaled;
141 int dy, dy_step, x_scale;/* scaled 2**SCALE_SHIFT */
142 int clip_right;
143 uint16_t *img_pix;
144
 /* dy_step: overlay rows consumed per image row; x_scale: image pixels per overlay pixel */
145 dy_step = INT_TO_SCALED(dst_height) / img_height;
146 x_scale = INT_TO_SCALED(img_width) / dst_width;
147
 /* first image pixel covered by the overlay (overlay origin mapped into image coordinates) */
148 img_pix = (uint16_t *) img
149 + (img_overl->y * img_height / dst_height) * img_width
150 + (img_overl->x * img_width / dst_width);
151
152 trans = img_overl->clip_trans;
153
154 /* avoid wrapping overlay if drawing to small image */
155 if( (img_overl->x + img_overl->clip_right) < dst_width )
156 clip_right = img_overl->clip_right;
157 else
158 clip_right = dst_width - 1 - img_overl->x;
159
160 /* avoid buffer overflow */
161 if( (src_height + img_overl->y) >= dst_height )
162 src_height = dst_height - 1 - img_overl->y;
163
 /* one iteration per image row; y counts overlay rows, dy is the fractional row position */
164 for (y = dy = 0; y < src_height && rle < rle_limit;) {
 /* mask is non-zero when overlay row y lies between clip_top and clip_bottom */
165 int mask = !(img_overl->clip_top > y || img_overl->clip_bottom < y);
166 rle_elem_t *rle_start = rle;
167
168 for (x = x1_scaled = 0; x < src_width;) {
169 uint8_t clr;
170 uint16_t o;
171 int rlelen;
172
173 clr = rle->color;
174 o = trans[clr];
175 rlelen = rle->len;
176
177 if (o && mask) {
178 /* treat cases where clipping border is inside rle->len pixels */
179 if ( img_overl->clip_left > x ) {
180 if( img_overl->clip_left < x + rlelen ) {
181 x1_scaled = SCALED_TO_INT( img_overl->clip_left * x_scale );
182 rlelen -= img_overl->clip_left - x;
183 x += img_overl->clip_left - x;
184 } else {
185 o = 0;
186 }
187 } else if( clip_right < x + rlelen ) {
188 if( clip_right > x ) {
 /* run crosses the right border: draw only up to clip_right, then suppress the rest */
189 x2_scaled = SCALED_TO_INT( clip_right * x_scale);
 /* the first two bytes of the clut entry are read as one 16-bit pixel value */
190 mem_blend16(img_pix+x1_scaled, *((uint16_t *)&clut[clr]), o,
191 x2_scaled-x1_scaled);
192 o = 0;
193 } else {
194 o = 0;
195 }
196 }
197 }
198
199 x2_scaled = SCALED_TO_INT((x + rlelen) * x_scale);
200 if (o && mask) {
201 mem_blend16(img_pix+x1_scaled, *((uint16_t *)&clut[clr]), o, x2_scaled-x1_scaled);
202 }
203
204 x1_scaled = x2_scaled;
205 x += rlelen;
206 rle++;
207 if (rle >= rle_limit) break;
208 }
209
210 img_pix += img_width;
211 dy += dy_step;
212 if (dy >= INT_TO_SCALED(1)) {
213 dy -= INT_TO_SCALED(1);
214 ++y;
 /* more than one overlay row per image row: skip the rows in between */
215 while (dy >= INT_TO_SCALED(1)) {
216 rle = rle_img_advance_line(rle, rle_limit, src_width);
217 dy -= INT_TO_SCALED(1);
218 ++y;
219 }
220 } else {
221 rle = rle_start; /* y-scaling, reuse the last rle encoded line */
222 }
223 }
224}
225
/*
 * Blend an RLE-encoded overlay into a 3 byte-per-pixel image.
 *
 * img                   - destination bytes, 3 per pixel.
 * img_overl             - overlay: position (x, y), size (width, height),
 *                         RLE data (rle, num_rle), clip rectangle
 *                         (clip_left/right/top/bottom) and the
 *                         clip_color / clip_trans lookup tables.
 * img_width, img_height - size of img in pixels; img_width * 3 is used
 *                         as the row pitch in bytes.
 * dst_width, dst_height - size of the coordinate space in which the
 *                         overlay position and extent are expressed.
 *
 * The overlay is scaled horizontally by img_width/dst_width and stepped
 * vertically by dst_height/img_height overlay rows per image row, both
 * as integers scaled by 2**SCALE_SHIFT.  The clut fields cb, cr and y
 * are written, in that order, as the three bytes of each pixel.
 * NOTE(review): presumably the table already holds RGB data at this
 * point - confirm against the caller.
 *
 * NOTE(review): img_height and dst_width are used as divisors without a
 * zero check - confirm callers never pass 0.
 */
226void blend_rgb24 (uint8_t * img, vo_overlay_t * img_overl,
227 int img_width, int img_height,
228 int dst_width, int dst_height)
229{
230 clut_t* clut = (clut_t*) img_overl->clip_color;
231 uint8_t *trans;
232 int src_width = img_overl->width;
233 int src_height = img_overl->height;
234 rle_elem_t *rle = img_overl->rle;
235 rle_elem_t *rle_limit = rle + img_overl->num_rle;
236 int x, y, x1_scaled, x2_scaled;
237 int dy, dy_step, x_scale;/* scaled 2**SCALE_SHIFT */
238 int clip_right;
239 uint8_t *img_pix;
240
 /* dy_step: overlay rows consumed per image row; x_scale: image pixels per overlay pixel */
241 dy_step = INT_TO_SCALED(dst_height) / img_height;
242 x_scale = INT_TO_SCALED(img_width) / dst_width;
243
 /* first image byte covered by the overlay (overlay origin mapped into image coordinates) */
244 img_pix = img + 3 * ( (img_overl->y * img_height / dst_height) * img_width
245 + (img_overl->x * img_width / dst_width));
246
247 trans = img_overl->clip_trans;
248
249 /* avoid wrapping overlay if drawing to small image */
250 if( (img_overl->x + img_overl->clip_right) < dst_width )
251 clip_right = img_overl->clip_right;
252 else
253 clip_right = dst_width - 1 - img_overl->x;
254
255 /* avoid buffer overflow */
256 if( (src_height + img_overl->y) >= dst_height )
257 src_height = dst_height - 1 - img_overl->y;
258
 /* one iteration per image row; y counts overlay rows, dy is the fractional row position */
259 for (dy = y = 0; y < src_height && rle < rle_limit; ) {
 /* mask is non-zero when overlay row y lies between clip_top and clip_bottom */
260 int mask = !(img_overl->clip_top > y || img_overl->clip_bottom < y);
261 rle_elem_t *rle_start = rle;
262
263 for (x = x1_scaled = 0; x < src_width;) {
264 uint8_t clr;
265 uint16_t o;
266 int rlelen;
267
268 clr = rle->color;
269 o = trans[clr];
270 rlelen = rle->len;
271
272 if (o && mask) {
273 /* treat cases where clipping border is inside rle->len pixels */
274 if ( img_overl->clip_left > x ) {
275 if( img_overl->clip_left < x + rlelen ) {
276 x1_scaled = SCALED_TO_INT( img_overl->clip_left * x_scale );
277 rlelen -= img_overl->clip_left - x;
278 x += img_overl->clip_left - x;
279 } else {
280 o = 0;
281 }
282 } else if( clip_right < x + rlelen ) {
283 if( clip_right > x ) {
 /* run crosses the right border: draw only up to clip_right, then suppress the rest */
284 x2_scaled = SCALED_TO_INT( clip_right * x_scale);
285 mem_blend24(img_pix + x1_scaled*3, clut[clr].cb,
286 clut[clr].cr, clut[clr].y,
287 o, x2_scaled-x1_scaled);
288 o = 0;
289 } else {
290 o = 0;
291 }
292 }
293 }
294
295 x2_scaled = SCALED_TO_INT((x + rlelen) * x_scale);
296 if (o && mask) {
297 mem_blend24(img_pix + x1_scaled*3, clut[clr].cb,
298 clut[clr].cr, clut[clr].y,
299 o, x2_scaled-x1_scaled);
300 }
301
302 x1_scaled = x2_scaled;
303 x += rlelen;
304 rle++;
305 if (rle >= rle_limit) break;
306 }
307
308 img_pix += img_width * 3;
309 dy += dy_step;
310 if (dy >= INT_TO_SCALED(1)) {
311 dy -= INT_TO_SCALED(1);
312 ++y;
 /* more than one overlay row per image row: skip the rows in between */
313 while (dy >= INT_TO_SCALED(1)) {
314 rle = rle_img_advance_line(rle, rle_limit, src_width);
315 dy -= INT_TO_SCALED(1);
316 ++y;
317 }
318 } else {
319 rle = rle_start; /* y-scaling, reuse the last rle encoded line */
320 }
321 }
322}
323
/*
 * Blend an RLE-encoded overlay into a 4 byte-per-pixel image.
 *
 * img                   - destination bytes, 4 per pixel.
 * img_overl             - overlay: position (x, y), size (width, height),
 *                         RLE data (rle, num_rle), clip rectangle
 *                         (clip_left/right/top/bottom) and the
 *                         clip_color / clip_trans lookup tables.
 * img_width, img_height - size of img in pixels; img_width * 4 is used
 *                         as the row pitch in bytes.
 * dst_width, dst_height - size of the coordinate space in which the
 *                         overlay position and extent are expressed.
 *
 * The overlay is scaled horizontally by img_width/dst_width and stepped
 * vertically by dst_height/img_height overlay rows per image row, both
 * as integers scaled by 2**SCALE_SHIFT.  The clut fields cb, cr and y
 * are written, in that order, as the first three bytes of each pixel;
 * the fourth byte is not modified (see mem_blend24_32).
 * NOTE(review): presumably the table already holds RGB data at this
 * point - confirm against the caller.
 *
 * NOTE(review): img_height and dst_width are used as divisors without a
 * zero check - confirm callers never pass 0.
 */
324void blend_rgb32 (uint8_t * img, vo_overlay_t * img_overl,
325 int img_width, int img_height,
326 int dst_width, int dst_height)
327{
328 clut_t* clut = (clut_t*) img_overl->clip_color;
329 uint8_t *trans;
330 int src_width = img_overl->width;
331 int src_height = img_overl->height;
332 rle_elem_t *rle = img_overl->rle;
333 rle_elem_t *rle_limit = rle + img_overl->num_rle;
334 int x, y, x1_scaled, x2_scaled;
335 int dy, dy_step, x_scale;/* scaled 2**SCALE_SHIFT */
336 int clip_right;
337 uint8_t *img_pix;
338
 /* dy_step: overlay rows consumed per image row; x_scale: image pixels per overlay pixel */
339 dy_step = INT_TO_SCALED(dst_height) / img_height;
340 x_scale = INT_TO_SCALED(img_width) / dst_width;
341
 /* first image byte covered by the overlay (overlay origin mapped into image coordinates) */
342 img_pix = img + 4 * ( (img_overl->y * img_height / dst_height) * img_width
343 + (img_overl->x * img_width / dst_width));
344
345 trans = img_overl->clip_trans;
346
347 /* avoid wrapping overlay if drawing to small image */
348 if( (img_overl->x + img_overl->clip_right) < dst_width )
349 clip_right = img_overl->clip_right;
350 else
351 clip_right = dst_width - 1 - img_overl->x;
352
353 /* avoid buffer overflow */
354 if( (src_height + img_overl->y) >= dst_height )
355 src_height = dst_height - 1 - img_overl->y;
356
 /* one iteration per image row; y counts overlay rows, dy is the fractional row position */
357 for (y = dy = 0; y < src_height && rle < rle_limit; ) {
 /* mask is non-zero when overlay row y lies between clip_top and clip_bottom */
358 int mask = !(img_overl->clip_top > y || img_overl->clip_bottom < y);
359 rle_elem_t *rle_start = rle;
360
361 for (x = x1_scaled = 0; x < src_width;) {
362 uint8_t clr;
363 uint16_t o;
364 int rlelen;
365
366 clr = rle->color;
367 o = trans[clr];
368 rlelen = rle->len;
369
370 if (o && mask) {
371 /* treat cases where clipping border is inside rle->len pixels */
372 if ( img_overl->clip_left > x ) {
373 if( img_overl->clip_left < x + rlelen ) {
374 x1_scaled = SCALED_TO_INT( img_overl->clip_left * x_scale );
375 rlelen -= img_overl->clip_left - x;
376 x += img_overl->clip_left - x;
377 } else {
378 o = 0;
379 }
380 } else if( clip_right < x + rlelen ) {
381 if( clip_right > x ) {
 /* run crosses the right border: draw only up to clip_right, then suppress the rest */
382 x2_scaled = SCALED_TO_INT( clip_right * x_scale);
383 mem_blend24_32(img_pix + x1_scaled*4, clut[clr].cb,
384 clut[clr].cr, clut[clr].y,
385 o, x2_scaled-x1_scaled);
386 o = 0;
387 } else {
388 o = 0;
389 }
390 }
391 }
392
393 x2_scaled = SCALED_TO_INT((x + rlelen) * x_scale);
394 if (o && mask) {
395 mem_blend24_32(img_pix + x1_scaled*4, clut[clr].cb,
396 clut[clr].cr, clut[clr].y,
397 o, x2_scaled-x1_scaled);
398 }
399
400 x1_scaled = x2_scaled;
401 x += rlelen;
402 rle++;
403 if (rle >= rle_limit) break;
404 }
405
406 img_pix += img_width * 4;
407 dy += dy_step;
408 if (dy >= INT_TO_SCALED(1)) {
409 dy -= INT_TO_SCALED(1);
410 ++y;
 /* more than one overlay row per image row: skip the rows in between */
411 while (dy >= INT_TO_SCALED(1)) {
412 rle = rle_img_advance_line(rle, rle_limit, src_width);
413 dy -= INT_TO_SCALED(1);
414 ++y;
415 }
416 } else {
417 rle = rle_start; /* y-scaling, reuse the last rle encoded line */
418 }
419 }
420}
421
/*
 * Blend the byte value val with opacity o (0..0xf) into sz consecutive
 * bytes starting at mem.
 */
static void mem_blend8(uint8_t *mem, uint8_t val, uint8_t o, size_t sz)
{
  size_t i;

  for (i = 0; i < sz; i++) {
    mem[i] = BLEND_BYTE(mem[i], val, o);
  }
}
430
/*
 * Blend an RLE-encoded overlay onto a planar YUV image.
 *
 * dst_base  - plane pointers: [0] is used for luma, [1] for cb and
 *             [2] for cr.
 * img_overl - overlay: position (x, y), size (width, height), RLE data
 *             (rle, num_rle), clip rectangle and the color/trans and
 *             clip_color/clip_trans lookup tables.
 * dst_width, dst_height - luma plane size in pixels; dst_width is also
 *             used as the luma row pitch.
 *
 * Each RLE run is consumed in "bites" that are cut at the left and right
 * clip borders and at the end of the line.  Bites inside the clip
 * rectangle use clip_color/clip_trans, all others use color/trans.
 * A transparency value >= 15 overwrites the destination, smaller
 * non-zero values are blended, zero leaves the destination untouched.
 * Chroma is written only on odd overlay rows, at half the horizontal
 * resolution.
 *
 * NOTE(review): the initial chroma offset uses a row pitch of
 * dst_width / 2 while the per-row advance uses (dst_width + 1) / 2;
 * these differ for odd dst_width - confirm which pitch is intended.
 * NOTE(review): rle->len is read before rle is compared with rle_limit,
 * so an overlay with num_rle == 0 would read past the table - confirm
 * callers never pass one.
 */
431void blend_yuv (uint8_t *dst_base[3], vo_overlay_t * img_overl,
432 int dst_width, int dst_height)
433{
434 clut_t *my_clut;
435 uint8_t *my_trans;
436
437 int src_width = img_overl->width;
438 int src_height = img_overl->height;
439 rle_elem_t *rle = img_overl->rle;
440 rle_elem_t *rle_limit = rle + img_overl->num_rle;
441 int x_off = img_overl->x;
442 int y_off = img_overl->y;
443 int ymask,xmask;
444 int rle_this_bite;
445 int rle_remainder;
446 int rlelen;
447 int x, y;
448 int clip_right;
449 uint8_t clr=0;
450
451 uint8_t *dst_y = dst_base[0] + dst_width * y_off + x_off;
452 uint8_t *dst_cr = dst_base[2] +
453 (y_off / 2) * (dst_width / 2) + (x_off / 2) + 1;
454 uint8_t *dst_cb = dst_base[1] +
455 (y_off / 2) * (dst_width / 2) + (x_off / 2) + 1;
456#ifdef LOG_BLEND_YUV
457 printf("overlay_blend started x=%d, y=%d, w=%d h=%d\n",img_overl->x,img_overl->y,img_overl->width,img_overl->height);
458#endif
459 my_clut = (clut_t*) img_overl->clip_color;
460 my_trans = img_overl->clip_trans;
461
462 /* avoid wrapping overlay if drawing to small image */
463 if( (x_off + img_overl->clip_right) < dst_width )
464 clip_right = img_overl->clip_right;
465 else
466 clip_right = dst_width - 1 - x_off;
467
468 /* avoid buffer overflow */
469 if( (src_height + y_off) >= dst_height )
470 src_height = dst_height - 1 - y_off;
471
 /* rlelen: pixels left in the current run; rle_remainder: pixels of it still to place on this line */
472 rlelen=rle_remainder=0;
473 for (y = 0; y < src_height; y++) {
 /* ymask is non-zero when row y lies OUTSIDE the vertical clip range */
474 ymask = ((img_overl->clip_top > y) || (img_overl->clip_bottom < y));
475 xmask = 0;
476#ifdef LOG_BLEND_YUV
477 printf("X started ymask=%d y=%d src_height=%d\n",ymask, y, src_height);
478#endif
479
480 for (x = 0; x < src_width;) {
481 uint16_t o;
482#ifdef LOG_BLEND_YUV
483 printf("1:rle_len=%d, remainder=%d, x=%d\n",rlelen, rle_remainder, x);
484#endif
485
486 if ((rlelen < 0) || (rle_remainder < 0)) {
487 printf("alphablend: major bug in blend_yuv < 0\n");
488 }
 /* current run exhausted: fetch the next RLE element */
489 if (rlelen == 0) {
490 rle_remainder = rlelen = rle->len;
491 clr = rle->color;
492 rle++;
493 }
494 if (rle_remainder == 0) {
495 rle_remainder = rlelen;
496 }
497 if ((rle_remainder + x) > src_width) {
498 /* Do something for long rlelengths */
499 rle_remainder = src_width - x;
500 ;
501 }
502#ifdef LOG_BLEND_YUV
503 printf("2:rle_len=%d, remainder=%d, x=%d\n",rlelen, rle_remainder, x);
504#endif
505
 /* choose the size of this bite and the colour tables that apply to it */
506 if (ymask == 0) {
507 if (x <= img_overl->clip_left) {
508 /* Starts outside clip area */
509 if ((x + rle_remainder - 1) > img_overl->clip_left ) {
510#ifdef LOG_BLEND_YUV
511 printf("Outside clip left %d, ending inside\n", img_overl->clip_left);
512#endif
513 /* Cutting needed, starts outside, ends inside */
514 rle_this_bite = (img_overl->clip_left - x + 1);
515 rle_remainder -= rle_this_bite;
516 rlelen -= rle_this_bite;
517 my_clut = (clut_t*) img_overl->color;
518 my_trans = img_overl->trans;
519 xmask = 0;
520 } else {
521#ifdef LOG_BLEND_YUV
522 printf("Outside clip left %d, ending outside\n", img_overl->clip_left);
523#endif
524 /* no cutting needed, starts outside, ends outside */
525 rle_this_bite = rle_remainder;
526 rle_remainder = 0;
527 rlelen -= rle_this_bite;
528 my_clut = (clut_t*) img_overl->color;
529 my_trans = img_overl->trans;
530 xmask = 0;
531 }
532 } else if (x < clip_right) {
533 /* Starts inside clip area */
534 if ((x + rle_remainder) > clip_right ) {
535#ifdef LOG_BLEND_YUV
536 printf("Inside clip right %d, ending outside\n", clip_right);
537#endif
538 /* Cutting needed, starts inside, ends outside */
539 rle_this_bite = (clip_right - x);
540 rle_remainder -= rle_this_bite;
541 rlelen -= rle_this_bite;
542 my_clut = (clut_t*) img_overl->clip_color;
543 my_trans = img_overl->clip_trans;
544 xmask++;
545 } else {
546#ifdef LOG_BLEND_YUV
547 printf("Inside clip right %d, ending inside\n", clip_right);
548#endif
549 /* no cutting needed, starts inside, ends inside */
550 rle_this_bite = rle_remainder;
551 rle_remainder = 0;
552 rlelen -= rle_this_bite;
553 my_clut = (clut_t*) img_overl->clip_color;
554 my_trans = img_overl->clip_trans;
555 xmask++;
556 }
557 } else if (x >= clip_right) {
558 /* Starts outside clip area, ends outside clip area */
559 if ((x + rle_remainder ) > src_width ) {
560#ifdef LOG_BLEND_YUV
561 printf("Outside clip right %d, ending eol\n", clip_right);
562#endif
563 /* Cutting needed, starts outside, ends at right edge */
564 /* It should never reach here due to the earlier test of src_width */
565 rle_this_bite = (src_width - x );
566 rle_remainder -= rle_this_bite;
567 rlelen -= rle_this_bite;
568 my_clut = (clut_t*) img_overl->color;
569 my_trans = img_overl->trans;
570 xmask = 0;
571 } else {
572 /* no cutting needed, starts outside, ends outside */
573#ifdef LOG_BLEND_YUV
574 printf("Outside clip right %d, ending outside\n", clip_right);
575#endif
576 rle_this_bite = rle_remainder;
577 rle_remainder = 0;
578 rlelen -= rle_this_bite;
579 my_clut = (clut_t*) img_overl->color;
580 my_trans = img_overl->trans;
581 xmask = 0;
582 }
583 }
584 } else {
585 /* Outside clip area due to y */
586 /* no cutting needed, starts outside, ends outside */
587 rle_this_bite = rle_remainder;
588 rle_remainder = 0;
589 rlelen -= rle_this_bite;
590 my_clut = (clut_t*) img_overl->color;
591 my_trans = img_overl->trans;
592 xmask = 0;
593 }
594 o = my_trans[clr];
595#ifdef LOG_BLEND_YUV
596 printf("Trans=%d clr=%d xmask=%d my_clut[clr]=%d\n",o, clr, xmask, my_clut[clr].y);
597#endif
598 if (o) {
 /* transparency >= 15: overwrite; otherwise blend with the existing samples */
599 if(o >= 15) {
600 memset(dst_y + x, my_clut[clr].y, rle_this_bite);
601 if (y & 1) {
602 memset(dst_cr + (x >> 1), my_clut[clr].cr, (rle_this_bite+1) >> 1);
603 memset(dst_cb + (x >> 1), my_clut[clr].cb, (rle_this_bite+1) >> 1);
604 }
605 } else {
606 mem_blend8(dst_y + x, my_clut[clr].y, o, rle_this_bite);
607 if (y & 1) {
608 /* Blending cr and cb should use a different function, with pre -128 to each sample */
609 mem_blend8(dst_cr + (x >> 1), my_clut[clr].cr, o, (rle_this_bite+1) >> 1);
610 mem_blend8(dst_cb + (x >> 1), my_clut[clr].cb, o, (rle_this_bite+1) >> 1);
611 }
612 }
613
614 }
615#ifdef LOG_BLEND_YUV
616 printf("rle_this_bite=%d, remainder=%d, x=%d\n",rle_this_bite, rle_remainder, x);
617#endif
618 x += rle_this_bite;
619 if (rle >= rle_limit) {
620#ifdef LOG_BLEND_YUV
621 printf("x-rle_limit\n");
622#endif
623 break;
624 }
625 }
626 if (rle >= rle_limit) {
627#ifdef LOG_BLEND_YUV
628 printf("x-rle_limit\n");
629#endif
630 break;
631 }
632
633 dst_y += dst_width;
634
 /* chroma rows advance only after every second luma row */
635 if (y & 1) {
636 dst_cr += (dst_width + 1) / 2;
637 dst_cb += (dst_width + 1) / 2;
638 }
639 }
640#ifdef LOG_BLEND_YUV
641 printf("overlay_blend ended\n");
642#endif
643}
644
645void blend_yuy2 (uint8_t * dst_img, vo_overlay_t * img_overl,
646 int dst_width, int dst_height)
647{
648 clut_t *my_clut;
649 uint8_t *my_trans;
650
651 int src_width = img_overl->width;
652 int src_height = img_overl->height;
653 rle_elem_t *rle = img_overl->rle;
654 rle_elem_t *rle_limit = rle + img_overl->num_rle;
655 int x_off = img_overl->x;
656 int y_off = img_overl->y;
657 int mask;
658 int x, y;
659 int l;
660 int clip_right;
661 uint32_t yuy2;
662
663 uint8_t *dst_y = dst_img + 2 * (dst_width * y_off + x_off);
664 uint8_t *dst;
665
666 my_clut = (clut_t*) img_overl->clip_color;
667 my_trans = img_overl->clip_trans;
668
669 /* avoid wraping overlay if drawing to small image */
670 if( (x_off + img_overl->clip_right) < dst_width )
671 clip_right = img_overl->clip_right;
672 else
673 clip_right = dst_width - 1 - x_off;
674
675 /* avoid buffer overflow */
676 if( (src_height + y_off) >= dst_height )
677 src_height = dst_height - 1 - y_off;
678
679 for (y = 0; y < src_height; y++) {
680 mask = !(img_overl->clip_top > y || img_overl->clip_bottom < y);
681
682 dst = dst_y;
683 for (x = 0; x < src_width;) {
684 uint8_t clr;
685 uint16_t o;
686 int rlelen;
687
688 clr = rle->color;
689 o = my_trans[clr];
690 rlelen = rle->len;
691
692 if (o && mask) {
693 /* threat cases where clipping border is inside rle->len pixels */
694 if ( img_overl->clip_left > x ) {
695 if( img_overl->clip_left < x + rlelen ) {
696 rlelen -= img_overl->clip_left - x;
697 x += img_overl->clip_left - x;
698 } else {
699 o = 0;
700 }
701 } else if( clip_right < x + rlelen ) {
702 if( clip_right > x ) {
703 /* fixme: case not implemented */
704 o = 0;
705 } else {
706 o = 0;
707 }
708 }
709 }
710
711
712 if (o && mask) {
713 l = rlelen>>1;
714 if( !(x & 1) ) {
715 yuy2 = my_clut[clr].y + (my_clut[clr].cb << 8) +
716 (my_clut[clr].y << 16) + (my_clut[clr].cr << 24);
717 } else {
718 yuy2 = my_clut[clr].y + (my_clut[clr].cr << 8) +
719 (my_clut[clr].y << 16) + (my_clut[clr].cb << 24);
720 }
721
722 if (o >= 15) {
723 while(l--) {
724 *((uint32_t *)dst)++ = yuy2;
725 }
726 if(rlelen & 1)
727 *((uint16_t *)dst)++ = yuy2 & 0xffff;
728 } else {
729 if( l ) {
730 mem_blend32(dst, (uint8_t *)&yuy2, o, l);
731 dst += 4*l;
732 }
733
734 if(rlelen & 1) {
735 *dst = BLEND_BYTE(*dst, *((uint8_t *)&yuy2), o);
736 dst++;
737 *dst = BLEND_BYTE(*dst, *((uint8_t *)&yuy2+1), o);
738 dst++;
739 }
740 }
741 } else {
742 dst += rlelen*2;
743 }
744
745 x += rlelen;
746 rle++;
747 if (rle >= rle_limit) break;
748 }
749 if (rle >= rle_limit) break;
750
751 dst_y += dst_width*2;
752 }
753}
diff --git a/noncore/multimedia/opieplayer2/alphablend.h b/noncore/multimedia/opieplayer2/alphablend.h
new file mode 100644
index 0000000..7230f41
--- a/dev/null
+++ b/noncore/multimedia/opieplayer2/alphablend.h
@@ -0,0 +1,57 @@
1/*
2 *
3 * Copyright (C) 2000 Thomas Mirlacher
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * The author may be reached as <dent@linuxvideo.org>
20 *
21 *------------------------------------------------------------
22 *
23 */
24
25#ifndef __ALPHABLEND_H__
26#define __ALPHABLEND_H__
27
28#include <xine/video_out.h>
29
/*
 * One colour lookup table entry: four 8-bit fields, declared packed.
 * The blend routines index a table of these with the RLE colour value.
 * NOTE(review): 'foo' has no visible user in the blend code - presumably
 * padding to 32 bits; confirm before removing or renaming it.
 */
30typedef struct { /* CLUT == Color LookUp Table */
31 uint8_t cb : 8;
32 uint8_t cr : 8;
33 uint8_t y : 8;
34 uint8_t foo : 8;
35} __attribute__ ((packed)) clut_t;
36
37void blend_rgb16 (uint8_t * img, vo_overlay_t * img_overl,
38 int img_width, int img_height,
39 int dst_width, int dst_height);
40
41void blend_rgb24 (uint8_t * img, vo_overlay_t * img_overl,
42 int img_width, int img_height,
43 int dst_width, int dst_height);
44
45void blend_rgb32 (uint8_t * img, vo_overlay_t * img_overl,
46 int img_width, int img_height,
47 int dst_width, int dst_height);
48
49void blend_yuv (uint8_t *dst_base[3], vo_overlay_t * img_overl,
50 int dst_width, int dst_height);
51
52void blend_yuy2 (uint8_t * dst_img, vo_overlay_t * img_overl,
53 int dst_width, int dst_height);
54
55void crop_overlay (vo_overlay_t * overlay);
56
57#endif
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb.c b/noncore/multimedia/opieplayer2/yuv2rgb.c
new file mode 100644
index 0000000..d1d6627
--- a/dev/null
+++ b/noncore/multimedia/opieplayer2/yuv2rgb.c
@@ -0,0 +1,3160 @@
1/*
2 * yuv2rgb.c
3 *
4 * This file is part of xine, a unix video player.
5 *
6 * based on work from mpeg2dec:
7 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 *
9 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
10 *
11 * mpeg2dec is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * mpeg2dec is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 * $Id$
26 */
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <inttypes.h>
32
33#include "yuv2rgb.h"
34#include <xine/xineutils.h>
35
36
37static int prof_scale_line = -1;
38
39static scale_line_func_t find_scale_line_func(int step);
40
41
/*
 * Eight rows of four integer coefficients, one row per colour standard
 * named in the trailing comment of each row.
 * NOTE(review): presumably the row index is the MPEG-2
 * matrix_coefficients code and the values are YUV->RGB factors scaled by
 * 2**16 - confirm against the code that reads this table.
 */
42const int32_t Inverse_Table_6_9[8][4] = {
43 {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
44 {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
45 {104597, 132201, 25675, 53279}, /* unspecified */
46 {104597, 132201, 25675, 53279}, /* reserved */
47 {104448, 132798, 24759, 53109}, /* FCC */
48 {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
49 {104597, 132201, 25675, 53279}, /* SMPTE 170M */
50 {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
51};
52
53
/*
 * Allocate size bytes aligned to a multiple of alignment.
 *
 *   alignment - required alignment in bytes (0 is treated as "no alignment")
 *   size      - number of usable bytes requested
 *   chunk     - receives the pointer returned by xine_xmalloc(); this, not
 *               the return value, is what must later be passed to free()
 *
 * Returns the aligned pointer inside the chunk, or NULL if the allocation
 * returned NULL.
 *
 * The address is inspected through uintptr_t: casting the pointer to int
 * truncates it on platforms where pointers are wider than int.
 */
static void *my_malloc_aligned (size_t alignment, size_t size, void **chunk) {

  char *pMem;

  pMem = xine_xmalloc (size+alignment);

  *chunk = pMem;

  if (pMem && alignment) {
    size_t misalign = (size_t) ((uintptr_t) pMem % alignment);

    /* size+alignment bytes were requested, so this stays inside the chunk */
    if (misalign)
      pMem += alignment - misalign;
  }

  return pMem;
}
67
68
69static int yuv2rgb_configure (yuv2rgb_t *this,
70 int source_width, int source_height,
71 int y_stride, int uv_stride,
72 int dest_width, int dest_height,
73 int rgb_stride) {
74 /*
75 printf ("yuv2rgb setup (%d x %d => %d x %d)\n", source_width, source_height,
76 dest_width, dest_height);
77 */
78 if (prof_scale_line == -1)
79 prof_scale_line = xine_profiler_allocate_slot("xshm scale line");
80
81 this->source_width = source_width;
82 this->source_height = source_height;
83 this->y_stride = y_stride;
84 this->uv_stride = uv_stride;
85 this->dest_width = dest_width;
86 this->dest_height = dest_height;
87 this->rgb_stride = rgb_stride;
88
89 if (this->y_chunk) {
90 free (this->y_chunk);
91 this->y_buffer = this->y_chunk = NULL;
92 }
93 if (this->u_chunk) {
94 free (this->u_chunk);
95 this->u_buffer = this->u_chunk = NULL;
96 }
97 if (this->v_chunk) {
98 free (this->v_chunk);
99 this->v_buffer = this->v_chunk = NULL;
100 }
101
102
103 this->step_dx = source_width * 32768 / dest_width;
104 this->step_dy = source_height * 32768 / dest_height;
105
106 this->scale_line = find_scale_line_func(this->step_dx);
107
108 if ((source_width == dest_width) && (source_height == dest_height)) {
109 this->do_scale = 0;
110
111 /*
112 * space for two y-lines (for yuv2rgb_mlib)
113 * u,v subsampled 2:1
114 */
115 this->y_buffer = my_malloc_aligned (16, 2*dest_width, &this->y_chunk);
116 if (!this->y_buffer)
117 return 0;
118 this->u_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->u_chunk);
119 if (!this->u_buffer)
120 return 0;
121 this->v_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->v_chunk);
122 if (!this->v_buffer)
123 return 0;
124
125 } else {
126 this->do_scale = 1;
127
128 /*
129 * space for two y-lines (for yuv2rgb_mlib)
130 * u,v subsampled 2:1
131 */
132 this->y_buffer = my_malloc_aligned (16, 2*dest_width, &this->y_chunk);
133 if (!this->y_buffer)
134 return 0;
135 this->u_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->u_chunk);
136 if (!this->u_buffer)
137 return 0;
138 this->v_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->v_chunk);
139 if (!this->v_buffer)
140 return 0;
141 }
142 return 1;
143}
144
145
146static void scale_line_gen (uint8_t *source, uint8_t *dest,
147 int width, int step) {
148
149 /*
150 * scales a yuv source row to a dest row, with interpolation
151 * (good quality, but slow)
152 */
153 int p1;
154 int p2;
155 int dx;
156
157 xine_profiler_start_count(prof_scale_line);
158
159 p1 = *source++;
160 p2 = *source++;
161 dx = 0;
162
163 /*
164 * the following code has been optimized by Scott Smith <ssmith@akamai.com>:
165 *
166 * ok now I have a meaningful optimization for yuv2rgb.c:scale_line_gen.
167 * it removes the loop from within the while() loop by separating it out
168 * into 3 cases: where you are enlarging the line (<32768), where you are
169 * between 50% and 100% of the original line (<=65536), and where you are
170 * shrinking it by a lot. anyways, I went from 200 delivered / 100+
171 * skipped to 200 delivered / 80 skipped for the enlarging case. I
172 * noticed when looking at the assembly that the compiler was able to
173 * unroll these while(width) loops, whereas before it was trying to
174 * unroll the while(dx>32768) loops. so the compiler is better able to
175 * deal with this code.
176 */
177
178
179 if (step < 32768) {
180 while (width) {
181 *dest = p1 + (((p2-p1) * dx)>>15);
182
183 dx += step;
184 if (dx > 32768) {
185 dx -= 32768;
186 p1 = p2;
187 p2 = *source++;
188 }
189
190 dest ++;
191 width --;
192 }
193 } else if (step <= 65536) {
194 while (width) {
195 *dest = p1 + (((p2-p1) * dx)>>15);
196
197 dx += step;
198 if (dx > 65536) {
199 dx -= 65536;
200 p1 = *source++;
201 p2 = *source++;
202 } else {
203 dx -= 32768;
204 p1 = p2;
205 p2 = *source++;
206 }
207
208 dest ++;
209 width --;
210 }
211 } else {
212 while (width) {
213 int offs;
214
215 *dest = p1 + (((p2-p1) * dx)>>15);
216
217 dx += step;
218 offs=((dx-1)>>15);
219 dx-=offs<<15;
220 source+=offs-2;
221 p1=*source++;
222 p2=*source++;
223 dest ++;
224 width --;
225 }
226 }
227 xine_profiler_stop_count(prof_scale_line);
228
229
230
231
232}
233
234/*
235 * Interpolates 16 output pixels from 15 source pixels using shifts.
236 * Useful for scaling a PAL mpeg2 dvd input source to 4:3 format on
237 * a monitor using square pixels.
238 * (720 x 576 ==> 768 x 576)
239 */
240static void scale_line_15_16 (uint8_t *source, uint8_t *dest,
241 int width, int step) {
242
243 int p1, p2;
244
245 xine_profiler_start_count(prof_scale_line);
246
247 while ((width -= 16) >= 0) {
248 p1 = source[0];
249 dest[0] = p1;
250 p2 = source[1];
251 dest[1] = (1*p1 + 7*p2) >> 3;
252 p1 = source[2];
253 dest[2] = (1*p2 + 7*p1) >> 3;
254 p2 = source[3];
255 dest[3] = (1*p1 + 3*p2) >> 2;
256 p1 = source[4];
257 dest[4] = (1*p2 + 3*p1) >> 2;
258 p2 = source[5];
259 dest[5] = (3*p1 + 5*p2) >> 3;
260 p1 = source[6];
261 dest[6] = (3*p2 + 5*p1) >> 3;
262 p2 = source[7];
263 dest[7] = (1*p1 + 1*p1) >> 1;
264 p1 = source[8];
265 dest[8] = (1*p2 + 1*p1) >> 1;
266 p2 = source[9];
267 dest[9] = (5*p1 + 3*p2) >> 3;
268 p1 = source[10];
269 dest[10] = (5*p2 + 3*p1) >> 3;
270 p2 = source[11];
271 dest[11] = (3*p1 + 1*p2) >> 2;
272 p1 = source[12];
273 dest[12] = (3*p2 + 1*p1) >> 2;
274 p2 = source[13];
275 dest[13] = (7*p1 + 1*p2) >> 3;
276 p1 = source[14];
277 dest[14] = (7*p2 + 1*p1) >> 3;
278 dest[15] = p1;
279 source += 15;
280 dest += 16;
281 }
282
283 if ((width += 16) <= 0) goto done;
284 *dest++ = source[0];
285 if (--width <= 0) goto done;
286 *dest++ = (1*source[0] + 7*source[1]) >> 3;
287 if (--width <= 0) goto done;
288 *dest++ = (1*source[1] + 7*source[2]) >> 3;
289 if (--width <= 0) goto done;
290 *dest++ = (1*source[2] + 3*source[3]) >> 2;
291 if (--width <= 0) goto done;
292 *dest++ = (1*source[3] + 3*source[4]) >> 2;
293 if (--width <= 0) goto done;
294 *dest++ = (3*source[4] + 5*source[5]) >> 3;
295 if (--width <= 0) goto done;
296 *dest++ = (3*source[5] + 5*source[6]) >> 3;
297 if (--width <= 0) goto done;
298 *dest++ = (1*source[6] + 1*source[7]) >> 1;
299 if (--width <= 0) goto done;
300 *dest++ = (1*source[7] + 1*source[8]) >> 1;
301 if (--width <= 0) goto done;
302 *dest++ = (5*source[8] + 3*source[9]) >> 3;
303 if (--width <= 0) goto done;
304 *dest++ = (5*source[9] + 3*source[10]) >> 3;
305 if (--width <= 0) goto done;
306 *dest++ = (3*source[10] + 1*source[11]) >> 2;
307 if (--width <= 0) goto done;
308 *dest++ = (3*source[11] + 1*source[12]) >> 2;
309 if (--width <= 0) goto done;
310 *dest++ = (7*source[12] + 1*source[13]) >> 3;
311 if (--width <= 0) goto done;
312 *dest++ = (7*source[13] + 1*source[14]) >> 3;
313 done:
314 xine_profiler_stop_count(prof_scale_line);
315}
316
317
318/*
319 * Interpolates 53 output pixels from 45 source pixels using shifts.
320 * Useful for scaling a NTSC mpeg2 dvd input source to 16:9 display
321 * resulution
322 * fullscreen resolution, or to 16:9 format on a monitor using square
323 * pixels.
324 * (720 x 480 ==> 848 x 480)
325 */
326static void scale_line_45_53 (uint8_t *source, uint8_t *dest,
327 int width, int step) {
328
329 int p1, p2;
330
331 xine_profiler_start_count(prof_scale_line);
332
333 while ((width -= 53) >= 0) {
334 p1 = source[0];
335 p2 = source[1];
336 dest[0] = p1;
337 dest[1] = (1*p1 + 7*p2) >> 3;
338 p1 = source[2];
339 dest[2] = (1*p2 + 3*p1) >> 2;
340 p2 = source[3];
341 dest[3] = (1*p1 + 1*p2) >> 1;
342 p1 = source[4];
343 dest[4] = (5*p2 + 3*p1) >> 3;
344 p2 = source[5];
345 dest[5] = (3*p1 + 1*p2) >> 2;
346 p1 = source[6];
347 dest[6] = (7*p2 + 1*p1) >> 3;
348 dest[7] = p1;
349 p2 = source[7];
350 dest[8] = (1*p1 + 3*p2) >> 2;
351 p1 = source[8];
352 dest[9] = (3*p2 + 5*p1) >> 3;
353 p2 = source[9];
354 dest[10] = (1*p1 + 1*p2) >> 1;
355 p1 = source[10];
356 dest[11] = (5*p2 + 3*p1) >> 3;
357 p2 = source[11];
358 dest[12] = (3*p1 + 1*p2) >> 2;
359 p1 = source[12];
360 dest[13] = p2;
361 dest[14] = (1*p2 + 7*p1) >> 3;
362 p2 = source[13];
363 dest[15] = (1*p1 + 3*p2) >> 2;
364 p1 = source[14];
365 dest[16] = (3*p2 + 5*p1) >> 3;
366 p2 = source[15];
367 dest[17] = (5*p1 + 3*p2) >> 3;
368 p1 = source[16];
369 dest[18] = (3*p2 + 1*p1) >> 2;
370 p2 = source[17];
371 dest[19] = (7*p1 + 1*p2) >> 3;
372 dest[20] = p2;
373 p1 = source[18];
374 dest[21] = (1*p2 + 7*p1) >> 3;
375 p2 = source[19];
376 dest[22] = (3*p1 + 5*p2) >> 3;
377 p1 = source[20];
378 dest[23] = (1*p2 + 1*p1) >> 1;
379 p2 = source[21];
380 dest[24] = (5*p1 + 3*p2) >> 3;
381 p1 = source[22];
382 dest[25] = (3*p2 + 1*p1) >> 2;
383 p2 = source[23];
384 dest[26] = (7*p1 + 1*p2) >> 3;
385 dest[27] = (1*p1 + 7*p2) >> 3;
386 p1 = source[24];
387 dest[28] = (1*p2 + 3*p1) >> 2;
388 p2 = source[25];
389 dest[29] = (3*p1 + 5*p2) >> 3;
390 p1 = source[26];
391 dest[30] = (1*p2 + 1*p1) >> 1;
392 p2 = source[27];
393 dest[31] = (5*p1 + 3*p2) >> 3;
394 p1 = source[28];
395 dest[32] = (7*p2 + 1*p1) >> 3;
396 p2 = source[29];
397 dest[33] = p1;
398 dest[34] = (1*p1 + 7*p2) >> 3;
399 p1 = source[30];
400 dest[35] = (1*p2 + 3*p1) >> 2;
401 p2 = source[31];
402 dest[36] = (3*p1 + 5*p2) >> 3;
403 p1 = source[32];
404 dest[37] = (5*p2 + 3*p1) >> 3;
405 p2 = source[33];
406 dest[38] = (3*p1 + 1*p2) >> 2;
407 p1 = source[34];
408 dest[39] = (7*p2 + 1*p1) >> 3;
409 dest[40] = p1;
410 p2 = source[35];
411 dest[41] = (1*p1 + 3*p2) >> 2;
412 p1 = source[36];
413 dest[42] = (3*p2 + 5*p1) >> 3;
414 p2 = source[37];
415 dest[43] = (1*p1 + 1*p2) >> 1;
416 p1 = source[38];
417 dest[44] = (5*p2 + 3*p1) >> 3;
418 p2 = source[39];
419 dest[45] = (3*p1 + 1*p2) >> 2;
420 p1 = source[40];
421 dest[46] = p2;
422 dest[47] = (1*p2 + 7*p1) >> 3;
423 p2 = source[41];
424 dest[48] = (1*p1 + 3*p2) >> 2;
425 p1 = source[42];
426 dest[49] = (3*p2 + 5*p1) >> 3;
427 p2 = source[43];
428 dest[50] = (1*p1 + 1*p2) >> 1;
429 p1 = source[44];
430 dest[51] = (3*p2 + 1*p1) >> 2;
431 p2 = source[45];
432 dest[52] = (7*p1 + 1*p2) >> 3;
433 source += 45;
434 dest += 53;
435 }
436
437 if ((width += 53) <= 0) goto done;
438 *dest++ = source[0];
439 if (--width <= 0) goto done;
440 *dest++ = (1*source[0] + 7*source[1]) >> 3;
441 if (--width <= 0) goto done;
442 *dest++ = (1*source[1] + 3*source[2]) >> 2;
443 if (--width <= 0) goto done;
444 *dest++ = (1*source[2] + 1*source[3]) >> 1;
445 if (--width <= 0) goto done;
446 *dest++ = (5*source[3] + 3*source[4]) >> 3;
447 if (--width <= 0) goto done;
448 *dest++ = (3*source[4] + 1*source[5]) >> 2;
449 if (--width <= 0) goto done;
450 *dest++ = (7*source[5] + 1*source[6]) >> 3;
451 if (--width <= 0) goto done;
452 *dest++ = source[6];
453 if (--width <= 0) goto done;
454 *dest++ = (1*source[6] + 3*source[7]) >> 2;
455 if (--width <= 0) goto done;
456 *dest++ = (3*source[7] + 5*source[8]) >> 3;
457 if (--width <= 0) goto done;
458 *dest++ = (1*source[8] + 1*source[9]) >> 1;
459 if (--width <= 0) goto done;
460 *dest++ = (5*source[9] + 3*source[10]) >> 3;
461 if (--width <= 0) goto done;
462 *dest++ = (3*source[10] + 1*source[11]) >> 2;
463 if (--width <= 0) goto done;
464 *dest++ = source[11];
465 if (--width <= 0) goto done;
466 *dest++ = (1*source[11] + 7*source[12]) >> 3;
467 if (--width <= 0) goto done;
468 *dest++ = (1*source[12] + 3*source[13]) >> 2;
469 if (--width <= 0) goto done;
470 *dest++ = (3*source[13] + 5*source[14]) >> 3;
471 if (--width <= 0) goto done;
472 *dest++ = (5*source[14] + 3*source[15]) >> 3;
473 if (--width <= 0) goto done;
474 *dest++ = (3*source[15] + 1*source[16]) >> 2;
475 if (--width <= 0) goto done;
476 *dest++ = (7*source[16] + 1*source[17]) >> 3;
477 if (--width <= 0) goto done;
478 *dest++ = source[17];
479 if (--width <= 0) goto done;
480 *dest++ = (1*source[17] + 7*source[18]) >> 3;
481 if (--width <= 0) goto done;
482 *dest++ = (3*source[18] + 5*source[19]) >> 3;
483 if (--width <= 0) goto done;
484 *dest++ = (1*source[19] + 1*source[20]) >> 1;
485 if (--width <= 0) goto done;
486 *dest++ = (5*source[20] + 3*source[21]) >> 3;
487 if (--width <= 0) goto done;
488 *dest++ = (3*source[21] + 1*source[22]) >> 2;
489 if (--width <= 0) goto done;
490 *dest++ = (7*source[22] + 1*source[23]) >> 3;
491 if (--width <= 0) goto done;
492 *dest++ = (1*source[22] + 7*source[23]) >> 3;
493 if (--width <= 0) goto done;
494 *dest++ = (1*source[23] + 3*source[24]) >> 2;
495 if (--width <= 0) goto done;
496 *dest++ = (3*source[24] + 5*source[25]) >> 3;
497 if (--width <= 0) goto done;
498 *dest++ = (1*source[25] + 1*source[26]) >> 1;
499 if (--width <= 0) goto done;
500 *dest++ = (5*source[26] + 3*source[27]) >> 3;
501 if (--width <= 0) goto done;
502 *dest++ = (7*source[27] + 1*source[28]) >> 3;
503 if (--width <= 0) goto done;
504 *dest++ = source[28];
505 if (--width <= 0) goto done;
506 *dest++ = (1*source[28] + 7*source[29]) >> 3;
507 if (--width <= 0) goto done;
508 *dest++ = (1*source[29] + 3*source[30]) >> 2;
509 if (--width <= 0) goto done;
510 *dest++ = (3*source[30] + 5*source[31]) >> 3;
511 if (--width <= 0) goto done;
512 *dest++ = (5*source[31] + 3*source[32]) >> 3;
513 if (--width <= 0) goto done;
514 *dest++ = (3*source[32] + 1*source[33]) >> 2;
515 if (--width <= 0) goto done;
516 *dest++ = (7*source[33] + 1*source[34]) >> 3;
517 if (--width <= 0) goto done;
518 *dest++ = source[34];
519 if (--width <= 0) goto done;
520 *dest++ = (1*source[34] + 3*source[35]) >> 2;
521 if (--width <= 0) goto done;
522 *dest++ = (3*source[35] + 5*source[36]) >> 3;
523 if (--width <= 0) goto done;
524 *dest++ = (1*source[36] + 1*source[37]) >> 1;
525 if (--width <= 0) goto done;
526 *dest++ = (5*source[37] + 3*source[38]) >> 3;
527 if (--width <= 0) goto done;
528 *dest++ = (3*source[38] + 1*source[39]) >> 2;
529 if (--width <= 0) goto done;
530 *dest++ = source[39];
531 if (--width <= 0) goto done;
532 *dest++ = (1*source[39] + 7*source[40]) >> 3;
533 if (--width <= 0) goto done;
534 *dest++ = (1*source[40] + 3*source[41]) >> 2;
535 if (--width <= 0) goto done;
536 *dest++ = (3*source[41] + 5*source[42]) >> 3;
537 if (--width <= 0) goto done;
538 *dest++ = (1*source[42] + 1*source[43]) >> 1;
539 if (--width <= 0) goto done;
540 *dest++ = (3*source[43] + 1*source[44]) >> 2;
541 done:
542
543 xine_profiler_stop_count(prof_scale_line);
544}
545
546
547/*
548 * Interpolates 64 output pixels from 45 source pixels using shifts.
549 * Useful for scaling a PAL mpeg2 dvd input source to 1024x768
550 * fullscreen resolution, or to 16:9 format on a monitor using square
551 * pixels.
552 * (720 x 576 ==> 1024 x 576)
553 */
554static void scale_line_45_64 (uint8_t *source, uint8_t *dest,
555 int width, int step) {
556
557 int p1, p2;
558
559 xine_profiler_start_count(prof_scale_line);
560
561 while ((width -= 64) >= 0) {
562 p1 = source[0];
563 p2 = source[1];
564 dest[0] = p1;
565 dest[1] = (1*p1 + 3*p2) >> 2;
566 p1 = source[2];
567 dest[2] = (5*p2 + 3*p1) >> 3;
568 p2 = source[3];
569 dest[3] = (7*p1 + 1*p2) >> 3;
570 dest[4] = (1*p1 + 3*p2) >> 2;
571 p1 = source[4];
572 dest[5] = (1*p2 + 1*p1) >> 1;
573 p2 = source[5];
574 dest[6] = (3*p1 + 1*p2) >> 2;
575 dest[7] = (1*p1 + 7*p2) >> 3;
576 p1 = source[6];
577 dest[8] = (3*p2 + 5*p1) >> 3;
578 p2 = source[7];
579 dest[9] = (5*p1 + 3*p2) >> 3;
580 p1 = source[8];
581 dest[10] = p2;
582 dest[11] = (1*p2 + 3*p1) >> 2;
583 p2 = source[9];
584 dest[12] = (5*p1 + 3*p2) >> 3;
585 p1 = source[10];
586 dest[13] = (7*p2 + 1*p1) >> 3;
587 dest[14] = (1*p2 + 7*p1) >> 3;
588 p2 = source[11];
589 dest[15] = (1*p1 + 1*p2) >> 1;
590 p1 = source[12];
591 dest[16] = (3*p2 + 1*p1) >> 2;
592 dest[17] = p1;
593 p2 = source[13];
594 dest[18] = (3*p1 + 5*p2) >> 3;
595 p1 = source[14];
596 dest[19] = (5*p2 + 3*p1) >> 3;
597 p2 = source[15];
598 dest[20] = p1;
599 dest[21] = (1*p1 + 3*p2) >> 2;
600 p1 = source[16];
601 dest[22] = (1*p2 + 1*p1) >> 1;
602 p2 = source[17];
603 dest[23] = (7*p1 + 1*p2) >> 3;
604 dest[24] = (1*p1 + 7*p2) >> 3;
605 p1 = source[18];
606 dest[25] = (3*p2 + 5*p1) >> 3;
607 p2 = source[19];
608 dest[26] = (3*p1 + 1*p2) >> 2;
609 dest[27] = p2;
610 p1 = source[20];
611 dest[28] = (3*p2 + 5*p1) >> 3;
612 p2 = source[21];
613 dest[29] = (5*p1 + 3*p2) >> 3;
614 p1 = source[22];
615 dest[30] = (7*p2 + 1*p1) >> 3;
616 dest[31] = (1*p2 + 3*p1) >> 2;
617 p2 = source[23];
618 dest[32] = (1*p1 + 1*p2) >> 1;
619 p1 = source[24];
620 dest[33] = (3*p2 + 1*p1) >> 2;
621 dest[34] = (1*p2 + 7*p1) >> 3;
622 p2 = source[25];
623 dest[35] = (3*p1 + 5*p2) >> 3;
624 p1 = source[26];
625 dest[36] = (3*p2 + 1*p1) >> 2;
626 p2 = source[27];
627 dest[37] = p1;
628 dest[38] = (1*p1 + 3*p2) >> 2;
629 p1 = source[28];
630 dest[39] = (5*p2 + 3*p1) >> 3;
631 p2 = source[29];
632 dest[40] = (7*p1 + 1*p2) >> 3;
633 dest[41] = (1*p1 + 7*p2) >> 3;
634 p1 = source[30];
635 dest[42] = (1*p2 + 1*p1) >> 1;
636 p2 = source[31];
637 dest[43] = (3*p1 + 1*p2) >> 2;
638 dest[44] = (1*p1 + 7*p2) >> 3;
639 p1 = source[32];
640 dest[45] = (3*p2 + 5*p1) >> 3;
641 p2 = source[33];
642 dest[46] = (5*p1 + 3*p2) >> 3;
643 p1 = source[34];
644 dest[47] = p2;
645 dest[48] = (1*p2 + 3*p1) >> 2;
646 p2 = source[35];
647 dest[49] = (1*p1 + 1*p2) >> 1;
648 p1 = source[36];
649 dest[50] = (7*p2 + 1*p1) >> 3;
650 dest[51] = (1*p2 + 7*p1) >> 3;
651 p2 = source[37];
652 dest[52] = (1*p1 + 1*p2) >> 1;
653 p1 = source[38];
654 dest[53] = (3*p2 + 1*p1) >> 2;
655 dest[54] = p1;
656 p2 = source[39];
657 dest[55] = (3*p1 + 5*p2) >> 3;
658 p1 = source[40];
659 dest[56] = (5*p2 + 3*p1) >> 3;
660 p2 = source[41];
661 dest[57] = (7*p1 + 1*p2) >> 3;
662 dest[58] = (1*p1 + 3*p2) >> 2;
663 p1 = source[42];
664 dest[59] = (1*p2 + 1*p1) >> 1;
665 p2 = source[43];
666 dest[60] = (7*p1 + 1*p2) >> 3;
667 dest[61] = (1*p1 + 7*p2) >> 3;
668 p1 = source[44];
669 dest[62] = (3*p2 + 5*p1) >> 3;
670 p2 = source[45];
671 dest[63] = (3*p1 + 1*p2) >> 2;
672 source += 45;
673 dest += 64;
674 }
675
676 if ((width += 64) <= 0) goto done;
677 *dest++ = source[0];
678 if (--width <= 0) goto done;
679 *dest++ = (1*source[0] + 3*source[1]) >> 2;
680 if (--width <= 0) goto done;
681 *dest++ = (5*source[1] + 3*source[2]) >> 3;
682 if (--width <= 0) goto done;
683 *dest++ = (7*source[2] + 1*source[3]) >> 3;
684 if (--width <= 0) goto done;
685 *dest++ = (1*source[2] + 3*source[3]) >> 2;
686 if (--width <= 0) goto done;
687 *dest++ = (1*source[3] + 1*source[4]) >> 1;
688 if (--width <= 0) goto done;
689 *dest++ = (3*source[4] + 1*source[5]) >> 2;
690 if (--width <= 0) goto done;
691 *dest++ = (1*source[4] + 7*source[5]) >> 3;
692 if (--width <= 0) goto done;
693 *dest++ = (3*source[5] + 5*source[6]) >> 3;
694 if (--width <= 0) goto done;
695 *dest++ = (5*source[6] + 3*source[7]) >> 3;
696 if (--width <= 0) goto done;
697 *dest++ = source[7];
698 if (--width <= 0) goto done;
699 *dest++ = (1*source[7] + 3*source[8]) >> 2;
700 if (--width <= 0) goto done;
701 *dest++ = (5*source[8] + 3*source[9]) >> 3;
702 if (--width <= 0) goto done;
703 *dest++ = (7*source[9] + 1*source[10]) >> 3;
704 if (--width <= 0) goto done;
705 *dest++ = (1*source[9] + 7*source[10]) >> 3;
706 if (--width <= 0) goto done;
707 *dest++ = (1*source[10] + 1*source[11]) >> 1;
708 if (--width <= 0) goto done;
709 *dest++ = (3*source[11] + 1*source[12]) >> 2;
710 if (--width <= 0) goto done;
711 *dest++ = source[12];
712 if (--width <= 0) goto done;
713 *dest++ = (3*source[12] + 5*source[13]) >> 3;
714 if (--width <= 0) goto done;
715 *dest++ = (5*source[13] + 3*source[14]) >> 3;
716 if (--width <= 0) goto done;
717 *dest++ = source[14];
718 if (--width <= 0) goto done;
719 *dest++ = (1*source[14] + 3*source[15]) >> 2;
720 if (--width <= 0) goto done;
721 *dest++ = (1*source[15] + 1*source[16]) >> 1;
722 if (--width <= 0) goto done;
723 *dest++ = (7*source[16] + 1*source[17]) >> 3;
724 if (--width <= 0) goto done;
725 *dest++ = (1*source[16] + 7*source[17]) >> 3;
726 if (--width <= 0) goto done;
727 *dest++ = (3*source[17] + 5*source[18]) >> 3;
728 if (--width <= 0) goto done;
729 *dest++ = (3*source[18] + 1*source[19]) >> 2;
730 if (--width <= 0) goto done;
731 *dest++ = source[19];
732 if (--width <= 0) goto done;
733 *dest++ = (3*source[19] + 5*source[20]) >> 3;
734 if (--width <= 0) goto done;
735 *dest++ = (5*source[20] + 3*source[21]) >> 3;
736 if (--width <= 0) goto done;
737 *dest++ = (7*source[21] + 1*source[22]) >> 3;
738 if (--width <= 0) goto done;
739 *dest++ = (1*source[21] + 3*source[22]) >> 2;
740 if (--width <= 0) goto done;
741 *dest++ = (1*source[22] + 1*source[23]) >> 1;
742 if (--width <= 0) goto done;
743 *dest++ = (3*source[23] + 1*source[24]) >> 2;
744 if (--width <= 0) goto done;
745 *dest++ = (1*source[23] + 7*source[24]) >> 3;
746 if (--width <= 0) goto done;
747 *dest++ = (3*source[24] + 5*source[25]) >> 3;
748 if (--width <= 0) goto done;
749 *dest++ = (3*source[25] + 1*source[26]) >> 2;
750 if (--width <= 0) goto done;
751 *dest++ = source[26];
752 if (--width <= 0) goto done;
753 *dest++ = (1*source[26] + 3*source[27]) >> 2;
754 if (--width <= 0) goto done;
755 *dest++ = (5*source[27] + 3*source[28]) >> 3;
756 if (--width <= 0) goto done;
757 *dest++ = (7*source[28] + 1*source[29]) >> 3;
758 if (--width <= 0) goto done;
759 *dest++ = (1*source[28] + 7*source[29]) >> 3;
760 if (--width <= 0) goto done;
761 *dest++ = (1*source[29] + 1*source[30]) >> 1;
762 if (--width <= 0) goto done;
763 *dest++ = (3*source[30] + 1*source[31]) >> 2;
764 if (--width <= 0) goto done;
765 *dest++ = (1*source[30] + 7*source[31]) >> 3;
766 if (--width <= 0) goto done;
767 *dest++ = (3*source[31] + 5*source[32]) >> 3;
768 if (--width <= 0) goto done;
769 *dest++ = (5*source[32] + 3*source[33]) >> 3;
770 if (--width <= 0) goto done;
771 *dest++ = source[33];
772 if (--width <= 0) goto done;
773 *dest++ = (1*source[33] + 3*source[34]) >> 2;
774 if (--width <= 0) goto done;
775 *dest++ = (1*source[34] + 1*source[35]) >> 1;
776 if (--width <= 0) goto done;
777 *dest++ = (7*source[35] + 1*source[36]) >> 3;
778 if (--width <= 0) goto done;
779 *dest++ = (1*source[35] + 7*source[36]) >> 3;
780 if (--width <= 0) goto done;
781 *dest++ = (1*source[36] + 1*source[37]) >> 1;
782 if (--width <= 0) goto done;
783 *dest++ = (3*source[37] + 1*source[38]) >> 2;
784 if (--width <= 0) goto done;
785 *dest++ = source[38];
786 if (--width <= 0) goto done;
787 *dest++ = (3*source[38] + 5*source[39]) >> 3;
788 if (--width <= 0) goto done;
789 *dest++ = (5*source[39] + 3*source[40]) >> 3;
790 if (--width <= 0) goto done;
791 *dest++ = (7*source[40] + 1*source[41]) >> 3;
792 if (--width <= 0) goto done;
793 *dest++ = (1*source[40] + 3*source[41]) >> 2;
794 if (--width <= 0) goto done;
795 *dest++ = (1*source[41] + 1*source[42]) >> 1;
796 if (--width <= 0) goto done;
797 *dest++ = (7*source[42] + 1*source[43]) >> 3;
798 if (--width <= 0) goto done;
799 *dest++ = (1*source[42] + 7*source[43]) >> 3;
800 if (--width <= 0) goto done;
801 *dest++ = (3*source[43] + 5*source[44]) >> 3;
802 done:
803
804 xine_profiler_stop_count(prof_scale_line);
805}
806
807
808/*
809 * Interpolates 16 output pixels from 9 source pixels using shifts.
810 * Useful for scaling a PAL mpeg2 dvd input source to 1280x1024 fullscreen
811 * (720 x 576 ==> 1280 x XXX)
812 */
813static void scale_line_9_16 (uint8_t *source, uint8_t *dest,
814 int width, int step) {
815
816 int p1, p2;
817
818 xine_profiler_start_count(prof_scale_line);
819
820 while ((width -= 16) >= 0) {
821 p1 = source[0];
822 p2 = source[1];
823 dest[0] = p1;
824 dest[1] = (1*p1 + 1*p2) >> 1;
825 p1 = source[2];
826 dest[2] = (7*p2 + 1*p1) >> 3;
827 dest[3] = (3*p2 + 5*p1) >> 3;
828 p2 = source[3];
829 dest[4] = (3*p1 + 1*p2) >> 2;
830 dest[5] = (1*p1 + 3*p2) >> 2;
831 p1 = source[4];
832 dest[6] = (5*p2 + 3*p1) >> 3;
833 dest[7] = (1*p2 + 7*p1) >> 3;
834 p2 = source[5];
835 dest[8] = (1*p1 + 1*p2) >> 1;
836 p1 = source[6];
837 dest[9] = p2;
838 dest[10] = (3*p2 + 5*p1) >> 3;
839 p2 = source[7];
840 dest[11] = (7*p1 + 1*p2) >> 3;
841 dest[12] = (1*p1 + 3*p2) >> 2;
842 p1 = source[8];
843 dest[13] = (3*p2 + 1*p1) >> 2;
844 dest[14] = (1*p2 + 7*p1) >> 3;
845 p2 = source[9];
846 dest[15] = (5*p1 + 3*p2) >> 3;
847 source += 9;
848 dest += 16;
849 }
850
851 if ((width += 16) <= 0) goto done;
852 *dest++ = source[0];
853 if (--width <= 0) goto done;
854 *dest++ = (1*source[0] + 1*source[1]) >> 1;
855 if (--width <= 0) goto done;
856 *dest++ = (7*source[1] + 1*source[2]) >> 3;
857 if (--width <= 0) goto done;
858 *dest++ = (3*source[1] + 5*source[2]) >> 3;
859 if (--width <= 0) goto done;
860 *dest++ = (3*source[2] + 1*source[3]) >> 2;
861 if (--width <= 0) goto done;
862 *dest++ = (1*source[2] + 3*source[3]) >> 2;
863 if (--width <= 0) goto done;
864 *dest++ = (5*source[3] + 3*source[4]) >> 3;
865 if (--width <= 0) goto done;
866 *dest++ = (1*source[3] + 7*source[4]) >> 3;
867 if (--width <= 0) goto done;
868 *dest++ = (1*source[4] + 1*source[5]) >> 1;
869 if (--width <= 0) goto done;
870 *dest++ = source[5];
871 if (--width <= 0) goto done;
872 *dest++ = (3*source[5] + 5*source[6]) >> 3;
873 if (--width <= 0) goto done;
874 *dest++ = (7*source[6] + 1*source[7]) >> 3;
875 if (--width <= 0) goto done;
876 *dest++ = (1*source[6] + 3*source[7]) >> 2;
877 if (--width <= 0) goto done;
878 *dest++ = (3*source[7] + 1*source[8]) >> 2;
879 if (--width <= 0) goto done;
880 *dest++ = (1*source[7] + 7*source[8]) >> 3;
881done:
882 xine_profiler_stop_count(prof_scale_line);
883}
884
885
886/*
887 * Interpolates 12 output pixels from 11 source pixels using shifts.
888 * Useful for scaling a PAL vcd input source to 4:3 display format.
889 */
890static void scale_line_11_12 (uint8_t *source, uint8_t *dest,
891 int width, int step) {
892
893 int p1, p2;
894
895 xine_profiler_start_count(prof_scale_line);
896
897 while ((width -= 12) >= 0) {
898 p1 = source[0];
899 p2 = source[1];
900 dest[0] = p1;
901 dest[1] = (1*p1 + 7*p2) >> 3;
902 p1 = source[2];
903 dest[2] = (1*p2 + 7*p1) >> 3;
904 p2 = source[3];
905 dest[3] = (1*p1 + 3*p2) >> 2;
906 p1 = source[4];
907 dest[4] = (3*p2 + 5*p1) >> 3;
908 p2 = source[5];
909 dest[5] = (3*p1 + 5*p2) >> 3;
910 p1 = source[6];
911 dest[6] = (1*p2 + 1*p1) >> 1;
912 p2 = source[7];
913 dest[7] = (5*p1 + 3*p2) >> 3;
914 p1 = source[8];
915 dest[8] = (5*p2 + 3*p1) >> 3;
916 p2 = source[9];
917 dest[9] = (3*p1 + 1*p2) >> 2;
918 p1 = source[10];
919 dest[10] = (7*p2 + 1*p1) >> 3;
920 p2 = source[11];
921 dest[11] = (7*p1 + 1*p2) >> 3;
922 source += 11;
923 dest += 12;
924 }
925
926 if ((width += 12) <= 0) goto done;
927 *dest++ = source[0];
928 if (--width <= 0) goto done;
929 *dest++ = (1*source[0] + 7*source[1]) >> 3;
930 if (--width <= 0) goto done;
931 *dest++ = (1*source[1] + 7*source[2]) >> 3;
932 if (--width <= 0) goto done;
933 *dest++ = (1*source[2] + 3*source[3]) >> 2;
934 if (--width <= 0) goto done;
935 *dest++ = (3*source[3] + 5*source[4]) >> 3;
936 if (--width <= 0) goto done;
937 *dest++ = (3*source[4] + 5*source[5]) >> 3;
938 if (--width <= 0) goto done;
939 *dest++ = (1*source[5] + 1*source[6]) >> 1;
940 if (--width <= 0) goto done;
941 *dest++ = (5*source[6] + 3*source[7]) >> 3;
942 if (--width <= 0) goto done;
943 *dest++ = (5*source[7] + 3*source[8]) >> 3;
944 if (--width <= 0) goto done;
945 *dest++ = (3*source[8] + 1*source[9]) >> 2;
946 if (--width <= 0) goto done;
947 *dest++ = (7*source[9] + 1*source[10]) >> 3;
948done:
949
950 xine_profiler_stop_count(prof_scale_line);
951}
952
953
954/*
955 * Interpolates 24 output pixels from 11 source pixels using shifts.
956 * Useful for scaling a PAL vcd input source to 4:3 display format
957 * at 2*zoom.
958 */
959static void scale_line_11_24 (uint8_t *source, uint8_t *dest,
960 int width, int step) {
961
962 int p1, p2;
963
964 xine_profiler_start_count(prof_scale_line);
965
966 while ((width -= 24) >= 0) {
967 p1 = source[0];
968 p2 = source[1];
969 dest[0] = p1;
970 dest[1] = (1*p1 + 1*p2) >> 1;
971 dest[2] = (1*p1 + 7*p2) >> 3;
972 p1 = source[2];
973 dest[3] = (5*p2 + 3*p1) >> 3;
974 dest[4] = (1*p2 + 7*p1) >> 3;
975 p2 = source[3];
976 dest[5] = (3*p1 + 1*p2) >> 2;
977 dest[6] = (1*p1 + 3*p2) >> 2;
978 p1 = source[4];
979 dest[7] = (3*p2 + 1*p1) >> 2;
980 dest[8] = (3*p2 + 5*p1) >> 3;
981 p2 = source[5];
982 dest[9] = (7*p1 + 1*p2) >> 3;
983 dest[10] = (3*p1 + 5*p2) >> 3;
984 p1 = source[6];
985 dest[11] = p2;
986 dest[12] = (1*p2 + 1*p1) >> 1;
987 dest[13] = p1;
988 p2 = source[7];
989 dest[14] = (5*p1 + 3*p2) >> 3;
990 dest[15] = (1*p1 + 7*p2) >> 3;
991 p1 = source[8];
992 dest[16] = (5*p2 + 3*p1) >> 3;
993 dest[17] = (1*p2 + 3*p1) >> 2;
994 p2 = source[9];
995 dest[18] = (3*p1 + 1*p2) >> 2;
996 dest[19] = (1*p1 + 3*p2) >> 2;
997 p1 = source[10];
998 dest[20] = (7*p2 + 1*p1) >> 3;
999 dest[21] = (3*p2 + 5*p1) >> 3;
1000 p2 = source[11];
1001 dest[22] = (7*p1 + 1*p2) >> 3;
1002 dest[23] = (1*p1 + 1*p2) >> 1;
1003 source += 11;
1004 dest += 24;
1005 }
1006
1007 if ((width += 24) <= 0) goto done;
1008 *dest++ = source[0];
1009 if (--width <= 0) goto done;
1010 *dest++ = (1*source[0] + 1*source[1]) >> 1;
1011 if (--width <= 0) goto done;
1012 *dest++ = (1*source[0] + 7*source[1]) >> 3;
1013 if (--width <= 0) goto done;
1014 *dest++ = (5*source[1] + 3*source[2]) >> 3;
1015 if (--width <= 0) goto done;
1016 *dest++ = (1*source[1] + 7*source[2]) >> 3;
1017 if (--width <= 0) goto done;
1018 *dest++ = (3*source[2] + 1*source[3]) >> 2;
1019 if (--width <= 0) goto done;
1020 *dest++ = (1*source[2] + 3*source[3]) >> 2;
1021 if (--width <= 0) goto done;
1022 *dest++ = (3*source[3] + 1*source[4]) >> 2;
1023 if (--width <= 0) goto done;
1024 *dest++ = (3*source[3] + 5*source[4]) >> 3;
1025 if (--width <= 0) goto done;
1026 *dest++ = (7*source[4] + 1*source[5]) >> 3;
1027 if (--width <= 0) goto done;
1028 *dest++ = (3*source[4] + 5*source[5]) >> 3;
1029 if (--width <= 0) goto done;
1030 *dest++ = source[5];
1031 if (--width <= 0) goto done;
1032 *dest++ = (1*source[5] + 1*source[6]) >> 1;
1033 if (--width <= 0) goto done;
1034 *dest++ = source[6];
1035 if (--width <= 0) goto done;
1036 *dest++ = (5*source[6] + 3*source[7]) >> 3;
1037 if (--width <= 0) goto done;
1038 *dest++ = (1*source[6] + 7*source[7]) >> 3;
1039 if (--width <= 0) goto done;
1040 *dest++ = (5*source[7] + 3*source[8]) >> 3;
1041 if (--width <= 0) goto done;
1042 *dest++ = (1*source[7] + 3*source[8]) >> 2;
1043 if (--width <= 0) goto done;
1044 *dest++ = (3*source[8] + 1*source[9]) >> 2;
1045 if (--width <= 0) goto done;
1046 *dest++ = (1*source[8] + 3*source[9]) >> 2;
1047 if (--width <= 0) goto done;
1048 *dest++ = (7*source[9] + 1*source[10]) >> 3;
1049 if (--width <= 0) goto done;
1050 *dest++ = (3*source[9] + 5*source[10]) >> 3;
1051 if (--width <= 0) goto done;
1052 *dest++ = (7*source[10] + 1*source[11]) >> 3;
1053done:
1054
1055 xine_profiler_stop_count(prof_scale_line);
1056}
1057
1058
1059/*
1060 * Interpolates 8 output pixels from 5 source pixels using shifts.
1061 * Useful for scaling a PAL svcd input source to 4:3 display format.
1062 */
1063static void scale_line_5_8 (uint8_t *source, uint8_t *dest,
1064 int width, int step) {
1065
1066 int p1, p2;
1067
1068 xine_profiler_start_count(prof_scale_line);
1069
1070 while ((width -= 8) >= 0) {
1071 p1 = source[0];
1072 p2 = source[1];
1073 dest[0] = p1;
1074 dest[1] = (3*p1 + 5*p2) >> 3;
1075 p1 = source[2];
1076 dest[2] = (3*p2 + 1*p1) >> 2;
1077 dest[3] = (1*p2 + 7*p1) >> 3;
1078 p2 = source[3];
1079 dest[4] = (1*p1 + 1*p2) >> 1;
1080 p1 = source[4];
1081 dest[5] = (7*p2 + 1*p1) >> 3;
1082 dest[6] = (1*p2 + 3*p1) >> 2;
1083 p2 = source[5];
1084 dest[7] = (5*p1 + 3*p2) >> 3;
1085 source += 5;
1086 dest += 8;
1087 }
1088
1089 if ((width += 8) <= 0) goto done;
1090 *dest++ = source[0];
1091 if (--width <= 0) goto done;
1092 *dest++ = (3*source[0] + 5*source[1]) >> 3;
1093 if (--width <= 0) goto done;
1094 *dest++ = (3*source[1] + 1*source[2]) >> 2;
1095 if (--width <= 0) goto done;
1096 *dest++ = (1*source[1] + 7*source[2]) >> 3;
1097 if (--width <= 0) goto done;
1098 *dest++ = (1*source[2] + 1*source[3]) >> 1;
1099 if (--width <= 0) goto done;
1100 *dest++ = (7*source[3] + 1*source[4]) >> 3;
1101 if (--width <= 0) goto done;
1102 *dest++ = (1*source[3] + 3*source[4]) >> 2;
1103done:
1104
1105 xine_profiler_stop_count(prof_scale_line);
1106}
1107
1108
1109/*
1110 * Interpolates 4 output pixels from 3 source pixels using shifts.
1111 * Useful for scaling a NTSC svcd input source to 4:3 display format.
1112 */
1113static void scale_line_3_4 (uint8_t *source, uint8_t *dest,
1114 int width, int step) {
1115
1116 int p1, p2;
1117
1118 xine_profiler_start_count(prof_scale_line);
1119
1120 while ((width -= 4) >= 0) {
1121 p1 = source[0];
1122 p2 = source[1];
1123 dest[0] = p1;
1124 dest[1] = (1*p1 + 3*p2) >> 2;
1125 p1 = source[2];
1126 dest[2] = (1*p2 + 1*p1) >> 1;
1127 p2 = source[3];
1128 dest[3] = (3*p1 + 1*p2) >> 2;
1129 source += 3;
1130 dest += 4;
1131 }
1132
1133 if ((width += 4) <= 0) goto done;
1134 *dest++ = source[0];
1135 if (--width <= 0) goto done;
1136 *dest++ = (1*source[0] + 3*source[1]) >> 2;
1137 if (--width <= 0) goto done;
1138 *dest++ = (1*source[1] + 1*source[2]) >> 1;
1139done:
1140
1141 xine_profiler_stop_count(prof_scale_line);
1142}
1143
1144
1145/* Interpolate 2 output pixels from one source pixel. */
1146
1147static void scale_line_1_2 (uint8_t *source, uint8_t *dest,
1148 int width, int step) {
1149 int p1, p2;
1150
1151 xine_profiler_start_count(prof_scale_line);
1152
1153 p1 = *source;
1154 while ((width -= 4) >= 0) {
1155 *dest++ = p1;
1156 p2 = *++source;
1157 *dest++ = (p1 + p2) >> 1;
1158 *dest++ = p2;
1159 p1 = *++source;
1160 *dest++ = (p2 + p1) >> 1;
1161 }
1162
1163 if ((width += 4) <= 0) goto done;
1164 *dest++ = source[0];
1165 if (--width <= 0) goto done;
1166 *dest++ = (source[0] + source[1]) >> 1;
1167 if (--width <= 0) goto done;
1168 *dest++ = source[1];
1169 done:
1170
1171 xine_profiler_stop_count(prof_scale_line);
1172}
1173
1174
1175/*
1176 * Scale line with no horizontal scaling. For NTSC mpeg2 dvd input in
1177 * 4:3 output format (720x480 -> 720x540)
1178 */
1179static void scale_line_1_1 (uint8_t *source, uint8_t *dest,
1180 int width, int step) {
1181
1182 xine_profiler_start_count(prof_scale_line);
1183 xine_fast_memcpy(dest, source, width);
1184 xine_profiler_stop_count(prof_scale_line);
1185}
1186
1187
1188static scale_line_func_t find_scale_line_func(int step) {
1189 static struct {
1190 int src_step;
1191 int dest_step;
1192 scale_line_func_tfunc;
1193 char *desc;
1194 } scale_line[] = {
1195 { 15, 16, scale_line_15_16, "dvd 4:3(pal)" },
1196 { 45, 64, scale_line_45_64, "dvd 16:9(pal), fullscreen(1024x768)" },
1197 { 9, 16, scale_line_9_16, "dvd fullscreen(1280x1024)" },
1198 { 45, 53, scale_line_45_53, "dvd 16:9(ntsc)" },
1199 { 11, 12, scale_line_11_12, "vcd 4:3(pal)" },
1200 { 11, 24, scale_line_11_24, "vcd 4:3(pal) 2*zoom" },
1201 { 5, 8, scale_line_5_8, "svcd 4:3(pal)" },
1202 { 3, 4, scale_line_3_4, "svcd 4:3(ntsc)" },
1203 { 1, 2, scale_line_1_2, "2*zoom" },
1204 { 1, 1, scale_line_1_1, "non-scaled" },
1205 };
1206 int i;
1207
1208 for (i = 0; i < sizeof(scale_line)/sizeof(scale_line[0]); i++) {
1209 if (step == scale_line[i].src_step*32768/scale_line[i].dest_step) {
1210 printf("yuv2rgb: using %s optimized scale_line\n", scale_line[i].desc);
1211 return scale_line[i].func;
1212 }
1213 }
1214 printf("yuv2rgb: using generic scale_line with interpolation\n");
1215 return scale_line_gen;
1216
1217}
1218
1219
/*
 * Generic linear-interpolating scaler for samples that sit on every
 * second byte of source (source[0], source[2], source[4], ...).
 *
 * source: input bytes; the first two samples are read up front, even
 *         when width is 0
 * dest:   receives `width` consecutive output bytes
 * width:  number of outputs; the loop runs until it reaches 0
 * step:   source advance per output in 1/32768ths of a sample
 */
static void scale_line_2 (uint8_t *source, uint8_t *dest,
                          int width, int step) {
  int      left  = source[0];
  int      right = source[2];
  int      frac  = 0;            /* position between left and right, 0..32768 */
  uint8_t *next  = source + 4;   /* next sample to fetch */

  for (; width; --width, ++dest) {

    *dest = (left * (32768 - frac) + right * frac) / 32768;

    /* advance; each whole sample crossed shifts the window by one */
    for (frac += step; frac > 32768; frac -= 32768) {
      left  = right;
      right = *next;
      next += 2;
    }
  }
}
1246
/*
 * Generic linear-interpolating scaler for samples that sit on every
 * fourth byte of source (source[0], source[4], source[8], ...).
 *
 * source: input bytes; the first two samples are read up front, even
 *         when width is 0
 * dest:   receives `width` consecutive output bytes
 * width:  number of outputs; the loop runs until it reaches 0
 * step:   source advance per output in 1/32768ths of a sample
 */
static void scale_line_4 (uint8_t *source, uint8_t *dest,
                          int width, int step) {
  int      left  = source[0];
  int      right = source[4];
  int      frac  = 0;            /* position between left and right, 0..32768 */
  uint8_t *next  = source + 8;   /* next sample to fetch */

  for (; width; --width, ++dest) {

    *dest = (left * (32768 - frac) + right * frac) / 32768;

    /* advance; each whole sample crossed shifts the window by one */
    for (frac += step; frac > 32768; frac -= 32768) {
      left  = right;
      right = *next;
      next += 4;
    }
  }
}
1273
1274
/*
 * Helper macros for the C colour-space converters.
 *
 * They expand to several plain statements and rely on locals of the
 * expanding function: `this`, U, V, Y, pu, pv, py_1/py_2, dst_1/dst_2
 * and the lookup pointers r, g, b.  Because the expansion is not a
 * single statement, do not use them as the unbraced body of an
 * if/else/loop.
 *
 * RGB(i)       loads chroma sample i and selects the r/g/b lookup
 *              tables for it.
 * DSTn(i)      writes two pixels (2*i, 2*i+1) of row n as r+g+b.
 * DSTnRGB(i)   writes the same two pixels as three separate bytes each,
 *              in r,g,b order; DSTnBGR(i) in b,g,r order.
 * DSTnCMAP(i)  writes the same two pixels through this->cmap.
 *
 * Fix: the parameter i is parenthesised everywhere it takes part in
 * index arithmetic, so an argument such as `k+1` indexes 2*(k+1)
 * instead of 2*k+1.
 */
#define RGB(i)								\
	U = pu[(i)];							\
	V = pv[(i)];							\
	r = this->table_rV[V];						\
	g = (void *) (((uint8_t *)this->table_gU[U]) + this->table_gV[V]);\
	b = this->table_bU[U];

#define DST1(i)								\
	Y = py_1[2*(i)];						\
	dst_1[2*(i)] = r[Y] + g[Y] + b[Y];				\
	Y = py_1[2*(i)+1];						\
	dst_1[2*(i)+1] = r[Y] + g[Y] + b[Y];

#define DST2(i)								\
	Y = py_2[2*(i)];						\
	dst_2[2*(i)] = r[Y] + g[Y] + b[Y];				\
	Y = py_2[2*(i)+1];						\
	dst_2[2*(i)+1] = r[Y] + g[Y] + b[Y];

#define DST1RGB(i)							\
	Y = py_1[2*(i)];						\
	dst_1[6*(i)] = r[Y]; dst_1[6*(i)+1] = g[Y]; dst_1[6*(i)+2] = b[Y];\
	Y = py_1[2*(i)+1];						\
	dst_1[6*(i)+3] = r[Y]; dst_1[6*(i)+4] = g[Y]; dst_1[6*(i)+5] = b[Y];

#define DST2RGB(i)							\
	Y = py_2[2*(i)];						\
	dst_2[6*(i)] = r[Y]; dst_2[6*(i)+1] = g[Y]; dst_2[6*(i)+2] = b[Y];\
	Y = py_2[2*(i)+1];						\
	dst_2[6*(i)+3] = r[Y]; dst_2[6*(i)+4] = g[Y]; dst_2[6*(i)+5] = b[Y];

#define DST1BGR(i)							\
	Y = py_1[2*(i)];						\
	dst_1[6*(i)] = b[Y]; dst_1[6*(i)+1] = g[Y]; dst_1[6*(i)+2] = r[Y];\
	Y = py_1[2*(i)+1];						\
	dst_1[6*(i)+3] = b[Y]; dst_1[6*(i)+4] = g[Y]; dst_1[6*(i)+5] = r[Y];

#define DST2BGR(i)							\
	Y = py_2[2*(i)];						\
	dst_2[6*(i)] = b[Y]; dst_2[6*(i)+1] = g[Y]; dst_2[6*(i)+2] = r[Y];\
	Y = py_2[2*(i)+1];						\
	dst_2[6*(i)+3] = b[Y]; dst_2[6*(i)+4] = g[Y]; dst_2[6*(i)+5] = r[Y];

#define DST1CMAP(i)							\
	Y = py_1[2*(i)];						\
	dst_1[2*(i)] = this->cmap[r[Y] + g[Y] + b[Y]];			\
	Y = py_1[2*(i)+1];						\
	dst_1[2*(i)+1] = this->cmap[r[Y] + g[Y] + b[Y]];

#define DST2CMAP(i)							\
	Y = py_2[2*(i)];						\
	dst_2[2*(i)] = this->cmap[r[Y] + g[Y] + b[Y]];			\
	Y = py_2[2*(i)+1];						\
	dst_2[2*(i)+1] = this->cmap[r[Y] + g[Y] + b[Y]];
1329
1330static void yuv2rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst,
1331 uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
1332{
1333 int U, V, Y;
1334 uint8_t * py_1, * py_2, * pu, * pv;
1335 uint32_t * r, * g, * b;
1336 uint32_t * dst_1, * dst_2;
1337 int width, height, dst_height;
1338 int dy;
1339
1340 if (this->do_scale) {
1341 scale_line_func_t scale_line = this->scale_line;
1342
1343 scale_line (_pu, this->u_buffer,
1344 this->dest_width >> 1, this->step_dx);
1345 scale_line (_pv, this->v_buffer,
1346 this->dest_width >> 1, this->step_dx);
1347 scale_line (_py, this->y_buffer,
1348 this->dest_width, this->step_dx);
1349
1350 dy = 0;
1351 dst_height = this->dest_height;
1352
1353 for (height = 0;; ) {
1354 dst_1 = (uint32_t*)_dst;
1355 py_1 = this->y_buffer;
1356 pu = this->u_buffer;
1357 pv = this->v_buffer;
1358
1359 width = this->dest_width >> 3;
1360
1361 do {
1362 RGB(0);
1363 DST1(0);
1364
1365 RGB(1);
1366 DST1(1);
1367
1368 RGB(2);
1369 DST1(2);
1370
1371 RGB(3);
1372 DST1(3);
1373
1374 pu += 4;
1375 pv += 4;
1376 py_1 += 8;
1377 dst_1 += 8;
1378 } while (--width);
1379
1380 dy += this->step_dy;
1381 _dst += this->rgb_stride;
1382
1383 while (--dst_height > 0 && dy < 32768) {
1384
1385 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
1386
1387 dy += this->step_dy;
1388 _dst += this->rgb_stride;
1389 }
1390
1391 if (dst_height <= 0)
1392 break;
1393
1394 do {
1395 dy -= 32768;
1396 _py += this->y_stride;
1397
1398 scale_line (_py, this->y_buffer,
1399 this->dest_width, this->step_dx);
1400
1401 if (height & 1) {
1402 _pu += this->uv_stride;
1403 _pv += this->uv_stride;
1404
1405 scale_line (_pu, this->u_buffer,
1406 this->dest_width >> 1, this->step_dx);
1407 scale_line (_pv, this->v_buffer,
1408 this->dest_width >> 1, this->step_dx);
1409
1410 }
1411 height++;
1412 } while( dy>=32768);
1413 }
1414 } else {
1415 height = this->source_height >> 1;
1416 do {
1417 dst_1 = (uint32_t*)_dst;
1418 dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride );
1419 py_1 = _py;
1420 py_2 = _py + this->y_stride;
1421 pu = _pu;
1422 pv = _pv;
1423
1424 width = this->source_width >> 3;
1425 do {
1426 RGB(0);
1427 DST1(0);
1428 DST2(0);
1429
1430 RGB(1);
1431 DST2(1);
1432 DST1(1);
1433
1434 RGB(2);
1435 DST1(2);
1436 DST2(2);
1437
1438 RGB(3);
1439 DST2(3);
1440 DST1(3);
1441
1442 pu += 4;
1443 pv += 4;
1444 py_1 += 8;
1445 py_2 += 8;
1446 dst_1 += 8;
1447 dst_2 += 8;
1448 } while (--width);
1449
1450 _dst += 2 * this->rgb_stride;
1451 _py += 2 * this->y_stride;
1452 _pu += this->uv_stride;
1453 _pv += this->uv_stride;
1454
1455 } while (--height);
1456 }
1457}
1458
1459/* This is very near from the yuv2rgb_c_32 code */
1460static void yuv2rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst,
1461 uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
1462{
1463 int U, V, Y;
1464 uint8_t * py_1, * py_2, * pu, * pv;
1465 uint8_t * r, * g, * b;
1466 uint8_t * dst_1, * dst_2;
1467 int width, height, dst_height;
1468 int dy;
1469
1470 if (this->do_scale) {
1471
1472 scale_line_func_t scale_line = this->scale_line;
1473
1474 scale_line (_pu, this->u_buffer,
1475 this->dest_width >> 1, this->step_dx);
1476 scale_line (_pv, this->v_buffer,
1477 this->dest_width >> 1, this->step_dx);
1478 scale_line (_py, this->y_buffer,
1479 this->dest_width, this->step_dx);
1480
1481 dy = 0;
1482 dst_height = this->dest_height;
1483
1484 for (height = 0;; ) {
1485 dst_1 = _dst;
1486 py_1 = this->y_buffer;
1487 pu = this->u_buffer;
1488 pv = this->v_buffer;
1489
1490 width = this->dest_width >> 3;
1491
1492 do {
1493 RGB(0);
1494 DST1RGB(0);
1495
1496 RGB(1);
1497 DST1RGB(1);
1498
1499 RGB(2);
1500 DST1RGB(2);
1501
1502 RGB(3);
1503 DST1RGB(3);
1504
1505 pu += 4;
1506 pv += 4;
1507 py_1 += 8;
1508 dst_1 += 24;
1509 } while (--width);
1510
1511 dy += this->step_dy;
1512 _dst += this->rgb_stride;
1513
1514 while (--dst_height > 0 && dy < 32768) {
1515
1516 xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3);
1517
1518 dy += this->step_dy;
1519 _dst += this->rgb_stride;
1520 }
1521
1522 if (dst_height <= 0)
1523 break;
1524
1525 do {
1526 dy -= 32768;
1527 _py += this->y_stride;
1528
1529 scale_line (_py, this->y_buffer,
1530 this->dest_width, this->step_dx);
1531
1532 if (height & 1) {
1533 _pu += this->uv_stride;
1534 _pv += this->uv_stride;
1535
1536 scale_line (_pu, this->u_buffer,
1537 this->dest_width >> 1, this->step_dx);
1538 scale_line (_pv, this->v_buffer,
1539 this->dest_width >> 1, this->step_dx);
1540
1541 }
1542 height++;
1543 } while (dy>=32768);
1544 }
1545 } else {
1546 height = this->source_height >> 1;
1547 do {
1548 dst_1 = _dst;
1549 dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride );
1550 py_1 = _py;
1551 py_2 = _py + this->y_stride;
1552 pu = _pu;
1553 pv = _pv;
1554
1555 width = this->source_width >> 3;
1556 do {
1557 RGB(0);
1558 DST1RGB(0);
1559 DST2RGB(0);
1560
1561 RGB(1);
1562 DST2RGB(1);
1563 DST1RGB(1);
1564
1565 RGB(2);
1566 DST1RGB(2);
1567 DST2RGB(2);
1568
1569 RGB(3);
1570 DST2RGB(3);
1571 DST1RGB(3);
1572
1573 pu += 4;
1574 pv += 4;
1575 py_1 += 8;
1576 py_2 += 8;
1577 dst_1 += 24;
1578 dst_2 += 24;
1579 } while (--width);
1580
1581 _dst += 2 * this->rgb_stride;
1582 _py += 2 * this->y_stride;
1583 _pu += this->uv_stride;
1584 _pv += this->uv_stride;
1585
1586 } while (--height);
1587 }
1588}
1589
1590/* only trivial mods from yuv2rgb_c_24_rgb */
1591static void yuv2rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst,
1592 uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
1593{
1594 int U, V, Y;
1595 uint8_t * py_1, * py_2, * pu, * pv;
1596 uint8_t * r, * g, * b;
1597 uint8_t * dst_1, * dst_2;
1598 int width, height, dst_height;
1599 int dy;
1600
1601 if (this->do_scale) {
1602
1603 scale_line_func_t scale_line = this->scale_line;
1604
1605 scale_line (_pu, this->u_buffer,
1606 this->dest_width >> 1, this->step_dx);
1607 scale_line (_pv, this->v_buffer,
1608 this->dest_width >> 1, this->step_dx);
1609 scale_line (_py, this->y_buffer,
1610 this->dest_width, this->step_dx);
1611
1612 dy = 0;
1613 dst_height = this->dest_height;
1614
1615 for (height = 0;; ) {
1616 dst_1 = _dst;
1617 py_1 = this->y_buffer;
1618 pu = this->u_buffer;
1619 pv = this->v_buffer;
1620
1621 width = this->dest_width >> 3;
1622
1623 do {
1624 RGB(0);
1625 DST1BGR(0);
1626
1627 RGB(1);
1628 DST1BGR(1);
1629
1630 RGB(2);
1631 DST1BGR(2);
1632
1633 RGB(3);
1634 DST1BGR(3);
1635
1636 pu += 4;
1637 pv += 4;
1638 py_1 += 8;
1639 dst_1 += 24;
1640 } while (--width);
1641
1642 dy += this->step_dy;
1643 _dst += this->rgb_stride;
1644
1645 while (--dst_height > 0 && dy < 32768) {
1646
1647 xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3);
1648
1649 dy += this->step_dy;
1650 _dst += this->rgb_stride;
1651 }
1652
1653 if (dst_height <= 0)
1654 break;
1655
1656 do {
1657 dy -= 32768;
1658 _py += this->y_stride;
1659
1660 scale_line (_py, this->y_buffer,
1661 this->dest_width, this->step_dx);
1662
1663 if (height & 1) {
1664 _pu += this->uv_stride;
1665 _pv += this->uv_stride;
1666
1667 scale_line (_pu, this->u_buffer,
1668 this->dest_width >> 1, this->step_dx);
1669 scale_line (_pv, this->v_buffer,
1670 this->dest_width >> 1, this->step_dx);
1671
1672 }
1673 height++;
1674 } while( dy>=32768 );
1675 }
1676
1677 } else {
1678 height = this->source_height >> 1;
1679 do {
1680 dst_1 = _dst;
1681 dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride );
1682 py_1 = _py;
1683 py_2 = _py + this->y_stride;
1684 pu = _pu;
1685 pv = _pv;
1686 width = this->source_width >> 3;
1687 do {
1688 RGB(0);
1689 DST1BGR(0);
1690 DST2BGR(0);
1691
1692 RGB(1);
1693 DST2BGR(1);
1694 DST1BGR(1);
1695
1696 RGB(2);
1697 DST1BGR(2);
1698 DST2BGR(2);
1699
1700 RGB(3);
1701 DST2BGR(3);
1702 DST1BGR(3);
1703
1704 pu += 4;
1705 pv += 4;
1706 py_1 += 8;
1707 py_2 += 8;
1708 dst_1 += 24;
1709 dst_2 += 24;
1710 } while (--width);
1711
1712 _dst += 2 * this->rgb_stride;
1713 _py += 2 * this->y_stride;
1714 _pu += this->uv_stride;
1715 _pv += this->uv_stride;
1716
1717 } while (--height);
1718 }
1719}
1720
1721/* This is exactly the same code as yuv2rgb_c_32 except for the types of */
1722/* r, g, b, dst_1, dst_2 */
1723static void yuv2rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst,
1724 uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
1725{
1726 int U, V, Y;
1727 uint8_t * py_1, * py_2, * pu, * pv;
1728 uint16_t * r, * g, * b;
1729 uint16_t * dst_1, * dst_2;
1730 int width, height, dst_height;
1731 int dy;
1732
1733 if (this->do_scale) {
1734 scale_line_func_t scale_line = this->scale_line;
1735
1736 scale_line (_pu, this->u_buffer,
1737 this->dest_width >> 1, this->step_dx);
1738 scale_line (_pv, this->v_buffer,
1739 this->dest_width >> 1, this->step_dx);
1740 scale_line (_py, this->y_buffer,
1741 this->dest_width, this->step_dx);
1742
1743 dy = 0;
1744 dst_height = this->dest_height;
1745
1746 for (height = 0;; ) {
1747 dst_1 = (uint16_t*)_dst;
1748 py_1 = this->y_buffer;
1749 pu = this->u_buffer;
1750 pv = this->v_buffer;
1751
1752 width = this->dest_width >> 3;
1753
1754 do {
1755 RGB(0);
1756 DST1(0);
1757
1758 RGB(1);
1759 DST1(1);
1760
1761 RGB(2);
1762 DST1(2);
1763
1764 RGB(3);
1765 DST1(3);
1766
1767 pu += 4;
1768 pv += 4;
1769 py_1 += 8;
1770 dst_1 += 8;
1771 } while (--width);
1772
1773 dy += this->step_dy;
1774 _dst += this->rgb_stride;
1775
1776 while (--dst_height > 0 && dy < 32768) {
1777
1778 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2);
1779
1780 dy += this->step_dy;
1781 _dst += this->rgb_stride;
1782 }
1783
1784 if (dst_height <= 0)
1785 break;
1786
1787 do {
1788 dy -= 32768;
1789 _py += this->y_stride;
1790
1791 scale_line (_py, this->y_buffer,
1792 this->dest_width, this->step_dx);
1793
1794 if (height & 1) {
1795 _pu += this->uv_stride;
1796 _pv += this->uv_stride;
1797
1798 scale_line (_pu, this->u_buffer,
1799 this->dest_width >> 1, this->step_dx);
1800 scale_line (_pv, this->v_buffer,
1801 this->dest_width >> 1, this->step_dx);
1802
1803 }
1804 height++;
1805 } while( dy>=32768);
1806 }
1807 } else {
1808 height = this->source_height >> 1;
1809 do {
1810 dst_1 = (uint16_t*)_dst;
1811 dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride );
1812 py_1 = _py;
1813 py_2 = _py + this->y_stride;
1814 pu = _pu;
1815 pv = _pv;
1816 width = this->source_width >> 3;
1817 do {
1818 RGB(0);
1819 DST1(0);
1820 DST2(0);
1821
1822 RGB(1);
1823 DST2(1);
1824 DST1(1);
1825
1826 RGB(2);
1827 DST1(2);
1828 DST2(2);
1829
1830 RGB(3);
1831 DST2(3);
1832 DST1(3);
1833
1834 pu += 4;
1835 pv += 4;
1836 py_1 += 8;
1837 py_2 += 8;
1838 dst_1 += 8;
1839 dst_2 += 8;
1840 } while (--width);
1841
1842 _dst += 2 * this->rgb_stride;
1843 _py += 2 * this->y_stride;
1844 _pu += this->uv_stride;
1845 _pv += this->uv_stride;
1846
1847 } while (--height);
1848 }
1849}
1850
1851/* This is exactly the same code as yuv2rgb_c_32 except for the types of */
1852/* r, g, b, dst_1, dst_2 */
1853static void yuv2rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst,
1854 uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
1855{
1856 int U, V, Y;
1857 uint8_t * py_1, * py_2, * pu, * pv;
1858 uint8_t * r, * g, * b;
1859 uint8_t * dst_1, * dst_2;
1860 int width, height, dst_height;
1861 int dy;
1862
1863 if (this->do_scale) {
1864 scale_line_func_t scale_line = this->scale_line;
1865
1866 scale_line (_pu, this->u_buffer,
1867 this->dest_width >> 1, this->step_dx);
1868 scale_line (_pv, this->v_buffer,
1869 this->dest_width >> 1, this->step_dx);
1870 scale_line (_py, this->y_buffer,
1871 this->dest_width, this->step_dx);
1872
1873 dy = 0;
1874 dst_height = this->dest_height;
1875
1876 for (height = 0;; ) {
1877 dst_1 = (uint8_t*)_dst;
1878 py_1 = this->y_buffer;
1879 pu = this->u_buffer;
1880 pv = this->v_buffer;
1881
1882 width = this->dest_width >> 3;
1883
1884 do {
1885 RGB(0);
1886 DST1(0);
1887
1888 RGB(1);
1889 DST1(1);
1890
1891 RGB(2);
1892 DST1(2);
1893
1894 RGB(3);
1895 DST1(3);
1896
1897 pu += 4;
1898 pv += 4;
1899 py_1 += 8;
1900 dst_1 += 8;
1901 } while (--width);
1902
1903 dy += this->step_dy;
1904 _dst += this->rgb_stride;
1905
1906 while (--dst_height > 0 && dy < 32768) {
1907
1908 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
1909
1910 dy += this->step_dy;
1911 _dst += this->rgb_stride;
1912 }
1913
1914 if (dst_height <= 0)
1915 break;
1916
1917 do {
1918 dy -= 32768;
1919 _py += this->y_stride;
1920
1921 scale_line (_py, this->y_buffer,
1922 this->dest_width, this->step_dx);
1923
1924 if (height & 1) {
1925 _pu += this->uv_stride;
1926 _pv += this->uv_stride;
1927
1928 scale_line (_pu, this->u_buffer,
1929 this->dest_width >> 1, this->step_dx);
1930 scale_line (_pv, this->v_buffer,
1931 this->dest_width >> 1, this->step_dx);
1932
1933 }
1934 height++;
1935 } while( dy>=32768 );
1936 }
1937 } else {
1938 height = this->source_height >> 1;
1939 do {
1940 dst_1 = (uint8_t*)_dst;
1941 dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride );
1942 py_1 = _py;
1943 py_2 = _py + this->y_stride;
1944 pu = _pu;
1945 pv = _pv;
1946
1947 width = this->source_width >> 3;
1948 do {
1949 RGB(0);
1950 DST1(0);
1951 DST2(0);
1952
1953 RGB(1);
1954 DST2(1);
1955 DST1(1);
1956
1957 RGB(2);
1958 DST1(2);
1959 DST2(2);
1960
1961 RGB(3);
1962 DST2(3);
1963 DST1(3);
1964
1965 pu += 4;
1966 pv += 4;
1967 py_1 += 8;
1968 py_2 += 8;
1969 dst_1 += 8;
1970 dst_2 += 8;
1971 } while (--width);
1972
1973 _dst += 2 * this->rgb_stride;
1974 _py += 2 * this->y_stride;
1975 _pu += this->uv_stride;
1976 _pv += this->uv_stride;
1977
1978 } while (--height);
1979 }
1980}
1981
1982/* now for something different: 256 grayscale mode */
1983static void yuv2rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst,
1984 uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
1985{
1986 int height, dst_height;
1987 int dy;
1988
1989 if (this->do_scale) {
1990 scale_line_func_t scale_line = this->scale_line;
1991
1992 dy = 0;
1993 dst_height = this->dest_height;
1994
1995 for (;;) {
1996 scale_line (_py, _dst, this->dest_width, this->step_dx);
1997
1998 dy += this->step_dy;
1999 _dst += this->rgb_stride;
2000
2001 while (--dst_height > 0 && dy < 32768) {
2002
2003 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
2004
2005 dy += this->step_dy;
2006 _dst += this->rgb_stride;
2007 }
2008
2009 if (dst_height <= 0)
2010 break;
2011
2012 _py += this->y_stride*(dy>>15);
2013 dy &= 32767;
2014 /* dy -= 32768;
2015 _py += this->y_stride;
2016 */
2017 }
2018 } else {
2019 for (height = this->source_height; --height >= 0; ) {
2020 xine_fast_memcpy(_dst, _py, this->dest_width);
2021 _dst += this->rgb_stride;
2022 _py += this->y_stride;
2023 }
2024 }
2025}
2026
2027/* now for something different: 256 color mode */
2028static void yuv2rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst,
2029 uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
2030{
2031 int U, V, Y;
2032 uint8_t * py_1, * py_2, * pu, * pv;
2033 uint16_t * r, * g, * b;
2034 uint8_t * dst_1, * dst_2;
2035 int width, height, dst_height;
2036 int dy;
2037
2038 if (this->do_scale) {
2039 scale_line_func_t scale_line = this->scale_line;
2040
2041 scale_line (_pu, this->u_buffer,
2042 this->dest_width >> 1, this->step_dx);
2043 scale_line (_pv, this->v_buffer,
2044 this->dest_width >> 1, this->step_dx);
2045 scale_line (_py, this->y_buffer,
2046 this->dest_width, this->step_dx);
2047
2048 dy = 0;
2049 dst_height = this->dest_height;
2050
2051 for (height = 0;; ) {
2052 dst_1 = _dst;
2053 py_1 = this->y_buffer;
2054 pu = this->u_buffer;
2055 pv = this->v_buffer;
2056
2057 width = this->dest_width >> 3;
2058
2059 do {
2060 RGB(0);
2061 DST1CMAP(0);
2062
2063 RGB(1);
2064 DST1CMAP(1);
2065
2066 RGB(2);
2067 DST1CMAP(2);
2068
2069 RGB(3);
2070 DST1CMAP(3);
2071
2072 pu += 4;
2073 pv += 4;
2074 py_1 += 8;
2075 dst_1 += 8;
2076 } while (--width);
2077
2078 dy += this->step_dy;
2079 _dst += this->rgb_stride;
2080
2081 while (--dst_height > 0 && dy < 32768) {
2082
2083 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
2084
2085 dy += this->step_dy;
2086 _dst += this->rgb_stride;
2087 }
2088
2089 if (dst_height <= 0)
2090 break;
2091
2092 do {
2093 dy -= 32768;
2094 _py += this->y_stride;
2095
2096 scale_line (_py, this->y_buffer,
2097 this->dest_width, this->step_dx);
2098
2099 if (height & 1) {
2100 _pu += this->uv_stride;
2101 _pv += this->uv_stride;
2102
2103 scale_line (_pu, this->u_buffer,
2104 this->dest_width >> 1, this->step_dx);
2105 scale_line (_pv, this->v_buffer,
2106 this->dest_width >> 1, this->step_dx);
2107
2108 }
2109 height++;
2110 } while( dy>=32768 );
2111 }
2112 } else {
2113 height = this->source_height >> 1;
2114 do {
2115 dst_1 = _dst;
2116 dst_2 = _dst + this->rgb_stride;
2117 py_1 = _py;
2118 py_2 = _py + this->y_stride;
2119 pu = _pu;
2120 pv = _pv;
2121 width = this->source_width >> 3;
2122 do {
2123 RGB(0);
2124 DST1CMAP(0);
2125 DST2CMAP(0);
2126
2127 RGB(1);
2128 DST2CMAP(1);
2129 DST1CMAP(1);
2130
2131 RGB(2);
2132 DST1CMAP(2);
2133 DST2CMAP(2);
2134
2135 RGB(3);
2136 DST2CMAP(3);
2137 DST1CMAP(3);
2138
2139 pu += 4;
2140 pv += 4;
2141 py_1 += 8;
2142 py_2 += 8;
2143 dst_1 += 8;
2144 dst_2 += 8;
2145 } while (--width);
2146
2147 _dst += 2 * this->rgb_stride;
2148 _py += 2 * this->y_stride;
2149 _pu += this->uv_stride;
2150 _pv += this->uv_stride;
2151
2152 } while (--height);
2153 }
2154}
2155
/*
 * Integer division rounded to the nearest value, with halves rounded away
 * from zero.  Half of the divisor is added to the magnitude of the
 * dividend before the truncating division; the sign is restored
 * afterwards.
 */
static int div_round (int dividend, int divisor)
{
  const int half = divisor >> 1;

  if (dividend <= 0)
    return -((half - dividend) / divisor);

  return (half + dividend) / divisor;
}
2163
2164static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped)
2165{
2166 int i;
2167 uint8_t table_Y[1024];
2168 uint32_t * table_32 = 0;
2169 uint16_t * table_16 = 0;
2170 uint8_t * table_8 = 0;
2171 int entry_size = 0;
2172 void *table_r = 0, *table_g = 0, *table_b = 0;
2173 int shift_r = 0, shift_g = 0, shift_b = 0;
2174
2175 int crv = Inverse_Table_6_9[this->matrix_coefficients][0];
2176 int cbu = Inverse_Table_6_9[this->matrix_coefficients][1];
2177 int cgu = -Inverse_Table_6_9[this->matrix_coefficients][2];
2178 int cgv = -Inverse_Table_6_9[this->matrix_coefficients][3];
2179
2180 for (i = 0; i < 1024; i++) {
2181 int j;
2182
2183 j = (76309 * (i - 384 - 16) + 32768) >> 16;
2184 j = (j < 0) ? 0 : ((j > 255) ? 255 : j);
2185 table_Y[i] = j;
2186 }
2187
2188 switch (mode) {
2189 case MODE_32_RGB:
2190 case MODE_32_BGR:
2191 table_32 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t));
2192
2193 entry_size = sizeof (uint32_t);
2194 table_r = table_32 + 197;
2195 table_b = table_32 + 197 + 685;
2196 table_g = table_32 + 197 + 2*682;
2197
2198 if (swapped) {
2199 switch (mode) {
2200 case MODE_32_RGB: shift_r = 8; shift_g = 16; shift_b = 24; break;
2201 case MODE_32_BGR:shift_r = 24; shift_g = 16; shift_b = 8; break;
2202 }
2203 } else {
2204 switch (mode) {
2205 case MODE_32_RGB:shift_r = 16; shift_g = 8; shift_b = 0; break;
2206 case MODE_32_BGR:shift_r = 0; shift_g = 8; shift_b = 16; break;
2207 }
2208 }
2209
2210 for (i = -197; i < 256+197; i++)
2211 ((uint32_t *) table_r)[i] = table_Y[i+384] << shift_r;
2212 for (i = -132; i < 256+132; i++)
2213 ((uint32_t *) table_g)[i] = table_Y[i+384] << shift_g;
2214 for (i = -232; i < 256+232; i++)
2215 ((uint32_t *) table_b)[i] = table_Y[i+384] << shift_b;
2216 break;
2217
2218 case MODE_24_RGB:
2219 case MODE_24_BGR:
2220 table_8 = malloc ((256 + 2*232) * sizeof (uint8_t));
2221
2222 entry_size = sizeof (uint8_t);
2223 table_r = table_g = table_b = table_8 + 232;
2224
2225 for (i = -232; i < 256+232; i++)
2226 ((uint8_t * )table_b)[i] = table_Y[i+384];
2227 break;
2228
2229 case MODE_15_BGR:
2230 case MODE_16_BGR:
2231 case MODE_15_RGB:
2232 case MODE_16_RGB:
2233 table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t));
2234
2235 entry_size = sizeof (uint16_t);
2236 table_r = table_16 + 197;
2237 table_b = table_16 + 197 + 685;
2238 table_g = table_16 + 197 + 2*682;
2239
2240 if (swapped) {
2241 switch (mode) {
2242 case MODE_15_BGR: shift_r = 8; shift_g = 5; shift_b = 2; break;
2243 case MODE_16_BGR:shift_r = 8; shift_g = 5; shift_b = 3; break;
2244 case MODE_15_RGB:shift_r = 2; shift_g = 5; shift_b = 8; break;
2245 case MODE_16_RGB:shift_r = 3; shift_g = 5; shift_b = 8; break;
2246 }
2247 } else {
2248 switch (mode) {
2249 case MODE_15_BGR:shift_r = 0; shift_g = 5; shift_b = 10; break;
2250 case MODE_16_BGR:shift_r = 0; shift_g = 5; shift_b = 11; break;
2251 case MODE_15_RGB:shift_r = 10; shift_g = 5; shift_b = 0; break;
2252 case MODE_16_RGB:shift_r = 11; shift_g = 5; shift_b = 0; break;
2253 }
2254 }
2255
2256 for (i = -197; i < 256+197; i++)
2257 ((uint16_t *)table_r)[i] = (table_Y[i+384] >> 3) << shift_r;
2258
2259 for (i = -132; i < 256+132; i++) {
2260 int j = table_Y[i+384] >> (((mode==MODE_16_RGB) || (mode==MODE_16_BGR)) ? 2 : 3);
2261 if (swapped)
2262 ((uint16_t *)table_g)[i] = (j&7) << 13 | (j>>3);
2263 else
2264 ((uint16_t *)table_g)[i] = j << 5;
2265 }
2266 for (i = -232; i < 256+232; i++)
2267 ((uint16_t *)table_b)[i] = (table_Y[i+384] >> 3) << shift_b;
2268
2269 break;
2270
2271 case MODE_8_RGB:
2272 case MODE_8_BGR:
2273 table_8 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t));
2274
2275 entry_size = sizeof (uint8_t);
2276 table_r = table_8 + 197;
2277 table_b = table_8 + 197 + 685;
2278 table_g = table_8 + 197 + 2*682;
2279
2280 switch (mode) {
2281 case MODE_8_RGB: shift_r = 5; shift_g = 2; shift_b = 0; break;
2282 case MODE_8_BGR: shift_r = 0; shift_g = 3; shift_b = 6; break;
2283 }
2284
2285 for (i = -197; i < 256+197; i++)
2286 ((uint8_t *) table_r)[i] = (table_Y[i+384] >> 5) << shift_r;
2287 for (i = -132; i < 256+132; i++)
2288 ((uint8_t *) table_g)[i] = (table_Y[i+384] >> 5) << shift_g;
2289 for (i = -232; i < 256+232; i++)
2290 ((uint8_t *) table_b)[i] = (table_Y[i+384] >> 6) << shift_b;
2291 break;
2292
2293 case MODE_8_GRAY:
2294 return;
2295
2296 case MODE_PALETTE:
2297 table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t));
2298
2299 entry_size = sizeof (uint16_t);
2300 table_r = table_16 + 197;
2301 table_b = table_16 + 197 + 685;
2302 table_g = table_16 + 197 + 2*682;
2303
2304 shift_r = 10;
2305 shift_g = 5;
2306 shift_b = 0;
2307
2308 for (i = -197; i < 256+197; i++)
2309 ((uint16_t *)table_r)[i] = (table_Y[i+384] >> 3) << 10;
2310
2311 for (i = -132; i < 256+132; i++)
2312 ((uint16_t *)table_g)[i] = (table_Y[i+384] >> 3) << 5;
2313
2314 for (i = -232; i < 256+232; i++)
2315 ((uint16_t *)table_b)[i] = (table_Y[i+384] >> 3) << 0;
2316
2317 break;
2318
2319
2320 default:
2321 fprintf (stderr, "mode %d not supported by yuv2rgb\n", mode);
2322 abort();
2323 }
2324
2325 for (i = 0; i < 256; i++) {
2326 this->table_rV[i] = (((uint8_t *) table_r) +
2327 entry_size * div_round (crv * (i-128), 76309));
2328 this->table_gU[i] = (((uint8_t *) table_g) +
2329 entry_size * div_round (cgu * (i-128), 76309));
2330 this->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309);
2331 this->table_bU[i] = (((uint8_t *)table_b) +
2332 entry_size * div_round (cbu * (i-128), 76309));
2333 }
2334 this->gamma = 0;
2335 this->entry_size = entry_size;
2336}
2337
2338static uint32_t yuv2rgb_single_pixel_32 (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v)
2339{
2340 uint32_t * r, * g, * b;
2341
2342 r = this->table_rV[v];
2343 g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]);
2344 b = this->table_bU[u];
2345
2346 return r[y] + g[y] + b[y];
2347}
2348
2349static uint32_t yuv2rgb_single_pixel_24_rgb (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v)
2350{
2351 uint8_t * r, * g, * b;
2352
2353 r = this->table_rV[v];
2354 g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]);
2355 b = this->table_bU[u];
2356
2357 return (uint32_t) r[y] +
2358 ((uint32_t) g[y] << 8) +
2359 ((uint32_t) b[y] << 16);
2360}
2361
2362static uint32_t yuv2rgb_single_pixel_24_bgr (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v)
2363{
2364 uint8_t * r, * g, * b;
2365
2366 r = this->table_rV[v];
2367 g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]);
2368 b = this->table_bU[u];
2369
2370 return (uint32_t) b[y] +
2371 ((uint32_t) g[y] << 8) +
2372 ((uint32_t) r[y] << 16);
2373}
2374
2375static uint32_t yuv2rgb_single_pixel_16 (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v)
2376{
2377 uint16_t * r, * g, * b;
2378
2379 r = this->table_rV[v];
2380 g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]);
2381 b = this->table_bU[u];
2382
2383 return r[y] + g[y] + b[y];
2384}
2385
2386static uint32_t yuv2rgb_single_pixel_8 (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v)
2387{
2388 uint8_t * r, * g, * b;
2389
2390 r = this->table_rV[v];
2391 g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]);
2392 b = this->table_bU[u];
2393
2394 return r[y] + g[y] + b[y];
2395}
2396
2397static uint32_t yuv2rgb_single_pixel_gray (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v)
2398{
2399 return y;
2400}
2401
2402static uint32_t yuv2rgb_single_pixel_palette (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v)
2403{
2404 uint16_t * r, * g, * b;
2405
2406 r = this->table_rV[v];
2407 g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]);
2408 b = this->table_bU[u];
2409
2410 return this->cmap[r[y] + g[y] + b[y]];
2411}
2412
2413
2414static void yuv2rgb_c_init (yuv2rgb_factory_t *this)
2415{
2416 switch (this->mode) {
2417 case MODE_32_RGB:
2418 case MODE_32_BGR:
2419 this->yuv2rgb_fun = yuv2rgb_c_32;
2420 break;
2421
2422 case MODE_24_RGB:
2423 case MODE_24_BGR:
2424 this->yuv2rgb_fun =
2425 (this->mode==MODE_24_RGB && !this->swapped) || (this->mode==MODE_24_BGR && this->swapped)
2426 ? yuv2rgb_c_24_rgb
2427 : yuv2rgb_c_24_bgr;
2428 break;
2429
2430 case MODE_15_BGR:
2431 case MODE_16_BGR:
2432 case MODE_15_RGB:
2433 case MODE_16_RGB:
2434 this->yuv2rgb_fun = yuv2rgb_c_16;
2435 break;
2436
2437 case MODE_8_RGB:
2438 case MODE_8_BGR:
2439 this->yuv2rgb_fun = yuv2rgb_c_8;
2440 break;
2441
2442 case MODE_8_GRAY:
2443 this->yuv2rgb_fun = yuv2rgb_c_gray;
2444 break;
2445
2446 case MODE_PALETTE:
2447 this->yuv2rgb_fun = yuv2rgb_c_palette;
2448 break;
2449
2450 default:
2451 printf ("yuv2rgb: mode %d not supported by yuv2rgb\n", this->mode);
2452 abort();
2453 }
2454
2455}
2456
2457static void yuv2rgb_single_pixel_init (yuv2rgb_factory_t *this) {
2458
2459 switch (this->mode) {
2460 case MODE_32_RGB:
2461 case MODE_32_BGR:
2462 this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_32;
2463 break;
2464
2465 case MODE_24_RGB:
2466 case MODE_24_BGR:
2467 this->yuv2rgb_single_pixel_fun =
2468 (this->mode==MODE_24_RGB && !this->swapped) || (this->mode==MODE_24_BGR && this->swapped)
2469 ? yuv2rgb_single_pixel_24_rgb
2470 : yuv2rgb_single_pixel_24_bgr;
2471 break;
2472
2473 case MODE_15_BGR:
2474 case MODE_16_BGR:
2475 case MODE_15_RGB:
2476 case MODE_16_RGB:
2477 this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_16;
2478 break;
2479
2480 case MODE_8_RGB:
2481 case MODE_8_BGR:
2482 this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_8;
2483 break;
2484
2485 case MODE_8_GRAY:
2486 this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_gray;
2487 break;
2488
2489 case MODE_PALETTE:
2490 this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_palette;
2491 break;
2492
2493 default:
2494 printf ("yuv2rgb: mode %d not supported by yuv2rgb\n", this->mode);
2495 abort();
2496 }
2497}
2498
2499
2500/*
2501 * yuy2 stuff
2502 */
2503
2504static void yuy22rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
2505{
2506 int U, V, Y;
2507 uint8_t * py_1, * pu, * pv;
2508 uint32_t * r, * g, * b;
2509 uint32_t * dst_1;
2510 int width, height;
2511 int dy;
2512
2513 /* FIXME: implement unscaled version */
2514
2515 scale_line_4 (_p+1, this->u_buffer,
2516 this->dest_width >> 1, this->step_dx);
2517 scale_line_4 (_p+3, this->v_buffer,
2518 this->dest_width >> 1, this->step_dx);
2519 scale_line_2 (_p, this->y_buffer,
2520 this->dest_width, this->step_dx);
2521
2522 dy = 0;
2523 height = this->dest_height;
2524
2525 for (;;) {
2526 dst_1 = (uint32_t*)_dst;
2527 py_1 = this->y_buffer;
2528 pu = this->u_buffer;
2529 pv = this->v_buffer;
2530
2531 width = this->dest_width >> 3;
2532
2533 do {
2534
2535 RGB(0);
2536 DST1(0);
2537
2538 RGB(1);
2539 DST1(1);
2540
2541 RGB(2);
2542 DST1(2);
2543
2544 RGB(3);
2545 DST1(3);
2546
2547 pu += 4;
2548 pv += 4;
2549 py_1 += 8;
2550 dst_1 += 8;
2551 } while (--width);
2552
2553 dy += this->step_dy;
2554 _dst += this->rgb_stride;
2555
2556 while (--height > 0 && dy < 32768) {
2557
2558 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
2559
2560 dy += this->step_dy;
2561 _dst += this->rgb_stride;
2562 }
2563
2564 if (height <= 0)
2565 break;
2566
2567 _p += this->y_stride*2*(dy>>15);
2568 dy &= 32767;
2569 /*
2570 dy -= 32768;
2571 _p += this->y_stride*2;
2572 */
2573
2574 scale_line_4 (_p+1, this->u_buffer,
2575 this->dest_width >> 1, this->step_dx);
2576 scale_line_4 (_p+3, this->v_buffer,
2577 this->dest_width >> 1, this->step_dx);
2578 scale_line_2 (_p, this->y_buffer,
2579 this->dest_width, this->step_dx);
2580 }
2581}
2582
2583static void yuy22rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
2584{
2585 int U, V, Y;
2586 uint8_t * py_1, * pu, * pv;
2587 uint8_t * r, * g, * b;
2588 uint8_t * dst_1;
2589 int width, height;
2590 int dy;
2591
2592 /* FIXME: implement unscaled version */
2593
2594 scale_line_4 (_p+1, this->u_buffer,
2595 this->dest_width >> 1, this->step_dx);
2596 scale_line_4 (_p+3, this->v_buffer,
2597 this->dest_width >> 1, this->step_dx);
2598 scale_line_2 (_p, this->y_buffer,
2599 this->dest_width, this->step_dx);
2600
2601 dy = 0;
2602 height = this->dest_height;
2603
2604 for (;;) {
2605 dst_1 = _dst;
2606 py_1 = this->y_buffer;
2607 pu = this->u_buffer;
2608 pv = this->v_buffer;
2609
2610 width = this->dest_width >> 3;
2611
2612 do {
2613 RGB(0);
2614 DST1RGB(0);
2615
2616 RGB(1);
2617 DST1RGB(1);
2618
2619 RGB(2);
2620 DST1RGB(2);
2621
2622 RGB(3);
2623 DST1RGB(3);
2624
2625 pu += 4;
2626 pv += 4;
2627 py_1 += 8;
2628 dst_1 += 24;
2629 } while (--width);
2630
2631 dy += this->step_dy;
2632 _dst += this->rgb_stride;
2633
2634 while (--height > 0 && dy < 32768) {
2635
2636 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3);
2637
2638 dy += this->step_dy;
2639 _dst += this->rgb_stride;
2640 }
2641
2642 if (height <= 0)
2643 break;
2644
2645 _p += this->y_stride*2*(dy>>15);
2646 dy &= 32767;
2647 /*
2648 dy -= 32768;
2649 _p += this->y_stride*2;
2650 */
2651
2652 scale_line_4 (_p+1, this->u_buffer,
2653 this->dest_width >> 1, this->step_dx);
2654 scale_line_4 (_p+3, this->v_buffer,
2655 this->dest_width >> 1, this->step_dx);
2656 scale_line_2 (_p, this->y_buffer,
2657 this->dest_width, this->step_dx);
2658 }
2659}
2660
2661static void yuy22rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
2662{
2663 int U, V, Y;
2664 uint8_t * py_1, * pu, * pv;
2665 uint8_t * r, * g, * b;
2666 uint8_t * dst_1;
2667 int width, height;
2668 int dy;
2669
2670 /* FIXME: implement unscaled version */
2671
2672 scale_line_4 (_p+1, this->u_buffer,
2673 this->dest_width >> 1, this->step_dx);
2674 scale_line_4 (_p+3, this->v_buffer,
2675 this->dest_width >> 1, this->step_dx);
2676 scale_line_2 (_p, this->y_buffer,
2677 this->dest_width, this->step_dx);
2678
2679 dy = 0;
2680 height = this->dest_height;
2681
2682 for (;;) {
2683 dst_1 = _dst;
2684 py_1 = this->y_buffer;
2685 pu = this->u_buffer;
2686 pv = this->v_buffer;
2687
2688 width = this->dest_width >> 3;
2689
2690 do {
2691 RGB(0);
2692 DST1BGR(0);
2693
2694 RGB(1);
2695 DST1BGR(1);
2696
2697 RGB(2);
2698 DST1BGR(2);
2699
2700 RGB(3);
2701 DST1BGR(3);
2702
2703 pu += 4;
2704 pv += 4;
2705 py_1 += 8;
2706 dst_1 += 24;
2707 } while (--width);
2708
2709 dy += this->step_dy;
2710 _dst += this->rgb_stride;
2711
2712 while (--height > 0 && dy < 32768) {
2713
2714 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3);
2715
2716 dy += this->step_dy;
2717 _dst += this->rgb_stride;
2718 }
2719
2720 if (height <= 0)
2721 break;
2722
2723 _p += this->y_stride*2*(dy>>15);
2724 dy &= 32767;
2725
2726 scale_line_4 (_p+1, this->u_buffer,
2727 this->dest_width >> 1, this->step_dx);
2728 scale_line_4 (_p+3, this->v_buffer,
2729 this->dest_width >> 1, this->step_dx);
2730 scale_line_2 (_p, this->y_buffer,
2731 this->dest_width, this->step_dx);
2732 }
2733}
2734
2735static void yuy22rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
2736{
2737 int U, V, Y;
2738 uint8_t * py_1, * pu, * pv;
2739 uint16_t * r, * g, * b;
2740 uint16_t * dst_1;
2741 int width, height;
2742 int dy;
2743
2744 /* FIXME: implement unscaled version */
2745
2746 scale_line_4 (_p+1, this->u_buffer,
2747 this->dest_width >> 1, this->step_dx);
2748 scale_line_4 (_p+3, this->v_buffer,
2749 this->dest_width >> 1, this->step_dx);
2750 scale_line_2 (_p, this->y_buffer,
2751 this->dest_width, this->step_dx);
2752
2753 dy = 0;
2754 height = this->dest_height;
2755
2756 for (;;) {
2757 dst_1 = (uint16_t*)_dst;
2758 py_1 = this->y_buffer;
2759 pu = this->u_buffer;
2760 pv = this->v_buffer;
2761
2762 width = this->dest_width >> 3;
2763
2764 do {
2765 RGB(0);
2766 DST1(0);
2767
2768 RGB(1);
2769 DST1(1);
2770
2771 RGB(2);
2772 DST1(2);
2773
2774 RGB(3);
2775 DST1(3);
2776
2777 pu += 4;
2778 pv += 4;
2779 py_1 += 8;
2780 dst_1 += 8;
2781 } while (--width);
2782
2783 dy += this->step_dy;
2784 _dst += this->rgb_stride;
2785
2786 while (--height > 0 && dy < 32768) {
2787
2788 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2);
2789
2790 dy += this->step_dy;
2791 _dst += this->rgb_stride;
2792 }
2793
2794 if (height <= 0)
2795 break;
2796
2797 _p += this->y_stride*2*(dy>>15);
2798 dy &= 32767;
2799
2800 scale_line_4 (_p+1, this->u_buffer,
2801 this->dest_width >> 1, this->step_dx);
2802 scale_line_4 (_p+3, this->v_buffer,
2803 this->dest_width >> 1, this->step_dx);
2804 scale_line_2 (_p, this->y_buffer,
2805 this->dest_width, this->step_dx);
2806 }
2807}
2808
2809static void yuy22rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
2810{
2811 int U, V, Y;
2812 uint8_t * py_1, * pu, * pv;
2813 uint8_t * r, * g, * b;
2814 uint8_t * dst_1;
2815 int width, height;
2816 int dy;
2817
2818 /* FIXME: implement unscaled version */
2819
2820 scale_line_4 (_p+1, this->u_buffer,
2821 this->dest_width >> 1, this->step_dx);
2822 scale_line_4 (_p+3, this->v_buffer,
2823 this->dest_width >> 1, this->step_dx);
2824 scale_line_2 (_p, this->y_buffer,
2825 this->dest_width, this->step_dx);
2826
2827 dy = 0;
2828 height = this->dest_height;
2829
2830 for (;;) {
2831 dst_1 = _dst;
2832 py_1 = this->y_buffer;
2833 pu = this->u_buffer;
2834 pv = this->v_buffer;
2835
2836 width = this->dest_width >> 3;
2837
2838 do {
2839 RGB(0);
2840 DST1(0);
2841
2842 RGB(1);
2843 DST1(1);
2844
2845 RGB(2);
2846 DST1(2);
2847
2848 RGB(3);
2849 DST1(3);
2850
2851 pu += 4;
2852 pv += 4;
2853 py_1 += 8;
2854 dst_1 += 8;
2855 } while (--width);
2856
2857 dy += this->step_dy;
2858 _dst += this->rgb_stride;
2859
2860 while (--height > 0 && dy < 32768) {
2861
2862 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
2863
2864 dy += this->step_dy;
2865 _dst += this->rgb_stride;
2866 }
2867
2868 if (height <= 0)
2869 break;
2870
2871 _p += this->y_stride*2*(dy>>15);
2872 dy &= 32767;
2873
2874 scale_line_4 (_p+1, this->u_buffer,
2875 this->dest_width >> 1, this->step_dx);
2876 scale_line_4 (_p+3, this->v_buffer,
2877 this->dest_width >> 1, this->step_dx);
2878 scale_line_2 (_p, this->y_buffer,
2879 this->dest_width, this->step_dx);
2880 }
2881}
2882
2883static void yuy22rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
2884{
2885 int width, height;
2886 int dy;
2887 uint8_t * dst;
2888 uint8_t * y;
2889
2890 if (this->do_scale) {
2891 dy = 0;
2892 height = this->dest_height;
2893
2894 for (;;) {
2895 scale_line_2 (_p, _dst, this->dest_width, this->step_dx);
2896
2897 dy += this->step_dy;
2898 _dst += this->rgb_stride;
2899
2900 while (--height > 0 && dy < 32768) {
2901
2902 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
2903
2904 dy += this->step_dy;
2905 _dst += this->rgb_stride;
2906 }
2907
2908 if (height <= 0)
2909 break;
2910
2911 _p += this->y_stride*2*(dy>>15);
2912 dy &= 32767;
2913 }
2914 } else {
2915 for (height = this->source_height; --height >= 0; ) {
2916 dst = _dst;
2917 y = _p;
2918 for (width = this->source_width; --width >= 0; ) {
2919 *dst++ = *y;
2920 y += 2;
2921 }
2922 _dst += this->rgb_stride;
2923 _p += this->y_stride*2;
2924 }
2925 }
2926}
2927
2928static void yuy22rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
2929{
2930 int U, V, Y;
2931 uint8_t * py_1, * pu, * pv;
2932 uint16_t * r, * g, * b;
2933 uint8_t * dst_1;
2934 int width, height;
2935 int dy;
2936
2937 scale_line_4 (_p+1, this->u_buffer,
2938 this->dest_width >> 1, this->step_dx);
2939 scale_line_4 (_p+3, this->v_buffer,
2940 this->dest_width >> 1, this->step_dx);
2941 scale_line_2 (_p, this->y_buffer,
2942 this->dest_width, this->step_dx);
2943
2944 dy = 0;
2945 height = this->dest_height;
2946
2947 for (;;) {
2948 dst_1 = _dst;
2949 py_1 = this->y_buffer;
2950 pu = this->u_buffer;
2951 pv = this->v_buffer;
2952
2953 width = this->dest_width >> 3;
2954
2955 do {
2956 RGB(0);
2957 DST1CMAP(0);
2958
2959 RGB(1);
2960 DST1CMAP(1);
2961
2962 RGB(2);
2963 DST1CMAP(2);
2964
2965 RGB(3);
2966 DST1CMAP(3);
2967
2968 pu += 4;
2969 pv += 4;
2970 py_1 += 8;
2971 dst_1 += 8;
2972 } while (--width);
2973
2974 dy += this->step_dy;
2975 _dst += this->rgb_stride;
2976
2977 while (--height > 0 && dy < 32768) {
2978
2979 xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
2980
2981 dy += this->step_dy;
2982 _dst += this->rgb_stride;
2983 }
2984
2985 if (height <= 0)
2986 break;
2987
2988 _p += this->y_stride*2*(dy>>15);
2989 dy &= 32767;
2990
2991 scale_line_4 (_p+1, this->u_buffer,
2992 this->dest_width >> 1, this->step_dx);
2993 scale_line_4 (_p+3, this->v_buffer,
2994 this->dest_width >> 1, this->step_dx);
2995 scale_line_2 (_p, this->y_buffer,
2996 this->dest_width, this->step_dx);
2997 }
2998}
2999
3000static void yuy22rgb_c_init (yuv2rgb_factory_t *this)
3001{
3002 switch (this->mode) {
3003 case MODE_32_RGB:
3004 case MODE_32_BGR:
3005 this->yuy22rgb_fun = yuy22rgb_c_32;
3006 break;
3007
3008 case MODE_24_RGB:
3009 case MODE_24_BGR:
3010 this->yuy22rgb_fun =
3011 (this->mode==MODE_24_RGB && !this->swapped) || (this->mode==MODE_24_BGR && this->swapped)
3012 ? yuy22rgb_c_24_rgb
3013 : yuy22rgb_c_24_bgr;
3014 break;
3015 case MODE_15_BGR:
3016 case MODE_16_BGR:
3017 case MODE_15_RGB:
3018 case MODE_16_RGB:
3019 this->yuy22rgb_fun = yuy22rgb_c_16;
3020 break;
3021
3022 case MODE_8_RGB:
3023 case MODE_8_BGR:
3024 this->yuy22rgb_fun = yuy22rgb_c_8;
3025 break;
3026
3027 case MODE_8_GRAY:
3028 this->yuy22rgb_fun = yuy22rgb_c_gray;
3029 break;
3030
3031 case MODE_PALETTE:
3032 this->yuy22rgb_fun = yuy22rgb_c_palette;
3033 break;
3034
3035 default:
3036 printf ("yuv2rgb: mode %d not supported for yuy2\n", this->mode);
3037 }
3038}
3039
3040yuv2rgb_t *yuv2rgb_create_converter (yuv2rgb_factory_t *factory) {
3041
3042 yuv2rgb_t *this = xine_xmalloc (sizeof (yuv2rgb_t));
3043
3044 this->cmap = factory->cmap;
3045
3046 this->y_chunk = this->y_buffer = NULL;
3047 this->u_chunk = this->u_buffer = NULL;
3048 this->v_chunk = this->v_buffer = NULL;
3049
3050 this->table_rV = factory->table_rV;
3051 this->table_gU = factory->table_gU;
3052 this->table_gV = factory->table_gV;
3053 this->table_bU = factory->table_bU;
3054
3055 this->yuv2rgb_fun = factory->yuv2rgb_fun;
3056 this->yuy22rgb_fun = factory->yuy22rgb_fun;
3057 this->yuv2rgb_single_pixel_fun = factory->yuv2rgb_single_pixel_fun;
3058
3059 this->configure = yuv2rgb_configure;
3060 return this;
3061}
3062
3063/*
3064 * factory functions
3065 */
3066
3067void yuv2rgb_set_gamma (yuv2rgb_factory_t *this, int gamma) {
3068
3069 int i;
3070
3071 for (i = 0; i < 256; i++) {
3072 (uint8_t *)this->table_rV[i] += this->entry_size*(gamma - this->gamma);
3073 (uint8_t *)this->table_gU[i] += this->entry_size*(gamma - this->gamma);
3074 (uint8_t *)this->table_bU[i] += this->entry_size*(gamma - this->gamma);
3075 }
3076#ifdef ARCH_X86
3077 mmx_yuv2rgb_set_gamma(gamma);
3078#endif
3079 this->gamma = gamma;
3080}
3081
3082int yuv2rgb_get_gamma (yuv2rgb_factory_t *this) {
3083
3084 return this->gamma;
3085}
3086
3087yuv2rgb_factory_t* yuv2rgb_factory_init (int mode, int swapped,
3088 uint8_t *cmap) {
3089
3090 yuv2rgb_factory_t *this;
3091
3092#ifdef ARCH_X86
3093 uint32_t mm = xine_mm_accel();
3094#endif
3095
3096 this = malloc (sizeof (yuv2rgb_factory_t));
3097
3098 this->mode = mode;
3099 this->swapped = swapped;
3100 this->cmap = cmap;
3101 this->create_converter = yuv2rgb_create_converter;
3102 this->set_gamma = yuv2rgb_set_gamma;
3103 this->get_gamma = yuv2rgb_get_gamma;
3104 this->matrix_coefficients = 6;
3105
3106
3107 yuv2rgb_setup_tables (this, mode, swapped);
3108
3109 /*
3110 * auto-probe for the best yuv2rgb function
3111 */
3112
3113 this->yuv2rgb_fun = NULL;
3114#ifdef ARCH_X86
3115 if ((this->yuv2rgb_fun == NULL) && (mm & MM_ACCEL_X86_MMXEXT)) {
3116
3117 yuv2rgb_init_mmxext (this);
3118
3119 if (this->yuv2rgb_fun != NULL)
3120 printf ("yuv2rgb: using MMXEXT for colorspace transform\n");
3121 }
3122
3123 if ((this->yuv2rgb_fun == NULL) && (mm & MM_ACCEL_X86_MMX)) {
3124
3125 yuv2rgb_init_mmx (this);
3126
3127 if (this->yuv2rgb_fun != NULL)
3128 printf ("yuv2rgb: using MMX for colorspace transform\n");
3129 }
3130#endif
3131#if HAVE_MLIB
3132 if (this->yuv2rgb_fun == NULL) {
3133
3134 yuv2rgb_init_mlib (this);
3135
3136 if (this->yuv2rgb_fun != NULL)
3137 printf ("yuv2rgb: using medialib for colorspace transform\n");
3138 }
3139#endif
3140 if (this->yuv2rgb_fun == NULL) {
3141 printf ("yuv2rgb: no accelerated colorspace conversion found\n");
3142 yuv2rgb_c_init (this);
3143 }
3144
3145 /*
3146 * auto-probe for the best yuy22rgb function
3147 */
3148
3149 /* FIXME: implement mmx/mlib functions */
3150 yuy22rgb_c_init (this);
3151
3152 /*
3153 * set up single pixel function
3154 */
3155
3156 yuv2rgb_single_pixel_init (this);
3157
3158 return this;
3159}
3160
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb.h b/noncore/multimedia/opieplayer2/yuv2rgb.h
new file mode 100644
index 0000000..5b9c3f6
--- a/dev/null
+++ b/noncore/multimedia/opieplayer2/yuv2rgb.h
@@ -0,0 +1,151 @@
1
/*
 * yuv2rgb.h - public interface of the YUV -> RGB colorspace converters.
 *
 * A yuv2rgb_factory_t is created once per output mode; it owns the lookup
 * tables and hands out yuv2rgb_t converters that share them.
 */
#ifndef HAVE_YUV2RGB_H
#define HAVE_YUV2RGB_H   /* must match the #ifndef above for the guard to work */

#include <inttypes.h>

typedef struct yuv2rgb_s yuv2rgb_t;

typedef struct yuv2rgb_factory_s yuv2rgb_factory_t;

/*
 * function types for functions which can be replaced
 * by hardware-accelerated versions
 */

/* internal function used to scale yuv data */
typedef void (*scale_line_func_t) (uint8_t *source, uint8_t *dest, int width, int step);

typedef void (*yuv2rgb_fun_t) (yuv2rgb_t *this, uint8_t * image, uint8_t * py, uint8_t * pu, uint8_t * pv) ;

typedef void (*yuy22rgb_fun_t) (yuv2rgb_t *this, uint8_t * image, uint8_t * p);

typedef uint32_t (*yuv2rgb_single_pixel_fun_t) (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v);


/*
 * modes supported - feel free to implement yours
 */

#define MODE_8_RGB    1
#define MODE_8_BGR    2
#define MODE_15_RGB   3
#define MODE_15_BGR   4
#define MODE_16_RGB   5
#define MODE_16_BGR   6
#define MODE_24_RGB   7
#define MODE_24_BGR   8
#define MODE_32_RGB   9
#define MODE_32_BGR  10
#define MODE_8_GRAY  11
#define MODE_PALETTE 12

struct yuv2rgb_s {

  /*
   * configure converter for scaling factors
   */
  int (*configure) (yuv2rgb_t *this,
                    int source_width, int source_height,
                    int y_stride, int uv_stride,
                    int dest_width, int dest_height,
                    int rgb_stride);

  /*
   * this is the function to call for the yuv2rgb and scaling process
   */
  yuv2rgb_fun_t     yuv2rgb_fun;

  /*
   * this is the function to call for the yuy2->rgb and scaling process
   */
  yuy22rgb_fun_t    yuy22rgb_fun;

  /*
   * this is the function to call for the yuv2rgb for a single pixel
   * (used for converting clut colors)
   */

  yuv2rgb_single_pixel_fun_t yuv2rgb_single_pixel_fun;

  /* private stuff below */

  int               source_width, source_height;
  int               y_stride, uv_stride;
  int               dest_width, dest_height;
  int               rgb_stride;
  int               step_dx, step_dy;
  int               do_scale;

  /* per-line scratch buffers used while scaling */
  uint8_t          *y_buffer;
  uint8_t          *u_buffer;
  uint8_t          *v_buffer;
  void             *y_chunk;
  void             *u_chunk;
  void             *v_chunk;

  /* lookup tables, pointing at the factory's arrays */
  void            **table_rV;
  void            **table_gU;
  int              *table_gV;
  void            **table_bU;

  uint8_t          *cmap;
  scale_line_func_t scale_line;

} ;

/*
 * convenience class to easily create a lot of converters
 */

struct yuv2rgb_factory_s {

  yuv2rgb_t* (*create_converter) (yuv2rgb_factory_t *this);

  /*
   * adjust gamma (-100 to 100 looks fine)
   * for all converters produced by this factory
   */
  void (*set_gamma) (yuv2rgb_factory_t *this, int gamma);

  /*
   * get gamma value
   */
  int (*get_gamma) (yuv2rgb_factory_t *this);

  /* private data */

  int      mode;
  int      swapped;
  uint8_t *cmap;

  int      gamma;
  int      entry_size;

  uint32_t matrix_coefficients;

  void    *table_rV[256];
  void    *table_gU[256];
  int      table_gV[256];
  void    *table_bU[256];

  /* preselected functions for mode/swap/hardware */
  yuv2rgb_fun_t               yuv2rgb_fun;
  yuy22rgb_fun_t              yuy22rgb_fun;
  yuv2rgb_single_pixel_fun_t  yuv2rgb_single_pixel_fun;

};

yuv2rgb_factory_t *yuv2rgb_factory_init (int mode, int swapped, uint8_t *colormap);


/*
 * internal stuff below this line
 */

void mmx_yuv2rgb_set_gamma(int gamma);
void yuv2rgb_init_mmxext (yuv2rgb_factory_t *this);
void yuv2rgb_init_mmx (yuv2rgb_factory_t *this);
void yuv2rgb_init_mlib (yuv2rgb_factory_t *this);

#endif
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_mlib.c b/noncore/multimedia/opieplayer2/yuv2rgb_mlib.c
new file mode 100644
index 0000000..908b439
--- a/dev/null
+++ b/noncore/multimedia/opieplayer2/yuv2rgb_mlib.c
@@ -0,0 +1,313 @@
1/*
2 * yuv2rgb_mlib.c
3 * Copyright (C) 2000-2001 Silicon Integrated System Corp.
4 * All Rights Reserved.
5 *
6 * Author: Juergen Keil <jk@tools.de>
7 *
8 * This file is part of xine, a free unix video player.
9 *
10 * xine is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * xine is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25
26#if HAVE_MLIB
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <inttypes.h>
32#include <mlib_video.h>
33
34#include "attributes.h"
35#include "yuv2rgb.h"
36
37
/*
 * Resample one line of 8-bit samples by linear interpolation.
 *
 * source  input samples; two are read up front and more as the position
 *         advances
 * dest    receives exactly `width` output samples
 * width   number of output samples to produce
 * step    source advance per output sample in 15-bit fixed point
 *         (32768 == one source sample)
 */
static void scale_line (uint8_t *source, uint8_t *dest,
                        int width, int step) {

  unsigned left  = source[0];   /* sample at the integer position      */
  unsigned right = source[1];   /* its right-hand neighbour            */
  int      frac  = 0;           /* position between the two, 0..32768  */

  source += 2;

  for (; width; width--) {

    *dest++ = (left * (32768 - frac) + right * frac) / 32768;

    /* advance; every full 32768 crossed consumes one more source sample */
    for (frac += step; frac > 32768; frac -= 32768) {
      left  = right;
      right = *source++;
    }
  }
}
69
70
71
72static void mlib_yuv420_rgb24 (yuv2rgb_t *this,
73 uint8_t * image, uint8_t * py,
74 uint8_t * pu, uint8_t * pv)
75{
76 int dst_height;
77 int dy;
78 mlib_status mlib_stat;
79
80 if (this->do_scale) {
81 dy = 0;
82 dst_height = this->dest_height;
83
84 for (;;) {
85 scale_line (pu, this->u_buffer,
86 this->dest_width >> 1, this->step_dx);
87 pu += this->uv_stride;
88
89 scale_line (pv, this->v_buffer,
90 this->dest_width >> 1, this->step_dx);
91 pv += this->uv_stride;
92
93 scale_line (py, this->y_buffer,
94 this->dest_width, this->step_dx);
95 py += this->y_stride;
96 scale_line (py, this->y_buffer + this->dest_width,
97 this->dest_width, this->step_dx);
98 py += this->y_stride;
99
100 mlib_stat = mlib_VideoColorYUV2RGB420(image,
101 this->y_buffer,
102 this->u_buffer,
103 this->v_buffer,
104 this->dest_width & ~1, 2,
105 this->rgb_stride,
106 this->dest_width,
107 this->dest_width >> 1);
108 dy += this->step_dy;
109 image += this->rgb_stride;
110
111 while (--dst_height > 0 && dy < 32768) {
112 memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*6);
113 dy += this->step_dy;
114 image += this->rgb_stride;
115 }
116
117 if (dst_height <= 0)
118 break;
119
120 dy -= 32768;
121
122 dy += this->step_dy;
123 image += this->rgb_stride;
124
125 while (--dst_height > 0 && dy < 32768) {
126 memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*3);
127 dy += this->step_dy;
128 image += this->rgb_stride;
129 }
130
131 if (dst_height <= 0)
132 break;
133
134 dy -= 32768;
135 }
136 } else {
137 mlib_stat = mlib_VideoColorYUV2RGB420(image, py, pu, pv,
138 this->source_width,
139 this->source_height,
140 this->rgb_stride,
141 this->y_stride,
142 this->uv_stride);
143 }
144}
145
146static void mlib_yuv420_argb32 (yuv2rgb_t *this,
147 uint8_t * image, uint8_t * py,
148 uint8_t * pu, uint8_t * pv)
149{
150 int dst_height;
151 int dy;
152 mlib_status mlib_stat;
153
154 if (this->do_scale) {
155 dy = 0;
156 dst_height = this->dest_height;
157
158 for (;;) {
159 scale_line (pu, this->u_buffer,
160 this->dest_width >> 1, this->step_dx);
161 pu += this->uv_stride;
162
163 scale_line (pv, this->v_buffer,
164 this->dest_width >> 1, this->step_dx);
165 pv += this->uv_stride;
166
167 scale_line (py, this->y_buffer,
168 this->dest_width, this->step_dx);
169 py += this->y_stride;
170 scale_line (py, this->y_buffer + this->dest_width,
171 this->dest_width, this->step_dx);
172 py += this->y_stride;
173
174 mlib_stat = mlib_VideoColorYUV2ARGB420(image,
175 this->y_buffer,
176 this->u_buffer,
177 this->v_buffer,
178 this->dest_width & ~1, 2,
179 this->rgb_stride,
180 this->dest_width,
181 this->dest_width >> 1);
182 dy += this->step_dy;
183 image += this->rgb_stride;
184
185 while (--dst_height > 0 && dy < 32768) {
186 memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*8);
187 dy += this->step_dy;
188 image += this->rgb_stride;
189 }
190
191 if (dst_height <= 0)
192 break;
193
194 dy -= 32768;
195
196 dy += this->step_dy;
197 image += this->rgb_stride;
198
199 while (--dst_height > 0 && dy < 32768) {
200 memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*4);
201 dy += this->step_dy;
202 image += this->rgb_stride;
203 }
204
205 if (dst_height <= 0)
206 break;
207
208 dy -= 32768;
209 }
210 } else {
211 mlib_stat = mlib_VideoColorYUV2ARGB420(image, py, pu, pv,
212 this->source_width,
213 this->source_height,
214 this->rgb_stride,
215 this->y_stride,
216 this->uv_stride);
217 }
218}
219
220static void mlib_yuv420_abgr32 (yuv2rgb_t *this,
221 uint8_t * image, uint8_t * py,
222 uint8_t * pu, uint8_t * pv)
223{
224 int dst_height;
225 int dy;
226 mlib_status mlib_stat;
227
228 if (this->do_scale) {
229 dy = 0;
230 dst_height = this->dest_height;
231
232 for (;;) {
233 scale_line (pu, this->u_buffer,
234 this->dest_width >> 1, this->step_dx);
235 pu += this->uv_stride;
236
237 scale_line (pv, this->v_buffer,
238 this->dest_width >> 1, this->step_dx);
239 pv += this->uv_stride;
240
241 scale_line (py, this->y_buffer,
242 this->dest_width, this->step_dx);
243 py += this->y_stride;
244 scale_line (py, this->y_buffer + this->dest_width,
245 this->dest_width, this->step_dx);
246 py += this->y_stride;
247
248 mlib_stat = mlib_VideoColorYUV2ABGR420(image,
249 this->y_buffer,
250 this->u_buffer,
251 this->v_buffer,
252 this->dest_width & ~1, 2,
253 this->rgb_stride,
254 this->dest_width,
255 this->dest_width >> 1);
256 dy += this->step_dy;
257 image += this->rgb_stride;
258
259 while (--dst_height > 0 && dy < 32768) {
260 memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*8);
261 dy += this->step_dy;
262 image += this->rgb_stride;
263 }
264
265 if (dst_height <= 0)
266 break;
267
268 dy -= 32768;
269
270 dy += this->step_dy;
271 image += this->rgb_stride;
272
273 while (--dst_height > 0 && dy < 32768) {
274 memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*4);
275 dy += this->step_dy;
276 image += this->rgb_stride;
277 }
278
279 if (dst_height <= 0)
280 break;
281
282 dy -= 32768;
283 }
284 } else {
285 mlib_stat = mlib_VideoColorYUV2ABGR420(image, py, pu, pv,
286 this->source_width,
287 this->source_height,
288 this->rgb_stride,
289 this->y_stride,
290 this->uv_stride);
291 }
292}
293
294
295void yuv2rgb_init_mlib (yuv2rgb_factory_t *this) {
296
297 if (this->swapped) return; /*no swapped pixel output upto now*/
298
299 switch (this->mode) {
300 case MODE_24_RGB:
301 this->yuv2rgb_fun = mlib_yuv420_rgb24;
302 break;
303 case MODE_32_RGB:
304 this->yuv2rgb_fun = mlib_yuv420_argb32;
305 break;
306 case MODE_32_BGR:
307 this->yuv2rgb_fun = mlib_yuv420_abgr32;
308 break;
309 }
310}
311
312
313 #endif/* HAVE_MLIB */
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c b/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c
new file mode 100644
index 0000000..f092e6f
--- a/dev/null
+++ b/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c
@@ -0,0 +1,1047 @@
1/*
2 * yuv2rgb_mmx.c
3 * Copyright (C) 2000-2001 Silicon Integrated System Corp.
4 * All Rights Reserved.
5 *
6 * Author: Olie Lho <ollie@sis.com.tw>
7 *
8 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
9 *
10 * mpeg2dec is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * mpeg2dec is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25
26#ifdef ARCH_X86
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <inttypes.h>
32
33#include "yuv2rgb.h"
34#include "xineutils.h"
35
/* Values for the `cpu` argument that the movntq() wrapper below tests. */
36#define CPU_MMXEXT 0
37#define CPU_MMX 1
38
39/* CPU_MMXEXT/CPU_MMX adaptation layer */
40
/*
 * movntq(src, dest): store register src to dest, using movntq_r2m when the
 * variable `cpu` at the expansion site equals CPU_MMXEXT and movq_r2m
 * otherwise.  A variable named `cpu` must therefore be in scope wherever
 * this macro is used.
 */
41 #define movntq(src,dest)\
42 do { \
43 if (cpu == CPU_MMXEXT)\
44 movntq_r2m (src, dest);\
45 else \
46 movq_r2m (src, dest);\
47} while (0)
48
49static mmx_t mmx_subYw = {0x1010101010101010};
50static mmx_t mmx_addYw = {0x0000000000000000};
51
52void mmx_yuv2rgb_set_gamma(int gamma)
53{
54int a,s,i;
55
56 if( gamma <= 16 ) {
57 a = 0;
58 s = 16 - gamma;
59 } else {
60 a = gamma - 16;
61 s = 0;
62 }
63
64 for( i = 0; i < 8; i++ ) {
65 *((unsigned char *)&mmx_subYw + i) = s;
66 *((unsigned char *)&mmx_addYw + i) = a;
67 }
68}
69
/*
 * Convert 8 luma samples at py and 4 chroma samples each at pu and pv to
 * planar 8-bit colour in MMX registers.
 *
 * On return (per the register comments below):
 *   mm0 = B7..B0, mm1 = R7..R0, mm2 = G7..G0
 * Nothing is written to memory; one of the mmx_unpack_* routines is
 * expected to follow and store the pixels.  The instruction order is
 * hand-scheduled, so statements must not be reordered.
 */
70static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)
71{
72 static mmx_t mmx_80w = {0x0080008000800080};
73 static mmx_t mmx_U_green = {0xf37df37df37df37d};
74 static mmx_t mmx_U_blue = {0x4093409340934093};
75 static mmx_t mmx_V_red = {0x3312331233123312};
76 static mmx_t mmx_V_green = {0xe5fce5fce5fce5fc};
77 static mmx_t mmx_00ffw = {0x00ff00ff00ff00ff};
78 static mmx_t mmx_Y_coeff = {0x253f253f253f253f};
79
80 movq_m2r (*py, mm6); // mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
81 pxor_r2r (mm4, mm4); // mm4 = 0
82
83 psubusb_m2r (mmx_subYw, mm6);// Y -= mmx_subYw (16 per byte by default, changed by mmx_yuv2rgb_set_gamma)
84 paddusb_m2r (mmx_addYw, mm6);// Y += mmx_addYw (0 per byte by default, changed by mmx_yuv2rgb_set_gamma)
85
86 movd_m2r (*pu, mm0); // mm0 = 00 00 00 00 u3 u2 u1 u0
87 movq_r2r (mm6, mm7); // mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
88
89 pand_m2r (mmx_00ffw, mm6); // mm6 = Y6 Y4 Y2 Y0
90 psrlw_i2r (8, mm7); // mm7 = Y7 Y5 Y3 Y1
91
92 movd_m2r (*pv, mm1); // mm1 = 00 00 00 00 v3 v2 v1 v0
93 psllw_i2r (3, mm6); // promote precision
94
95 pmulhw_m2r (mmx_Y_coeff, mm6);// mm6 = luma_rgb even
96 psllw_i2r (3, mm7); // promote precision
97
98 punpcklbw_r2r (mm4, mm0); // mm0 = u3 u2 u1 u0
99
100 psubsw_m2r (mmx_80w, mm0); // u -= 128
101 punpcklbw_r2r (mm4, mm1); // mm1 = v3 v2 v1 v0
102
103 pmulhw_m2r (mmx_Y_coeff, mm7);// mm7 = luma_rgb odd
104 psllw_i2r (3, mm0); // promote precision
105
106 psubsw_m2r (mmx_80w, mm1); // v -= 128
107 movq_r2r (mm0, mm2); // mm2 = u3 u2 u1 u0
108
109 psllw_i2r (3, mm1); // promote precision
110
111 movq_r2r (mm1, mm4); // mm4 = v3 v2 v1 v0
112
113 pmulhw_m2r (mmx_U_blue, mm0);// mm0 = chroma_b
114
115
116 // slot
117
118
119 // slot
120
121
122 pmulhw_m2r (mmx_V_red, mm1);// mm1 = chroma_r
123 movq_r2r (mm0, mm3); // mm3 = chroma_b
124
125 paddsw_r2r (mm6, mm0); // mm0 = B6 B4 B2 B0
126 paddsw_r2r (mm7, mm3); // mm3 = B7 B5 B3 B1
127
128 packuswb_r2r (mm0, mm0); // saturate to 0-255
129
130
131 pmulhw_m2r (mmx_U_green, mm2);// mm2 = u * u_green
132
133
134 packuswb_r2r (mm3, mm3); // saturate to 0-255
135
136
137 punpcklbw_r2r (mm3, mm0); // mm0 = B7 B6 B5 B4 B3 B2 B1 B0
138
139
140 pmulhw_m2r (mmx_V_green, mm4);// mm4 = v * v_green
141
142
143 // slot
144
145
146 // slot
147
148
149 paddsw_r2r (mm4, mm2); // mm2 = chroma_g
150 movq_r2r (mm2, mm5); // mm5 = chroma_g
151
152
153 movq_r2r (mm1, mm4); // mm4 = chroma_r
154 paddsw_r2r (mm6, mm2); // mm2 = G6 G4 G2 G0
155
156
157 packuswb_r2r (mm2, mm2); // saturate to 0-255
158 paddsw_r2r (mm6, mm1); // mm1 = R6 R4 R2 R0
159
160 packuswb_r2r (mm1, mm1); // saturate to 0-255
161 paddsw_r2r (mm7, mm4); // mm4 = R7 R5 R3 R1
162
163 packuswb_r2r (mm4, mm4); // saturate to 0-255
164 paddsw_r2r (mm7, mm5); // mm5 = G7 G5 G3 G1
165
166
167 packuswb_r2r (mm5, mm5); // saturate to 0-255
168
169
170 punpcklbw_r2r (mm4, mm1); // mm1 = R7 R6 R5 R4 R3 R2 R1 R0
171
172
173 punpcklbw_r2r (mm5, mm2); // mm2 = G7 G6 G5 G4 G3 G2 G1 G0
174}
175
176// basic opt
/*
 * Pack the planar colour bytes left in mm0 (B), mm1 (R) and mm2 (G) into
 * 16-bit pixels and store them with two 8-byte movntq() stores at image
 * and image+8.  Blue and red are masked to their top 5 bits, green to its
 * top 6 bits (see the mask constants).
 *
 * cpu is not used directly in the code below; it is read by the movntq()
 * macro to choose the store instruction.
 */
177static inline void mmx_unpack_16rgb (uint8_t * image, int cpu)
178{
179 static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8};
180 static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfc};
181 static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8};
182
183 /*
184 * convert RGB plane to RGB 16 bits
185 * mm0 -> B, mm1 -> R, mm2 -> G
186 * mm4 -> GB, mm5 -> AR pixel 4-7
187 * mm6 -> GB, mm7 -> AR pixel 0-3
188 */
189
190 pand_m2r (mmx_bluemask, mm0);// mm0 = b7b6b5b4b3______
191 pxor_r2r (mm4, mm4); // mm4 = 0
192
193 pand_m2r (mmx_greenmask, mm2);// mm2 = g7g6g5g4g3g2____
194 psrlq_i2r (3, mm0); // mm0 = ______b7b6b5b4b3
195
196 movq_r2r (mm2, mm7); // mm7 = g7g6g5g4g3g2____
197 movq_r2r (mm0, mm5); // mm5 = ______b7b6b5b4b3
198
199 pand_m2r (mmx_redmask, mm1);// mm1 = r7r6r5r4r3______
200 punpcklbw_r2r (mm4, mm2);
201
202 punpcklbw_r2r (mm1, mm0);
203
204 psllq_i2r (3, mm2);
205
206 punpckhbw_r2r (mm4, mm7);
207 por_r2r (mm2, mm0);
208
209 psllq_i2r (3, mm7);
210
/* first store: built from the low halves of the B/R/G registers */
211 movntq (mm0, *image);
212 punpckhbw_r2r (mm1, mm5);
213
214 por_r2r (mm7, mm5);
215
216 // U
217 // V
218
/* second store: built from the high halves of the B/R/G registers */
219 movntq (mm5, *(image+8));
220}
221
/*
 * Pack the planar colour bytes left in mm0 (B), mm1 (R) and mm2 (G) into
 * 15-bit pixels and store them with two 8-byte movntq() stores at image
 * and image+8.  All three channels are masked to their top 5 bits (every
 * mask constant is 0xf8 per byte), and red is shifted right by one bit
 * more than in mmx_unpack_16rgb.
 *
 * cpu is not used directly in the code below; it is read by the movntq()
 * macro to choose the store instruction.
 */
222static inline void mmx_unpack_15rgb (uint8_t * image, int cpu)
223{
224 static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8};
225 static mmx_t mmx_greenmask = {0xf8f8f8f8f8f8f8f8};
226 static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8};
227
228 /*
229 * convert RGB plane to RGB 15 bits
230 * mm0 -> B, mm1 -> R, mm2 -> G
231 * mm4 -> GB, mm5 -> AR pixel 4-7
232 * mm6 -> GB, mm7 -> AR pixel 0-3
233 */
234
235 pand_m2r (mmx_bluemask, mm0);// mm0 = b7b6b5b4b3______
236 pxor_r2r (mm4, mm4); // mm4 = 0
237
238 pand_m2r (mmx_greenmask, mm2);// mm2 = g7g6g5g4g3______ (5 bits: mask is 0xf8)
239 psrlq_i2r (3, mm0); // mm0 = ______b7b6b5b4b3
240
241 movq_r2r (mm2, mm7); // mm7 = g7g6g5g4g3______
242 movq_r2r (mm0, mm5); // mm5 = ______b7b6b5b4b3
243
244 pand_m2r (mmx_redmask, mm1);// mm1 = r7r6r5r4r3______
245 punpcklbw_r2r (mm4, mm2);
246
247 psrlq_i2r (1, mm1);
248 punpcklbw_r2r (mm1, mm0);
249
250 psllq_i2r (2, mm2);
251
252 punpckhbw_r2r (mm4, mm7);
253 por_r2r (mm2, mm0);
254
255 psllq_i2r (2, mm7);
256
/* first store: built from the low halves of the B/R/G registers */
257 movntq (mm0, *image);
258 punpckhbw_r2r (mm1, mm5);
259
260 por_r2r (mm7, mm5);
261
262 // U
263 // V
264
/* second store: built from the high halves of the B/R/G registers */
265 movntq (mm5, *(image+8));
266}
267
/*
 * Interleave the planar colour bytes left in mm0 (B), mm1 (R) and mm2 (G)
 * with a zero byte into 4-byte pixels in memory order B, G, R, 0 and store
 * them with four 8-byte movntq() stores (32 bytes at image).
 *
 * cpu is not used directly in the code below; it is read by the movntq()
 * macro to choose the store instruction.
 */
268static inline void mmx_unpack_32rgb (uint8_t * image, int cpu)
269{
270 /*
271 * convert RGB plane to RGB packed format,
272 * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
273 * mm4 -> GB, mm5 -> AR pixel 4-7,
274 * mm6 -> GB, mm7 -> AR pixel 0-3
275 */
276
277 pxor_r2r (mm3, mm3);
278 movq_r2r (mm0, mm6);
279
280 punpcklbw_r2r (mm2, mm6);
281 movq_r2r (mm1, mm7);
282
283 punpcklbw_r2r (mm3, mm7);
284 movq_r2r (mm0, mm4);
285
286 punpcklwd_r2r (mm7, mm6);
287 movq_r2r (mm1, mm5);
288
289 /* scheduling: this is hopeless */
/* stores, in order: low words of the low half, high words of the low half,
   low words of the high half, high words of the high half */
290 movntq (mm6, *image);
291 movq_r2r (mm0, mm6);
292 punpcklbw_r2r (mm2, mm6);
293 punpckhwd_r2r (mm7, mm6);
294 movntq (mm6, *(image+8));
295 punpckhbw_r2r (mm2, mm4);
296 punpckhbw_r2r (mm3, mm5);
297 punpcklwd_r2r (mm5, mm4);
298 movntq (mm4, *(image+16));
299 movq_r2r (mm0, mm4);
300 punpckhbw_r2r (mm2, mm4);
301 punpckhwd_r2r (mm5, mm4);
302 movntq (mm4, *(image+24));
303}
304
305static inline void mmx_unpack_32bgr (uint8_t * image, int cpu)
306{
307 /*
308 * convert RGB plane to RGB packed format,
309 * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
310 * mm4 -> GB, mm5 -> AR pixel 4-7,
311 * mm6 -> GB, mm7 -> AR pixel 0-3
312 */
313
314 pxor_r2r (mm3, mm3);
315 movq_r2r (mm1, mm6);
316
317 punpcklbw_r2r (mm2, mm6);
318 movq_r2r (mm0, mm7);
319
320 punpcklbw_r2r (mm3, mm7);
321 movq_r2r (mm1, mm4);
322
323 punpcklwd_r2r (mm7, mm6);
324 movq_r2r (mm0, mm5);
325
326 /* scheduling: this is hopeless */
327 movntq (mm6, *image);
328 movq_r2r (mm0, mm6);
329 punpcklbw_r2r (mm2, mm6);
330 punpckhwd_r2r (mm7, mm6);
331 movntq (mm6, *(image+8));
332 punpckhbw_r2r (mm2, mm4);
333 punpckhbw_r2r (mm3, mm5);
334 punpcklwd_r2r (mm5, mm4);
335 movntq (mm4, *(image+16));
336 movq_r2r (mm0, mm4);
337 punpckhbw_r2r (mm2, mm4);
338 punpckhwd_r2r (mm5, mm4);
339 movntq (mm4, *(image+24));
340}
341
/*
 * Store the planar colour bytes left in mm0 (B), mm1 (R) and mm2 (G) with
 * three 8-byte movntq() stores (24 bytes at image).
 *
 * NOTE(review): the instruction sequence is identical to the first three
 * stores of mmx_unpack_32rgb, i.e. it interleaves a zero byte and emits
 * 4-byte B,G,R,0 pixels for pixels 0-5 rather than tightly packed 3-byte
 * pixels.  This looks unfinished - confirm before relying on it for a
 * 24-bit mode.
 *
 * cpu is not used directly in the code below; it is read by the movntq()
 * macro to choose the store instruction.
 */
342static inline void mmx_unpack_24rgb (uint8_t * image, int cpu)
343{
344 /*
345 * convert RGB plane to RGB packed format,
346 * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
347 * mm4 -> GB, mm5 -> AR pixel 4-7,
348 * mm6 -> GB, mm7 -> AR pixel 0-3
349 */
350
351 pxor_r2r (mm3, mm3);
352 movq_r2r (mm0, mm6);
353
354 punpcklbw_r2r (mm2, mm6);
355 movq_r2r (mm1, mm7);
356
357 punpcklbw_r2r (mm3, mm7);
358 movq_r2r (mm0, mm4);
359
360 punpcklwd_r2r (mm7, mm6);
361 movq_r2r (mm1, mm5);
362
363 /* scheduling: this is hopeless */
364 movntq (mm6, *image);
365 movq_r2r (mm0, mm6);
366 punpcklbw_r2r (mm2, mm6);
367 punpckhwd_r2r (mm7, mm6);
368 movntq (mm6, *(image+8));
369 punpckhbw_r2r (mm2, mm4);
370 punpckhbw_r2r (mm3, mm5);
371 punpcklwd_r2r (mm5, mm4);
372 movntq (mm4, *(image+16));
373}
374
375static inline void yuv420_rgb16 (yuv2rgb_t *this,
376 uint8_t * image,
377 uint8_t * py, uint8_t * pu, uint8_t * pv,
378 int cpu)
379{
380 int i;
381 int rgb_stride = this->rgb_stride;
382 int y_stride = this->y_stride;
383 int uv_stride = this->uv_stride;
384 int width = this->source_width;
385 int height = this->source_height;
386 int dst_height = this->dest_height;
387 uint8_t *img;
388
389 width >>= 3;
390
391 if (!this->do_scale) {
392 y_stride -= 8 * width;
393 uv_stride -= 4 * width;
394
395 do {
396
397 i = width; img = image;
398 do {
399 mmx_yuv2rgb (py, pu, pv);
400 mmx_unpack_16rgb (img, cpu);
401 py += 8;
402 pu += 4;
403 pv += 4;
404 img += 16;
405 } while (--i);
406
407 py += y_stride;
408 image += rgb_stride;
409 if (height & 1) {
410 pu += uv_stride;
411 pv += uv_stride;
412 } else {
413 pu -= 4 * width;
414 pv -= 4 * width;
415 }
416 } while (--height);
417
418 } else {
419
420 scale_line_func_t scale_line = this->scale_line;
421 uint8_t *y_buf, *u_buf, *v_buf;
422 int dy = 0;
423
424 scale_line (pu, this->u_buffer,
425 this->dest_width >> 1, this->step_dx);
426 scale_line (pv, this->v_buffer,
427 this->dest_width >> 1, this->step_dx);
428 scale_line (py, this->y_buffer,
429 this->dest_width, this->step_dx);
430 for (height = 0;; ) {
431
432 y_buf = this->y_buffer;
433 u_buf = this->u_buffer;
434 v_buf = this->v_buffer;
435
436 i = this->dest_width >> 3; img = image;
437 do {
438 /* printf ("i : %d\n",i); */
439
440 mmx_yuv2rgb (y_buf, u_buf, v_buf);
441 mmx_unpack_16rgb (img, cpu);
442 y_buf += 8;
443 u_buf += 4;
444 v_buf += 4;
445 img += 16;
446 } while (--i);
447
448 dy += this->step_dy;
449 image += rgb_stride;
450
451 while (--dst_height > 0 && dy < 32768) {
452
453 xine_fast_memcpy (image, image-rgb_stride, this->dest_width*2);
454
455 dy += this->step_dy;
456 image += rgb_stride;
457 }
458
459 if (dst_height <= 0)
460 break;
461
462 do {
463 dy -= 32768;
464
465 py += y_stride;
466
467 scale_line (py, this->y_buffer,
468 this->dest_width, this->step_dx);
469
470 if (height & 1) {
471 pu += uv_stride;
472 pv += uv_stride;
473
474 scale_line (pu, this->u_buffer,
475 this->dest_width >> 1, this->step_dx);
476 scale_line (pv, this->v_buffer,
477 this->dest_width >> 1, this->step_dx);
478
479 }
480 height++;
481 } while( dy>=32768);
482 }
483 }
484}
485
486static inline void yuv420_rgb15 (yuv2rgb_t *this,
487 uint8_t * image,
488 uint8_t * py, uint8_t * pu, uint8_t * pv,
489 int cpu)
490{
491 int i;
492 int rgb_stride = this->rgb_stride;
493 int y_stride = this->y_stride;
494 int uv_stride = this->uv_stride;
495 int width = this->source_width;
496 int height = this->source_height;
497 int dst_height = this->dest_height;
498 uint8_t *img;
499
500 width >>= 3;
501
502 if (!this->do_scale) {
503 y_stride -= 8 * width;
504 uv_stride -= 4 * width;
505
506 do {
507
508 i = width; img = image;
509 do {
510 mmx_yuv2rgb (py, pu, pv);
511 mmx_unpack_15rgb (img, cpu);
512 py += 8;
513 pu += 4;
514 pv += 4;
515 img += 16;
516 } while (--i);
517
518 py += y_stride;
519 image += rgb_stride;
520 if (height & 1) {
521 pu += uv_stride;
522 pv += uv_stride;
523 } else {
524 pu -= 4 * width;
525 pv -= 4 * width;
526 }
527 } while (--height);
528
529 } else {
530
531 scale_line_func_t scale_line = this->scale_line;
532 uint8_t *y_buf, *u_buf, *v_buf;
533 int dy = 0;
534
535 scale_line (pu, this->u_buffer,
536 this->dest_width >> 1, this->step_dx);
537 scale_line (pv, this->v_buffer,
538 this->dest_width >> 1, this->step_dx);
539 scale_line (py, this->y_buffer,
540 this->dest_width, this->step_dx);
541 for (height = 0;; ) {
542
543 y_buf = this->y_buffer;
544 u_buf = this->u_buffer;
545 v_buf = this->v_buffer;
546
547 i = this->dest_width >> 3; img = image;
548 do {
549 /* printf ("i : %d\n",i); */
550
551 mmx_yuv2rgb (y_buf, u_buf, v_buf);
552 mmx_unpack_15rgb (img, cpu);
553 y_buf += 8;
554 u_buf += 4;
555 v_buf += 4;
556 img += 16;
557 } while (--i);
558
559 dy += this->step_dy;
560 image += rgb_stride;
561
562 while (--dst_height > 0 && dy < 32768) {
563
564 xine_fast_memcpy (image, image-rgb_stride, this->dest_width*2);
565
566 dy += this->step_dy;
567 image += rgb_stride;
568 }
569
570 if (dst_height <= 0)
571 break;
572
573 do {
574 dy -= 32768;
575 py += y_stride;
576
577 scale_line (py, this->y_buffer,
578 this->dest_width, this->step_dx);
579
580 if (height & 1) {
581 pu += uv_stride;
582 pv += uv_stride;
583
584 scale_line (pu, this->u_buffer,
585 this->dest_width >> 1, this->step_dx);
586 scale_line (pv, this->v_buffer,
587 this->dest_width >> 1, this->step_dx);
588
589 }
590 height++;
591 } while( dy>=32768 );
592 }
593 }
594}
595
596static inline void yuv420_rgb24 (yuv2rgb_t *this,
597 uint8_t * image, uint8_t * py,
598 uint8_t * pu, uint8_t * pv, int cpu)
599{
600 int i;
601 int rgb_stride = this->rgb_stride;
602 int y_stride = this->y_stride;
603 int uv_stride = this->uv_stride;
604 int width = this->source_width;
605 int height = this->source_height;
606 int dst_height = this->dest_height;
607 uint8_t *img;
608
609 /* rgb_stride -= 4 * this->dest_width; */
610 width >>= 3;
611
612 if (!this->do_scale) {
613 y_stride -= 8 * width;
614 uv_stride -= 4 * width;
615
616 do {
617 i = width; img = image;
618 do {
619 mmx_yuv2rgb (py, pu, pv);
620 mmx_unpack_24rgb (img, cpu);
621 py += 8;
622 pu += 4;
623 pv += 4;
624 img += 24;
625 } while (--i);
626
627 py += y_stride;
628 image += rgb_stride;
629 if (height & 1) {
630 pu += uv_stride;
631 pv += uv_stride;
632 } else {
633 pu -= 4 * width;
634 pv -= 4 * width;
635 }
636 } while (--height);
637 } else {
638
639 scale_line_func_t scale_line = this->scale_line;
640 uint8_t *y_buf, *u_buf, *v_buf;
641 int dy = 0;
642
643 scale_line (pu, this->u_buffer,
644 this->dest_width >> 1, this->step_dx);
645 scale_line (pv, this->v_buffer,
646 this->dest_width >> 1, this->step_dx);
647 scale_line (py, this->y_buffer,
648 this->dest_width, this->step_dx);
649
650 for (height = 0;; ) {
651
652 y_buf = this->y_buffer;
653 u_buf = this->u_buffer;
654 v_buf = this->v_buffer;
655
656
657 i = this->dest_width >> 3; img=image;
658 do {
659 /* printf ("i : %d\n",i); */
660
661 mmx_yuv2rgb (y_buf, u_buf, v_buf);
662 mmx_unpack_24rgb (img, cpu);
663 y_buf += 8;
664 u_buf += 4;
665 v_buf += 4;
666 img += 24;
667 } while (--i);
668
669 dy += this->step_dy;
670 image += rgb_stride;
671
672 while (--dst_height > 0 && dy < 32768) {
673
674 xine_fast_memcpy (image, image-rgb_stride, this->dest_width*3);
675
676 dy += this->step_dy;
677 image += rgb_stride;
678 }
679
680 if (dst_height <= 0)
681 break;
682
683 do {
684 dy -= 32768;
685 py += y_stride;
686
687 scale_line (py, this->y_buffer,
688 this->dest_width, this->step_dx);
689
690 if (height & 1) {
691 pu += uv_stride;
692 pv += uv_stride;
693
694 scale_line (pu, this->u_buffer,
695 this->dest_width >> 1, this->step_dx);
696 scale_line (pv, this->v_buffer,
697 this->dest_width >> 1, this->step_dx);
698 }
699 height++;
700 } while( dy>=32768 );
701
702 }
703
704 }
705}
706
707static inline void yuv420_argb32 (yuv2rgb_t *this,
708 uint8_t * image, uint8_t * py,
709 uint8_t * pu, uint8_t * pv, int cpu)
710{
711 int i;
712 int rgb_stride = this->rgb_stride;
713 int y_stride = this->y_stride;
714 int uv_stride = this->uv_stride;
715 int width = this->source_width;
716 int height = this->source_height;
717 int dst_height = this->dest_height;
718 uint8_t *img;
719
720 /* rgb_stride -= 4 * this->dest_width; */
721 width >>= 3;
722
723 if (!this->do_scale) {
724 y_stride -= 8 * width;
725 uv_stride -= 4 * width;
726
727 do {
728 i = width; img = image;
729 do {
730 mmx_yuv2rgb (py, pu, pv);
731 mmx_unpack_32rgb (img, cpu);
732 py += 8;
733 pu += 4;
734 pv += 4;
735 img += 32;
736 } while (--i);
737
738 py += y_stride;
739 image += rgb_stride;
740 if (height & 1) {
741 pu += uv_stride;
742 pv += uv_stride;
743 } else {
744 pu -= 4 * width;
745 pv -= 4 * width;
746 }
747 } while (--height);
748 } else {
749
750 scale_line_func_t scale_line = this->scale_line;
751 uint8_t *y_buf, *u_buf, *v_buf;
752 int dy = 0;
753
754 scale_line (pu, this->u_buffer,
755 this->dest_width >> 1, this->step_dx);
756 scale_line (pv, this->v_buffer,
757 this->dest_width >> 1, this->step_dx);
758 scale_line (py, this->y_buffer,
759 this->dest_width, this->step_dx);
760
761 for (height = 0;; ) {
762
763 y_buf = this->y_buffer;
764 u_buf = this->u_buffer;
765 v_buf = this->v_buffer;
766
767
768 i = this->dest_width >> 3; img=image;
769 do {
770 /* printf ("i : %d\n",i); */
771
772 mmx_yuv2rgb (y_buf, u_buf, v_buf);
773 mmx_unpack_32rgb (img, cpu);
774 y_buf += 8;
775 u_buf += 4;
776 v_buf += 4;
777 img += 32;
778 } while (--i);
779
780 dy += this->step_dy;
781 image += rgb_stride;
782
783 while (--dst_height > 0 && dy < 32768) {
784
785 xine_fast_memcpy (image, image-rgb_stride, this->dest_width*4);
786
787 dy += this->step_dy;
788 image += rgb_stride;
789 }
790
791 if (dst_height <= 0)
792 break;
793
794 do {
795 dy -= 32768;
796 py += y_stride;
797
798 scale_line (py, this->y_buffer,
799 this->dest_width, this->step_dx);
800
801 if (height & 1) {
802 pu += uv_stride;
803 pv += uv_stride;
804
805 scale_line (pu, this->u_buffer,
806 this->dest_width >> 1, this->step_dx);
807 scale_line (pv, this->v_buffer,
808 this->dest_width >> 1, this->step_dx);
809 }
810 height++;
811 } while( dy>=32768 );
812 }
813
814 }
815}
816
817static inline void yuv420_abgr32 (yuv2rgb_t *this,
818 uint8_t * image, uint8_t * py,
819 uint8_t * pu, uint8_t * pv, int cpu)
820{
821 int i;
822 int rgb_stride = this->rgb_stride;
823 int y_stride = this->y_stride;
824 int uv_stride = this->uv_stride;
825 int width = this->source_width;
826 int height = this->source_height;
827 int dst_height = this->dest_height;
828 uint8_t *img;
829
830 /* rgb_stride -= 4 * this->dest_width; */
831 width >>= 3;
832
833 if (!this->do_scale) {
834 y_stride -= 8 * width;
835 uv_stride -= 4 * width;
836
837 do {
838 i = width; img = image;
839 do {
840 mmx_yuv2rgb (py, pu, pv);
841 mmx_unpack_32bgr (img, cpu);
842 py += 8;
843 pu += 4;
844 pv += 4;
845 img += 32;
846 } while (--i);
847
848 py += y_stride;
849 image += rgb_stride;
850 if (height & 1) {
851 pu += uv_stride;
852 pv += uv_stride;
853 } else {
854 pu -= 4 * width;
855 pv -= 4 * width;
856 }
857 } while (--height);
858 } else {
859
860 scale_line_func_t scale_line = this->scale_line;
861 uint8_t *y_buf, *u_buf, *v_buf;
862 int dy = 0;
863
864 scale_line (pu, this->u_buffer,
865 this->dest_width >> 1, this->step_dx);
866 scale_line (pv, this->v_buffer,
867 this->dest_width >> 1, this->step_dx);
868 scale_line (py, this->y_buffer,
869 this->dest_width, this->step_dx);
870
871 for (height = 0;; ) {
872
873 y_buf = this->y_buffer;
874 u_buf = this->u_buffer;
875 v_buf = this->v_buffer;
876
877
878 i = this->dest_width >> 3; img=image;
879 do {
880 /* printf ("i : %d\n",i); */
881
882 mmx_yuv2rgb (y_buf, u_buf, v_buf);
883 mmx_unpack_32bgr (img, cpu);
884 y_buf += 8;
885 u_buf += 4;
886 v_buf += 4;
887 img += 32;
888 } while (--i);
889
890 dy += this->step_dy;
891 image += rgb_stride;
892
893 while (--dst_height > 0 && dy < 32768) {
894
895 xine_fast_memcpy (image, image-rgb_stride, this->dest_width*4);
896
897 dy += this->step_dy;
898 image += rgb_stride;
899 }
900
901 if (dst_height <= 0)
902 break;
903
904 do {
905 dy -= 32768;
906 py += y_stride;
907
908 scale_line (py, this->y_buffer,
909 this->dest_width, this->step_dx);
910
911 if (height & 1) {
912 pu += uv_stride;
913 pv += uv_stride;
914
915 scale_line (pu, this->u_buffer,
916 this->dest_width >> 1, this->step_dx);
917 scale_line (pv, this->v_buffer,
918 this->dest_width >> 1, this->step_dx);
919 }
920 height++;
921 } while( dy>=32768 );
922
923 }
924
925 }
926}
927
928static void mmxext_rgb15 (yuv2rgb_t *this, uint8_t * image,
929 uint8_t * py, uint8_t * pu, uint8_t * pv)
930{
931 yuv420_rgb15 (this, image, py, pu, pv, CPU_MMXEXT);
932 emms();/* re-initialize x86 FPU after MMX use */
933}
934
935static void mmxext_rgb16 (yuv2rgb_t *this, uint8_t * image,
936 uint8_t * py, uint8_t * pu, uint8_t * pv)
937{
938 yuv420_rgb16 (this, image, py, pu, pv, CPU_MMXEXT);
939 emms();/* re-initialize x86 FPU after MMX use */
940}
941
942static void mmxext_rgb24 (yuv2rgb_t *this, uint8_t * image,
943 uint8_t * py, uint8_t * pu, uint8_t * pv)
944{
945 yuv420_rgb24 (this, image, py, pu, pv, CPU_MMXEXT);
946 emms();/* re-initialize x86 FPU after MMX use */
947}
948
949static void mmxext_argb32 (yuv2rgb_t *this, uint8_t * image,
950 uint8_t * py, uint8_t * pu, uint8_t * pv)
951{
952 yuv420_argb32 (this, image, py, pu, pv, CPU_MMXEXT);
953 emms();/* re-initialize x86 FPU after MMX use */
954}
955
956static void mmxext_abgr32 (yuv2rgb_t *this, uint8_t * image,
957 uint8_t * py, uint8_t * pu, uint8_t * pv)
958{
959 yuv420_abgr32 (this, image, py, pu, pv, CPU_MMXEXT);
960 emms();/* re-initialize x86 FPU after MMX use */
961}
962
963static void mmx_rgb15 (yuv2rgb_t *this, uint8_t * image,
964 uint8_t * py, uint8_t * pu, uint8_t * pv)
965{
966 yuv420_rgb15 (this, image, py, pu, pv, CPU_MMX);
967 emms();/* re-initialize x86 FPU after MMX use */
968}
969
970static void mmx_rgb16 (yuv2rgb_t *this, uint8_t * image,
971 uint8_t * py, uint8_t * pu, uint8_t * pv)
972{
973 yuv420_rgb16 (this, image, py, pu, pv, CPU_MMX);
974 emms();/* re-initialize x86 FPU after MMX use */
975}
976
977static void mmx_rgb24 (yuv2rgb_t *this, uint8_t * image,
978 uint8_t * py, uint8_t * pu, uint8_t * pv)
979{
980 yuv420_rgb24 (this, image, py, pu, pv, CPU_MMX);
981 emms();/* re-initialize x86 FPU after MMX use */
982}
983
984static void mmx_argb32 (yuv2rgb_t *this, uint8_t * image,
985 uint8_t * py, uint8_t * pu, uint8_t * pv)
986{
987 yuv420_argb32 (this, image, py, pu, pv, CPU_MMX);
988 emms();/* re-initialize x86 FPU after MMX use */
989}
990
991static void mmx_abgr32 (yuv2rgb_t *this, uint8_t * image,
992 uint8_t * py, uint8_t * pu, uint8_t * pv)
993{
994 yuv420_abgr32 (this, image, py, pu, pv, CPU_MMX);
995 emms();/* re-initialize x86 FPU after MMX use */
996}
997
998void yuv2rgb_init_mmxext (yuv2rgb_factory_t *this) {
999
1000 if (this->swapped)
1001 return; /*no swapped pixel output upto now*/
1002
1003 switch (this->mode) {
1004 case MODE_15_RGB:
1005 this->yuv2rgb_fun = mmxext_rgb15;
1006 break;
1007 case MODE_16_RGB:
1008 this->yuv2rgb_fun = mmxext_rgb16;
1009 break;
1010 case MODE_24_RGB:
1011 this->yuv2rgb_fun = mmxext_rgb24;
1012 break;
1013 case MODE_32_RGB:
1014 this->yuv2rgb_fun = mmxext_argb32;
1015 break;
1016 case MODE_32_BGR:
1017 this->yuv2rgb_fun = mmxext_abgr32;
1018 break;
1019 }
1020}
1021
1022void yuv2rgb_init_mmx (yuv2rgb_factory_t *this) {
1023
1024 if (this->swapped)
1025 return; /*no swapped pixel output upto now*/
1026
1027 switch (this->mode) {
1028 case MODE_15_RGB:
1029 this->yuv2rgb_fun = mmx_rgb15;
1030 break;
1031 case MODE_16_RGB:
1032 this->yuv2rgb_fun = mmx_rgb16;
1033 break;
1034 case MODE_24_RGB:
1035 this->yuv2rgb_fun = mmx_rgb24;
1036 break;
1037 case MODE_32_RGB:
1038 this->yuv2rgb_fun = mmx_argb32;
1039 break;
1040 case MODE_32_BGR:
1041 this->yuv2rgb_fun = mmx_abgr32;
1042 break;
1043 }
1044}
1045
1046
1047#endif