From a7b8ef4096c17ba5e0ff96e9292a291390831e69 Mon Sep 17 00:00:00 2001 From: zecke Date: Thu, 11 Jul 2002 22:48:38 +0000 Subject: add files --- (limited to 'noncore/multimedia/opieplayer2/yuv2rgb.c') diff --git a/noncore/multimedia/opieplayer2/yuv2rgb.c b/noncore/multimedia/opieplayer2/yuv2rgb.c new file mode 100644 index 0000000..d1d6627 --- a/dev/null +++ b/noncore/multimedia/opieplayer2/yuv2rgb.c @@ -0,0 +1,3160 @@ +/* + * yuv2rgb.c + * + * This file is part of xine, a unix video player. + * + * based on work from mpeg2dec: + * Copyright (C) 1999-2001 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * $Id$ + */ + +#include +#include +#include +#include + +#include "yuv2rgb.h" +#include + + +static int prof_scale_line = -1; + +static scale_line_func_t find_scale_line_func(int step); + + +const int32_t Inverse_Table_6_9[8][4] = { + {117504, 138453, 13954, 34903}, /* no sequence_display_extension */ + {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */ + {104597, 132201, 25675, 53279}, /* unspecified */ + {104597, 132201, 25675, 53279}, /* reserved */ + {104448, 132798, 24759, 53109}, /* FCC */ + {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */ + {104597, 132201, 25675, 53279}, /* SMPTE 170M */ + {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ +}; + + +static void *my_malloc_aligned (size_t alignment, size_t size, void **chunk) { + + char *pMem; + + pMem = xine_xmalloc (size+alignment); + + *chunk = pMem; + + while ((int) pMem % alignment) + pMem++; + + return pMem; +} + + +static int yuv2rgb_configure (yuv2rgb_t *this, + int source_width, int source_height, + int y_stride, int uv_stride, + int dest_width, int dest_height, + int rgb_stride) { + /* + printf ("yuv2rgb setup (%d x %d => %d x %d)\n", source_width, source_height, + dest_width, dest_height); + */ + if (prof_scale_line == -1) + prof_scale_line = xine_profiler_allocate_slot("xshm scale line"); + + this->source_width = source_width; + this->source_height = source_height; + this->y_stride = y_stride; + this->uv_stride = uv_stride; + this->dest_width = dest_width; + this->dest_height = dest_height; + this->rgb_stride = rgb_stride; + + if (this->y_chunk) { + free (this->y_chunk); + this->y_buffer = this->y_chunk = NULL; + } + if (this->u_chunk) { + free (this->u_chunk); + this->u_buffer = this->u_chunk = NULL; + } + if (this->v_chunk) { + free (this->v_chunk); + this->v_buffer = this->v_chunk = NULL; + } + + + this->step_dx = source_width * 32768 / dest_width; + this->step_dy = source_height * 32768 / dest_height; + + this->scale_line = find_scale_line_func(this->step_dx); + + if ((source_width == dest_width) && (source_height == dest_height)) { + this->do_scale = 0; + + /* + * space for two y-lines (for yuv2rgb_mlib) + * u,v subsampled 2:1 + */ + this->y_buffer = my_malloc_aligned (16, 2*dest_width, &this->y_chunk); + if (!this->y_buffer) + return 0; + this->u_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->u_chunk); + if (!this->u_buffer) + return 0; + this->v_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->v_chunk); + if (!this->v_buffer) + return 0; + + } else { + this->do_scale = 1; + + /* + * space for two y-lines (for yuv2rgb_mlib) + * u,v subsampled 2:1 + */ + this->y_buffer = my_malloc_aligned (16, 2*dest_width, &this->y_chunk); + if (!this->y_buffer) + return 0; + this->u_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->u_chunk); + if (!this->u_buffer) + return 0; + this->v_buffer = my_malloc_aligned (16, (dest_width+1)/2, &this->v_chunk); + if (!this->v_buffer) + return 0; + } + return 1; +} + + +static void scale_line_gen (uint8_t *source, uint8_t *dest, + int width, int step) { + + /* + * scales a yuv source row to a dest row, with interpolation + * (good quality, but slow) + */ + int p1; + int p2; + int dx; + + xine_profiler_start_count(prof_scale_line); + + p1 = *source++; + p2 = *source++; + dx = 0; + + /* + * the following code has been optimized by Scott Smith : + * + * ok now I have a meaningful optimization for yuv2rgb.c:scale_line_gen. + * it removes the loop from within the while() loop by separating it out + * into 3 cases: where you are enlarging the line (<32768), where you are + * between 50% and 100% of the original line (<=65536), and where you are + * shrinking it by a lot. anyways, I went from 200 delivered / 100+ + * skipped to 200 delivered / 80 skipped for the enlarging case. I + * noticed when looking at the assembly that the compiler was able to + * unroll these while(width) loops, whereas before it was trying to + * unroll the while(dx>32768) loops. so the compiler is better able to + * deal with this code. + */ + + + if (step < 32768) { + while (width) { + *dest = p1 + (((p2-p1) * dx)>>15); + + dx += step; + if (dx > 32768) { + dx -= 32768; + p1 = p2; + p2 = *source++; + } + + dest ++; + width --; + } + } else if (step <= 65536) { + while (width) { + *dest = p1 + (((p2-p1) * dx)>>15); + + dx += step; + if (dx > 65536) { + dx -= 65536; + p1 = *source++; + p2 = *source++; + } else { + dx -= 32768; + p1 = p2; + p2 = *source++; + } + + dest ++; + width --; + } + } else { + while (width) { + int offs; + + *dest = p1 + (((p2-p1) * dx)>>15); + + dx += step; + offs=((dx-1)>>15); + dx-=offs<<15; + source+=offs-2; + p1=*source++; + p2=*source++; + dest ++; + width --; + } + } + xine_profiler_stop_count(prof_scale_line); + + + + +} + +/* + * Interpolates 16 output pixels from 15 source pixels using shifts. + * Useful for scaling a PAL mpeg2 dvd input source to 4:3 format on + * a monitor using square pixels. + * (720 x 576 ==> 768 x 576) + */ +static void scale_line_15_16 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 16) >= 0) { + p1 = source[0]; + dest[0] = p1; + p2 = source[1]; + dest[1] = (1*p1 + 7*p2) >> 3; + p1 = source[2]; + dest[2] = (1*p2 + 7*p1) >> 3; + p2 = source[3]; + dest[3] = (1*p1 + 3*p2) >> 2; + p1 = source[4]; + dest[4] = (1*p2 + 3*p1) >> 2; + p2 = source[5]; + dest[5] = (3*p1 + 5*p2) >> 3; + p1 = source[6]; + dest[6] = (3*p2 + 5*p1) >> 3; + p2 = source[7]; + dest[7] = (1*p1 + 1*p1) >> 1; + p1 = source[8]; + dest[8] = (1*p2 + 1*p1) >> 1; + p2 = source[9]; + dest[9] = (5*p1 + 3*p2) >> 3; + p1 = source[10]; + dest[10] = (5*p2 + 3*p1) >> 3; + p2 = source[11]; + dest[11] = (3*p1 + 1*p2) >> 2; + p1 = source[12]; + dest[12] = (3*p2 + 1*p1) >> 2; + p2 = source[13]; + dest[13] = (7*p1 + 1*p2) >> 3; + p1 = source[14]; + dest[14] = (7*p2 + 1*p1) >> 3; + dest[15] = p1; + source += 15; + dest += 16; + } + + if ((width += 16) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 7*source[1]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[1] + 7*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[2] + 3*source[3]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[3] + 3*source[4]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[4] + 5*source[5]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[5] + 5*source[6]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[6] + 1*source[7]) >> 1; + if (--width <= 0) goto done; + *dest++ = (1*source[7] + 1*source[8]) >> 1; + if (--width <= 0) goto done; + *dest++ = (5*source[8] + 3*source[9]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[9] + 3*source[10]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[10] + 1*source[11]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[11] + 1*source[12]) >> 2; + if (--width <= 0) goto done; + *dest++ = (7*source[12] + 1*source[13]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[13] + 1*source[14]) >> 3; + done: + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Interpolates 53 output pixels from 45 source pixels using shifts. + * Useful for scaling a NTSC mpeg2 dvd input source to 16:9 display + * resulution + * fullscreen resolution, or to 16:9 format on a monitor using square + * pixels. + * (720 x 480 ==> 848 x 480) + */ +static void scale_line_45_53 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 53) >= 0) { + p1 = source[0]; + p2 = source[1]; + dest[0] = p1; + dest[1] = (1*p1 + 7*p2) >> 3; + p1 = source[2]; + dest[2] = (1*p2 + 3*p1) >> 2; + p2 = source[3]; + dest[3] = (1*p1 + 1*p2) >> 1; + p1 = source[4]; + dest[4] = (5*p2 + 3*p1) >> 3; + p2 = source[5]; + dest[5] = (3*p1 + 1*p2) >> 2; + p1 = source[6]; + dest[6] = (7*p2 + 1*p1) >> 3; + dest[7] = p1; + p2 = source[7]; + dest[8] = (1*p1 + 3*p2) >> 2; + p1 = source[8]; + dest[9] = (3*p2 + 5*p1) >> 3; + p2 = source[9]; + dest[10] = (1*p1 + 1*p2) >> 1; + p1 = source[10]; + dest[11] = (5*p2 + 3*p1) >> 3; + p2 = source[11]; + dest[12] = (3*p1 + 1*p2) >> 2; + p1 = source[12]; + dest[13] = p2; + dest[14] = (1*p2 + 7*p1) >> 3; + p2 = source[13]; + dest[15] = (1*p1 + 3*p2) >> 2; + p1 = source[14]; + dest[16] = (3*p2 + 5*p1) >> 3; + p2 = source[15]; + dest[17] = (5*p1 + 3*p2) >> 3; + p1 = source[16]; + dest[18] = (3*p2 + 1*p1) >> 2; + p2 = source[17]; + dest[19] = (7*p1 + 1*p2) >> 3; + dest[20] = p2; + p1 = source[18]; + dest[21] = (1*p2 + 7*p1) >> 3; + p2 = source[19]; + dest[22] = (3*p1 + 5*p2) >> 3; + p1 = source[20]; + dest[23] = (1*p2 + 1*p1) >> 1; + p2 = source[21]; + dest[24] = (5*p1 + 3*p2) >> 3; + p1 = source[22]; + dest[25] = (3*p2 + 1*p1) >> 2; + p2 = source[23]; + dest[26] = (7*p1 + 1*p2) >> 3; + dest[27] = (1*p1 + 7*p2) >> 3; + p1 = source[24]; + dest[28] = (1*p2 + 3*p1) >> 2; + p2 = source[25]; + dest[29] = (3*p1 + 5*p2) >> 3; + p1 = source[26]; + dest[30] = (1*p2 + 1*p1) >> 1; + p2 = source[27]; + dest[31] = (5*p1 + 3*p2) >> 3; + p1 = source[28]; + dest[32] = (7*p2 + 1*p1) >> 3; + p2 = source[29]; + dest[33] = p1; + dest[34] = (1*p1 + 7*p2) >> 3; + p1 = source[30]; + dest[35] = (1*p2 + 3*p1) >> 2; + p2 = source[31]; + dest[36] = (3*p1 + 5*p2) >> 3; + p1 = source[32]; + dest[37] = (5*p2 + 3*p1) >> 3; + p2 = source[33]; + dest[38] = (3*p1 + 1*p2) >> 2; + p1 = source[34]; + dest[39] = (7*p2 + 1*p1) >> 3; + dest[40] = p1; + p2 = source[35]; + dest[41] = (1*p1 + 3*p2) >> 2; + p1 = source[36]; + dest[42] = (3*p2 + 5*p1) >> 3; + p2 = source[37]; + dest[43] = (1*p1 + 1*p2) >> 1; + p1 = source[38]; + dest[44] = (5*p2 + 3*p1) >> 3; + p2 = source[39]; + dest[45] = (3*p1 + 1*p2) >> 2; + p1 = source[40]; + dest[46] = p2; + dest[47] = (1*p2 + 7*p1) >> 3; + p2 = source[41]; + dest[48] = (1*p1 + 3*p2) >> 2; + p1 = source[42]; + dest[49] = (3*p2 + 5*p1) >> 3; + p2 = source[43]; + dest[50] = (1*p1 + 1*p2) >> 1; + p1 = source[44]; + dest[51] = (3*p2 + 1*p1) >> 2; + p2 = source[45]; + dest[52] = (7*p1 + 1*p2) >> 3; + source += 45; + dest += 53; + } + + if ((width += 53) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 7*source[1]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[1] + 3*source[2]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[2] + 1*source[3]) >> 1; + if (--width <= 0) goto done; + *dest++ = (5*source[3] + 3*source[4]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[4] + 1*source[5]) >> 2; + if (--width <= 0) goto done; + *dest++ = (7*source[5] + 1*source[6]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[6]; + if (--width <= 0) goto done; + *dest++ = (1*source[6] + 3*source[7]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[7] + 5*source[8]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[8] + 1*source[9]) >> 1; + if (--width <= 0) goto done; + *dest++ = (5*source[9] + 3*source[10]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[10] + 1*source[11]) >> 2; + if (--width <= 0) goto done; + *dest++ = source[11]; + if (--width <= 0) goto done; + *dest++ = (1*source[11] + 7*source[12]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[12] + 3*source[13]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[13] + 5*source[14]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[14] + 3*source[15]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[15] + 1*source[16]) >> 2; + if (--width <= 0) goto done; + *dest++ = (7*source[16] + 1*source[17]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[17]; + if (--width <= 0) goto done; + *dest++ = (1*source[17] + 7*source[18]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[18] + 5*source[19]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[19] + 1*source[20]) >> 1; + if (--width <= 0) goto done; + *dest++ = (5*source[20] + 3*source[21]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[21] + 1*source[22]) >> 2; + if (--width <= 0) goto done; + *dest++ = (7*source[22] + 1*source[23]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[22] + 7*source[23]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[23] + 3*source[24]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[24] + 5*source[25]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[25] + 1*source[26]) >> 1; + if (--width <= 0) goto done; + *dest++ = (5*source[26] + 3*source[27]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[27] + 1*source[28]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[28]; + if (--width <= 0) goto done; + *dest++ = (1*source[28] + 7*source[29]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[29] + 3*source[30]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[30] + 5*source[31]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[31] + 3*source[32]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[32] + 1*source[33]) >> 2; + if (--width <= 0) goto done; + *dest++ = (7*source[33] + 1*source[34]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[34]; + if (--width <= 0) goto done; + *dest++ = (1*source[34] + 3*source[35]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[35] + 5*source[36]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[36] + 1*source[37]) >> 1; + if (--width <= 0) goto done; + *dest++ = (5*source[37] + 3*source[38]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[38] + 1*source[39]) >> 2; + if (--width <= 0) goto done; + *dest++ = source[39]; + if (--width <= 0) goto done; + *dest++ = (1*source[39] + 7*source[40]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[40] + 3*source[41]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[41] + 5*source[42]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[42] + 1*source[43]) >> 1; + if (--width <= 0) goto done; + *dest++ = (3*source[43] + 1*source[44]) >> 2; + done: + + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Interpolates 64 output pixels from 45 source pixels using shifts. + * Useful for scaling a PAL mpeg2 dvd input source to 1024x768 + * fullscreen resolution, or to 16:9 format on a monitor using square + * pixels. + * (720 x 576 ==> 1024 x 576) + */ +static void scale_line_45_64 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 64) >= 0) { + p1 = source[0]; + p2 = source[1]; + dest[0] = p1; + dest[1] = (1*p1 + 3*p2) >> 2; + p1 = source[2]; + dest[2] = (5*p2 + 3*p1) >> 3; + p2 = source[3]; + dest[3] = (7*p1 + 1*p2) >> 3; + dest[4] = (1*p1 + 3*p2) >> 2; + p1 = source[4]; + dest[5] = (1*p2 + 1*p1) >> 1; + p2 = source[5]; + dest[6] = (3*p1 + 1*p2) >> 2; + dest[7] = (1*p1 + 7*p2) >> 3; + p1 = source[6]; + dest[8] = (3*p2 + 5*p1) >> 3; + p2 = source[7]; + dest[9] = (5*p1 + 3*p2) >> 3; + p1 = source[8]; + dest[10] = p2; + dest[11] = (1*p2 + 3*p1) >> 2; + p2 = source[9]; + dest[12] = (5*p1 + 3*p2) >> 3; + p1 = source[10]; + dest[13] = (7*p2 + 1*p1) >> 3; + dest[14] = (1*p2 + 7*p1) >> 3; + p2 = source[11]; + dest[15] = (1*p1 + 1*p2) >> 1; + p1 = source[12]; + dest[16] = (3*p2 + 1*p1) >> 2; + dest[17] = p1; + p2 = source[13]; + dest[18] = (3*p1 + 5*p2) >> 3; + p1 = source[14]; + dest[19] = (5*p2 + 3*p1) >> 3; + p2 = source[15]; + dest[20] = p1; + dest[21] = (1*p1 + 3*p2) >> 2; + p1 = source[16]; + dest[22] = (1*p2 + 1*p1) >> 1; + p2 = source[17]; + dest[23] = (7*p1 + 1*p2) >> 3; + dest[24] = (1*p1 + 7*p2) >> 3; + p1 = source[18]; + dest[25] = (3*p2 + 5*p1) >> 3; + p2 = source[19]; + dest[26] = (3*p1 + 1*p2) >> 2; + dest[27] = p2; + p1 = source[20]; + dest[28] = (3*p2 + 5*p1) >> 3; + p2 = source[21]; + dest[29] = (5*p1 + 3*p2) >> 3; + p1 = source[22]; + dest[30] = (7*p2 + 1*p1) >> 3; + dest[31] = (1*p2 + 3*p1) >> 2; + p2 = source[23]; + dest[32] = (1*p1 + 1*p2) >> 1; + p1 = source[24]; + dest[33] = (3*p2 + 1*p1) >> 2; + dest[34] = (1*p2 + 7*p1) >> 3; + p2 = source[25]; + dest[35] = (3*p1 + 5*p2) >> 3; + p1 = source[26]; + dest[36] = (3*p2 + 1*p1) >> 2; + p2 = source[27]; + dest[37] = p1; + dest[38] = (1*p1 + 3*p2) >> 2; + p1 = source[28]; + dest[39] = (5*p2 + 3*p1) >> 3; + p2 = source[29]; + dest[40] = (7*p1 + 1*p2) >> 3; + dest[41] = (1*p1 + 7*p2) >> 3; + p1 = source[30]; + dest[42] = (1*p2 + 1*p1) >> 1; + p2 = source[31]; + dest[43] = (3*p1 + 1*p2) >> 2; + dest[44] = (1*p1 + 7*p2) >> 3; + p1 = source[32]; + dest[45] = (3*p2 + 5*p1) >> 3; + p2 = source[33]; + dest[46] = (5*p1 + 3*p2) >> 3; + p1 = source[34]; + dest[47] = p2; + dest[48] = (1*p2 + 3*p1) >> 2; + p2 = source[35]; + dest[49] = (1*p1 + 1*p2) >> 1; + p1 = source[36]; + dest[50] = (7*p2 + 1*p1) >> 3; + dest[51] = (1*p2 + 7*p1) >> 3; + p2 = source[37]; + dest[52] = (1*p1 + 1*p2) >> 1; + p1 = source[38]; + dest[53] = (3*p2 + 1*p1) >> 2; + dest[54] = p1; + p2 = source[39]; + dest[55] = (3*p1 + 5*p2) >> 3; + p1 = source[40]; + dest[56] = (5*p2 + 3*p1) >> 3; + p2 = source[41]; + dest[57] = (7*p1 + 1*p2) >> 3; + dest[58] = (1*p1 + 3*p2) >> 2; + p1 = source[42]; + dest[59] = (1*p2 + 1*p1) >> 1; + p2 = source[43]; + dest[60] = (7*p1 + 1*p2) >> 3; + dest[61] = (1*p1 + 7*p2) >> 3; + p1 = source[44]; + dest[62] = (3*p2 + 5*p1) >> 3; + p2 = source[45]; + dest[63] = (3*p1 + 1*p2) >> 2; + source += 45; + dest += 64; + } + + if ((width += 64) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 3*source[1]) >> 2; + if (--width <= 0) goto done; + *dest++ = (5*source[1] + 3*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[2] + 1*source[3]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[2] + 3*source[3]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[3] + 1*source[4]) >> 1; + if (--width <= 0) goto done; + *dest++ = (3*source[4] + 1*source[5]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[4] + 7*source[5]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[5] + 5*source[6]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[6] + 3*source[7]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[7]; + if (--width <= 0) goto done; + *dest++ = (1*source[7] + 3*source[8]) >> 2; + if (--width <= 0) goto done; + *dest++ = (5*source[8] + 3*source[9]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[9] + 1*source[10]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[9] + 7*source[10]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[10] + 1*source[11]) >> 1; + if (--width <= 0) goto done; + *dest++ = (3*source[11] + 1*source[12]) >> 2; + if (--width <= 0) goto done; + *dest++ = source[12]; + if (--width <= 0) goto done; + *dest++ = (3*source[12] + 5*source[13]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[13] + 3*source[14]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[14]; + if (--width <= 0) goto done; + *dest++ = (1*source[14] + 3*source[15]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[15] + 1*source[16]) >> 1; + if (--width <= 0) goto done; + *dest++ = (7*source[16] + 1*source[17]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[16] + 7*source[17]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[17] + 5*source[18]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[18] + 1*source[19]) >> 2; + if (--width <= 0) goto done; + *dest++ = source[19]; + if (--width <= 0) goto done; + *dest++ = (3*source[19] + 5*source[20]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[20] + 3*source[21]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[21] + 1*source[22]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[21] + 3*source[22]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[22] + 1*source[23]) >> 1; + if (--width <= 0) goto done; + *dest++ = (3*source[23] + 1*source[24]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[23] + 7*source[24]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[24] + 5*source[25]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[25] + 1*source[26]) >> 2; + if (--width <= 0) goto done; + *dest++ = source[26]; + if (--width <= 0) goto done; + *dest++ = (1*source[26] + 3*source[27]) >> 2; + if (--width <= 0) goto done; + *dest++ = (5*source[27] + 3*source[28]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[28] + 1*source[29]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[28] + 7*source[29]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[29] + 1*source[30]) >> 1; + if (--width <= 0) goto done; + *dest++ = (3*source[30] + 1*source[31]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[30] + 7*source[31]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[31] + 5*source[32]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[32] + 3*source[33]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[33]; + if (--width <= 0) goto done; + *dest++ = (1*source[33] + 3*source[34]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[34] + 1*source[35]) >> 1; + if (--width <= 0) goto done; + *dest++ = (7*source[35] + 1*source[36]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[35] + 7*source[36]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[36] + 1*source[37]) >> 1; + if (--width <= 0) goto done; + *dest++ = (3*source[37] + 1*source[38]) >> 2; + if (--width <= 0) goto done; + *dest++ = source[38]; + if (--width <= 0) goto done; + *dest++ = (3*source[38] + 5*source[39]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[39] + 3*source[40]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[40] + 1*source[41]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[40] + 3*source[41]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[41] + 1*source[42]) >> 1; + if (--width <= 0) goto done; + *dest++ = (7*source[42] + 1*source[43]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[42] + 7*source[43]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[43] + 5*source[44]) >> 3; + done: + + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Interpolates 16 output pixels from 9 source pixels using shifts. + * Useful for scaling a PAL mpeg2 dvd input source to 1280x1024 fullscreen + * (720 x 576 ==> 1280 x XXX) + */ +static void scale_line_9_16 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 16) >= 0) { + p1 = source[0]; + p2 = source[1]; + dest[0] = p1; + dest[1] = (1*p1 + 1*p2) >> 1; + p1 = source[2]; + dest[2] = (7*p2 + 1*p1) >> 3; + dest[3] = (3*p2 + 5*p1) >> 3; + p2 = source[3]; + dest[4] = (3*p1 + 1*p2) >> 2; + dest[5] = (1*p1 + 3*p2) >> 2; + p1 = source[4]; + dest[6] = (5*p2 + 3*p1) >> 3; + dest[7] = (1*p2 + 7*p1) >> 3; + p2 = source[5]; + dest[8] = (1*p1 + 1*p2) >> 1; + p1 = source[6]; + dest[9] = p2; + dest[10] = (3*p2 + 5*p1) >> 3; + p2 = source[7]; + dest[11] = (7*p1 + 1*p2) >> 3; + dest[12] = (1*p1 + 3*p2) >> 2; + p1 = source[8]; + dest[13] = (3*p2 + 1*p1) >> 2; + dest[14] = (1*p2 + 7*p1) >> 3; + p2 = source[9]; + dest[15] = (5*p1 + 3*p2) >> 3; + source += 9; + dest += 16; + } + + if ((width += 16) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 1*source[1]) >> 1; + if (--width <= 0) goto done; + *dest++ = (7*source[1] + 1*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[1] + 5*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[2] + 1*source[3]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[2] + 3*source[3]) >> 2; + if (--width <= 0) goto done; + *dest++ = (5*source[3] + 3*source[4]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[3] + 7*source[4]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[4] + 1*source[5]) >> 1; + if (--width <= 0) goto done; + *dest++ = source[5]; + if (--width <= 0) goto done; + *dest++ = (3*source[5] + 5*source[6]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[6] + 1*source[7]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[6] + 3*source[7]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[7] + 1*source[8]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[7] + 7*source[8]) >> 3; +done: + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Interpolates 12 output pixels from 11 source pixels using shifts. + * Useful for scaling a PAL vcd input source to 4:3 display format. + */ +static void scale_line_11_12 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 12) >= 0) { + p1 = source[0]; + p2 = source[1]; + dest[0] = p1; + dest[1] = (1*p1 + 7*p2) >> 3; + p1 = source[2]; + dest[2] = (1*p2 + 7*p1) >> 3; + p2 = source[3]; + dest[3] = (1*p1 + 3*p2) >> 2; + p1 = source[4]; + dest[4] = (3*p2 + 5*p1) >> 3; + p2 = source[5]; + dest[5] = (3*p1 + 5*p2) >> 3; + p1 = source[6]; + dest[6] = (1*p2 + 1*p1) >> 1; + p2 = source[7]; + dest[7] = (5*p1 + 3*p2) >> 3; + p1 = source[8]; + dest[8] = (5*p2 + 3*p1) >> 3; + p2 = source[9]; + dest[9] = (3*p1 + 1*p2) >> 2; + p1 = source[10]; + dest[10] = (7*p2 + 1*p1) >> 3; + p2 = source[11]; + dest[11] = (7*p1 + 1*p2) >> 3; + source += 11; + dest += 12; + } + + if ((width += 12) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 7*source[1]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[1] + 7*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[2] + 3*source[3]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[3] + 5*source[4]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[4] + 5*source[5]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[5] + 1*source[6]) >> 1; + if (--width <= 0) goto done; + *dest++ = (5*source[6] + 3*source[7]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[7] + 3*source[8]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[8] + 1*source[9]) >> 2; + if (--width <= 0) goto done; + *dest++ = (7*source[9] + 1*source[10]) >> 3; +done: + + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Interpolates 24 output pixels from 11 source pixels using shifts. + * Useful for scaling a PAL vcd input source to 4:3 display format + * at 2*zoom. + */ +static void scale_line_11_24 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 24) >= 0) { + p1 = source[0]; + p2 = source[1]; + dest[0] = p1; + dest[1] = (1*p1 + 1*p2) >> 1; + dest[2] = (1*p1 + 7*p2) >> 3; + p1 = source[2]; + dest[3] = (5*p2 + 3*p1) >> 3; + dest[4] = (1*p2 + 7*p1) >> 3; + p2 = source[3]; + dest[5] = (3*p1 + 1*p2) >> 2; + dest[6] = (1*p1 + 3*p2) >> 2; + p1 = source[4]; + dest[7] = (3*p2 + 1*p1) >> 2; + dest[8] = (3*p2 + 5*p1) >> 3; + p2 = source[5]; + dest[9] = (7*p1 + 1*p2) >> 3; + dest[10] = (3*p1 + 5*p2) >> 3; + p1 = source[6]; + dest[11] = p2; + dest[12] = (1*p2 + 1*p1) >> 1; + dest[13] = p1; + p2 = source[7]; + dest[14] = (5*p1 + 3*p2) >> 3; + dest[15] = (1*p1 + 7*p2) >> 3; + p1 = source[8]; + dest[16] = (5*p2 + 3*p1) >> 3; + dest[17] = (1*p2 + 3*p1) >> 2; + p2 = source[9]; + dest[18] = (3*p1 + 1*p2) >> 2; + dest[19] = (1*p1 + 3*p2) >> 2; + p1 = source[10]; + dest[20] = (7*p2 + 1*p1) >> 3; + dest[21] = (3*p2 + 5*p1) >> 3; + p2 = source[11]; + dest[22] = (7*p1 + 1*p2) >> 3; + dest[23] = (1*p1 + 1*p2) >> 1; + source += 11; + dest += 24; + } + + if ((width += 24) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 1*source[1]) >> 1; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 7*source[1]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[1] + 3*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[1] + 7*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[2] + 1*source[3]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[2] + 3*source[3]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[3] + 1*source[4]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[3] + 5*source[4]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[4] + 1*source[5]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[4] + 5*source[5]) >> 3; + if (--width <= 0) goto done; + *dest++ = source[5]; + if (--width <= 0) goto done; + *dest++ = (1*source[5] + 1*source[6]) >> 1; + if (--width <= 0) goto done; + *dest++ = source[6]; + if (--width <= 0) goto done; + *dest++ = (5*source[6] + 3*source[7]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[6] + 7*source[7]) >> 3; + if (--width <= 0) goto done; + *dest++ = (5*source[7] + 3*source[8]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[7] + 3*source[8]) >> 2; + if (--width <= 0) goto done; + *dest++ = (3*source[8] + 1*source[9]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[8] + 3*source[9]) >> 2; + if (--width <= 0) goto done; + *dest++ = (7*source[9] + 1*source[10]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[9] + 5*source[10]) >> 3; + if (--width <= 0) goto done; + *dest++ = (7*source[10] + 1*source[11]) >> 3; +done: + + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Interpolates 8 output pixels from 5 source pixels using shifts. + * Useful for scaling a PAL svcd input source to 4:3 display format. + */ +static void scale_line_5_8 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 8) >= 0) { + p1 = source[0]; + p2 = source[1]; + dest[0] = p1; + dest[1] = (3*p1 + 5*p2) >> 3; + p1 = source[2]; + dest[2] = (3*p2 + 1*p1) >> 2; + dest[3] = (1*p2 + 7*p1) >> 3; + p2 = source[3]; + dest[4] = (1*p1 + 1*p2) >> 1; + p1 = source[4]; + dest[5] = (7*p2 + 1*p1) >> 3; + dest[6] = (1*p2 + 3*p1) >> 2; + p2 = source[5]; + dest[7] = (5*p1 + 3*p2) >> 3; + source += 5; + dest += 8; + } + + if ((width += 8) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (3*source[0] + 5*source[1]) >> 3; + if (--width <= 0) goto done; + *dest++ = (3*source[1] + 1*source[2]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[1] + 7*source[2]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[2] + 1*source[3]) >> 1; + if (--width <= 0) goto done; + *dest++ = (7*source[3] + 1*source[4]) >> 3; + if (--width <= 0) goto done; + *dest++ = (1*source[3] + 3*source[4]) >> 2; +done: + + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Interpolates 4 output pixels from 3 source pixels using shifts. + * Useful for scaling a NTSC svcd input source to 4:3 display format. + */ +static void scale_line_3_4 (uint8_t *source, uint8_t *dest, + int width, int step) { + + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + while ((width -= 4) >= 0) { + p1 = source[0]; + p2 = source[1]; + dest[0] = p1; + dest[1] = (1*p1 + 3*p2) >> 2; + p1 = source[2]; + dest[2] = (1*p2 + 1*p1) >> 1; + p2 = source[3]; + dest[3] = (3*p1 + 1*p2) >> 2; + source += 3; + dest += 4; + } + + if ((width += 4) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (1*source[0] + 3*source[1]) >> 2; + if (--width <= 0) goto done; + *dest++ = (1*source[1] + 1*source[2]) >> 1; +done: + + xine_profiler_stop_count(prof_scale_line); +} + + +/* Interpolate 2 output pixels from one source pixel. */ + +static void scale_line_1_2 (uint8_t *source, uint8_t *dest, + int width, int step) { + int p1, p2; + + xine_profiler_start_count(prof_scale_line); + + p1 = *source; + while ((width -= 4) >= 0) { + *dest++ = p1; + p2 = *++source; + *dest++ = (p1 + p2) >> 1; + *dest++ = p2; + p1 = *++source; + *dest++ = (p2 + p1) >> 1; + } + + if ((width += 4) <= 0) goto done; + *dest++ = source[0]; + if (--width <= 0) goto done; + *dest++ = (source[0] + source[1]) >> 1; + if (--width <= 0) goto done; + *dest++ = source[1]; + done: + + xine_profiler_stop_count(prof_scale_line); +} + + +/* + * Scale line with no horizontal scaling. For NTSC mpeg2 dvd input in + * 4:3 output format (720x480 -> 720x540) + */ +static void scale_line_1_1 (uint8_t *source, uint8_t *dest, + int width, int step) { + + xine_profiler_start_count(prof_scale_line); + xine_fast_memcpy(dest, source, width); + xine_profiler_stop_count(prof_scale_line); +} + + +static scale_line_func_t find_scale_line_func(int step) { + static struct { + int src_step; + int dest_step; + scale_line_func_t func; + char *desc; + } scale_line[] = { + { 15, 16, scale_line_15_16, "dvd 4:3(pal)" }, + { 45, 64, scale_line_45_64, "dvd 16:9(pal), fullscreen(1024x768)" }, + { 9, 16, scale_line_9_16, "dvd fullscreen(1280x1024)" }, + { 45, 53, scale_line_45_53, "dvd 16:9(ntsc)" }, + { 11, 12, scale_line_11_12, "vcd 4:3(pal)" }, + { 11, 24, scale_line_11_24, "vcd 4:3(pal) 2*zoom" }, + { 5, 8, scale_line_5_8, "svcd 4:3(pal)" }, + { 3, 4, scale_line_3_4, "svcd 4:3(ntsc)" }, + { 1, 2, scale_line_1_2, "2*zoom" }, + { 1, 1, scale_line_1_1, "non-scaled" }, + }; + int i; + + for (i = 0; i < sizeof(scale_line)/sizeof(scale_line[0]); i++) { + if (step == scale_line[i].src_step*32768/scale_line[i].dest_step) { + printf("yuv2rgb: using %s optimized scale_line\n", scale_line[i].desc); + return scale_line[i].func; + } + } + printf("yuv2rgb: using generic scale_line with interpolation\n"); + return scale_line_gen; + +} + + +static void scale_line_2 (uint8_t *source, uint8_t *dest, + int width, int step) { + int p1; + int p2; + int dx; + + p1 = *source; source+=2; + p2 = *source; source+=2; + dx = 0; + + while (width) { + + *dest = (p1 * (32768 - dx) + p2 * dx) / 32768; + + dx += step; + while (dx > 32768) { + dx -= 32768; + p1 = p2; + p2 = *source; + source+=2; + } + + dest ++; + width --; + } +} + +static void scale_line_4 (uint8_t *source, uint8_t *dest, + int width, int step) { + int p1; + int p2; + int dx; + + p1 = *source; source+=4; + p2 = *source; source+=4; + dx = 0; + + while (width) { + + *dest = (p1 * (32768 - dx) + p2 * dx) / 32768; + + dx += step; + while (dx > 32768) { + dx -= 32768; + p1 = p2; + p2 = *source; + source+=4; + } + + dest ++; + width --; + } +} + + +#define RGB(i) \ + U = pu[i]; \ + V = pv[i]; \ + r = this->table_rV[V]; \ + g = (void *) (((uint8_t *)this->table_gU[U]) + this->table_gV[V]); \ + b = this->table_bU[U]; + +#define DST1(i) \ + Y = py_1[2*i]; \ + dst_1[2*i] = r[Y] + g[Y] + b[Y]; \ + Y = py_1[2*i+1]; \ + dst_1[2*i+1] = r[Y] + g[Y] + b[Y]; + +#define DST2(i) \ + Y = py_2[2*i]; \ + dst_2[2*i] = r[Y] + g[Y] + b[Y]; \ + Y = py_2[2*i+1]; \ + dst_2[2*i+1] = r[Y] + g[Y] + b[Y]; + +#define DST1RGB(i) \ + Y = py_1[2*i]; \ + dst_1[6*i] = r[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = b[Y]; \ + Y = py_1[2*i+1]; \ + dst_1[6*i+3] = r[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = b[Y]; + +#define DST2RGB(i) \ + Y = py_2[2*i]; \ + dst_2[6*i] = r[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = b[Y]; \ + Y = py_2[2*i+1]; \ + dst_2[6*i+3] = r[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = b[Y]; + +#define DST1BGR(i) \ + Y = py_1[2*i]; \ + dst_1[6*i] = b[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = r[Y]; \ + Y = py_1[2*i+1]; \ + dst_1[6*i+3] = b[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = r[Y]; + +#define DST2BGR(i) \ + Y = py_2[2*i]; \ + dst_2[6*i] = b[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = r[Y]; \ + Y = py_2[2*i+1]; \ + dst_2[6*i+3] = b[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = r[Y]; + +#define DST1CMAP(i) \ + Y = py_1[2*i]; \ + dst_1[2*i] = this->cmap[r[Y] + g[Y] + b[Y]]; \ + Y = py_1[2*i+1]; \ + dst_1[2*i+1] = this->cmap[r[Y] + g[Y] + b[Y]]; + +#define DST2CMAP(i) \ + Y = py_2[2*i]; \ + dst_2[2*i] = this->cmap[r[Y] + g[Y] + b[Y]]; \ + Y = py_2[2*i+1]; \ + dst_2[2*i+1] = this->cmap[r[Y] + g[Y] + b[Y]]; + +static void yuv2rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst, + uint8_t * _py, uint8_t * _pu, uint8_t * _pv) +{ + int U, V, Y; + uint8_t * py_1, * py_2, * pu, * pv; + uint32_t * r, * g, * b; + uint32_t * dst_1, * dst_2; + int width, height, dst_height; + int dy; + + if (this->do_scale) { + scale_line_func_t scale_line = this->scale_line; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + dst_height = this->dest_height; + + for (height = 0;; ) { + dst_1 = (uint32_t*)_dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1(0); + + RGB(1); + DST1(1); + + RGB(2); + DST1(2); + + RGB(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--dst_height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (dst_height <= 0) + break; + + do { + dy -= 32768; + _py += this->y_stride; + + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + if (height & 1) { + _pu += this->uv_stride; + _pv += this->uv_stride; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + + } + height++; + } while( dy>=32768); + } + } else { + height = this->source_height >> 1; + do { + dst_1 = (uint32_t*)_dst; + dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride ); + py_1 = _py; + py_2 = _py + this->y_stride; + pu = _pu; + pv = _pv; + + width = this->source_width >> 3; + do { + RGB(0); + DST1(0); + DST2(0); + + RGB(1); + DST2(1); + DST1(1); + + RGB(2); + DST1(2); + DST2(2); + + RGB(3); + DST2(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + py_2 += 8; + dst_1 += 8; + dst_2 += 8; + } while (--width); + + _dst += 2 * this->rgb_stride; + _py += 2 * this->y_stride; + _pu += this->uv_stride; + _pv += this->uv_stride; + + } while (--height); + } +} + +/* This is very near from the yuv2rgb_c_32 code */ +static void yuv2rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst, + uint8_t * _py, uint8_t * _pu, uint8_t * _pv) +{ + int U, V, Y; + uint8_t * py_1, * py_2, * pu, * pv; + uint8_t * r, * g, * b; + uint8_t * dst_1, * dst_2; + int width, height, dst_height; + int dy; + + if (this->do_scale) { + + scale_line_func_t scale_line = this->scale_line; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + dst_height = this->dest_height; + + for (height = 0;; ) { + dst_1 = _dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1RGB(0); + + RGB(1); + DST1RGB(1); + + RGB(2); + DST1RGB(2); + + RGB(3); + DST1RGB(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 24; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--dst_height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (dst_height <= 0) + break; + + do { + dy -= 32768; + _py += this->y_stride; + + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + if (height & 1) { + _pu += this->uv_stride; + _pv += this->uv_stride; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + + } + height++; + } while (dy>=32768); + } + } else { + height = this->source_height >> 1; + do { + dst_1 = _dst; + dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride ); + py_1 = _py; + py_2 = _py + this->y_stride; + pu = _pu; + pv = _pv; + + width = this->source_width >> 3; + do { + RGB(0); + DST1RGB(0); + DST2RGB(0); + + RGB(1); + DST2RGB(1); + DST1RGB(1); + + RGB(2); + DST1RGB(2); + DST2RGB(2); + + RGB(3); + DST2RGB(3); + DST1RGB(3); + + pu += 4; + pv += 4; + py_1 += 8; + py_2 += 8; + dst_1 += 24; + dst_2 += 24; + } while (--width); + + _dst += 2 * this->rgb_stride; + _py += 2 * this->y_stride; + _pu += this->uv_stride; + _pv += this->uv_stride; + + } while (--height); + } +} + +/* only trivial mods from yuv2rgb_c_24_rgb */ +static void yuv2rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst, + uint8_t * _py, uint8_t * _pu, uint8_t * _pv) +{ + int U, V, Y; + uint8_t * py_1, * py_2, * pu, * pv; + uint8_t * r, * g, * b; + uint8_t * dst_1, * dst_2; + int width, height, dst_height; + int dy; + + if (this->do_scale) { + + scale_line_func_t scale_line = this->scale_line; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + dst_height = this->dest_height; + + for (height = 0;; ) { + dst_1 = _dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1BGR(0); + + RGB(1); + DST1BGR(1); + + RGB(2); + DST1BGR(2); + + RGB(3); + DST1BGR(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 24; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--dst_height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (dst_height <= 0) + break; + + do { + dy -= 32768; + _py += this->y_stride; + + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + if (height & 1) { + _pu += this->uv_stride; + _pv += this->uv_stride; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + + } + height++; + } while( dy>=32768 ); + } + + } else { + height = this->source_height >> 1; + do { + dst_1 = _dst; + dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride ); + py_1 = _py; + py_2 = _py + this->y_stride; + pu = _pu; + pv = _pv; + width = this->source_width >> 3; + do { + RGB(0); + DST1BGR(0); + DST2BGR(0); + + RGB(1); + DST2BGR(1); + DST1BGR(1); + + RGB(2); + DST1BGR(2); + DST2BGR(2); + + RGB(3); + DST2BGR(3); + DST1BGR(3); + + pu += 4; + pv += 4; + py_1 += 8; + py_2 += 8; + dst_1 += 24; + dst_2 += 24; + } while (--width); + + _dst += 2 * this->rgb_stride; + _py += 2 * this->y_stride; + _pu += this->uv_stride; + _pv += this->uv_stride; + + } while (--height); + } +} + +/* This is exactly the same code as yuv2rgb_c_32 except for the types of */ +/* r, g, b, dst_1, dst_2 */ +static void yuv2rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst, + uint8_t * _py, uint8_t * _pu, uint8_t * _pv) +{ + int U, V, Y; + uint8_t * py_1, * py_2, * pu, * pv; + uint16_t * r, * g, * b; + uint16_t * dst_1, * dst_2; + int width, height, dst_height; + int dy; + + if (this->do_scale) { + scale_line_func_t scale_line = this->scale_line; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + dst_height = this->dest_height; + + for (height = 0;; ) { + dst_1 = (uint16_t*)_dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1(0); + + RGB(1); + DST1(1); + + RGB(2); + DST1(2); + + RGB(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--dst_height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (dst_height <= 0) + break; + + do { + dy -= 32768; + _py += this->y_stride; + + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + if (height & 1) { + _pu += this->uv_stride; + _pv += this->uv_stride; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + + } + height++; + } while( dy>=32768); + } + } else { + height = this->source_height >> 1; + do { + dst_1 = (uint16_t*)_dst; + dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride ); + py_1 = _py; + py_2 = _py + this->y_stride; + pu = _pu; + pv = _pv; + width = this->source_width >> 3; + do { + RGB(0); + DST1(0); + DST2(0); + + RGB(1); + DST2(1); + DST1(1); + + RGB(2); + DST1(2); + DST2(2); + + RGB(3); + DST2(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + py_2 += 8; + dst_1 += 8; + dst_2 += 8; + } while (--width); + + _dst += 2 * this->rgb_stride; + _py += 2 * this->y_stride; + _pu += this->uv_stride; + _pv += this->uv_stride; + + } while (--height); + } +} + +/* This is exactly the same code as yuv2rgb_c_32 except for the types of */ +/* r, g, b, dst_1, dst_2 */ +static void yuv2rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst, + uint8_t * _py, uint8_t * _pu, uint8_t * _pv) +{ + int U, V, Y; + uint8_t * py_1, * py_2, * pu, * pv; + uint8_t * r, * g, * b; + uint8_t * dst_1, * dst_2; + int width, height, dst_height; + int dy; + + if (this->do_scale) { + scale_line_func_t scale_line = this->scale_line; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + dst_height = this->dest_height; + + for (height = 0;; ) { + dst_1 = (uint8_t*)_dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1(0); + + RGB(1); + DST1(1); + + RGB(2); + DST1(2); + + RGB(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--dst_height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (dst_height <= 0) + break; + + do { + dy -= 32768; + _py += this->y_stride; + + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + if (height & 1) { + _pu += this->uv_stride; + _pv += this->uv_stride; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + + } + height++; + } while( dy>=32768 ); + } + } else { + height = this->source_height >> 1; + do { + dst_1 = (uint8_t*)_dst; + dst_2 = (void*)( (uint8_t *)_dst + this->rgb_stride ); + py_1 = _py; + py_2 = _py + this->y_stride; + pu = _pu; + pv = _pv; + + width = this->source_width >> 3; + do { + RGB(0); + DST1(0); + DST2(0); + + RGB(1); + DST2(1); + DST1(1); + + RGB(2); + DST1(2); + DST2(2); + + RGB(3); + DST2(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + py_2 += 8; + dst_1 += 8; + dst_2 += 8; + } while (--width); + + _dst += 2 * this->rgb_stride; + _py += 2 * this->y_stride; + _pu += this->uv_stride; + _pv += this->uv_stride; + + } while (--height); + } +} + +/* now for something different: 256 grayscale mode */ +static void yuv2rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst, + uint8_t * _py, uint8_t * _pu, uint8_t * _pv) +{ + int height, dst_height; + int dy; + + if (this->do_scale) { + scale_line_func_t scale_line = this->scale_line; + + dy = 0; + dst_height = this->dest_height; + + for (;;) { + scale_line (_py, _dst, this->dest_width, this->step_dx); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--dst_height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (dst_height <= 0) + break; + + _py += this->y_stride*(dy>>15); + dy &= 32767; + /* dy -= 32768; + _py += this->y_stride; + */ + } + } else { + for (height = this->source_height; --height >= 0; ) { + xine_fast_memcpy(_dst, _py, this->dest_width); + _dst += this->rgb_stride; + _py += this->y_stride; + } + } +} + +/* now for something different: 256 color mode */ +static void yuv2rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst, + uint8_t * _py, uint8_t * _pu, uint8_t * _pv) +{ + int U, V, Y; + uint8_t * py_1, * py_2, * pu, * pv; + uint16_t * r, * g, * b; + uint8_t * dst_1, * dst_2; + int width, height, dst_height; + int dy; + + if (this->do_scale) { + scale_line_func_t scale_line = this->scale_line; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + dst_height = this->dest_height; + + for (height = 0;; ) { + dst_1 = _dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1CMAP(0); + + RGB(1); + DST1CMAP(1); + + RGB(2); + DST1CMAP(2); + + RGB(3); + DST1CMAP(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--dst_height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (dst_height <= 0) + break; + + do { + dy -= 32768; + _py += this->y_stride; + + scale_line (_py, this->y_buffer, + this->dest_width, this->step_dx); + + if (height & 1) { + _pu += this->uv_stride; + _pv += this->uv_stride; + + scale_line (_pu, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line (_pv, this->v_buffer, + this->dest_width >> 1, this->step_dx); + + } + height++; + } while( dy>=32768 ); + } + } else { + height = this->source_height >> 1; + do { + dst_1 = _dst; + dst_2 = _dst + this->rgb_stride; + py_1 = _py; + py_2 = _py + this->y_stride; + pu = _pu; + pv = _pv; + width = this->source_width >> 3; + do { + RGB(0); + DST1CMAP(0); + DST2CMAP(0); + + RGB(1); + DST2CMAP(1); + DST1CMAP(1); + + RGB(2); + DST1CMAP(2); + DST2CMAP(2); + + RGB(3); + DST2CMAP(3); + DST1CMAP(3); + + pu += 4; + pv += 4; + py_1 += 8; + py_2 += 8; + dst_1 += 8; + dst_2 += 8; + } while (--width); + + _dst += 2 * this->rgb_stride; + _py += 2 * this->y_stride; + _pu += this->uv_stride; + _pv += this->uv_stride; + + } while (--height); + } +} + +static int div_round (int dividend, int divisor) +{ + if (dividend > 0) + return (dividend + (divisor>>1)) / divisor; + else + return -((-dividend + (divisor>>1)) / divisor); +} + +static void yuv2rgb_setup_tables (yuv2rgb_factory_t *this, int mode, int swapped) +{ + int i; + uint8_t table_Y[1024]; + uint32_t * table_32 = 0; + uint16_t * table_16 = 0; + uint8_t * table_8 = 0; + int entry_size = 0; + void *table_r = 0, *table_g = 0, *table_b = 0; + int shift_r = 0, shift_g = 0, shift_b = 0; + + int crv = Inverse_Table_6_9[this->matrix_coefficients][0]; + int cbu = Inverse_Table_6_9[this->matrix_coefficients][1]; + int cgu = -Inverse_Table_6_9[this->matrix_coefficients][2]; + int cgv = -Inverse_Table_6_9[this->matrix_coefficients][3]; + + for (i = 0; i < 1024; i++) { + int j; + + j = (76309 * (i - 384 - 16) + 32768) >> 16; + j = (j < 0) ? 0 : ((j > 255) ? 255 : j); + table_Y[i] = j; + } + + switch (mode) { + case MODE_32_RGB: + case MODE_32_BGR: + table_32 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t)); + + entry_size = sizeof (uint32_t); + table_r = table_32 + 197; + table_b = table_32 + 197 + 685; + table_g = table_32 + 197 + 2*682; + + if (swapped) { + switch (mode) { + case MODE_32_RGB: shift_r = 8; shift_g = 16; shift_b = 24; break; + case MODE_32_BGR: shift_r = 24; shift_g = 16; shift_b = 8; break; + } + } else { + switch (mode) { + case MODE_32_RGB: shift_r = 16; shift_g = 8; shift_b = 0; break; + case MODE_32_BGR: shift_r = 0; shift_g = 8; shift_b = 16; break; + } + } + + for (i = -197; i < 256+197; i++) + ((uint32_t *) table_r)[i] = table_Y[i+384] << shift_r; + for (i = -132; i < 256+132; i++) + ((uint32_t *) table_g)[i] = table_Y[i+384] << shift_g; + for (i = -232; i < 256+232; i++) + ((uint32_t *) table_b)[i] = table_Y[i+384] << shift_b; + break; + + case MODE_24_RGB: + case MODE_24_BGR: + table_8 = malloc ((256 + 2*232) * sizeof (uint8_t)); + + entry_size = sizeof (uint8_t); + table_r = table_g = table_b = table_8 + 232; + + for (i = -232; i < 256+232; i++) + ((uint8_t * )table_b)[i] = table_Y[i+384]; + break; + + case MODE_15_BGR: + case MODE_16_BGR: + case MODE_15_RGB: + case MODE_16_RGB: + table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); + + entry_size = sizeof (uint16_t); + table_r = table_16 + 197; + table_b = table_16 + 197 + 685; + table_g = table_16 + 197 + 2*682; + + if (swapped) { + switch (mode) { + case MODE_15_BGR: shift_r = 8; shift_g = 5; shift_b = 2; break; + case MODE_16_BGR: shift_r = 8; shift_g = 5; shift_b = 3; break; + case MODE_15_RGB: shift_r = 2; shift_g = 5; shift_b = 8; break; + case MODE_16_RGB: shift_r = 3; shift_g = 5; shift_b = 8; break; + } + } else { + switch (mode) { + case MODE_15_BGR: shift_r = 0; shift_g = 5; shift_b = 10; break; + case MODE_16_BGR: shift_r = 0; shift_g = 5; shift_b = 11; break; + case MODE_15_RGB: shift_r = 10; shift_g = 5; shift_b = 0; break; + case MODE_16_RGB: shift_r = 11; shift_g = 5; shift_b = 0; break; + } + } + + for (i = -197; i < 256+197; i++) + ((uint16_t *)table_r)[i] = (table_Y[i+384] >> 3) << shift_r; + + for (i = -132; i < 256+132; i++) { + int j = table_Y[i+384] >> (((mode==MODE_16_RGB) || (mode==MODE_16_BGR)) ? 2 : 3); + if (swapped) + ((uint16_t *)table_g)[i] = (j&7) << 13 | (j>>3); + else + ((uint16_t *)table_g)[i] = j << 5; + } + for (i = -232; i < 256+232; i++) + ((uint16_t *)table_b)[i] = (table_Y[i+384] >> 3) << shift_b; + + break; + + case MODE_8_RGB: + case MODE_8_BGR: + table_8 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); + + entry_size = sizeof (uint8_t); + table_r = table_8 + 197; + table_b = table_8 + 197 + 685; + table_g = table_8 + 197 + 2*682; + + switch (mode) { + case MODE_8_RGB: shift_r = 5; shift_g = 2; shift_b = 0; break; + case MODE_8_BGR: shift_r = 0; shift_g = 3; shift_b = 6; break; + } + + for (i = -197; i < 256+197; i++) + ((uint8_t *) table_r)[i] = (table_Y[i+384] >> 5) << shift_r; + for (i = -132; i < 256+132; i++) + ((uint8_t *) table_g)[i] = (table_Y[i+384] >> 5) << shift_g; + for (i = -232; i < 256+232; i++) + ((uint8_t *) table_b)[i] = (table_Y[i+384] >> 6) << shift_b; + break; + + case MODE_8_GRAY: + return; + + case MODE_PALETTE: + table_16 = malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); + + entry_size = sizeof (uint16_t); + table_r = table_16 + 197; + table_b = table_16 + 197 + 685; + table_g = table_16 + 197 + 2*682; + + shift_r = 10; + shift_g = 5; + shift_b = 0; + + for (i = -197; i < 256+197; i++) + ((uint16_t *)table_r)[i] = (table_Y[i+384] >> 3) << 10; + + for (i = -132; i < 256+132; i++) + ((uint16_t *)table_g)[i] = (table_Y[i+384] >> 3) << 5; + + for (i = -232; i < 256+232; i++) + ((uint16_t *)table_b)[i] = (table_Y[i+384] >> 3) << 0; + + break; + + + default: + fprintf (stderr, "mode %d not supported by yuv2rgb\n", mode); + abort(); + } + + for (i = 0; i < 256; i++) { + this->table_rV[i] = (((uint8_t *) table_r) + + entry_size * div_round (crv * (i-128), 76309)); + this->table_gU[i] = (((uint8_t *) table_g) + + entry_size * div_round (cgu * (i-128), 76309)); + this->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); + this->table_bU[i] = (((uint8_t *)table_b) + + entry_size * div_round (cbu * (i-128), 76309)); + } + this->gamma = 0; + this->entry_size = entry_size; +} + +static uint32_t yuv2rgb_single_pixel_32 (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) +{ + uint32_t * r, * g, * b; + + r = this->table_rV[v]; + g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]); + b = this->table_bU[u]; + + return r[y] + g[y] + b[y]; +} + +static uint32_t yuv2rgb_single_pixel_24_rgb (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) +{ + uint8_t * r, * g, * b; + + r = this->table_rV[v]; + g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]); + b = this->table_bU[u]; + + return (uint32_t) r[y] + + ((uint32_t) g[y] << 8) + + ((uint32_t) b[y] << 16); +} + +static uint32_t yuv2rgb_single_pixel_24_bgr (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) +{ + uint8_t * r, * g, * b; + + r = this->table_rV[v]; + g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]); + b = this->table_bU[u]; + + return (uint32_t) b[y] + + ((uint32_t) g[y] << 8) + + ((uint32_t) r[y] << 16); +} + +static uint32_t yuv2rgb_single_pixel_16 (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) +{ + uint16_t * r, * g, * b; + + r = this->table_rV[v]; + g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]); + b = this->table_bU[u]; + + return r[y] + g[y] + b[y]; +} + +static uint32_t yuv2rgb_single_pixel_8 (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) +{ + uint8_t * r, * g, * b; + + r = this->table_rV[v]; + g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]); + b = this->table_bU[u]; + + return r[y] + g[y] + b[y]; +} + +static uint32_t yuv2rgb_single_pixel_gray (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) +{ + return y; +} + +static uint32_t yuv2rgb_single_pixel_palette (yuv2rgb_t *this, uint8_t y, uint8_t u, uint8_t v) +{ + uint16_t * r, * g, * b; + + r = this->table_rV[v]; + g = (void *) (((uint8_t *)this->table_gU[u]) + this->table_gV[v]); + b = this->table_bU[u]; + + return this->cmap[r[y] + g[y] + b[y]]; +} + + +static void yuv2rgb_c_init (yuv2rgb_factory_t *this) +{ + switch (this->mode) { + case MODE_32_RGB: + case MODE_32_BGR: + this->yuv2rgb_fun = yuv2rgb_c_32; + break; + + case MODE_24_RGB: + case MODE_24_BGR: + this->yuv2rgb_fun = + (this->mode==MODE_24_RGB && !this->swapped) || (this->mode==MODE_24_BGR && this->swapped) + ? yuv2rgb_c_24_rgb + : yuv2rgb_c_24_bgr; + break; + + case MODE_15_BGR: + case MODE_16_BGR: + case MODE_15_RGB: + case MODE_16_RGB: + this->yuv2rgb_fun = yuv2rgb_c_16; + break; + + case MODE_8_RGB: + case MODE_8_BGR: + this->yuv2rgb_fun = yuv2rgb_c_8; + break; + + case MODE_8_GRAY: + this->yuv2rgb_fun = yuv2rgb_c_gray; + break; + + case MODE_PALETTE: + this->yuv2rgb_fun = yuv2rgb_c_palette; + break; + + default: + printf ("yuv2rgb: mode %d not supported by yuv2rgb\n", this->mode); + abort(); + } + +} + +static void yuv2rgb_single_pixel_init (yuv2rgb_factory_t *this) { + + switch (this->mode) { + case MODE_32_RGB: + case MODE_32_BGR: + this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_32; + break; + + case MODE_24_RGB: + case MODE_24_BGR: + this->yuv2rgb_single_pixel_fun = + (this->mode==MODE_24_RGB && !this->swapped) || (this->mode==MODE_24_BGR && this->swapped) + ? yuv2rgb_single_pixel_24_rgb + : yuv2rgb_single_pixel_24_bgr; + break; + + case MODE_15_BGR: + case MODE_16_BGR: + case MODE_15_RGB: + case MODE_16_RGB: + this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_16; + break; + + case MODE_8_RGB: + case MODE_8_BGR: + this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_8; + break; + + case MODE_8_GRAY: + this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_gray; + break; + + case MODE_PALETTE: + this->yuv2rgb_single_pixel_fun = yuv2rgb_single_pixel_palette; + break; + + default: + printf ("yuv2rgb: mode %d not supported by yuv2rgb\n", this->mode); + abort(); + } +} + + +/* + * yuy2 stuff + */ + +static void yuy22rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) +{ + int U, V, Y; + uint8_t * py_1, * pu, * pv; + uint32_t * r, * g, * b; + uint32_t * dst_1; + int width, height; + int dy; + + /* FIXME: implement unscaled version */ + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + height = this->dest_height; + + for (;;) { + dst_1 = (uint32_t*)_dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + + RGB(0); + DST1(0); + + RGB(1); + DST1(1); + + RGB(2); + DST1(2); + + RGB(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (height <= 0) + break; + + _p += this->y_stride*2*(dy>>15); + dy &= 32767; + /* + dy -= 32768; + _p += this->y_stride*2; + */ + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + } +} + +static void yuy22rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) +{ + int U, V, Y; + uint8_t * py_1, * pu, * pv; + uint8_t * r, * g, * b; + uint8_t * dst_1; + int width, height; + int dy; + + /* FIXME: implement unscaled version */ + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + height = this->dest_height; + + for (;;) { + dst_1 = _dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1RGB(0); + + RGB(1); + DST1RGB(1); + + RGB(2); + DST1RGB(2); + + RGB(3); + DST1RGB(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 24; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (height <= 0) + break; + + _p += this->y_stride*2*(dy>>15); + dy &= 32767; + /* + dy -= 32768; + _p += this->y_stride*2; + */ + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + } +} + +static void yuy22rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) +{ + int U, V, Y; + uint8_t * py_1, * pu, * pv; + uint8_t * r, * g, * b; + uint8_t * dst_1; + int width, height; + int dy; + + /* FIXME: implement unscaled version */ + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + height = this->dest_height; + + for (;;) { + dst_1 = _dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1BGR(0); + + RGB(1); + DST1BGR(1); + + RGB(2); + DST1BGR(2); + + RGB(3); + DST1BGR(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 24; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (height <= 0) + break; + + _p += this->y_stride*2*(dy>>15); + dy &= 32767; + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + } +} + +static void yuy22rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) +{ + int U, V, Y; + uint8_t * py_1, * pu, * pv; + uint16_t * r, * g, * b; + uint16_t * dst_1; + int width, height; + int dy; + + /* FIXME: implement unscaled version */ + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + height = this->dest_height; + + for (;;) { + dst_1 = (uint16_t*)_dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1(0); + + RGB(1); + DST1(1); + + RGB(2); + DST1(2); + + RGB(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (height <= 0) + break; + + _p += this->y_stride*2*(dy>>15); + dy &= 32767; + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + } +} + +static void yuy22rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) +{ + int U, V, Y; + uint8_t * py_1, * pu, * pv; + uint8_t * r, * g, * b; + uint8_t * dst_1; + int width, height; + int dy; + + /* FIXME: implement unscaled version */ + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + height = this->dest_height; + + for (;;) { + dst_1 = _dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1(0); + + RGB(1); + DST1(1); + + RGB(2); + DST1(2); + + RGB(3); + DST1(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (height <= 0) + break; + + _p += this->y_stride*2*(dy>>15); + dy &= 32767; + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + } +} + +static void yuy22rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) +{ + int width, height; + int dy; + uint8_t * dst; + uint8_t * y; + + if (this->do_scale) { + dy = 0; + height = this->dest_height; + + for (;;) { + scale_line_2 (_p, _dst, this->dest_width, this->step_dx); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (height <= 0) + break; + + _p += this->y_stride*2*(dy>>15); + dy &= 32767; + } + } else { + for (height = this->source_height; --height >= 0; ) { + dst = _dst; + y = _p; + for (width = this->source_width; --width >= 0; ) { + *dst++ = *y; + y += 2; + } + _dst += this->rgb_stride; + _p += this->y_stride*2; + } + } +} + +static void yuy22rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) +{ + int U, V, Y; + uint8_t * py_1, * pu, * pv; + uint16_t * r, * g, * b; + uint8_t * dst_1; + int width, height; + int dy; + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + + dy = 0; + height = this->dest_height; + + for (;;) { + dst_1 = _dst; + py_1 = this->y_buffer; + pu = this->u_buffer; + pv = this->v_buffer; + + width = this->dest_width >> 3; + + do { + RGB(0); + DST1CMAP(0); + + RGB(1); + DST1CMAP(1); + + RGB(2); + DST1CMAP(2); + + RGB(3); + DST1CMAP(3); + + pu += 4; + pv += 4; + py_1 += 8; + dst_1 += 8; + } while (--width); + + dy += this->step_dy; + _dst += this->rgb_stride; + + while (--height > 0 && dy < 32768) { + + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + + dy += this->step_dy; + _dst += this->rgb_stride; + } + + if (height <= 0) + break; + + _p += this->y_stride*2*(dy>>15); + dy &= 32767; + + scale_line_4 (_p+1, this->u_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_4 (_p+3, this->v_buffer, + this->dest_width >> 1, this->step_dx); + scale_line_2 (_p, this->y_buffer, + this->dest_width, this->step_dx); + } +} + +static void yuy22rgb_c_init (yuv2rgb_factory_t *this) +{ + switch (this->mode) { + case MODE_32_RGB: + case MODE_32_BGR: + this->yuy22rgb_fun = yuy22rgb_c_32; + break; + + case MODE_24_RGB: + case MODE_24_BGR: + this->yuy22rgb_fun = + (this->mode==MODE_24_RGB && !this->swapped) || (this->mode==MODE_24_BGR && this->swapped) + ? yuy22rgb_c_24_rgb + : yuy22rgb_c_24_bgr; + break; + case MODE_15_BGR: + case MODE_16_BGR: + case MODE_15_RGB: + case MODE_16_RGB: + this->yuy22rgb_fun = yuy22rgb_c_16; + break; + + case MODE_8_RGB: + case MODE_8_BGR: + this->yuy22rgb_fun = yuy22rgb_c_8; + break; + + case MODE_8_GRAY: + this->yuy22rgb_fun = yuy22rgb_c_gray; + break; + + case MODE_PALETTE: + this->yuy22rgb_fun = yuy22rgb_c_palette; + break; + + default: + printf ("yuv2rgb: mode %d not supported for yuy2\n", this->mode); + } +} + +yuv2rgb_t *yuv2rgb_create_converter (yuv2rgb_factory_t *factory) { + + yuv2rgb_t *this = xine_xmalloc (sizeof (yuv2rgb_t)); + + this->cmap = factory->cmap; + + this->y_chunk = this->y_buffer = NULL; + this->u_chunk = this->u_buffer = NULL; + this->v_chunk = this->v_buffer = NULL; + + this->table_rV = factory->table_rV; + this->table_gU = factory->table_gU; + this->table_gV = factory->table_gV; + this->table_bU = factory->table_bU; + + this->yuv2rgb_fun = factory->yuv2rgb_fun; + this->yuy22rgb_fun = factory->yuy22rgb_fun; + this->yuv2rgb_single_pixel_fun = factory->yuv2rgb_single_pixel_fun; + + this->configure = yuv2rgb_configure; + return this; +} + +/* + * factory functions + */ + +void yuv2rgb_set_gamma (yuv2rgb_factory_t *this, int gamma) { + + int i; + + for (i = 0; i < 256; i++) { + (uint8_t *)this->table_rV[i] += this->entry_size*(gamma - this->gamma); + (uint8_t *)this->table_gU[i] += this->entry_size*(gamma - this->gamma); + (uint8_t *)this->table_bU[i] += this->entry_size*(gamma - this->gamma); + } +#ifdef ARCH_X86 + mmx_yuv2rgb_set_gamma(gamma); +#endif + this->gamma = gamma; +} + +int yuv2rgb_get_gamma (yuv2rgb_factory_t *this) { + + return this->gamma; +} + +yuv2rgb_factory_t* yuv2rgb_factory_init (int mode, int swapped, + uint8_t *cmap) { + + yuv2rgb_factory_t *this; + +#ifdef ARCH_X86 + uint32_t mm = xine_mm_accel(); +#endif + + this = malloc (sizeof (yuv2rgb_factory_t)); + + this->mode = mode; + this->swapped = swapped; + this->cmap = cmap; + this->create_converter = yuv2rgb_create_converter; + this->set_gamma = yuv2rgb_set_gamma; + this->get_gamma = yuv2rgb_get_gamma; + this->matrix_coefficients = 6; + + + yuv2rgb_setup_tables (this, mode, swapped); + + /* + * auto-probe for the best yuv2rgb function + */ + + this->yuv2rgb_fun = NULL; +#ifdef ARCH_X86 + if ((this->yuv2rgb_fun == NULL) && (mm & MM_ACCEL_X86_MMXEXT)) { + + yuv2rgb_init_mmxext (this); + + if (this->yuv2rgb_fun != NULL) + printf ("yuv2rgb: using MMXEXT for colorspace transform\n"); + } + + if ((this->yuv2rgb_fun == NULL) && (mm & MM_ACCEL_X86_MMX)) { + + yuv2rgb_init_mmx (this); + + if (this->yuv2rgb_fun != NULL) + printf ("yuv2rgb: using MMX for colorspace transform\n"); + } +#endif +#if HAVE_MLIB + if (this->yuv2rgb_fun == NULL) { + + yuv2rgb_init_mlib (this); + + if (this->yuv2rgb_fun != NULL) + printf ("yuv2rgb: using medialib for colorspace transform\n"); + } +#endif + if (this->yuv2rgb_fun == NULL) { + printf ("yuv2rgb: no accelerated colorspace conversion found\n"); + yuv2rgb_c_init (this); + } + + /* + * auto-probe for the best yuy22rgb function + */ + + /* FIXME: implement mmx/mlib functions */ + yuy22rgb_c_init (this); + + /* + * set up single pixel function + */ + + yuv2rgb_single_pixel_init (this); + + return this; +} + -- cgit v0.9.0.2