author | sandman <sandman> | 2002-08-04 20:23:19 (UTC) |
---|---|---|
committer | sandman <sandman> | 2002-08-04 20:23:19 (UTC) |
commit | 57bd412cf973805fbe69ecfa8f168ad2e28311a9 (patch) (unidiff) | |
tree | 78d7bab924023bdf33a437447bb31fff52b51c32 | |
parent | 7bf26dd95a7bd434edc8dd5e001d8ac490f67dc3 (diff) | |
download | opie-57bd412cf973805fbe69ecfa8f168ad2e28311a9.zip opie-57bd412cf973805fbe69ecfa8f168ad2e28311a9.tar.gz opie-57bd412cf973805fbe69ecfa8f168ad2e28311a9.tar.bz2 |
- Removed the mlib and mmx yuv2rgb converters
- Added an optimized (non-scaling !) arm4l yuv2rgb (taken from bbplay)
-rw-r--r-- | noncore/multimedia/opieplayer2/nullvideo.c | 2 | ||||
-rw-r--r-- | noncore/multimedia/opieplayer2/opieplayer2.pro | 2 | ||||
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb.c | 8 | ||||
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_arm.c | 174 | ||||
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S | 192 | ||||
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_mlib.c | 313 | ||||
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_mmx.c | 1047 |
7 files changed, 375 insertions, 1363 deletions
diff --git a/noncore/multimedia/opieplayer2/nullvideo.c b/noncore/multimedia/opieplayer2/nullvideo.c index 79337c2..bd52869 100644 --- a/noncore/multimedia/opieplayer2/nullvideo.c +++ b/noncore/multimedia/opieplayer2/nullvideo.c | |||
@@ -331,34 +331,32 @@ static void null_update_frame_format( vo_driver_t* self, vo_frame_t* img, | |||
331 | free ( frame->data ); | 331 | free ( frame->data ); |
332 | } | 332 | } |
333 | printf("after freeing\n"); | 333 | printf("after freeing\n"); |
334 | frame->data = xine_xmalloc (frame->output_width * frame->output_height * | 334 | frame->data = xine_xmalloc (frame->output_width * frame->output_height * |
335 | this->bytes_per_pixel ); | 335 | this->bytes_per_pixel ); |
336 | 336 | ||
337 | if( format == IMGFMT_YV12 ) { | 337 | if( format == IMGFMT_YV12 ) { |
338 | frame->frame.pitches[0] = 8*((width + 7) / 8); | 338 | frame->frame.pitches[0] = 8*((width + 7) / 8); |
339 | frame->frame.pitches[1] = 8*((width + 15) / 16); | 339 | frame->frame.pitches[1] = 8*((width + 15) / 16); |
340 | frame->frame.pitches[2] = 8*((width + 15) / 16); | 340 | frame->frame.pitches[2] = 8*((width + 15) / 16); |
341 | frame->frame.base[0] = xine_xmalloc_aligned (16, frame->frame.pitches[0] * height,(void **)&frame->chunk[0]); | 341 | frame->frame.base[0] = xine_xmalloc_aligned (16, frame->frame.pitches[0] * height,(void **)&frame->chunk[0]); |
342 | frame->frame.base[1] = xine_xmalloc_aligned (16, frame->frame.pitches[1] * ((height+ 1)/2), (void **)&frame->chunk[1]); | 342 | frame->frame.base[1] = xine_xmalloc_aligned (16, frame->frame.pitches[1] * ((height+ 1)/2), (void **)&frame->chunk[1]); |
343 | frame->frame.base[2] = xine_xmalloc_aligned (16, frame->frame.pitches[2] * ((height+ 1)/2), (void **)&frame->chunk[2]); | 343 | frame->frame.base[2] = xine_xmalloc_aligned (16, frame->frame.pitches[2] * ((height+ 1)/2), (void **)&frame->chunk[2]); |
344 | 344 | ||
345 | }else{ | 345 | }else{ |
346 | frame->frame.pitches[0] = 8*((width + 3) / 4); | 346 | frame->frame.pitches[0] = 8*((width + 3) / 4); |
347 | frame->frame.pitches[1] = 8*((width + 3) / 4); | ||
348 | frame->frame.pitches[2] = 8*((width + 3) / 4); | ||
349 | 347 | ||
350 | frame->frame.base[0] = xine_xmalloc_aligned (16, frame->frame.pitches[0] * height, | 348 | frame->frame.base[0] = xine_xmalloc_aligned (16, frame->frame.pitches[0] * height, |
351 | (void **)&frame->chunk[0]); | 349 | (void **)&frame->chunk[0]); |
352 | frame->chunk[1] = NULL; | 350 | frame->chunk[1] = NULL; |
353 | frame->chunk[2] = NULL; | 351 | frame->chunk[2] = NULL; |
354 | } | 352 | } |
355 | 353 | ||
356 | frame->format = format; | 354 | frame->format = format; |
357 | frame->width = width; | 355 | frame->width = width; |
358 | frame->height = height; | 356 | frame->height = height; |
359 | 357 | ||
360 | frame->stripe_height = 16 * frame->output_height / frame->height; | 358 | frame->stripe_height = 16 * frame->output_height / frame->height; |
361 | frame->bytes_per_line = frame->output_width * this->bytes_per_pixel; | 359 | frame->bytes_per_line = frame->output_width * this->bytes_per_pixel; |
362 | 360 | ||
363 | /* | 361 | /* |
364 | * set up colorspace converter | 362 | * set up colorspace converter |
diff --git a/noncore/multimedia/opieplayer2/opieplayer2.pro b/noncore/multimedia/opieplayer2/opieplayer2.pro index d8cacd0..fee9242 100644 --- a/noncore/multimedia/opieplayer2/opieplayer2.pro +++ b/noncore/multimedia/opieplayer2/opieplayer2.pro | |||
@@ -1,23 +1,23 @@ | |||
1 | TEMPLATE = app | 1 | TEMPLATE = app |
2 | CONFIG = qt warn_on release | 2 | CONFIG = qt warn_on release |
3 | #release | 3 | #release |
4 | DESTDIR = $(OPIEDIR)/bin | 4 | DESTDIR = $(OPIEDIR)/bin |
5 | HEADERS = playlistselection.h mediaplayerstate.h xinecontrol.h mediadetect.h\ | 5 | HEADERS = playlistselection.h mediaplayerstate.h xinecontrol.h mediadetect.h\ |
6 | videowidget.h audiowidget.h playlistwidget.h mediaplayer.h inputDialog.h \ | 6 | videowidget.h audiowidget.h playlistwidget.h mediaplayer.h inputDialog.h \ |
7 | frame.h lib.h xinevideowidget.h \ | 7 | frame.h lib.h xinevideowidget.h \ |
8 | alphablend.h yuv2rgb.h | 8 | alphablend.h yuv2rgb.h |
9 | SOURCES = main.cpp \ | 9 | SOURCES = main.cpp \ |
10 | playlistselection.cpp mediaplayerstate.cpp xinecontrol.cpp mediadetect.cpp\ | 10 | playlistselection.cpp mediaplayerstate.cpp xinecontrol.cpp mediadetect.cpp\ |
11 | videowidget.cpp audiowidget.cpp playlistwidget.cpp mediaplayer.cpp inputDialog.cpp \ | 11 | videowidget.cpp audiowidget.cpp playlistwidget.cpp mediaplayer.cpp inputDialog.cpp \ |
12 | frame.cpp lib.cpp nullvideo.c xinevideowidget.cpp \ | 12 | frame.cpp lib.cpp nullvideo.c xinevideowidget.cpp \ |
13 | alphablend.c yuv2rgb.c yuv2rgb_mlib.c yuv2rgb_mmx.c | 13 | alphablend.c yuv2rgb.c yuv2rgb_arm.c yuv2rgb_arm4l.S |
14 | TARGET = opieplayer2 | 14 | TARGET = opieplayer2 |
15 | INCLUDEPATH += $(OPIEDIR)/include | 15 | INCLUDEPATH += $(OPIEDIR)/include |
16 | DEPENDPATH += $(OPIEDIR)/include | 16 | DEPENDPATH += $(OPIEDIR)/include |
17 | LIBS += -lqpe -lpthread -lopie -lxine -lxineutils | 17 | LIBS += -lqpe -lpthread -lopie -lxine -lxineutils |
18 | MOC_DIR=qpeobj | 18 | MOC_DIR=qpeobj |
19 | OBJECTS_DIR=qpeobj | 19 | OBJECTS_DIR=qpeobj |
20 | 20 | ||
21 | INCLUDEPATH += $(OPIEDIR)/include | 21 | INCLUDEPATH += $(OPIEDIR)/include |
22 | DEPENDPATH += $(OPIEDIR)/include | 22 | DEPENDPATH += $(OPIEDIR)/include |
23 | 23 | ||
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb.c b/noncore/multimedia/opieplayer2/yuv2rgb.c index d1d6627..22bb4cb 100644 --- a/noncore/multimedia/opieplayer2/yuv2rgb.c +++ b/noncore/multimedia/opieplayer2/yuv2rgb.c | |||
@@ -3124,32 +3124,40 @@ yuv2rgb_factory_t* yuv2rgb_factory_init (int mode, int swapped, | |||
3124 | 3124 | ||
3125 | yuv2rgb_init_mmx (this); | 3125 | yuv2rgb_init_mmx (this); |
3126 | 3126 | ||
3127 | if (this->yuv2rgb_fun != NULL) | 3127 | if (this->yuv2rgb_fun != NULL) |
3128 | printf ("yuv2rgb: using MMX for colorspace transform\n"); | 3128 | printf ("yuv2rgb: using MMX for colorspace transform\n"); |
3129 | } | 3129 | } |
3130 | #endif | 3130 | #endif |
3131 | #if HAVE_MLIB | 3131 | #if HAVE_MLIB |
3132 | if (this->yuv2rgb_fun == NULL) { | 3132 | if (this->yuv2rgb_fun == NULL) { |
3133 | 3133 | ||
3134 | yuv2rgb_init_mlib (this); | 3134 | yuv2rgb_init_mlib (this); |
3135 | 3135 | ||
3136 | if (this->yuv2rgb_fun != NULL) | 3136 | if (this->yuv2rgb_fun != NULL) |
3137 | printf ("yuv2rgb: using medialib for colorspace transform\n"); | 3137 | printf ("yuv2rgb: using medialib for colorspace transform\n"); |
3138 | } | 3138 | } |
3139 | #endif | 3139 | #endif |
3140 | #ifdef __arm__ | ||
3141 | if (this->yuv2rgb_fun == NULL) { | ||
3142 | yuv2rgb_init_arm ( this ); | ||
3143 | |||
3144 | if(this->yuv2rgb_fun != NULL) | ||
3145 | printf("yuv2rgb: using arm4l assembler for colorspace transform\n" ); | ||
3146 | } | ||
3147 | #endif | ||
3140 | if (this->yuv2rgb_fun == NULL) { | 3148 | if (this->yuv2rgb_fun == NULL) { |
3141 | printf ("yuv2rgb: no accelerated colorspace conversion found\n"); | 3149 | printf ("yuv2rgb: no accelerated colorspace conversion found\n"); |
3142 | yuv2rgb_c_init (this); | 3150 | yuv2rgb_c_init (this); |
3143 | } | 3151 | } |
3144 | 3152 | ||
3145 | /* | 3153 | /* |
3146 | * auto-probe for the best yuy22rgb function | 3154 | * auto-probe for the best yuy22rgb function |
3147 | */ | 3155 | */ |
3148 | 3156 | ||
3149 | /* FIXME: implement mmx/mlib functions */ | 3157 | /* FIXME: implement mmx/mlib functions */ |
3150 | yuy22rgb_c_init (this); | 3158 | yuy22rgb_c_init (this); |
3151 | 3159 | ||
3152 | /* | 3160 | /* |
3153 | * set up single pixel function | 3161 | * set up single pixel function |
3154 | */ | 3162 | */ |
3155 | 3163 | ||
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_arm.c b/noncore/multimedia/opieplayer2/yuv2rgb_arm.c new file mode 100644 index 0000000..699ee48 --- a/dev/null +++ b/noncore/multimedia/opieplayer2/yuv2rgb_arm.c | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * yuv2rgb_arm.c | ||
3 | * Copyright (C) 2000-2001 Project OPIE. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Author: Robert Griebl <sandman@handhelds.org> | ||
7 | * | ||
8 | * This file is part of OpiePlayer2. | ||
9 | * | ||
10 | * OpiePlayer2 is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * OpiePlayer2 is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | #ifdef __arm__ | ||
26 | |||
27 | #include <stdio.h> | ||
28 | #include <stdlib.h> | ||
29 | #include <string.h> | ||
30 | #include <inttypes.h> | ||
31 | |||
32 | #include "yuv2rgb.h" | ||
33 | #include <xine/xineutils.h> | ||
34 | /* RGB(i): read chroma sample i from pu/pv and point r, g and b at the lookup tables selected by it; expects locals U, V, r, g, b, pu, pv and `this` in the expanding function. */ |||
35 | #define RGB(i) \ | ||
36 | U = pu[i]; \ | ||
37 | V = pv[i]; \ | ||
38 | r = this->table_rV[V]; \ | ||
39 | g = (void *) (((uint8_t *)this->table_gU[U]) + this->table_gV[V]);\ | ||
40 | b = this->table_bU[U]; | ||
41 | /* DST1(i): write output pixels 2*i and 2*i+1 of dst_1, each as the sum of the r, g and b table entries for its luma value from py_1; uses the tables last selected by RGB(). */ |||
42 | #define DST1(i) \ | ||
43 | Y = py_1[2*i]; \ | ||
44 | dst_1[2*i] = r[Y] + g[Y] + b[Y];\ | ||
45 | Y = py_1[2*i+1]; \ | ||
46 | dst_1[2*i+1] = r[Y] + g[Y] + b[Y]; | ||
47 | |||
48 | /* Picture descriptor handed to convert_yuv420_rgb565. The assembler loads the plane pointers from byte offsets 0/4/8 and the strides from 12/16/20, so field order and sizes must not change (assumes 4-byte pointers and ints - confirm for the target ABI). */ |||
49 | struct dummy { | ||
50 | uint8_t *yuv [3]; | ||
51 | int stride [3]; | ||
52 | }; | ||
53 | |||
54 | extern void convert_yuv420_rgb565(struct dummy *picture, unsigned char *results, int w, int h) ; | ||
55 | |||
56 | /* Convert YUV 4:2:0 planes (_py, _pu, _pv) to 16-bit RGB at _dst. Without scaling the work is handed to the assembler routine convert_yuv420_rgb565; with scaling the table-driven C loop below is used. */ |||
57 | static void arm_rgb16 (yuv2rgb_t *this, uint8_t * _dst, | ||
58 | uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | ||
59 | { | ||
60 | if ( !this-> do_scale ) { | ||
61 | struct dummy d; | ||
62 | d. yuv [0] = _py; | ||
63 | d. yuv [1] = _pu; | ||
64 | d. yuv [2] = _pv; | ||
65 | d. stride [0] = this-> y_stride; | ||
66 | d. stride [1] = d. stride [2] = this-> uv_stride; | ||
67 | /* NOTE(review): rgb_stride is not handed to the assembler, which steps output rows by width * 2 bytes - verify the destination is never padded. */ |||
68 | // printf( "calling arm (%dx%d)\n", this-> dest_width, this-> dest_height ); | ||
69 | |||
70 | convert_yuv420_rgb565 ( &d, _dst, this->dest_width, this->dest_height ); | ||
71 | |||
72 | // printf ( "arm done\n" ); | ||
73 | } | ||
74 | else { | ||
75 | int U, V, Y; | ||
76 | uint8_t * py_1, * py_2, * pu, * pv; | ||
77 | uint16_t * r, * g, * b; | ||
78 | uint16_t * dst_1, * dst_2; | ||
79 | int width, height, dst_height; | ||
80 | int dy; | ||
81 | |||
82 | scale_line_func_t scale_line = this->scale_line; | ||
83 | /* scale the first source line of each plane into the line buffers */ |||
84 | scale_line (_pu, this->u_buffer, | ||
85 | this->dest_width >> 1, this->step_dx); | ||
86 | scale_line (_pv, this->v_buffer, | ||
87 | this->dest_width >> 1, this->step_dx); | ||
88 | scale_line (_py, this->y_buffer, | ||
89 | this->dest_width, this->step_dx); | ||
90 | |||
91 | dy = 0; | ||
92 | dst_height = this->dest_height; | ||
93 | /* each pass converts the buffered line once; height counts the source luma lines consumed so far */ |||
94 | for (height = 0;; ) { | ||
95 | dst_1 = (uint16_t*)_dst; | ||
96 | py_1 = this->y_buffer; | ||
97 | pu = this->u_buffer; | ||
98 | pv = this->v_buffer; | ||
99 | /* 8 output pixels per iteration; NOTE(review): the do/while assumes dest_width >= 8 (width must start non-zero) */ |||
100 | width = this->dest_width >> 3; | ||
101 | |||
102 | do { | ||
103 | RGB(0); | ||
104 | DST1(0); | ||
105 | |||
106 | RGB(1); | ||
107 | DST1(1); | ||
108 | |||
109 | RGB(2); | ||
110 | DST1(2); | ||
111 | |||
112 | RGB(3); | ||
113 | DST1(3); | ||
114 | |||
115 | pu += 4; | ||
116 | pv += 4; | ||
117 | py_1 += 8; | ||
118 | dst_1 += 8; | ||
119 | } while (--width); | ||
120 | |||
121 | dy += this->step_dy; | ||
122 | _dst += this->rgb_stride; | ||
123 | |||
124 | while (--dst_height > 0 && dy < 32768) { | ||
125 | /* dy has not reached the next source line yet: repeat the previous output line */ |||
126 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); | ||
127 | |||
128 | dy += this->step_dy; | ||
129 | _dst += this->rgb_stride; | ||
130 | } | ||
131 | |||
132 | if (dst_height <= 0) | ||
133 | break; | ||
134 | /* advance one source luma line per 32768 units of dy; the chroma planes advance when the line count is odd, i.e. on every second luma line */ |||
135 | do { | ||
136 | dy -= 32768; | ||
137 | _py += this->y_stride; | ||
138 | |||
139 | scale_line (_py, this->y_buffer, | ||
140 | this->dest_width, this->step_dx); | ||
141 | |||
142 | if (height & 1) { | ||
143 | _pu += this->uv_stride; | ||
144 | _pv += this->uv_stride; | ||
145 | |||
146 | scale_line (_pu, this->u_buffer, | ||
147 | this->dest_width >> 1, this->step_dx); | ||
148 | scale_line (_pv, this->v_buffer, | ||
149 | this->dest_width >> 1, this->step_dx); | ||
150 | |||
151 | } | ||
152 | height++; | ||
153 | } while( dy>=32768); | ||
154 | } | ||
155 | } | ||
156 | } | ||
157 | |||
158 | |||
159 | /* Install arm_rgb16 as the converter for MODE_16_RGB; swapped output and every other mode leave yuv2rgb_fun as it was. */ |||
160 | void yuv2rgb_init_arm (yuv2rgb_factory_t *this) { | ||
161 | |||
162 | if (this->swapped) | ||
163 | return; /* no swapped pixel output up to now */ | ||
164 | |||
165 | switch (this->mode) { | ||
166 | case MODE_16_RGB: | ||
167 | this->yuv2rgb_fun = arm_rgb16; | ||
168 | break; | ||
169 | } | ||
170 | } | ||
171 | |||
172 | |||
173 | |||
174 | #endif | ||
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S b/noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S new file mode 100644 index 0000000..f4a3395 --- a/dev/null +++ b/noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S | |||
@@ -0,0 +1,192 @@ | |||
1 | /* WARNING : this function only works when stride_U == stride_V (I use some hacks to | ||
2 | not have to do too many computations at line's end)... | ||
3 | |||
4 | C-like prototype : | ||
5 | void convert_yuv420_rgb565(AVPicture *picture, unsigned char *results, int w, int h) ; | ||
6 | |||
7 | */ | ||
8 | |||
9 | #ifdef __arm__ | ||
10 | |||
11 | .text | ||
12 | .align | ||
13 | |||
14 | .global convert_yuv420_rgb565 | ||
15 | convert_yuv420_rgb565: | ||
16 | stmdb sp!, { r4 - r12, lr } @ all callee saved regs | ||
17 | ldr r7, [r0, #0] @ Y ptr | ||
18 | ldr r9, [r0, #4] @ U ptr | ||
19 | ldr r10, [r0, #8] @ V ptr | ||
20 | subs r10, r10, r9 @ V ptr - U ptr | ||
21 | ldr r8, [r0, #12] @ Stride_Y | ||
22 | add r8, r8, r7 @ Y + stride_Y | ||
23 | ldr r4, [r0, #12] @ Stride_Y | ||
24 | mov r4, r4, lsl #1 | ||
25 | sub r4, r4, r2 @ (2 * Stride_Y) - width | ||
26 | ldr r5, [r0, #16] @ Stride_U | ||
27 | sub r5, r5, r2, lsr #1 @ Stride_U - (width / 2) | ||
28 | ldr r6, [r0, #20] @ Stride_V | ||
29 | sub r6, r6, r2, lsr #1 @ Stride_V - (width / 2) | ||
30 | add r0, r1, r2, lsl #1 @ RGB + one line (width * 2 bytes) | ||
31 | stmdb sp!, { r0-r10 } | ||
32 | @ Stack description : | ||
33 | @ (sp+ 0) RGB + one line | ||
34 | @ (sp+ 4) RGB | ||
35 | @ (sp+ 8) width (save) | ||
36 | @ (sp+12) height | ||
37 | @ (sp+16) (2 * stride_Y) - width | ||
38 | @ (sp+20) stride_U - (width / 2) | ||
39 | @ (sp+24) stride_V - (width / 2) !!! UNUSED !!! | ||
40 | @ (sp+28) Y ptr | ||
41 | @ (sp+32) Y ptr + one line | ||
42 | @ (sp+36) U ptr | ||
43 | @ (sp+40) V - U | ||
44 | mov lr, r2 @ Initialize the width counter | ||
45 | add r0, pc, #(const_storage-.-8) @ r0 = base pointer to the constants array | ||
46 | ldr r8, [r0, #(4*4)] @ r8 = multy | ||
47 | yuv_loop: | ||
48 | add r0, pc, #(const_storage-.-8) @ r0 = base pointer to the constants array | ||
49 | ldr r10, [sp, #28] @ r10 = Y | ||
50 | ldr r1, [sp, #36] @ r1 = U | ||
51 | ldrb r9, [r10, #0] @ r9 = *Y | ||
52 | ldrb r11, [r1] @ r11 = *U | ||
53 | add r1, r1, #1 @ r1 = U++ | ||
54 | ldr r2, [sp, #40] @ r2 = V - U | ||
55 | str r1, [sp, #36] @ store U++ | ||
56 | add r2, r1, r2 @ r2 = V+1 | ||
57 | ldrb r12, [r2, #-1] @ r12 = *V | ||
58 | sub r11, r11, #128 @ r11 = *U - 128 | ||
59 | sub r12, r12, #128 @ r12 = *V - 128 | ||
60 | ldr r1, [r0, #(4*0)] @ r1 = crv | ||
61 | mov r7, #32768 @ r7 = 32768 (for additions in MLA) | ||
62 | ldr r2, [r0, #(4*3)] @ r2 = -cgv | ||
63 | mla r6, r1, r12, r7 @ r6 = nonyc_r = crv * (*V - 128) + 32768 | ||
64 | ldr r3, [r0, #(4*1)] @ r3 = cbu | ||
65 | mla r4, r2, r12, r7 @ r4 = - cgv * (*V - 128) + 32768 | ||
66 | sub r9, r9, #16 @ r9 = *Y - 16 | ||
67 | mla r5, r3, r11, r7 @ r5 = nonyc_b = cbu * (*U - 128) + 32768 | ||
68 | ldr r0, [r0, #(4*2)] @ r0 = -cgu | ||
69 | mla r7, r8, r9, r6 @ r7 = (*Y - 16) * multy + nonyc_r | ||
70 | add r10, r10, #2 @ r10 = Y + 2 | ||
71 | mla r4, r0, r11, r4 @ r4 = nonyc_g = - cgu * (*U - 128) + r4 = - cgu * (*U - 128) - cgv * (*V - 128) + 32768 | ||
72 | add r0, pc, #(rb_clip-.-8) @ r0 contains the pointer to the R and B clipping array | ||
73 | mla r12, r8, r9, r5 @ r12 = (*Y - 16) * multy + nonyc_b | ||
74 | ldrb r7, [r0, r7, asr #(16+3)] @ r7 = R component | ||
75 | mla r1, r8, r9, r4 @ r1 = (*Y - 16) * multy + nonyc_g | ||
76 | ldrb r9, [r10, #-1] @ r9 = *(Y+1) | ||
77 | str r10, [sp, #28] @ save Y + 2 | ||
78 | ldrb r12, [r0, r12, asr #(16+3)] @ r12 = B component (and the start of the RGB word) | ||
79 | add r11, pc, #(g_clip-.-8) @ r11 now contains the pointer to the G clipping array | ||
80 | ldrb r1, [r11, r1, asr #(16+2)] @ r1 contains the G part of the RGB triplet | ||
81 | sub r9, r9, #16 @ r9 = *(Y+1) - 16 | ||
82 | mla r10, r8, r9, r6 @ r10 is the Red part of the RGB triplet | ||
83 | add r12, r12, r7, lsl #11 @ r12 = R.B ... | ||
84 | mla r7, r8, r9, r5 @ r7 is the Blue part of the RGB triplet | ||
85 | add r12, r12, r1, lsl #5 @ r12 = RGB ... (ie the first pixel (half-word) is done) | ||
86 | mla r2, r8, r9, r4 @ r2 is the Green part of the RGB triplet | ||
87 | ldrb r10, [r0, r10, asr #(16+3)] @ r10 = R component | ||
88 | ldrb r7, [r0, r7, asr #(16+3)] @ r7 = B component | ||
89 | ldr r1, [sp, #32] @ r1 = Ynext | ||
90 | ldrb r2, [r11, r2, asr #(16+2)] @ r2 = G component | ||
91 | ldrb r9, [r1] @ r9 = *Ynext | ||
92 | add r12, r12, r2, lsl #(5+16) @ r12 = RGB .G. | ||
93 | sub r9, r9, #16 @ r9 = *Ynext - 16 | ||
94 | mla r2, r8, r9, r4 @ r2 is the Green part of the RGB triplet | ||
95 | add r12, r12, r7, lsl #(0+16) @ r12 = RGB .GB | ||
96 | mla r7, r8, r9, r5 @ r7 is the Blue part of the RGB triplet | ||
97 | add r12, r12, r10, lsl #(11+16) @ r12 = RGB RGB | ||
98 | ldr r3, [sp, #4] @ r3 = RGB | ||
99 | mla r10, r8, r9, r6 @ r10 is the Red part of the RGB triplet | ||
100 | str r12, [r3] @ store the rgb pixel at *RGB | ||
101 | add r3, r3, #4 @ r3 = RGB++ (ie next double-pixel) | ||
102 | str r3, [sp, #4] @ store the RGB pointer | ||
103 | ldrb r9, [r1, #1] @ r9 = *(Ynext+1) | ||
104 | add r1, r1, #2 @ r1 = Ynext + 2 | ||
105 | sub r9, r9, #16 @ r9 = *(Ynext+1) - 16 | ||
106 | ldrb r12, [r0, r7, asr #(16+3)] @ r12 = ..B ... | ||
107 | ldrb r10, [r0, r10, asr #(16+3)] @ r10 = R component | ||
108 | mla r7, r8, r9, r5 @ r7 is the Blue part of the RGB triplet | ||
109 | add r12, r12, r10, lsl #11 @ r12 = R.B ... | ||
110 | ldrb r2, [r11, r2, asr #(16+2)] @ r2 = G component | ||
111 | mla r10, r8, r9, r6 @ r10 is the Red part of the RGB triplet | ||
112 | add r12, r12, r2, lsl #5 @ r12 = RGB ... | ||
113 | mla r2, r8, r9, r4 @ r2 is the Green part of the RGB triplet | ||
114 | ldrb r7, [r0, r7, asr #(16+3)] @ r7 = B component | ||
115 | str r1, [sp, #32] @ store the increased Ynext pointer | ||
116 | add r12, r12, r7, lsl #(16+0) @ r12 = RGB ..B | ||
117 | ldrb r10, [r0, r10, asr #(16+3)] @ r10 = R component | ||
118 | ldr r3, [sp, #0] @ r3 = RGBnext pointer | ||
119 | add r12, r12, r10, lsl #(16+11) @ r12 = RGB R.B | ||
120 | ldrb r2, [r11, r2, asr #(16+2)] @ r2 = G component | ||
121 | add r3, r3, #4 @ r3 = next pixel on the RGBnext line | ||
122 | add r12, r12, r2, lsl #(16+5) @ r12 = RGB RGB | ||
123 | str r12, [r3, #-4] @ store the next pixel | ||
124 | str r3, [sp, #0] @ store the increased 'next line' pixel pointer | ||
125 | subs lr, lr, #2 @ decrement the width counter (two pixels per pass); only an exact zero ends the line, so width must be even and non-zero | ||
126 | bne yuv_loop @ and restart if not at the end of the line | ||
127 | |||
128 | ldr r0, [sp, #8] @ r0 = saved width | ||
129 | ldr r1, [sp, #0] @ r1 = RGBnext pointer | ||
130 | mov lr, r0 @ lr = saved width (to restart the line counter) | ||
131 | str r1, [sp, #4] @ current RGBnext pointer is next iteration RGB pointer | ||
132 | add r1, r1, r0, lsl #1 @ r1 = update RGBnext to next line (output rows are taken to be width * 2 bytes apart) | ||
133 | str r1, [sp, #0] @ store updated RGBnext pointer | ||
134 | |||
135 | ldr r3, [sp, #16] @ r3 = (2 * stride_Y) - width | ||
136 | ldr r4, [sp, #28] @ r4 = Y ptr | ||
137 | ldr r5, [sp, #32] @ r5 = Ynext ptr | ||
138 | add r4, r4, r3 @ r4 = Y ptr for the next two lines | ||
139 | add r5, r5, r3 @ r5 = Ynext ptr for the next two lines | ||
140 | str r4, [sp, #28] @ store updated Y pointer | ||
141 | str r5, [sp, #32] @ store updated Ynext pointer | ||
142 | |||
143 | ldr r1, [sp, #20] @ r1 = stride_U - (width / 2) | ||
144 | ldr r2, [sp, #36] @ r2 = U ptr | ||
145 | |||
146 | ldr r6, [sp, #12] @ get height counter | ||
147 | |||
148 | add r2, r2, r1 @ update U ptr | ||
149 | str r2, [sp, #36] @ store updated U ptr (and update 'V' at the same time :-) ) | ||
150 | |||
151 | subs r6, r6, #2 @ two rows are done per pass; only an exact zero ends the loop, so height must be even and non-zero | ||
152 | str r6, [sp, #12] | ||
153 | bne yuv_loop | ||
154 | |||
155 | @ Exit cleanly :-) | ||
156 | add sp, sp, #(11*4) @ remove all custom things from stack | ||
157 | ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | ||
158 | |||
159 | |||
160 | const_storage: | ||
161 | @ In order : crv, cbu, - cgu, - cgv, multy | ||
162 | .word 0x00019895, 0x00020469, 0xffff9bb5, 0xffff2fe1, 0x00012A15 | ||
163 | rb_clip_dummy: | ||
164 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
165 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
166 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
167 | rb_clip: | ||
168 | .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f | ||
169 | .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f | ||
170 | .byte 0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f | ||
171 | .byte 0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f | ||
172 | .byte 0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f | ||
173 | g_clip_dummy: | ||
174 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
175 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
176 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
177 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
178 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
179 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
180 | g_clip: | ||
181 | .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f | ||
182 | .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f | ||
183 | .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f | ||
184 | .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f | ||
185 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
186 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
187 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
188 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
189 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
190 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
191 | |||
192 | #endif | ||
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_mlib.c b/noncore/multimedia/opieplayer2/yuv2rgb_mlib.c deleted file mode 100644 index 908b439..0000000 --- a/noncore/multimedia/opieplayer2/yuv2rgb_mlib.c +++ b/dev/null | |||
@@ -1,313 +0,0 @@ | |||
1 | /* | ||
2 | * yuv2rgb_mlib.c | ||
3 | * Copyright (C) 2000-2001 Silicon Integrated System Corp. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Author: Juergen Keil <jk@tools.de> | ||
7 | * | ||
8 | * This file is part of xine, a free unix video player. | ||
9 | * | ||
10 | * xine is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * xine is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | |||
26 | #if HAVE_MLIB | ||
27 | |||
28 | #include <stdio.h> | ||
29 | #include <stdlib.h> | ||
30 | #include <string.h> | ||
31 | #include <inttypes.h> | ||
32 | #include <mlib_video.h> | ||
33 | |||
34 | #include "attributes.h" | ||
35 | #include "yuv2rgb.h" | ||
36 | |||
37 | |||
38 | static void scale_line (uint8_t *source, uint8_t *dest, | ||
39 | int width, int step) { | ||
40 | |||
41 | unsigned p1; | ||
42 | unsigned p2; | ||
43 | int dx; | ||
44 | |||
45 | p1 = *source++; | ||
46 | p2 = *source++; | ||
47 | dx = 0; | ||
48 | |||
49 | while (width) { | ||
50 | |||
51 | /* | ||
52 | printf ("scale_line, width = %d\n", width); | ||
53 | printf ("scale_line, dx = %d, p1 = %d, p2 = %d\n", dx, p1, p2); | ||
54 | */ | ||
55 | |||
56 | *dest = (p1 * (32768 - dx) + p2 * dx) / 32768; | ||
57 | |||
58 | dx += step; | ||
59 | while (dx > 32768) { | ||
60 | dx -= 32768; | ||
61 | p1 = p2; | ||
62 | p2 = *source++; | ||
63 | } | ||
64 | |||
65 | dest ++; | ||
66 | width --; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | |||
71 | |||
72 | static void mlib_yuv420_rgb24 (yuv2rgb_t *this, | ||
73 | uint8_t * image, uint8_t * py, | ||
74 | uint8_t * pu, uint8_t * pv) | ||
75 | { | ||
76 | int dst_height; | ||
77 | int dy; | ||
78 | mlib_status mlib_stat; | ||
79 | |||
80 | if (this->do_scale) { | ||
81 | dy = 0; | ||
82 | dst_height = this->dest_height; | ||
83 | |||
84 | for (;;) { | ||
85 | scale_line (pu, this->u_buffer, | ||
86 | this->dest_width >> 1, this->step_dx); | ||
87 | pu += this->uv_stride; | ||
88 | |||
89 | scale_line (pv, this->v_buffer, | ||
90 | this->dest_width >> 1, this->step_dx); | ||
91 | pv += this->uv_stride; | ||
92 | |||
93 | scale_line (py, this->y_buffer, | ||
94 | this->dest_width, this->step_dx); | ||
95 | py += this->y_stride; | ||
96 | scale_line (py, this->y_buffer + this->dest_width, | ||
97 | this->dest_width, this->step_dx); | ||
98 | py += this->y_stride; | ||
99 | |||
100 | mlib_stat = mlib_VideoColorYUV2RGB420(image, | ||
101 | this->y_buffer, | ||
102 | this->u_buffer, | ||
103 | this->v_buffer, | ||
104 | this->dest_width & ~1, 2, | ||
105 | this->rgb_stride, | ||
106 | this->dest_width, | ||
107 | this->dest_width >> 1); | ||
108 | dy += this->step_dy; | ||
109 | image += this->rgb_stride; | ||
110 | |||
111 | while (--dst_height > 0 && dy < 32768) { | ||
112 | memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*6); | ||
113 | dy += this->step_dy; | ||
114 | image += this->rgb_stride; | ||
115 | } | ||
116 | |||
117 | if (dst_height <= 0) | ||
118 | break; | ||
119 | |||
120 | dy -= 32768; | ||
121 | |||
122 | dy += this->step_dy; | ||
123 | image += this->rgb_stride; | ||
124 | |||
125 | while (--dst_height > 0 && dy < 32768) { | ||
126 | memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*3); | ||
127 | dy += this->step_dy; | ||
128 | image += this->rgb_stride; | ||
129 | } | ||
130 | |||
131 | if (dst_height <= 0) | ||
132 | break; | ||
133 | |||
134 | dy -= 32768; | ||
135 | } | ||
136 | } else { | ||
137 | mlib_stat = mlib_VideoColorYUV2RGB420(image, py, pu, pv, | ||
138 | this->source_width, | ||
139 | this->source_height, | ||
140 | this->rgb_stride, | ||
141 | this->y_stride, | ||
142 | this->uv_stride); | ||
143 | } | ||
144 | } | ||
145 | |||
146 | static void mlib_yuv420_argb32 (yuv2rgb_t *this, | ||
147 | uint8_t * image, uint8_t * py, | ||
148 | uint8_t * pu, uint8_t * pv) | ||
149 | { | ||
150 | int dst_height; | ||
151 | int dy; | ||
152 | mlib_status mlib_stat; | ||
153 | |||
154 | if (this->do_scale) { | ||
155 | dy = 0; | ||
156 | dst_height = this->dest_height; | ||
157 | |||
158 | for (;;) { | ||
159 | scale_line (pu, this->u_buffer, | ||
160 | this->dest_width >> 1, this->step_dx); | ||
161 | pu += this->uv_stride; | ||
162 | |||
163 | scale_line (pv, this->v_buffer, | ||
164 | this->dest_width >> 1, this->step_dx); | ||
165 | pv += this->uv_stride; | ||
166 | |||
167 | scale_line (py, this->y_buffer, | ||
168 | this->dest_width, this->step_dx); | ||
169 | py += this->y_stride; | ||
170 | scale_line (py, this->y_buffer + this->dest_width, | ||
171 | this->dest_width, this->step_dx); | ||
172 | py += this->y_stride; | ||
173 | |||
174 | mlib_stat = mlib_VideoColorYUV2ARGB420(image, | ||
175 | this->y_buffer, | ||
176 | this->u_buffer, | ||
177 | this->v_buffer, | ||
178 | this->dest_width & ~1, 2, | ||
179 | this->rgb_stride, | ||
180 | this->dest_width, | ||
181 | this->dest_width >> 1); | ||
182 | dy += this->step_dy; | ||
183 | image += this->rgb_stride; | ||
184 | |||
185 | while (--dst_height > 0 && dy < 32768) { | ||
186 | memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*8); | ||
187 | dy += this->step_dy; | ||
188 | image += this->rgb_stride; | ||
189 | } | ||
190 | |||
191 | if (dst_height <= 0) | ||
192 | break; | ||
193 | |||
194 | dy -= 32768; | ||
195 | |||
196 | dy += this->step_dy; | ||
197 | image += this->rgb_stride; | ||
198 | |||
199 | while (--dst_height > 0 && dy < 32768) { | ||
200 | memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*4); | ||
201 | dy += this->step_dy; | ||
202 | image += this->rgb_stride; | ||
203 | } | ||
204 | |||
205 | if (dst_height <= 0) | ||
206 | break; | ||
207 | |||
208 | dy -= 32768; | ||
209 | } | ||
210 | } else { | ||
211 | mlib_stat = mlib_VideoColorYUV2ARGB420(image, py, pu, pv, | ||
212 | this->source_width, | ||
213 | this->source_height, | ||
214 | this->rgb_stride, | ||
215 | this->y_stride, | ||
216 | this->uv_stride); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | static void mlib_yuv420_abgr32 (yuv2rgb_t *this, | ||
221 | uint8_t * image, uint8_t * py, | ||
222 | uint8_t * pu, uint8_t * pv) | ||
223 | { | ||
224 | int dst_height; | ||
225 | int dy; | ||
226 | mlib_status mlib_stat; | ||
227 | |||
228 | if (this->do_scale) { | ||
229 | dy = 0; | ||
230 | dst_height = this->dest_height; | ||
231 | |||
232 | for (;;) { | ||
233 | scale_line (pu, this->u_buffer, | ||
234 | this->dest_width >> 1, this->step_dx); | ||
235 | pu += this->uv_stride; | ||
236 | |||
237 | scale_line (pv, this->v_buffer, | ||
238 | this->dest_width >> 1, this->step_dx); | ||
239 | pv += this->uv_stride; | ||
240 | |||
241 | scale_line (py, this->y_buffer, | ||
242 | this->dest_width, this->step_dx); | ||
243 | py += this->y_stride; | ||
244 | scale_line (py, this->y_buffer + this->dest_width, | ||
245 | this->dest_width, this->step_dx); | ||
246 | py += this->y_stride; | ||
247 | |||
248 | mlib_stat = mlib_VideoColorYUV2ABGR420(image, | ||
249 | this->y_buffer, | ||
250 | this->u_buffer, | ||
251 | this->v_buffer, | ||
252 | this->dest_width & ~1, 2, | ||
253 | this->rgb_stride, | ||
254 | this->dest_width, | ||
255 | this->dest_width >> 1); | ||
256 | dy += this->step_dy; | ||
257 | image += this->rgb_stride; | ||
258 | |||
259 | while (--dst_height > 0 && dy < 32768) { | ||
260 | memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*8); | ||
261 | dy += this->step_dy; | ||
262 | image += this->rgb_stride; | ||
263 | } | ||
264 | |||
265 | if (dst_height <= 0) | ||
266 | break; | ||
267 | |||
268 | dy -= 32768; | ||
269 | |||
270 | dy += this->step_dy; | ||
271 | image += this->rgb_stride; | ||
272 | |||
273 | while (--dst_height > 0 && dy < 32768) { | ||
274 | memcpy (image, (uint8_t*)image-this->rgb_stride, this->dest_width*4); | ||
275 | dy += this->step_dy; | ||
276 | image += this->rgb_stride; | ||
277 | } | ||
278 | |||
279 | if (dst_height <= 0) | ||
280 | break; | ||
281 | |||
282 | dy -= 32768; | ||
283 | } | ||
284 | } else { | ||
285 | mlib_stat = mlib_VideoColorYUV2ABGR420(image, py, pu, pv, | ||
286 | this->source_width, | ||
287 | this->source_height, | ||
288 | this->rgb_stride, | ||
289 | this->y_stride, | ||
290 | this->uv_stride); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | |||
295 | void yuv2rgb_init_mlib (yuv2rgb_factory_t *this) { | ||
296 | |||
297 | if (this->swapped) return; /*no swapped pixel output upto now*/ | ||
298 | |||
299 | switch (this->mode) { | ||
300 | case MODE_24_RGB: | ||
301 | this->yuv2rgb_fun = mlib_yuv420_rgb24; | ||
302 | break; | ||
303 | case MODE_32_RGB: | ||
304 | this->yuv2rgb_fun = mlib_yuv420_argb32; | ||
305 | break; | ||
306 | case MODE_32_BGR: | ||
307 | this->yuv2rgb_fun = mlib_yuv420_abgr32; | ||
308 | break; | ||
309 | } | ||
310 | } | ||
311 | |||
312 | |||
313 | #endif/* HAVE_MLIB */ | ||
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c b/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c deleted file mode 100644 index f092e6f..0000000 --- a/noncore/multimedia/opieplayer2/yuv2rgb_mmx.c +++ b/dev/null | |||
@@ -1,1047 +0,0 @@ | |||
1 | /* | ||
2 | * yuv2rgb_mmx.c | ||
3 | * Copyright (C) 2000-2001 Silicon Integrated System Corp. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Author: Olie Lho <ollie@sis.com.tw> | ||
7 | * | ||
8 | * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | ||
9 | * | ||
10 | * mpeg2dec is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * mpeg2dec is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | |||
26 | #ifdef ARCH_X86 | ||
27 | |||
28 | #include <stdio.h> | ||
29 | #include <stdlib.h> | ||
30 | #include <string.h> | ||
31 | #include <inttypes.h> | ||
32 | |||
33 | #include "yuv2rgb.h" | ||
34 | #include "xineutils.h" | ||
35 | |||
36 | #define CPU_MMXEXT 0 | ||
37 | #define CPU_MMX 1 | ||
38 | |||
39 | /* CPU_MMXEXT/CPU_MMX adaptation layer */ | ||
40 | |||
/*
 * Store MMX register `src` to the memory operand `dest`.
 *
 * The macro reads an integer named `cpu` from the scope it is expanded in:
 * when cpu == CPU_MMXEXT the store goes through movntq_r2m, otherwise
 * through movq_r2m.  The do { } while (0) wrapper makes the expansion
 * behave as a single statement.
 */
#define movntq(src,dest)	\
do {				\
	if (cpu == CPU_MMXEXT)	\
		movntq_r2m (src, dest);\
	else			\
		movq_r2m (src, dest);\
} while (0)
48 | |||
49 | static mmx_t mmx_subYw = {0x1010101010101010}; | ||
50 | static mmx_t mmx_addYw = {0x0000000000000000}; | ||
51 | |||
52 | void mmx_yuv2rgb_set_gamma(int gamma) | ||
53 | { | ||
54 | int a,s,i; | ||
55 | |||
56 | if( gamma <= 16 ) { | ||
57 | a = 0; | ||
58 | s = 16 - gamma; | ||
59 | } else { | ||
60 | a = gamma - 16; | ||
61 | s = 0; | ||
62 | } | ||
63 | |||
64 | for( i = 0; i < 8; i++ ) { | ||
65 | *((unsigned char *)&mmx_subYw + i) = s; | ||
66 | *((unsigned char *)&mmx_addYw + i) = a; | ||
67 | } | ||
68 | } | ||
69 | |||
/*
 * Convert 8 horizontally adjacent luma samples and the 4 U / 4 V samples
 * they share into 8 red, 8 green and 8 blue bytes.
 *
 * py: 8 luma bytes are loaded from here.
 * pu: 4 U bytes are loaded from here.
 * pv: 4 V bytes are loaded from here.
 *
 * Nothing is written to memory.  The result is left in MMX registers for
 * one of the mmx_unpack_* routines to pack and store:
 *   mm0 = B7..B0, mm1 = R7..R0, mm2 = G7..G0
 * mm3-mm7 are overwritten as scratch.
 *
 * Even and odd luma samples are processed in separate registers (mm6 and
 * mm7) so that each chroma word can be added to both pixels of its pair.
 * The instruction order is hand-interleaved; do not reorder.
 */
static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)
{
    /* fixed-point conversion constants, one 16-bit word per lane */
    static mmx_t mmx_80w = {0x0080008000800080};
    static mmx_t mmx_U_green = {0xf37df37df37df37d};
    static mmx_t mmx_U_blue = {0x4093409340934093};
    static mmx_t mmx_V_red = {0x3312331233123312};
    static mmx_t mmx_V_green = {0xe5fce5fce5fce5fc};
    static mmx_t mmx_00ffw = {0x00ff00ff00ff00ff};
    static mmx_t mmx_Y_coeff = {0x253f253f253f253f};

    movq_m2r (*py, mm6);		// mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
    pxor_r2r (mm4, mm4);		// mm4 = 0

    psubusb_m2r (mmx_subYw, mm6);	// Y -= mmx_subYw (16 unless changed by mmx_yuv2rgb_set_gamma)
    paddusb_m2r (mmx_addYw, mm6);	// Y += mmx_addYw (0 unless changed by mmx_yuv2rgb_set_gamma)

    movd_m2r (*pu, mm0);		// mm0 = 00 00 00 00 u3 u2 u1 u0
    movq_r2r (mm6, mm7);		// mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0

    pand_m2r (mmx_00ffw, mm6);		// mm6 =    Y6    Y4    Y2    Y0
    psrlw_i2r (8, mm7);			// mm7 =    Y7    Y5    Y3    Y1

    movd_m2r (*pv, mm1);		// mm1 = 00 00 00 00 v3 v2 v1 v0
    psllw_i2r (3, mm6);			// promote precision

    pmulhw_m2r (mmx_Y_coeff, mm6);	// mm6 = luma_rgb even
    psllw_i2r (3, mm7);			// promote precision

    punpcklbw_r2r (mm4, mm0);		// mm0 = u3 u2 u1 u0

    psubsw_m2r (mmx_80w, mm0);		// u -= 128
    punpcklbw_r2r (mm4, mm1);		// mm1 = v3 v2 v1 v0

    pmulhw_m2r (mmx_Y_coeff, mm7);	// mm7 = luma_rgb odd
    psllw_i2r (3, mm0);			// promote precision

    psubsw_m2r (mmx_80w, mm1);		// v -= 128
    movq_r2r (mm0, mm2);		// mm2 = u3 u2 u1 u0

    psllw_i2r (3, mm1);			// promote precision

    movq_r2r (mm1, mm4);		// mm4 = v3 v2 v1 v0

    pmulhw_m2r (mmx_U_blue, mm0);	// mm0 = chroma_b


    // slot


    // slot


    pmulhw_m2r (mmx_V_red, mm1);	// mm1 = chroma_r
    movq_r2r (mm0, mm3);		// mm3 = chroma_b

    paddsw_r2r (mm6, mm0);		// mm0 = B6 B4 B2 B0
    paddsw_r2r (mm7, mm3);		// mm3 = B7 B5 B3 B1

    packuswb_r2r (mm0, mm0);		// saturate to 0-255


    pmulhw_m2r (mmx_U_green, mm2);	// mm2 = u * u_green


    packuswb_r2r (mm3, mm3);		// saturate to 0-255


    punpcklbw_r2r (mm3, mm0);		// mm0 = B7 B6 B5 B4 B3 B2 B1 B0


    pmulhw_m2r (mmx_V_green, mm4);	// mm4 = v * v_green


    // slot


    // slot


    paddsw_r2r (mm4, mm2);		// mm2 = chroma_g
    movq_r2r (mm2, mm5);		// mm5 = chroma_g


    movq_r2r (mm1, mm4);		// mm4 = chroma_r
    paddsw_r2r (mm6, mm2);		// mm2 = G6 G4 G2 G0


    packuswb_r2r (mm2, mm2);		// saturate to 0-255
    paddsw_r2r (mm6, mm1);		// mm1 = R6 R4 R2 R0

    packuswb_r2r (mm1, mm1);		// saturate to 0-255
    paddsw_r2r (mm7, mm4);		// mm4 = R7 R5 R3 R1

    packuswb_r2r (mm4, mm4);		// saturate to 0-255
    paddsw_r2r (mm7, mm5);		// mm5 = G7 G5 G3 G1


    packuswb_r2r (mm5, mm5);		// saturate to 0-255


    punpcklbw_r2r (mm4, mm1);		// mm1 = R7 R6 R5 R4 R3 R2 R1 R0


    punpcklbw_r2r (mm5, mm2);		// mm2 = G7 G6 G5 G4 G3 G2 G1 G0
}
175 | |||
176 | // basic opt | ||
/*
 * Pack the 8 pixels produced by mmx_yuv2rgb into 16-bit words
 * (5 bits red, 6 bits green, 5 bits blue) and store them.
 *
 * image: destination; 16 bytes are written (8 at image, 8 at image+8).
 * cpu:   CPU_MMX or CPU_MMXEXT; consumed by the movntq() macro to pick
 *        the store instruction.
 *
 * Expects mm0 = B7..B0, mm1 = R7..R0, mm2 = G7..G0 on entry and
 * overwrites mm0, mm1, mm2, mm4, mm5 and mm7.
 */
static inline void mmx_unpack_16rgb (uint8_t * image, int cpu)
{
    static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8};
    static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfc};
    static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8};

    /*
     * convert RGB plane to RGB 16 bits
     * mm0 -> B, mm1 -> R, mm2 -> G
     * mm0 receives pixels 0-3, mm5 receives pixels 4-7
     */

    pand_m2r (mmx_bluemask, mm0);	// mm0 = b7b6b5b4b3______
    pxor_r2r (mm4, mm4);		// mm4 = 0

    pand_m2r (mmx_greenmask, mm2);	// mm2 = g7g6g5g4g3g2____
    psrlq_i2r (3, mm0);			// mm0 = ______b7b6b5b4b3

    movq_r2r (mm2, mm7);		// mm7 = g7g6g5g4g3g2____
    movq_r2r (mm0, mm5);		// mm5 = ______b7b6b5b4b3

    pand_m2r (mmx_redmask, mm1);	// mm1 = r7r6r5r4r3______
    punpcklbw_r2r (mm4, mm2);		// widen G0..G3 to 16-bit words

    punpcklbw_r2r (mm1, mm0);		// words 0-3: red in high byte, blue in low byte

    psllq_i2r (3, mm2);			// move green 0-3 up to bits 5-10

    punpckhbw_r2r (mm4, mm7);		// widen G4..G7 to 16-bit words
    por_r2r (mm2, mm0);			// mm0 = finished pixels 0-3

    psllq_i2r (3, mm7);			// move green 4-7 up to bits 5-10

    movntq (mm0, *image);		// store pixels 0-3
    punpckhbw_r2r (mm1, mm5);		// words 4-7: red in high byte, blue in low byte

    por_r2r (mm7, mm5);			// mm5 = finished pixels 4-7

    // U
    // V

    movntq (mm5, *(image+8));		// store pixels 4-7
}
221 | |||
/*
 * Pack the 8 pixels produced by mmx_yuv2rgb into 15-bit pixels
 * (5 bits each of red, green and blue in a 16-bit word, top bit zero)
 * and store them.
 *
 * image: destination; 16 bytes are written (8 at image, 8 at image+8).
 * cpu:   CPU_MMX or CPU_MMXEXT; consumed by the movntq() macro to pick
 *        the store instruction.
 *
 * Expects mm0 = B7..B0, mm1 = R7..R0, mm2 = G7..G0 on entry and
 * overwrites mm0, mm1, mm2, mm4, mm5 and mm7.
 */
static inline void mmx_unpack_15rgb (uint8_t * image, int cpu)
{
    static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8};
    static mmx_t mmx_greenmask = {0xf8f8f8f8f8f8f8f8};
    static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8};

    /*
     * convert RGB plane to RGB 15 bits
     * mm0 -> B, mm1 -> R, mm2 -> G
     * mm0 receives pixels 0-3, mm5 receives pixels 4-7
     */

    pand_m2r (mmx_bluemask, mm0);	// mm0 = b7b6b5b4b3______
    pxor_r2r (mm4, mm4);		// mm4 = 0

    pand_m2r (mmx_greenmask, mm2);	// mm2 = g7g6g5g4g3______ (5-bit mask here)
    psrlq_i2r (3, mm0);			// mm0 = ______b7b6b5b4b3

    movq_r2r (mm2, mm7);		// mm7 = g7g6g5g4g3______
    movq_r2r (mm0, mm5);		// mm5 = ______b7b6b5b4b3

    pand_m2r (mmx_redmask, mm1);	// mm1 = r7r6r5r4r3______
    punpcklbw_r2r (mm4, mm2);		// widen G0..G3 to 16-bit words

    psrlq_i2r (1, mm1);			// mm1 = __r7r6r5r4r3____ (leaves the top bit clear)
    punpcklbw_r2r (mm1, mm0);		// words 0-3: red in high byte, blue in low byte

    psllq_i2r (2, mm2);			// move green 0-3 up to bits 5-9

    punpckhbw_r2r (mm4, mm7);		// widen G4..G7 to 16-bit words
    por_r2r (mm2, mm0);			// mm0 = finished pixels 0-3

    psllq_i2r (2, mm7);			// move green 4-7 up to bits 5-9

    movntq (mm0, *image);		// store pixels 0-3
    punpckhbw_r2r (mm1, mm5);		// words 4-7: red in high byte, blue in low byte

    por_r2r (mm7, mm5);			// mm5 = finished pixels 4-7

    // U
    // V

    movntq (mm5, *(image+8));		// store pixels 4-7
}
267 | |||
/*
 * Pack the 8 pixels produced by mmx_yuv2rgb into 32-bit pixels and store
 * them.  Each pixel word holds blue in bits 0-7, green in bits 8-15, red
 * in bits 16-23 and zero in bits 24-31.
 *
 * image: destination; 32 bytes are written in four 8-byte stores.
 * cpu:   CPU_MMX or CPU_MMXEXT; consumed by the movntq() macro to pick
 *        the store instruction.
 *
 * Expects mm0 = B7..B0, mm1 = R7..R0, mm2 = G7..G0 on entry; mm0-mm2 are
 * left intact, mm3-mm7 are overwritten.
 */
static inline void mmx_unpack_32rgb (uint8_t * image, int cpu)
{
    /*
     * convert RGB plane to RGB packed format,
     * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
     * mm4 -> GB, mm5 -> AR pixel 4-7,
     * mm6 -> GB, mm7 -> AR pixel 0-3
     */

    pxor_r2r (mm3, mm3);		/* mm3 = 0 */
    movq_r2r (mm0, mm6);

    punpcklbw_r2r (mm2, mm6);		/* mm6 = G3B3 G2B2 G1B1 G0B0 */
    movq_r2r (mm1, mm7);

    punpcklbw_r2r (mm3, mm7);		/* mm7 = 0R3 0R2 0R1 0R0 */
    movq_r2r (mm0, mm4);

    punpcklwd_r2r (mm7, mm6);		/* mm6 = pixels 1,0 */
    movq_r2r (mm1, mm5);

    /* scheduling: this is hopeless */
    movntq (mm6, *image);
    movq_r2r (mm0, mm6);
    punpcklbw_r2r (mm2, mm6);		/* rebuild G/B words 0-3 */
    punpckhwd_r2r (mm7, mm6);		/* mm6 = pixels 3,2 */
    movntq (mm6, *(image+8));
    punpckhbw_r2r (mm2, mm4);		/* mm4 = G7B7 G6B6 G5B5 G4B4 */
    punpckhbw_r2r (mm3, mm5);		/* mm5 = 0R7 0R6 0R5 0R4 */
    punpcklwd_r2r (mm5, mm4);		/* mm4 = pixels 5,4 */
    movntq (mm4, *(image+16));
    movq_r2r (mm0, mm4);
    punpckhbw_r2r (mm2, mm4);		/* rebuild G/B words 4-7 */
    punpckhwd_r2r (mm5, mm4);		/* mm4 = pixels 7,6 */
    movntq (mm4, *(image+24));
}
304 | |||
305 | static inline void mmx_unpack_32bgr (uint8_t * image, int cpu) | ||
306 | { | ||
307 | /* | ||
308 | * convert RGB plane to RGB packed format, | ||
309 | * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, | ||
310 | * mm4 -> GB, mm5 -> AR pixel 4-7, | ||
311 | * mm6 -> GB, mm7 -> AR pixel 0-3 | ||
312 | */ | ||
313 | |||
314 | pxor_r2r (mm3, mm3); | ||
315 | movq_r2r (mm1, mm6); | ||
316 | |||
317 | punpcklbw_r2r (mm2, mm6); | ||
318 | movq_r2r (mm0, mm7); | ||
319 | |||
320 | punpcklbw_r2r (mm3, mm7); | ||
321 | movq_r2r (mm1, mm4); | ||
322 | |||
323 | punpcklwd_r2r (mm7, mm6); | ||
324 | movq_r2r (mm0, mm5); | ||
325 | |||
326 | /* scheduling: this is hopeless */ | ||
327 | movntq (mm6, *image); | ||
328 | movq_r2r (mm0, mm6); | ||
329 | punpcklbw_r2r (mm2, mm6); | ||
330 | punpckhwd_r2r (mm7, mm6); | ||
331 | movntq (mm6, *(image+8)); | ||
332 | punpckhbw_r2r (mm2, mm4); | ||
333 | punpckhbw_r2r (mm3, mm5); | ||
334 | punpcklwd_r2r (mm5, mm4); | ||
335 | movntq (mm4, *(image+16)); | ||
336 | movq_r2r (mm0, mm4); | ||
337 | punpckhbw_r2r (mm2, mm4); | ||
338 | punpckhwd_r2r (mm5, mm4); | ||
339 | movntq (mm4, *(image+24)); | ||
340 | } | ||
341 | |||
/*
 * Store the first 24 bytes of the packed form of the 8 pixels produced by
 * mmx_yuv2rgb.
 *
 * image: destination; 24 bytes are written in three 8-byte stores.
 * cpu:   CPU_MMX or CPU_MMXEXT; consumed by the movntq() macro to pick
 *        the store instruction.
 *
 * Expects mm0 = B7..B0, mm1 = R7..R0, mm2 = G7..G0 on entry.
 *
 * NOTE(review): the instruction sequence is the first three stores of
 * mmx_unpack_32rgb, i.e. it emits 4-byte pixels (B, G, R, zero) for pixels
 * 0-5 only, while the callers advance the output by 24 bytes per 8 source
 * pixels.  That does not look like a packed 3-byte-per-pixel layout --
 * confirm against the expected MODE_24_RGB format before relying on it.
 */
static inline void mmx_unpack_24rgb (uint8_t * image, int cpu)
{
    /*
     * convert RGB plane to RGB packed format,
     * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
     * mm4 -> GB, mm5 -> AR pixel 4-7,
     * mm6 -> GB, mm7 -> AR pixel 0-3
     */

    pxor_r2r (mm3, mm3);		/* mm3 = 0 */
    movq_r2r (mm0, mm6);

    punpcklbw_r2r (mm2, mm6);		/* mm6 = G3B3 G2B2 G1B1 G0B0 */
    movq_r2r (mm1, mm7);

    punpcklbw_r2r (mm3, mm7);		/* mm7 = 0R3 0R2 0R1 0R0 */
    movq_r2r (mm0, mm4);

    punpcklwd_r2r (mm7, mm6);		/* mm6 = pixels 1,0 */
    movq_r2r (mm1, mm5);

    /* scheduling: this is hopeless */
    movntq (mm6, *image);
    movq_r2r (mm0, mm6);
    punpcklbw_r2r (mm2, mm6);		/* rebuild G/B words 0-3 */
    punpckhwd_r2r (mm7, mm6);		/* mm6 = pixels 3,2 */
    movntq (mm6, *(image+8));
    punpckhbw_r2r (mm2, mm4);		/* mm4 = G7B7 G6B6 G5B5 G4B4 */
    punpckhbw_r2r (mm3, mm5);		/* mm5 = 0R7 0R6 0R5 0R4 */
    punpcklwd_r2r (mm5, mm4);		/* mm4 = pixels 5,4 */
    movntq (mm4, *(image+16));
}
374 | |||
375 | static inline void yuv420_rgb16 (yuv2rgb_t *this, | ||
376 | uint8_t * image, | ||
377 | uint8_t * py, uint8_t * pu, uint8_t * pv, | ||
378 | int cpu) | ||
379 | { | ||
380 | int i; | ||
381 | int rgb_stride = this->rgb_stride; | ||
382 | int y_stride = this->y_stride; | ||
383 | int uv_stride = this->uv_stride; | ||
384 | int width = this->source_width; | ||
385 | int height = this->source_height; | ||
386 | int dst_height = this->dest_height; | ||
387 | uint8_t *img; | ||
388 | |||
389 | width >>= 3; | ||
390 | |||
391 | if (!this->do_scale) { | ||
392 | y_stride -= 8 * width; | ||
393 | uv_stride -= 4 * width; | ||
394 | |||
395 | do { | ||
396 | |||
397 | i = width; img = image; | ||
398 | do { | ||
399 | mmx_yuv2rgb (py, pu, pv); | ||
400 | mmx_unpack_16rgb (img, cpu); | ||
401 | py += 8; | ||
402 | pu += 4; | ||
403 | pv += 4; | ||
404 | img += 16; | ||
405 | } while (--i); | ||
406 | |||
407 | py += y_stride; | ||
408 | image += rgb_stride; | ||
409 | if (height & 1) { | ||
410 | pu += uv_stride; | ||
411 | pv += uv_stride; | ||
412 | } else { | ||
413 | pu -= 4 * width; | ||
414 | pv -= 4 * width; | ||
415 | } | ||
416 | } while (--height); | ||
417 | |||
418 | } else { | ||
419 | |||
420 | scale_line_func_t scale_line = this->scale_line; | ||
421 | uint8_t *y_buf, *u_buf, *v_buf; | ||
422 | int dy = 0; | ||
423 | |||
424 | scale_line (pu, this->u_buffer, | ||
425 | this->dest_width >> 1, this->step_dx); | ||
426 | scale_line (pv, this->v_buffer, | ||
427 | this->dest_width >> 1, this->step_dx); | ||
428 | scale_line (py, this->y_buffer, | ||
429 | this->dest_width, this->step_dx); | ||
430 | for (height = 0;; ) { | ||
431 | |||
432 | y_buf = this->y_buffer; | ||
433 | u_buf = this->u_buffer; | ||
434 | v_buf = this->v_buffer; | ||
435 | |||
436 | i = this->dest_width >> 3; img = image; | ||
437 | do { | ||
438 | /* printf ("i : %d\n",i); */ | ||
439 | |||
440 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
441 | mmx_unpack_16rgb (img, cpu); | ||
442 | y_buf += 8; | ||
443 | u_buf += 4; | ||
444 | v_buf += 4; | ||
445 | img += 16; | ||
446 | } while (--i); | ||
447 | |||
448 | dy += this->step_dy; | ||
449 | image += rgb_stride; | ||
450 | |||
451 | while (--dst_height > 0 && dy < 32768) { | ||
452 | |||
453 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*2); | ||
454 | |||
455 | dy += this->step_dy; | ||
456 | image += rgb_stride; | ||
457 | } | ||
458 | |||
459 | if (dst_height <= 0) | ||
460 | break; | ||
461 | |||
462 | do { | ||
463 | dy -= 32768; | ||
464 | |||
465 | py += y_stride; | ||
466 | |||
467 | scale_line (py, this->y_buffer, | ||
468 | this->dest_width, this->step_dx); | ||
469 | |||
470 | if (height & 1) { | ||
471 | pu += uv_stride; | ||
472 | pv += uv_stride; | ||
473 | |||
474 | scale_line (pu, this->u_buffer, | ||
475 | this->dest_width >> 1, this->step_dx); | ||
476 | scale_line (pv, this->v_buffer, | ||
477 | this->dest_width >> 1, this->step_dx); | ||
478 | |||
479 | } | ||
480 | height++; | ||
481 | } while( dy>=32768); | ||
482 | } | ||
483 | } | ||
484 | } | ||
485 | |||
486 | static inline void yuv420_rgb15 (yuv2rgb_t *this, | ||
487 | uint8_t * image, | ||
488 | uint8_t * py, uint8_t * pu, uint8_t * pv, | ||
489 | int cpu) | ||
490 | { | ||
491 | int i; | ||
492 | int rgb_stride = this->rgb_stride; | ||
493 | int y_stride = this->y_stride; | ||
494 | int uv_stride = this->uv_stride; | ||
495 | int width = this->source_width; | ||
496 | int height = this->source_height; | ||
497 | int dst_height = this->dest_height; | ||
498 | uint8_t *img; | ||
499 | |||
500 | width >>= 3; | ||
501 | |||
502 | if (!this->do_scale) { | ||
503 | y_stride -= 8 * width; | ||
504 | uv_stride -= 4 * width; | ||
505 | |||
506 | do { | ||
507 | |||
508 | i = width; img = image; | ||
509 | do { | ||
510 | mmx_yuv2rgb (py, pu, pv); | ||
511 | mmx_unpack_15rgb (img, cpu); | ||
512 | py += 8; | ||
513 | pu += 4; | ||
514 | pv += 4; | ||
515 | img += 16; | ||
516 | } while (--i); | ||
517 | |||
518 | py += y_stride; | ||
519 | image += rgb_stride; | ||
520 | if (height & 1) { | ||
521 | pu += uv_stride; | ||
522 | pv += uv_stride; | ||
523 | } else { | ||
524 | pu -= 4 * width; | ||
525 | pv -= 4 * width; | ||
526 | } | ||
527 | } while (--height); | ||
528 | |||
529 | } else { | ||
530 | |||
531 | scale_line_func_t scale_line = this->scale_line; | ||
532 | uint8_t *y_buf, *u_buf, *v_buf; | ||
533 | int dy = 0; | ||
534 | |||
535 | scale_line (pu, this->u_buffer, | ||
536 | this->dest_width >> 1, this->step_dx); | ||
537 | scale_line (pv, this->v_buffer, | ||
538 | this->dest_width >> 1, this->step_dx); | ||
539 | scale_line (py, this->y_buffer, | ||
540 | this->dest_width, this->step_dx); | ||
541 | for (height = 0;; ) { | ||
542 | |||
543 | y_buf = this->y_buffer; | ||
544 | u_buf = this->u_buffer; | ||
545 | v_buf = this->v_buffer; | ||
546 | |||
547 | i = this->dest_width >> 3; img = image; | ||
548 | do { | ||
549 | /* printf ("i : %d\n",i); */ | ||
550 | |||
551 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
552 | mmx_unpack_15rgb (img, cpu); | ||
553 | y_buf += 8; | ||
554 | u_buf += 4; | ||
555 | v_buf += 4; | ||
556 | img += 16; | ||
557 | } while (--i); | ||
558 | |||
559 | dy += this->step_dy; | ||
560 | image += rgb_stride; | ||
561 | |||
562 | while (--dst_height > 0 && dy < 32768) { | ||
563 | |||
564 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*2); | ||
565 | |||
566 | dy += this->step_dy; | ||
567 | image += rgb_stride; | ||
568 | } | ||
569 | |||
570 | if (dst_height <= 0) | ||
571 | break; | ||
572 | |||
573 | do { | ||
574 | dy -= 32768; | ||
575 | py += y_stride; | ||
576 | |||
577 | scale_line (py, this->y_buffer, | ||
578 | this->dest_width, this->step_dx); | ||
579 | |||
580 | if (height & 1) { | ||
581 | pu += uv_stride; | ||
582 | pv += uv_stride; | ||
583 | |||
584 | scale_line (pu, this->u_buffer, | ||
585 | this->dest_width >> 1, this->step_dx); | ||
586 | scale_line (pv, this->v_buffer, | ||
587 | this->dest_width >> 1, this->step_dx); | ||
588 | |||
589 | } | ||
590 | height++; | ||
591 | } while( dy>=32768 ); | ||
592 | } | ||
593 | } | ||
594 | } | ||
595 | |||
596 | static inline void yuv420_rgb24 (yuv2rgb_t *this, | ||
597 | uint8_t * image, uint8_t * py, | ||
598 | uint8_t * pu, uint8_t * pv, int cpu) | ||
599 | { | ||
600 | int i; | ||
601 | int rgb_stride = this->rgb_stride; | ||
602 | int y_stride = this->y_stride; | ||
603 | int uv_stride = this->uv_stride; | ||
604 | int width = this->source_width; | ||
605 | int height = this->source_height; | ||
606 | int dst_height = this->dest_height; | ||
607 | uint8_t *img; | ||
608 | |||
609 | /* rgb_stride -= 4 * this->dest_width; */ | ||
610 | width >>= 3; | ||
611 | |||
612 | if (!this->do_scale) { | ||
613 | y_stride -= 8 * width; | ||
614 | uv_stride -= 4 * width; | ||
615 | |||
616 | do { | ||
617 | i = width; img = image; | ||
618 | do { | ||
619 | mmx_yuv2rgb (py, pu, pv); | ||
620 | mmx_unpack_24rgb (img, cpu); | ||
621 | py += 8; | ||
622 | pu += 4; | ||
623 | pv += 4; | ||
624 | img += 24; | ||
625 | } while (--i); | ||
626 | |||
627 | py += y_stride; | ||
628 | image += rgb_stride; | ||
629 | if (height & 1) { | ||
630 | pu += uv_stride; | ||
631 | pv += uv_stride; | ||
632 | } else { | ||
633 | pu -= 4 * width; | ||
634 | pv -= 4 * width; | ||
635 | } | ||
636 | } while (--height); | ||
637 | } else { | ||
638 | |||
639 | scale_line_func_t scale_line = this->scale_line; | ||
640 | uint8_t *y_buf, *u_buf, *v_buf; | ||
641 | int dy = 0; | ||
642 | |||
643 | scale_line (pu, this->u_buffer, | ||
644 | this->dest_width >> 1, this->step_dx); | ||
645 | scale_line (pv, this->v_buffer, | ||
646 | this->dest_width >> 1, this->step_dx); | ||
647 | scale_line (py, this->y_buffer, | ||
648 | this->dest_width, this->step_dx); | ||
649 | |||
650 | for (height = 0;; ) { | ||
651 | |||
652 | y_buf = this->y_buffer; | ||
653 | u_buf = this->u_buffer; | ||
654 | v_buf = this->v_buffer; | ||
655 | |||
656 | |||
657 | i = this->dest_width >> 3; img=image; | ||
658 | do { | ||
659 | /* printf ("i : %d\n",i); */ | ||
660 | |||
661 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
662 | mmx_unpack_24rgb (img, cpu); | ||
663 | y_buf += 8; | ||
664 | u_buf += 4; | ||
665 | v_buf += 4; | ||
666 | img += 24; | ||
667 | } while (--i); | ||
668 | |||
669 | dy += this->step_dy; | ||
670 | image += rgb_stride; | ||
671 | |||
672 | while (--dst_height > 0 && dy < 32768) { | ||
673 | |||
674 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*3); | ||
675 | |||
676 | dy += this->step_dy; | ||
677 | image += rgb_stride; | ||
678 | } | ||
679 | |||
680 | if (dst_height <= 0) | ||
681 | break; | ||
682 | |||
683 | do { | ||
684 | dy -= 32768; | ||
685 | py += y_stride; | ||
686 | |||
687 | scale_line (py, this->y_buffer, | ||
688 | this->dest_width, this->step_dx); | ||
689 | |||
690 | if (height & 1) { | ||
691 | pu += uv_stride; | ||
692 | pv += uv_stride; | ||
693 | |||
694 | scale_line (pu, this->u_buffer, | ||
695 | this->dest_width >> 1, this->step_dx); | ||
696 | scale_line (pv, this->v_buffer, | ||
697 | this->dest_width >> 1, this->step_dx); | ||
698 | } | ||
699 | height++; | ||
700 | } while( dy>=32768 ); | ||
701 | |||
702 | } | ||
703 | |||
704 | } | ||
705 | } | ||
706 | |||
707 | static inline void yuv420_argb32 (yuv2rgb_t *this, | ||
708 | uint8_t * image, uint8_t * py, | ||
709 | uint8_t * pu, uint8_t * pv, int cpu) | ||
710 | { | ||
711 | int i; | ||
712 | int rgb_stride = this->rgb_stride; | ||
713 | int y_stride = this->y_stride; | ||
714 | int uv_stride = this->uv_stride; | ||
715 | int width = this->source_width; | ||
716 | int height = this->source_height; | ||
717 | int dst_height = this->dest_height; | ||
718 | uint8_t *img; | ||
719 | |||
720 | /* rgb_stride -= 4 * this->dest_width; */ | ||
721 | width >>= 3; | ||
722 | |||
723 | if (!this->do_scale) { | ||
724 | y_stride -= 8 * width; | ||
725 | uv_stride -= 4 * width; | ||
726 | |||
727 | do { | ||
728 | i = width; img = image; | ||
729 | do { | ||
730 | mmx_yuv2rgb (py, pu, pv); | ||
731 | mmx_unpack_32rgb (img, cpu); | ||
732 | py += 8; | ||
733 | pu += 4; | ||
734 | pv += 4; | ||
735 | img += 32; | ||
736 | } while (--i); | ||
737 | |||
738 | py += y_stride; | ||
739 | image += rgb_stride; | ||
740 | if (height & 1) { | ||
741 | pu += uv_stride; | ||
742 | pv += uv_stride; | ||
743 | } else { | ||
744 | pu -= 4 * width; | ||
745 | pv -= 4 * width; | ||
746 | } | ||
747 | } while (--height); | ||
748 | } else { | ||
749 | |||
750 | scale_line_func_t scale_line = this->scale_line; | ||
751 | uint8_t *y_buf, *u_buf, *v_buf; | ||
752 | int dy = 0; | ||
753 | |||
754 | scale_line (pu, this->u_buffer, | ||
755 | this->dest_width >> 1, this->step_dx); | ||
756 | scale_line (pv, this->v_buffer, | ||
757 | this->dest_width >> 1, this->step_dx); | ||
758 | scale_line (py, this->y_buffer, | ||
759 | this->dest_width, this->step_dx); | ||
760 | |||
761 | for (height = 0;; ) { | ||
762 | |||
763 | y_buf = this->y_buffer; | ||
764 | u_buf = this->u_buffer; | ||
765 | v_buf = this->v_buffer; | ||
766 | |||
767 | |||
768 | i = this->dest_width >> 3; img=image; | ||
769 | do { | ||
770 | /* printf ("i : %d\n",i); */ | ||
771 | |||
772 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
773 | mmx_unpack_32rgb (img, cpu); | ||
774 | y_buf += 8; | ||
775 | u_buf += 4; | ||
776 | v_buf += 4; | ||
777 | img += 32; | ||
778 | } while (--i); | ||
779 | |||
780 | dy += this->step_dy; | ||
781 | image += rgb_stride; | ||
782 | |||
783 | while (--dst_height > 0 && dy < 32768) { | ||
784 | |||
785 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*4); | ||
786 | |||
787 | dy += this->step_dy; | ||
788 | image += rgb_stride; | ||
789 | } | ||
790 | |||
791 | if (dst_height <= 0) | ||
792 | break; | ||
793 | |||
794 | do { | ||
795 | dy -= 32768; | ||
796 | py += y_stride; | ||
797 | |||
798 | scale_line (py, this->y_buffer, | ||
799 | this->dest_width, this->step_dx); | ||
800 | |||
801 | if (height & 1) { | ||
802 | pu += uv_stride; | ||
803 | pv += uv_stride; | ||
804 | |||
805 | scale_line (pu, this->u_buffer, | ||
806 | this->dest_width >> 1, this->step_dx); | ||
807 | scale_line (pv, this->v_buffer, | ||
808 | this->dest_width >> 1, this->step_dx); | ||
809 | } | ||
810 | height++; | ||
811 | } while( dy>=32768 ); | ||
812 | } | ||
813 | |||
814 | } | ||
815 | } | ||
816 | |||
817 | static inline void yuv420_abgr32 (yuv2rgb_t *this, | ||
818 | uint8_t * image, uint8_t * py, | ||
819 | uint8_t * pu, uint8_t * pv, int cpu) | ||
820 | { | ||
821 | int i; | ||
822 | int rgb_stride = this->rgb_stride; | ||
823 | int y_stride = this->y_stride; | ||
824 | int uv_stride = this->uv_stride; | ||
825 | int width = this->source_width; | ||
826 | int height = this->source_height; | ||
827 | int dst_height = this->dest_height; | ||
828 | uint8_t *img; | ||
829 | |||
830 | /* rgb_stride -= 4 * this->dest_width; */ | ||
831 | width >>= 3; | ||
832 | |||
833 | if (!this->do_scale) { | ||
834 | y_stride -= 8 * width; | ||
835 | uv_stride -= 4 * width; | ||
836 | |||
837 | do { | ||
838 | i = width; img = image; | ||
839 | do { | ||
840 | mmx_yuv2rgb (py, pu, pv); | ||
841 | mmx_unpack_32bgr (img, cpu); | ||
842 | py += 8; | ||
843 | pu += 4; | ||
844 | pv += 4; | ||
845 | img += 32; | ||
846 | } while (--i); | ||
847 | |||
848 | py += y_stride; | ||
849 | image += rgb_stride; | ||
850 | if (height & 1) { | ||
851 | pu += uv_stride; | ||
852 | pv += uv_stride; | ||
853 | } else { | ||
854 | pu -= 4 * width; | ||
855 | pv -= 4 * width; | ||
856 | } | ||
857 | } while (--height); | ||
858 | } else { | ||
859 | |||
860 | scale_line_func_t scale_line = this->scale_line; | ||
861 | uint8_t *y_buf, *u_buf, *v_buf; | ||
862 | int dy = 0; | ||
863 | |||
864 | scale_line (pu, this->u_buffer, | ||
865 | this->dest_width >> 1, this->step_dx); | ||
866 | scale_line (pv, this->v_buffer, | ||
867 | this->dest_width >> 1, this->step_dx); | ||
868 | scale_line (py, this->y_buffer, | ||
869 | this->dest_width, this->step_dx); | ||
870 | |||
871 | for (height = 0;; ) { | ||
872 | |||
873 | y_buf = this->y_buffer; | ||
874 | u_buf = this->u_buffer; | ||
875 | v_buf = this->v_buffer; | ||
876 | |||
877 | |||
878 | i = this->dest_width >> 3; img=image; | ||
879 | do { | ||
880 | /* printf ("i : %d\n",i); */ | ||
881 | |||
882 | mmx_yuv2rgb (y_buf, u_buf, v_buf); | ||
883 | mmx_unpack_32bgr (img, cpu); | ||
884 | y_buf += 8; | ||
885 | u_buf += 4; | ||
886 | v_buf += 4; | ||
887 | img += 32; | ||
888 | } while (--i); | ||
889 | |||
890 | dy += this->step_dy; | ||
891 | image += rgb_stride; | ||
892 | |||
893 | while (--dst_height > 0 && dy < 32768) { | ||
894 | |||
895 | xine_fast_memcpy (image, image-rgb_stride, this->dest_width*4); | ||
896 | |||
897 | dy += this->step_dy; | ||
898 | image += rgb_stride; | ||
899 | } | ||
900 | |||
901 | if (dst_height <= 0) | ||
902 | break; | ||
903 | |||
904 | do { | ||
905 | dy -= 32768; | ||
906 | py += y_stride; | ||
907 | |||
908 | scale_line (py, this->y_buffer, | ||
909 | this->dest_width, this->step_dx); | ||
910 | |||
911 | if (height & 1) { | ||
912 | pu += uv_stride; | ||
913 | pv += uv_stride; | ||
914 | |||
915 | scale_line (pu, this->u_buffer, | ||
916 | this->dest_width >> 1, this->step_dx); | ||
917 | scale_line (pv, this->v_buffer, | ||
918 | this->dest_width >> 1, this->step_dx); | ||
919 | } | ||
920 | height++; | ||
921 | } while( dy>=32768 ); | ||
922 | |||
923 | } | ||
924 | |||
925 | } | ||
926 | } | ||
927 | |||
/*
 * MODE_15_RGB entry point installed by yuv2rgb_init_mmxext: runs
 * yuv420_rgb15 with cpu = CPU_MMXEXT, then executes emms.
 */
static void mmxext_rgb15 (yuv2rgb_t *this, uint8_t * image,
			  uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_rgb15 (this, image, py, pu, pv, CPU_MMXEXT);
  emms();	/* re-initialize x86 FPU after MMX use */
}
934 | |||
/*
 * MODE_16_RGB entry point installed by yuv2rgb_init_mmxext: runs
 * yuv420_rgb16 with cpu = CPU_MMXEXT, then executes emms.
 */
static void mmxext_rgb16 (yuv2rgb_t *this, uint8_t * image,
			  uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_rgb16 (this, image, py, pu, pv, CPU_MMXEXT);
  emms();	/* re-initialize x86 FPU after MMX use */
}
941 | |||
/*
 * MODE_24_RGB entry point installed by yuv2rgb_init_mmxext: runs
 * yuv420_rgb24 with cpu = CPU_MMXEXT, then executes emms.
 */
static void mmxext_rgb24 (yuv2rgb_t *this, uint8_t * image,
			  uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_rgb24 (this, image, py, pu, pv, CPU_MMXEXT);
  emms();	/* re-initialize x86 FPU after MMX use */
}
948 | |||
/*
 * MODE_32_RGB entry point installed by yuv2rgb_init_mmxext: runs
 * yuv420_argb32 with cpu = CPU_MMXEXT, then executes emms.
 */
static void mmxext_argb32 (yuv2rgb_t *this, uint8_t * image,
			   uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_argb32 (this, image, py, pu, pv, CPU_MMXEXT);
  emms();	/* re-initialize x86 FPU after MMX use */
}
955 | |||
/*
 * MODE_32_BGR entry point installed by yuv2rgb_init_mmxext: runs
 * yuv420_abgr32 with cpu = CPU_MMXEXT, then executes emms.
 */
static void mmxext_abgr32 (yuv2rgb_t *this, uint8_t * image,
			   uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_abgr32 (this, image, py, pu, pv, CPU_MMXEXT);
  emms();	/* re-initialize x86 FPU after MMX use */
}
962 | |||
/*
 * MODE_15_RGB entry point installed by yuv2rgb_init_mmx: runs
 * yuv420_rgb15 with cpu = CPU_MMX, then executes emms.
 */
static void mmx_rgb15 (yuv2rgb_t *this, uint8_t * image,
		       uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_rgb15 (this, image, py, pu, pv, CPU_MMX);
  emms();	/* re-initialize x86 FPU after MMX use */
}
969 | |||
/*
 * MODE_16_RGB entry point installed by yuv2rgb_init_mmx: runs
 * yuv420_rgb16 with cpu = CPU_MMX, then executes emms.
 */
static void mmx_rgb16 (yuv2rgb_t *this, uint8_t * image,
		       uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_rgb16 (this, image, py, pu, pv, CPU_MMX);
  emms();	/* re-initialize x86 FPU after MMX use */
}
976 | |||
/*
 * MODE_24_RGB entry point installed by yuv2rgb_init_mmx: runs
 * yuv420_rgb24 with cpu = CPU_MMX, then executes emms.
 */
static void mmx_rgb24 (yuv2rgb_t *this, uint8_t * image,
		       uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_rgb24 (this, image, py, pu, pv, CPU_MMX);
  emms();	/* re-initialize x86 FPU after MMX use */
}
983 | |||
/*
 * MODE_32_RGB entry point installed by yuv2rgb_init_mmx: runs
 * yuv420_argb32 with cpu = CPU_MMX, then executes emms.
 */
static void mmx_argb32 (yuv2rgb_t *this, uint8_t * image,
			uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_argb32 (this, image, py, pu, pv, CPU_MMX);
  emms();	/* re-initialize x86 FPU after MMX use */
}
990 | |||
/*
 * MODE_32_BGR entry point installed by yuv2rgb_init_mmx: runs
 * yuv420_abgr32 with cpu = CPU_MMX, then executes emms.
 */
static void mmx_abgr32 (yuv2rgb_t *this, uint8_t * image,
			uint8_t * py, uint8_t * pu, uint8_t * pv)
{
  yuv420_abgr32 (this, image, py, pu, pv, CPU_MMX);
  emms();	/* re-initialize x86 FPU after MMX use */
}
997 | |||
998 | void yuv2rgb_init_mmxext (yuv2rgb_factory_t *this) { | ||
999 | |||
1000 | if (this->swapped) | ||
1001 | return; /*no swapped pixel output upto now*/ | ||
1002 | |||
1003 | switch (this->mode) { | ||
1004 | case MODE_15_RGB: | ||
1005 | this->yuv2rgb_fun = mmxext_rgb15; | ||
1006 | break; | ||
1007 | case MODE_16_RGB: | ||
1008 | this->yuv2rgb_fun = mmxext_rgb16; | ||
1009 | break; | ||
1010 | case MODE_24_RGB: | ||
1011 | this->yuv2rgb_fun = mmxext_rgb24; | ||
1012 | break; | ||
1013 | case MODE_32_RGB: | ||
1014 | this->yuv2rgb_fun = mmxext_argb32; | ||
1015 | break; | ||
1016 | case MODE_32_BGR: | ||
1017 | this->yuv2rgb_fun = mmxext_abgr32; | ||
1018 | break; | ||
1019 | } | ||
1020 | } | ||
1021 | |||
1022 | void yuv2rgb_init_mmx (yuv2rgb_factory_t *this) { | ||
1023 | |||
1024 | if (this->swapped) | ||
1025 | return; /*no swapped pixel output upto now*/ | ||
1026 | |||
1027 | switch (this->mode) { | ||
1028 | case MODE_15_RGB: | ||
1029 | this->yuv2rgb_fun = mmx_rgb15; | ||
1030 | break; | ||
1031 | case MODE_16_RGB: | ||
1032 | this->yuv2rgb_fun = mmx_rgb16; | ||
1033 | break; | ||
1034 | case MODE_24_RGB: | ||
1035 | this->yuv2rgb_fun = mmx_rgb24; | ||
1036 | break; | ||
1037 | case MODE_32_RGB: | ||
1038 | this->yuv2rgb_fun = mmx_argb32; | ||
1039 | break; | ||
1040 | case MODE_32_BGR: | ||
1041 | this->yuv2rgb_fun = mmx_abgr32; | ||
1042 | break; | ||
1043 | } | ||
1044 | } | ||
1045 | |||
1046 | |||
1047 | #endif | ||