Diffstat (limited to 'noncore/multimedia/opieplayer2/yuv2rgb_arm2.c') (more/less context) (show whitespace changes)
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_arm2.c | 875 |
1 files changed, 875 insertions, 0 deletions
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_arm2.c b/noncore/multimedia/opieplayer2/yuv2rgb_arm2.c new file mode 100644 index 0000000..cbd32e8 --- a/dev/null +++ b/noncore/multimedia/opieplayer2/yuv2rgb_arm2.c | |||
@@ -0,0 +1,875 @@ | |||
1 | /* | ||
2 | * yuv2rgb_arm2.c | ||
3 | * Copyright (C) 2002 Frederic 'dilb' Boulay. | ||
4 | * All Rights Reserved. | ||
5 | * | ||
6 | * Author: Frederic Boulay <dilb@handhelds.org> | ||
7 | * | ||
8 | * you can redistribute this file and/or modify | ||
9 | * it under the terms of the GNU General Public License (version 2) | ||
10 | * as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This file is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | * | ||
21 | * | ||
22 | * The functions defined in this file are derived from work done in the xine | ||
23 | * project. | ||
24 | * In order to improve performance, by strongly reducing memory bandwidth | ||
25 | * needed, the scaling functions are merged with the yuv2rgb function. | ||
26 | */ | ||
27 | |||
28 | #ifdef __arm__ | ||
29 | |||
30 | #include <stdio.h> | ||
31 | #include <stdlib.h> | ||
32 | #include <string.h> | ||
33 | #include <inttypes.h> | ||
34 | |||
35 | #include "yuv2rgb.h" | ||
36 | #include <xine/xineutils.h> | ||
37 | |||
38 | /* Prototypes of the "local" functions available here: */ | ||
39 | /* first prototype, function called when no scaling is needed: */ | ||
40 | static void arm_rgb16_noscale(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*); | ||
41 | /* second prototype, function called when no horizontal scaling is needed: */ | ||
42 | static void arm_rgb16_step_dx_32768(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*); | ||
43 | /* third prototype, function called when scaling is needed for zooming in: */ | ||
44 | static void arm_rgb16_step_dx_inf_32768(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*); | ||
45 | /* fourth prototype, function called when scaling is needed for zooming out (between 1x and 2x): */ | ||
46 | static void arm_rgb16_step_dx_bet_32768_65536(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*); | ||
47 | /* fifth prototype, function called when scaling is needed for zooming out (greater than 2x): */ | ||
48 | static void arm_rgb16_step_dx_sup_65536(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*); | ||
49 | /* sixth prototype, function where the decision of the scaling function to use is made.*/ | ||
50 | static void arm_rgb16_2 (yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*); | ||
51 | |||
52 | |||
53 | |||
54 | |||
55 | /* extern function: */ | ||
56 | |||
57 | /* Function: */ | ||
58 | void yuv2rgb_init_arm (yuv2rgb_factory_t *this) | ||
59 | /* This function initialise the member yuv2rgb_fun, if everything is right | ||
60 | the function optimised for the arm target should be used.*/ | ||
61 | { | ||
62 | if (this->swapped) | ||
63 | return; /*no swapped pixel output upto now*/ | ||
64 | |||
65 | switch (this->mode) | ||
66 | { | ||
67 | case MODE_16_RGB: | ||
68 | this->yuv2rgb_fun = arm_rgb16_2; | ||
69 | break; | ||
70 | default: | ||
71 | } | ||
72 | } | ||
73 | |||
74 | |||
75 | |||
76 | /* local functions: */ | ||
77 | |||
78 | /* Function: */ | ||
79 | static void arm_rgb16_2 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | ||
80 | /* This function takes care of applying the right scaling conversion | ||
81 | (yuv2rgb is included in each scaling function!)*/ | ||
82 | { | ||
83 | if (!this->do_scale) | ||
84 | { | ||
85 | arm_rgb16_noscale(this, _dst, _py, _pu, _pv); | ||
86 | return; | ||
87 | } | ||
88 | if (this->step_dx<32768) | ||
89 | { | ||
90 | arm_rgb16_step_dx_inf_32768(this, _dst, _py, _pu, _pv); | ||
91 | return; | ||
92 | } | ||
93 | if (this->step_dx==32768) | ||
94 | { | ||
95 | arm_rgb16_step_dx_32768(this, _dst, _py, _pu, _pv); | ||
96 | return; | ||
97 | } | ||
98 | if (this->step_dx<65536) | ||
99 | { | ||
100 | arm_rgb16_step_dx_bet_32768_65536(this, _dst, _py, _pu, _pv); | ||
101 | return; | ||
102 | } | ||
103 | arm_rgb16_step_dx_sup_65536(this, _dst, _py, _pu, _pv); | ||
104 | return; | ||
105 | } | ||
106 | |||
107 | |||
108 | /* Function: */ | ||
109 | static void arm_rgb16_noscale(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | ||
110 | /* This function is called when the source and the destination pictures have the same size. | ||
111 | In this case, scaling part is not needed. | ||
112 | (This code is probably far from being optimised, in particular, the asm | ||
113 | generated is not the most efficient, a pure asm version will probably | ||
114 | emerge sooner or later). But at least, this version is faster than what | ||
115 | was used before.*/ | ||
116 | { | ||
117 | int height; | ||
118 | |||
119 | height=this->dest_height; | ||
120 | |||
121 | while (height>0) | ||
122 | { | ||
123 | uint16_t *r, *g, *b; | ||
124 | uint8_t *py, *py2, *pu, *pv; | ||
125 | uint16_t *dst, *dst2; | ||
126 | int width; | ||
127 | register uint8_t p1y, p1u, p1v; | ||
128 | |||
129 | height-=2; | ||
130 | width=this->dest_width; | ||
131 | dst = _dst; | ||
132 | dst2 = _dst + this->rgb_stride; | ||
133 | py = _py; | ||
134 | py2 = _py + this->y_stride; | ||
135 | pu = _pu; | ||
136 | pv = _pv; | ||
137 | |||
138 | while (width>0) | ||
139 | { | ||
140 | width-=2; | ||
141 | p1y=*py++; | ||
142 | p1u=*pu++; | ||
143 | p1v=*pv++; | ||
144 | |||
145 | r = this->table_rV[p1v]; | ||
146 | g = (void *) (((uint8_t *)this->table_gU[p1u]) + this->table_gV[p1v]); | ||
147 | b = this->table_bU[p1u]; | ||
148 | |||
149 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | ||
150 | p1y=*py++; | ||
151 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | ||
152 | |||
153 | p1y=*py2++; | ||
154 | |||
155 | *dst2++ = r[p1y] + g[p1y] + b[p1y]; | ||
156 | p1y=*py2++; | ||
157 | *dst2++ = r[p1y] + g[p1y] + b[p1y]; | ||
158 | } | ||
159 | _dst += (this->rgb_stride)<<1; | ||
160 | _py += (this->y_stride)<<1; | ||
161 | _pu += this->uv_stride; | ||
162 | _pv += this->uv_stride; | ||
163 | } | ||
164 | } | ||
165 | |||
166 | |||
167 | /* Function: */ | ||
168 | static void arm_rgb16_step_dx_inf_32768(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | ||
169 | /* This function is called when the destination picture is bigger than the size | ||
170 | of the source picture. | ||
171 | */ | ||
172 | { | ||
173 | int recal_uv, height; /* Note about recal_uv: bit0 is for | ||
174 | applying scale on u and v, bit1 is for increments of u and v pointers.*/ | ||
175 | int dy; | ||
176 | |||
177 | dy = 0; | ||
178 | height = this->dest_height; | ||
179 | recal_uv=1; // 1 for evaluation of scale_line, needed the first time | ||
180 | |||
181 | while(1) | ||
182 | { | ||
183 | register int dxy; | ||
184 | register int dxuv; | ||
185 | register uint8_t p1y, p2y; | ||
186 | uint8_t dest1y, dest2y; | ||
187 | register uint8_t p1u, p2u; | ||
188 | register uint8_t p1v, p2v; | ||
189 | uint8_t dest1u; | ||
190 | uint8_t dest1v; | ||
191 | int width; | ||
192 | uint8_t *u_buffer; | ||
193 | uint8_t *v_buffer; | ||
194 | uint16_t *r, *g, *b; | ||
195 | uint8_t *py, *pu, *pv; | ||
196 | uint16_t *dst; | ||
197 | |||
198 | dxy = 0; | ||
199 | dxuv = 0; | ||
200 | width = this->dest_width; | ||
201 | u_buffer=this->u_buffer; | ||
202 | v_buffer=this->v_buffer; | ||
203 | dst = (uint16_t*)_dst; | ||
204 | py = _py; | ||
205 | pu = _pu; | ||
206 | pv = _pv; | ||
207 | |||
208 | //proceed with line scaling/conversion | ||
209 | if ((recal_uv&1)!=0) | ||
210 | { | ||
211 | recal_uv^=1; //reset bit0. | ||
212 | // init values: | ||
213 | p1u = *pu++; | ||
214 | p2u = *pu++; | ||
215 | p1v = *pv++; | ||
216 | p2v = *pv++; | ||
217 | p1y = *py++; | ||
218 | p2y = *py++; | ||
219 | |||
220 | //width loop (compute all data for a line). | ||
221 | while (width>0) | ||
222 | { | ||
223 | // proceed with u and v first (ok, and y too finally :)) [scaling part]: | ||
224 | // evaluate 1u, 1v, and 2y | ||
225 | //block1_uvy | ||
226 | dest1u=p1u + ((dxuv*(p2u-p1u))>>15); | ||
227 | dest1v=p1v + ((dxuv*(p2v-p1v))>>15); | ||
228 | // as u and v are evaluated, better save them now | ||
229 | *u_buffer++ = (uint8_t)dest1u; | ||
230 | *v_buffer++ = (uint8_t)dest1v; | ||
231 | |||
232 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | ||
233 | |||
234 | dxuv += this->step_dx; | ||
235 | dxy += this->step_dx; | ||
236 | if (dxuv > 32768) | ||
237 | { | ||
238 | dxuv -= 32768; | ||
239 | p1u = p2u; | ||
240 | p2u = *pu++; //idee pour asm, cf cas then, un merge est possible!!! | ||
241 | p1v = p2v; | ||
242 | p2v = *pv++; | ||
243 | } | ||
244 | if (dxy > 32768) | ||
245 | { | ||
246 | dxy -= 32768; | ||
247 | p1y = p2y; | ||
248 | p2y = *py++; | ||
249 | } | ||
250 | //end block1_uvy | ||
251 | |||
252 | //block2_y | ||
253 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | ||
254 | |||
255 | dxy += this->step_dx; | ||
256 | if (dxy > 32768) | ||
257 | { | ||
258 | dxy -= 32768; | ||
259 | p1y = p2y; | ||
260 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! | ||
261 | } | ||
262 | //end block2_y | ||
263 | |||
264 | // proceed now with YUV2RGB [conversion part]: | ||
265 | // u and v are currently in dest1u and dest1v | ||
266 | // the 2 y are in dest1y and dest2y. | ||
267 | // RGB(0),DST1(0), RGB(1), DST1(1) | ||
268 | r = this->table_rV[dest1v]; | ||
269 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | ||
270 | b = this->table_bU[dest1u]; | ||
271 | |||
272 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | ||
273 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | ||
274 | |||
275 | width -=2; | ||
276 | } | ||
277 | } | ||
278 | else | ||
279 | { | ||
280 | // this case is simple, u and v are already evaluated, | ||
281 | // Note pour moi: r, g et b pourraient etre reutilises!! | ||
282 | |||
283 | // init values: | ||
284 | p1y = *py++; | ||
285 | p2y = *py++; | ||
286 | |||
287 | //width loop (compute all data for a line). | ||
288 | while (width>0) | ||
289 | { | ||
290 | // proceed with y [scaling part]: | ||
291 | // evaluate 2y | ||
292 | //block1_y | ||
293 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | ||
294 | |||
295 | dxy += this->step_dx; | ||
296 | if (dxy > 32768) | ||
297 | { | ||
298 | dxy -= 32768; | ||
299 | p1y = p2y; | ||
300 | p2y = *py++; | ||
301 | } | ||
302 | //end block1_uvy | ||
303 | |||
304 | //block2_y | ||
305 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | ||
306 | |||
307 | dxy += this->step_dx; | ||
308 | if (dxy > 32768) | ||
309 | { | ||
310 | dxy -= 32768; | ||
311 | p1y = p2y; | ||
312 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! | ||
313 | } | ||
314 | //end block2_y | ||
315 | |||
316 | // proceed now with YUV2RGB [conversion part]: | ||
317 | // u and v are currently in dest1u and dest1v | ||
318 | // the 2 y are in dest1y and dest2y. | ||
319 | // RGB(0),DST1(0) | ||
320 | dest1u=*u_buffer++; | ||
321 | dest1v=*v_buffer++; | ||
322 | r = this->table_rV[dest1v]; | ||
323 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | ||
324 | b = this->table_bU[dest1u]; | ||
325 | |||
326 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | ||
327 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | ||
328 | |||
329 | width -=2; | ||
330 | } | ||
331 | } | ||
332 | // end of line scaling/conversion | ||
333 | dy += this->step_dy; | ||
334 | _dst += this->rgb_stride; | ||
335 | |||
336 | while (--height > 0 && dy < 32768) | ||
337 | { | ||
338 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! | ||
339 | dy += this->step_dy; | ||
340 | _dst += this->rgb_stride; | ||
341 | } | ||
342 | |||
343 | |||
344 | if (height <= 0) | ||
345 | break; | ||
346 | |||
347 | do | ||
348 | { | ||
349 | dy -= 32768; | ||
350 | _py += this->y_stride; | ||
351 | |||
352 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ | ||
353 | |||
354 | if ((recal_uv&2)==0) | ||
355 | { | ||
356 | _pu += this->uv_stride; | ||
357 | _pv += this->uv_stride; | ||
358 | recal_uv|=1; // if update, then reevaluate scanline! | ||
359 | } | ||
360 | } | ||
361 | while( dy>=32768); | ||
362 | } | ||
363 | } | ||
364 | |||
365 | |||
366 | |||
367 | /* Function: */ | ||
368 | static void arm_rgb16_step_dx_32768(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | ||
369 | /* This function is called when the widht of the destination picture is the | ||
370 | same as the size of the source picture. | ||
371 | */ | ||
372 | { | ||
373 | int recal_uv, height; | ||
374 | int dy; | ||
375 | |||
376 | dy = 0; | ||
377 | height=this->dest_height; | ||
378 | recal_uv=0; | ||
379 | |||
380 | while (1) | ||
381 | { | ||
382 | uint16_t *r, *g, *b; | ||
383 | uint8_t *py, *pu, *pv; | ||
384 | uint16_t *dst; | ||
385 | int width; | ||
386 | register uint8_t p1y, p1u, p1v; | ||
387 | |||
388 | width=this->dest_width; | ||
389 | dst = (uint16_t*)_dst; | ||
390 | py = _py; | ||
391 | pu = _pu; | ||
392 | pv = _pv; | ||
393 | |||
394 | while (width>0) | ||
395 | { | ||
396 | width-=2; | ||
397 | p1y=*py++; | ||
398 | p1u=*pu++; | ||
399 | p1v=*pv++; | ||
400 | |||
401 | r = this->table_rV[p1v]; | ||
402 | g = (void *) (((uint8_t *)this->table_gU[p1u]) + this->table_gV[p1v]); | ||
403 | b = this->table_bU[p1u]; | ||
404 | |||
405 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | ||
406 | p1y=*py++; | ||
407 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | ||
408 | } | ||
409 | |||
410 | |||
411 | |||
412 | // end of line scaling/conversion | ||
413 | dy += this->step_dy; | ||
414 | _dst += this->rgb_stride; | ||
415 | |||
416 | while (--height > 0 && dy < 32768) | ||
417 | { | ||
418 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! | ||
419 | dy += this->step_dy; | ||
420 | _dst += this->rgb_stride; | ||
421 | } | ||
422 | |||
423 | |||
424 | if (height <= 0) | ||
425 | break; | ||
426 | |||
427 | do | ||
428 | { | ||
429 | dy -= 32768; | ||
430 | _py += this->y_stride; | ||
431 | |||
432 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ | ||
433 | |||
434 | if ((recal_uv&2)==0) | ||
435 | { | ||
436 | _pu += this->uv_stride; | ||
437 | _pv += this->uv_stride; | ||
438 | recal_uv|=1; // if update, then reevaluate scanline! | ||
439 | } | ||
440 | } | ||
441 | while( dy>=32768); | ||
442 | } | ||
443 | } | ||
444 | |||
445 | |||
446 | |||
447 | /* Function: */ | ||
448 | static void arm_rgb16_step_dx_bet_32768_65536(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | ||
449 | /* This function is called when the destination picture is between the size | ||
450 | of the source picture, and half its size. | ||
451 | */ | ||
452 | { | ||
453 | int recal_uv, height; /* Note about recal_uv: bit0 is for | ||
454 | applying scale on u and v, bit1 is for increments of u and v pointers.*/ | ||
455 | int dy; | ||
456 | |||
457 | dy = 0; | ||
458 | height = this->dest_height; | ||
459 | recal_uv=1; // 1 for evaluation of scale_line, needed the first time | ||
460 | |||
461 | while(1) | ||
462 | { | ||
463 | register int dxy; | ||
464 | register int dxuv; | ||
465 | register uint8_t p1y, p2y; | ||
466 | uint8_t dest1y, dest2y; | ||
467 | register uint8_t p1u, p2u; | ||
468 | register uint8_t p1v, p2v; | ||
469 | uint8_t dest1u; | ||
470 | uint8_t dest1v; | ||
471 | int width; | ||
472 | uint8_t *u_buffer; | ||
473 | uint8_t *v_buffer; | ||
474 | uint16_t *r, *g, *b; | ||
475 | uint8_t *py, *pu, *pv; | ||
476 | uint16_t *dst; | ||
477 | |||
478 | dxy = 0; | ||
479 | dxuv = 0; | ||
480 | width = this->dest_width; | ||
481 | u_buffer=this->u_buffer; | ||
482 | v_buffer=this->v_buffer; | ||
483 | dst = (uint16_t*)_dst; | ||
484 | py = _py; | ||
485 | pu = _pu; | ||
486 | pv = _pv; | ||
487 | |||
488 | //proceed with line scaling/conversion | ||
489 | if ((recal_uv&1)!=0) | ||
490 | { | ||
491 | recal_uv^=1; //reset bit0. | ||
492 | // init values: | ||
493 | p1u = *pu++; | ||
494 | p2u = *pu++; | ||
495 | p1v = *pv++; | ||
496 | p2v = *pv++; | ||
497 | p1y = *py++; | ||
498 | p2y = *py++; | ||
499 | |||
500 | //width loop (compute all data for a line). | ||
501 | while (width>0) | ||
502 | { | ||
503 | // proceed with u and v first (ok, and y too finally :)) [scaling part]: | ||
504 | // evaluate 1u, 1v, and 2y | ||
505 | //block1_uvy | ||
506 | dest1u=p1u + ((dxuv*(p2u-p1u))>>15); | ||
507 | dest1v=p1v + ((dxuv*(p2v-p1v))>>15); | ||
508 | // as u and v are evaluated, better save them now | ||
509 | *u_buffer++ = (uint8_t)dest1u; | ||
510 | *v_buffer++ = (uint8_t)dest1v; | ||
511 | |||
512 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | ||
513 | |||
514 | dxuv += this->step_dx; | ||
515 | dxy += this->step_dx; | ||
516 | if (dxuv > 65536) | ||
517 | { | ||
518 | dxuv -= 65536; | ||
519 | p1u = *pu++; | ||
520 | p2u = *pu++; | ||
521 | p1v = *pv++; | ||
522 | p2v = *pv++; | ||
523 | } | ||
524 | else | ||
525 | { | ||
526 | dxuv -= 32768; | ||
527 | p1u = p2u; | ||
528 | p2u = *pu++; //idee pour asm, cf cas then, un merge est possible!!! | ||
529 | p1v = p2v; | ||
530 | p2v = *pv++; | ||
531 | } | ||
532 | if (dxy > 65536) | ||
533 | { | ||
534 | dxy -= 65536; | ||
535 | p1y = *py++; | ||
536 | p2y = *py++; | ||
537 | } | ||
538 | else | ||
539 | { | ||
540 | dxy -= 32768; | ||
541 | p1y = p2y; | ||
542 | p2y = *py++; | ||
543 | } | ||
544 | //end block1_uvy | ||
545 | |||
546 | //block2_y | ||
547 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | ||
548 | |||
549 | dxy += this->step_dx; | ||
550 | if (dxy > 65536) | ||
551 | { | ||
552 | dxy -= 65536; | ||
553 | p1y = *py++; | ||
554 | p2y = *py++; | ||
555 | } | ||
556 | else | ||
557 | { | ||
558 | dxy -= 32768; | ||
559 | p1y = p2y; | ||
560 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! | ||
561 | } | ||
562 | //end block2_y | ||
563 | |||
564 | // proceed now with YUV2RGB [conversion part]: | ||
565 | // u and v are currently in dest1u and dest1v | ||
566 | // the 2 y are in dest1y and dest2y. | ||
567 | // RGB(0),DST1(0), RGB(1), DST1(1) | ||
568 | r = this->table_rV[dest1v]; | ||
569 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | ||
570 | b = this->table_bU[dest1u]; | ||
571 | |||
572 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | ||
573 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | ||
574 | |||
575 | width -=2; | ||
576 | } | ||
577 | } | ||
578 | else | ||
579 | { | ||
580 | // this case is simple, u and v are already evaluated, | ||
581 | // Note pour moi: r, g et b pourraient etre reutilises!! | ||
582 | |||
583 | // init values: | ||
584 | p1y = *py++; | ||
585 | p2y = *py++; | ||
586 | |||
587 | //width loop (compute all data for a line). | ||
588 | while (width>0) | ||
589 | { | ||
590 | // proceed with y [scaling part]: | ||
591 | // evaluate 2y | ||
592 | //block1_y | ||
593 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | ||
594 | |||
595 | dxy += this->step_dx; | ||
596 | if (dxy > 65536) | ||
597 | { | ||
598 | dxy -= 65536; | ||
599 | p1y = *py++; | ||
600 | p2y = *py++; | ||
601 | } | ||
602 | else | ||
603 | { | ||
604 | dxy -= 32768; | ||
605 | p1y = p2y; | ||
606 | p2y = *py++; | ||
607 | } | ||
608 | //end block1_uvy | ||
609 | |||
610 | //block2_y | ||
611 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | ||
612 | |||
613 | dxy += this->step_dx; | ||
614 | if (dxy > 65536) | ||
615 | { | ||
616 | dxy -= 65536; | ||
617 | p1y = *py++; | ||
618 | p2y = *py++; | ||
619 | } | ||
620 | else | ||
621 | { | ||
622 | dxy -= 32768; | ||
623 | p1y = p2y; | ||
624 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! | ||
625 | } | ||
626 | //end block2_y | ||
627 | |||
628 | // proceed now with YUV2RGB [conversion part]: | ||
629 | // u and v are currently in dest1u and dest1v | ||
630 | // the 2 y are in dest1y and dest2y. | ||
631 | // RGB(0),DST1(0) | ||
632 | dest1u=*u_buffer++; | ||
633 | dest1v=*v_buffer++; | ||
634 | r = this->table_rV[dest1v]; | ||
635 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | ||
636 | b = this->table_bU[dest1u]; | ||
637 | |||
638 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | ||
639 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | ||
640 | |||
641 | width -=2; | ||
642 | } | ||
643 | } | ||
644 | // end of line scaling/conversion | ||
645 | dy += this->step_dy; | ||
646 | _dst += this->rgb_stride; | ||
647 | |||
648 | while (--height > 0 && dy < 32768) | ||
649 | { | ||
650 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! | ||
651 | dy += this->step_dy; | ||
652 | _dst += this->rgb_stride; | ||
653 | } | ||
654 | |||
655 | |||
656 | if (height <= 0) | ||
657 | break; | ||
658 | |||
659 | do | ||
660 | { | ||
661 | dy -= 32768; | ||
662 | _py += this->y_stride; | ||
663 | |||
664 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ | ||
665 | |||
666 | if ((recal_uv&2)==0) | ||
667 | { | ||
668 | _pu += this->uv_stride; | ||
669 | _pv += this->uv_stride; | ||
670 | recal_uv|=1; // if update, then reevaluate scanline! | ||
671 | } | ||
672 | } | ||
673 | while( dy>=32768); | ||
674 | } | ||
675 | } | ||
676 | |||
677 | |||
678 | |||
679 | /* Function: */ | ||
680 | static void arm_rgb16_step_dx_sup_65536(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | ||
681 | /* This function is called when the destination picture is smaller than half | ||
682 | the size of the source picture, and half its size. | ||
683 | */ | ||
684 | { | ||
685 | int recal_uv, height; /* Note about recal_uv: bit0 is for | ||
686 | applying scale on u and v, bit1 is for increments of u and v pointers.*/ | ||
687 | int dy; | ||
688 | |||
689 | dy = 0; | ||
690 | height = this->dest_height; | ||
691 | recal_uv=1; // 1 for evaluation of scale_line, needed the first time | ||
692 | |||
693 | while(1) | ||
694 | { | ||
695 | register int dxy; | ||
696 | register int dxuv; | ||
697 | int offdxy, offdxuv; | ||
698 | register uint8_t p1y, p2y; | ||
699 | uint8_t dest1y, dest2y; | ||
700 | register uint8_t p1u, p2u; | ||
701 | register uint8_t p1v, p2v; | ||
702 | uint8_t dest1u; | ||
703 | uint8_t dest1v; | ||
704 | int width; | ||
705 | uint8_t *u_buffer; | ||
706 | uint8_t *v_buffer; | ||
707 | uint16_t *r, *g, *b; | ||
708 | uint8_t *py, *pu, *pv; | ||
709 | uint16_t *dst; | ||
710 | |||
711 | dxy = 0; | ||
712 | dxuv = 0; | ||
713 | width = this->dest_width; | ||
714 | u_buffer=this->u_buffer; | ||
715 | v_buffer=this->v_buffer; | ||
716 | dst = (uint16_t*)_dst; | ||
717 | py = _py; | ||
718 | pu = _pu; | ||
719 | pv = _pv; | ||
720 | |||
721 | //proceed with line scaling/conversion | ||
722 | if ((recal_uv&1)!=0) | ||
723 | { | ||
724 | recal_uv^=1; //reset bit0. | ||
725 | // init values: | ||
726 | p1u = *pu++; | ||
727 | p2u = *pu++; | ||
728 | p1v = *pv++; | ||
729 | p2v = *pv++; | ||
730 | p1y = *py++; | ||
731 | p2y = *py++; | ||
732 | |||
733 | //width loop (compute all data for a line). | ||
734 | while (width>0) | ||
735 | { | ||
736 | // proceed with u and v first (ok, and y too finally :)) [scaling part]: | ||
737 | // evaluate 1u, 1v, and 2y | ||
738 | //block1_uvy | ||
739 | dest1u=p1u + ((dxuv*(p2u-p1u))>>15); | ||
740 | dest1v=p1v + ((dxuv*(p2v-p1v))>>15); | ||
741 | // as u and v are evaluated, better save them now | ||
742 | *u_buffer++ = (uint8_t)dest1u; | ||
743 | *v_buffer++ = (uint8_t)dest1v; | ||
744 | |||
745 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | ||
746 | |||
747 | dxuv += this->step_dx; | ||
748 | dxy += this->step_dx; | ||
749 | |||
750 | offdxuv=((dxuv-1)>>15); | ||
751 | dxuv-=offdxuv<<15; | ||
752 | pu+=offdxuv-2; | ||
753 | pv+=offdxuv-2; | ||
754 | p1u = *pu++; | ||
755 | p2u = *pu++; | ||
756 | p1v = *pv++; | ||
757 | p2v = *pv++; | ||
758 | offdxy=((dxy-1)>>15); | ||
759 | dxy-=offdxy<<15; | ||
760 | py+=offdxy-2; | ||
761 | p1y = *py++; | ||
762 | p2y = *py++; | ||
763 | |||
764 | //block2_y | ||
765 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | ||
766 | |||
767 | dxy += this->step_dx; | ||
768 | offdxy=((dxy-1)>>15); | ||
769 | dxy-=offdxy<<15; | ||
770 | py+=offdxy-2; | ||
771 | p1y = *py++; | ||
772 | p2y = *py++; | ||
773 | |||
774 | // proceed now with YUV2RGB [conversion part]: | ||
775 | // u and v are currently in dest1u and dest1v | ||
776 | // the 2 y are in dest1y and dest2y. | ||
777 | // RGB(0),DST1(0), RGB(1), DST1(1) | ||
778 | r = this->table_rV[dest1v]; | ||
779 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | ||
780 | b = this->table_bU[dest1u]; | ||
781 | |||
782 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | ||
783 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | ||
784 | |||
785 | width -=2; | ||
786 | } | ||
787 | } | ||
788 | else | ||
789 | { | ||
790 | // this case is simple, u and v are already evaluated, | ||
791 | // Note pour moi: r, g et b pourraient etre reutilises!! | ||
792 | |||
793 | // init values: | ||
794 | p1y = *py++; | ||
795 | p2y = *py++; | ||
796 | |||
797 | //width loop (compute all data for a line). | ||
798 | while (width>0) | ||
799 | { | ||
800 | // proceed with y [scaling part]: | ||
801 | // evaluate 2y | ||
802 | //block1_y | ||
803 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | ||
804 | |||
805 | dxy += this->step_dx; | ||
806 | offdxy=((dxy-1)>>15); | ||
807 | dxy-=offdxy<<15; | ||
808 | py+=offdxy-2; | ||
809 | p1y = *py++; | ||
810 | p2y = *py++; | ||
811 | |||
812 | //end block1_uvy | ||
813 | |||
814 | //block2_y | ||
815 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | ||
816 | |||
817 | dxy += this->step_dx; | ||
818 | offdxy=((dxy-1)>>15); | ||
819 | dxy-=offdxy<<15; | ||
820 | py+=offdxy-2; | ||
821 | p1y = *py++; | ||
822 | p2y = *py++; | ||
823 | //end block2_y | ||
824 | |||
825 | // proceed now with YUV2RGB [conversion part]: | ||
826 | // u and v are currently in dest1u and dest1v | ||
827 | // the 2 y are in dest1y and dest2y. | ||
828 | // RGB(0),DST1(0) | ||
829 | dest1u=*u_buffer++; | ||
830 | dest1v=*v_buffer++; | ||
831 | r = this->table_rV[dest1v]; | ||
832 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | ||
833 | b = this->table_bU[dest1u]; | ||
834 | |||
835 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | ||
836 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | ||
837 | |||
838 | width -=2; | ||
839 | } | ||
840 | } | ||
841 | // end of line scaling/conversion | ||
842 | dy += this->step_dy; | ||
843 | _dst += this->rgb_stride; | ||
844 | |||
845 | while (--height > 0 && dy < 32768) | ||
846 | { | ||
847 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! | ||
848 | dy += this->step_dy; | ||
849 | _dst += this->rgb_stride; | ||
850 | } | ||
851 | |||
852 | |||
853 | if (height <= 0) | ||
854 | break; | ||
855 | |||
856 | do | ||
857 | { | ||
858 | dy -= 32768; | ||
859 | _py += this->y_stride; | ||
860 | |||
861 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ | ||
862 | |||
863 | if ((recal_uv&2)==0) | ||
864 | { | ||
865 | _pu += this->uv_stride; | ||
866 | _pv += this->uv_stride; | ||
867 | recal_uv|=1; // if update, then reevaluate scanline! | ||
868 | } | ||
869 | } | ||
870 | while( dy>=32768); | ||
871 | } | ||
872 | } | ||
873 | |||
874 | |||
875 | #endif | ||