-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_arm2.c | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_arm2.c b/noncore/multimedia/opieplayer2/yuv2rgb_arm2.c index cbd32e8..91cd0b1 100644 --- a/noncore/multimedia/opieplayer2/yuv2rgb_arm2.c +++ b/noncore/multimedia/opieplayer2/yuv2rgb_arm2.c | |||
@@ -1,875 +1,876 @@ | |||
1 | /* | 1 | /* |
2 | * yuv2rgb_arm2.c | 2 | * yuv2rgb_arm2.c |
3 | * Copyright (C) 2002 Frederic 'dilb' Boulay. | 3 | * Copyright (C) 2002 Frederic 'dilb' Boulay. |
4 | * All Rights Reserved. | 4 | * All Rights Reserved. |
5 | * | 5 | * |
6 | * Author: Frederic Boulay <dilb@handhelds.org> | 6 | * Author: Frederic Boulay <dilb@handhelds.org> |
7 | * | 7 | * |
8 | * you can redistribute this file and/or modify | 8 | * you can redistribute this file and/or modify |
9 | * it under the terms of the GNU General Public License (version 2) | 9 | * it under the terms of the GNU General Public License (version 2) |
10 | * as published by the Free Software Foundation. | 10 | * as published by the Free Software Foundation. |
11 | * | 11 | * |
12 | * This file is distributed in the hope that it will be useful, | 12 | * This file is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. | 15 | * GNU General Public License for more details. |
16 | * | 16 | * |
17 | * You should have received a copy of the GNU General Public License | 17 | * You should have received a copy of the GNU General Public License |
18 | * along with this program; if not, write to the Free Software | 18 | * along with this program; if not, write to the Free Software |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | * | 20 | * |
21 | * | 21 | * |
22 | * The function defined in this file, are derived from work done in the xine | 22 | * The function defined in this file, are derived from work done in the xine |
23 | * project. | 23 | * project. |
24 | * In order to improve performance, by strongly reducing memory bandwidth | 24 | * In order to improve performance, by strongly reducing memory bandwidth |
25 | * needed, the scaling functions are merged with the yuv2rgb function. | 25 | * needed, the scaling functions are merged with the yuv2rgb function. |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #ifdef __arm__ | 28 | #ifdef __arm__ |
29 | 29 | ||
30 | #include <stdio.h> | 30 | #include <stdio.h> |
31 | #include <stdlib.h> | 31 | #include <stdlib.h> |
32 | #include <string.h> | 32 | #include <string.h> |
33 | #include <inttypes.h> | 33 | #include <inttypes.h> |
34 | 34 | ||
35 | #include "yuv2rgb.h" | 35 | #include "yuv2rgb.h" |
36 | #include <xine/xineutils.h> | 36 | #include <xine/xineutils.h> |
37 | 37 | ||
/* Prototypes of the file-local converters defined below.  All of them share
   the argument list (converter state, destination, Y plane, U plane, V plane). */
/* Used when no scaling at all is requested (do_scale is not set): */
static void arm_rgb16_noscale(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*);
/* Used when no horizontal scaling is needed (step_dx == 32768): */
static void arm_rgb16_step_dx_32768(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*);
/* Used when zooming in horizontally (step_dx < 32768): */
static void arm_rgb16_step_dx_inf_32768(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*);
/* Used when zooming out horizontally by less than 2x (32768 < step_dx < 65536): */
static void arm_rgb16_step_dx_bet_32768_65536(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*);
/* Used when zooming out horizontally by 2x or more (step_dx >= 65536): */
static void arm_rgb16_step_dx_sup_65536(yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*);
/* Dispatcher: decides which of the converters above handles the call. */
static void arm_rgb16_2 (yuv2rgb_t*, uint8_t*, uint8_t*, uint8_t*, uint8_t*);
51 | 51 | ||
52 | 52 | ||
53 | 53 | ||
54 | 54 | ||
55 | /* extern function: */ | 55 | /* extern function: */ |
56 | 56 | ||
57 | /* Function: */ | 57 | /* Function: */ |
58 | void yuv2rgb_init_arm (yuv2rgb_factory_t *this) | 58 | void yuv2rgb_init_arm (yuv2rgb_factory_t *this) |
59 | /* This function initialise the member yuv2rgb_fun, if everything is right | 59 | /* This function initialise the member yuv2rgb_fun, if everything is right |
60 | the function optimised for the arm target should be used.*/ | 60 | the function optimised for the arm target should be used.*/ |
61 | { | 61 | { |
62 | if (this->swapped) | 62 | if (this->swapped) |
63 | return; /*no swapped pixel output upto now*/ | 63 | return; /*no swapped pixel output upto now*/ |
64 | 64 | ||
65 | switch (this->mode) | 65 | switch (this->mode) |
66 | { | 66 | { |
67 | case MODE_16_RGB: | 67 | case MODE_16_RGB: |
68 | this->yuv2rgb_fun = arm_rgb16_2; | 68 | this->yuv2rgb_fun = arm_rgb16_2; |
69 | break; | 69 | break; |
70 | default: | 70 | default: |
71 | break; | ||
71 | } | 72 | } |
72 | } | 73 | } |
73 | 74 | ||
74 | 75 | ||
75 | 76 | ||
76 | /* local functions: */ | 77 | /* local functions: */ |
77 | 78 | ||
78 | /* Function: */ | 79 | /* Function: */ |
79 | static void arm_rgb16_2 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | 80 | static void arm_rgb16_2 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) |
80 | /* This function takes care of applying the right scaling conversion | 81 | /* This function takes care of applying the right scaling conversion |
81 | (yuv2rgb is included in each scaling function!)*/ | 82 | (yuv2rgb is included in each scaling function!)*/ |
82 | { | 83 | { |
83 | if (!this->do_scale) | 84 | if (!this->do_scale) |
84 | { | 85 | { |
85 | arm_rgb16_noscale(this, _dst, _py, _pu, _pv); | 86 | arm_rgb16_noscale(this, _dst, _py, _pu, _pv); |
86 | return; | 87 | return; |
87 | } | 88 | } |
88 | if (this->step_dx<32768) | 89 | if (this->step_dx<32768) |
89 | { | 90 | { |
90 | arm_rgb16_step_dx_inf_32768(this, _dst, _py, _pu, _pv); | 91 | arm_rgb16_step_dx_inf_32768(this, _dst, _py, _pu, _pv); |
91 | return; | 92 | return; |
92 | } | 93 | } |
93 | if (this->step_dx==32768) | 94 | if (this->step_dx==32768) |
94 | { | 95 | { |
95 | arm_rgb16_step_dx_32768(this, _dst, _py, _pu, _pv); | 96 | arm_rgb16_step_dx_32768(this, _dst, _py, _pu, _pv); |
96 | return; | 97 | return; |
97 | } | 98 | } |
98 | if (this->step_dx<65536) | 99 | if (this->step_dx<65536) |
99 | { | 100 | { |
100 | arm_rgb16_step_dx_bet_32768_65536(this, _dst, _py, _pu, _pv); | 101 | arm_rgb16_step_dx_bet_32768_65536(this, _dst, _py, _pu, _pv); |
101 | return; | 102 | return; |
102 | } | 103 | } |
103 | arm_rgb16_step_dx_sup_65536(this, _dst, _py, _pu, _pv); | 104 | arm_rgb16_step_dx_sup_65536(this, _dst, _py, _pu, _pv); |
104 | return; | 105 | return; |
105 | } | 106 | } |
106 | 107 | ||
107 | 108 | ||
108 | /* Function: */ | 109 | /* Function: */ |
109 | static void arm_rgb16_noscale(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | 110 | static void arm_rgb16_noscale(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) |
110 | /* This function is called when the source and the destination pictures have the same size. | 111 | /* This function is called when the source and the destination pictures have the same size. |
111 | In this case, scaling part is not needed. | 112 | In this case, scaling part is not needed. |
112 | (This code is probably far from being optimised, in particular, the asm | 113 | (This code is probably far from being optimised, in particular, the asm |
113 | generated is not the most efficient, a pure asm version will probably | 114 | generated is not the most efficient, a pure asm version will probably |
114 | emerge sooner or later). But at least, this version is faster than what | 115 | emerge sooner or later). But at least, this version is faster than what |
115 | was used before.*/ | 116 | was used before.*/ |
116 | { | 117 | { |
117 | int height; | 118 | int height; |
118 | 119 | ||
119 | height=this->dest_height; | 120 | height=this->dest_height; |
120 | 121 | ||
121 | while (height>0) | 122 | while (height>0) |
122 | { | 123 | { |
123 | uint16_t *r, *g, *b; | 124 | uint16_t *r, *g, *b; |
124 | uint8_t *py, *py2, *pu, *pv; | 125 | uint8_t *py, *py2, *pu, *pv; |
125 | uint16_t *dst, *dst2; | 126 | uint16_t *dst, *dst2; |
126 | int width; | 127 | int width; |
127 | register uint8_t p1y, p1u, p1v; | 128 | register uint8_t p1y, p1u, p1v; |
128 | 129 | ||
129 | height-=2; | 130 | height-=2; |
130 | width=this->dest_width; | 131 | width=this->dest_width; |
131 | dst = _dst; | 132 | dst = _dst; |
132 | dst2 = _dst + this->rgb_stride; | 133 | dst2 = _dst + this->rgb_stride; |
133 | py = _py; | 134 | py = _py; |
134 | py2 = _py + this->y_stride; | 135 | py2 = _py + this->y_stride; |
135 | pu = _pu; | 136 | pu = _pu; |
136 | pv = _pv; | 137 | pv = _pv; |
137 | 138 | ||
138 | while (width>0) | 139 | while (width>0) |
139 | { | 140 | { |
140 | width-=2; | 141 | width-=2; |
141 | p1y=*py++; | 142 | p1y=*py++; |
142 | p1u=*pu++; | 143 | p1u=*pu++; |
143 | p1v=*pv++; | 144 | p1v=*pv++; |
144 | 145 | ||
145 | r = this->table_rV[p1v]; | 146 | r = this->table_rV[p1v]; |
146 | g = (void *) (((uint8_t *)this->table_gU[p1u]) + this->table_gV[p1v]); | 147 | g = (void *) (((uint8_t *)this->table_gU[p1u]) + this->table_gV[p1v]); |
147 | b = this->table_bU[p1u]; | 148 | b = this->table_bU[p1u]; |
148 | 149 | ||
149 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | 150 | *dst++ = r[p1y] + g[p1y] + b[p1y]; |
150 | p1y=*py++; | 151 | p1y=*py++; |
151 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | 152 | *dst++ = r[p1y] + g[p1y] + b[p1y]; |
152 | 153 | ||
153 | p1y=*py2++; | 154 | p1y=*py2++; |
154 | 155 | ||
155 | *dst2++ = r[p1y] + g[p1y] + b[p1y]; | 156 | *dst2++ = r[p1y] + g[p1y] + b[p1y]; |
156 | p1y=*py2++; | 157 | p1y=*py2++; |
157 | *dst2++ = r[p1y] + g[p1y] + b[p1y]; | 158 | *dst2++ = r[p1y] + g[p1y] + b[p1y]; |
158 | } | 159 | } |
159 | _dst += (this->rgb_stride)<<1; | 160 | _dst += (this->rgb_stride)<<1; |
160 | _py += (this->y_stride)<<1; | 161 | _py += (this->y_stride)<<1; |
161 | _pu += this->uv_stride; | 162 | _pu += this->uv_stride; |
162 | _pv += this->uv_stride; | 163 | _pv += this->uv_stride; |
163 | } | 164 | } |
164 | } | 165 | } |
165 | 166 | ||
166 | 167 | ||
167 | /* Function: */ | 168 | /* Function: */ |
static void arm_rgb16_step_dx_inf_32768(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv)
/* Scale and convert to 16 bit RGB when the destination picture is wider than
   the source picture (arm_rgb16_2 selects this routine for step_dx < 32768).

   this  converter state; dest_width, dest_height, step_dx, step_dy, the
         strides, the u_buffer/v_buffer scratch lines and the colour lookup
         tables are read from it
   _dst  first byte of the destination picture
   _py   first byte of the luma (Y) plane
   _pu   first byte of the U plane
   _pv   first byte of the V plane

   Horizontal positions are fixed-point fractions where 32768 is the distance
   between two neighbouring source samples; every output value is a linear
   interpolation between the two samples p1 and p2.  Luma is stepped once per
   output pixel, chroma once per pair of output pixels.

   Vertically, dy accumulates step_dy in the same fixed-point unit: while it
   stays below 32768 the row just produced is duplicated, otherwise the source
   pointers move on by one luma row per 32768.

   Note: one output row is always produced before dest_height is tested.
*/
{
  int recal_uv, height; /* Note about recal_uv: bit0 is for
    applying scale on u and v, bit1 is for increments of u and v pointers.*/
  int dy;

  dy = 0;
  height = this->dest_height;
  recal_uv=1; // 1 for evaluation of scale_line, needed the first time

  while(1)
  {
    register int dxy;   // fractional position between p1y and p2y
    register int dxuv;  // fractional position between p1u/p1v and p2u/p2v
    register uint8_t p1y, p2y;
    uint8_t dest1y, dest2y;
    register uint8_t p1u, p2u;
    register uint8_t p1v, p2v;
    uint8_t dest1u;
    uint8_t dest1v;
    int width;
    uint8_t *u_buffer;
    uint8_t *v_buffer;
    uint16_t *r, *g, *b;
    uint8_t *py, *pu, *pv;
    uint16_t *dst;

    dxy = 0;
    dxuv = 0;
    width = this->dest_width;
    u_buffer=this->u_buffer;
    v_buffer=this->v_buffer;
    dst = (uint16_t*)_dst;
    py = _py;
    pu = _pu;
    pv = _pv;

    //proceed with line scaling/conversion
    if ((recal_uv&1)!=0)
    {
      // the chroma row changed (or this is the first row): scale u and v
      // again and store the result in u_buffer / v_buffer
      recal_uv^=1; //reset bit0.
      // init values:
      p1u = *pu++;
      p2u = *pu++;
      p1v = *pv++;
      p2v = *pv++;
      p1y = *py++;
      p2y = *py++;

      //width loop (compute all data for a line).
      while (width>0)
      {
        // proceed with u and v first (ok, and y too finally :)) [scaling part]:
        // evaluate 1u, 1v, and 2y
        //block1_uvy
        // NOTE(review): the product is negative when p2 < p1, so this relies
        // on >> of a negative int being an arithmetic shift - confirm for
        // the target compiler.
        dest1u=p1u + ((dxuv*(p2u-p1u))>>15);
        dest1v=p1v + ((dxuv*(p2v-p1v))>>15);
        // as u and v are evaluated, better save them now
        *u_buffer++ = (uint8_t)dest1u;
        *v_buffer++ = (uint8_t)dest1v;

        dest1y=p1y + ((dxy*(p2y-p1y))>>15);

        dxuv += this->step_dx;
        dxy += this->step_dx;
        if (dxuv > 32768)
        {
          // stepped past p2: slide the chroma sample window by one
          dxuv -= 32768;
          p1u = p2u;
          p2u = *pu++; // idea for the asm version: see the "then" case, a merge is possible
          p1v = p2v;
          p2v = *pv++;
        }
        if (dxy > 32768)
        {
          // stepped past p2: slide the luma sample window by one
          dxy -= 32768;
          p1y = p2y;
          p2y = *py++;
        }
        //end block1_uvy

        //block2_y
        dest2y=p1y + ((dxy*(p2y-p1y))>>15);

        dxy += this->step_dx;
        if (dxy > 32768)
        {
          dxy -= 32768;
          p1y = p2y;
          p2y = *py++; // idea for the asm version: see the "then" case, a merge is possible
        }
        //end block2_y

        // proceed now with YUV2RGB [conversion part]:
        // u and v are currently in dest1u and dest1v
        // the 2 y are in dest1y and dest2y.
        // RGB(0),DST1(0), RGB(1), DST1(1)
        r = this->table_rV[dest1v];
        g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]);
        b = this->table_bU[dest1u];

        *dst++ = r[dest1y] + g[dest1y] + b[dest1y];
        *dst++ = r[dest2y] + g[dest2y] + b[dest2y];

        width -=2;
      }
    }
    else
    {
      // this case is simple, u and v are already evaluated,
      // they are read back from u_buffer / v_buffer.
      // Note to self: r, g and b could be reused!

      // init values:
      p1y = *py++;
      p2y = *py++;

      //width loop (compute all data for a line).
      while (width>0)
      {
        // proceed with y [scaling part]:
        // evaluate 2y
        //block1_y
        dest1y=p1y + ((dxy*(p2y-p1y))>>15);

        dxy += this->step_dx;
        if (dxy > 32768)
        {
          dxy -= 32768;
          p1y = p2y;
          p2y = *py++;
        }
        //end block1_y

        //block2_y
        dest2y=p1y + ((dxy*(p2y-p1y))>>15);

        dxy += this->step_dx;
        if (dxy > 32768)
        {
          dxy -= 32768;
          p1y = p2y;
          p2y = *py++; // idea for the asm version: see the "then" case, a merge is possible
        }
        //end block2_y

        // proceed now with YUV2RGB [conversion part]:
        // u and v are taken from the buffers filled by an earlier row,
        // the 2 y are in dest1y and dest2y.
        // RGB(0),DST1(0)
        dest1u=*u_buffer++;
        dest1v=*v_buffer++;
        r = this->table_rV[dest1v];
        g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]);
        b = this->table_bU[dest1u];

        *dst++ = r[dest1y] + g[dest1y] + b[dest1y];
        *dst++ = r[dest2y] + g[dest2y] + b[dest2y];

        width -=2;
      }
    }
    // end of line scaling/conversion
    dy += this->step_dy;
    _dst += this->rgb_stride;

    // as long as the vertical position stays on the same source row, the
    // row just written is simply copied
    while (--height > 0 && dy < 32768)
    {
      xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2: two bytes per 16 bit pixel
      dy += this->step_dy;
      _dst += this->rgb_stride;
    }


    if (height <= 0)
      break;

    // move on to the next source row(s): one luma row per 32768 of dy
    do
    {
      dy -= 32768;
      _py += this->y_stride;

      recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/

      // bit1 is back to 0 after every second luma row: only then do the
      // chroma pointers advance
      if ((recal_uv&2)==0)
      {
        _pu += this->uv_stride;
        _pv += this->uv_stride;
        recal_uv|=1; // if update, then reevaluate scanline!
      }
    }
    while( dy>=32768);
  }
}
364 | 365 | ||
365 | 366 | ||
366 | 367 | ||
367 | /* Function: */ | 368 | /* Function: */ |
368 | static void arm_rgb16_step_dx_32768(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | 369 | static void arm_rgb16_step_dx_32768(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) |
369 | /* This function is called when the widht of the destination picture is the | 370 | /* This function is called when the widht of the destination picture is the |
370 | same as the size of the source picture. | 371 | same as the size of the source picture. |
371 | */ | 372 | */ |
372 | { | 373 | { |
373 | int recal_uv, height; | 374 | int recal_uv, height; |
374 | int dy; | 375 | int dy; |
375 | 376 | ||
376 | dy = 0; | 377 | dy = 0; |
377 | height=this->dest_height; | 378 | height=this->dest_height; |
378 | recal_uv=0; | 379 | recal_uv=0; |
379 | 380 | ||
380 | while (1) | 381 | while (1) |
381 | { | 382 | { |
382 | uint16_t *r, *g, *b; | 383 | uint16_t *r, *g, *b; |
383 | uint8_t *py, *pu, *pv; | 384 | uint8_t *py, *pu, *pv; |
384 | uint16_t *dst; | 385 | uint16_t *dst; |
385 | int width; | 386 | int width; |
386 | register uint8_t p1y, p1u, p1v; | 387 | register uint8_t p1y, p1u, p1v; |
387 | 388 | ||
388 | width=this->dest_width; | 389 | width=this->dest_width; |
389 | dst = (uint16_t*)_dst; | 390 | dst = (uint16_t*)_dst; |
390 | py = _py; | 391 | py = _py; |
391 | pu = _pu; | 392 | pu = _pu; |
392 | pv = _pv; | 393 | pv = _pv; |
393 | 394 | ||
394 | while (width>0) | 395 | while (width>0) |
395 | { | 396 | { |
396 | width-=2; | 397 | width-=2; |
397 | p1y=*py++; | 398 | p1y=*py++; |
398 | p1u=*pu++; | 399 | p1u=*pu++; |
399 | p1v=*pv++; | 400 | p1v=*pv++; |
400 | 401 | ||
401 | r = this->table_rV[p1v]; | 402 | r = this->table_rV[p1v]; |
402 | g = (void *) (((uint8_t *)this->table_gU[p1u]) + this->table_gV[p1v]); | 403 | g = (void *) (((uint8_t *)this->table_gU[p1u]) + this->table_gV[p1v]); |
403 | b = this->table_bU[p1u]; | 404 | b = this->table_bU[p1u]; |
404 | 405 | ||
405 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | 406 | *dst++ = r[p1y] + g[p1y] + b[p1y]; |
406 | p1y=*py++; | 407 | p1y=*py++; |
407 | *dst++ = r[p1y] + g[p1y] + b[p1y]; | 408 | *dst++ = r[p1y] + g[p1y] + b[p1y]; |
408 | } | 409 | } |
409 | 410 | ||
410 | 411 | ||
411 | 412 | ||
412 | // end of line scaling/conversion | 413 | // end of line scaling/conversion |
413 | dy += this->step_dy; | 414 | dy += this->step_dy; |
414 | _dst += this->rgb_stride; | 415 | _dst += this->rgb_stride; |
415 | 416 | ||
416 | while (--height > 0 && dy < 32768) | 417 | while (--height > 0 && dy < 32768) |
417 | { | 418 | { |
418 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! | 419 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! |
419 | dy += this->step_dy; | 420 | dy += this->step_dy; |
420 | _dst += this->rgb_stride; | 421 | _dst += this->rgb_stride; |
421 | } | 422 | } |
422 | 423 | ||
423 | 424 | ||
424 | if (height <= 0) | 425 | if (height <= 0) |
425 | break; | 426 | break; |
426 | 427 | ||
427 | do | 428 | do |
428 | { | 429 | { |
429 | dy -= 32768; | 430 | dy -= 32768; |
430 | _py += this->y_stride; | 431 | _py += this->y_stride; |
431 | 432 | ||
432 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ | 433 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ |
433 | 434 | ||
434 | if ((recal_uv&2)==0) | 435 | if ((recal_uv&2)==0) |
435 | { | 436 | { |
436 | _pu += this->uv_stride; | 437 | _pu += this->uv_stride; |
437 | _pv += this->uv_stride; | 438 | _pv += this->uv_stride; |
438 | recal_uv|=1; // if update, then reevaluate scanline! | 439 | recal_uv|=1; // if update, then reevaluate scanline! |
439 | } | 440 | } |
440 | } | 441 | } |
441 | while( dy>=32768); | 442 | while( dy>=32768); |
442 | } | 443 | } |
443 | } | 444 | } |
444 | 445 | ||
445 | 446 | ||
446 | 447 | ||
447 | /* Function: */ | 448 | /* Function: */ |
448 | static void arm_rgb16_step_dx_bet_32768_65536(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | 449 | static void arm_rgb16_step_dx_bet_32768_65536(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) |
449 | /* This function is called when the destination picture is between the size | 450 | /* This function is called when the destination picture is between the size |
450 | of the source picture, and half its size. | 451 | of the source picture, and half its size. |
451 | */ | 452 | */ |
452 | { | 453 | { |
453 | int recal_uv, height; /* Note about recal_uv: bit0 is for | 454 | int recal_uv, height; /* Note about recal_uv: bit0 is for |
454 | applying scale on u and v, bit1 is for increments of u and v pointers.*/ | 455 | applying scale on u and v, bit1 is for increments of u and v pointers.*/ |
455 | int dy; | 456 | int dy; |
456 | 457 | ||
457 | dy = 0; | 458 | dy = 0; |
458 | height = this->dest_height; | 459 | height = this->dest_height; |
459 | recal_uv=1; // 1 for evaluation of scale_line, needed the first time | 460 | recal_uv=1; // 1 for evaluation of scale_line, needed the first time |
460 | 461 | ||
461 | while(1) | 462 | while(1) |
462 | { | 463 | { |
463 | register int dxy; | 464 | register int dxy; |
464 | register int dxuv; | 465 | register int dxuv; |
465 | register uint8_t p1y, p2y; | 466 | register uint8_t p1y, p2y; |
466 | uint8_t dest1y, dest2y; | 467 | uint8_t dest1y, dest2y; |
467 | register uint8_t p1u, p2u; | 468 | register uint8_t p1u, p2u; |
468 | register uint8_t p1v, p2v; | 469 | register uint8_t p1v, p2v; |
469 | uint8_t dest1u; | 470 | uint8_t dest1u; |
470 | uint8_t dest1v; | 471 | uint8_t dest1v; |
471 | int width; | 472 | int width; |
472 | uint8_t *u_buffer; | 473 | uint8_t *u_buffer; |
473 | uint8_t *v_buffer; | 474 | uint8_t *v_buffer; |
474 | uint16_t *r, *g, *b; | 475 | uint16_t *r, *g, *b; |
475 | uint8_t *py, *pu, *pv; | 476 | uint8_t *py, *pu, *pv; |
476 | uint16_t *dst; | 477 | uint16_t *dst; |
477 | 478 | ||
478 | dxy = 0; | 479 | dxy = 0; |
479 | dxuv = 0; | 480 | dxuv = 0; |
480 | width = this->dest_width; | 481 | width = this->dest_width; |
481 | u_buffer=this->u_buffer; | 482 | u_buffer=this->u_buffer; |
482 | v_buffer=this->v_buffer; | 483 | v_buffer=this->v_buffer; |
483 | dst = (uint16_t*)_dst; | 484 | dst = (uint16_t*)_dst; |
484 | py = _py; | 485 | py = _py; |
485 | pu = _pu; | 486 | pu = _pu; |
486 | pv = _pv; | 487 | pv = _pv; |
487 | 488 | ||
488 | //proceed with line scaling/conversion | 489 | //proceed with line scaling/conversion |
489 | if ((recal_uv&1)!=0) | 490 | if ((recal_uv&1)!=0) |
490 | { | 491 | { |
491 | recal_uv^=1; //reset bit0. | 492 | recal_uv^=1; //reset bit0. |
492 | // init values: | 493 | // init values: |
493 | p1u = *pu++; | 494 | p1u = *pu++; |
494 | p2u = *pu++; | 495 | p2u = *pu++; |
495 | p1v = *pv++; | 496 | p1v = *pv++; |
496 | p2v = *pv++; | 497 | p2v = *pv++; |
497 | p1y = *py++; | 498 | p1y = *py++; |
498 | p2y = *py++; | 499 | p2y = *py++; |
499 | 500 | ||
500 | //width loop (compute all data for a line). | 501 | //width loop (compute all data for a line). |
501 | while (width>0) | 502 | while (width>0) |
502 | { | 503 | { |
503 | // proceed with u and v first (ok, and y too finally :)) [scaling part]: | 504 | // proceed with u and v first (ok, and y too finally :)) [scaling part]: |
504 | // evaluate 1u, 1v, and 2y | 505 | // evaluate 1u, 1v, and 2y |
505 | //block1_uvy | 506 | //block1_uvy |
506 | dest1u=p1u + ((dxuv*(p2u-p1u))>>15); | 507 | dest1u=p1u + ((dxuv*(p2u-p1u))>>15); |
507 | dest1v=p1v + ((dxuv*(p2v-p1v))>>15); | 508 | dest1v=p1v + ((dxuv*(p2v-p1v))>>15); |
508 | // as u and v are evaluated, better save them now | 509 | // as u and v are evaluated, better save them now |
509 | *u_buffer++ = (uint8_t)dest1u; | 510 | *u_buffer++ = (uint8_t)dest1u; |
510 | *v_buffer++ = (uint8_t)dest1v; | 511 | *v_buffer++ = (uint8_t)dest1v; |
511 | 512 | ||
512 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | 513 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); |
513 | 514 | ||
514 | dxuv += this->step_dx; | 515 | dxuv += this->step_dx; |
515 | dxy += this->step_dx; | 516 | dxy += this->step_dx; |
516 | if (dxuv > 65536) | 517 | if (dxuv > 65536) |
517 | { | 518 | { |
518 | dxuv -= 65536; | 519 | dxuv -= 65536; |
519 | p1u = *pu++; | 520 | p1u = *pu++; |
520 | p2u = *pu++; | 521 | p2u = *pu++; |
521 | p1v = *pv++; | 522 | p1v = *pv++; |
522 | p2v = *pv++; | 523 | p2v = *pv++; |
523 | } | 524 | } |
524 | else | 525 | else |
525 | { | 526 | { |
526 | dxuv -= 32768; | 527 | dxuv -= 32768; |
527 | p1u = p2u; | 528 | p1u = p2u; |
528 | p2u = *pu++; //idee pour asm, cf cas then, un merge est possible!!! | 529 | p2u = *pu++; //idee pour asm, cf cas then, un merge est possible!!! |
529 | p1v = p2v; | 530 | p1v = p2v; |
530 | p2v = *pv++; | 531 | p2v = *pv++; |
531 | } | 532 | } |
532 | if (dxy > 65536) | 533 | if (dxy > 65536) |
533 | { | 534 | { |
534 | dxy -= 65536; | 535 | dxy -= 65536; |
535 | p1y = *py++; | 536 | p1y = *py++; |
536 | p2y = *py++; | 537 | p2y = *py++; |
537 | } | 538 | } |
538 | else | 539 | else |
539 | { | 540 | { |
540 | dxy -= 32768; | 541 | dxy -= 32768; |
541 | p1y = p2y; | 542 | p1y = p2y; |
542 | p2y = *py++; | 543 | p2y = *py++; |
543 | } | 544 | } |
544 | //end block1_uvy | 545 | //end block1_uvy |
545 | 546 | ||
546 | //block2_y | 547 | //block2_y |
547 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | 548 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); |
548 | 549 | ||
549 | dxy += this->step_dx; | 550 | dxy += this->step_dx; |
550 | if (dxy > 65536) | 551 | if (dxy > 65536) |
551 | { | 552 | { |
552 | dxy -= 65536; | 553 | dxy -= 65536; |
553 | p1y = *py++; | 554 | p1y = *py++; |
554 | p2y = *py++; | 555 | p2y = *py++; |
555 | } | 556 | } |
556 | else | 557 | else |
557 | { | 558 | { |
558 | dxy -= 32768; | 559 | dxy -= 32768; |
559 | p1y = p2y; | 560 | p1y = p2y; |
560 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! | 561 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! |
561 | } | 562 | } |
562 | //end block2_y | 563 | //end block2_y |
563 | 564 | ||
564 | // proceed now with YUV2RGB [conversion part]: | 565 | // proceed now with YUV2RGB [conversion part]: |
565 | // u and v are currently in dest1u and dest1v | 566 | // u and v are currently in dest1u and dest1v |
566 | // the 2 y are in dest1y and dest2y. | 567 | // the 2 y are in dest1y and dest2y. |
567 | // RGB(0),DST1(0), RGB(1), DST1(1) | 568 | // RGB(0),DST1(0), RGB(1), DST1(1) |
568 | r = this->table_rV[dest1v]; | 569 | r = this->table_rV[dest1v]; |
569 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | 570 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); |
570 | b = this->table_bU[dest1u]; | 571 | b = this->table_bU[dest1u]; |
571 | 572 | ||
572 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | 573 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; |
573 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | 574 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; |
574 | 575 | ||
575 | width -=2; | 576 | width -=2; |
576 | } | 577 | } |
577 | } | 578 | } |
578 | else | 579 | else |
579 | { | 580 | { |
580 | // this case is simple, u and v are already evaluated, | 581 | // this case is simple, u and v are already evaluated, |
581 | // Note pour moi: r, g et b pourraient etre reutilises!! | 582 | // Note pour moi: r, g et b pourraient etre reutilises!! |
582 | 583 | ||
583 | // init values: | 584 | // init values: |
584 | p1y = *py++; | 585 | p1y = *py++; |
585 | p2y = *py++; | 586 | p2y = *py++; |
586 | 587 | ||
587 | //width loop (compute all data for a line). | 588 | //width loop (compute all data for a line). |
588 | while (width>0) | 589 | while (width>0) |
589 | { | 590 | { |
590 | // proceed with y [scaling part]: | 591 | // proceed with y [scaling part]: |
591 | // evaluate 2y | 592 | // evaluate 2y |
592 | //block1_y | 593 | //block1_y |
593 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | 594 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); |
594 | 595 | ||
595 | dxy += this->step_dx; | 596 | dxy += this->step_dx; |
596 | if (dxy > 65536) | 597 | if (dxy > 65536) |
597 | { | 598 | { |
598 | dxy -= 65536; | 599 | dxy -= 65536; |
599 | p1y = *py++; | 600 | p1y = *py++; |
600 | p2y = *py++; | 601 | p2y = *py++; |
601 | } | 602 | } |
602 | else | 603 | else |
603 | { | 604 | { |
604 | dxy -= 32768; | 605 | dxy -= 32768; |
605 | p1y = p2y; | 606 | p1y = p2y; |
606 | p2y = *py++; | 607 | p2y = *py++; |
607 | } | 608 | } |
608 | //end block1_uvy | 609 | //end block1_uvy |
609 | 610 | ||
610 | //block2_y | 611 | //block2_y |
611 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | 612 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); |
612 | 613 | ||
613 | dxy += this->step_dx; | 614 | dxy += this->step_dx; |
614 | if (dxy > 65536) | 615 | if (dxy > 65536) |
615 | { | 616 | { |
616 | dxy -= 65536; | 617 | dxy -= 65536; |
617 | p1y = *py++; | 618 | p1y = *py++; |
618 | p2y = *py++; | 619 | p2y = *py++; |
619 | } | 620 | } |
620 | else | 621 | else |
621 | { | 622 | { |
622 | dxy -= 32768; | 623 | dxy -= 32768; |
623 | p1y = p2y; | 624 | p1y = p2y; |
624 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! | 625 | p2y = *py++; // idee pour asm, cf cas then, un merge est possible!!! |
625 | } | 626 | } |
626 | //end block2_y | 627 | //end block2_y |
627 | 628 | ||
628 | // proceed now with YUV2RGB [conversion part]: | 629 | // proceed now with YUV2RGB [conversion part]: |
629 | // u and v are currently in dest1u and dest1v | 630 | // u and v are currently in dest1u and dest1v |
630 | // the 2 y are in dest1y and dest2y. | 631 | // the 2 y are in dest1y and dest2y. |
631 | // RGB(0),DST1(0) | 632 | // RGB(0),DST1(0) |
632 | dest1u=*u_buffer++; | 633 | dest1u=*u_buffer++; |
633 | dest1v=*v_buffer++; | 634 | dest1v=*v_buffer++; |
634 | r = this->table_rV[dest1v]; | 635 | r = this->table_rV[dest1v]; |
635 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | 636 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); |
636 | b = this->table_bU[dest1u]; | 637 | b = this->table_bU[dest1u]; |
637 | 638 | ||
638 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | 639 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; |
639 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | 640 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; |
640 | 641 | ||
641 | width -=2; | 642 | width -=2; |
642 | } | 643 | } |
643 | } | 644 | } |
644 | // end of line scaling/conversion | 645 | // end of line scaling/conversion |
645 | dy += this->step_dy; | 646 | dy += this->step_dy; |
646 | _dst += this->rgb_stride; | 647 | _dst += this->rgb_stride; |
647 | 648 | ||
648 | while (--height > 0 && dy < 32768) | 649 | while (--height > 0 && dy < 32768) |
649 | { | 650 | { |
650 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! | 651 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! |
651 | dy += this->step_dy; | 652 | dy += this->step_dy; |
652 | _dst += this->rgb_stride; | 653 | _dst += this->rgb_stride; |
653 | } | 654 | } |
654 | 655 | ||
655 | 656 | ||
656 | if (height <= 0) | 657 | if (height <= 0) |
657 | break; | 658 | break; |
658 | 659 | ||
659 | do | 660 | do |
660 | { | 661 | { |
661 | dy -= 32768; | 662 | dy -= 32768; |
662 | _py += this->y_stride; | 663 | _py += this->y_stride; |
663 | 664 | ||
664 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ | 665 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ |
665 | 666 | ||
666 | if ((recal_uv&2)==0) | 667 | if ((recal_uv&2)==0) |
667 | { | 668 | { |
668 | _pu += this->uv_stride; | 669 | _pu += this->uv_stride; |
669 | _pv += this->uv_stride; | 670 | _pv += this->uv_stride; |
670 | recal_uv|=1; // if update, then reevaluate scanline! | 671 | recal_uv|=1; // if update, then reevaluate scanline! |
671 | } | 672 | } |
672 | } | 673 | } |
673 | while( dy>=32768); | 674 | while( dy>=32768); |
674 | } | 675 | } |
675 | } | 676 | } |
676 | 677 | ||
677 | 678 | ||
678 | 679 | ||
679 | /* Function: */ | 680 | /* Function: */ |
680 | static void arm_rgb16_step_dx_sup_65536(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) | 681 | static void arm_rgb16_step_dx_sup_65536(yuv2rgb_t *this, uint8_t * _dst, uint8_t * _py, uint8_t * _pu, uint8_t * _pv) |
681 | /* This function is called when the destination picture is smaller than half | 682 | /* This function is called when the destination picture is smaller than half |
682 | the size of the source picture, and half its size. | 683 | the size of the source picture, and half its size. |
683 | */ | 684 | */ |
684 | { | 685 | { |
685 | int recal_uv, height; /* Note about recal_uv: bit0 is for | 686 | int recal_uv, height; /* Note about recal_uv: bit0 is for |
686 | applying scale on u and v, bit1 is for increments of u and v pointers.*/ | 687 | applying scale on u and v, bit1 is for increments of u and v pointers.*/ |
687 | int dy; | 688 | int dy; |
688 | 689 | ||
689 | dy = 0; | 690 | dy = 0; |
690 | height = this->dest_height; | 691 | height = this->dest_height; |
691 | recal_uv=1; // 1 for evaluation of scale_line, needed the first time | 692 | recal_uv=1; // 1 for evaluation of scale_line, needed the first time |
692 | 693 | ||
693 | while(1) | 694 | while(1) |
694 | { | 695 | { |
695 | register int dxy; | 696 | register int dxy; |
696 | register int dxuv; | 697 | register int dxuv; |
697 | int offdxy, offdxuv; | 698 | int offdxy, offdxuv; |
698 | register uint8_t p1y, p2y; | 699 | register uint8_t p1y, p2y; |
699 | uint8_t dest1y, dest2y; | 700 | uint8_t dest1y, dest2y; |
700 | register uint8_t p1u, p2u; | 701 | register uint8_t p1u, p2u; |
701 | register uint8_t p1v, p2v; | 702 | register uint8_t p1v, p2v; |
702 | uint8_t dest1u; | 703 | uint8_t dest1u; |
703 | uint8_t dest1v; | 704 | uint8_t dest1v; |
704 | int width; | 705 | int width; |
705 | uint8_t *u_buffer; | 706 | uint8_t *u_buffer; |
706 | uint8_t *v_buffer; | 707 | uint8_t *v_buffer; |
707 | uint16_t *r, *g, *b; | 708 | uint16_t *r, *g, *b; |
708 | uint8_t *py, *pu, *pv; | 709 | uint8_t *py, *pu, *pv; |
709 | uint16_t *dst; | 710 | uint16_t *dst; |
710 | 711 | ||
711 | dxy = 0; | 712 | dxy = 0; |
712 | dxuv = 0; | 713 | dxuv = 0; |
713 | width = this->dest_width; | 714 | width = this->dest_width; |
714 | u_buffer=this->u_buffer; | 715 | u_buffer=this->u_buffer; |
715 | v_buffer=this->v_buffer; | 716 | v_buffer=this->v_buffer; |
716 | dst = (uint16_t*)_dst; | 717 | dst = (uint16_t*)_dst; |
717 | py = _py; | 718 | py = _py; |
718 | pu = _pu; | 719 | pu = _pu; |
719 | pv = _pv; | 720 | pv = _pv; |
720 | 721 | ||
721 | //proceed with line scaling/conversion | 722 | //proceed with line scaling/conversion |
722 | if ((recal_uv&1)!=0) | 723 | if ((recal_uv&1)!=0) |
723 | { | 724 | { |
724 | recal_uv^=1; //reset bit0. | 725 | recal_uv^=1; //reset bit0. |
725 | // init values: | 726 | // init values: |
726 | p1u = *pu++; | 727 | p1u = *pu++; |
727 | p2u = *pu++; | 728 | p2u = *pu++; |
728 | p1v = *pv++; | 729 | p1v = *pv++; |
729 | p2v = *pv++; | 730 | p2v = *pv++; |
730 | p1y = *py++; | 731 | p1y = *py++; |
731 | p2y = *py++; | 732 | p2y = *py++; |
732 | 733 | ||
733 | //width loop (compute all data for a line). | 734 | //width loop (compute all data for a line). |
734 | while (width>0) | 735 | while (width>0) |
735 | { | 736 | { |
736 | // proceed with u and v first (ok, and y too finally :)) [scaling part]: | 737 | // proceed with u and v first (ok, and y too finally :)) [scaling part]: |
737 | // evaluate 1u, 1v, and 2y | 738 | // evaluate 1u, 1v, and 2y |
738 | //block1_uvy | 739 | //block1_uvy |
739 | dest1u=p1u + ((dxuv*(p2u-p1u))>>15); | 740 | dest1u=p1u + ((dxuv*(p2u-p1u))>>15); |
740 | dest1v=p1v + ((dxuv*(p2v-p1v))>>15); | 741 | dest1v=p1v + ((dxuv*(p2v-p1v))>>15); |
741 | // as u and v are evaluated, better save them now | 742 | // as u and v are evaluated, better save them now |
742 | *u_buffer++ = (uint8_t)dest1u; | 743 | *u_buffer++ = (uint8_t)dest1u; |
743 | *v_buffer++ = (uint8_t)dest1v; | 744 | *v_buffer++ = (uint8_t)dest1v; |
744 | 745 | ||
745 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | 746 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); |
746 | 747 | ||
747 | dxuv += this->step_dx; | 748 | dxuv += this->step_dx; |
748 | dxy += this->step_dx; | 749 | dxy += this->step_dx; |
749 | 750 | ||
750 | offdxuv=((dxuv-1)>>15); | 751 | offdxuv=((dxuv-1)>>15); |
751 | dxuv-=offdxuv<<15; | 752 | dxuv-=offdxuv<<15; |
752 | pu+=offdxuv-2; | 753 | pu+=offdxuv-2; |
753 | pv+=offdxuv-2; | 754 | pv+=offdxuv-2; |
754 | p1u = *pu++; | 755 | p1u = *pu++; |
755 | p2u = *pu++; | 756 | p2u = *pu++; |
756 | p1v = *pv++; | 757 | p1v = *pv++; |
757 | p2v = *pv++; | 758 | p2v = *pv++; |
758 | offdxy=((dxy-1)>>15); | 759 | offdxy=((dxy-1)>>15); |
759 | dxy-=offdxy<<15; | 760 | dxy-=offdxy<<15; |
760 | py+=offdxy-2; | 761 | py+=offdxy-2; |
761 | p1y = *py++; | 762 | p1y = *py++; |
762 | p2y = *py++; | 763 | p2y = *py++; |
763 | 764 | ||
764 | //block2_y | 765 | //block2_y |
765 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | 766 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); |
766 | 767 | ||
767 | dxy += this->step_dx; | 768 | dxy += this->step_dx; |
768 | offdxy=((dxy-1)>>15); | 769 | offdxy=((dxy-1)>>15); |
769 | dxy-=offdxy<<15; | 770 | dxy-=offdxy<<15; |
770 | py+=offdxy-2; | 771 | py+=offdxy-2; |
771 | p1y = *py++; | 772 | p1y = *py++; |
772 | p2y = *py++; | 773 | p2y = *py++; |
773 | 774 | ||
774 | // proceed now with YUV2RGB [conversion part]: | 775 | // proceed now with YUV2RGB [conversion part]: |
775 | // u and v are currently in dest1u and dest1v | 776 | // u and v are currently in dest1u and dest1v |
776 | // the 2 y are in dest1y and dest2y. | 777 | // the 2 y are in dest1y and dest2y. |
777 | // RGB(0),DST1(0), RGB(1), DST1(1) | 778 | // RGB(0),DST1(0), RGB(1), DST1(1) |
778 | r = this->table_rV[dest1v]; | 779 | r = this->table_rV[dest1v]; |
779 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | 780 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); |
780 | b = this->table_bU[dest1u]; | 781 | b = this->table_bU[dest1u]; |
781 | 782 | ||
782 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | 783 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; |
783 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | 784 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; |
784 | 785 | ||
785 | width -=2; | 786 | width -=2; |
786 | } | 787 | } |
787 | } | 788 | } |
788 | else | 789 | else |
789 | { | 790 | { |
790 | // this case is simple, u and v are already evaluated, | 791 | // this case is simple, u and v are already evaluated, |
791 | // Note pour moi: r, g et b pourraient etre reutilises!! | 792 | // Note pour moi: r, g et b pourraient etre reutilises!! |
792 | 793 | ||
793 | // init values: | 794 | // init values: |
794 | p1y = *py++; | 795 | p1y = *py++; |
795 | p2y = *py++; | 796 | p2y = *py++; |
796 | 797 | ||
797 | //width loop (compute all data for a line). | 798 | //width loop (compute all data for a line). |
798 | while (width>0) | 799 | while (width>0) |
799 | { | 800 | { |
800 | // proceed with y [scaling part]: | 801 | // proceed with y [scaling part]: |
801 | // evaluate 2y | 802 | // evaluate 2y |
802 | //block1_y | 803 | //block1_y |
803 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); | 804 | dest1y=p1y + ((dxy*(p2y-p1y))>>15); |
804 | 805 | ||
805 | dxy += this->step_dx; | 806 | dxy += this->step_dx; |
806 | offdxy=((dxy-1)>>15); | 807 | offdxy=((dxy-1)>>15); |
807 | dxy-=offdxy<<15; | 808 | dxy-=offdxy<<15; |
808 | py+=offdxy-2; | 809 | py+=offdxy-2; |
809 | p1y = *py++; | 810 | p1y = *py++; |
810 | p2y = *py++; | 811 | p2y = *py++; |
811 | 812 | ||
812 | //end block1_uvy | 813 | //end block1_uvy |
813 | 814 | ||
814 | //block2_y | 815 | //block2_y |
815 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); | 816 | dest2y=p1y + ((dxy*(p2y-p1y))>>15); |
816 | 817 | ||
817 | dxy += this->step_dx; | 818 | dxy += this->step_dx; |
818 | offdxy=((dxy-1)>>15); | 819 | offdxy=((dxy-1)>>15); |
819 | dxy-=offdxy<<15; | 820 | dxy-=offdxy<<15; |
820 | py+=offdxy-2; | 821 | py+=offdxy-2; |
821 | p1y = *py++; | 822 | p1y = *py++; |
822 | p2y = *py++; | 823 | p2y = *py++; |
823 | //end block2_y | 824 | //end block2_y |
824 | 825 | ||
825 | // proceed now with YUV2RGB [conversion part]: | 826 | // proceed now with YUV2RGB [conversion part]: |
826 | // u and v are currently in dest1u and dest1v | 827 | // u and v are currently in dest1u and dest1v |
827 | // the 2 y are in dest1y and dest2y. | 828 | // the 2 y are in dest1y and dest2y. |
828 | // RGB(0),DST1(0) | 829 | // RGB(0),DST1(0) |
829 | dest1u=*u_buffer++; | 830 | dest1u=*u_buffer++; |
830 | dest1v=*v_buffer++; | 831 | dest1v=*v_buffer++; |
831 | r = this->table_rV[dest1v]; | 832 | r = this->table_rV[dest1v]; |
832 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); | 833 | g = (void *) (((uint8_t *)this->table_gU[dest1u]) + this->table_gV[dest1v]); |
833 | b = this->table_bU[dest1u]; | 834 | b = this->table_bU[dest1u]; |
834 | 835 | ||
835 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; | 836 | *dst++ = r[dest1y] + g[dest1y] + b[dest1y]; |
836 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; | 837 | *dst++ = r[dest2y] + g[dest2y] + b[dest2y]; |
837 | 838 | ||
838 | width -=2; | 839 | width -=2; |
839 | } | 840 | } |
840 | } | 841 | } |
841 | // end of line scaling/conversion | 842 | // end of line scaling/conversion |
842 | dy += this->step_dy; | 843 | dy += this->step_dy; |
843 | _dst += this->rgb_stride; | 844 | _dst += this->rgb_stride; |
844 | 845 | ||
845 | while (--height > 0 && dy < 32768) | 846 | while (--height > 0 && dy < 32768) |
846 | { | 847 | { |
847 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! | 848 | xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); // *2 because of int8 cast! |
848 | dy += this->step_dy; | 849 | dy += this->step_dy; |
849 | _dst += this->rgb_stride; | 850 | _dst += this->rgb_stride; |
850 | } | 851 | } |
851 | 852 | ||
852 | 853 | ||
853 | if (height <= 0) | 854 | if (height <= 0) |
854 | break; | 855 | break; |
855 | 856 | ||
856 | do | 857 | do |
857 | { | 858 | { |
858 | dy -= 32768; | 859 | dy -= 32768; |
859 | _py += this->y_stride; | 860 | _py += this->y_stride; |
860 | 861 | ||
861 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ | 862 | recal_uv^=2; /*bit 0 for reevaluation of scanline, bit 1 for offset.*/ |
862 | 863 | ||
863 | if ((recal_uv&2)==0) | 864 | if ((recal_uv&2)==0) |
864 | { | 865 | { |
865 | _pu += this->uv_stride; | 866 | _pu += this->uv_stride; |
866 | _pv += this->uv_stride; | 867 | _pv += this->uv_stride; |
867 | recal_uv|=1; // if update, then reevaluate scanline! | 868 | recal_uv|=1; // if update, then reevaluate scanline! |
868 | } | 869 | } |
869 | } | 870 | } |
870 | while( dy>=32768); | 871 | while( dy>=32768); |
871 | } | 872 | } |
872 | } | 873 | } |
873 | 874 | ||
874 | 875 | ||
875 | #endif | 876 | #endif |