author | kergoth <kergoth> | 2002-01-25 22:14:26 (UTC) |
---|---|---|
committer | kergoth <kergoth> | 2002-01-25 22:14:26 (UTC) |
commit | 15318cad33835e4e2dc620d033e43cd930676cdd (patch) (unidiff) | |
tree | c2fa0399a2c47fda8e2cd0092c73a809d17f68eb /core/multimedia/opieplayer/libmad/imdct_l_arm.S | |
download | opie-15318cad33835e4e2dc620d033e43cd930676cdd.zip opie-15318cad33835e4e2dc620d033e43cd930676cdd.tar.gz opie-15318cad33835e4e2dc620d033e43cd930676cdd.tar.bz2 |
Initial revision
Diffstat (limited to 'core/multimedia/opieplayer/libmad/imdct_l_arm.S') (more/less context) (ignore whitespace changes)
-rw-r--r-- | core/multimedia/opieplayer/libmad/imdct_l_arm.S | 1000 |
1 files changed, 1000 insertions, 0 deletions
diff --git a/core/multimedia/opieplayer/libmad/imdct_l_arm.S b/core/multimedia/opieplayer/libmad/imdct_l_arm.S new file mode 100644 index 0000000..b86ba11 --- a/dev/null +++ b/core/multimedia/opieplayer/libmad/imdct_l_arm.S | |||
@@ -0,0 +1,1000 @@ | |||
1 | /***************************************************************************** | ||
2 | * Copyright (C) 2000-2001 Andre McCurdy <armccurdy@yahoo.co.uk> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | ***************************************************************************** | ||
19 | * | ||
20 | * Notes: | ||
21 | * | ||
22 | * | ||
23 | ***************************************************************************** | ||
24 | * | ||
25 | * $Id$ | ||
26 | * | ||
27 | * 2001/03/24: Andre McCurdy <armccurdy@yahoo.co.uk> | ||
28 | * - Corrected PIC unsafe loading of address of 'imdct36_long_karray' | ||
29 | * | ||
30 | * 2000/09/20: Robert Leslie <rob@mars.org> | ||
31 | * - Added a global symbol with leading underscore per suggestion of | ||
32 | * Simon Burge to support linking with the a.out format. | ||
33 | * | ||
34 | * 2000/09/15: Robert Leslie <rob@mars.org> | ||
35 | * - Fixed a small bug where flags were changed before a conditional branch. | ||
36 | * | ||
37 | * 2000/09/15: Andre McCurdy <armccurdy@yahoo.co.uk> | ||
38 | * - Applied Nicolas Pitre's rounding optimisation in all remaining places. | ||
39 | * | ||
40 | * 2000/09/09: Nicolas Pitre <nico@cam.org> | ||
41 | * - Optimized rounding + scaling operations. | ||
42 | * | ||
43 | * 2000/08/09: Andre McCurdy <armccurdy@yahoo.co.uk> | ||
44 | * - Original created. | ||
45 | * | ||
46 | ****************************************************************************/ | ||
47 | |||
48 | |||
49 | /* | ||
50 | On entry: | ||
51 | |||
52 | r0 = pointer to 18 element input array | ||
53 | r1 = pointer to 36 element output array | ||
54 | r2 = windowing block type | ||
55 | |||
56 | |||
57 | Stack frame created during execution of the function: | ||
58 | |||
59 | Initial Holds: | ||
60 | Stack | ||
61 | pointer | ||
62 | minus: | ||
63 | |||
64 | 0 | ||
65 | 4 lr | ||
66 | 8 r11 | ||
67 | 12 r10 | ||
68 | 16 r9 | ||
69 | 20 r8 | ||
70 | 24 r7 | ||
71 | 28 r6 | ||
72 | 32 r5 | ||
73 | 36 r4 | ||
74 | |||
75 | 40 r2 : windowing block type | ||
76 | |||
77 | 44 ct00 high | ||
78 | 48 ct00 low | ||
79 | 52 ct01 high | ||
80 | 56 ct01 low | ||
81 | 60 ct04 high | ||
82 | 64 ct04 low | ||
83 | 68 ct06 high | ||
84 | 72 ct06 low | ||
85 | 76 ct05 high | ||
86 | 80 ct05 low | ||
87 | 84 ct03 high | ||
88 | 88 ct03 low | ||
89 | 92 -ct05 high | ||
90 | 96 -ct05 low | ||
91 | 100 -ct07 high | ||
92 | 104 -ct07 low | ||
93 | 108 ct07 high | ||
94 | 112 ct07 low | ||
95 | 116 ct02 high | ||
96 | 120 ct02 low | ||
97 | */ | ||
98 | |||
99 | #define BLOCK_MODE_NORMAL 0 | ||
100 | #define BLOCK_MODE_START 1 | ||
101 | #define BLOCK_MODE_STOP 3 | ||
102 | |||
103 | |||
104 | #define X0 0x00 | ||
105 | #define X1 0x04 | ||
106 | #define X2 0x08 | ||
107 | #define X3 0x0C | ||
108 | #define X4 0x10 | ||
109 | #define X5 0x14 | ||
110 | #define X6 0x18 | ||
111 | #define X7 0x1c | ||
112 | #define X8 0x20 | ||
113 | #define X9 0x24 | ||
114 | #define X10 0x28 | ||
115 | #define X11 0x2c | ||
116 | #define X12 0x30 | ||
117 | #define X13 0x34 | ||
118 | #define X14 0x38 | ||
119 | #define X15 0x3c | ||
120 | #define X16 0x40 | ||
121 | #define X17 0x44 | ||
122 | |||
123 | #define x0 0x00 | ||
124 | #define x1 0x04 | ||
125 | #define x2 0x08 | ||
126 | #define x3 0x0C | ||
127 | #define x4 0x10 | ||
128 | #define x5 0x14 | ||
129 | #define x6 0x18 | ||
130 | #define x7 0x1c | ||
131 | #define x8 0x20 | ||
132 | #define x9 0x24 | ||
133 | #define x10 0x28 | ||
134 | #define x11 0x2c | ||
135 | #define x12 0x30 | ||
136 | #define x13 0x34 | ||
137 | #define x14 0x38 | ||
138 | #define x15 0x3c | ||
139 | #define x16 0x40 | ||
140 | #define x17 0x44 | ||
141 | #define x18 0x48 | ||
142 | #define x19 0x4c | ||
143 | #define x20 0x50 | ||
144 | #define x21 0x54 | ||
145 | #define x22 0x58 | ||
146 | #define x23 0x5c | ||
147 | #define x24 0x60 | ||
148 | #define x25 0x64 | ||
149 | #define x26 0x68 | ||
150 | #define x27 0x6c | ||
151 | #define x28 0x70 | ||
152 | #define x29 0x74 | ||
153 | #define x30 0x78 | ||
154 | #define x31 0x7c | ||
155 | #define x32 0x80 | ||
156 | #define x33 0x84 | ||
157 | #define x34 0x88 | ||
158 | #define x35 0x8c | ||
159 | |||
160 | #define K00 0x0ffc19fd | ||
161 | #define K01 0x00b2aa3e | ||
162 | #define K02 0x0fdcf549 | ||
163 | #define K03 0x0216a2a2 | ||
164 | #define K04 0x0f9ee890 | ||
165 | #define K05 0x03768962 | ||
166 | #define K06 0x0f426cb5 | ||
167 | #define K07 0x04cfb0e2 | ||
168 | #define K08 0x0ec835e8 | ||
169 | #define K09 0x061f78aa | ||
170 | #define K10 0x0e313245 | ||
171 | #define K11 0x07635284 | ||
172 | #define K12 0x0d7e8807 | ||
173 | #define K13 0x0898c779 | ||
174 | #define K14 0x0cb19346 | ||
175 | #define K15 0x09bd7ca0 | ||
176 | #define K16 0x0bcbe352 | ||
177 | #define K17 0x0acf37ad | ||
178 | |||
179 | #define minus_K02 0xf0230ab7 | ||
180 | |||
181 | #define WL0 0x00b2aa3e | ||
182 | #define WL1 0x0216a2a2 | ||
183 | #define WL2 0x03768962 | ||
184 | #define WL3 0x04cfb0e2 | ||
185 | #define WL4 0x061f78aa | ||
186 | #define WL5 0x07635284 | ||
187 | #define WL6 0x0898c779 | ||
188 | #define WL7 0x09bd7ca0 | ||
189 | #define WL8 0x0acf37ad | ||
190 | #define WL9 0x0bcbe352 | ||
191 | #define WL10 0x0cb19346 | ||
192 | #define WL11 0x0d7e8807 | ||
193 | #define WL12 0x0e313245 | ||
194 | #define WL13 0x0ec835e8 | ||
195 | #define WL14 0x0f426cb5 | ||
196 | #define WL15 0x0f9ee890 | ||
197 | #define WL16 0x0fdcf549 | ||
198 | #define WL17 0x0ffc19fd | ||
199 | |||
200 | |||
201 | @***************************************************************************** | ||
202 | |||
203 | |||
204 | .text | ||
205 | .align | ||
206 | |||
207 | .global III_imdct_l | ||
208 | .global _III_imdct_l | ||
209 | |||
210 | III_imdct_l: | ||
211 | _III_imdct_l: | ||
212 | |||
213 | stmdb sp!, { r2, r4 - r11, lr } @ all callee saved regs, plus arg3 | ||
214 | |||
215 | ldr r4, =K08 @ r4 = K08 | ||
216 | ldr r5, =K09 @ r5 = K09 | ||
217 | ldr r8, [r0, #X4] @ r8 = X4 | ||
218 | ldr r9, [r0, #X13] @ r9 = X13 | ||
219 | rsb r6, r4, #0 @ r6 = -K08 | ||
220 | rsb r7, r5, #0 @ r7 = -K09 | ||
221 | |||
222 | smull r2, r3, r4, r8 @ r2..r3 = (X4 * K08) | ||
223 | smlal r2, r3, r5, r9 @ r2..r3 = (X4 * K08) + (X13 * K09) = ct01 | ||
224 | |||
225 | smull r10, lr, r8, r5 @ r10..lr = (X4 * K09) | ||
226 | smlal r10, lr, r9, r6 @ r10..lr = (X4 * K09) + (X13 * -K08) = ct00 | ||
227 | |||
228 | ldr r8, [r0, #X7] @ r8 = X7 | ||
229 | ldr r9, [r0, #X16] @ r9 = X16 | ||
230 | |||
231 | stmdb sp!, { r2, r3, r10, lr } @ stack ct00_h, ct00_l, ct01_h, ct01_l | ||
232 | |||
233 | add r8, r8, r9 @ r8 = (X7 + X16) | ||
234 | ldr r9, [r0, #X1] @ r9 = X1 | ||
235 | |||
236 | smlal r2, r3, r6, r8 @ r2..r3 = ct01 + ((X7 + X16) * -K08) | ||
237 | smlal r2, r3, r7, r9 @ r2..r3 += (X1 * -K09) | ||
238 | |||
239 | ldr r7, [r0, #X10] @ r7 = X10 | ||
240 | |||
241 | rsbs r10, r10, #0 | ||
242 | rsc lr, lr, #0 @ r10..lr = -ct00 | ||
243 | |||
244 | smlal r2, r3, r5, r7 @ r2..r3 += (X10 * K09) = ct06 | ||
245 | |||
246 | smlal r10, lr, r9, r6 @ r10..lr = -ct00 + ( X1 * -K08) | ||
247 | smlal r10, lr, r8, r5 @ r10..lr += ((X7 + X16) * K09) | ||
248 | smlal r10, lr, r7, r4 @ r10..lr += ( X10 * K08) = ct04 | ||
249 | |||
250 | stmdb sp!, { r2, r3, r10, lr } @ stack ct04_h, ct04_l, ct06_h, ct06_l | ||
251 | |||
252 | @---- | ||
253 | |||
254 | ldr r7, [r0, #X0] | ||
255 | ldr r8, [r0, #X11] | ||
256 | ldr r9, [r0, #X12] | ||
257 | sub r7, r7, r8 | ||
258 | sub r7, r7, r9 @ r7 = (X0 - X11 -X12) = ct14 | ||
259 | |||
260 | ldr r9, [r0, #X3] | ||
261 | ldr r8, [r0, #X8] | ||
262 | ldr r11, [r0, #X15] | ||
263 | sub r8, r8, r9 | ||
264 | add r8, r8, r11 @ r8 = (X8 - X3 + X15) = ct16 | ||
265 | |||
266 | add r11, r7, r8 @ r11 = ct14 + ct16 = ct18 | ||
267 | |||
268 | smlal r2, r3, r6, r11 @ r2..r3 = ct06 + ((X0 - X11 - X3 + X15 + X8 - X12) * -K08) | ||
269 | |||
270 | ldr r6, [r0, #X2] | ||
271 | ldr r9, [r0, #X9] | ||
272 | ldr r12, [r0, #X14] | ||
273 | sub r6, r6, r9 | ||
274 | sub r6, r6, r12 @ r6 = (X2 - X9 - X14) = ct15 | ||
275 | |||
276 | ldr r9, [r0, #X5] | ||
277 | ldr r12, [r0, #X6] | ||
278 | sub r9, r9, r12 | ||
279 | ldr r12, [r0, #X17] | ||
280 | sub r9, r9, r12 @ r9 = (X5 - X6 - X17) = ct17 | ||
281 | |||
282 | add r12, r9, r6 @ r12 = ct15 + ct17 = ct19 | ||
283 | |||
284 | smlal r2, r3, r5, r12 @ r2..r3 += ((X2 - X9 + X5 - X6 - X17 - X14) * K09) | ||
285 | |||
286 | smlal r10, lr, r11, r5 @ r10..lr = ct04 + (ct18 * K09) | ||
287 | smlal r10, lr, r12, r4 @ r10..lr = ct04 + (ct18 * K09) + (ct19 * K08) | ||
288 | |||
289 | movs r2, r2, lsr #28 | ||
290 | adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3 | ||
291 | str r2, [r1, #x22] @ store result x22 | ||
292 | |||
293 | movs r10, r10, lsr #28 | ||
294 | adc r10, r10, lr, lsl #4 @ r10 = bits[59..28] of r10..lr | ||
295 | str r10, [r1, #x4] @ store result x4 | ||
296 | |||
297 | @---- | ||
298 | |||
299 | ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp) | ||
300 | |||
301 | @ r2..r3 = ct06 | ||
302 | @ r4..r5 = ct04 | ||
303 | @ r6 = ct15 | ||
304 | @ r7 = ct14 | ||
305 | @ r8 = ct16 | ||
306 | @ r9 = ct17 | ||
307 | @ r10 = . | ||
308 | @ r11 = . | ||
309 | @ r12 = . | ||
310 | @ lr = . | ||
311 | |||
312 | ldr r10, =K03 @ r10 = K03 | ||
313 | ldr lr, =K15 @ lr = K15 | ||
314 | |||
315 | smlal r2, r3, r10, r7 @ r2..r3 = ct06 + (ct14 * K03) | ||
316 | smlal r4, r5, lr, r7 @ r4..r5 = ct04 + (ct14 * K15) | ||
317 | |||
318 | ldr r12, =K14 @ r12 = K14 | ||
319 | rsb r10, r10, #0 @ r10 = -K03 | ||
320 | |||
321 | smlal r2, r3, lr, r6 @ r2..r3 += (ct15 * K15) | ||
322 | smlal r4, r5, r10, r6 @ r4..r5 += (ct15 * -K03) | ||
323 | smlal r2, r3, r12, r8 @ r2..r3 += (ct16 * K14) | ||
324 | |||
325 | ldr r11, =minus_K02 @ r11 = -K02 | ||
326 | rsb r12, r12, #0 @ r12 = -K14 | ||
327 | |||
328 | smlal r4, r5, r12, r9 @ r4..r5 += (ct17 * -K14) | ||
329 | smlal r2, r3, r11, r9 @ r2..r3 += (ct17 * -K02) | ||
330 | smlal r4, r5, r11, r8 @ r4..r5 += (ct16 * -K02) | ||
331 | |||
332 | movs r2, r2, lsr #28 | ||
333 | adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3 | ||
334 | str r2, [r1, #x7] @ store result x7 | ||
335 | |||
336 | movs r4, r4, lsr #28 | ||
337 | adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5 | ||
338 | str r4, [r1, #x1] @ store result x1 | ||
339 | |||
340 | @---- | ||
341 | |||
342 | ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp) | ||
343 | |||
344 | @ r2..r3 = ct06 | ||
345 | @ r4..r5 = ct04 | ||
346 | @ r6 = ct15 | ||
347 | @ r7 = ct14 | ||
348 | @ r8 = ct16 | ||
349 | @ r9 = ct17 | ||
350 | @ r10 = -K03 | ||
351 | @ r11 = -K02 | ||
352 | @ r12 = -K14 | ||
353 | @ lr = K15 | ||
354 | |||
355 | rsbs r2, r2, #0 | ||
356 | rsc r3, r3, #0 @ r2..r3 = -ct06 | ||
357 | |||
358 | smlal r2, r3, r12, r7 @ r2..r3 = -ct06 + (ct14 * -K14) | ||
359 | smlal r2, r3, r10, r8 @ r2..r3 += (ct16 * -K03) | ||
360 | |||
361 | smlal r4, r5, r12, r6 @ r4..r5 = ct04 + (ct15 * -K14) | ||
362 | smlal r4, r5, r10, r9 @ r4..r5 += (ct17 * -K03) | ||
363 | smlal r4, r5, lr, r8 @ r4..r5 += (ct16 * K15) | ||
364 | smlal r4, r5, r11, r7 @ r4..r5 += (ct14 * -K02) | ||
365 | |||
366 | rsb lr, lr, #0 @ lr = -K15 | ||
367 | rsb r11, r11, #0 @ r11 = K02 | ||
368 | |||
369 | smlal r2, r3, lr, r9 @ r2..r3 += (ct17 * -K15) | ||
370 | smlal r2, r3, r11, r6 @ r2..r3 += (ct15 * K02) | ||
371 | |||
372 | movs r4, r4, lsr #28 | ||
373 | adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5 | ||
374 | str r4, [r1, #x25] @ store result x25 | ||
375 | |||
376 | movs r2, r2, lsr #28 | ||
377 | adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3 | ||
378 | str r2, [r1, #x19] @ store result x19 | ||
379 | |||
380 | @---- | ||
381 | |||
382 | ldr r2, [sp, #16] @ r2 = ct01_l | ||
383 | ldr r3, [sp, #20] @ r3 = ct01_h | ||
384 | |||
385 | ldr r6, [r0, #X1] | ||
386 | ldr r8, [r0, #X7] | ||
387 | ldr r9, [r0, #X10] | ||
388 | ldr r7, [r0, #X16] | ||
389 | |||
390 | rsbs r2, r2, #0 | ||
391 | rsc r3, r3, #0 @ r2..r3 = -ct01 | ||
392 | |||
393 | mov r4, r2 | ||
394 | mov r5, r3 @ r4..r5 = -ct01 | ||
395 | |||
396 | @ r2..r3 = -ct01 | ||
397 | @ r4..r5 = -ct01 | ||
398 | @ r6 = X1 | ||
399 | @ r7 = X16 | ||
400 | @ r8 = X7 | ||
401 | @ r9 = X10 | ||
402 | @ r10 = -K03 | ||
403 | @ r11 = K02 | ||
404 | @ r12 = -K14 | ||
405 | @ lr = -K15 | ||
406 | |||
407 | smlal r4, r5, r12, r7 @ r4..r5 = -ct01 + (X16 * -K14) | ||
408 | smlal r2, r3, lr, r9 @ r2..r3 = -ct01 + (X10 * -K15) | ||
409 | |||
410 | smlal r4, r5, r10, r8 @ r4..r5 += (X7 * -K03) | ||
411 | smlal r2, r3, r10, r7 @ r2..r3 += (X16 * -K03) | ||
412 | |||
413 | smlal r4, r5, r11, r9 @ r4..r5 += (X10 * K02) | ||
414 | smlal r2, r3, r12, r8 @ r2..r3 += (X7 * -K14) | ||
415 | |||
416 | rsb lr, lr, #0 @ lr = K15 | ||
417 | rsb r11, r11, #0 @ r11 = -K02 | ||
418 | |||
419 | smlal r4, r5, lr, r6 @ r4..r5 += (X1 * K15) = ct05 | ||
420 | smlal r2, r3, r11, r6 @ r2..r3 += (X1 * -K02) = ct03 | ||
421 | |||
422 | stmdb sp!, { r2, r3, r4, r5 } @ stack ct05_h, ct05_l, ct03_h, ct03_l | ||
423 | |||
424 | rsbs r4, r4, #0 | ||
425 | rsc r5, r5, #0 @ r4..r5 = -ct05 | ||
426 | |||
427 | stmdb sp!, { r4, r5 } @ stack -ct05_h, -ct05_l | ||
428 | |||
429 | ldr r2, [sp, #48] @ r2 = ct00_l | ||
430 | ldr r3, [sp, #52] @ r3 = ct00_h | ||
431 | |||
432 | rsb r10, r10, #0 @ r10 = K03 | ||
433 | |||
434 | rsbs r4, r2, #0 | ||
435 | rsc r5, r3, #0 @ r4..r5 = -ct00 | ||
436 | |||
437 | @ r2..r3 = ct00 | ||
438 | @ r4..r5 = -ct00 | ||
439 | @ r6 = X1 | ||
440 | @ r7 = X16 | ||
441 | @ r8 = X7 | ||
442 | @ r9 = X10 | ||
443 | @ r10 = K03 | ||
444 | @ r11 = -K02 | ||
445 | @ r12 = -K14 | ||
446 | @ lr = K15 | ||
447 | |||
448 | smlal r4, r5, r10, r6 @ r4..r5 = -ct00 + (X1 * K03) | ||
449 | smlal r2, r3, r10, r9 @ r2..r3 = ct00 + (X10 * K03) | ||
450 | |||
451 | smlal r4, r5, r12, r9 @ r4..r5 += (X10 * -K14) | ||
452 | smlal r2, r3, r12, r6 @ r2..r3 += (X1 * -K14) | ||
453 | |||
454 | smlal r4, r5, r11, r7 @ r4..r5 += (X16 * -K02) | ||
455 | smlal r4, r5, lr, r8 @ r4..r5 += (X7 * K15) = ct07 | ||
456 | |||
457 | rsb lr, lr, #0 @ lr = -K15 | ||
458 | rsb r11, r11, #0 @ r11 = K02 | ||
459 | |||
460 | smlal r2, r3, r11, r8 @ r2..r3 += (X7 * K02) | ||
461 | smlal r2, r3, lr, r7 @ r2..r3 += (X16 * -K15) = ct02 | ||
462 | |||
463 | rsbs r6, r4, #0 | ||
464 | rsc r7, r5, #0 @ r6..r7 = -ct07 | ||
465 | |||
466 | stmdb sp!, { r2 - r7 } @ stack -ct07_h, -ct07_l, ct07_h, ct07_l, ct02_h, ct02_l | ||
467 | |||
468 | |||
469 | @---- | ||
470 | |||
471 | add r2, pc, #(imdct36_long_karray-.-8) @ r2 = address of imdct36_long_karray, formed pc-relative (pc reads as .+8, hence the -8) | ||
472 | |||
473 | |||
474 | loop: | ||
475 | ldr r12, [r0, #X0] | ||
476 | |||
477 | ldmia r2!, { r5 - r11 } @ r5..r11 = first 7 coefficients of this 13 word Karray row, r2 advances | ||
478 | |||
479 | smull r3, r4, r5, r12 @ r3..r4 = sum = (Kxx * X0) | ||
480 | ldr r12, [r0, #X2] | ||
481 | ldr r5, [r0, #X3] | ||
482 | smlal r3, r4, r6, r12 @ sum += (Kxx * X2) | ||
483 | ldr r12, [r0, #X5] | ||
484 | ldr r6, [r0, #X6] | ||
485 | smlal r3, r4, r7, r5 @ sum += (Kxx * X3) | ||
486 | smlal r3, r4, r8, r12 @ sum += (Kxx * X5) | ||
487 | ldr r12, [r0, #X8] | ||
488 | ldr r5, [r0, #X9] | ||
489 | smlal r3, r4, r9, r6 @ sum += (Kxx * X6) | ||
490 | smlal r3, r4, r10, r12 @ sum += (Kxx * X8) | ||
491 | smlal r3, r4, r11, r5 @ sum += (Kxx * X9) | ||
492 | |||
493 | ldmia r2!, { r5 - r10 } @ r5..r9 = last 5 coefficients of the row, r10 = control word for the row | ||
494 | |||
495 | ldr r11, [r0, #X11] | ||
496 | ldr r12, [r0, #X12] | ||
497 | smlal r3, r4, r5, r11 @ sum += (Kxx * X11) | ||
498 | ldr r11, [r0, #X14] | ||
499 | ldr r5, [r0, #X15] | ||
500 | smlal r3, r4, r6, r12 @ sum += (Kxx * X12) | ||
501 | smlal r3, r4, r7, r11 @ sum += (Kxx * X14) | ||
502 | ldr r11, [r0, #X17] | ||
503 | smlal r3, r4, r8, r5 @ sum += (Kxx * X15) | ||
504 | smlal r3, r4, r9, r11 @ sum += (Kxx * X17) | ||
505 | |||
506 | add r5, sp, r10, lsr #16 @ r5 = sp + control[31..16] = address of the stacked ctxx needed by this row | ||
507 | |||
508 | ldmia r5, { r6, r7 } @ r6..r7 = ctxx | ||
509 | |||
510 | mov r8, r10, lsl #16 @ r8 = control << 16, which discards the ctxx stack offset | ||
511 | |||
512 | adds r3, r3, r6 @ add low words | ||
513 | adc r4, r4, r7 @ add high words, with carry | ||
514 | movs r3, r3, lsr #28 | ||
515 | adc r3, r3, r4, lsl #4 @ r3 = bits[59..28] of r3..r4, plus the carry (bit 27) left by the movs for rounding | ||
516 | |||
517 | str r3, [r1, r8, lsr #24] @ store result at r1 + control[15..8] (byte offset into the output array) | ||
518 | |||
519 | movs r8, r8, lsl #8 @ r8 = control[7..0] << 24, so Z is set while the completion flag byte is zero | ||
520 | beq loop @ loop back if completion flag not set | ||
521 | b imdct_l_windowing @ branch to windowing stage if looping finished | ||
522 | |||
523 | imdct36_long_karray: | ||
524 | |||
525 | .word K17, -K13, K10, -K06, -K05, K01, -K00, K04, -K07, K11, K12, -K16, 0x00000000 | ||
526 | .word K13, K07, K16, K01, K10, -K05, K04, -K11, K00, -K17, K06, -K12, 0x00200800 | ||
527 | .word K11, K17, K05, K12, -K01, K06, -K07, K00, -K13, K04, -K16, K10, 0x00200c00 | ||
528 | .word K07, K00, -K12, K05, -K16, -K10, K11, -K17, K04, K13, K01, K06, 0x00001400 | ||
529 | .word K05, K10, -K00, -K17, K07, -K13, K12, K06, -K16, K01, -K11, -K04, 0x00181800 | ||
530 | .word K01, K05, -K07, -K11, K13, K17, -K16, -K12, K10, K06, -K04, -K00, 0x00102000 | ||
531 | .word -K16, K12, -K11, K07, K04, -K00, -K01, K05, -K06, K10, K13, -K17, 0x00284800 | ||
532 | .word -K12, K06, K17, -K00, -K11, K04, K05, -K10, K01, K16, -K07, -K13, 0x00085000 | ||
533 | .word -K10, K16, K04, -K13, -K00, K07, K06, -K01, -K12, -K05, K17, K11, 0x00105400 | ||
534 | .word -K06, -K01, K13, K04, K17, -K11, -K10, -K16, -K05, K12, K00, K07, 0x00185c00 | ||
535 | .word -K04, -K11, -K01, K16, K06, K12, K13, -K07, -K17, -K00, -K10, -K05, 0x00006000 | ||
536 | .word -K00, -K04, -K06, -K10, -K12, -K16, -K17, -K13, -K11, -K07, -K05, -K01, 0x00206801 | ||
537 | |||
538 | |||
539 | @---- | ||
540 | @------------------------------------------------------------------------- | ||
541 | @---- | ||
542 | |||
543 | imdct_l_windowing: | ||
544 | |||
545 | ldr r11, [sp, #80] @ r11 = windowing block type, i.e. the r2 saved at entry, which now sits above 20 words (80 bytes) of stacked ctxx | ||
546 | ldmia r1!, { r0, r2 - r9 } @ load 9 words from x0, update pointer | ||
547 | |||
548 | @ r0 = x0 | ||
549 | @ r1 = &x[9] | ||
550 | @ r2 = x1 | ||
551 | @ r3 = x2 | ||
552 | @ r4 = x3 | ||
553 | @ r5 = x4 | ||
554 | @ r6 = x5 | ||
555 | @ r7 = x6 | ||
556 | @ r8 = x7 | ||
557 | @ r9 = x8 | ||
558 | @ r10 = . | ||
559 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) | ||
560 | @ r12 = . | ||
561 | @ lr = . | ||
562 | |||
563 | cmp r11, #BLOCK_MODE_STOP @ Z set if this is a stop block (consumed by the beq two lines down) | ||
564 | rsb r10, r0, #0 @ r10 = -x0 (plain rsb, not rsbs: the cmp flags must survive until the beq) | ||
565 | beq stop_block_x0_to_x17 | ||
566 | |||
567 | |||
568 | @ start and normal blocks are treated the same for x[0]..x[17] | ||
569 | |||
570 | normal_block_x0_to_x17: | ||
571 | |||
572 | ldr r12, =WL9 @ r12 = window_l[9] | ||
573 | |||
574 | rsb r0, r9, #0 @ r0 = -x8 | ||
575 | rsb r9, r2, #0 @ r9 = -x1 | ||
576 | rsb r2, r8, #0 @ r2 = -x7 | ||
577 | rsb r8, r3, #0 @ r8 = -x2 | ||
578 | rsb r3, r7, #0 @ r3 = -x6 | ||
579 | rsb r7, r4, #0 @ r7 = -x3 | ||
580 | rsb r4, r6, #0 @ r4 = -x5 | ||
581 | rsb r6, r5, #0 @ r6 = -x4 | ||
582 | |||
583 | @ r0 = -x8 | ||
584 | @ r1 = &x[9] | ||
585 | @ r2 = -x7 | ||
586 | @ r3 = -x6 | ||
587 | @ r4 = -x5 | ||
588 | @ r5 = . | ||
589 | @ r6 = -x4 | ||
590 | @ r7 = -x3 | ||
591 | @ r8 = -x2 | ||
592 | @ r9 = -x1 | ||
593 | @ r10 = -x0 | ||
594 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) | ||
595 | @ r12 = window_l[9] | ||
596 | @ lr = . | ||
597 | |||
598 | smull r5, lr, r12, r0 @ r5..lr = (window_l[9] * (x[9] == -x[8])) | ||
599 | ldr r12, =WL10 @ r12 = window_l[10] | ||
600 | movs r5, r5, lsr #28 | ||
601 | adc r0, r5, lr, lsl #4 @ r0 = bits[59..28] of windowed x9 | ||
602 | |||
603 | smull r5, lr, r12, r2 @ r5..lr = (window_l[10] * (x[10] == -x[7])) | ||
604 | ldr r12, =WL11 @ r12 = window_l[11] | ||
605 | movs r5, r5, lsr #28 | ||
606 | adc r2, r5, lr, lsl #4 @ r2 = bits[59..28] of windowed x10 | ||
607 | |||
608 | smull r5, lr, r12, r3 @ r5..lr = (window_l[11] * (x[11] == -x[6])) | ||
609 | ldr r12, =WL12 @ r12 = window_l[12] | ||
610 | movs r5, r5, lsr #28 | ||
611 | adc r3, r5, lr, lsl #4 @ r3 = bits[59..28] of windowed x11 | ||
612 | |||
613 | smull r5, lr, r12, r4 @ r5..lr = (window_l[12] * (x[12] == -x[5])) | ||
614 | ldr r12, =WL13 @ r12 = window_l[13] | ||
615 | movs r5, r5, lsr #28 | ||
616 | adc r4, r5, lr, lsl #4 @ r4 = bits[59..28] of windowed x12 | ||
617 | |||
618 | smull r5, lr, r12, r6 @ r5..lr = (window_l[13] * (x[13] == -x[4])) | ||
619 | ldr r12, =WL14 @ r12 = window_l[14] | ||
620 | movs r5, r5, lsr #28 | ||
621 | adc r6, r5, lr, lsl #4 @ r6 = bits[59..28] of windowed x13 | ||
622 | |||
623 | smull r5, lr, r12, r7 @ r5..lr = (window_l[14] * (x[14] == -x[3])) | ||
624 | ldr r12, =WL15 @ r12 = window_l[15] | ||
625 | movs r5, r5, lsr #28 | ||
626 | adc r7, r5, lr, lsl #4 @ r7 = bits[59..28] of windowed x14 | ||
627 | |||
628 | smull r5, lr, r12, r8 @ r5..lr = (window_l[15] * (x[15] == -x[2])) | ||
629 | ldr r12, =WL16 @ r12 = window_l[16] | ||
630 | movs r5, r5, lsr #28 | ||
631 | adc r8, r5, lr, lsl #4 @ r8 = bits[59..28] of windowed x15 | ||
632 | |||
633 | smull r5, lr, r12, r9 @ r5..lr = (window_l[16] * (x[16] == -x[1])) | ||
634 | ldr r12, =WL17 @ r12 = window_l[17] | ||
635 | movs r5, r5, lsr #28 | ||
636 | adc r9, r5, lr, lsl #4 @ r9 = bits[59..28] of windowed x16 | ||
637 | |||
638 | smull r5, lr, r12, r10 @ r5..lr = (window_l[17] * (x[17] == -x[0])) | ||
639 | ldr r12, =WL0 @ r12 = window_l[0] | ||
640 | movs r5, r5, lsr #28 | ||
641 | adc r10, r5, lr, lsl #4 @ r10 = bits[59..28] of windowed x17 | ||
642 | |||
643 | |||
644 | stmia r1, { r0, r2 - r4, r6 - r10 } @ store windowed x[9] .. x[17] | ||
645 | ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x0 | ||
646 | |||
647 | |||
648 | smull r10, lr, r12, r0 @ r10..lr = (window_l[0] * x[0]) | ||
649 | ldr r12, =WL1 @ r12 = window_l[1] | ||
650 | movs r10, r10, lsr #28 | ||
651 | adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x0 | ||
652 | |||
653 | smull r10, lr, r12, r2 @ r10..lr = (window_l[1] * x[1]) | ||
654 | ldr r12, =WL2 @ r12 = window_l[2] | ||
655 | movs r10, r10, lsr #28 | ||
656 | adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x1 | ||
657 | |||
658 | smull r10, lr, r12, r3 @ r10..lr = (window_l[2] * x[2]) | ||
659 | ldr r12, =WL3 @ r12 = window_l[3] | ||
660 | movs r10, r10, lsr #28 | ||
661 | adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x2 | ||
662 | |||
663 | smull r10, lr, r12, r4 @ r10..lr = (window_l[3] * x[3]) | ||
664 | ldr r12, =WL4 @ r12 = window_l[4] | ||
665 | movs r10, r10, lsr #28 | ||
666 | adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x3 | ||
667 | |||
668 | smull r10, lr, r12, r5 @ r10..lr = (window_l[4] * x[4]) | ||
669 | ldr r12, =WL5 @ r12 = window_l[5] | ||
670 | movs r10, r10, lsr #28 | ||
671 | adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x4 | ||
672 | |||
673 | smull r10, lr, r12, r6 @ r10..lr = (window_l[5] * x[5]) | ||
674 | ldr r12, =WL6 @ r12 = window_l[6] | ||
675 | movs r10, r10, lsr #28 | ||
676 | adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x5 | ||
677 | |||
678 | smull r10, lr, r12, r7 @ r10..lr = (window_l[6] * x[6]) | ||
679 | ldr r12, =WL7 @ r12 = window_l[7] | ||
680 | movs r10, r10, lsr #28 | ||
681 | adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x6 | ||
682 | |||
683 | smull r10, lr, r12, r8 @ r10..lr = (window_l[7] * x[7]) | ||
684 | ldr r12, =WL8 @ r12 = window_l[8] | ||
685 | movs r10, r10, lsr #28 | ||
686 | adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x7 | ||
687 | |||
688 | smull r10, lr, r12, r9 @ r10..lr = (window_l[8] * x[8]) | ||
689 | movs r10, r10, lsr #28 | ||
690 | adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x8 | ||
691 | |||
692 | stmia r1, { r0, r2 - r9 } @ store windowed x[0] .. x[8] | ||
693 | |||
694 | cmp r11, #BLOCK_MODE_START | ||
695 | beq start_block_x18_to_x35 | ||
696 | |||
697 | |||
698 | @---- | ||
699 | |||
700 | |||
701 | normal_block_x18_to_x35: | ||
702 | |||
703 | ldr r11, =WL3 @ r11 = window_l[3] | ||
704 | ldr r12, =WL4 @ r12 = window_l[4] | ||
705 | |||
706 | add r1, r1, #(18*4) @ r1 = &x[18] | ||
707 | |||
708 | ldmia r1!, { r0, r2 - r4, r6 - r10 } @ load 9 words from x18, update pointer | ||
709 | |||
710 | @ r0 = x18 | ||
711 | @ r1 = &x[27] | ||
712 | @ r2 = x19 | ||
713 | @ r3 = x20 | ||
714 | @ r4 = x21 | ||
715 | @ r5 = . | ||
716 | @ r6 = x22 | ||
717 | @ r7 = x23 | ||
718 | @ r8 = x24 | ||
719 | @ r9 = x25 | ||
720 | @ r10 = x26 | ||
721 | @ r11 = window_l[3] | ||
722 | @ r12 = window_l[4] | ||
723 | @ lr = . | ||
724 | |||
725 | smull r5, lr, r12, r6 @ r5..lr = (window_l[4] * (x[22] == x[31])) | ||
726 | movs r5, r5, lsr #28 | ||
727 | adc r5, r5, lr, lsl #4 @ r5 = bits[59..28] of windowed x31 | ||
728 | |||
729 | smull r6, lr, r11, r4 @ r6..lr = (window_l[3] * (x[21] == x[32])) | ||
730 | ldr r12, =WL5 @ r12 = window_l[5] | ||
731 | movs r6, r6, lsr #28 | ||
732 | adc r6, r6, lr, lsl #4 @ r6 = bits[59..28] of windowed x32 | ||
733 | |||
734 | smull r4, lr, r12, r7 @ r4..lr = (window_l[5] * (x[23] == x[30])) | ||
735 | ldr r11, =WL1 @ r11 = window_l[1] | ||
736 | ldr r12, =WL2 @ r12 = window_l[2] | ||
737 | movs r4, r4, lsr #28 | ||
738 | adc r4, r4, lr, lsl #4 @ r4 = bits[59..28] of windowed x30 | ||
739 | |||
740 | smull r7, lr, r12, r3 @ r7..lr = (window_l[2] * (x[20] == x[33])) | ||
741 | ldr r12, =WL6 @ r12 = window_l[6] | ||
742 | movs r7, r7, lsr #28 | ||
743 | adc r7, r7, lr, lsl #4 @ r7 = bits[59..28] of windowed x33 | ||
744 | |||
745 | smull r3, lr, r12, r8 @ r3..lr = (window_l[6] * (x[24] == x[29])) | ||
746 | movs r3, r3, lsr #28 | ||
747 | adc r3, r3, lr, lsl #4 @ r3 = bits[59..28] of windowed x29 | ||
748 | |||
749 | smull r8, lr, r11, r2 @ r8..lr = (window_l[1] * (x[19] == x[34])) | ||
750 | ldr r12, =WL7 @ r12 = window_l[7] | ||
751 | ldr r11, =WL8 @ r11 = window_l[8] | ||
752 | movs r8, r8, lsr #28 | ||
753 | adc r8, r8, lr, lsl #4 @ r8 = bits[59..28] of windowed x34 | ||
754 | |||
755 | smull r2, lr, r12, r9 @ r2..lr = (window_l[7] * (x[25] == x[28])) | ||
756 | ldr r12, =WL0 @ r12 = window_l[0] | ||
757 | movs r2, r2, lsr #28 | ||
758 | adc r2, r2, lr, lsl #4 @ r2 = bits[59..28] of windowed x28 | ||
759 | |||
760 | smull r9, lr, r12, r0 @ r9..lr = (window_l[0] * (x[18] == x[35])) | ||
761 | movs r9, r9, lsr #28 | ||
762 | adc r9, r9, lr, lsl #4 @ r9 = bits[59..28] of windowed x35 | ||
763 | |||
764 | smull r0, lr, r11, r10 @ r0..lr = (window_l[8] * (x[26] == x[27])) | ||
765 | ldr r11, =WL16 @ r11 = window_l[16] | ||
766 | ldr r12, =WL17 @ r12 = window_l[17] | ||
767 | movs r0, r0, lsr #28 | ||
768 | adc r0, r0, lr, lsl #4 @ r0 = bits[59..28] of windowed x27 | ||
769 | |||
770 | |||
771 | stmia r1, { r0, r2 - r9 } @ store windowed x[27] .. x[35] | ||
772 | ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x18 | ||
773 | |||
774 | |||
775 | smull r10, lr, r12, r0 @ r10..lr = (window_l[17] * x[18]) | ||
776 | movs r10, r10, lsr #28 | ||
777 | adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x18 | ||
778 | |||
779 | smull r10, lr, r11, r2 @ r10..lr = (window_l[16] * x[19]) | ||
780 | ldr r11, =WL14 @ r11 = window_l[14] | ||
781 | ldr r12, =WL15 @ r12 = window_l[15] | ||
782 | movs r10, r10, lsr #28 | ||
783 | adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x19 | ||
784 | |||
785 | smull r10, lr, r12, r3 @ r10..lr = (window_l[15] * x[20]) | ||
786 | movs r10, r10, lsr #28 | ||
787 | adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x20 | ||
788 | |||
789 | smull r10, lr, r11, r4 @ r10..lr = (window_l[14] * x[21]) | ||
790 | ldr r11, =WL12 @ r11 = window_l[12] | ||
791 | ldr r12, =WL13 @ r12 = window_l[13] | ||
792 | movs r10, r10, lsr #28 | ||
793 | adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x21 | ||
794 | |||
795 | smull r10, lr, r12, r5 @ r10..lr = (window_l[13] * x[22]) | ||
796 | movs r10, r10, lsr #28 | ||
797 | adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x22 | ||
798 | |||
799 | smull r10, lr, r11, r6 @ r10..lr = (window_l[12] * x[23]) | ||
800 | ldr r11, =WL10 @ r11 = window_l[10] | ||
801 | ldr r12, =WL11 @ r12 = window_l[11] | ||
802 | movs r10, r10, lsr #28 | ||
803 | adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x23 | ||
804 | |||
805 | smull r10, lr, r12, r7 @ r10..lr = (window_l[11] * x[24]) | ||
806 | movs r10, r10, lsr #28 | ||
807 | adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x24 | ||
808 | |||
809 | smull r10, lr, r11, r8 @ r10..lr = (window_l[10] * x[25]) | ||
810 | ldr r12, =WL9 @ r12 = window_l[9] | ||
811 | movs r10, r10, lsr #28 | ||
812 | adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x25 | ||
813 | |||
814 | smull r10, lr, r12, r9 @ r10..lr = (window_l[9] * x[26]) | ||
815 | |||
816 | movs r10, r10, lsr #28 | ||
817 | adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x26 | ||
818 | |||
819 | stmia r1, { r0, r2 - r9 } @ store windowed x[18] .. x[26] | ||
820 | |||
821 | @---- | ||
822 | @ NB there are 2 possible exits from this function - this is only one of them | ||
823 | @---- | ||
824 | |||
825 | add sp, sp, #(21*4) @ discard the stack frame: 20 words of stacked ctxx plus the saved r2 (block type) | ||
826 | ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return | ||
827 | |||
828 | @---- | ||
829 | |||
830 | |||
831 | stop_block_x0_to_x17: | ||
832 | |||
833 | @ Stop-block path for x[0] .. x[17]: x[0..5] are zeroed, x[6..11] are windowed with WL1,4,7,10,13,16, x[12..17] are stored unwindowed. On entry: r0 = x0 | ||
834 | @ r1 = &x[9] | ||
835 | @ r2 = x1 | ||
836 | @ r3 = x2 | ||
837 | @ r4 = x3 | ||
838 | @ r5 = x4 | ||
839 | @ r6 = x5 | ||
840 | @ r7 = x6 | ||
841 | @ r8 = x7 | ||
842 | @ r9 = x8 | ||
843 | @ r10 = -x0 | ||
844 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) | ||
845 | @ r12 = . | ||
846 | @ lr = . | ||
847 | |||
848 | rsb r0, r6, #0 @ r0 = -x5 | ||
849 | rsb r6, r2, #0 @ r6 = -x1 | ||
850 | rsb r2, r5, #0 @ r2 = -x4 | ||
851 | rsb r5, r3, #0 @ r5 = -x2 | ||
852 | rsb r3, r4, #0 @ r3 = -x3 | ||
853 | |||
854 | add r1, r1, #(3*4) @ r1 = &x[12] | ||
855 | stmia r1, { r0, r2, r3, r5, r6, r10 } @ store unwindowed x[12] .. x[17] = -x5, -x4, -x3, -x2, -x1, -x0 (no writeback, r1 stays &x[12]) | ||
856 | |||
857 | ldr r0, =WL1 @ r0 = window_l[1] == window_s[0] | ||
858 | |||
859 | rsb r10, r9, #0 @ r10 = -x8 | ||
860 | rsb r12, r8, #0 @ r12 = -x7 | ||
861 | rsb lr, r7, #0 @ lr = -x6 | ||
862 | |||
863 | @ r0 = WL1 | ||
864 | @ r1 = &x[12] | ||
865 | @ r2 = . | ||
866 | @ r3 = . | ||
867 | @ r4 = . | ||
868 | @ r5 = . | ||
869 | @ r6 = . | ||
870 | @ r7 = x6 | ||
871 | @ r8 = x7 | ||
872 | @ r9 = x8 | ||
873 | @ r10 = -x8 | ||
874 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) | ||
875 | @ r12 = -x7 | ||
876 | @ lr = -x6 | ||
877 | |||
878 | smull r5, r6, r0, r7 @ r5..r6 = (window_l[1] * x[6]), 64-bit signed product, r5 = low word, r6 = high word | ||
879 | ldr r2, =WL4 @ r2 = window_l[4] == window_s[1] (next coefficient, loaded between smull and its use) | ||
880 | movs r5, r5, lsr #28 | ||
881 | adc r7, r5, r6, lsl #4 @ r7 = bits[59..28] of windowed x6 (adc adds the carry left by movs, i.e. product bit 27, to round) | ||
882 | |||
883 | smull r5, r6, r2, r8 @ r5..r6 = (window_l[4] * x[7]) | ||
884 | ldr r3, =WL7 @ r3 = window_l[7] == window_s[2] | ||
885 | movs r5, r5, lsr #28 | ||
886 | adc r8, r5, r6, lsl #4 @ r8 = bits[59..28] of windowed x7 | ||
887 | |||
888 | smull r5, r6, r3, r9 @ r5..r6 = (window_l[7] * x[8]) | ||
889 | ldr r4, =WL10 @ r4 = window_l[10] == window_s[3] | ||
890 | movs r5, r5, lsr #28 | ||
891 | adc r9, r5, r6, lsl #4 @ r9 = bits[59..28] of windowed x8 | ||
892 | |||
893 | smull r5, r6, r4, r10 @ r5..r6 = (window_l[10] * (x[9] == -x[8])) | ||
894 | ldr r0, =WL13 @ r0 = window_l[13] == window_s[4] | ||
895 | movs r5, r5, lsr #28 | ||
896 | adc r10, r5, r6, lsl #4 @ r10 = bits[59..28] of windowed x9 | ||
897 | |||
898 | smull r5, r6, r0, r12 @ r5..r6 = (window_l[13] * (x[10] == -x[7])) | ||
899 | ldr r2, =WL16 @ r2 = window_l[16] == window_s[5] | ||
900 | movs r5, r5, lsr #28 | ||
901 | adc r12, r5, r6, lsl #4 @ r12 = bits[59..28] of windowed x10 | ||
902 | |||
903 | smull r5, r6, r2, lr @ r5..r6 = (window_l[16] * (x[11] == -x[6])) | ||
904 | |||
905 | ldr r0, =0x00 | ||
906 | |||
907 | movs r5, r5, lsr #28 | ||
908 | adc lr, r5, r6, lsl #4 @ lr = bits[59..28] of windowed x11 | ||
909 | |||
910 | stmdb r1!, { r7 - r10, r12, lr } @ store windowed x[6] .. x[11], writeback leaves r1 = &x[6] | ||
911 | |||
912 | ldr r5, =0x00 | ||
913 | ldr r6, =0x00 | ||
914 | ldr r2, =0x00 | ||
915 | ldr r3, =0x00 | ||
916 | ldr r4, =0x00 | ||
917 | |||
918 | stmdb r1!, { r0, r2 - r6 } @ store windowed x[0] .. x[5] (all six registers hold 0), writeback leaves r1 = &x[0] | ||
919 | |||
920 | b normal_block_x18_to_x35 | ||
921 | |||
922 | |||
923 | @---- | ||
924 | |||
925 | |||
926 | start_block_x18_to_x35: | ||
927 | |||
928 | ldr r4, =WL1 @ Start-block path: windows x[24..29] and zeroes x[30..35]. r4 = window_l[1] == window_s[0] | ||
929 | |||
930 | add r1, r1, #(24*4) @ r1 = &x[24] (this path only rewrites x[24] .. x[35]) | ||
931 | |||
932 | ldmia r1, { r0, r2, r3 } @ load 3 words from x24, don't update pointer | ||
933 | |||
934 | @ r0 = x24 | ||
935 | @ r1 = &x[24] | ||
936 | @ r2 = x25 | ||
937 | @ r3 = x26 | ||
938 | @ r4 = WL1 | ||
939 | @ r5 = WL4 (not yet loaded here, loaded below) | ||
940 | @ r6 = WL7 (loaded below) | ||
941 | @ r7 = WL10 (loaded below) | ||
942 | @ r8 = WL13 (loaded below) | ||
943 | @ r9 = WL16 (loaded below) | ||
944 | @ r10 = . | ||
945 | @ r11 = . | ||
946 | @ r12 = . | ||
947 | @ lr = . | ||
948 | |||
949 | ldr r5, =WL4 @ r5 = window_l[4] == window_s[1] | ||
950 | |||
951 | smull r10, r11, r4, r0 @ r10..r11 = (window_l[1] * (x[24] == x[29])), r10 = low word, r11 = high word | ||
952 | ldr r6, =WL7 @ r6 = window_l[7] == window_s[2] | ||
953 | movs r10, r10, lsr #28 | ||
954 | adc lr, r10, r11, lsl #4 @ lr = bits[59..28] of windowed x29 (adc adds the carry left by movs, i.e. product bit 27, to round) | ||
955 | |||
956 | smull r10, r11, r5, r2 @ r10..r11 = (window_l[4] * (x[25] == x[28])) | ||
957 | ldr r7, =WL10 @ r7 = window_l[10] == window_s[3] | ||
958 | movs r10, r10, lsr #28 | ||
959 | adc r12, r10, r11, lsl #4 @ r12 = bits[59..28] of windowed x28 | ||
960 | |||
961 | smull r10, r11, r6, r3 @ r10..r11 = (window_l[7] * (x[26] == x[27])) | ||
962 | ldr r8, =WL13 @ r8 = window_l[13] == window_s[4] | ||
963 | movs r10, r10, lsr #28 | ||
964 | adc r4, r10, r11, lsl #4 @ r4 = bits[59..28] of windowed x27 (WL1 in r4 is no longer needed) | ||
965 | |||
966 | smull r10, r11, r7, r3 @ r10..r11 = (window_l[10] * x[26]) | ||
967 | ldr r9, =WL16 @ r9 = window_l[16] == window_s[5] | ||
968 | movs r10, r10, lsr #28 | ||
969 | adc r3, r10, r11, lsl #4 @ r3 = bits[59..28] of windowed x26 | ||
970 | |||
971 | smull r10, r11, r8, r2 @ r10..r11 = (window_l[13] * x[25]) | ||
972 | ldr r5, =0x00 | ||
973 | movs r10, r10, lsr #28 | ||
974 | adc r2, r10, r11, lsl #4 @ r2 = bits[59..28] of windowed x25 | ||
975 | |||
976 | smull r10, r11, r9, r0 @ r10..r11 = (window_l[16] * x[24]) | ||
977 | ldr r6, =0x00 | ||
978 | movs r10, r10, lsr #28 | ||
979 | adc r0, r10, r11, lsl #4 @ r0 = bits[59..28] of windowed x24 | ||
980 | |||
981 | stmia r1!, { r0, r2, r3, r4, r12, lr } @ store windowed x[24] .. x[29], writeback leaves r1 = &x[30] | ||
982 | |||
983 | ldr r7, =0x00 | ||
984 | ldr r8, =0x00 | ||
985 | ldr r9, =0x00 | ||
986 | ldr r10, =0x00 | ||
987 | |||
988 | stmia r1!, { r5 - r10 } @ store windowed x[30] .. x[35] (r5 - r10 all hold 0) | ||
989 | |||
990 | @---- | ||
991 | @ NB there are 2 possible exits from this function - this is only one of them | ||
992 | @---- | ||
993 | |||
994 | add sp, sp, #(21*4) @ release the (21*4)-byte stack frame | ||
995 | ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return (loading pc performs the return) | ||
996 | |||
997 | @---- | ||
998 | @END | ||
999 | @---- | ||
1000 | |||