Diffstat (limited to 'noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S') (more/less context) (ignore whitespace changes)
-rw-r--r-- | noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S b/noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S new file mode 100644 index 0000000..f4a3395 --- a/dev/null +++ b/noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S | |||
@@ -0,0 +1,192 @@ | |||
1 | /* WARNING : this function only works when stride_U == stride_V (the V pointer is derived from the U pointer, | ||
2 | a hack to not have to do too many computations at line's end), and when w and h are both even and non-zero... | ||
3 | |||
4 | C-like prototype : | ||
5 | void convert_yuv420_rgb565(AVPicture *picture, unsigned char *results, int w, int h) ; | ||
6 | |||
7 | */ | ||
8 | |||
9 | #ifdef __arm__ | ||
10 | |||
11 | .text | ||
12 | .align | ||
13 | |||
14 | .global convert_yuv420_rgb565 | ||
15 | convert_yuv420_rgb565: @ in : r0 = picture, r1 = results (RGB565 output), r2 = w, r3 = h (w and h even, > 0) | ||
16 | stmdb sp!, { r4 - r12, lr } @ all callee saved regs | ||
17 | ldr r7, [r0, #0] @ Y ptr | ||
18 | ldr r9, [r0, #4] @ U ptr | ||
19 | ldr r10, [r0, #8] @ V ptr | ||
20 | subs r10, r10, r9 @ V ptr - U ptr | ||
21 | ldr r8, [r0, #12] @ Stride_Y | ||
22 | add r8, r8, r7 @ Y + stride_Y | ||
23 | ldr r4, [r0, #12] @ Stride_Y | ||
24 | mov r4, r4, lsl #1 @ 2 * Stride_Y | ||
25 | sub r4, r4, r2 @ (2 * Stride_Y) - width | ||
26 | ldr r5, [r0, #16] @ Stride_U | ||
27 | sub r5, r5, r2, lsr #1 @ Stride_U - (width / 2) | ||
28 | ldr r6, [r0, #20] @ Stride_V | ||
29 | sub r6, r6, r2, lsr #1 @ Stride_V - (width / 2) | ||
30 | add r0, r1, r2, lsl #1 @ r0 = RGB + one line (width * 2 bytes) | ||
31 | stmdb sp!, { r0-r10 } @ spill the 11 working values (layout described below) | ||
32 | @ Stack description : | ||
33 | @ (sp+ 0) RGB + one line | ||
34 | @ (sp+ 4) RGB | ||
35 | @ (sp+ 8) width (save) | ||
36 | @ (sp+12) height | ||
37 | @ (sp+16) (2 * stride_Y) - width | ||
38 | @ (sp+20) stride_U - (width / 2) | ||
39 | @ (sp+24) stride_V - (width / 2) !!! UNUSED !!! | ||
40 | @ (sp+28) Y ptr | ||
41 | @ (sp+32) Y ptr + one line | ||
42 | @ (sp+36) U ptr | ||
43 | @ (sp+40) V - U | ||
44 | mov lr, r2 @ Initialize the width counter | ||
45 | add r0, pc, #(const_storage-.-8) @ r0 = base pointer to the constants array | ||
46 | ldr r8, [r0, #(4*4)] @ r8 = multy (r8 is never overwritten afterwards) | ||
47 | yuv_loop: @ each pass converts a 2x2 block of pixels sharing one U and one V sample | ||
48 | add r0, pc, #(const_storage-.-8) @ r0 = base pointer to the constants array | ||
49 | ldr r10, [sp, #28] @ r10 = Y | ||
50 | ldr r1, [sp, #36] @ r1 = U | ||
51 | ldrb r9, [r10, #0] @ r9 = *Y | ||
52 | ldrb r11, [r1] @ r11 = *U | ||
53 | add r1, r1, #1 @ r1 = U++ | ||
54 | ldr r2, [sp, #40] @ r2 = V - U | ||
55 | str r1, [sp, #36] @ store U++ | ||
56 | add r2, r1, r2 @ r2 = V+1 | ||
57 | ldrb r12, [r2, #-1] @ r12 = *V | ||
58 | sub r11, r11, #128 @ r11 = *U - 128 | ||
59 | sub r12, r12, #128 @ r12 = *V - 128 | ||
60 | ldr r1, [r0, #(4*0)] @ r1 = crv | ||
61 | mov r7, #32768 @ r7 = 32768 (for additions in MLA) | ||
62 | ldr r2, [r0, #(4*3)] @ r2 = -cgv | ||
63 | mla r6, r1, r12, r7 @ r6 = nonyc_r = crv * (*V - 128) + 32768 | ||
64 | ldr r3, [r0, #(4*1)] @ r3 = cbu | ||
65 | mla r4, r2, r12, r7 @ r4 = - cgv * (*V - 128) + 32768 | ||
66 | sub r9, r9, #16 @ r9 = *Y - 16 | ||
67 | mla r5, r3, r11, r7 @ r5 = nonyc_b = cbu * (*U - 128) + 32768 | ||
68 | ldr r0, [r0, #(4*2)] @ r0 = -cgu | ||
69 | mla r7, r8, r9, r6 @ r7 = (*Y - 16) * multy + nonyc_r | ||
70 | add r10, r10, #2 @ r10 = Y + 2 | ||
71 | mla r4, r0, r11, r4 @ r4 = nonyc_g = - cgu * (*U - 128) + r4 = - cgu * (*U - 128) - cgv * (*V - 128) + 32768 | ||
72 | add r0, pc, #(rb_clip-.-8) @ r0 contains the pointer to the R and B clipping array | ||
73 | mla r12, r8, r9, r5 @ r12 = (*Y - 16) * multy + nonyc_b | ||
74 | ldrb r7, [r0, r7, asr #(16+3)] @ r7 = R component (5 bits, clipped through the table) | ||
75 | mla r1, r8, r9, r4 @ r1 = (*Y - 16) * multy + nonyc_g | ||
76 | ldrb r9, [r10, #-1] @ r9 = *(Y+1) | ||
77 | str r10, [sp, #28] @ save Y + 2 | ||
78 | ldrb r12, [r0, r12, asr #(16+3)] @ r12 = B component (and the start of the RGB word) | ||
79 | add r11, pc, #(g_clip-.-8) @ r11 now contains the pointer to the G clipping array | ||
80 | ldrb r1, [r11, r1, asr #(16+2)] @ r1 contains the G part of the RGB triplet (6 bits) | ||
81 | sub r9, r9, #16 @ r9 = *(Y+1) - 16 | ||
82 | mla r10, r8, r9, r6 @ r10 is the Red part of the RGB triplet (before clipping) | ||
83 | add r12, r12, r7, lsl #11 @ r12 = R.B ... | ||
84 | mla r7, r8, r9, r5 @ r7 is the Blue part of the RGB triplet (before clipping) | ||
85 | add r12, r12, r1, lsl #5 @ r12 = RGB ... (ie the first pixel (half-word) is done) | ||
86 | mla r2, r8, r9, r4 @ r2 is the Green part of the RGB triplet (before clipping) | ||
87 | ldrb r10, [r0, r10, asr #(16+3)] @ r10 = R component | ||
88 | ldrb r7, [r0, r7, asr #(16+3)] @ r7 = B component | ||
89 | ldr r1, [sp, #32] @ r1 = Ynext | ||
90 | ldrb r2, [r11, r2, asr #(16+2)] @ r2 = G component | ||
91 | ldrb r9, [r1] @ r9 = *Ynext | ||
92 | add r12, r12, r2, lsl #(5+16) @ r12 = RGB .G. | ||
93 | sub r9, r9, #16 @ r9 = *Ynext - 16 | ||
94 | mla r2, r8, r9, r4 @ r2 is the Green part of the RGB triplet (before clipping) | ||
95 | add r12, r12, r7, lsl #(0+16) @ r12 = RGB .GB | ||
96 | mla r7, r8, r9, r5 @ r7 is the Blue part of the RGB triplet (before clipping) | ||
97 | add r12, r12, r10, lsl #(11+16) @ r12 = RGB RGB | ||
98 | ldr r3, [sp, #4] @ r3 = RGB | ||
99 | mla r10, r8, r9, r6 @ r10 is the Red part of the RGB triplet (before clipping) | ||
100 | str r12, [r3] @ store the two rgb pixels (one word) at *RGB | ||
101 | add r3, r3, #4 @ r3 = RGB++ (ie next double-pixel) | ||
102 | str r3, [sp, #4] @ store the RGB pointer | ||
103 | ldrb r9, [r1, #1] @ r9 = *(Ynext+1) | ||
104 | add r1, r1, #2 @ r1 = Ynext + 2 | ||
105 | sub r9, r9, #16 @ r9 = *(Ynext+1) - 16 | ||
106 | ldrb r12, [r0, r7, asr #(16+3)] @ r12 = ..B ... | ||
107 | ldrb r10, [r0, r10, asr #(16+3)] @ r10 = R component | ||
108 | mla r7, r8, r9, r5 @ r7 is the Blue part of the RGB triplet (before clipping) | ||
109 | add r12, r12, r10, lsl #11 @ r12 = R.B ... | ||
110 | ldrb r2, [r11, r2, asr #(16+2)] @ r2 = G component | ||
111 | mla r10, r8, r9, r6 @ r10 is the Red part of the RGB triplet (before clipping) | ||
112 | add r12, r12, r2, lsl #5 @ r12 = RGB ... | ||
113 | mla r2, r8, r9, r4 @ r2 is the Green part of the RGB triplet (before clipping) | ||
114 | ldrb r7, [r0, r7, asr #(16+3)] @ r7 = B component | ||
115 | str r1, [sp, #32] @ store the increased Ynext pointer | ||
116 | add r12, r12, r7, lsl #(16+0) @ r12 = RGB ..B | ||
117 | ldrb r10, [r0, r10, asr #(16+3)] @ r10 = R component | ||
118 | ldr r3, [sp, #0] @ r3 = RGBnext pointer | ||
119 | add r12, r12, r10, lsl #(16+11) @ r12 = RGB R.B | ||
120 | ldrb r2, [r11, r2, asr #(16+2)] @ r2 = G component | ||
121 | add r3, r3, #4 @ r3 = next pixel on the RGBnext line | ||
122 | add r12, r12, r2, lsl #(16+5) @ r12 = RGB RGB | ||
123 | str r12, [r3, #-4] @ store the next pixel | ||
124 | str r3, [sp, #0] @ store the increased 'next line' pixel pointer | ||
125 | subs lr, lr, #2 @ decrement the width counter (two pixels per line were just done) | ||
126 | bgt yuv_loop @ and restart if not at the end of the line (bgt, not bne : an odd or non-positive width must not step over zero and loop forever) | ||
127 | |||
128 | ldr r0, [sp, #8] @ r0 = saved width | ||
129 | ldr r1, [sp, #0] @ r1 = RGBnext pointer | ||
130 | mov lr, r0 @ lr = saved width (to restart the line counter) | ||
131 | str r1, [sp, #4] @ current RGBnext pointer is next iteration RGB pointer | ||
132 | add r1, r1, r0, lsl #1 @ r1 = update RGBnext to next line | ||
133 | str r1, [sp, #0] @ store updated RGBnext pointer | ||
134 | |||
135 | ldr r3, [sp, #16] @ r3 = (2 * stride_Y) - width | ||
136 | ldr r4, [sp, #28] @ r4 = Y ptr | ||
137 | ldr r5, [sp, #32] @ r5 = Ynext ptr | ||
138 | add r4, r4, r3 @ r4 = Y ptr for the next two lines | ||
139 | add r5, r5, r3 @ r5 = Ynext ptr for the next two lines | ||
140 | str r4, [sp, #28] @ store updated Y pointer | ||
141 | str r5, [sp, #32] @ store updated Ynext pointer | ||
142 | |||
143 | ldr r1, [sp, #20] @ r1 = stride_U - (width / 2) | ||
144 | ldr r2, [sp, #36] @ r2 = U ptr | ||
145 | |||
146 | ldr r6, [sp, #12] @ get height counter | ||
147 | |||
148 | add r2, r2, r1 @ update U ptr | ||
149 | str r2, [sp, #36] @ store updated U ptr (and update 'V' at the same time :-) ) | ||
150 | |||
151 | subs r6, r6, #2 @ two lines were just converted | ||
152 | str r6, [sp, #12] @ store the height counter (str leaves the flags from subs untouched) | ||
153 | bgt yuv_loop @ next pair of lines (bgt, not bne : an odd or non-positive height must not step over zero and loop forever) | ||
154 | |||
155 | @ Exit cleanly :-) | ||
156 | add sp, sp, #(11*4) @ remove all custom things from stack | ||
157 | ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | ||
158 | |||
159 | |||
160 | const_storage: @ read through pc-relative addressing at word offsets 4*0 .. 4*4 | ||
161 | @ In order : crv, cbu, - cgu, - cgv, multy (cgu and cgv are stored already negated) | ||
162 | .word 0x00019895, 0x00020469, 0xffff9bb5, 0xffff2fe1, 0x00012A15 | ||
163 | rb_clip_dummy: @ 48 zero bytes just below rb_clip : negative R/B indices clip to 0 | ||
164 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
165 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
166 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
167 | rb_clip: @ R/B clipping to 5 bits : indices 0..31 map to themselves, 32..79 clip to 0x1f | ||
168 | .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f | ||
169 | .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f | ||
170 | .byte 0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f | ||
171 | .byte 0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f | ||
172 | .byte 0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f | ||
173 | g_clip_dummy: @ 96 zero bytes just below g_clip : negative G indices clip to 0 | ||
174 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
175 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
176 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
177 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
178 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
179 | .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | ||
180 | g_clip: @ G clipping to 6 bits : indices 0..63 map to themselves, 64..159 clip to 0x3f | ||
181 | .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f | ||
182 | .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f | ||
183 | .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f | ||
184 | .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f | ||
185 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
186 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
187 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
188 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
189 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
190 | .byte 0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f | ||
191 | |||
192 | #endif | ||