1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
/*
Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
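/* WARNING: this function only works when stride_U == stride_V (the V pointer is
derived from the U pointer plus a fixed offset, which avoids extra computations
at the end of each line). The width and the height must both be even, because two
pixels of two lines are converted per iteration. The output is written as tightly
packed rows of w 16-bit pixels. The picture structure is read as three plane
pointers (byte offsets 0, 4, 8) followed by three strides (byte offsets 12, 16, 20).
C-like prototype:
void convert_yuv420_rgb565(AVPicture *picture, unsigned char *results, int w, int h) ;
*/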
#ifdef __arm__
.text
.align
.global convert_yuv420_rgb565
@ ---------------------------------------------------------------------------
@ convert_yuv420_rgb565 -- planar YUV 4:2:0 to packed RGB565 (ARM mode)
@
@ In:   r0 = picture; the words at offsets 0, 4, 8 are read as the Y, U, V
@            plane pointers and those at 12, 16, 20 as the Y, U, V strides
@       r1 = results (destination, rows of w * 2 bytes written back to back)
@       r2 = w (pixels per line)
@       r3 = h (number of lines)
@ Out:  nothing; r4-r12 are saved on entry and restored on exit
@
@ Each pass of yuv_loop converts one 2x2 block: two pixels of the current
@ line and two pixels of the line below, all sharing one U and one V sample.
@ The V plane is addressed as U pointer + (V - U), so only the U stride is
@ ever applied (stride_U must equal stride_V).
@
@ The function returns without writing anything when w or h is not a
@ positive even number. Both loop counters step by 2 and are tested with
@ bne, so any other value would never reach zero and the loops would run
@ past the end of the buffers.
@
@ NOTE(review): pixels are written with word stores, so results presumably
@ has to be 4-byte aligned -- confirm against the callers.
@ ---------------------------------------------------------------------------
convert_yuv420_rgb565:
    stmdb   sp!, { r4 - r12, lr }               @ all callee saved regs

    @ Argument check (see header): w > 0, h > 0, both even.
    cmp     r2, #0                              @ w <= 0 ?
    cmpgt   r3, #0                              @ if not: h <= 0 ?
    ble     yuv_done                            @ nothing to convert
    tst     r2, #1                              @ w odd ?
    tsteq   r3, #1                              @ if not: h odd ?
    bne     yuv_done                            @ counters would never hit zero

    ldr     r7, [r0, #0]                        @ Y ptr
    ldr     r9, [r0, #4]                        @ U ptr
    ldr     r10, [r0, #8]                       @ V ptr
    sub     r10, r10, r9                        @ V ptr - U ptr
    ldr     r8, [r0, #12]                       @ Stride_Y
    add     r8, r8, r7                          @ Y + stride_Y
    ldr     r4, [r0, #12]                       @ Stride_Y
    mov     r4, r4, lsl #1
    sub     r4, r4, r2                          @ (2 * Stride_Y) - width
    ldr     r5, [r0, #16]                       @ Stride_U
    sub     r5, r5, r2, lsr #1                  @ Stride_U - (width / 2)
    ldr     r6, [r0, #20]                       @ Stride_V
    sub     r6, r6, r2, lsr #1                  @ Stride_V - (width / 2)
    add     r0, r1, r2, lsl #1                  @ RGB + one line (width * 2 bytes)
    stmdb   sp!, { r0-r10 }
    @ Stack description :
    @ (sp+ 0) RGB + one line
    @ (sp+ 4) RGB
    @ (sp+ 8) width (save)
    @ (sp+12) height
    @ (sp+16) (2 * stride_Y) - width
    @ (sp+20) stride_U - (width / 2)
    @ (sp+24) stride_V - (width / 2) !!! UNUSED !!!
    @ (sp+28) Y ptr
    @ (sp+32) Y ptr + one line
    @ (sp+36) U ptr
    @ (sp+40) V - U

    mov     lr, r2                              @ Initialize the width counter
    add     r0, pc, #(const_storage-.-8)        @ r0 = base pointer to the constants array
    ldr     r8, [r0, #(4*4)]                    @ r8 = multy (kept for the whole conversion)

    @ Loop invariants: r8 = multy, lr = pixels left on the current line pair.
    @ Inside one pass: r6 / r4 / r5 = red / green / blue chroma terms shared
    @ by the four pixels, r0 = rb_clip base, r11 = g_clip base.
yuv_loop:
    add     r0, pc, #(const_storage-.-8)        @ r0 = base pointer to the constants array
    ldr     r10, [sp, #28]                      @ r10 = Y
    ldr     r1, [sp, #36]                       @ r1 = U
    ldrb    r9, [r10, #0]                       @ r9 = *Y
    ldrb    r11, [r1]                           @ r11 = *U
    add     r1, r1, #1                          @ r1 = U++
    ldr     r2, [sp, #40]                       @ r2 = V - U
    str     r1, [sp, #36]                       @ store U++
    add     r2, r1, r2                          @ r2 = V+1
    ldrb    r12, [r2, #-1]                      @ r12 = *V
    sub     r11, r11, #128                      @ r11 = *U - 128
    sub     r12, r12, #128                      @ r12 = *V - 128
    ldr     r1, [r0, #(4*0)]                    @ r1 = crv
    mov     r7, #32768                          @ r7 = 32768 (for additions in MLA)
    ldr     r2, [r0, #(4*3)]                    @ r2 = -cgv
    mla     r6, r1, r12, r7                     @ r6 = nonyc_r = crv * (*V - 128) + 32768
    ldr     r3, [r0, #(4*1)]                    @ r3 = cbu
    mla     r4, r2, r12, r7                     @ r4 = - cgv * (*V - 128) + 32768
    sub     r9, r9, #16                         @ r9 = *Y - 16
    mla     r5, r3, r11, r7                     @ r5 = nonyc_b = cbu * (*U - 128) + 32768
    ldr     r0, [r0, #(4*2)]                    @ r0 = -cgu
    mla     r7, r8, r9, r6                      @ r7 = (*Y - 16) * multy + nonyc_r
    add     r10, r10, #2                        @ r10 = Y + 2
    mla     r4, r0, r11, r4                     @ r4 = nonyc_g = - cgu * (*U - 128) - cgv * (*V - 128) + 32768
    add     r0, pc, #(rb_clip-.-8)              @ r0 = pointer to the R and B clipping array
    mla     r12, r8, r9, r5                     @ r12 = (*Y - 16) * multy + nonyc_b
    ldrb    r7, [r0, r7, asr #(16+3)]           @ r7 = R component (clipped, 5 bits)
    mla     r1, r8, r9, r4                      @ r1 = (*Y - 16) * multy + nonyc_g
    ldrb    r9, [r10, #-1]                      @ r9 = *(Y+1)
    str     r10, [sp, #28]                      @ save Y + 2
    ldrb    r12, [r0, r12, asr #(16+3)]         @ r12 = B component (and the start of the RGB word)
    add     r11, pc, #(g_clip-.-8)              @ r11 = pointer to the G clipping array
    ldrb    r1, [r11, r1, asr #(16+2)]          @ r1 = G component (clipped, 6 bits)
    sub     r9, r9, #16                         @ r9 = *(Y+1) - 16
    mla     r10, r8, r9, r6                     @ r10 = unclipped red of the second pixel
    add     r12, r12, r7, lsl #11               @ r12 = R.B ...
    mla     r7, r8, r9, r5                      @ r7 = unclipped blue of the second pixel
    add     r12, r12, r1, lsl #5                @ r12 = RGB ... (ie the first pixel (half-word) is done)
    mla     r2, r8, r9, r4                      @ r2 = unclipped green of the second pixel
    ldrb    r10, [r0, r10, asr #(16+3)]         @ r10 = R component
    ldrb    r7, [r0, r7, asr #(16+3)]           @ r7 = B component
    ldr     r1, [sp, #32]                       @ r1 = Ynext
    ldrb    r2, [r11, r2, asr #(16+2)]          @ r2 = G component
    ldrb    r9, [r1]                            @ r9 = *Ynext
    add     r12, r12, r2, lsl #(5+16)           @ r12 = RGB .G.
    sub     r9, r9, #16                         @ r9 = *Ynext - 16
    mla     r2, r8, r9, r4                      @ r2 = unclipped green, first pixel of the next line
    add     r12, r12, r7, lsl #(0+16)           @ r12 = RGB .GB
    mla     r7, r8, r9, r5                      @ r7 = unclipped blue, first pixel of the next line
    add     r12, r12, r10, lsl #(11+16)         @ r12 = RGB RGB
    ldr     r3, [sp, #4]                        @ r3 = RGB
    mla     r10, r8, r9, r6                     @ r10 = unclipped red, first pixel of the next line
    str     r12, [r3]                           @ store the two pixels at *RGB
    add     r3, r3, #4                          @ r3 = RGB++ (ie next double-pixel)
    str     r3, [sp, #4]                        @ store the RGB pointer
    ldrb    r9, [r1, #1]                        @ r9 = *(Ynext+1)
    add     r1, r1, #2                          @ r1 = Ynext + 2
    sub     r9, r9, #16                         @ r9 = *(Ynext+1) - 16
    ldrb    r12, [r0, r7, asr #(16+3)]          @ r12 = ..B ...
    ldrb    r10, [r0, r10, asr #(16+3)]         @ r10 = R component
    mla     r7, r8, r9, r5                      @ r7 = unclipped blue, second pixel of the next line
    add     r12, r12, r10, lsl #11              @ r12 = R.B ...
    ldrb    r2, [r11, r2, asr #(16+2)]          @ r2 = G component
    mla     r10, r8, r9, r6                     @ r10 = unclipped red, second pixel of the next line
    add     r12, r12, r2, lsl #5                @ r12 = RGB ...
    mla     r2, r8, r9, r4                      @ r2 = unclipped green, second pixel of the next line
    ldrb    r7, [r0, r7, asr #(16+3)]           @ r7 = B component
    str     r1, [sp, #32]                       @ store the increased Ynext pointer
    add     r12, r12, r7, lsl #(16+0)           @ r12 = RGB ..B
    ldrb    r10, [r0, r10, asr #(16+3)]         @ r10 = R component
    ldr     r3, [sp, #0]                        @ r3 = RGBnext pointer
    add     r12, r12, r10, lsl #(16+11)         @ r12 = RGB R.B
    ldrb    r2, [r11, r2, asr #(16+2)]          @ r2 = G component
    add     r3, r3, #4                          @ r3 = next double-pixel on the RGBnext line
    add     r12, r12, r2, lsl #(16+5)           @ r12 = RGB RGB
    str     r12, [r3, #-4]                      @ store the two pixels of the next line
    str     r3, [sp, #0]                        @ store the increased RGBnext pointer
    subs    lr, lr, #2                          @ two pixels of the line pair are done
    bne     yuv_loop                            @ and restart if not at the end of the line

    @ End of a line pair: move every pointer on by two lines.
    ldr     r0, [sp, #8]                        @ r0 = saved width
    ldr     r1, [sp, #0]                        @ r1 = RGBnext pointer
    mov     lr, r0                              @ lr = saved width (to restart the width counter)
    str     r1, [sp, #4]                        @ current RGBnext pointer is next iteration RGB pointer
    add     r1, r1, r0, lsl #1                  @ r1 = update RGBnext to next line
    str     r1, [sp, #0]                        @ store updated RGBnext pointer
    ldr     r3, [sp, #16]                       @ r3 = (2 * stride_Y) - width
    ldr     r4, [sp, #28]                       @ r4 = Y ptr
    ldr     r5, [sp, #32]                       @ r5 = Ynext ptr
    add     r4, r4, r3                          @ r4 = Y ptr for the next two lines
    add     r5, r5, r3                          @ r5 = Ynext ptr for the next two lines
    str     r4, [sp, #28]                       @ store updated Y pointer
    str     r5, [sp, #32]                       @ store updated Ynext pointer
    ldr     r1, [sp, #20]                       @ r1 = stride_U - (width / 2)
    ldr     r2, [sp, #36]                       @ r2 = U ptr
    ldr     r6, [sp, #12]                       @ get height counter
    add     r2, r2, r1                          @ update U ptr
    str     r2, [sp, #36]                       @ store updated U ptr (V follows, it is U + (V - U))
    subs    r6, r6, #2                          @ two more lines are done
    str     r6, [sp, #12]
    bne     yuv_loop

    @ Exit cleanly :-)
    add     sp, sp, #(11*4)                     @ remove all custom things from stack
yuv_done:
    ldmia   sp!, { r4 - r12, pc }               @ restore callee saved regs and return
@ Read-only data addressed PC-relative by convert_yuv420_rgb565.
@
@ Conversion coefficients, loaded with ldr rX, [r0, #(4*n)].
@ In order : crv, cbu, - cgu, - cgv, multy
const_storage:
    .word   0x00019895                          @ [0]   crv
    .word   0x00020469                          @ [1]   cbu
    .word   0xffff9bb5                          @ [2] - cgu
    .word   0xffff2fe1                          @ [3] - cgv
    .word   0x00012A15                          @ [4]   multy

@ Clipping table for the 5-bit R and B channels. It is indexed with a signed
@ value relative to rb_clip: the 48 zero bytes in front of the label clamp
@ indices -48..-1 to 0, indices 0..31 map to themselves and indices 32..79
@ saturate to 0x1f.
rb_clip_dummy:
    .fill   48, 1, 0x00
rb_clip:
    .byte   0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
    .byte   0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
    .byte   0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
    .byte   0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
    .fill   48, 1, 0x1f

@ Clipping table for the 6-bit G channel, same scheme: 96 zero bytes for
@ indices -96..-1, identity for 0..63, saturation to 0x3f for 64..159.
g_clip_dummy:
    .fill   96, 1, 0x00
g_clip:
    .byte   0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
    .byte   0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
    .byte   0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
    .byte   0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
    .byte   0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
    .byte   0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
    .byte   0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
    .byte   0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
    .fill   96, 1, 0x3f
#endif
|