summaryrefslogtreecommitdiff
path: root/noncore/multimedia/opieplayer2/yuv2rgb_arm4l.S
blob: 521bcaacaed8641a17cd2fa9ba9a252820a4ab48 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
/*
Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/* WARNING: this function only works when stride_U == stride_V. The V plane is
	     addressed as "U pointer + (V - U)", and only the U pointer is advanced
	     at the end of each line, so both chroma planes must use the same stride.

   C-like prototype:
	void convert_yuv420_rgb565(AVPicture *picture, unsigned char *results, int w, int h);

*/

#ifdef __arm__

	.text
	.align
	
	.global convert_yuv420_rgb565
@ -----------------------------------------------------------------------
@ void convert_yuv420_rgb565(AVPicture *picture, unsigned char *results,
@                            int w, int h)
@
@ Converts a planar YUV 4:2:0 picture to RGB565.  Every pass through
@ yuv_loop consumes one U sample, one V sample and a 2x2 block of Y samples
@ and stores two 32-bit words: two pixels on the current output line and
@ two pixels on the line below it.
@
@ In:    r0 = picture : words at +0 / +4 / +8 are the Y / U / V pointers,
@                       words at +12 / +16 / +20 are the Y / U / V strides
@        r1 = results : output buffer, lines are w * 2 bytes apart
@        r2 = w       : width in pixels
@        r3 = h       : height in pixels
@ Out:   nothing
@ Saved: r4 - r12 and lr are pushed on entry and restored on exit
@ Stack: 10 saved registers + 11 words of working state (84 bytes)
@
@ Notes:
@  - Only stride_U is used to step the chroma pointers (V is read at
@    U + (V - U)), so stride_U must equal stride_V.
@  - Pixels are written in pairs with word stores, the first pixel in the
@    low half-word.
@  - w and h are expected to be even.  A call with w <= 0 or h <= 0 returns
@    without touching memory.  Both loops stop as soon as their counter
@    drops to zero or below, so odd values terminate too, but they are not
@    converted correctly: the last 2x2 block reaches one column / row past
@    the requested size and the end-of-line pointer updates (which assume
@    exactly w pixels were consumed) are then off by one.
@  - r8 holds multy for the whole function; the constants and clipping
@    tables are addressed PC-relative, so they must stay close to the code.
@ -----------------------------------------------------------------------
convert_yuv420_rgb565:
	cmp r2, #0                      @ empty picture ?  (w <= 0 ...
	cmpgt r3, #0                    @ ... or h <= 0)
	movle pc, lr                    @ yes : return, nothing has been pushed yet
	stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs
	ldr r7,  [r0,  #0]       @ Y ptr
	ldr r9,  [r0,  #4]       @ U ptr
	ldr r10, [r0,  #8]       @ V ptr
	subs r10, r10, r9        @ V ptr - U ptr
	ldr r8,  [r0, #12]       @ Stride_Y
	add r8, r8, r7           @ Y + stride_Y
	ldr r4,  [r0, #12]       @ Stride_Y
	mov r4, r4, lsl #1
	sub r4, r4, r2           @ (2 * Stride_Y) - width
	ldr r5,  [r0, #16]       @ Stride_U
	sub r5, r5, r2, lsr #1   @ Stride_U - (width / 2)
	ldr r6,  [r0, #20]       @ Stride_V
	sub r6, r6, r2, lsr #1   @ Stride_V - (width / 2)
	add r0, r1, r2, lsl #1   @ RGB + one line (width * 2 bytes)
	stmdb   sp!, { r0-r10 }
	@ Stack description :
	@ (sp+ 0) RGB + one line
	@ (sp+ 4) RGB
	@ (sp+ 8) width (save)
	@ (sp+12) height
	@ (sp+16) (2 * stride_Y) - width
	@ (sp+20) stride_U - (width / 2)
	@ (sp+24) stride_V - (width / 2) !!! UNUSED !!!
	@ (sp+28) Y ptr
	@ (sp+32) Y ptr + one line
	@ (sp+36) U ptr
	@ (sp+40) V - U
	mov lr, r2                         @ Initialize the width counter
	add r0, pc, #(const_storage-.-8)   @ r0 = base pointer to the constants array
	ldr r8, [r0, #(4*4)]               @ r8 = multy (never overwritten below)
yuv_loop:
	add r0, pc, #(const_storage-.-8)   @ r0 = base pointer to the constants array
	ldr r10, [sp, #28]                 @ r10 = Y
	ldr r1, [sp, #36]                  @ r1 = U
	ldrb r9, [r10, #0]                 @ r9 = *Y
	ldrb r11, [r1]                     @ r11 = *U
	add r1, r1, #1                     @ r1 = U++
	ldr r2, [sp, #40]                  @ r2 = V - U
	str r1, [sp, #36]                  @ store U++
	add r2, r1, r2                     @ r2 = V+1
	ldrb r12, [r2, #-1]                @ r12 = *V
	sub r11, r11, #128                 @ r11 = *U - 128
	sub r12, r12, #128                 @ r12 = *V - 128
	ldr r1, [r0, #(4*0)]               @ r1 = crv
	mov r7, #32768                     @ r7 = 32768 (for additions in MLA)
	ldr r2, [r0, #(4*3)]               @ r2 = -cgv
	mla r6, r1, r12, r7                @ r6 = nonyc_r = crv * (*V - 128) + 32768
	ldr r3, [r0, #(4*1)]               @ r3 = cbu
	mla r4, r2, r12, r7                @ r4 = - cgv * (*V - 128) + 32768
	sub r9, r9, #16                    @ r9 = *Y - 16
	mla r5, r3, r11, r7                @ r5 = nonyc_b = cbu * (*U - 128) + 32768
	ldr r0, [r0, #(4*2)]               @ r0 = -cgu
	mla r7, r8, r9, r6                 @ r7 = (*Y - 16) * multy + nonyc_r
	add r10, r10, #2                   @ r10 = Y + 2
	mla r4, r0, r11, r4                @ r4 = nonyc_g = - cgu * (*U - 128) - cgv * (*V - 128) + 32768
	add r0, pc, #(rb_clip-.-8)         @ r0 contains the pointer to the R and B clipping array
	mla r12, r8, r9, r5                @ r12 = (*Y - 16) * multy + nonyc_b
	ldrb r7, [r0, r7, asr #(16+3)]     @ r7 = R component (5 bits, clipped)
	mla r1, r8, r9, r4                 @ r1 = (*Y - 16) * multy + nonyc_g
	ldrb r9, [r10, #-1]                @ r9 = *(Y+1)
	str r10, [sp, #28]                 @ save Y + 2
	ldrb r12, [r0, r12, asr #(16+3)]   @ r12 = B component (and the start of the RGB word)
	add r11, pc, #(g_clip-.-8)         @ r11 now contains the pointer to the G clipping array
	ldrb r1, [r11, r1, asr #(16+2)]    @ r1 = G component (6 bits, clipped)
	sub r9, r9, #16                    @ r9 = *(Y+1) - 16
	mla r10, r8, r9, r6                @ r10 = unclipped Red of the second pixel
	add r12, r12, r7, lsl #11          @ r12 = R.B ...
	mla r7, r8, r9, r5                 @ r7 = unclipped Blue of the second pixel
	add r12, r12, r1, lsl #5           @ r12 = RGB ... (ie the first pixel (half-word) is done)
	mla r2, r8, r9, r4                 @ r2 = unclipped Green of the second pixel
	ldrb r10, [r0, r10, asr #(16+3)]   @ r10 = R component
	ldrb r7, [r0, r7, asr #(16+3)]     @ r7 = B component
	ldr r1, [sp, #32]                  @ r1 = Ynext
	ldrb r2, [r11, r2, asr #(16+2)]    @ r2 = G component
	ldrb r9, [r1]                      @ r9 = *Ynext
	add r12, r12, r2, lsl #(5+16)      @ r12 = RGB .G.
	sub r9, r9, #16                    @ r9 = *Ynext - 16
	mla r2, r8, r9, r4                 @ r2 = unclipped Green of the first next-line pixel
	add r12, r12, r7, lsl #(0+16)      @ r12 = RGB .GB
	mla r7, r8, r9, r5                 @ r7 = unclipped Blue of the first next-line pixel
	add r12, r12, r10, lsl #(11+16)    @ r12 = RGB RGB
	ldr r3, [sp, #4]                   @ r3 = RGB
	mla r10, r8, r9, r6                @ r10 = unclipped Red of the first next-line pixel
	str r12, [r3]                      @ store the two pixels at *RGB
	add r3, r3, #4                     @ r3 = RGB++ (ie next double-pixel)
	str r3, [sp, #4]                   @ store the RGB pointer
	ldrb r9, [r1, #1]                  @ r9 = *(Ynext+1)
	add r1, r1, #2                     @ r1 = Ynext + 2
	sub r9, r9, #16                    @ r9 = *(Ynext+1) - 16
	ldrb r12, [r0, r7, asr #(16+3)]    @ r12 = ..B ...
	ldrb r10, [r0, r10, asr #(16+3)]   @ r10 = R component
	mla r7, r8, r9, r5                 @ r7 = unclipped Blue of the second next-line pixel
	add r12, r12, r10, lsl #11         @ r12 = R.B ...
	ldrb r2, [r11, r2, asr #(16+2)]    @ r2 = G component
	mla r10, r8, r9, r6                @ r10 = unclipped Red of the second next-line pixel
	add r12, r12, r2, lsl #5           @ r12 = RGB ...
	mla r2, r8, r9, r4                 @ r2 = unclipped Green of the second next-line pixel
	ldrb r7, [r0, r7, asr #(16+3)]     @ r7 = B component
	str r1, [sp, #32]                  @ store the increased Ynext pointer
	add r12, r12, r7, lsl #(16+0)      @ r12 = RGB ..B
	ldrb r10, [r0, r10, asr #(16+3)]   @ r10 = R component
	ldr r3, [sp, #0]                   @ r3 = RGBnext pointer
	add r12, r12, r10, lsl #(16+11)    @ r12 = RGB R.B
	ldrb r2, [r11, r2, asr #(16+2)]    @ r2 = G component
	add r3, r3, #4                     @ r3 = next double-pixel on the RGBnext line
	add r12, r12, r2, lsl #(16+5)      @ r12 = RGB RGB
	str r12, [r3, #-4]                 @ store the two next-line pixels
	str r3, [sp, #0]                   @ store the increased RGBnext pointer
	subs lr, lr, #2                    @ two pixels of the line are done
	bgt yuv_loop                       @ restart while pixels remain (bgt, not bne : an odd width must also stop)

	ldr r0, [sp, #8]                   @ r0 = saved width
	ldr r1, [sp, #0]                   @ r1 = RGBnext pointer
	mov lr, r0                         @ lr = saved width (to restart the width counter)
	str r1, [sp, #4]                   @ current RGBnext pointer is next iteration RGB pointer
	add r1, r1, r0, lsl #1             @ r1 = update RGBnext to next line
	str r1, [sp, #0]                   @ store updated RGBnext pointer

	ldr r3, [sp, #16]                  @ r3 = (2 * stride_Y) - width
	ldr r4, [sp, #28]                  @ r4 = Y ptr
	ldr r5, [sp, #32]                  @ r5 = Ynext ptr
	add r4, r4, r3                     @ r4 = Y ptr for the next two lines
	add r5, r5, r3                     @ r5 = Ynext ptr for the next two lines
	str r4, [sp, #28]                  @ store updated Y pointer
	str r5, [sp, #32]                  @ store updated Ynext pointer

	ldr r1, [sp, #20]                  @ r1 = stride_U - (width / 2)
	ldr r2, [sp, #36]                  @ r2 = U ptr

	ldr r6, [sp, #12]                  @ get height counter
	
	add r2, r2, r1                     @ update U ptr
	str r2, [sp, #36]                  @ store updated U ptr (V follows, it is read at U + (V - U))

	subs r6, r6, #2                    @ two lines are done
	str r6, [sp, #12]                  @ (str leaves the flags of the subs untouched)
	bgt yuv_loop                       @ next pair of lines (bgt, not bne : an odd height must also stop)
	
	@ Exit cleanly :-)
	add sp, sp, #(11*4)             @ remove all custom things from stack
	ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return


const_storage:
	@ Conversion coefficients, scaled by 65536 (the code shifts its results
	@ right by 16+3 or 16+2 before the table lookup).  They are read by
	@ offset from const_storage, so the order must not change.
	.word 0x00019895                   @ +0  : crv
	.word 0x00020469                   @ +4  : cbu
	.word 0xffff9bb5                   @ +8  : - cgu
	.word 0xffff2fe1                   @ +12 : - cgv
	.word 0x00012A15                   @ +16 : multy

	@ 5-bit clipping table for R and B.  It is indexed from rb_clip with a
	@ signed offset : the 48 bytes in front of the label catch negative
	@ results (-> 0x00), the 48 bytes behind the ramp catch overflows (-> 0x1f).
rb_clip_dummy:
	.fill 48, 1, 0x00                  @ indices -48 .. -1
rb_clip:
	.byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
	.byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
	.byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
	.byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
	.fill 48, 1, 0x1f                  @ indices 32 .. 79

	@ 6-bit clipping table for G, same layout : 96 bytes of 0x00 in front of
	@ the label, a 64 entry ramp, then 96 bytes of 0x3f.
g_clip_dummy:
	.fill 96, 1, 0x00                  @ indices -96 .. -1
g_clip:
	.byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
	.byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
	.byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
	.byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
	.byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
	.byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
	.byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
	.byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
	.fill 96, 1, 0x3f                  @ indices 64 .. 159

#endif