summary | refs | log | tree | commit | diff
Unidiff
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- core/multimedia/opieplayer/libmpeg3/video/output.c 217
1 files changed, 107 insertions, 110 deletions
diff --git a/core/multimedia/opieplayer/libmpeg3/video/output.c b/core/multimedia/opieplayer/libmpeg3/video/output.c
index 919a0ff..bf0d6ed 100644
--- a/core/multimedia/opieplayer/libmpeg3/video/output.c
+++ b/core/multimedia/opieplayer/libmpeg3/video/output.c
@@ -1,993 +1,990 @@
1#include "../libmpeg3.h" 1#include "../libmpeg3.h"
2#include "../mpeg3protos.h" 2#include "../mpeg3protos.h"
3#include "mpeg3video.h" 3#include "mpeg3video.h"
4#include <string.h> 4#include <string.h>
5 5
/*
 * Constant operands for the MMX colour-conversion code below.  The inline
 * assembly refers to these objects by symbol name, so their names and
 * in-memory layout must not change.
 */
static LONGLONG mpeg3_MMX_0 = 0L;
/* 0x0010 repeated in each 16-bit half-word.
 * NOTE(review): these arrays cover exactly one 8-byte MMX operand only when
 * unsigned long is 32 bits wide -- confirm before building on an LP64 target. */
static unsigned long mpeg3_MMX_10w[] = {0x00100010, 0x00100010};                 /* dd 000100010h, 000100010h */
/* 0x0080 in each half-word: subtracted from the widened chroma samples. */
static unsigned long mpeg3_MMX_80w[] = {0x00800080, 0x00800080};                 /* dd 000800080h, 000800080h */

/* Keeps the low byte of each 16-bit word (the even-numbered luma samples). */
static unsigned long mpeg3_MMX_00FFw[] = {0x00ff00ff, 0x00ff00ff};               /* dd 000FF00FFh, 000FF00FFh */

/* Chroma multipliers; the sums built from them are later shifted right by 6. */
static unsigned short mpeg3_MMX_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81};          /* Cb -> blue:  129 */
static unsigned short mpeg3_MMX_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66};          /* Cr -> red:   102 */

static unsigned short mpeg3_MMX_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8};  /* Cb -> green: -24 as a signed word */
static unsigned short mpeg3_MMX_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd};  /* Cr -> green: -51 as a signed word */

static unsigned short mpeg3_MMX_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a};             /* luma multiplier: 74 */

/* Bits 15..11 of each 16-bit pixel. */
static unsigned short mpeg3_MMX_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800};    /* dd 0f800f800h, 0f800f800h */

/* Bits 10..5 of each 16-bit pixel. */
static unsigned short mpeg3_MMX_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0};        /* dd 007e007e0h, 007e007e0h */

/* Luma lookup table indexed by an 8-bit sample; read by DITHER_SCALE_601_HEAD.
 * It is not filled in anywhere in this part of the file. */
static unsigned char mpeg3_601_to_rgb[256];
25 25
26/* Algorithm */ 26/* Algorithm */
27 /* r = (int)(*y + 1.371 * (*cr - 128)); */ 27 /* r = (int)(*y + 1.371 * (*cr - 128)); */
28 /* g = (int)(*y - 0.698 * (*cr - 128) - 0.336 * (*cb - 128)); */ 28 /* g = (int)(*y - 0.698 * (*cr - 128) - 0.336 * (*cb - 128)); */
29 /* b = (int)(*y + 1.732 * (*cb - 128)); */ 29 /* b = (int)(*y + 1.732 * (*cb - 128)); */
30 30
31#ifdef HAVE_MMX 31#ifdef HAVE_MMX
/*
 * Convert planar YCbCr with 2x2 subsampled chroma to 16-bit pixels
 * (red in bits 15..11, green in bits 10..5, blue in bits 4..0) using MMX.
 *
 *   lum   luma plane; consecutive rows are `cols` bytes apart
 *   cr    Cr plane, consumed 4 bytes per 8 luma samples
 *   cb    Cb plane, consumed 4 bytes per 8 luma samples
 *   out   destination buffer, written as 16-bit pixels
 *   rows  number of luma rows to convert
 *   cols  number of luma samples per row
 *   mod   extra output pixels per row; the output row pitch is cols + mod pixels
 *
 * Each pass of the inner loop converts an 8x2 block: 8 luma samples from the
 * current row and 8 from the row below share 4 Cb and 4 Cr samples, and 8
 * pixels are stored to each of two output rows.  The loops are do/while
 * shaped, so one block is always processed.
 * NOTE(review): this presumably requires cols to be a multiple of 8 and rows
 * to be even -- confirm against the callers.
 *
 * Changes relative to the previous version: the assembly advances cr, cb,
 * lum, row1 and x, so they are now declared as read-write ("+") operands
 * instead of input-only ones, and "memory"/"cc" clobbers are declared because
 * the code loads and stores through pointers and uses cmpl/jl.  Only the
 * operand numbers were remapped; the instruction sequence is unchanged.
 *
 * The MMX registers are left dirty (no emms is issued here), and the 32-bit
 * addl/cmpl/leal on pointer operands make this i386-only code.
 */
inline void mpeg3video_rgb16_mmx(unsigned char *lum, 
		unsigned char *cr, 
		unsigned char *cb,
		unsigned char *out, 
		int rows, 
		int cols, 
		int mod)
{
	unsigned short *row1;
	int x;
	unsigned char *y;
	int col1;

	row1 = (unsigned short *)out;
	col1 = cols + mod;          /* output row pitch in pixels */
	mod += cols + mod;          /* with the line below: byte distance from the */
	mod *= 2;                   /* end of one output row pair to the next pair */
	y = lum + cols * rows;      /* one past the last luma sample */
	x = 0;

	/*
	 * Operands:
	 *   %0 cr   %1 cb   %2 lum   %3 row1   %4 x   (read-write)
	 *   %5 cols %6 col1 %7 mod   %8 y             (input only)
	 */
	__asm__ __volatile__(
		".align 8\n"
		"1:\n"
			"movd (%1), %%mm0\n"                    /* 4 Cb          0  0  0  0 u3 u2 u1 u0 */
			"pxor %%mm7, %%mm7\n"
			"movd (%0), %%mm1\n"                    /* 4 Cr          0  0  0  0 v3 v2 v1 v0 */
			"punpcklbw %%mm7, %%mm0\n"              /* 4 W cb        0 u3  0 u2  0 u1  0 u0 */
			"punpcklbw %%mm7, %%mm1\n"              /* 4 W cr        0 v3  0 v2  0 v1  0 v0 */

			"psubw mpeg3_MMX_80w, %%mm0\n"          /* cb - 128 */
			"psubw mpeg3_MMX_80w, %%mm1\n"          /* cr - 128 */
			"movq %%mm0, %%mm2\n"                   /* Cb */
			"movq %%mm1, %%mm3\n"                   /* Cr */
			"pmullw mpeg3_MMX_Ugrncoeff, %%mm2\n"   /* Cb2green */
			"movq (%2), %%mm6\n"                    /* row 1 luma   L7 L6 L5 L4 L3 L2 L1 L0 */
			"pmullw mpeg3_MMX_Ublucoeff, %%mm0\n"   /* Cb2blue */
			"pand mpeg3_MMX_00FFw, %%mm6\n"         /* even luma    00 L6 00 L4 00 L2 00 L0 */
			"pmullw mpeg3_MMX_Vgrncoeff, %%mm3\n"   /* Cr2green */
			"movq (%2), %%mm7\n"                    /* row 1 luma again */
			"pmullw mpeg3_MMX_Vredcoeff, %%mm1\n"   /* Cr2red */
			"psrlw $8, %%mm7\n"                     /* odd luma     00 L7 00 L5 00 L3 00 L1 */
			"pmullw mpeg3_MMX_Ycoeff, %%mm6\n"      /* lum1 */
			"paddw %%mm3, %%mm2\n"                  /* Cb2green + Cr2green == green */
			"pmullw mpeg3_MMX_Ycoeff, %%mm7\n"      /* lum2 */

			"movq %%mm6, %%mm4\n"                   /* lum1 */
			"paddw %%mm0, %%mm6\n"                  /* lum1 + blue */
			"movq %%mm4, %%mm5\n"                   /* lum1 */
			"paddw %%mm1, %%mm4\n"                  /* lum1 + red */
			"paddw %%mm2, %%mm5\n"                  /* lum1 + green */
			"psraw $6, %%mm4\n"                     /* R1 */
			"movq %%mm7, %%mm3\n"                   /* lum2 */
			"psraw $6, %%mm5\n"                     /* G1 */
			"paddw %%mm0, %%mm7\n"                  /* lum2 + blue */
			"psraw $6, %%mm6\n"                     /* B1 */
			"packuswb %%mm4, %%mm4\n"               /* saturate R1 to bytes */
			"packuswb %%mm5, %%mm5\n"               /* saturate G1 to bytes */
			"packuswb %%mm6, %%mm6\n"               /* saturate B1 to bytes */
			"punpcklbw %%mm4, %%mm4\n"              /* duplicate each byte into a word */
			"punpcklbw %%mm5, %%mm5\n"

			"pand mpeg3_MMX_redmask, %%mm4\n"       /* RED 1 */
			"psllw $3, %%mm5\n"                     /* GREEN 1 */
			"punpcklbw %%mm6, %%mm6\n"
			"pand mpeg3_MMX_grnmask, %%mm5\n"
			"pand mpeg3_MMX_redmask, %%mm6\n"
			"por %%mm5, %%mm4\n"
			"psrlw $11, %%mm6\n"                    /* BLUE 1 */
			"movq %%mm3, %%mm5\n"                   /* lum2 */
			"paddw %%mm1, %%mm3\n"                  /* lum2 + red */
			"paddw %%mm2, %%mm5\n"                  /* lum2 + green */
			"psraw $6, %%mm3\n"                     /* R2 */
			"por %%mm6, %%mm4\n"                    /* mm4 = even pixels of row 1 */
			"psraw $6, %%mm5\n"                     /* G2 */
			"movq (%2, %5), %%mm6\n"                /* row 2 luma (lum + cols) */
			"psraw $6, %%mm7\n"
			"packuswb %%mm3, %%mm3\n"
			"packuswb %%mm5, %%mm5\n"
			"packuswb %%mm7, %%mm7\n"
			"pand mpeg3_MMX_00FFw, %%mm6\n"         /* even luma of row 2 */
			"punpcklbw %%mm3, %%mm3\n"
			"punpcklbw %%mm5, %%mm5\n"
			"pmullw mpeg3_MMX_Ycoeff, %%mm6\n"      /* lum3 */
			"punpcklbw %%mm7, %%mm7\n"
			"psllw $3, %%mm5\n"                     /* GREEN 2 */
			"pand mpeg3_MMX_redmask, %%mm7\n"
			"pand mpeg3_MMX_redmask, %%mm3\n"
			"psrlw $11, %%mm7\n"                    /* BLUE 2 */
			"pand mpeg3_MMX_grnmask, %%mm5\n"
			"por %%mm7, %%mm3\n"
			"movq (%2,%5), %%mm7\n"                 /* row 2 luma again */
			"por %%mm5, %%mm3\n"                    /* mm3 = odd pixels of row 1 */
			"psrlw $8, %%mm7\n"                     /* odd luma of row 2 */
			"movq %%mm4, %%mm5\n"
			"punpcklwd %%mm3, %%mm4\n"              /* interleave even/odd pixels 0..3 */
			"pmullw mpeg3_MMX_Ycoeff, %%mm7\n"      /* lum4 */
			"punpckhwd %%mm3, %%mm5\n"              /* interleave even/odd pixels 4..7 */

			"movq %%mm4, (%3)\n"                    /* store row 1, pixels 0..3 */
			"movq %%mm5, 8(%3)\n"                   /* store row 1, pixels 4..7 */

			"movq %%mm6, %%mm4\n"                   /* lum3 */
			"paddw %%mm0, %%mm6\n"                  /* lum3 + blue */

			"movq %%mm4, %%mm5\n"                   /* lum3 */
			"paddw %%mm1, %%mm4\n"                  /* lum3 + red */
			"paddw %%mm2, %%mm5\n"                  /* lum3 + green */
			"psraw $6, %%mm4\n"
			"movq %%mm7, %%mm3\n"                   /* lum4 */
			"psraw $6, %%mm5\n"
			"paddw %%mm0, %%mm7\n"                  /* lum4 + blue */
			"psraw $6, %%mm6\n"                     /* lum3 + blue */
			"movq %%mm3, %%mm0\n"                   /* lum4 (blue term no longer needed) */
			"packuswb %%mm4, %%mm4\n"
			"paddw %%mm1, %%mm3\n"                  /* lum4 + red */
			"packuswb %%mm5, %%mm5\n"
			"paddw %%mm2, %%mm0\n"                  /* lum4 + green */
			"packuswb %%mm6, %%mm6\n"
			"punpcklbw %%mm4, %%mm4\n"
			"punpcklbw %%mm5, %%mm5\n"
			"punpcklbw %%mm6, %%mm6\n"
			"psllw $3, %%mm5\n"                     /* GREEN 3 */
			"pand mpeg3_MMX_redmask, %%mm4\n"
			"psraw $6, %%mm3\n"
			"psraw $6, %%mm0\n"
			"pand mpeg3_MMX_redmask, %%mm6\n"       /* BLUE */
			"pand mpeg3_MMX_grnmask, %%mm5\n"
			"psrlw $11, %%mm6\n"                    /* BLUE 3 */
			"por %%mm5, %%mm4\n"
			"psraw $6, %%mm7\n"
			"por %%mm6, %%mm4\n"
			"packuswb %%mm3, %%mm3\n"
			"packuswb %%mm0, %%mm0\n"
			"packuswb %%mm7, %%mm7\n"
			"punpcklbw %%mm3, %%mm3\n"
			"punpcklbw %%mm0, %%mm0\n"
			"punpcklbw %%mm7, %%mm7\n"
			"pand mpeg3_MMX_redmask, %%mm3\n"
			"pand mpeg3_MMX_redmask, %%mm7\n"       /* BLUE */
			"psllw $3, %%mm0\n"                     /* GREEN 4 */
			"psrlw $11, %%mm7\n"
			"pand mpeg3_MMX_grnmask, %%mm0\n"
			"por %%mm7, %%mm3\n"
			"addl $8, %4\n"                         /* x += 8 */
			"por %%mm0, %%mm3\n"

			"movq %%mm4, %%mm5\n"

			"punpcklwd %%mm3, %%mm4\n"
			"punpckhwd %%mm3, %%mm5\n"

			"movq %%mm4, (%3,%6,2)\n"               /* store row 2, pixels 0..3 */
			"movq %%mm5, 8(%3,%6,2)\n"              /* store row 2, pixels 4..7 */

			"addl $8, %2\n"                         /* lum += 8 */
			"addl $4, %0\n"                         /* cr += 4 */
			"addl $4, %1\n"                         /* cb += 4 */
			"cmpl %5, %4\n"                         /* x < cols ? (leal keeps the flags) */
			"leal 16(%3), %3\n"                     /* row1 += 8 pixels */
		"jl 1b\n"
		"addl %5, %2\n"                             /* lum += cols (skip the row just done) */
		"addl %7, %3\n"                             /* row1 += mod bytes */
		"movl $0, %4\n"                             /* x = 0 */
		"cmpl %8, %2\n"                             /* lum < y ? */
		"jl 1b\n"
		: "+r" (cr),
			"+r" (cb),
			"+r" (lum),
			"+r" (row1),
			"+m" (x)
		: "r" (cols),
			"r" (col1),
			"m" (mod),
			"m" (y)
		: "memory", "cc"
		);
}
208 208
/*
 * Operands for the single-pixel BGRA converters below.  Each 64-bit value is
 * used as four 16-bit words, listed here from most to least significant:
 * [w3 w2 w1 w0].  The converters build the result as [0 r g b].
 * The inline assembly refers to these objects by symbol name.
 */
static unsigned LONGLONG mpeg3_MMX_U_80 = 0x0000008000800000LL;       /* [0 128 128 0]: bias for the u operand */
static unsigned LONGLONG mpeg3_MMX_V_80 = 0x0000000000800080LL;       /* [0 0 128 128]: bias for the v operand */
static LONGLONG mpeg3_MMX_U_COEF = 0x00000058ffd30000LL;              /* [0 88 -45 0]: u contribution to r and g */
static LONGLONG mpeg3_MMX_V_COEF = 0x00000000ffea006fLL;              /* [0 0 -22 111]: v contribution to g and b */
static LONGLONG mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL;          /* [0 72 72 72]: luma scale in the 601 variants */
static LONGLONG mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL;          /* [0 0 0 16]: luma offset removed in the 601 variants */
215 215
216inline void mpeg3_bgra32_mmx(unsigned long y, 216inline void mpeg3_bgra32_mmx(unsigned long y,
217 unsigned long u, 217 unsigned long u,
218 unsigned long v, 218 unsigned long v,
219 unsigned long *output) 219 unsigned long *output)
220{ 220{
221asm(" 221
222asm(
222/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ 223/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
223/* for bgr24. */ 224/* for bgr24. */
224 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 225 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
225 movd (%1), %%mm1; /* Load u 0x00000000000000cr */ 226 "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */
226 movq %%mm0, %%mm3; /* Copy y to temp */ 227 "movq %%mm0, %%mm3;" /* Copy y to temp */
227 psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ 228 "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */
228 movd (%2), %%mm2; /* Load v 0x00000000000000cb */ 229 "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */
229 psllq $16, %%mm3; /* Shift y */ 230 "psllq $16, %%mm3;" /* Shift y */
230 movq %%mm1, %%mm4; /* Copy u to temp */ 231 "movq %%mm1, %%mm4;" /* Copy u to temp */
231 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 232 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
232 psllq $16, %%mm4; /* Shift u */ 233 "psllq $16, %%mm4;" /* Shift u */
233 movq %%mm2, %%mm5; /* Copy v to temp */ 234 "movq %%mm2, %%mm5;" /* Copy v to temp */
234 psllq $16, %%mm3; /* Shift y */ 235 "psllq $16, %%mm3;" /* Shift y */
235 por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ 236 "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */
236 psllq $16, %%mm5; /* Shift v */ 237 "psllq $16, %%mm5;" /* Shift v */
237 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 238 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
238 por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ 239 "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */
239 240
240/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ 241/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
241 psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ 242 "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */
242 pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ 243 "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
243 psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ 244 "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
244 psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ 245 "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */
245 pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ 246 "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
246 247
247/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ 248/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
248 paddsw %%mm1, %%mm0; /* Add u to result */ 249 "paddsw %%mm1, %%mm0;" /* Add u to result */
249 paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ 250 "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */
250 psraw $6, %%mm0; /* Demote precision */ 251 "psraw $6, %%mm0;" /* Demote precision */
251 packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ 252 "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */
252 movd %%mm0, (%3); /* Store output */ 253 "movd %%mm0, (%3);" /* Store output */
253 "
254: 254:
255: "r" (&y), "r" (&u), "r" (&v), "r" (output)); 255: "r" (&y), "r" (&u), "r" (&v), "r" (output));
256} 256}
257 257
258inline void mpeg3_601_bgra32_mmx(unsigned long y, 258inline void mpeg3_601_bgra32_mmx(unsigned long y,
259 unsigned long u, 259 unsigned long u,
260 unsigned long v, 260 unsigned long v,
261 unsigned long *output) 261 unsigned long *output)
262{ 262{
263asm(" 263asm(
264/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */ 264/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
265/* for bgr24. */ 265/* for bgr24. */
266 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 266 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
267 psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ 267 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */
268 movd (%1), %%mm1; /* Load u 0x00000000000000cr */ 268 "movd (%1), %%mm1;" /* Load u 0x00000000000000cr */
269 movq %%mm0, %%mm3; /* Copy y to temp */ 269 "movq %%mm0, %%mm3;" /* Copy y to temp */
270 psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */ 270 "psllq $16, %%mm1;" /* Shift u 0x0000000000cr0000 */
271 movd (%2), %%mm2; /* Load v 0x00000000000000cb */ 271 "movd (%2), %%mm2;" /* Load v 0x00000000000000cb */
272 psllq $16, %%mm3; /* Shift y */ 272 "psllq $16, %%mm3;" /* Shift y */
273 movq %%mm1, %%mm4; /* Copy u to temp */ 273 "movq %%mm1, %%mm4;" /* Copy u to temp */
274 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 274 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
275 psllq $16, %%mm4; /* Shift u */ 275 "psllq $16, %%mm4;" /* Shift u */
276 movq %%mm2, %%mm5; /* Copy v to temp */ 276 "movq %%mm2, %%mm5;" /* Copy v to temp */
277 psllq $16, %%mm3; /* Shift y */ 277 "psllq $16, %%mm3;" /* Shift y */
278 por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */ 278 "por %%mm4, %%mm1;" /* Overlay new u byte 0x000000cr00cr0000 */
279 psllq $16, %%mm5; /* Shift v */ 279 "psllq $16, %%mm5;" /* Shift v */
280 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 280 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
281 por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */ 281 "por %%mm5, %%mm2;" /* Overlay new v byte 0x0000000000cb00cb */
282 282
283/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */ 283/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
284 pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */ 284 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale and shift y coeffs */
285 psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */ 285 "psubw mpeg3_MMX_U_80, %%mm1;" /* Subtract 128 from u 0x000000uu00uu0000 */
286 pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */ 286 "pmullw mpeg3_MMX_U_COEF, %%mm1;" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
287 psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */ 287 "psubw mpeg3_MMX_V_80, %%mm2;" /* Subtract 128 from v 0x0000000000cb00cb */
288 pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */ 288 "pmullw mpeg3_MMX_V_COEF, %%mm2;" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
289 289
290/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */ 290/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
291 paddsw %%mm1, %%mm0; /* Add u to result */ 291 "paddsw %%mm1, %%mm0;" /* Add u to result */
292 paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */ 292 "paddsw %%mm2, %%mm0;" /* Add v to result 0x0000rrrrggggbbbb */
293 psraw $6, %%mm0; /* Demote precision */ 293 "psraw $6, %%mm0;" /* Demote precision */
294 packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */ 294 "packuswb %%mm0, %%mm0;" /* Pack into ARGB 0x0000000000rrggbb */
295 movd %%mm0, (%3); /* Store output */ 295 "movd %%mm0, (%3);" /* Store output */
296 "
297: 296:
298: "r" (&y), "r" (&u), "r" (&v), "r" (output)); 297: "r" (&y), "r" (&u), "r" (&v), "r" (output));
299} 298}
300 299
/*
 * Operands for the single-pixel RGBA converters below.  Each 64-bit value is
 * used as four 16-bit words, listed here from most to least significant:
 * [w3 w2 w1 w0].  The converters build the result as [0 b g r], so the word
 * positions are mirrored relative to the BGRA tables.
 * The inline assembly refers to these objects by symbol name.
 */
static unsigned LONGLONG mpeg3_MMX_U_80_RGB = 0x0000000000800080LL;   /* [0 0 128 128]: bias for the u operand */
static unsigned LONGLONG mpeg3_MMX_V_80_RGB = 0x0000008000800000LL;   /* [0 128 128 0]: bias for the v operand */
static LONGLONG mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL;          /* [0 0 -45 88]: u contribution to g and r */
static LONGLONG mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL;          /* [0 111 -22 0]: v contribution to b and g */
305 304
306inline void mpeg3_rgba32_mmx(unsigned long y, 305inline void mpeg3_rgba32_mmx(unsigned long y,
307 unsigned long u, 306 unsigned long u,
308 unsigned long v, 307 unsigned long v,
309 unsigned long *output) 308 unsigned long *output)
310{ 309{
311asm(" 310asm(
312/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ 311/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
313/* for rgb24. */ 312/* for rgb24. */
314 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 313 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
315 movd (%1), %%mm1; /* Load v 0x00000000000000vv */ 314 "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */
316 movq %%mm0, %%mm3; /* Copy y to temp */ 315 "movq %%mm0, %%mm3;" /* Copy y to temp */
317 psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ 316 "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */
318 movd (%2), %%mm2; /* Load u 0x00000000000000uu */ 317 "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */
319 psllq $16, %%mm3; /* Shift y */ 318 "psllq $16, %%mm3;" /* Shift y */
320 movq %%mm1, %%mm4; /* Copy v to temp */ 319 "movq %%mm1, %%mm4;" /* Copy v to temp */
321 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 320 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
322 psllq $16, %%mm4; /* Shift v */ 321 "psllq $16, %%mm4;" /* Shift v */
323 movq %%mm2, %%mm5; /* Copy u to temp */ 322 "movq %%mm2, %%mm5;" /* Copy u to temp */
324 psllq $16, %%mm3; /* Shift y */ 323 "psllq $16, %%mm3;" /* Shift y */
325 por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ 324 "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */
326 psllq $16, %%mm5; /* Shift u */ 325 "psllq $16, %%mm5;" /* Shift u */
327 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 326 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
328 por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ 327 "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */
329 328
330/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ 329/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
331 psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ 330 "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */
332 pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ 331 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
333 psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */ 332 "psllw $6, %%mm0;" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
334 psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ 333 "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */
335 pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ 334 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
336 335
337/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ 336/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
338 paddsw %%mm1, %%mm0; /* Add v to result */ 337 "paddsw %%mm1, %%mm0;" /* Add v to result */
339 paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ 338 "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */
340 psraw $6, %%mm0; /* Demote precision */ 339 "psraw $6, %%mm0;" /* Demote precision */
341 packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ 340 "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */
342 movd %%mm0, (%3); /* Store output */ 341 "movd %%mm0, (%3);" /* Store output */
343 "
344: 342:
345: "r" (&y), "r" (&v), "r" (&u), "r" (output)); 343: "r" (&y), "r" (&v), "r" (&u), "r" (output));
346} 344}
347 345
348inline void mpeg3_601_rgba32_mmx(unsigned long y, 346inline void mpeg3_601_rgba32_mmx(unsigned long y,
349 unsigned long u, 347 unsigned long u,
350 unsigned long v, 348 unsigned long v,
351 unsigned long *output) 349 unsigned long *output)
352{ 350{
353asm(" 351asm(
354/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */ 352/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
355/* for rgb24. */ 353/* for rgb24. */
356 movd (%0), %%mm0; /* Load y 0x00000000000000yy */ 354 "movd (%0), %%mm0;" /* Load y 0x00000000000000yy */
357 psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */ 355 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;" /* Subtract 16 from y */
358 movd (%1), %%mm1; /* Load v 0x00000000000000vv */ 356 "movd (%1), %%mm1;" /* Load v 0x00000000000000vv */
359 movq %%mm0, %%mm3; /* Copy y to temp */ 357 "movq %%mm0, %%mm3;" /* Copy y to temp */
360 psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */ 358 "psllq $16, %%mm1;" /* Shift v 0x0000000000vv0000 */
361 movd (%2), %%mm2; /* Load u 0x00000000000000uu */ 359 "movd (%2), %%mm2;" /* Load u 0x00000000000000uu */
362 psllq $16, %%mm3; /* Shift y */ 360 "psllq $16, %%mm3;" /* Shift y */
363 movq %%mm1, %%mm4; /* Copy v to temp */ 361 "movq %%mm1, %%mm4;" /* Copy v to temp */
364 por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */ 362 "por %%mm3, %%mm0;" /* Overlay new y byte 0x0000000000yy00yy */
365 psllq $16, %%mm4; /* Shift v */ 363 "psllq $16, %%mm4;" /* Shift v */
366 movq %%mm2, %%mm5; /* Copy u to temp */ 364 "movq %%mm2, %%mm5;" /* Copy u to temp */
367 psllq $16, %%mm3; /* Shift y */ 365 "psllq $16, %%mm3;" /* Shift y */
368 por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */ 366 "por %%mm4, %%mm1;" /* Overlay new v byte 0x000000vv00vv0000 */
369 psllq $16, %%mm5; /* Shift u */ 367 "psllq $16, %%mm5;" /* Shift u */
370 por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */ 368 "por %%mm3, %%mm0;" /* Overlay new y byte 0x000000yy00yy00yy */
371 por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */ 369 "por %%mm5, %%mm2;" /* Overlay new u byte 0x0000000000uu00uu */
372 370
373/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */ 371/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
374 pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */ 372 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;" /* Scale y coeffs */
375 psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */ 373 "psubw mpeg3_MMX_V_80_RGB, %%mm1;" /* Subtract 128 from v 0x000000vv00vv0000 */
376 pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */ 374 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
377 psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */ 375 "psubw mpeg3_MMX_U_80_RGB, %%mm2;" /* Subtract 128 from u 0x0000000000uu00uu */
378 pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */ 376 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
379 377
380/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */ 378/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
381 paddsw %%mm1, %%mm0; /* Add v to result */ 379 "paddsw %%mm1, %%mm0;" /* Add v to result */
382 paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */ 380 "paddsw %%mm2, %%mm0;" /* Add u to result 0x0000bbbbggggrrrr */
383 psraw $6, %%mm0; /* Demote precision */ 381 "psraw $6, %%mm0;" /* Demote precision */
384 packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */ 382 "packuswb %%mm0, %%mm0;" /* Pack into RGBA 0x0000000000bbggrr */
385 movd %%mm0, (%3); /* Store output */ 383 "movd %%mm0, (%3);" /* Store output */
386 "
387: 384:
388: "r" (&y), "r" (&v), "r" (&u), "r" (output)); 385: "r" (&y), "r" (&v), "r" (&u), "r" (output));
389} 386}
390 387
391#endif 388#endif
392 389
/*
 * Opens the loop over output rows (h = 0 .. video->out_h - 1) and, for each
 * row, points y_in / cb_in / cr_in at the source samples and `data` at
 * output_rows[h].  The source row is video->y_table[h] + video->in_y; the
 * chroma planes use that row halved and a pitch of video->chrom_width.
 * The macro leaves the loop body open.
 *
 * Expects h, y_in, cb_in, cr_in, data, src, video and output_rows to be in
 * scope at the point of use.
 *
 * Both chroma pointers are offset by in_x >> 1: the DITHER_SCALE_* macros
 * index cb_in and cr_in with the same subscript (x_table[w] / 2), so the two
 * planes must start at the same horizontal origin.  The previous in_x >> 2 on
 * cb_in shifted Cb relative to Cr whenever in_x was non-zero.
 */
#define DITHER_ROW_HEAD \
	for(h = 0; h < video->out_h; h++) \
	{ \
		y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \
		cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \
		cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \
		data = output_rows[h];
400 397
401#define DITHER_ROW_TAIL \ 398#define DITHER_ROW_TAIL \
402 } 399 }
403 400
404#define DITHER_SCALE_HEAD \ 401#define DITHER_SCALE_HEAD \
405 for(w = 0; w < video->out_w; w++) \ 402 for(w = 0; w < video->out_w; w++) \
406 { \ 403 { \
407 uv_subscript = video->x_table[w] / 2; \ 404 uv_subscript = video->x_table[w] / 2; \
408 y_l = y_in[video->x_table[w]]; \ 405 y_l = y_in[video->x_table[w]]; \
409 y_l <<= 16; \ 406 y_l <<= 16; \
410 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ 407 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
411 g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ 408 g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
412 b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; 409 b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;
413 410
414#define DITHER_SCALE_601_HEAD \ 411#define DITHER_SCALE_601_HEAD \
415 for(w = 0; w < video->out_w; w++) \ 412 for(w = 0; w < video->out_w; w++) \
416 { \ 413 { \
417 uv_subscript = video->x_table[w] / 2; \ 414 uv_subscript = video->x_table[w] / 2; \
418 y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \ 415 y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \
419 y_l <<= 16; \ 416 y_l <<= 16; \
420 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \ 417 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
421 g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \ 418 g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
422 b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16; 419 b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;
423 420
424#define DITHER_SCALE_TAIL \ 421#define DITHER_SCALE_TAIL \
425 } 422 }
426 423
427#define DITHER_MMX_SCALE_HEAD \ 424#define DITHER_MMX_SCALE_HEAD \
428 for(w = 0; w < video->out_w; w++) \ 425 for(w = 0; w < video->out_w; w++) \
429 { \ 426 { \
430 uv_subscript = video->x_table[w] / 2; 427 uv_subscript = video->x_table[w] / 2;
431 428
432#define DITHER_MMX_SCALE_TAIL \ 429#define DITHER_MMX_SCALE_TAIL \
433 data += step; \ 430 data += step; \
434 } 431 }
435 432
436#define DITHER_MMX_HEAD \ 433#define DITHER_MMX_HEAD \
437 for(w = 0; w < video->out_w; w += 2) \ 434 for(w = 0; w < video->out_w; w += 2) \
438 { 435 {
439 436
440#define DITHER_MMX_TAIL \ 437#define DITHER_MMX_TAIL \
441 data += step; \ 438 data += step; \
442 cr_in++; \ 439 cr_in++; \
443 cb_in++; \ 440 cb_in++; \
444 } 441 }
445 442
446#define DITHER_HEAD \ 443#define DITHER_HEAD \
447 for(w = 0; w < video->horizontal_size; w++) \ 444 for(w = 0; w < video->horizontal_size; w++) \
448 { \ 445 { \
449 y_l = *y_in++; \ 446 y_l = *y_in++; \
450 y_l <<= 16; \ 447 y_l <<= 16; \
451 r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ 448 r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
452 g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ 449 g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
453 b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; 450 b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;
454 451
455#define DITHER_601_HEAD \ 452#define DITHER_601_HEAD \
456 for(w = 0; w < video->horizontal_size; w++) \ 453 for(w = 0; w < video->horizontal_size; w++) \
457 { \ 454 { \
458 y_l = mpeg3_601_to_rgb[*y_in++]; \ 455 y_l = mpeg3_601_to_rgb[*y_in++]; \
459 y_l <<= 16; \ 456 y_l <<= 16; \
460 r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \ 457 r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
461 g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \ 458 g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
462 b_l = (y_l + video->cb_to_b[*cb_in]) >> 16; 459 b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;
463 460
464#define DITHER_TAIL \ 461#define DITHER_TAIL \
465 if(w & 1) \ 462 if(w & 1) \
466 { \ 463 { \
467 cr_in++; \ 464 cr_in++; \
468 cb_in++; \ 465 cb_in++; \
469 } \ 466 } \
470 } 467 }
471 468
472 469
/*
 * Pixel writers for mpeg3video_ditherframe().
 *
 * Each macro clips the r_l/g_l/b_l values of the caller with CLIP(), writes
 * one pixel through `data` (or `data_s` for the 16 bit per channel model)
 * and advances that pointer past the pixel.  The macros expand to several
 * statements and must be used inside a braced block.
 */

/* 3 bytes: blue, green, red. */
#define STORE_PIXEL_BGR888 \
	*data++ = CLIP(b_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(r_l);

/* 4 bytes: blue, green, red, 0. */
#define STORE_PIXEL_BGRA8888 \
	*data++ = CLIP(b_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(r_l); \
	*data++ = 0;

/*
 * One native-endian 16 bit word: 5 bits red, 6 bits green, 5 bits blue.
 * Written as a plain store followed by an explicit pointer advance; the
 * former `*((unsigned short*)data)++` relied on the cast-as-lvalue
 * extension, which is not valid C.
 */
#define STORE_PIXEL_RGB565 \
	*(unsigned short*)data = \
		((CLIP(r_l) & 0xf8) << 8) | \
		((CLIP(g_l) & 0xfc) << 3) | \
		((CLIP(b_l) & 0xf8) >> 3); \
	data += sizeof(unsigned short);

/* 3 bytes: red, green, blue. */
#define STORE_PIXEL_RGB888 \
	*data++ = CLIP(r_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(b_l);

/* 4 bytes: red, green, blue, 0. */
#define STORE_PIXEL_RGBA8888 \
	*data++ = CLIP(r_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(b_l); \
	*data++ = 0;

/*
 * 4 words through data_s: red, green, blue, 0.
 * NOTE(review): the clipped values are stored unscaled; if CLIP() limits
 * to 0..255 only the low byte of each 16 bit channel is used - confirm
 * whether that is intended.
 */
#define STORE_PIXEL_RGBA16161616 \
	*data_s++ = CLIP(r_l); \
	*data_s++ = CLIP(g_l); \
	*data_s++ = CLIP(b_l); \
	*data_s++ = 0;
506 503
507 504
508 505
509/* Only good for YUV 4:2:0 */ 506/* Only good for YUV 4:2:0 */
510int mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) 507int mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows)
511{ 508{
512 int h = 0; 509 int h = 0;
513 register unsigned char *y_in, *cb_in, *cr_in; 510 register unsigned char *y_in, *cb_in, *cr_in;
514 long y_l, r_l, b_l, g_l; 511 long y_l, r_l, b_l, g_l;
515 register unsigned char *data; 512 register unsigned char *data;
516 register int uv_subscript, step, w = -1; 513 register int uv_subscript, step, w = -1;
517 514
518#ifdef HAVE_MMX 515#ifdef HAVE_MMX
519/* =================================== MMX ===================================== */ 516/* =================================== MMX ===================================== */
520 if(video->have_mmx && 517 if(video->have_mmx &&
521 video->out_w == video->horizontal_size && 518 video->out_w == video->horizontal_size &&
522 video->out_h == video->vertical_size && 519 video->out_h == video->vertical_size &&
523 video->in_w == video->out_w && 520 video->in_w == video->out_w &&
524 video->in_h == video->out_h && 521 video->in_h == video->out_h &&
525 video->in_x == 0 && 522 video->in_x == 0 &&
526 video->in_y == 0 && 523 video->in_y == 0 &&
527 (video->color_model == MPEG3_RGB565 || video->color_model == MPEG3_601_RGB565)) 524 (video->color_model == MPEG3_RGB565 || video->color_model == MPEG3_601_RGB565))
528 { 525 {
529/* Unscaled 16 bit */ 526/* Unscaled 16 bit */
530 mpeg3video_rgb16_mmx(src[0], 527 mpeg3video_rgb16_mmx(src[0],
531 src[2], 528 src[2],
532 src[1], 529 src[1],
533 output_rows[0], 530 output_rows[0],
534 video->out_h, 531 video->out_h,
535 video->out_w, 532 video->out_w,
536 (output_rows[1] - output_rows[0]) / 2 - video->out_w); 533 (output_rows[1] - output_rows[0]) / 2 - video->out_w);
537 } 534 }
538 else 535 else
539 if(video->have_mmx && 536 if(video->have_mmx &&
540 (video->color_model == MPEG3_BGRA8888 || 537 (video->color_model == MPEG3_BGRA8888 ||
541 video->color_model == MPEG3_BGR888 || 538 video->color_model == MPEG3_BGR888 ||
542 /* video->color_model == MPEG3_RGB888 || */ 539 /* video->color_model == MPEG3_RGB888 || */
543 video->color_model == MPEG3_RGBA8888 || 540 video->color_model == MPEG3_RGBA8888 ||
544 video->color_model == MPEG3_601_BGR888 || 541 video->color_model == MPEG3_601_BGR888 ||
545 video->color_model == MPEG3_601_BGRA8888 || 542 video->color_model == MPEG3_601_BGRA8888 ||
546 video->color_model == MPEG3_601_RGB888 || 543 video->color_model == MPEG3_601_RGB888 ||
547 video->color_model == MPEG3_601_RGBA8888)) 544 video->color_model == MPEG3_601_RGBA8888))
548 { 545 {
549/* Original MMX */ 546/* Original MMX */
550 if(video->color_model == MPEG3_BGRA8888 || 547 if(video->color_model == MPEG3_BGRA8888 ||
551 video->color_model == MPEG3_RGBA8888 || 548 video->color_model == MPEG3_RGBA8888 ||
552 video->color_model == MPEG3_601_BGRA8888 || 549 video->color_model == MPEG3_601_BGRA8888 ||
553 video->color_model == MPEG3_601_RGBA8888) step = 4; 550 video->color_model == MPEG3_601_RGBA8888) step = 4;
554 else 551 else
555 if(video->color_model == MPEG3_BGR888 || 552 if(video->color_model == MPEG3_BGR888 ||
556 video->color_model == MPEG3_RGB888 || 553 video->color_model == MPEG3_RGB888 ||
557 video->color_model == MPEG3_601_BGR888 || 554 video->color_model == MPEG3_601_BGR888 ||
558 video->color_model == MPEG3_601_RGB888) step = 3; 555 video->color_model == MPEG3_601_RGB888) step = 3;
559 556
560 DITHER_ROW_HEAD 557 DITHER_ROW_HEAD
561/* Transfer row with scaling */ 558/* Transfer row with scaling */
562 if(video->out_w != video->horizontal_size) 559 if(video->out_w != video->horizontal_size)
563 { 560 {
564 switch(video->color_model) 561 switch(video->color_model)
565 { 562 {
566 case MPEG3_BGRA8888: 563 case MPEG3_BGRA8888:
567 case MPEG3_BGR888: 564 case MPEG3_BGR888:
568 DITHER_MMX_SCALE_HEAD 565 DITHER_MMX_SCALE_HEAD
569 mpeg3_bgra32_mmx(y_in[video->x_table[w]], 566 mpeg3_bgra32_mmx(y_in[video->x_table[w]],
570 cr_in[uv_subscript], 567 cr_in[uv_subscript],
571 cb_in[uv_subscript], 568 cb_in[uv_subscript],
572 (unsigned long*)data); 569 (unsigned long*)data);
573 DITHER_MMX_SCALE_TAIL 570 DITHER_MMX_SCALE_TAIL
574 break; 571 break;
575 572
576 case MPEG3_601_BGRA8888: 573 case MPEG3_601_BGRA8888:
577 case MPEG3_601_BGR888: 574 case MPEG3_601_BGR888:
578 DITHER_MMX_SCALE_HEAD 575 DITHER_MMX_SCALE_HEAD
579 mpeg3_601_bgra32_mmx(y_in[video->x_table[w]], 576 mpeg3_601_bgra32_mmx(y_in[video->x_table[w]],
580 cr_in[uv_subscript], 577 cr_in[uv_subscript],
581 cb_in[uv_subscript], 578 cb_in[uv_subscript],
582 (unsigned long*)data); 579 (unsigned long*)data);
583 DITHER_MMX_SCALE_TAIL 580 DITHER_MMX_SCALE_TAIL
584 break; 581 break;
585 582
586 case MPEG3_RGBA8888: 583 case MPEG3_RGBA8888:
587 case MPEG3_RGB888: 584 case MPEG3_RGB888:
588 DITHER_MMX_SCALE_HEAD 585 DITHER_MMX_SCALE_HEAD
589 mpeg3_rgba32_mmx(y_in[video->x_table[w]], 586 mpeg3_rgba32_mmx(y_in[video->x_table[w]],
590 cr_in[uv_subscript], 587 cr_in[uv_subscript],
591 cb_in[uv_subscript], 588 cb_in[uv_subscript],
592 (unsigned long*)data); 589 (unsigned long*)data);
593 DITHER_MMX_SCALE_TAIL 590 DITHER_MMX_SCALE_TAIL
594 break; 591 break;
595 592
596 case MPEG3_601_RGBA8888: 593 case MPEG3_601_RGBA8888:
597 case MPEG3_601_RGB888: 594 case MPEG3_601_RGB888:
598 DITHER_MMX_SCALE_HEAD 595 DITHER_MMX_SCALE_HEAD
599 mpeg3_601_rgba32_mmx(y_in[video->x_table[w]], 596 mpeg3_601_rgba32_mmx(y_in[video->x_table[w]],
600 cr_in[uv_subscript], 597 cr_in[uv_subscript],
601 cb_in[uv_subscript], 598 cb_in[uv_subscript],
602 (unsigned long*)data); 599 (unsigned long*)data);
603 DITHER_MMX_SCALE_TAIL 600 DITHER_MMX_SCALE_TAIL
604 break; 601 break;
605 } 602 }
606 } 603 }
607 else 604 else
608/* Transfer row unscaled */ 605/* Transfer row unscaled */
609 { 606 {
610 switch(video->color_model) 607 switch(video->color_model)
611 { 608 {
612/* MMX byte swap 24 and 32 bit */ 609/* MMX byte swap 24 and 32 bit */
613 case MPEG3_BGRA8888: 610 case MPEG3_BGRA8888:
614 case MPEG3_BGR888: 611 case MPEG3_BGR888:
615 DITHER_MMX_HEAD 612 DITHER_MMX_HEAD
616 mpeg3_bgra32_mmx(*y_in++, 613 mpeg3_bgra32_mmx(*y_in++,
617 *cr_in, 614 *cr_in,
618 *cb_in, 615 *cb_in,
619 (unsigned long*)data); 616 (unsigned long*)data);
620 data += step; 617 data += step;
621 mpeg3_bgra32_mmx(*y_in++, 618 mpeg3_bgra32_mmx(*y_in++,
622 *cr_in, 619 *cr_in,
623 *cb_in, 620 *cb_in,
624 (unsigned long*)data); 621 (unsigned long*)data);
625 DITHER_MMX_TAIL 622 DITHER_MMX_TAIL
626 break; 623 break;
627 624
628/* MMX 601 byte swap 24 and 32 bit */ 625/* MMX 601 byte swap 24 and 32 bit */
629 case MPEG3_601_BGRA8888: 626 case MPEG3_601_BGRA8888:
630 case MPEG3_601_BGR888: 627 case MPEG3_601_BGR888:
631 DITHER_MMX_HEAD 628 DITHER_MMX_HEAD
632 mpeg3_601_bgra32_mmx(*y_in++, 629 mpeg3_601_bgra32_mmx(*y_in++,
633 *cr_in, 630 *cr_in,
634 *cb_in, 631 *cb_in,
635 (unsigned long*)data); 632 (unsigned long*)data);
636 data += step; 633 data += step;
637 mpeg3_601_bgra32_mmx(*y_in++, 634 mpeg3_601_bgra32_mmx(*y_in++,
638 *cr_in, 635 *cr_in,
639 *cb_in, 636 *cb_in,
640 (unsigned long*)data); 637 (unsigned long*)data);
641 DITHER_MMX_TAIL 638 DITHER_MMX_TAIL
642 break; 639 break;
643 640
644/* MMX 24 and 32 bit no byte swap */ 641/* MMX 24 and 32 bit no byte swap */
645 case MPEG3_RGBA8888: 642 case MPEG3_RGBA8888:
646 case MPEG3_RGB888: 643 case MPEG3_RGB888:
647 DITHER_MMX_HEAD 644 DITHER_MMX_HEAD
648 mpeg3_rgba32_mmx(*y_in++, 645 mpeg3_rgba32_mmx(*y_in++,
649 *cr_in, 646 *cr_in,
650 *cb_in, 647 *cb_in,
651 (unsigned long*)data); 648 (unsigned long*)data);
652 data += step; 649 data += step;
653 mpeg3_rgba32_mmx(*y_in++, 650 mpeg3_rgba32_mmx(*y_in++,
654 *cr_in, 651 *cr_in,
655 *cb_in, 652 *cb_in,
656 (unsigned long*)data); 653 (unsigned long*)data);
657 DITHER_MMX_TAIL 654 DITHER_MMX_TAIL
658 break; 655 break;
659 656
660/* MMX 601 24 and 32 bit no byte swap */ 657/* MMX 601 24 and 32 bit no byte swap */
661 case MPEG3_601_RGBA8888: 658 case MPEG3_601_RGBA8888:
662 case MPEG3_601_RGB888: 659 case MPEG3_601_RGB888:
663 DITHER_MMX_HEAD 660 DITHER_MMX_HEAD
664 mpeg3_601_rgba32_mmx(*y_in++, 661 mpeg3_601_rgba32_mmx(*y_in++,
665 *cr_in, 662 *cr_in,
666 *cb_in, 663 *cb_in,
667 (unsigned long*)data); 664 (unsigned long*)data);
668 data += step; 665 data += step;
669 mpeg3_601_rgba32_mmx(*y_in++, 666 mpeg3_601_rgba32_mmx(*y_in++,
670 *cr_in, 667 *cr_in,
671 *cb_in, 668 *cb_in,
672 (unsigned long*)data); 669 (unsigned long*)data);
673 DITHER_MMX_TAIL 670 DITHER_MMX_TAIL
674 break; 671 break;
675 } 672 }
676 } 673 }
677 DITHER_ROW_TAIL 674 DITHER_ROW_TAIL
678 } 675 }
679 else 676 else
680#endif 677#endif
681/* ================================== NO MMX ==================================== */ 678/* ================================== NO MMX ==================================== */
682 { 679 {
683 DITHER_ROW_HEAD 680 DITHER_ROW_HEAD
684/* Transfer row with scaling */ 681/* Transfer row with scaling */
685 if(video->out_w != video->horizontal_size) 682 if(video->out_w != video->horizontal_size)
686 { 683 {
687 switch(video->color_model) 684 switch(video->color_model)
688 { 685 {
689 case MPEG3_BGR888: 686 case MPEG3_BGR888:
690 DITHER_SCALE_HEAD 687 DITHER_SCALE_HEAD
691 STORE_PIXEL_BGR888 688 STORE_PIXEL_BGR888
692 DITHER_SCALE_TAIL 689 DITHER_SCALE_TAIL
693 break; 690 break;
694 case MPEG3_BGRA8888: 691 case MPEG3_BGRA8888:
695 DITHER_SCALE_HEAD 692 DITHER_SCALE_HEAD
696 STORE_PIXEL_BGRA8888 693 STORE_PIXEL_BGRA8888
697 DITHER_SCALE_TAIL 694 DITHER_SCALE_TAIL
698 break; 695 break;
699 case MPEG3_RGB565: 696 case MPEG3_RGB565:
700 DITHER_SCALE_HEAD 697 DITHER_SCALE_HEAD
701 STORE_PIXEL_RGB565 698 STORE_PIXEL_RGB565
702 DITHER_SCALE_TAIL 699 DITHER_SCALE_TAIL
703 break; 700 break;
704 case MPEG3_RGB888: 701 case MPEG3_RGB888:
705 DITHER_SCALE_HEAD 702 DITHER_SCALE_HEAD
706 STORE_PIXEL_RGB888 703 STORE_PIXEL_RGB888
707 DITHER_SCALE_TAIL 704 DITHER_SCALE_TAIL
708 break; 705 break;
709 case MPEG3_RGBA8888: 706 case MPEG3_RGBA8888:
710 DITHER_SCALE_HEAD 707 DITHER_SCALE_HEAD
711 STORE_PIXEL_RGBA8888 708 STORE_PIXEL_RGBA8888
712 DITHER_SCALE_TAIL 709 DITHER_SCALE_TAIL
713 break; 710 break;
714 case MPEG3_601_BGR888: 711 case MPEG3_601_BGR888:
715 DITHER_SCALE_601_HEAD 712 DITHER_SCALE_601_HEAD
716 STORE_PIXEL_BGR888 713 STORE_PIXEL_BGR888
717 DITHER_SCALE_TAIL 714 DITHER_SCALE_TAIL
718 break; 715 break;
719 case MPEG3_601_BGRA8888: 716 case MPEG3_601_BGRA8888:
720 DITHER_SCALE_601_HEAD 717 DITHER_SCALE_601_HEAD
721 STORE_PIXEL_BGRA8888 718 STORE_PIXEL_BGRA8888
722 DITHER_SCALE_TAIL 719 DITHER_SCALE_TAIL
723 break; 720 break;
724 case MPEG3_601_RGB565: 721 case MPEG3_601_RGB565:
725 DITHER_SCALE_601_HEAD 722 DITHER_SCALE_601_HEAD
726 STORE_PIXEL_RGB565 723 STORE_PIXEL_RGB565
727 DITHER_SCALE_TAIL 724 DITHER_SCALE_TAIL
728 break; 725 break;
729 case MPEG3_601_RGB888: 726 case MPEG3_601_RGB888:
730 DITHER_SCALE_601_HEAD 727 DITHER_SCALE_601_HEAD
731 STORE_PIXEL_RGB888 728 STORE_PIXEL_RGB888
732 DITHER_SCALE_TAIL 729 DITHER_SCALE_TAIL
733 break; 730 break;
734 case MPEG3_601_RGBA8888: 731 case MPEG3_601_RGBA8888:
735 DITHER_SCALE_601_HEAD 732 DITHER_SCALE_601_HEAD
736 STORE_PIXEL_RGBA8888 733 STORE_PIXEL_RGBA8888
737 DITHER_SCALE_TAIL 734 DITHER_SCALE_TAIL
738 break; 735 break;
739 case MPEG3_RGBA16161616: 736 case MPEG3_RGBA16161616:
740 { 737 {
741 register unsigned short *data_s = (unsigned short*)data; 738 register unsigned short *data_s = (unsigned short*)data;
742 DITHER_SCALE_HEAD 739 DITHER_SCALE_HEAD
743 STORE_PIXEL_RGBA16161616 740 STORE_PIXEL_RGBA16161616
744 DITHER_SCALE_TAIL 741 DITHER_SCALE_TAIL
745 } 742 }
746 break; 743 break;
747 } 744 }
748 } 745 }
749 else 746 else
750 { 747 {
751/* Transfer row unscaled */ 748/* Transfer row unscaled */
752 switch(video->color_model) 749 switch(video->color_model)
753 { 750 {
754 case MPEG3_BGR888: 751 case MPEG3_BGR888:
755 DITHER_HEAD 752 DITHER_HEAD
756 STORE_PIXEL_BGR888 753 STORE_PIXEL_BGR888
757 DITHER_TAIL 754 DITHER_TAIL
758 break; 755 break;
759 case MPEG3_BGRA8888: 756 case MPEG3_BGRA8888:
760 DITHER_HEAD 757 DITHER_HEAD
761 STORE_PIXEL_BGRA8888 758 STORE_PIXEL_BGRA8888
762 DITHER_TAIL 759 DITHER_TAIL
763 break; 760 break;
764 case MPEG3_RGB565: 761 case MPEG3_RGB565:
765 DITHER_HEAD 762 DITHER_HEAD
766 STORE_PIXEL_RGB565 763 STORE_PIXEL_RGB565
767 DITHER_TAIL 764 DITHER_TAIL
768 break; 765 break;
769 case MPEG3_RGB888: 766 case MPEG3_RGB888:
770 DITHER_HEAD 767 DITHER_HEAD
771 STORE_PIXEL_RGB888 768 STORE_PIXEL_RGB888
772 DITHER_TAIL 769 DITHER_TAIL
773 break; 770 break;
774 case MPEG3_RGBA8888: 771 case MPEG3_RGBA8888:
775 DITHER_HEAD 772 DITHER_HEAD
776 STORE_PIXEL_RGBA8888 773 STORE_PIXEL_RGBA8888
777 DITHER_TAIL 774 DITHER_TAIL
778 break; 775 break;
779 case MPEG3_601_BGR888: 776 case MPEG3_601_BGR888:
780 DITHER_601_HEAD 777 DITHER_601_HEAD
781 STORE_PIXEL_BGR888 778 STORE_PIXEL_BGR888
782 DITHER_TAIL 779 DITHER_TAIL
783 break; 780 break;
784 case MPEG3_601_BGRA8888: 781 case MPEG3_601_BGRA8888:
785 DITHER_601_HEAD 782 DITHER_601_HEAD
786 STORE_PIXEL_RGB565 783 STORE_PIXEL_RGB565
787 DITHER_TAIL 784 DITHER_TAIL
788 break; 785 break;
789 case MPEG3_601_RGB565: 786 case MPEG3_601_RGB565:
790 DITHER_601_HEAD 787 DITHER_601_HEAD
791 STORE_PIXEL_RGB565 788 STORE_PIXEL_RGB565
792 DITHER_TAIL 789 DITHER_TAIL
793 break; 790 break;
794 case MPEG3_601_RGB888: 791 case MPEG3_601_RGB888:
795 DITHER_601_HEAD 792 DITHER_601_HEAD
796 STORE_PIXEL_RGB888 793 STORE_PIXEL_RGB888
797 DITHER_TAIL 794 DITHER_TAIL
798 break; 795 break;
799 case MPEG3_601_RGBA8888: 796 case MPEG3_601_RGBA8888:
800 DITHER_601_HEAD 797 DITHER_601_HEAD
801 STORE_PIXEL_RGBA8888 798 STORE_PIXEL_RGBA8888
802 DITHER_TAIL 799 DITHER_TAIL
803 break; 800 break;
804 case MPEG3_RGBA16161616: 801 case MPEG3_RGBA16161616:
805 { 802 {
806 register unsigned short *data_s = (unsigned short*)data; 803 register unsigned short *data_s = (unsigned short*)data;
807 DITHER_HEAD 804 DITHER_HEAD
808 STORE_PIXEL_RGBA16161616 805 STORE_PIXEL_RGBA16161616
809 DITHER_TAIL 806 DITHER_TAIL
810 } 807 }
811 break; 808 break;
812 } 809 }
813 } 810 }
814 DITHER_ROW_TAIL 811 DITHER_ROW_TAIL
815 } /* End of non-MMX */ 812 } /* End of non-MMX */
816 813
817#ifdef HAVE_MMX 814#ifdef HAVE_MMX
818 if(video->have_mmx) 815 if(video->have_mmx)
819 __asm__ __volatile__ ("emms"); 816 __asm__ __volatile__ ("emms");
820#endif 817#endif
821 return 0; 818 return 0;
822} 819}
823 820
824int mpeg3video_ditherframe444(mpeg3video_t *video, unsigned char *src[]) 821int mpeg3video_ditherframe444(mpeg3video_t *video, unsigned char *src[])
825{ 822{
826 return 0; 823 return 0;
827} 824}
828 825
829int mpeg3video_dithertop(mpeg3video_t *video, unsigned char *src[]) 826int mpeg3video_dithertop(mpeg3video_t *video, unsigned char *src[])
830{ 827{
831 return mpeg3video_ditherframe(video, src, video->output_rows); 828 return mpeg3video_ditherframe(video, src, video->output_rows);
832} 829}
833 830
834int mpeg3video_dithertop444(mpeg3video_t *video, unsigned char *src[]) 831int mpeg3video_dithertop444(mpeg3video_t *video, unsigned char *src[])
835{ 832{
836 return 0; 833 return 0;
837} 834}
838 835
839int mpeg3video_ditherbot(mpeg3video_t *video, unsigned char *src[]) 836int mpeg3video_ditherbot(mpeg3video_t *video, unsigned char *src[])
840{ 837{
841 return 0; 838 return 0;
842} 839}
843 840
844int mpeg3video_ditherbot444(mpeg3video_t *video, unsigned char *src[]) 841int mpeg3video_ditherbot444(mpeg3video_t *video, unsigned char *src[])
845{ 842{
846 return 0; 843 return 0;
847} 844}
848 845
/*
 * Copy len bytes from input to output.
 *
 * output - destination buffer, at least len bytes
 * input  - source buffer, at least len bytes; must not overlap output
 * len    - number of bytes to copy; zero or negative copies nothing
 *
 * This is a plain wrapper around memcpy().  The length is checked first
 * because a negative long would turn into a huge size_t in the call.
 */
void memcpy_fast(unsigned char *output, unsigned char *input, long len)
{
	if(len <= 0) return;

	memcpy(output, input, (size_t)len);
}
874 871
875int mpeg3video_init_output() 872int mpeg3video_init_output()
876{ 873{
877 int i, value; 874 int i, value;
878 for(i = 0; i < 256; i++) 875 for(i = 0; i < 256; i++)
879 { 876 {
880 value = (int)(1.1644 * i - 255 * 0.0627 + 0.5); 877 value = (int)(1.1644 * i - 255 * 0.0627 + 0.5);
881 if(value < 0) value = 0; 878 if(value < 0) value = 0;
882 else 879 else
883 if(value > 255) value = 255; 880 if(value > 255) value = 255;
884 mpeg3_601_to_rgb[i] = value; 881 mpeg3_601_to_rgb[i] = value;
885 } 882 }
886 return 0; 883 return 0;
887} 884}
888 885
889int mpeg3video_present_frame(mpeg3video_t *video) 886int mpeg3video_present_frame(mpeg3video_t *video)
890{ 887{
891 int i, j, k, l; 888 int i, j, k, l;
892 unsigned char **src = video->output_src; 889 unsigned char **src = video->output_src;
893 890
894/* Copy YUV buffers */ 891/* Copy YUV buffers */
895 if(video->want_yvu) 892 if(video->want_yvu)
896 { 893 {
897 long size[2]; 894 long size[2];
898 long offset[2]; 895 long offset[2];
899 896
900/* Drop a frame */ 897/* Drop a frame */
901 if(!video->y_output) return 0; 898 if(!video->y_output) return 0;
902 899
903/* Copy a frame */ 900/* Copy a frame */
904 if(video->in_x == 0 && 901 if(video->in_x == 0 &&
905 video->in_w >= video->coded_picture_width) 902 video->in_w >= video->coded_picture_width)
906 { 903 {
907 size[0] = video->coded_picture_width * video->in_h; 904 size[0] = video->coded_picture_width * video->in_h;
908 size[1] = video->chrom_width * (int)((float)video->in_h / 2 + 0.5); 905 size[1] = video->chrom_width * (int)((float)video->in_h / 2 + 0.5);
909 offset[0] = video->coded_picture_width * video->in_y; 906 offset[0] = video->coded_picture_width * video->in_y;
910 offset[1] = video->chrom_width * (int)((float)video->in_y / 2 + 0.5); 907 offset[1] = video->chrom_width * (int)((float)video->in_y / 2 + 0.5);
911 908
912/* 909/*
913 * if(video->in_y > 0) 910 * if(video->in_y > 0)
914 * { 911 * {
915 * offset[1] += video->chrom_width / 2; 912 * offset[1] += video->chrom_width / 2;
916 * size[1] += video->chrom_width / 2; 913 * size[1] += video->chrom_width / 2;
917 * } 914 * }
918 */ 915 */
919 916
920 memcpy(video->y_output, src[0] + offset[0], size[0]); 917 memcpy(video->y_output, src[0] + offset[0], size[0]);
921 memcpy(video->u_output, src[1] + offset[1], size[1]); 918 memcpy(video->u_output, src[1] + offset[1], size[1]);
922 memcpy(video->v_output, src[2] + offset[1], size[1]); 919 memcpy(video->v_output, src[2] + offset[1], size[1]);
923 } 920 }
924 else 921 else
925 { 922 {
926 for(i = 0, j = video->in_y; i < video->in_h; i++, j++) 923 for(i = 0, j = video->in_y; i < video->in_h; i++, j++)
927 { 924 {
928 memcpy(video->y_output + i * video->in_w, 925 memcpy(video->y_output + i * video->in_w,
929 src[0] + j * video->coded_picture_width + video->in_x, 926 src[0] + j * video->coded_picture_width + video->in_x,
930 video->in_w); 927 video->in_w);
931 memcpy(video->u_output + i * video->in_w / 4, 928 memcpy(video->u_output + i * video->in_w / 4,
932 src[1] + j * video->chrom_width / 2 + video->in_x / 4, 929 src[1] + j * video->chrom_width / 2 + video->in_x / 4,
933 video->in_w / 4); 930 video->in_w / 4);
934 memcpy(video->v_output + i * video->in_w / 4, 931 memcpy(video->v_output + i * video->in_w / 4,
935 src[2] + j * video->chrom_width / 2 + video->in_x / 4, 932 src[2] + j * video->chrom_width / 2 + video->in_x / 4,
936 video->in_w / 4); 933 video->in_w / 4);
937 } 934 }
938 } 935 }
939 936
940 return 0; 937 return 0;
941 } 938 }
942 939
943/* Want RGB buffer */ 940/* Want RGB buffer */
944/* Copy the frame to the output with YUV to RGB conversion */ 941/* Copy the frame to the output with YUV to RGB conversion */
945 if(video->prog_seq) 942 if(video->prog_seq)
946 { 943 {
947 if(video->chroma_format != CHROMA444) 944 if(video->chroma_format != CHROMA444)
948 { 945 {
949 mpeg3video_ditherframe(video, src, video->output_rows); 946 mpeg3video_ditherframe(video, src, video->output_rows);
950 } 947 }
951 else 948 else
952 mpeg3video_ditherframe444(video, src); 949 mpeg3video_ditherframe444(video, src);
953 } 950 }
954 else 951 else
955 { 952 {
956 if((video->pict_struct == FRAME_PICTURE && video->topfirst) || 953 if((video->pict_struct == FRAME_PICTURE && video->topfirst) ||
957 video->pict_struct == BOTTOM_FIELD) 954 video->pict_struct == BOTTOM_FIELD)
958 { 955 {
959/* top field first */ 956/* top field first */
960 if(video->chroma_format != CHROMA444) 957 if(video->chroma_format != CHROMA444)
961 { 958 {
962 mpeg3video_dithertop(video, src); 959 mpeg3video_dithertop(video, src);
963 mpeg3video_ditherbot(video, src); 960 mpeg3video_ditherbot(video, src);
964 } 961 }
965 else 962 else
966 { 963 {
967 mpeg3video_dithertop444(video, src); 964 mpeg3video_dithertop444(video, src);
968 mpeg3video_ditherbot444(video, src); 965 mpeg3video_ditherbot444(video, src);
969 } 966 }
970 } 967 }
971 else 968 else
972 { 969 {
973/* bottom field first */ 970/* bottom field first */
974 if(video->chroma_format != CHROMA444) 971 if(video->chroma_format != CHROMA444)
975 { 972 {
976 mpeg3video_ditherbot(video, src); 973 mpeg3video_ditherbot(video, src);
977 mpeg3video_dithertop(video, src); 974 mpeg3video_dithertop(video, src);
978 } 975 }
979 else 976 else
980 { 977 {
981 mpeg3video_ditherbot444(video, src); 978 mpeg3video_ditherbot444(video, src);
982 mpeg3video_dithertop444(video, src); 979 mpeg3video_dithertop444(video, src);
983 } 980 }
984 } 981 }
985 } 982 }
986 return 0; 983 return 0;
987} 984}
988 985
989int mpeg3video_display_second_field(mpeg3video_t *video) 986int mpeg3video_display_second_field(mpeg3video_t *video)
990{ 987{
991/* Not used */ 988/* Not used */
992 return 0; 989 return 0;
993} 990}