xine-lib  1.2.10
tomsmocompmacros.h
Go to the documentation of this file.
1 #include <string.h>
2 #include <math.h>
3 #include <stdlib.h>
4 
5 #include "mangle.h"
6 
7 #define USE_FOR_DSCALER
8 
9 #define MyMemCopy xine_fast_memcpy
10 
11 // Define a few macros for CPU dependent instructions.
12 // I suspect I don't really understand how the C macro preprocessor works but
13 // this seems to get the job done. // TRB 7/01
14 
15 // BEFORE USING THESE YOU MUST SET:
16 
17 // #define SSE_TYPE SSE (or MMX or 3DNOW)
18 
19 // some macros for pavgb instruction
20 // V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it
21 
/*
 * Byte-wise unsigned average, mmr1 = avg(mmr1, mmr2).
 * MMX has no pavgb, so the fallback halves each operand and adds the
 * halves; smask is expected to clear each byte's low bit so that the
 * word-wide psrlw cannot leak a bit into the neighbouring byte lane
 * (NOTE(review): smask value defined at the call sites -- confirm it
 * is the 0xfe... pattern this trick requires).
 * NOTE(review): (a>>1)+(b>>1) can be 1 below the rounded average that
 * real pavgb produces; historical DScaler behaviour, left as-is.
 */
22 #define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \
23  "movq "mmr2", "mmrw"\n\t" /* mmrw = pixel2 */ \
24  "pand "smask", "mmrw"\n\t" /* clear low bits so the shift stays in-lane */ \
25  "psrlw $1, "mmrw"\n\t" /* mmrw = pixel2 / 2 */ \
26  "pand "smask", "mmr1"\n\t" \
27  "psrlw $1, "mmr1"\n\t" /* mmr1 = pixel1 / 2 */ \
28  "paddusb "mmrw", "mmr1"\n\t" /* mmr1 = approximate average */
/* Native one-instruction averages on SSE (pavgb) and 3DNow! (pavgusb). */
29 #define V_PAVGB_SSE(mmr1, mmr2, mmrw, smask) "pavgb "mmr2", "mmr1"\n\t"
30 #define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask) "pavgusb "mmr2", "mmr1"\n\t"
/* Two-level indirection: forces SSE_TYPE to be macro-expanded (to MMX,
 * SSE or 3DNOW) before the ## paste in V_PAVGB3 selects the variant. */
31 #define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SSE_TYPE)
32 #define V_PAVGB2(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp)
33 #define V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB_##ssetyp(mmr1, mmr2, mmrw, smask)
34 
35 // some macros for pmaxub instruction
/* Byte-wise unsigned max, mmr1 = max(mmr1, mmr2).
 * MMX fallback uses saturating arithmetic: (a -sat- b) + b == max(a, b). */
36 #define V_PMAXUB_MMX(mmr1, mmr2) \
37  "psubusb "mmr2", "mmr1"\n\t" \
38  "paddusb "mmr2", "mmr1"\n\t"
39 #define V_PMAXUB_SSE(mmr1, mmr2) "pmaxub "mmr2", "mmr1"\n\t"
40 #define V_PMAXUB_3DNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version
/* Indirection so SSE_TYPE expands before the ## paste (see V_PAVGB). */
41 #define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SSE_TYPE)
42 #define V_PMAXUB2(mmr1, mmr2, ssetyp) V_PMAXUB3(mmr1, mmr2, ssetyp)
43 #define V_PMAXUB3(mmr1, mmr2, ssetyp) V_PMAXUB_##ssetyp(mmr1, mmr2)
44 
45 // some macros for pminub instruction
46 // V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw
/* Byte-wise unsigned min, mmr1 = min(mmr1, mmr2); mmrw is trashed and,
 * per the header note above, must NOT alias mmr2.
 * MMX fallback: with w = 255 - b, (a +sat+ w) -sat- w == min(a, b). */
47 #define V_PMINUB_MMX(mmr1, mmr2, mmrw) \
48  "pcmpeqb "mmrw", "mmrw"\n\t" /* mmrw = all-ones (0xff per byte) */ \
49  "psubusb "mmr2", "mmrw"\n\t" /* mmrw = 255 - mmr2 */ \
50  "paddusb "mmrw", "mmr1"\n\t" /* saturates to 255 where mmr1 > mmr2 */ \
51  "psubusb "mmrw", "mmr1"\n\t" /* undo the bias -> min(mmr1, mmr2) */
52 #define V_PMINUB_SSE(mmr1, mmr2, mmrw) "pminub "mmr2", "mmr1"\n\t"
53 #define V_PMINUB_3DNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version
/* Indirection so SSE_TYPE expands before the ## paste (see V_PAVGB). */
54 #define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SSE_TYPE)
55 #define V_PMINUB2(mmr1, mmr2, mmrw, ssetyp) V_PMINUB3(mmr1, mmr2, mmrw, ssetyp)
56 #define V_PMINUB3(mmr1, mmr2, mmrw, ssetyp) V_PMINUB_##ssetyp(mmr1, mmr2, mmrw)
57 
58 // some macros for movntq instruction
59 // V_MOVNTQ(mmr1, mmr2)
/* 64-bit store, mmr1 = mmr2 (AT&T order: destination is mmr1).
 * SSE uses the non-temporal movntq to bypass the cache; MMX/3DNow!
 * fall back to a plain movq. */
60 #define V_MOVNTQ_MMX(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
61 #define V_MOVNTQ_3DNOW(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
62 #define V_MOVNTQ_SSE(mmr1, mmr2) "movntq "mmr2", "mmr1"\n\t"
/* Indirection so SSE_TYPE expands before the ## paste (see V_PAVGB). */
63 #define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SSE_TYPE)
64 #define V_MOVNTQ2(mmr1, mmr2, ssetyp) V_MOVNTQ3(mmr1, mmr2, ssetyp)
65 #define V_MOVNTQ3(mmr1, mmr2, ssetyp) V_MOVNTQ_##ssetyp(mmr1, mmr2)
66 
67 // end of macros
68 
69 #ifdef IS_SSE2
70 
/*
 * Candidate merge, SSE2 flavour (16 bytes per pass).
 * Loads the two pixel groups at PADDR1/PADDR2, forms their byte-wise
 * average and absolute difference ("weight"), then keeps, byte by byte,
 * whichever of the new candidate and the current best has the LOWER
 * weight. Running best pixels live in xmm5, best weights in xmm7; ties
 * go to the new candidate. Clobbers xmm0-xmm3; updates xmm5/xmm7.
 */
71 #define MERGE4PIXavg(PADDR1, PADDR2) \
72  "movdqu "PADDR1", %%xmm0\n\t" /* our 4 pixels */ \
73  "movdqu "PADDR2", %%xmm1\n\t" /* our pixel2 value */ \
74  "movdqa %%xmm0, %%xmm2\n\t" /* another copy of our pixel1 value */ \
75  "movdqa %%xmm1, %%xmm3\n\t" /* another copy of our pixel2 value */ \
76  "psubusb %%xmm1, %%xmm2\n\t" /* saturated pixel1 - pixel2 */ \
77  "psubusb %%xmm0, %%xmm3\n\t" /* saturated pixel2 - pixel1 */ \
78  "por %%xmm3, %%xmm2\n\t" /* xmm2 = |pixel1 - pixel2| = new weights */ \
79  "pavgb %%xmm1, %%xmm0\n\t" /* avg of 2 pixels */ \
80  "movdqa %%xmm2, %%xmm3\n\t" /* another copy of our weights */ \
81  "pxor %%xmm1, %%xmm1\n\t" /* xmm1 = 0 for the compares below */ \
82  "psubusb %%xmm7, %%xmm3\n\t" /* nonzero where old weights lower, else 0 */ \
83  "pcmpeqb %%xmm1, %%xmm3\n\t" /* now ff where new better, else 00 */ \
84  "pcmpeqb %%xmm3, %%xmm1\n\t" /* here ff where old better, else 00 */ \
85  "pand %%xmm3, %%xmm0\n\t" /* keep only better new pixels */ \
86  "pand %%xmm3, %%xmm2\n\t" /* and weights */ \
87  "pand %%xmm1, %%xmm5\n\t" /* keep only better old pixels */ \
88  "pand %%xmm1, %%xmm7\n\t" /* and their weights */ \
89  "por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \
90  "por %%xmm2, %%xmm7\n\t" /* merged weights */
91 
/*
 * Candidate merge with horizontal pre-interpolation, SSE2 flavour.
 * Each candidate pixel group is first formed by averaging an A/B pair
 * (PADDR1A with PADDR1B, and PADDR2A with PADDR2B); the rest is the
 * same lowest-weight merge as MERGE4PIXavg against the running best
 * in xmm5 (pixels) / xmm7 (weights). Clobbers xmm0-xmm3.
 */
92 #define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
93  "movdqu "PADDR1A", %%xmm0\n\t" /* our 4 pixels */ \
94  "movdqu "PADDR2A", %%xmm1\n\t" /* our pixel2 value */ \
95  "movdqu "PADDR1B", %%xmm2\n\t" /* our 4 pixels */ \
96  "movdqu "PADDR2B", %%xmm3\n\t" /* our pixel2 value */ \
97  "pavgb %%xmm2, %%xmm0\n\t" /* xmm0 = avg of the 1A/1B pair */ \
98  "pavgb %%xmm3, %%xmm1\n\t" /* xmm1 = avg of the 2A/2B pair */ \
99  "movdqa %%xmm0, %%xmm2\n\t" /* another copy of our pixel1 value */ \
100  "movdqa %%xmm1, %%xmm3\n\t" /* another copy of our pixel2 value */ \
101  "psubusb %%xmm1, %%xmm2\n\t" /* saturated pixel1 - pixel2 */ \
102  "psubusb %%xmm0, %%xmm3\n\t" /* saturated pixel2 - pixel1 */ \
103  "por %%xmm3, %%xmm2\n\t" /* xmm2 = |pixel1 - pixel2| = new weights */ \
104  "pavgb %%xmm1, %%xmm0\n\t" /* avg of 2 pixels */ \
105  "movdqa %%xmm2, %%xmm3\n\t" /* another copy of our weights */ \
106  "pxor %%xmm1, %%xmm1\n\t" /* xmm1 = 0 for the compares below */ \
107  "psubusb %%xmm7, %%xmm3\n\t" /* nonzero where old weights lower, else 0 */ \
108  "pcmpeqb %%xmm1, %%xmm3\n\t" /* now ff where new better, else 00 */ \
109  "pcmpeqb %%xmm3, %%xmm1\n\t" /* here ff where old better, else 00 */ \
110  "pand %%xmm3, %%xmm0\n\t" /* keep only better new pixels */ \
111  "pand %%xmm3, %%xmm2\n\t" /* and weights */ \
112  "pand %%xmm1, %%xmm5\n\t" /* keep only better old pixels */ \
113  "pand %%xmm1, %%xmm7\n\t" /* and their weights */ \
114  "por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \
115  "por %%xmm2, %%xmm7\n\t" /* merged weights */
116 
/* Force the _UVMask bits of the best-weight register (xmm7) high.
 * NOTE(review): presumably this makes chroma bytes always "worse" so
 * they never decide the merge -- confirm against the SearchLoop*.inc
 * users of this macro. */
117 #define RESET_CHROMA "por "_UVMask", %%xmm7\n\t"
118 
119 #else // ifdef IS_SSE2
120 
/*
 * Candidate merge, MMX/SSE/3DNow! flavour (8 bytes per pass).
 * Same algorithm as the SSE2 version above: average the pair at
 * PADDR1/PADDR2, compute its per-byte absolute difference ("weight"),
 * and keep per byte whichever of new candidate and current best has
 * the lower weight. Running best pixels in mm5, best weights in mm7;
 * the averaging uses V_PAVGB so it works on plain MMX too.
 * Clobbers mm0-mm3; updates mm5/mm7 in place.
 */
121 #define MERGE4PIXavg(PADDR1, PADDR2) \
122  "movq "PADDR1", %%mm0\n\t" /* our 4 pixels */ \
123  "movq "PADDR2", %%mm1\n\t" /* our pixel2 value */ \
124  "movq %%mm0, %%mm2\n\t" /* another copy of our pixel1 value */ \
125  "movq %%mm1, %%mm3\n\t" /* another copy of our pixel2 value */ \
126  "psubusb %%mm1, %%mm2\n\t" /* saturated pixel1 - pixel2 */ \
127  "psubusb %%mm0, %%mm3\n\t" /* saturated pixel2 - pixel1 */ \
128  "por %%mm3, %%mm2\n\t" /* mm2 = |pixel1 - pixel2| = new weights */ \
129  V_PAVGB ("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \
130  "movq %%mm2, %%mm3\n\t" /* another copy of our weights */ \
131  "pxor %%mm1, %%mm1\n\t" /* mm1 = 0 for the compares below */ \
132  "psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \
133  "pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \
134  "pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \
135  "pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \
136  "pand %%mm3, %%mm2\n\t" /* and weights */ \
137  "pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \
138  "pand %%mm1, %%mm7\n\t" /* and their weights */ \
139  "por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \
140  "por %%mm2, %%mm7\n\t" /* merged weights */
141 
/*
 * Candidate merge with horizontal pre-interpolation, MMX/SSE/3DNow!
 * flavour. Each candidate is first formed by V_PAVGB-averaging an A/B
 * pair (1A with 1B, 2A with 2B; the work register may alias mmr2 per
 * the V_PAVGB note above), then merged by lowest weight against the
 * running best in mm5 (pixels) / mm7 (weights). Clobbers mm0-mm3.
 */
142 #define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
143  "movq "PADDR1A", %%mm0\n\t" /* our 4 pixels */ \
144  "movq "PADDR2A", %%mm1\n\t" /* our pixel2 value */ \
145  "movq "PADDR1B", %%mm2\n\t" /* our 4 pixels */ \
146  "movq "PADDR2B", %%mm3\n\t" /* our pixel2 value */ \
147  V_PAVGB("%%mm0", "%%mm2", "%%mm2", _ShiftMask) /* mm0 = avg(1A, 1B) */ \
148  V_PAVGB("%%mm1", "%%mm3", "%%mm3", _ShiftMask) /* mm1 = avg(2A, 2B) */ \
149  "movq %%mm0, %%mm2\n\t" /* another copy of our pixel1 value */ \
150  "movq %%mm1, %%mm3\n\t" /* another copy of our pixel2 value */ \
151  "psubusb %%mm1, %%mm2\n\t" /* saturated pixel1 - pixel2 */ \
152  "psubusb %%mm0, %%mm3\n\t" /* saturated pixel2 - pixel1 */ \
153  "por %%mm3, %%mm2\n\t" /* mm2 = |pixel1 - pixel2| = new weights */ \
154  V_PAVGB("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \
155  "movq %%mm2, %%mm3\n\t" /* another copy of our weights */ \
156  "pxor %%mm1, %%mm1\n\t" /* mm1 = 0 for the compares below */ \
157  "psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \
158  "pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \
159  "pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \
160  "pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \
161  "pand %%mm3, %%mm2\n\t" /* and weights */ \
162  "pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \
163  "pand %%mm1, %%mm7\n\t" /* and their weights */ \
164  "por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \
165  "por %%mm2, %%mm7\n\t" /* merged weights */
166 
/* Force the _UVMask bits of the best-weight register (mm7) high.
 * NOTE(review): presumably this makes chroma bytes always "worse" so
 * they never decide the merge -- confirm against the SearchLoop*.inc
 * users of this macro. */
167 #define RESET_CHROMA "por "_UVMask", %%mm7\n\t"
168 
169 #endif
170 
171 
MERGE4PIXavg
#define MERGE4PIXavg(PADDR1, PADDR2)
Definition: tomsmocompmacros.h:121
scalerbobmethod
static const deinterlace_method_t scalerbobmethod
Definition: scalerbob.c:39
SearchLoop0A.inc
SearchLoopTop.inc
MERGE4PIXavgH
#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B)
Definition: tomsmocompmacros.h:142
SearchLoopEdgeA8.inc
movq
#define movq(vars, vard)
Definition: mmx.h:414
SearchLoopOddA6.inc
paddusb
#define paddusb(vars, vard)
Definition: mmx.h:470
SearchLoopOddAH2.inc
__attribute__
char **__environ __attribute__((weak, alias("fake__environ")))
V_MOVNTQ
#define V_MOVNTQ(mmr1, mmr2)
Definition: greedyhmacros.h:70
plugins.h
width
unsigned int width
Definition: gfontrle.c:4
SearchLoopEdgeA.inc
deinterlace_method_s
Definition: deinterlace.h:123
V_PMAXUB
#define V_PMAXUB(mmr1, mmr2)
Definition: greedyhmacros.h:48
SearchLoopOddA.inc
deinterlace_frame_data_s::f0
uint8_t * f0
Definition: deinterlace.h:105
SearchLoopVA.inc
TomsMoCompAll2.inc
height
unsigned int height
Definition: gfontrle.c:5
V_PMINUB
#define V_PMINUB(mmr1, mmr2, mmrw)
Definition: greedyhmacros.h:61
deinterlace_frame_data_s
Definition: deinterlace.h:103
deinterlace_frame_data_s::f2
uint8_t * f2
Definition: deinterlace.h:107
speedy.h
SearchLoopOddA2.inc
deinterlace_frame_data_s::f1
uint8_t * f1
Definition: deinterlace.h:106
scalerbob_get_method
const deinterlace_method_t * scalerbob_get_method(void)
Definition: scalerbob.c:63
config.h
SearchLoopBottom.inc
V_PAVGB
#define V_PAVGB(mmr1, mmr2, mmrw, smask)
Definition: greedyhmacros.h:38
RESET_CHROMA
#define RESET_CHROMA
Definition: tomsmocompmacros.h:167
psubusb
#define psubusb(vars, vard)
Definition: mmx.h:507
mangle.h
MANGLE definition from FFmpeg project, until the code is ported not to require this (considered an ha...
deinterlace.h
SearchLoopVAH.inc