[go: nahoru, domu]

blob: b6fbfcf928a92ab771a380625ea17a15ed6954b5 [file] [log] [blame]
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10#ifndef VPX_DSP_X86_CONVOLVE_H_
11#define VPX_DSP_X86_CONVOLVE_H_
12
13#include <assert.h>
14
15#include "./vpx_config.h"
16#include "vpx/vpx_integer.h"
17#include "vpx_ports/mem.h"
18
/*
 * Function type of a one-dimensional filter kernel operating on 8-bit
 * samples. Arguments, in order: source pointer and pitch, output pointer and
 * pitch, number of output rows, and a pointer to int16_t filter coefficients.
 * NOTE(review): presumably the signature of the vpx_filter_block1d* kernels
 * that FUN_CONV_1D invokes with the same argument order - confirm.
 */
typedef void filter8_1dfunction (
  const uint8_t *src_ptr,
  ptrdiff_t src_pitch,
  uint8_t *output_ptr,
  ptrdiff_t out_pitch,
  uint32_t output_height,
  const int16_t *filter
);
27
/*
 * FUN_CONV_1D defines vpx_convolve8_<name>_<opt>(), a one-dimensional
 * convolution entry point. The block is walked in 16-, then 8-, then
 * 4-pixel-wide columns, and each column is handed to the matching
 * vpx_filter_block1d{16,8,4}_<dir>{8,2}_<avg><opt>() kernel.
 *
 *   name      - suffix pasted after "vpx_convolve8_" to name the function.
 *   step_q4   - the step parameter (x_step_q4 or y_step_q4) belonging to this
 *               direction; asserted to be 16.
 *   filter    - the kernel parameter (filter_x or filter_y) belonging to this
 *               direction; asserted to satisfy filter[3] != 128.
 *   dir       - direction token pasted into the kernel names.
 *   src_start - expression handed to the "8" kernels as their source pointer;
 *               it is re-evaluated on every call, so it tracks src as the
 *               columns advance. The "2" kernels always receive plain src.
 *   avg       - token pasted in front of opt in the kernel names
 *               (NOTE(review): presumably empty or "avg_" - confirm).
 *   opt       - suffix pasted at the end of the function and kernel names.
 *
 * The "8" kernels are used when any of filter[0..2] is non-zero, the "2"
 * kernels otherwise. Columns narrower than 4 pixels are not processed.
 */
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)          \
  void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                    uint8_t *dst, ptrdiff_t dst_stride,       \
                                    const int16_t *filter_x, int x_step_q4,   \
                                    const int16_t *filter_y, int y_step_q4,   \
                                    int w, int h) {                           \
    /* Only one filter/step pair is read per instantiation, and step_q4 is */ \
    /* read solely inside assert(): mark every parameter as used so that   */ \
    /* NDEBUG builds do not raise unused-parameter warnings.               */ \
    (void)filter_x;                                                           \
    (void)x_step_q4;                                                          \
    (void)filter_y;                                                           \
    (void)y_step_q4;                                                          \
    assert(filter[3] != 128);                                                 \
    assert(step_q4 == 16);                                                    \
    if (filter[0] || filter[1] || filter[2]) {                                \
      /* Leading taps present: use the "8" kernels. */                        \
      while (w >= 16) {                                                       \
        vpx_filter_block1d16_##dir##8_##avg##opt(src_start,                   \
                                                 src_stride,                  \
                                                 dst,                         \
                                                 dst_stride,                  \
                                                 h,                           \
                                                 filter);                     \
        src += 16;                                                            \
        dst += 16;                                                            \
        w -= 16;                                                              \
      }                                                                       \
      while (w >= 8) {                                                        \
        vpx_filter_block1d8_##dir##8_##avg##opt(src_start,                    \
                                                src_stride,                   \
                                                dst,                          \
                                                dst_stride,                   \
                                                h,                            \
                                                filter);                      \
        src += 8;                                                             \
        dst += 8;                                                             \
        w -= 8;                                                               \
      }                                                                       \
      while (w >= 4) {                                                        \
        vpx_filter_block1d4_##dir##8_##avg##opt(src_start,                    \
                                                src_stride,                   \
                                                dst,                          \
                                                dst_stride,                   \
                                                h,                            \
                                                filter);                      \
        src += 4;                                                             \
        dst += 4;                                                             \
        w -= 4;                                                               \
      }                                                                       \
    } else {                                                                  \
      /* filter[0..2] are all zero: use the "2" kernels on plain src. */      \
      while (w >= 16) {                                                       \
        vpx_filter_block1d16_##dir##2_##avg##opt(src,                         \
                                                 src_stride,                  \
                                                 dst,                         \
                                                 dst_stride,                  \
                                                 h,                           \
                                                 filter);                     \
        src += 16;                                                            \
        dst += 16;                                                            \
        w -= 16;                                                              \
      }                                                                       \
      while (w >= 8) {                                                        \
        vpx_filter_block1d8_##dir##2_##avg##opt(src,                          \
                                                src_stride,                   \
                                                dst,                          \
                                                dst_stride,                   \
                                                h,                            \
                                                filter);                      \
        src += 8;                                                             \
        dst += 8;                                                             \
        w -= 8;                                                               \
      }                                                                       \
      while (w >= 4) {                                                        \
        vpx_filter_block1d4_##dir##2_##avg##opt(src,                          \
                                                src_stride,                   \
                                                dst,                          \
                                                dst_stride,                   \
                                                h,                            \
                                                filter);                      \
        src += 4;                                                             \
        dst += 4;                                                             \
        w -= 4;                                                               \
      }                                                                       \
    }                                                                         \
  }
106
/*
 * FUN_CONV_2D(avg, opt) defines vpx_convolve8_<avg><opt>(), a two-pass
 * convolution: vpx_convolve8_horiz_<opt>() writes into a 64-byte-pitch
 * scratch buffer on the stack, and vpx_convolve8_<avg>vert_<opt>() then
 * filters that buffer into dst.
 *
 * Asserted preconditions: filter_x[3] != 128, filter_y[3] != 128, w <= 64,
 * h <= 64, and both step values equal to 16.
 *
 * When filter_x[0..2] and filter_y[0..2] are all zero, the horizontal pass
 * starts at src and produces h + 1 rows. Otherwise it starts three rows above
 * src and produces h + 7 rows, and the vertical pass begins three rows into
 * the scratch buffer.
 */
#define FUN_CONV_2D(avg, opt)                                                 \
void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride,       \
                              uint8_t *dst, ptrdiff_t dst_stride,             \
                              const int16_t *filter_x, int x_step_q4,         \
                              const int16_t *filter_y, int y_step_q4,         \
                              int w, int h) {                                 \
  assert(filter_x[3] != 128);                                                 \
  assert(filter_y[3] != 128);                                                 \
  assert(w <= 64);                                                            \
  assert(h <= 64);                                                            \
  assert(x_step_q4 == 16);                                                    \
  assert(y_step_q4 == 16);                                                    \
  if (!(filter_x[0] || filter_x[1] || filter_x[2] ||                          \
        filter_y[0] || filter_y[1] || filter_y[2])) {                         \
    /* No leading taps in either kernel: one extra row is enough. */          \
    DECLARE_ALIGNED(16, uint8_t, scratch[64 * 65]);                           \
    vpx_convolve8_horiz_##opt(src, src_stride, scratch, 64,                   \
                              filter_x, x_step_q4, filter_y, y_step_q4,       \
                              w, h + 1);                                      \
    vpx_convolve8_##avg##vert_##opt(scratch, 64, dst, dst_stride,             \
                                    filter_x, x_step_q4, filter_y,            \
                                    y_step_q4, w, h);                         \
  } else {                                                                    \
    /* Seven extra rows, three of them above the block. */                    \
    DECLARE_ALIGNED(16, uint8_t, scratch[64 * 71]);                           \
    vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, scratch, 64,  \
                              filter_x, x_step_q4, filter_y, y_step_q4,       \
                              w, h + 7);                                      \
    vpx_convolve8_##avg##vert_##opt(scratch + 3 * 64, 64, dst, dst_stride,    \
                                    filter_x, x_step_q4, filter_y,            \
                                    y_step_q4, w, h);                         \
  }                                                                           \
}
138
139#if CONFIG_VP9_HIGHBITDEPTH
140
/*
 * Function type of a one-dimensional filter kernel operating on 16-bit
 * samples. Arguments, in order: source pointer and pitch, output pointer and
 * pitch, number of output rows, a pointer to int16_t filter coefficients, and
 * an integer bd.
 * NOTE(review): bd is presumably the sample bit depth, and this is presumably
 * the signature of the vpx_highbd_filter_block1d* kernels that
 * HIGH_FUN_CONV_1D invokes with the same argument order - confirm.
 */
typedef void highbd_filter8_1dfunction (
  const uint16_t *src_ptr,
  const ptrdiff_t src_pitch,
  uint16_t *output_ptr,
  ptrdiff_t out_pitch,
  unsigned int output_height,
  const int16_t *filter,
  int bd
);
150
151#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
152 void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
153 ptrdiff_t src_stride, \
154 uint8_t *dst8, \
155 ptrdiff_t dst_stride, \
156 const int16_t *filter_x, \
157 int x_step_q4, \
158 const int16_t *filter_y, \
159 int y_step_q4, \
160 int w, int h, int bd) { \
161 if (step_q4 == 16 && filter[3] != 128) { \
162 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
163 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
164 if (filter[0] || filter[1] || filter[2]) { \
165 while (w >= 16) { \
166 vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
167 src_stride, \
168 dst, \
169 dst_stride, \
170 h, \
171 filter, \
172 bd); \
173 src += 16; \
174 dst += 16; \
175 w -= 16; \
176 } \
177 while (w >= 8) { \
178 vpx_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
179 src_stride, \
180 dst, \
181 dst_stride, \
182 h, \
183 filter, \
184 bd); \
185 src += 8; \
186 dst += 8; \
187 w -= 8; \
188 } \
189 while (w >= 4) { \
190 vpx_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
191 src_stride, \
192 dst, \
193 dst_stride, \
194 h, \
195 filter, \
196 bd); \
197 src += 4; \
198 dst += 4; \
199 w -= 4; \
200 } \
201 } else { \
202 while (w >= 16) { \
203 vpx_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
204 src_stride, \
205 dst, \
206 dst_stride, \
207 h, \
208 filter, \
209 bd); \
210 src += 16; \
211 dst += 16; \
212 w -= 16; \
213 } \
214 while (w >= 8) { \
215 vpx_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
216 src_stride, \
217 dst, \
218 dst_stride, \
219 h, \
220 filter, \
221 bd); \
222 src += 8; \
223 dst += 8; \
224 w -= 8; \
225 } \
226 while (w >= 4) { \
227 vpx_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
228 src_stride, \
229 dst, \
230 dst_stride, \
231 h, \
232 filter, \
233 bd); \
234 src += 4; \
235 dst += 4; \
236 w -= 4; \
237 } \
238 } \
239 } \
240 if (w) { \
241 vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
242 filter_x, x_step_q4, filter_y, y_step_q4, \
243 w, h, bd); \
244 } \
245}
246
/*
 * HIGH_FUN_CONV_2D(avg, opt) defines vpx_highbd_convolve8_<avg><opt>(), a
 * two-pass convolution on 16-bit samples. w and h are asserted to be at
 * most 64.
 *
 * Unless both step values are 16, the whole call is forwarded to
 * vpx_highbd_convolve8_<avg>c(). Otherwise vpx_highbd_convolve8_horiz_<opt>()
 * fills a 64-element-pitch uint16_t scratch buffer on the stack (passed
 * through CONVERT_TO_BYTEPTR), and vpx_highbd_convolve8_<avg>vert_<opt>()
 * filters that buffer into dst.
 *
 * If any of filter_x[0..2] / filter_y[0..2] is non-zero, or either
 * filter[3] equals 128, the horizontal pass starts three rows above src and
 * produces h + 7 rows, and the vertical pass starts 3 * 64 entries into the
 * scratch buffer. Otherwise h + 1 rows are produced starting at src.
 */
#define HIGH_FUN_CONV_2D(avg, opt)                                            \
void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
                                     uint8_t *dst, ptrdiff_t dst_stride,      \
                                     const int16_t *filter_x, int x_step_q4,  \
                                     const int16_t *filter_y, int y_step_q4,  \
                                     int w, int h, int bd) {                  \
  assert(w <= 64);                                                            \
  assert(h <= 64);                                                            \
  if (x_step_q4 != 16 || y_step_q4 != 16) {                                   \
    /* Non-unit step: forward the whole call to the C version. */             \
    vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride,           \
                                  filter_x, x_step_q4, filter_y, y_step_q4,   \
                                  w, h, bd);                                  \
    return;                                                                   \
  }                                                                           \
  if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 ||      \
      filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) {      \
    DECLARE_ALIGNED(16, uint16_t, scratch[64 * 71]);                          \
    vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride,        \
                                     CONVERT_TO_BYTEPTR(scratch), 64,         \
                                     filter_x, x_step_q4,                     \
                                     filter_y, y_step_q4,                     \
                                     w, h + 7, bd);                           \
    /* Skip the three leading rows of the 64-pitch scratch buffer. */         \
    vpx_highbd_convolve8_##avg##vert_##opt(                                   \
        CONVERT_TO_BYTEPTR(scratch) + 3 * 64, 64, dst, dst_stride,            \
        filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);                  \
  } else {                                                                    \
    DECLARE_ALIGNED(16, uint16_t, scratch[64 * 65]);                          \
    vpx_highbd_convolve8_horiz_##opt(src, src_stride,                         \
                                     CONVERT_TO_BYTEPTR(scratch), 64,         \
                                     filter_x, x_step_q4,                     \
                                     filter_y, y_step_q4,                     \
                                     w, h + 1, bd);                           \
    vpx_highbd_convolve8_##avg##vert_##opt(                                   \
        CONVERT_TO_BYTEPTR(scratch), 64, dst, dst_stride,                     \
        filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);                  \
  }                                                                           \
}
288#endif // CONFIG_VP9_HIGHBITDEPTH
289
290#endif // VPX_DSP_X86_CONVOLVE_H_