Blame - libvpx/vpx_dsp/x86/convolve.h - platform/external/libvpx

blob: b6fbfcf928a92ab771a380625ea17a15ed6954b5 [file] [log] [blame]

Vignesh Venkatasubramanian	5a9753f	2016-01-19 11:05:09 -0800	[diff] [blame^]	1	/*
				2	* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10	#ifndef VPX_DSP_X86_CONVOLVE_H_
				11	#define VPX_DSP_X86_CONVOLVE_H_
				12
				13	#include <assert.h>
				14
				15	#include "./vpx_config.h"
				16	#include "vpx/vpx_integer.h"
				17	#include "vpx_ports/mem.h"
				18
				/* Function type for a 1-D filter routine working on 8-bit samples.
				 * The parameter order matches the calls made from FUN_CONV_1D: source
				 * pointer and pitch, destination pointer and pitch, the output height,
				 * and a pointer to the int16_t filter coefficients.
				 */
				typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
				                                uint8_t *output_ptr, ptrdiff_t out_pitch,
				                                uint32_t output_height,
				                                const int16_t *filter);
				27
				/* Generates vpx_convolve8_<name>_<opt>(), a single-direction convolve
				 * wrapper that dispatches to fixed-width block kernels.
				 *
				 *   name      - pasted into the generated function name.
				 *   step_q4   - expression for the step in this direction; asserted to be 16.
				 *   filter    - expression for the filter in this direction; filter[3] is
				 *               asserted not to be 128.
				 *   dir       - pasted into the kernel names (vpx_filter_block1d<W>_<dir>...).
				 *   src_start - source expression handed to the `<dir>8` kernels; the
				 *               `<dir>2` kernels receive plain `src`.
				 *   avg       - optional infix pasted in front of `opt` in the kernel names.
				 *   opt       - suffix pasted onto both the wrapper and the kernel names.
				 *
				 * If any of filter[0..2] is non-zero the `<dir>8` kernels are used
				 * (presumably the full 8-tap path), otherwise the `<dir>2` kernels
				 * (presumably bilinear).  The width is consumed in strips of 16, then 8,
				 * then 4 columns; a remainder narrower than 4 columns is not processed.
				 */
				#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
				  void vpx_convolve8_##name##_##opt( \
				      const uint8_t *src, ptrdiff_t src_stride, \
				      uint8_t *dst, ptrdiff_t dst_stride, \
				      const int16_t *filter_x, int x_step_q4, \
				      const int16_t *filter_y, int y_step_q4, \
				      int w, int h) { \
				    assert(filter[3] != 128); \
				    assert(step_q4 == 16); \
				    if (filter[0] || filter[1] || filter[2]) { \
				      /* Outer coefficients in use: `<dir>8` kernels, fed from src_start. */ \
				      for (; w >= 16; w -= 16, src += 16, dst += 16) { \
				        vpx_filter_block1d16_##dir##8_##avg##opt( \
				            src_start, src_stride, dst, dst_stride, h, filter); \
				      } \
				      for (; w >= 8; w -= 8, src += 8, dst += 8) { \
				        vpx_filter_block1d8_##dir##8_##avg##opt( \
				            src_start, src_stride, dst, dst_stride, h, filter); \
				      } \
				      for (; w >= 4; w -= 4, src += 4, dst += 4) { \
				        vpx_filter_block1d4_##dir##8_##avg##opt( \
				            src_start, src_stride, dst, dst_stride, h, filter); \
				      } \
				    } else { \
				      /* filter[0..2] are all zero: `<dir>2` kernels, fed from src. */ \
				      for (; w >= 16; w -= 16, src += 16, dst += 16) { \
				        vpx_filter_block1d16_##dir##2_##avg##opt( \
				            src, src_stride, dst, dst_stride, h, filter); \
				      } \
				      for (; w >= 8; w -= 8, src += 8, dst += 8) { \
				        vpx_filter_block1d8_##dir##2_##avg##opt( \
				            src, src_stride, dst, dst_stride, h, filter); \
				      } \
				      for (; w >= 4; w -= 4, src += 4, dst += 4) { \
				        vpx_filter_block1d4_##dir##2_##avg##opt( \
				            src, src_stride, dst, dst_stride, h, filter); \
				      } \
				    } \
				  }
				106
				/* Generates vpx_convolve8_<avg><opt>(), a two-pass (horizontal then
				 * vertical) convolve built on vpx_convolve8_horiz_<opt>() and
				 * vpx_convolve8_<avg>vert_<opt>().
				 *
				 *   avg - optional prefix pasted into the wrapper and vertical-pass names.
				 *   opt - suffix pasted onto every generated/called function name.
				 *
				 * Preconditions (asserted): neither filter has filter[3] == 128, w and h
				 * are at most 64, and both steps equal 16.
				 *
				 * The horizontal pass writes into a 64-column scratch buffer that the
				 * vertical pass then reads.  When every one of filter_x[0..2] and
				 * filter_y[0..2] is zero, only h + 1 rows are produced starting at src.
				 * Otherwise h + 7 rows are produced starting three rows above src, and
				 * the vertical pass starts at scratch row 3.
				 */
				#define FUN_CONV_2D(avg, opt) \
				  void vpx_convolve8_##avg##opt( \
				      const uint8_t *src, ptrdiff_t src_stride, \
				      uint8_t *dst, ptrdiff_t dst_stride, \
				      const int16_t *filter_x, int x_step_q4, \
				      const int16_t *filter_y, int y_step_q4, \
				      int w, int h) { \
				    assert(filter_x[3] != 128); \
				    assert(filter_y[3] != 128); \
				    assert(w <= 64); \
				    assert(h <= 64); \
				    assert(x_step_q4 == 16); \
				    assert(y_step_q4 == 16); \
				    if (!filter_x[0] && !filter_x[1] && !filter_x[2] && \
				        !filter_y[0] && !filter_y[1] && !filter_y[2]) { \
				      /* Short scratch: one extra row beyond the h output rows. */ \
				      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
				      vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
				                                filter_x, x_step_q4, \
				                                filter_y, y_step_q4, \
				                                w, h + 1); \
				      vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
				                                      filter_x, x_step_q4, \
				                                      filter_y, y_step_q4, \
				                                      w, h); \
				    } else { \
				      /* Tall scratch: 3 rows above and 4 rows below the h output rows. */ \
				      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
				      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
				                                fdata2, 64, \
				                                filter_x, x_step_q4, \
				                                filter_y, y_step_q4, \
				                                w, h + 7); \
				      vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, \
				                                      dst, dst_stride, \
				                                      filter_x, x_step_q4, \
				                                      filter_y, y_step_q4, \
				                                      w, h); \
				    } \
				  }
				138
				139	#if CONFIG_VP9_HIGHBITDEPTH
				140
				/* Function type for a 1-D filter routine working on 16-bit samples.
				 * The parameter order matches the calls made from HIGH_FUN_CONV_1D:
				 * source pointer and pitch, destination pointer and pitch, the output
				 * height, a pointer to the int16_t filter coefficients, and `bd`
				 * (presumably the bit depth -- confirm against the kernel sources).
				 */
				typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
				                                       const ptrdiff_t src_pitch,
				                                       uint16_t *output_ptr,
				                                       ptrdiff_t out_pitch,
				                                       unsigned int output_height,
				                                       const int16_t *filter,
				                                       int bd);
				150
				/* Generates vpx_highbd_convolve8_<name>_<opt>(), a single-direction
				 * high-bit-depth convolve wrapper with a C fallback.
				 *
				 *   name      - pasted into the generated function name and into the
				 *               fallback name vpx_highbd_convolve8_<name>_c.
				 *   step_q4   - expression for the step in this direction.
				 *   filter    - expression for the filter in this direction.
				 *   dir       - pasted into the kernel names
				 *               (vpx_highbd_filter_block1d<W>_<dir>...).
				 *   src_start - source expression handed to the `<dir>8` kernels; the
				 *               `<dir>2` kernels receive plain `src`.
				 *   avg       - optional infix pasted in front of `opt` in the kernel names.
				 *   opt       - suffix pasted onto both the wrapper and the kernel names.
				 *
				 * When step_q4 == 16 and filter[3] != 128 the width is consumed in strips
				 * of 16, then 8, then 4 columns: by the `<dir>8` kernels if any of
				 * filter[0..2] is non-zero, by the `<dir>2` kernels otherwise.  Whatever
				 * width is left (all of it when the fast path is skipped) goes to the C
				 * implementation.
				 *
				 * The fallback is handed the *advanced* src/dst positions, converted back
				 * with CONVERT_TO_BYTEPTR, so it continues where the strip loops stopped
				 * instead of restarting at column 0.  This assumes CONVERT_TO_BYTEPTR is
				 * the inverse of CONVERT_TO_SHORTPTR, which is how the two are paired
				 * elsewhere in this header.
				 */
				#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
				  void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
				                                           ptrdiff_t src_stride, \
				                                           uint8_t *dst8, \
				                                           ptrdiff_t dst_stride, \
				                                           const int16_t *filter_x, \
				                                           int x_step_q4, \
				                                           const int16_t *filter_y, \
				                                           int y_step_q4, \
				                                           int w, int h, int bd) { \
				    /* Function-scope cursors so the fallback can see how far we got. */ \
				    uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
				    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
				    if (step_q4 == 16 && filter[3] != 128) { \
				      if (filter[0] || filter[1] || filter[2]) { \
				        for (; w >= 16; w -= 16, src += 16, dst += 16) { \
				          vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
				              src_start, src_stride, dst, dst_stride, h, filter, bd); \
				        } \
				        for (; w >= 8; w -= 8, src += 8, dst += 8) { \
				          vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \
				              src_start, src_stride, dst, dst_stride, h, filter, bd); \
				        } \
				        for (; w >= 4; w -= 4, src += 4, dst += 4) { \
				          vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \
				              src_start, src_stride, dst, dst_stride, h, filter, bd); \
				        } \
				      } else { \
				        for (; w >= 16; w -= 16, src += 16, dst += 16) { \
				          vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \
				              src, src_stride, dst, dst_stride, h, filter, bd); \
				        } \
				        for (; w >= 8; w -= 8, src += 8, dst += 8) { \
				          vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \
				              src, src_stride, dst, dst_stride, h, filter, bd); \
				        } \
				        for (; w >= 4; w -= 4, src += 4, dst += 4) { \
				          vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \
				              src, src_stride, dst, dst_stride, h, filter, bd); \
				        } \
				      } \
				    } \
				    if (w) { \
				      /* Remaining columns start at the current cursors, not at src8/dst8. */ \
				      vpx_highbd_convolve8_##name##_c(CONVERT_TO_BYTEPTR(src), src_stride, \
				                                      CONVERT_TO_BYTEPTR(dst), dst_stride, \
				                                      filter_x, x_step_q4, \
				                                      filter_y, y_step_q4, \
				                                      w, h, bd); \
				    } \
				  }
				246
				/* Generates vpx_highbd_convolve8_<avg><opt>(), a two-pass (horizontal
				 * then vertical) high-bit-depth convolve built on
				 * vpx_highbd_convolve8_horiz_<opt>() and
				 * vpx_highbd_convolve8_<avg>vert_<opt>().
				 *
				 *   avg - optional prefix pasted into the wrapper, vertical-pass and
				 *         C-fallback names.
				 *   opt - suffix pasted onto the wrapper and the two pass functions.
				 *
				 * w and h are asserted to be at most 64.  If either step differs from 16
				 * the whole call is forwarded to vpx_highbd_convolve8_<avg>c().
				 *
				 * Otherwise the horizontal pass writes into a 64-column uint16_t scratch
				 * buffer (passed through CONVERT_TO_BYTEPTR) that the vertical pass then
				 * reads.  If any of filter_x[0..2] / filter_y[0..2] is non-zero, or
				 * either filter has filter[3] == 128, h + 7 rows are produced starting
				 * three rows above src and the vertical pass starts at scratch row 3.
				 * Otherwise only h + 1 rows are produced starting at src.
				 */
				#define HIGH_FUN_CONV_2D(avg, opt) \
				  void vpx_highbd_convolve8_##avg##opt( \
				      const uint8_t *src, ptrdiff_t src_stride, \
				      uint8_t *dst, ptrdiff_t dst_stride, \
				      const int16_t *filter_x, int x_step_q4, \
				      const int16_t *filter_y, int y_step_q4, \
				      int w, int h, int bd) { \
				    assert(w <= 64); \
				    assert(h <= 64); \
				    if (x_step_q4 != 16 || y_step_q4 != 16) { \
				      /* Non-unit step: hand everything to the C implementation. */ \
				      vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
				                                    filter_x, x_step_q4, \
				                                    filter_y, y_step_q4, \
				                                    w, h, bd); \
				      return; \
				    } \
				    if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
				        filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
				      /* Tall scratch: 3 rows above and 4 rows below the h output rows. */ \
				      DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
				      vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
				                                       CONVERT_TO_BYTEPTR(fdata2), 64, \
				                                       filter_x, x_step_q4, \
				                                       filter_y, y_step_q4, \
				                                       w, h + 7, bd); \
				      vpx_highbd_convolve8_##avg##vert_##opt( \
				          CONVERT_TO_BYTEPTR(fdata2) + 3 * 64, 64, \
				          dst, dst_stride, \
				          filter_x, x_step_q4, \
				          filter_y, y_step_q4, \
				          w, h, bd); \
				    } else { \
				      /* Short scratch: one extra row beyond the h output rows. */ \
				      DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
				      vpx_highbd_convolve8_horiz_##opt(src, src_stride, \
				                                       CONVERT_TO_BYTEPTR(fdata2), 64, \
				                                       filter_x, x_step_q4, \
				                                       filter_y, y_step_q4, \
				                                       w, h + 1, bd); \
				      vpx_highbd_convolve8_##avg##vert_##opt( \
				          CONVERT_TO_BYTEPTR(fdata2), 64, \
				          dst, dst_stride, \
				          filter_x, x_step_q4, \
				          filter_y, y_step_q4, \
				          w, h, bd); \
				    } \
				  }
				288	#endif // CONFIG_VP9_HIGHBITDEPTH
				289
				290	#endif // VPX_DSP_X86_CONVOLVE_H_