Blame - libvpx/vpx_dsp/vpx_convolve.c - platform/external/libvpx

blob: 2d1c927cbea7a66c2e3b9758649aa1362b302203 [file] [log] [blame]

Vignesh Venkatasubramanian	5a9753f	2016-01-19 11:05:09 -0800	[diff] [blame^]	1	/*
				2	* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#include <assert.h>
				12	#include <string.h>
				13
				14	#include "./vpx_config.h"
				15	#include "./vpx_dsp_rtcd.h"
				16	#include "vpx/vpx_integer.h"
				17	#include "vpx_dsp/vpx_convolve.h"
				18	#include "vpx_dsp/vpx_dsp_common.h"
				19	#include "vpx_dsp/vpx_filter.h"
				20	#include "vpx_ports/mem.h"
				21
				22	static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
				23	uint8_t *dst, ptrdiff_t dst_stride,
				24	const InterpKernel *x_filters,
				25	int x0_q4, int x_step_q4, int w, int h) {
				26	int x, y;
				27	src -= SUBPEL_TAPS / 2 - 1;
				28	for (y = 0; y < h; ++y) {
				29	int x_q4 = x0_q4;
				30	for (x = 0; x < w; ++x) {
				31	const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
				32	const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
				33	int k, sum = 0;
				34	for (k = 0; k < SUBPEL_TAPS; ++k)
				35	sum += src_x[k] * x_filter[k];
				36	dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
				37	x_q4 += x_step_q4;
				38	}
				39	src += src_stride;
				40	dst += dst_stride;
				41	}
				42	}
				43
				44	static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
				45	uint8_t *dst, ptrdiff_t dst_stride,
				46	const InterpKernel *x_filters,
				47	int x0_q4, int x_step_q4, int w, int h) {
				48	int x, y;
				49	src -= SUBPEL_TAPS / 2 - 1;
				50	for (y = 0; y < h; ++y) {
				51	int x_q4 = x0_q4;
				52	for (x = 0; x < w; ++x) {
				53	const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
				54	const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
				55	int k, sum = 0;
				56	for (k = 0; k < SUBPEL_TAPS; ++k)
				57	sum += src_x[k] * x_filter[k];
				58	dst[x] = ROUND_POWER_OF_TWO(dst[x] +
				59	clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
				60	x_q4 += x_step_q4;
				61	}
				62	src += src_stride;
				63	dst += dst_stride;
				64	}
				65	}
				66
				67	static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
				68	uint8_t *dst, ptrdiff_t dst_stride,
				69	const InterpKernel *y_filters,
				70	int y0_q4, int y_step_q4, int w, int h) {
				71	int x, y;
				72	src -= src_stride * (SUBPEL_TAPS / 2 - 1);
				73
				74	for (x = 0; x < w; ++x) {
				75	int y_q4 = y0_q4;
				76	for (y = 0; y < h; ++y) {
				77	const unsigned char src_y = &src[(y_q4 >> SUBPEL_BITS) src_stride];
				78	const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
				79	int k, sum = 0;
				80	for (k = 0; k < SUBPEL_TAPS; ++k)
				81	sum += src_y[k * src_stride] * y_filter[k];
				82	dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
				83	y_q4 += y_step_q4;
				84	}
				85	++src;
				86	++dst;
				87	}
				88	}
				89
				90	static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
				91	uint8_t *dst, ptrdiff_t dst_stride,
				92	const InterpKernel *y_filters,
				93	int y0_q4, int y_step_q4, int w, int h) {
				94	int x, y;
				95	src -= src_stride * (SUBPEL_TAPS / 2 - 1);
				96
				97	for (x = 0; x < w; ++x) {
				98	int y_q4 = y0_q4;
				99	for (y = 0; y < h; ++y) {
				100	const unsigned char src_y = &src[(y_q4 >> SUBPEL_BITS) src_stride];
				101	const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
				102	int k, sum = 0;
				103	for (k = 0; k < SUBPEL_TAPS; ++k)
				104	sum += src_y[k * src_stride] * y_filter[k];
				105	dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
				106	clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
				107	y_q4 += y_step_q4;
				108	}
				109	++src;
				110	++dst;
				111	}
				112	}
				113
				114	static void convolve(const uint8_t *src, ptrdiff_t src_stride,
				115	uint8_t *dst, ptrdiff_t dst_stride,
				116	const InterpKernel *const x_filters,
				117	int x0_q4, int x_step_q4,
				118	const InterpKernel *const y_filters,
				119	int y0_q4, int y_step_q4,
				120	int w, int h) {
				121	// Note: Fixed size intermediate buffer, temp, places limits on parameters.
				122	// 2d filtering proceeds in 2 steps:
				123	// (1) Interpolate horizontally into an intermediate buffer, temp.
				124	// (2) Interpolate temp vertically to derive the sub-pixel result.
				125	// Deriving the maximum number of rows in the temp buffer (135):
				126	// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
				127	// --Largest block size is 64x64 pixels.
				128	// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
				129	// original frame (in 1/16th pixel units).
				130	// --Must round-up because block may be located at sub-pixel position.
				131	// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
				132	// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
				133	uint8_t temp[135 * 64];
				134	int intermediate_height =
				135	(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
				136
				137	assert(w <= 64);
				138	assert(h <= 64);
				139	assert(y_step_q4 <= 32);
				140	assert(x_step_q4 <= 32);
				141
				142	convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
				143	x_filters, x0_q4, x_step_q4, w, intermediate_height);
				144	convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
				145	y_filters, y0_q4, y_step_q4, w, h);
				146	}
				147
				148	static const InterpKernel get_filter_base(const int16_t filter) {
				149	// NOTE: This assumes that the filter table is 256-byte aligned.
				150	// TODO(agrange) Modify to make independent of table alignment.
				151	return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
				152	}
				153
				154	static int get_filter_offset(const int16_t f, const InterpKernel base) {
				155	return (int)((const InterpKernel *)(intptr_t)f - base);
				156	}
				157
				158	void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
				159	uint8_t *dst, ptrdiff_t dst_stride,
				160	const int16_t *filter_x, int x_step_q4,
				161	const int16_t *filter_y, int y_step_q4,
				162	int w, int h) {
				163	const InterpKernel *const filters_x = get_filter_base(filter_x);
				164	const int x0_q4 = get_filter_offset(filter_x, filters_x);
				165
				166	(void)filter_y;
				167	(void)y_step_q4;
				168
				169	convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
				170	x0_q4, x_step_q4, w, h);
				171	}
				172
				173	void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
				174	uint8_t *dst, ptrdiff_t dst_stride,
				175	const int16_t *filter_x, int x_step_q4,
				176	const int16_t *filter_y, int y_step_q4,
				177	int w, int h) {
				178	const InterpKernel *const filters_x = get_filter_base(filter_x);
				179	const int x0_q4 = get_filter_offset(filter_x, filters_x);
				180
				181	(void)filter_y;
				182	(void)y_step_q4;
				183
				184	convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
				185	x0_q4, x_step_q4, w, h);
				186	}
				187
				188	void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
				189	uint8_t *dst, ptrdiff_t dst_stride,
				190	const int16_t *filter_x, int x_step_q4,
				191	const int16_t *filter_y, int y_step_q4,
				192	int w, int h) {
				193	const InterpKernel *const filters_y = get_filter_base(filter_y);
				194	const int y0_q4 = get_filter_offset(filter_y, filters_y);
				195
				196	(void)filter_x;
				197	(void)x_step_q4;
				198
				199	convolve_vert(src, src_stride, dst, dst_stride, filters_y,
				200	y0_q4, y_step_q4, w, h);
				201	}
				202
				203	void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
				204	uint8_t *dst, ptrdiff_t dst_stride,
				205	const int16_t *filter_x, int x_step_q4,
				206	const int16_t *filter_y, int y_step_q4,
				207	int w, int h) {
				208	const InterpKernel *const filters_y = get_filter_base(filter_y);
				209	const int y0_q4 = get_filter_offset(filter_y, filters_y);
				210
				211	(void)filter_x;
				212	(void)x_step_q4;
				213
				214	convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
				215	y0_q4, y_step_q4, w, h);
				216	}
				217
				218	void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
				219	uint8_t *dst, ptrdiff_t dst_stride,
				220	const int16_t *filter_x, int x_step_q4,
				221	const int16_t *filter_y, int y_step_q4,
				222	int w, int h) {
				223	const InterpKernel *const filters_x = get_filter_base(filter_x);
				224	const int x0_q4 = get_filter_offset(filter_x, filters_x);
				225
				226	const InterpKernel *const filters_y = get_filter_base(filter_y);
				227	const int y0_q4 = get_filter_offset(filter_y, filters_y);
				228
				229	convolve(src, src_stride, dst, dst_stride,
				230	filters_x, x0_q4, x_step_q4,
				231	filters_y, y0_q4, y_step_q4, w, h);
				232	}
				233
				234	void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
				235	uint8_t *dst, ptrdiff_t dst_stride,
				236	const int16_t *filter_x, int x_step_q4,
				237	const int16_t *filter_y, int y_step_q4,
				238	int w, int h) {
				239	/* Fixed size intermediate buffer places limits on parameters. */
				240	DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
				241	assert(w <= 64);
				242	assert(h <= 64);
				243
				244	vpx_convolve8_c(src, src_stride, temp, 64,
				245	filter_x, x_step_q4, filter_y, y_step_q4, w, h);
				246	vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
				247	}
				248
				249	void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
				250	uint8_t *dst, ptrdiff_t dst_stride,
				251	const int16_t *filter_x, int filter_x_stride,
				252	const int16_t *filter_y, int filter_y_stride,
				253	int w, int h) {
				254	int r;
				255
				256	(void)filter_x; (void)filter_x_stride;
				257	(void)filter_y; (void)filter_y_stride;
				258
				259	for (r = h; r > 0; --r) {
				260	memcpy(dst, src, w);
				261	src += src_stride;
				262	dst += dst_stride;
				263	}
				264	}
				265
				266	void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
				267	uint8_t *dst, ptrdiff_t dst_stride,
				268	const int16_t *filter_x, int filter_x_stride,
				269	const int16_t *filter_y, int filter_y_stride,
				270	int w, int h) {
				271	int x, y;
				272
				273	(void)filter_x; (void)filter_x_stride;
				274	(void)filter_y; (void)filter_y_stride;
				275
				276	for (y = 0; y < h; ++y) {
				277	for (x = 0; x < w; ++x)
				278	dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
				279
				280	src += src_stride;
				281	dst += dst_stride;
				282	}
				283	}
				284
				285	void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
				286	uint8_t *dst, ptrdiff_t dst_stride,
				287	const int16_t *filter_x, int x_step_q4,
				288	const int16_t *filter_y, int y_step_q4,
				289	int w, int h) {
				290	vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
				291	filter_y, y_step_q4, w, h);
				292	}
				293
				294	void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride,
				295	uint8_t *dst, ptrdiff_t dst_stride,
				296	const int16_t *filter_x, int x_step_q4,
				297	const int16_t *filter_y, int y_step_q4,
				298	int w, int h) {
				299	vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
				300	filter_y, y_step_q4, w, h);
				301	}
				302
				303	void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride,
				304	uint8_t *dst, ptrdiff_t dst_stride,
				305	const int16_t *filter_x, int x_step_q4,
				306	const int16_t *filter_y, int y_step_q4,
				307	int w, int h) {
				308	vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
				309	filter_y, y_step_q4, w, h);
				310	}
				311
				312	void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
				313	uint8_t *dst, ptrdiff_t dst_stride,
				314	const int16_t *filter_x, int x_step_q4,
				315	const int16_t *filter_y, int y_step_q4,
				316	int w, int h) {
				317	vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
				318	x_step_q4, filter_y, y_step_q4, w, h);
				319	}
				320
				321	void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
				322	uint8_t *dst, ptrdiff_t dst_stride,
				323	const int16_t *filter_x, int x_step_q4,
				324	const int16_t *filter_y, int y_step_q4,
				325	int w, int h) {
				326	vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
				327	x_step_q4, filter_y, y_step_q4, w, h);
				328	}
				329
				330	void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride,
				331	uint8_t *dst, ptrdiff_t dst_stride,
				332	const int16_t *filter_x, int x_step_q4,
				333	const int16_t *filter_y, int y_step_q4,
				334	int w, int h) {
				335	vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
				336	filter_y, y_step_q4, w, h);
				337	}
				338
				339	#if CONFIG_VP9_HIGHBITDEPTH
				340	static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
				341	uint8_t *dst8, ptrdiff_t dst_stride,
				342	const InterpKernel *x_filters,
				343	int x0_q4, int x_step_q4,
				344	int w, int h, int bd) {
				345	int x, y;
				346	uint16_t *src = CONVERT_TO_SHORTPTR(src8);
				347	uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
				348	src -= SUBPEL_TAPS / 2 - 1;
				349	for (y = 0; y < h; ++y) {
				350	int x_q4 = x0_q4;
				351	for (x = 0; x < w; ++x) {
				352	const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
				353	const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
				354	int k, sum = 0;
				355	for (k = 0; k < SUBPEL_TAPS; ++k)
				356	sum += src_x[k] * x_filter[k];
				357	dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
				358	x_q4 += x_step_q4;
				359	}
				360	src += src_stride;
				361	dst += dst_stride;
				362	}
				363	}
				364
				365	static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
				366	uint8_t *dst8, ptrdiff_t dst_stride,
				367	const InterpKernel *x_filters,
				368	int x0_q4, int x_step_q4,
				369	int w, int h, int bd) {
				370	int x, y;
				371	uint16_t *src = CONVERT_TO_SHORTPTR(src8);
				372	uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
				373	src -= SUBPEL_TAPS / 2 - 1;
				374	for (y = 0; y < h; ++y) {
				375	int x_q4 = x0_q4;
				376	for (x = 0; x < w; ++x) {
				377	const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
				378	const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
				379	int k, sum = 0;
				380	for (k = 0; k < SUBPEL_TAPS; ++k)
				381	sum += src_x[k] * x_filter[k];
				382	dst[x] = ROUND_POWER_OF_TWO(dst[x] +
				383	clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
				384	x_q4 += x_step_q4;
				385	}
				386	src += src_stride;
				387	dst += dst_stride;
				388	}
				389	}
				390
				391	static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
				392	uint8_t *dst8, ptrdiff_t dst_stride,
				393	const InterpKernel *y_filters,
				394	int y0_q4, int y_step_q4, int w, int h,
				395	int bd) {
				396	int x, y;
				397	uint16_t *src = CONVERT_TO_SHORTPTR(src8);
				398	uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
				399	src -= src_stride * (SUBPEL_TAPS / 2 - 1);
				400	for (x = 0; x < w; ++x) {
				401	int y_q4 = y0_q4;
				402	for (y = 0; y < h; ++y) {
				403	const uint16_t src_y = &src[(y_q4 >> SUBPEL_BITS) src_stride];
				404	const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
				405	int k, sum = 0;
				406	for (k = 0; k < SUBPEL_TAPS; ++k)
				407	sum += src_y[k * src_stride] * y_filter[k];
				408	dst[y * dst_stride] = clip_pixel_highbd(
				409	ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
				410	y_q4 += y_step_q4;
				411	}
				412	++src;
				413	++dst;
				414	}
				415	}
				416
				417	static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
				418	uint8_t *dst8, ptrdiff_t dst_stride,
				419	const InterpKernel *y_filters,
				420	int y0_q4, int y_step_q4, int w, int h,
				421	int bd) {
				422	int x, y;
				423	uint16_t *src = CONVERT_TO_SHORTPTR(src8);
				424	uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
				425	src -= src_stride * (SUBPEL_TAPS / 2 - 1);
				426	for (x = 0; x < w; ++x) {
				427	int y_q4 = y0_q4;
				428	for (y = 0; y < h; ++y) {
				429	const uint16_t src_y = &src[(y_q4 >> SUBPEL_BITS) src_stride];
				430	const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
				431	int k, sum = 0;
				432	for (k = 0; k < SUBPEL_TAPS; ++k)
				433	sum += src_y[k * src_stride] * y_filter[k];
				434	dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
				435	clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
				436	y_q4 += y_step_q4;
				437	}
				438	++src;
				439	++dst;
				440	}
				441	}
				442
				443	static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
				444	uint8_t *dst, ptrdiff_t dst_stride,
				445	const InterpKernel *const x_filters,
				446	int x0_q4, int x_step_q4,
				447	const InterpKernel *const y_filters,
				448	int y0_q4, int y_step_q4,
				449	int w, int h, int bd) {
				450	// Note: Fixed size intermediate buffer, temp, places limits on parameters.
				451	// 2d filtering proceeds in 2 steps:
				452	// (1) Interpolate horizontally into an intermediate buffer, temp.
				453	// (2) Interpolate temp vertically to derive the sub-pixel result.
				454	// Deriving the maximum number of rows in the temp buffer (135):
				455	// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
				456	// --Largest block size is 64x64 pixels.
				457	// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
				458	// original frame (in 1/16th pixel units).
				459	// --Must round-up because block may be located at sub-pixel position.
				460	// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
				461	// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
				462	uint16_t temp[64 * 135];
				463	int intermediate_height =
				464	(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
				465
				466	assert(w <= 64);
				467	assert(h <= 64);
				468	assert(y_step_q4 <= 32);
				469	assert(x_step_q4 <= 32);
				470
				471	highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
				472	src_stride, CONVERT_TO_BYTEPTR(temp), 64,
				473	x_filters, x0_q4, x_step_q4, w,
				474	intermediate_height, bd);
				475	highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
				476	64, dst, dst_stride, y_filters, y0_q4, y_step_q4,
				477	w, h, bd);
				478	}
				479
				480
				481	void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
				482	uint8_t *dst, ptrdiff_t dst_stride,
				483	const int16_t *filter_x, int x_step_q4,
				484	const int16_t *filter_y, int y_step_q4,
				485	int w, int h, int bd) {
				486	const InterpKernel *const filters_x = get_filter_base(filter_x);
				487	const int x0_q4 = get_filter_offset(filter_x, filters_x);
				488	(void)filter_y;
				489	(void)y_step_q4;
				490
				491	highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
				492	x0_q4, x_step_q4, w, h, bd);
				493	}
				494
				495	void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
				496	uint8_t *dst, ptrdiff_t dst_stride,
				497	const int16_t *filter_x, int x_step_q4,
				498	const int16_t *filter_y, int y_step_q4,
				499	int w, int h, int bd) {
				500	const InterpKernel *const filters_x = get_filter_base(filter_x);
				501	const int x0_q4 = get_filter_offset(filter_x, filters_x);
				502	(void)filter_y;
				503	(void)y_step_q4;
				504
				505	highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
				506	x0_q4, x_step_q4, w, h, bd);
				507	}
				508
				509	void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
				510	uint8_t *dst, ptrdiff_t dst_stride,
				511	const int16_t *filter_x, int x_step_q4,
				512	const int16_t *filter_y, int y_step_q4,
				513	int w, int h, int bd) {
				514	const InterpKernel *const filters_y = get_filter_base(filter_y);
				515	const int y0_q4 = get_filter_offset(filter_y, filters_y);
				516	(void)filter_x;
				517	(void)x_step_q4;
				518
				519	highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y,
				520	y0_q4, y_step_q4, w, h, bd);
				521	}
				522
				523	void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
				524	uint8_t *dst, ptrdiff_t dst_stride,
				525	const int16_t *filter_x, int x_step_q4,
				526	const int16_t *filter_y, int y_step_q4,
				527	int w, int h, int bd) {
				528	const InterpKernel *const filters_y = get_filter_base(filter_y);
				529	const int y0_q4 = get_filter_offset(filter_y, filters_y);
				530	(void)filter_x;
				531	(void)x_step_q4;
				532
				533	highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
				534	y0_q4, y_step_q4, w, h, bd);
				535	}
				536
				537	void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
				538	uint8_t *dst, ptrdiff_t dst_stride,
				539	const int16_t *filter_x, int x_step_q4,
				540	const int16_t *filter_y, int y_step_q4,
				541	int w, int h, int bd) {
				542	const InterpKernel *const filters_x = get_filter_base(filter_x);
				543	const int x0_q4 = get_filter_offset(filter_x, filters_x);
				544
				545	const InterpKernel *const filters_y = get_filter_base(filter_y);
				546	const int y0_q4 = get_filter_offset(filter_y, filters_y);
				547
				548	highbd_convolve(src, src_stride, dst, dst_stride,
				549	filters_x, x0_q4, x_step_q4,
				550	filters_y, y0_q4, y_step_q4, w, h, bd);
				551	}
				552
				553	void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
				554	uint8_t *dst, ptrdiff_t dst_stride,
				555	const int16_t *filter_x, int x_step_q4,
				556	const int16_t *filter_y, int y_step_q4,
				557	int w, int h, int bd) {
				558	// Fixed size intermediate buffer places limits on parameters.
				559	DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
				560	assert(w <= 64);
				561	assert(h <= 64);
				562
				563	vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
				564	filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
				565	vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
				566	NULL, 0, NULL, 0, w, h, bd);
				567	}
				568
				569	void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
				570	uint8_t *dst8, ptrdiff_t dst_stride,
				571	const int16_t *filter_x, int filter_x_stride,
				572	const int16_t *filter_y, int filter_y_stride,
				573	int w, int h, int bd) {
				574	int r;
				575	uint16_t *src = CONVERT_TO_SHORTPTR(src8);
				576	uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
				577	(void)filter_x;
				578	(void)filter_y;
				579	(void)filter_x_stride;
				580	(void)filter_y_stride;
				581	(void)bd;
				582
				583	for (r = h; r > 0; --r) {
				584	memcpy(dst, src, w * sizeof(uint16_t));
				585	src += src_stride;
				586	dst += dst_stride;
				587	}
				588	}
				589
				590	void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
				591	uint8_t *dst8, ptrdiff_t dst_stride,
				592	const int16_t *filter_x, int filter_x_stride,
				593	const int16_t *filter_y, int filter_y_stride,
				594	int w, int h, int bd) {
				595	int x, y;
				596	uint16_t *src = CONVERT_TO_SHORTPTR(src8);
				597	uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
				598	(void)filter_x;
				599	(void)filter_y;
				600	(void)filter_x_stride;
				601	(void)filter_y_stride;
				602	(void)bd;
				603
				604	for (y = 0; y < h; ++y) {
				605	for (x = 0; x < w; ++x) {
				606	dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
				607	}
				608	src += src_stride;
				609	dst += dst_stride;
				610	}
				611	}
				612	#endif