
get rid of yasm

Frank-Rainer Grahl 1 month ago
commit c70fcdef8e
35 changed files with 23394 additions and 121 deletions
  1. mozilla-release/patches/1525393-1-75a1.patch (+384 -0)
  2. mozilla-release/patches/1540760-1-68a1.patch (+105 -0)
  3. mozilla-release/patches/1540760-2-68a1.patch (+74 -0)
  4. mozilla-release/patches/1540760-3-68a1.patch (+57 -0)
  5. mozilla-release/patches/1540760-4-68a1.patch (+115 -0)
  6. mozilla-release/patches/1540760-5-68a1.patch (+12454 -0)
  7. mozilla-release/patches/1540760-6-68a1.patch (+1016 -0)
  8. mozilla-release/patches/1585358-71a1.patch (+2 -2)
  9. mozilla-release/patches/1585359-71a1.patch (+6250 -0)
  10. mozilla-release/patches/1650299-80a1.patch (+204 -0)
  11. mozilla-release/patches/1656063-81a1.patch (+33 -0)
  12. mozilla-release/patches/1669888-83a1.patch (+57 -0)
  13. mozilla-release/patches/1692940-01-88a1.patch (+44 -0)
  14. mozilla-release/patches/1692940-02-88a1.patch (+208 -0)
  15. mozilla-release/patches/1692940-03-88a1.patch (+120 -0)
  16. mozilla-release/patches/1692940-04-88a1.patch (+166 -0)
  17. mozilla-release/patches/1692940-05-88a1.patch (+73 -38)
  18. mozilla-release/patches/1692940-06-88a1.patch (+218 -0)
  19. mozilla-release/patches/1692940-07-88a1.patch (+80 -0)
  20. mozilla-release/patches/1692940-08-88a1.patch (+89 -0)
  21. mozilla-release/patches/1692940-09-88a1.patch (+197 -0)
  22. mozilla-release/patches/1692940-10no11-88a1.patch (+397 -0)
  23. mozilla-release/patches/1692940-12-88a1.patch (+32 -0)
  24. mozilla-release/patches/1692945-1-87a1.patch (+33 -0)
  25. mozilla-release/patches/1692945-2-87a1.patch (+80 -0)
  26. mozilla-release/patches/1693215-1-88a1.patch (+122 -0)
  27. mozilla-release/patches/1693215-2-88a1.patch (+168 -0)
  28. mozilla-release/patches/1693215-3-88a1.patch (+152 -0)
  29. mozilla-release/patches/1693498-1-88a1.patch (+89 -0)
  30. mozilla-release/patches/1693498-2-88a1.patch (+258 -0)
  31. mozilla-release/patches/1709303-1-94a1.patch (+16 -16)
  32. mozilla-release/patches/NOBUG-nasm-icu-25320.patch (+0 -29)
  33. mozilla-release/patches/TOP-1445683-14-PLASTER-aom-fix-win32-bustage-2535.patch (+0 -28)
  34. mozilla-release/patches/TOP-NOBUG-fixnasmcheck-25320.patch (+66 -0)
  35. mozilla-release/patches/series (+35 -8)

+ 384 - 0
mozilla-release/patches/1525393-1-75a1.patch

@@ -0,0 +1,384 @@
+# HG changeset patch
+# User Dan Minor <dminor@mozilla.com>
+# Date 1582826660 0
+# Node ID 6d2821c6e36e659b2d007f1782d1e3346b7c3af6
+# Parent  3f2c958afd9e5e713b8f3a186956196a91878b99
+Bug 1525393 - Changes to update scripts for libvpx 1.8.2; r=bryce
+
+This makes the following changes:
+* Change update.py to use Python 3.
+* Have update.py remove some unused portions of the upstream library.
+* Update local patches to apply against libvpx 1.8.2.
+* Remove local patches that are no longer necessary.
+* Update vs build configurations in generate_sources_mozbuild.sh.
+* Remove the #define for stdint from VPXDecoder.h.
+* Disable AVX512 support
+* Make sure float_control_word.asm is included in win64 builds
+
+Differential Revision: https://phabricator.services.mozilla.com/D63919
+
+diff --git a/dom/media/platforms/agnostic/VPXDecoder.h b/dom/media/platforms/agnostic/VPXDecoder.h
+--- a/dom/media/platforms/agnostic/VPXDecoder.h
++++ b/dom/media/platforms/agnostic/VPXDecoder.h
+@@ -5,16 +5,17 @@
+  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+ #if !defined(VPXDecoder_h_)
+ #define VPXDecoder_h_
+ 
+ #include "PlatformDecoderModule.h"
+ #include "mozilla/Span.h"
+ 
+ #include <stdint.h>
++// Remove when Bug 1525393 part 2 goes in.
+ #define VPX_DONT_DEFINE_STDINT_TYPES
+ #include "vpx/vp8dx.h"
+ #include "vpx/vpx_codec.h"
+ #include "vpx/vpx_decoder.h"
+ 
+ namespace mozilla {
+ 
+ DDLoggedTypeDeclNameAndBase(VPXDecoder, MediaDataDecoder);
+diff --git a/dom/media/platforms/agnostic/VPXDecoder.h.1525393-1.later b/dom/media/platforms/agnostic/VPXDecoder.h.1525393-1.later
+new file mode 100644
+--- /dev/null
++++ b/dom/media/platforms/agnostic/VPXDecoder.h.1525393-1.later
+@@ -0,0 +1,22 @@
++--- VPXDecoder.h
+++++ VPXDecoder.h
++// Fix when Bug 1525393 part 2 goes in.
++
++@@ -5,17 +5,16 @@
++  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
++ #if !defined(VPXDecoder_h_)
++ #  define VPXDecoder_h_
++ 
++ #  include "PlatformDecoderModule.h"
++ #  include "mozilla/Span.h"
++ 
++ #  include <stdint.h>
++-#  define VPX_DONT_DEFINE_STDINT_TYPES
++ #  include "mozilla/gfx/Types.h"
++ #  include "vpx/vp8dx.h"
++ #  include "vpx/vpx_codec.h"
++ #  include "vpx/vpx_decoder.h"
++ 
++ namespace mozilla {
++ 
++ DDLoggedTypeDeclNameAndBase(VPXDecoder, MediaDataDecoder);
+diff --git a/media/libvpx/aarch64-windows.patch b/media/libvpx/aarch64-windows.patch
+deleted file mode 100644
+--- a/media/libvpx/aarch64-windows.patch
++++ /dev/null
+@@ -1,12 +0,0 @@
+-diff --git a/media/libvpx/libvpx/configure b/media/libvpx/libvpx/configure
+-index e5a74c6..12bab6c 100755
+---- a/media/libvpx/libvpx/configure
+-+++ b/media/libvpx/libvpx/configure
+-@@ -159,6 +159,7 @@ all_platforms="${all_platforms} x86_64-win64-vs11"
+- all_platforms="${all_platforms} x86_64-win64-vs12"
+- all_platforms="${all_platforms} x86_64-win64-vs14"
+- all_platforms="${all_platforms} x86_64-win64-vs15"
+-+all_platforms="${all_platforms} aarch64-win64-vs12"
+- all_platforms="${all_platforms} generic-gnu"
+- 
+- # all_targets is a list of all targets that can be configured
+diff --git a/media/libvpx/bug1480092.patch b/media/libvpx/bug1480092.patch
+deleted file mode 100644
+--- a/media/libvpx/bug1480092.patch
++++ /dev/null
+@@ -1,22 +0,0 @@
+-diff --git a/media/libvpx/libvpx/vp8/common/postproc.c b/media/libvpx/libvpx/vp8/common/postproc.c
+---- a/media/libvpx/libvpx/vp8/common/postproc.c
+-+++ b/media/libvpx/libvpx/vp8/common/postproc.c
+-@@ -60,17 +60,17 @@ static void vp8_de_mblock(YV12_BUFFER_CO
+- }
+- 
+- void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source,
+-                  YV12_BUFFER_CONFIG *post, int q, int low_var_thresh,
+-                  int flag) {
+-   double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
+-   int ppl = (int)(level + .5);
+- 
+--  const MODE_INFO *mode_info_context = cm->show_frame_mi;
+-+  const MODE_INFO *mode_info_context = cm->mi;
+-   int mbr, mbc;
+- 
+-   /* The pixel thresholds are adjusted according to if or not the macroblock
+-    * is a skipped block.  */
+-   unsigned char *ylimits = cm->pp_limits_buffer;
+-   unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols;
+-   (void)low_var_thresh;
+-   (void)flag;
+diff --git a/media/libvpx/generate_sources_mozbuild.sh b/media/libvpx/generate_sources_mozbuild.sh
+--- a/media/libvpx/generate_sources_mozbuild.sh
++++ b/media/libvpx/generate_sources_mozbuild.sh
+@@ -12,16 +12,17 @@
+ #
+ # Usage:
+ # $ ./generate_sources_mozbuild.sh
+ 
+ export LC_ALL=C
+ BASE_DIR=$(pwd)
+ LIBVPX_SRC_DIR="libvpx"
+ LIBVPX_CONFIG_DIR="config"
++DISABLE_AVX="--disable-avx512"
+ 
+ # Print license header.
+ # $1 - Output base name
+ function write_license {
+   echo "# This file is generated. Do not edit." >> $1
+   echo "" >> $1
+ }
+ 
+@@ -201,21 +202,22 @@ all_platforms="${all_platforms} --disabl
+ x86_platforms="--enable-postproc --enable-vp9-postproc --as=yasm"
+ arm_platforms="--enable-runtime-cpu-detect --enable-realtime-only"
+ arm64_platforms="--enable-realtime-only"
+ 
+ gen_config_files linux/x64 "--target=x86_64-linux-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files linux/ia32 "--target=x86-linux-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files mac/x64 "--target=x86_64-darwin9-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files mac/ia32 "--target=x86-darwin9-gcc ${all_platforms} ${x86_platforms}"
+-gen_config_files win/x64 "--target=x86_64-win64-vs12 ${all_platforms} ${x86_platforms}"
++gen_config_files win/x64 "--target=x86_64-win64-vs15 ${all_platforms} ${x86_platforms}"
+ gen_config_files win/ia32 "--target=x86-win32-gcc ${all_platforms} ${x86_platforms}"
+ 
+ gen_config_files linux/arm "--target=armv7-linux-gcc ${all_platforms} ${arm_platforms}"
+ gen_config_files linux/arm64 "--target=arm64-linux-gcc ${all_platforms} ${arm64_platforms}"
++gen_config_files win/aarch64 "--target=arm64-win64-vs15 ${all_platforms} ${arm64_platforms}"
+ 
+ gen_config_files generic "--target=generic-gnu ${all_platforms}"
+ 
+ echo "Remove temporary directory."
+ cd $BASE_DIR
+ rm -rf $TEMP_DIR
+ 
+ echo "Create temporary directory."
+@@ -225,21 +227,20 @@ cp -R $LIBVPX_SRC_DIR $TEMP_DIR
+ cd $TEMP_DIR
+ 
+ gen_rtcd_header linux/x64 x86_64
+ gen_rtcd_header linux/ia32 x86
+ gen_rtcd_header mac/x64 x86_64
+ gen_rtcd_header mac/ia32 x86
+ gen_rtcd_header win/x64 x86_64
+ gen_rtcd_header win/ia32 x86
+-gen_rtcd_header win/aarch64 aarch64
+-
+ 
+ gen_rtcd_header linux/arm armv7
+ gen_rtcd_header linux/arm64 arm64
++gen_rtcd_header win/aarch64 arm64
+ 
+ gen_rtcd_header generic generic
+ 
+ echo "Prepare Makefile."
+ ./configure --target=generic-gnu > /dev/null
+ make_clean
+ 
+ # Remove existing source files.
+diff --git a/media/libvpx/input_frame_validation.patch b/media/libvpx/input_frame_validation.patch
+--- a/media/libvpx/input_frame_validation.patch
++++ b/media/libvpx/input_frame_validation.patch
+@@ -5,19 +5,19 @@ Bug 1263384: validate input frames again
+ 
+ MozReview-Commit-ID: BxDCnJe0mzs
+ 
+ diff --git a/media/libvpx/libvpx/vp8/vp8_cx_iface.c b/media/libvpx/libvpx/vp8/vp8_cx_iface.c
+ --- a/media/libvpx/libvpx/vp8/vp8_cx_iface.c
+ +++ b/media/libvpx/libvpx/vp8/vp8_cx_iface.c
+ @@ -855,20 +855,29 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
+      dst_time_stamp =
+-         pts * 10000000 * ctx->cfg.g_timebase.num / ctx->cfg.g_timebase.den;
+-     dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num /
+-                          ctx->cfg.g_timebase.den;
++         pts_val * ctx->timestamp_ratio.num / ctx->timestamp_ratio.den;
++     dst_end_time_stamp = (pts_val + (int64_t)duration) *
++                          ctx->timestamp_ratio.num / ctx->timestamp_ratio.den;
+ 
+      if (img != NULL) {
+        res = image2yuvconfig(img, &sd);
+  
+ -      if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags, &sd,
+ -                                dst_time_stamp, dst_end_time_stamp)) {
+ -        VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
+ -        res = update_error_state(ctx, &cpi->common.error);
+diff --git a/media/libvpx/rename_duplicate_files.patch b/media/libvpx/rename_duplicate_files.patch
+--- a/media/libvpx/rename_duplicate_files.patch
++++ b/media/libvpx/rename_duplicate_files.patch
+@@ -1,23 +1,22 @@
+-diff --git a/libvpx/vpx_dsp/vpx_dsp.mk b/libvpx/vpx_dsp/vpx_dsp.mk
+-index 84b529136ba9..7f3111320dc9 100644
+---- a/libvpx/vpx_dsp/vpx_dsp.mk
+-+++ b/libvpx/vpx_dsp/vpx_dsp.mk
+-@@ -133,17 +133,17 @@ DSP_SRCS-$(HAVE_DSPR2)  += mips/convolve8_avg_dspr2.c
+- DSP_SRCS-$(HAVE_DSPR2)  += mips/convolve8_avg_horiz_dspr2.c
+- DSP_SRCS-$(HAVE_DSPR2)  += mips/convolve8_dspr2.c
++diff --git a/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk b/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk
++--- a/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk
+++++ b/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk
++@@ -160,17 +160,17 @@ DSP_SRCS-$(HAVE_DSPR2)  += mips/convolve
+  DSP_SRCS-$(HAVE_DSPR2)  += mips/convolve8_horiz_dspr2.c
+  DSP_SRCS-$(HAVE_DSPR2)  += mips/convolve8_vert_dspr2.c
+-
++ 
++ DSP_SRCS-$(HAVE_VSX)  += ppc/vpx_convolve_vsx.c
++ 
+  # loop filters
+  DSP_SRCS-yes += loopfilter.c
+-
+--DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64)   += x86/loopfilter_sse2.c
+-+DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64)   += x86/loopfilter_intrin_sse2.c
+- DSP_SRCS-$(HAVE_AVX2)                += x86/loopfilter_avx2.c
+-
++ 
++-DSP_SRCS-$(HAVE_SSE2)  += x86/loopfilter_sse2.c
+++DSP_SRCS-$(HAVE_SSE2)  += x86/loopfilter_intrin_sse2.c
++ DSP_SRCS-$(HAVE_AVX2)  += x86/loopfilter_avx2.c
++ 
+  ifeq ($(HAVE_NEON_ASM),yes)
+  DSP_SRCS-yes  += arm/loopfilter_16_neon$(ASM)
+  DSP_SRCS-yes  += arm/loopfilter_8_neon$(ASM)
+  DSP_SRCS-yes  += arm/loopfilter_4_neon$(ASM)
+  else
+  DSP_SRCS-$(HAVE_NEON)   += arm/loopfilter_neon.c
+diff --git a/media/libvpx/stdint.patch b/media/libvpx/stdint.patch
+deleted file mode 100644
+--- a/media/libvpx/stdint.patch
++++ /dev/null
+@@ -1,41 +0,0 @@
+-diff --git a/media/libvpx/libvpx/vpx/vpx_integer.h b/media/libvpx/libvpx/vpx/vpx_integer.h
+---- a/media/libvpx/libvpx/vpx/vpx_integer.h
+-+++ b/media/libvpx/libvpx/vpx/vpx_integer.h
+-@@ -18,16 +18,18 @@
+- #define VPX_FORCE_INLINE __forceinline
+- #define VPX_INLINE __inline
+- #else
+- #define VPX_FORCE_INLINE __inline__ __attribute__(always_inline)
+- // TODO(jbb): Allow a way to force inline off for older compilers.
+- #define VPX_INLINE inline
+- #endif
+- 
+-+#if !defined(VPX_DONT_DEFINE_STDINT_TYPES)
+-+
+- #if defined(VPX_EMULATE_INTTYPES)
+- typedef signed char int8_t;
+- typedef signed short int16_t;
+- typedef signed int int32_t;
+-
+- typedef unsigned char uint8_t;
+- typedef unsigned short uint16_t;
+- typedef unsigned int uint32_t;
+-@@ -48,16 +50,18 @@ typedef size_t uintptr_t;
+- #define __STDC_LIMIT_MACROS
+- #endif
+- #endif  // __cplusplus
+-
+- #include <stdint.h>
+-
+- #endif
+-
+-+#endif // VPX_DONT_DEFINE_STDINT_TYPES
+-+
+- /* VS2010 defines stdint.h, but not inttypes.h */
+- #if defined(_MSC_VER) && _MSC_VER < 1800
+- #define PRId64 "I64d"
+- #else
+- #include <inttypes.h>
+- #endif
+-
+- #endif  // VPX_VPX_INTEGER_H_
+diff --git a/media/libvpx/update.py b/media/libvpx/update.py
+--- a/media/libvpx/update.py
++++ b/media/libvpx/update.py
+@@ -4,53 +4,48 @@
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ import argparse
+ import os
+ import re
+ import shutil
+ import sys
+ import subprocess
+ import tarfile
+-import urllib
++import urllib.request
+ from pprint import pprint
+-from StringIO import StringIO
++from io import StringIO
+ 
+ def prepare_upstream(prefix, commit=None):
+     upstream_url = 'https://chromium.googlesource.com/webm/libvpx'
+     shutil.rmtree(os.path.join(base, 'libvpx/'))
+     print(upstream_url + '/+archive/' + commit + '.tar.gz')
+-    urllib.urlretrieve(upstream_url + '/+archive/' + commit + '.tar.gz', 'libvpx.tar.gz')
++    urllib.request.urlretrieve(upstream_url + '/+archive/' + commit + '.tar.gz', 'libvpx.tar.gz')
+     tarfile.open('libvpx.tar.gz').extractall(path='libvpx')
+     os.remove(os.path.join(base, 'libvpx.tar.gz'))
+     os.chdir(base)
+     return commit
+ 
+ def cleanup_upstream():
+-    os.remove(os.path.join(base, 'libvpx/.gitattributes'))
+-    os.remove(os.path.join(base, 'libvpx/.gitignore'))
+-    os.remove(os.path.join(base, 'libvpx/build/.gitattributes'))
+-    os.remove(os.path.join(base, 'libvpx/build/.gitignore'))
++    os.remove(os.path.join(base, 'libvpx', '.gitattributes'))
++    os.remove(os.path.join(base, 'libvpx', '.gitignore'))
++    shutil.rmtree(os.path.join(base, 'libvpx', 'third_party', 'libwebm'))
++    shutil.rmtree(os.path.join(base, 'libvpx', 'tools'))
+ 
+ def apply_patches():
+-    # Patch to permit vpx users to specify their own <stdint.h> types.
+-    os.system("patch -p3 < stdint.patch")
+     # Patch to fix a crash caused by MSVC 2013
+     os.system("patch -p3 < bug1137614.patch")
+     # Bug 1263384 - Check input frame resolution
+     os.system("patch -p3 < input_frame_validation.patch")
+     # Bug 1315288 - Check input frame resolution for vp9
+     os.system("patch -p3 < input_frame_validation_vp9.patch")
+     # Avoid c/asm name collision for loopfilter_sse2
+-    os.system("patch -p1 < rename_duplicate_files.patch")
++    os.system("patch -p3 < rename_duplicate_files.patch")
+     os.system("mv libvpx/vpx_dsp/x86/loopfilter_sse2.c libvpx/vpx_dsp/x86/loopfilter_intrin_sse2.c")
+-    # Cherrypick fix from upstream
+-    os.system("patch -p3 < bug1480092.patch")
+-    # AArch64 Windows support
+-    os.system("patch -p3 < aarch64-windows.patch")
+-
++    # Ensure float_control_word.asm is included
++    os.system("patch -p3 < win64_build_fix.patch")
+ 
+ def update_readme(commit):
+     with open('README_MOZILLA') as f:
+         readme = f.read()
+ 
+     if 'The git commit ID used was' in readme:
+         new_readme = re.sub('The git commit ID used was [v\.a-f0-9]+',
+             'The git commit ID used was %s' % commit, readme)
+diff --git a/media/libvpx/win64_build_fix.patch b/media/libvpx/win64_build_fix.patch
+new file mode 100644
+--- /dev/null
++++ b/media/libvpx/win64_build_fix.patch
+@@ -0,0 +1,22 @@
++diff --git a/media/libvpx/libvpx/vpx_ports/vpx_ports.mk b/media/libvpx/libvpx/vpx_ports/vpx_ports.mk
++--- a/media/libvpx/libvpx/vpx_ports/vpx_ports.mk
+++++ b/media/libvpx/libvpx/vpx_ports/vpx_ports.mk
++@@ -21,17 +21,17 @@ ifeq ($(VPX_ARCH_X86),yes)
++ PORTS_SRCS-$(HAVE_MMX) += emms_mmx.c
++ endif
++ ifeq ($(VPX_ARCH_X86_64),yes)
++ # Visual Studio x64 does not support the _mm_empty() intrinsic.
++ PORTS_SRCS-$(HAVE_MMX) += emms_mmx.asm
++ endif
++ 
++ ifeq ($(VPX_ARCH_X86_64),yes)
++-PORTS_SRCS-$(CONFIG_MSVS) += float_control_word.asm
+++PORTS_SRCS-yes += float_control_word.asm
++ endif
++ 
++ ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes)
++ PORTS_SRCS-yes += x86.h
++ PORTS_SRCS-yes += x86_abi_support.asm
++ endif
++ 
++ PORTS_SRCS-$(VPX_ARCH_ARM) += arm_cpudetect.c
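
Note: the update.py portion of the patch above is a routine Python 2 to Python 3 port: urllib.urlretrieve becomes urllib.request.urlretrieve, StringIO moves to the io module, and path handling switches to os.path.join with separate components. A minimal standalone sketch of that download-and-extract pattern, using a hypothetical fetch_tarball helper rather than the real update.py code:

    import os
    import tarfile
    import urllib.request

    def fetch_tarball(url, dest_dir):
        # Python 3: urllib.request replaces the old Python 2 urllib call.
        archive = os.path.join(dest_dir, 'libvpx.tar.gz')
        urllib.request.urlretrieve(url, archive)
        # Unpack next to the archive, then drop the tarball,
        # much like prepare_upstream() does in the patched update.py.
        tarfile.open(archive).extractall(path=os.path.join(dest_dir, 'libvpx'))
        os.remove(archive)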

+ 105 - 0
mozilla-release/patches/1540760-1-68a1.patch

@@ -0,0 +1,105 @@
+# HG changeset patch
+# User Dan Minor <dminor@mozilla.com>
+# Date 1556723088 0
+# Node ID f1bc4fcd152e66e858c3a1d0b0afd30a78e9474b
+# Parent  3ca39e2837e77b27b1058be69bcd6129a3dfcff9
+Bug 1540760 - Make it possible to use clang-cl as an assembler; r=firefox-build-system-reviewers,mshal
+
+Some media libraries use gas syntax in their assembly files. Rather than
+converting these arm assembly syntax files for aarch64, we can use clang-cl
+to build them directly.
+
+Differential Revision: https://phabricator.services.mozilla.com/D27785
+
+diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
+--- a/build/moz.configure/toolchain.configure
++++ b/build/moz.configure/toolchain.configure
+@@ -2217,16 +2217,30 @@ def have_yasm(yasm_asflags):
+         return True
+ 
+ set_config('HAVE_NASM', have_nasm)
+ 
+ set_config('HAVE_YASM', have_yasm)
+ # Until the YASM variable is not necessary in old-configure.
+ add_old_configure_assignment('YASM', have_yasm)
+ 
++
++# clang-cl integrated assembler support
++# ==============================================================
++@depends(target)
++def clangcl_asflags(target):
++    asflags = None
++    if target.os == 'WINNT' and target.cpu == 'aarch64':
++        asflags = ['--target=aarch64-windows-msvc']
++    return asflags
++
++
++set_config('CLANGCL_ASFLAGS', clangcl_asflags)
++
++
+ # Code Coverage
+ # ==============================================================
+ 
+ js_option('--enable-coverage', env='MOZ_CODE_COVERAGE',
+           help='Enable code coverage')
+ 
+ @depends('--enable-coverage')
+ def code_coverage(value):
+diff --git a/python/mozbuild/mozbuild/frontend/context.py b/python/mozbuild/mozbuild/frontend/context.py
+--- a/python/mozbuild/mozbuild/frontend/context.py
++++ b/python/mozbuild/mozbuild/frontend/context.py
+@@ -2274,16 +2274,24 @@ VARIABLES = {
+ 
+         By default, the build will use the toolchain assembler, $(AS), to
+         assemble source files in assembly language (.s or .asm files). Setting
+         this value to ``True`` will cause it to use yasm instead.
+ 
+         If yasm is not available on this system, or does not support the
+         current target architecture, an error will be raised.
+         """),
++
++    'USE_INTEGRATED_CLANGCL_AS': (bool, bool,
++        """Use the integrated clang-cl assembler to assemble assembly files from SOURCES.
++
++        This allows using clang-cl to assemble assembly files which is useful
++        on platforms like aarch64 where the alternative is to have to run a
++        pre-processor to generate files with suitable syntax.
++        """),
+ }
+ 
+ # Sanity check: we don't want any variable above to have a list as storage type.
+ for name, (storage_type, input_types, docs) in VARIABLES.items():
+     if storage_type == list:
+         raise RuntimeError('%s has a "list" storage type. Use "List" instead.'
+                            % name)
+ 
+diff --git a/python/mozbuild/mozbuild/frontend/emitter.py b/python/mozbuild/mozbuild/frontend/emitter.py
+--- a/python/mozbuild/mozbuild/frontend/emitter.py
++++ b/python/mozbuild/mozbuild/frontend/emitter.py
+@@ -1327,16 +1327,26 @@ class TreeMetadataEmitter(LoggingMixin):
+             if not nasm:
+                 raise SandboxValidationError('nasm is not available', context)
+             passthru.variables['AS'] = nasm
+             passthru.variables['AS_DASH_C_FLAG'] = ''
+             passthru.variables['ASOUTOPTION'] = '-o '
+             computed_as_flags.resolve_flags('OS',
+                                             context.config.substs.get('NASM_ASFLAGS', []))
+ 
++        if context.get('USE_INTEGRATED_CLANGCL_AS') is True:
++            clangcl = context.config.substs.get('CLANG_CL')
++            if not clangcl:
++                raise SandboxValidationError('clang-cl is not available', context)
++            passthru.variables['AS'] = 'clang-cl'
++            passthru.variables['AS_DASH_C_FLAG'] = '-c'
++            passthru.variables['ASOUTOPTION'] = '-o '
++            computed_as_flags.resolve_flags('OS',
++                                            context.config.substs.get('CLANGCL_ASFLAGS', []))
++
+         if passthru.variables:
+             yield passthru
+ 
+         if context.objdir in self._compile_dirs:
+             self._compile_flags[context.objdir] = computed_flags
+             yield computed_link_flags
+ 
+         if context.objdir in self._asm_compile_dirs:
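
Note: USE_INTEGRATED_CLANGCL_AS as defined above is a per-directory moz.build flag; when it is set, the emitter swaps the assembler for clang-cl and applies CLANGCL_ASFLAGS. A hedged sketch of what a consuming moz.build might look like (the file name and condition here are illustrative, not taken from this patch):

    # moz.build (illustrative): assemble gas-syntax aarch64 sources with the
    # clang-cl integrated assembler instead of pre-converting them for armasm64.
    SOURCES += [
        'fft_neon.S',
    ]

    if CONFIG['CPU_ARCH'] == 'aarch64' and CONFIG['OS_TARGET'] == 'WINNT':
        USE_INTEGRATED_CLANGCL_AS = True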

+ 74 - 0
mozilla-release/patches/1540760-2-68a1.patch

@@ -0,0 +1,74 @@
+# HG changeset patch
+# User Dan Minor <dminor@mozilla.com>
+# Date 1556723089 0
+# Node ID cd666f0befca97073d92bf1c46f39ead6eff3646
+# Parent  f1bc4fcd152e66e858c3a1d0b0afd30a78e9474b
+Bug 1540760 - Enable neon for libyuv for aarch64; r=jya
+
+Differential Revision: https://phabricator.services.mozilla.com/D27786
+
+diff --git a/media/libyuv/aarch64-windows-noneon.patch b/media/libyuv/aarch64-windows-noneon.patch
+deleted file mode 100644
+--- a/media/libyuv/aarch64-windows-noneon.patch
++++ /dev/null
+@@ -1,14 +0,0 @@
+-diff --git a/media/libyuv/libyuv/libyuv.gyp b/media/libyuv/libyuv/libyuv.gyp
+-index 776510b..51ab531 100644
+---- a/media/libyuv/libyuv/libyuv.gyp
+-+++ b/media/libyuv/libyuv/libyuv.gyp
+-@@ -33,7 +33,8 @@
+-     'build_msa': 0,
+-     'conditions': [
+-        ['(target_arch == "armv7" or target_arch == "armv7s" or \
+--       (target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
+-+       (target_arch == "arm" and arm_version >= 7) or \
+-+       (OS != "win" and target_arch == "arm64")) \
+-        and (arm_neon == 1 or arm_neon_optional == 1)', {
+-          'build_neon': 1,
+-        }],
+diff --git a/media/libyuv/libyuv/libyuv.gyp b/media/libyuv/libyuv/libyuv.gyp
+--- a/media/libyuv/libyuv/libyuv.gyp
++++ b/media/libyuv/libyuv/libyuv.gyp
+@@ -28,18 +28,17 @@
+     'use_lto%': 0,
+     'yuv_disable_asm%': 0,
+     'yuv_disable_avx2%': 0,
+     'mips_msa%': 0,  # Default to msa off.
+     'build_neon': 0,
+     'build_msa': 0,
+     'conditions': [
+        ['(target_arch == "armv7" or target_arch == "armv7s" or \
+-       (target_arch == "arm" and arm_version >= 7) or \
+-       (OS != "win" and target_arch == "arm64")) \
++       (target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
+        and (arm_neon == 1 or arm_neon_optional == 1)', {
+          'build_neon': 1,
+        }],
+        ['(target_arch == "mipsel" or target_arch == "mips64el")\
+        and (mips_msa == 1)',
+        {
+          'build_msa': 1,
+        }],
+diff --git a/media/libyuv/update.py b/media/libyuv/update.py
+--- a/media/libyuv/update.py
++++ b/media/libyuv/update.py
+@@ -47,18 +47,16 @@ def apply_patches(base):
+         # fix build errors
+         'fix_build_errors.patch',
+         # make mjpeg printfs optional at build time
+         'make_mjpeg_printfs_optional.patch',
+         # allow disabling of inline ASM and AVX2 code
+         'allow_disabling_asm_avx2.patch',
+         # add H444ToARGB() variant
+         'add_H444ToARGB.patch',
+-        # avoid selecting neon codepaths on AArch64 Windows
+-        'aarch64-windows-noneon.patch',
+         # fix the x86 mingw-clang build
+         'bug_1491848.patch',
+     ]
+ 
+     for patch in patches:
+         print('\nApplying patch %s' % patch)
+         with open(os.path.join(base, patch)) as f:
+             Popen(["patch", "-p3"], stdin=f, cwd=base).wait()
+

+ 57 - 0
mozilla-release/patches/1540760-3-68a1.patch

@@ -0,0 +1,57 @@
+# HG changeset patch
+# User Dan Minor <dminor@mozilla.com>
+# Date 1556723089 0
+# Node ID a338bdcb894fbfbc4412e9f8efefc54667cd32a6
+# Parent  cd666f0befca97073d92bf1c46f39ead6eff3646
+Bug 1540760 - Use arm sources for libvpx; r=jya
+
+Differential Revision: https://phabricator.services.mozilla.com/D27787
+
+diff --git a/media/libvpx/generate_sources_mozbuild.sh b/media/libvpx/generate_sources_mozbuild.sh
+--- a/media/libvpx/generate_sources_mozbuild.sh
++++ b/media/libvpx/generate_sources_mozbuild.sh
+@@ -204,17 +204,17 @@ arm64_platforms="--enable-realtime-only"
+ 
+ gen_config_files linux/x64 "--target=x86_64-linux-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files linux/ia32 "--target=x86-linux-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files mac/x64 "--target=x86_64-darwin9-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files mac/ia32 "--target=x86-darwin9-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files win/x64 "--target=x86_64-win64-vs12 ${all_platforms} ${x86_platforms}"
+ gen_config_files win/ia32 "--target=x86-win32-gcc ${all_platforms} ${x86_platforms}"
+ gen_config_files win/mingw32 "--target=x86-win32-gcc ${all_platforms} ${x86_platforms}"
+-gen_config_files win/aarch64 "--target=aarch64-win64-vs12 ${all_platforms}"
++gen_config_files win/aarch64 "--target=aarch64-win64-vs12 ${all_platforms} ${arm64_platforms}"
+ 
+ gen_config_files linux/arm "--target=armv7-linux-gcc ${all_platforms} ${arm_platforms}"
+ gen_config_files linux/arm64 "--target=arm64-linux-gcc ${all_platforms} ${arm64_platforms}"
+ 
+ gen_config_files generic "--target=generic-gnu ${all_platforms}"
+ 
+ # vpx doesn't know if mingw32 has winpthreads or not, and doesn't try to detect it.
+ sed -i 's/HAVE_PTHREAD_H 0/HAVE_PTHREAD_H 1/' $BASE_DIR/$LIBVPX_CONFIG_DIR/win/mingw32/vpx_config.asm
+diff --git a/media/libvpx/moz.build b/media/libvpx/moz.build
+--- a/media/libvpx/moz.build
++++ b/media/libvpx/moz.build
+@@ -71,19 +71,18 @@ elif CONFIG['CPU_ARCH'] == 'arm':
+         LOCAL_INCLUDES += [
+             '%%%s/sources/android/cpufeatures' % CONFIG['ANDROID_NDK'],
+         ]
+     if CONFIG['CC_TYPE'] == 'clang':
+         ASFLAGS += [
+             '-no-integrated-as',
+         ]
+ elif CONFIG['CPU_ARCH'] == 'aarch64' and CONFIG['OS_TARGET'] == 'WINNT':
+-    # Generic C-only configuration
+-    EXPORTS.vpx += files['GENERIC_EXPORTS']
+-    SOURCES += files['GENERIC_SOURCES']
++    EXPORTS.vpx += files['ARM64_EXPORTS']
++    SOURCES += files['ARM64_SOURCES']
+     ASFLAGS += [ '-I%s/media/libvpx/config/win/aarch64/' % TOPSRCDIR ]
+     LOCAL_INCLUDES += [ '/media/libvpx/config/win/aarch64/' ]
+ elif CONFIG['CPU_ARCH'] == 'aarch64':
+     EXPORTS.vpx += files['ARM64_EXPORTS']
+     SOURCES += files['ARM64_SOURCES']
+     ASFLAGS += [ '-I%s/media/libvpx/config/linux/arm64/' % TOPSRCDIR ]
+     LOCAL_INCLUDES += [ '/media/libvpx/config/linux/arm64/' ]
+ else:
+

+ 115 - 0
mozilla-release/patches/1540760-4-68a1.patch

@@ -0,0 +1,115 @@
+# HG changeset patch
+# User Dan Minor <dminor@mozilla.com>
+# Date 1556723089 0
+# Node ID f40ae51578ac27c6ea38af1e2818a12ac0b93dbd
+# Parent  a338bdcb894fbfbc4412e9f8efefc54667cd32a6
+Bug 1540760 - Rerun generate_sources_mozbuild.sh for arm64 windows; r=jya
+
+Differential Revision: https://phabricator.services.mozilla.com/D27788
+
+diff --git a/media/libvpx/config/win/aarch64/vp8_rtcd.h b/media/libvpx/config/win/aarch64/vp8_rtcd.h
+--- a/media/libvpx/config/win/aarch64/vp8_rtcd.h
++++ b/media/libvpx/config/win/aarch64/vp8_rtcd.h
+@@ -142,19 +142,16 @@ void vp8_sixtap_predict4x4_c(unsigned ch
+ #define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c
+ 
+ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+ #define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c
+ 
+ void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+ #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
+ 
+-void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
+-#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
+-
+ void vp8_rtcd(void);
+ 
+ #include "vpx_config.h"
+ 
+ #ifdef RTCD_C
+ static void setup_rtcd_internal(void)
+ {
+ }
+diff --git a/media/libvpx/config/win/aarch64/vp9_rtcd.h b/media/libvpx/config/win/aarch64/vp9_rtcd.h
+--- a/media/libvpx/config/win/aarch64/vp9_rtcd.h
++++ b/media/libvpx/config/win/aarch64/vp9_rtcd.h
+@@ -68,19 +68,16 @@ void vp9_quantize_fp_c(const tran_low_t 
+ #define vp9_quantize_fp vp9_quantize_fp_c
+ 
+ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+ #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c
+ 
+ void vp9_scale_and_extend_frame_c(const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler);
+ #define vp9_scale_and_extend_frame vp9_scale_and_extend_frame_c
+ 
+-void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count);
+-#define vp9_temporal_filter_apply vp9_temporal_filter_apply_c
+-
+ void vp9_rtcd(void);
+ 
+ #include "vpx_config.h"
+ 
+ #ifdef RTCD_C
+ static void setup_rtcd_internal(void)
+ {
+ }
+diff --git a/media/libvpx/config/win/aarch64/vpx_config.asm b/media/libvpx/config/win/aarch64/vpx_config.asm
+--- a/media/libvpx/config/win/aarch64/vpx_config.asm
++++ b/media/libvpx/config/win/aarch64/vpx_config.asm
+@@ -53,17 +53,17 @@
+ .equ CONFIG_VP9_ENCODER ,  1
+ .equ CONFIG_VP9_DECODER ,  1
+ .equ CONFIG_VP8 ,  1
+ .equ CONFIG_VP9 ,  1
+ .equ CONFIG_ENCODERS ,  1
+ .equ CONFIG_DECODERS ,  1
+ .equ CONFIG_STATIC_MSVCRT ,  0
+ .equ CONFIG_SPATIAL_RESAMPLING ,  1
+-.equ CONFIG_REALTIME_ONLY ,  0
++.equ CONFIG_REALTIME_ONLY ,  1
+ .equ CONFIG_ONTHEFLY_BITPACKING ,  0
+ .equ CONFIG_ERROR_CONCEALMENT ,  0
+ .equ CONFIG_SHARED ,  0
+ .equ CONFIG_STATIC ,  1
+ .equ CONFIG_SMALL ,  0
+ .equ CONFIG_POSTPROC_VISUALIZER ,  0
+ .equ CONFIG_OS_SUPPORT ,  1
+ .equ CONFIG_UNIT_TESTS ,  0
+diff --git a/media/libvpx/config/win/aarch64/vpx_config.c b/media/libvpx/config/win/aarch64/vpx_config.c
+--- a/media/libvpx/config/win/aarch64/vpx_config.c
++++ b/media/libvpx/config/win/aarch64/vpx_config.c
+@@ -1,10 +1,10 @@
+ /* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */
+ /*  */
+ /* Use of this source code is governed by a BSD-style license */
+ /* that can be found in the LICENSE file in the root of the source */
+ /* tree. An additional intellectual property rights grant can be found */
+ /* in the file PATENTS.  All contributing project authors may */
+ /* be found in the AUTHORS file in the root of the source tree. */
+ #include "vpx/vpx_codec.h"
+-static const char* const cfg = "--target=aarch64-win64-vs12 --enable-external-build --disable-examples --disable-install-docs --disable-unit-tests --enable-multi-res-encoding --size-limit=8192x4608 --enable-pic --disable-avx512";
++static const char* const cfg = "--target=aarch64-win64-vs12 --enable-external-build --disable-examples --disable-install-docs --disable-unit-tests --enable-multi-res-encoding --size-limit=8192x4608 --enable-pic --disable-avx512 --enable-realtime-only";
+ const char *vpx_codec_build_config(void) {return cfg;}
+diff --git a/media/libvpx/config/win/aarch64/vpx_config.h b/media/libvpx/config/win/aarch64/vpx_config.h
+--- a/media/libvpx/config/win/aarch64/vpx_config.h
++++ b/media/libvpx/config/win/aarch64/vpx_config.h
+@@ -62,17 +62,17 @@
+ #define CONFIG_VP9_ENCODER 1
+ #define CONFIG_VP9_DECODER 1
+ #define CONFIG_VP8 1
+ #define CONFIG_VP9 1
+ #define CONFIG_ENCODERS 1
+ #define CONFIG_DECODERS 1
+ #define CONFIG_STATIC_MSVCRT 0
+ #define CONFIG_SPATIAL_RESAMPLING 1
+-#define CONFIG_REALTIME_ONLY 0
++#define CONFIG_REALTIME_ONLY 1
+ #define CONFIG_ONTHEFLY_BITPACKING 0
+ #define CONFIG_ERROR_CONCEALMENT 0
+ #define CONFIG_SHARED 0
+ #define CONFIG_STATIC 1
+ #define CONFIG_SMALL 0
+ #define CONFIG_POSTPROC_VISUALIZER 0
+ #define CONFIG_OS_SUPPORT 1
+ #define CONFIG_UNIT_TESTS 0
+

+ 12454 - 0
mozilla-release/patches/1540760-5-68a1.patch

@@ -0,0 +1,12454 @@
+# HG changeset patch
+# User Dan Minor <dminor@mozilla.com>
+# Date 1556751985 0
+# Node ID 742b7c0a4bdbbe5f4004b038b4b5b4467ef4484b
+# Parent  f40ae51578ac27c6ea38af1e2818a12ac0b93dbd
+Bug 1540760 - Add missing aarch64 files for ffvpx; r=jya
+
+Differential Revision: https://phabricator.services.mozilla.com/D27789
+
+diff --git a/media/ffvpx/libavcodec/aarch64/fft_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/fft_init_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/fft_init_aarch64.c
+@@ -0,0 +1,50 @@
++/*
++ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "config.h"
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/aarch64/cpu.h"
++
++#include "libavcodec/fft.h"
++
++void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
++void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
++
++void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
++void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
++void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
++
++av_cold void ff_fft_init_aarch64(FFTContext *s)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++        s->fft_permute  = ff_fft_permute_neon;
++        s->fft_calc     = ff_fft_calc_neon;
++#if CONFIG_MDCT
++        s->imdct_calc   = ff_imdct_calc_neon;
++        s->imdct_half   = ff_imdct_half_neon;
++        s->mdct_calc    = ff_mdct_calc_neon;
++        s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
++#endif
++    }
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/fft_neon.S b/media/ffvpx/libavcodec/aarch64/fft_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/fft_neon.S
+@@ -0,0 +1,442 @@
++/*
++ * ARM NEON optimised FFT
++ *
++ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2009 Naotoshi Nojiri
++ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This algorithm (though not any of the implementation details) is
++ * based on libdjbfft by D. J. Bernstein.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++#define M_SQRT1_2 0.70710678118654752440
++
++.macro transpose d0, d1, s0, s1
++        trn1            \d0, \s0, \s1
++        trn2            \d1, \s0, \s1
++.endm
++
++
++function fft4_neon
++        ld1             {v0.2s,v1.2s,v2.2s,v3.2s}, [x0]
++
++        fadd            v4.2s,  v0.2s,  v1.2s   // r0+r1,i0+i1
++        fsub            v6.2s,  v0.2s,  v1.2s   // r0-r1,i0-i1
++
++        ext             v16.8b, v2.8b,  v3.8b,  #4
++        ext             v17.8b, v3.8b,  v2.8b,  #4
++
++        fadd            v5.2s,  v2.2s,  v3.2s   // i2+i3,r2+r3
++        fsub            v7.2s,  v16.2s, v17.2s  // r3-r2,i2-i3
++
++        fadd            v0.2s,  v4.2s,  v5.2s
++        fsub            v2.2s,  v4.2s,  v5.2s
++        fadd            v1.2s,  v6.2s,  v7.2s
++        fsub            v3.2s,  v6.2s,  v7.2s
++
++        st1             {v0.2s,v1.2s,v2.2s,v3.2s}, [x0]
++
++        ret
++endfunc
++
++function fft8_neon
++        mov             x1,  x0
++        ld1             {v0.2s, v1.2s, v2.2s, v3.2s},  [x0], #32
++        ld1             {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]
++        ext             v22.8b, v2.8b,  v3.8b,  #4
++        ext             v23.8b, v3.8b,  v2.8b,  #4
++        fadd            v4.2s,  v16.2s, v17.2s           // r4+r5,i4+i5
++        fadd            v5.2s,  v18.2s, v19.2s           // r6+r7,i6+i7
++        fsub            v17.2s, v16.2s, v17.2s           // r4-r5,i4-i5
++        fsub            v19.2s, v18.2s, v19.2s           // r6-r7,i6-i7
++        rev64           v27.2s, v28.2s  // ???
++        fadd            v20.2s, v0.2s,  v1.2s            // r0+r1,i0+i1
++        fadd            v21.2s, v2.2s,  v3.2s            // r2+r3,i2+i3
++        fmul            v26.2s, v17.2s, v28.2s           // -a2r*w,a2i*w
++        ext             v6.8b,  v4.8b,  v5.8b,  #4
++        ext             v7.8b,  v5.8b,  v4.8b,  #4
++        fmul            v27.2s, v19.2s, v27.2s           // a3r*w,-a3i*w
++        fsub            v23.2s, v22.2s, v23.2s           // i2-i3,r3-r2
++        fsub            v22.2s, v0.2s,  v1.2s            // r0-r1,i0-i1
++        fmul            v24.2s, v17.2s, v28.s[1]         // a2r*w,a2i*w
++        fmul            v25.2s, v19.2s, v28.s[1]         // a3r*w,a3i*w
++        fadd            v0.2s,  v20.2s, v21.2s
++        fsub            v2.2s,  v20.2s, v21.2s
++        fadd            v1.2s,  v22.2s, v23.2s
++        rev64           v26.2s, v26.2s
++        rev64           v27.2s, v27.2s
++        fsub            v3.2s,  v22.2s, v23.2s
++        fsub            v6.2s,  v6.2s,  v7.2s
++        fadd            v24.2s, v24.2s, v26.2s  // a2r+a2i,a2i-a2r   t1,t2
++        fadd            v25.2s, v25.2s, v27.2s  // a3r-a3i,a3i+a3r   t5,t6
++        fadd            v7.2s,  v4.2s,  v5.2s
++        fsub            v18.2s, v2.2s,  v6.2s
++        ext             v26.8b, v24.8b, v25.8b, #4
++        ext             v27.8b, v25.8b, v24.8b, #4
++        fadd            v2.2s,  v2.2s,  v6.2s
++        fsub            v16.2s, v0.2s,  v7.2s
++        fadd            v5.2s,  v25.2s, v24.2s
++        fsub            v4.2s,  v26.2s, v27.2s
++        fadd            v0.2s,  v0.2s,  v7.2s
++        fsub            v17.2s, v1.2s,  v5.2s
++        fsub            v19.2s, v3.2s,  v4.2s
++        fadd            v3.2s,  v3.2s,  v4.2s
++        fadd            v1.2s,  v1.2s,  v5.2s
++
++        st1             {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]
++        st1             {v0.2s, v1.2s, v2.2s, v3.2s},  [x1]
++
++        ret
++endfunc
++
++function fft16_neon
++        mov             x1,  x0
++        ld1             {v0.2s, v1.2s, v2.2s, v3.2s},  [x0], #32
++        ld1             {v16.2s,v17.2s,v18.2s,v19.2s}, [x0], #32
++        ext             v22.8b, v2.8b,  v3.8b,  #4
++        ext             v23.8b, v3.8b,  v2.8b,  #4
++        fadd            v4.2s,  v16.2s, v17.2s           // r4+r5,i4+i5
++        fadd            v5.2s,  v18.2s, v19.2s           // r6+r7,i6+i7
++        fsub            v17.2s, v16.2s, v17.2s           // r4-r5,i4-i5
++        fsub            v19.2s, v18.2s, v19.2s           // r6-r7,i6-i7
++        rev64           v27.2s, v28.2s  // ???
++        fadd            v20.2s, v0.2s,  v1.2s            // r0+r1,i0+i1
++        fadd            v21.2s, v2.2s,  v3.2s            // r2+r3,i2+i3
++        fmul            v26.2s, v17.2s, v28.2s           // -a2r*w,a2i*w
++        ext             v6.8b,  v4.8b,  v5.8b,  #4
++        ext             v7.8b,  v5.8b,  v4.8b,  #4
++        fmul            v27.2s, v19.2s, v27.2s           // a3r*w,-a3i*w
++        fsub            v23.2s, v22.2s, v23.2s           // i2-i3,r3-r2
++        fsub            v22.2s, v0.2s,  v1.2s            // r0-r1,i0-i1
++        fmul            v24.2s, v17.2s, v28.s[1]         // a2r*w,a2i*w
++        fmul            v25.2s, v19.2s, v28.s[1]         // a3r*w,a3i*w
++        fadd            v0.2s,  v20.2s, v21.2s
++        fsub            v2.2s,  v20.2s, v21.2s
++        fadd            v1.2s,  v22.2s, v23.2s
++        rev64           v26.2s, v26.2s
++        rev64           v27.2s, v27.2s
++        fsub            v3.2s,  v22.2s, v23.2s
++        fsub            v6.2s,  v6.2s,  v7.2s
++        fadd            v24.2s, v24.2s, v26.2s  // a2r+a2i,a2i-a2r   t1,t2
++        fadd            v25.2s, v25.2s, v27.2s  // a3r-a3i,a3i+a3r   t5,t6
++        fadd            v7.2s,  v4.2s,  v5.2s
++        fsub            v18.2s, v2.2s,  v6.2s
++        ld1             {v20.4s,v21.4s}, [x0], #32
++        ld1             {v22.4s,v23.4s}, [x0], #32
++        ext             v26.8b, v24.8b, v25.8b, #4
++        ext             v27.8b, v25.8b, v24.8b, #4
++        fadd            v2.2s,  v2.2s,  v6.2s
++        fsub            v16.2s, v0.2s,  v7.2s
++        fadd            v5.2s,  v25.2s, v24.2s
++        fsub            v4.2s,  v26.2s, v27.2s
++        transpose       v24.2d, v25.2d, v20.2d, v22.2d
++        transpose       v26.2d, v27.2d, v21.2d, v23.2d
++        fadd            v0.2s,  v0.2s,  v7.2s
++        fsub            v17.2s, v1.2s,  v5.2s
++        fsub            v19.2s, v3.2s,  v4.2s
++        fadd            v3.2s,  v3.2s,  v4.2s
++        fadd            v1.2s,  v1.2s,  v5.2s
++        ext             v20.16b, v21.16b, v21.16b,  #4
++        ext             v21.16b, v23.16b, v23.16b,  #4
++
++        zip1            v0.2d,  v0.2d,  v1.2d   // {z[0],   z[1]}
++        zip1            v1.2d,  v2.2d,  v3.2d   // {z[2],   z[3]}
++        zip1            v2.2d,  v16.2d, v17.2d  // {z[o1],  z[o1+1]}
++        zip1            v3.2d,  v18.2d, v19.2d  // {z[o1+2],z[o1+3]}
++
++        // 2 x fft4
++        transpose       v22.2d, v23.2d, v20.2d, v21.2d
++
++        fadd            v4.4s,  v24.4s, v25.4s
++        fadd            v5.4s,  v26.4s, v27.4s
++        fsub            v6.4s,  v24.4s, v25.4s
++        fsub            v7.4s,  v22.4s, v23.4s
++
++        ld1             {v23.4s},  [x14]
++
++        fadd            v24.4s, v4.4s,  v5.4s   // {z[o2+0],z[o2+1]}
++        fsub            v26.4s, v4.4s,  v5.4s   // {z[o2+2],z[o2+3]}
++        fadd            v25.4s, v6.4s,  v7.4s   // {z[o3+0],z[o3+1]}
++        fsub            v27.4s, v6.4s,  v7.4s   // {z[o3+2],z[o3+3]}
++
++        //fft_pass_neon_16
++        rev64           v7.4s,  v25.4s
++        fmul            v25.4s, v25.4s, v23.s[1]
++        fmul            v7.4s,  v7.4s,  v29.4s
++        fmla            v25.4s, v7.4s,  v23.s[3] // {t1a,t2a,t5a,t6a}
++
++        zip1            v20.4s, v24.4s, v25.4s
++        zip2            v21.4s, v24.4s, v25.4s
++        fneg            v22.4s, v20.4s
++        fadd            v4.4s,  v21.4s, v20.4s
++        fsub            v6.4s,  v20.4s, v21.4s  // just the second half
++        fadd            v5.4s,  v21.4s, v22.4s  // just the first half
++
++        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
++        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
++
++        fsub            v20.4s, v0.4s,  v4.4s   // {z[o2],z[o2+1]}
++        fadd            v16.4s, v0.4s,  v4.4s   // {z[0], z[1]}
++        fsub            v22.4s, v2.4s,  v5.4s   // {z[o3],z[o3+1]}
++        fadd            v18.4s, v2.4s,  v5.4s   // {z[o1],z[o1+1]}
++
++//second half
++        rev64           v6.4s,  v26.4s
++        fmul            v26.4s, v26.4s, v23.s[2]
++        rev64           v7.4s,  v27.4s
++        fmul            v27.4s, v27.4s, v23.s[3]
++        fmul            v6.4s,  v6.4s,  v29.4s
++        fmul            v7.4s,  v7.4s,  v29.4s
++        fmla            v26.4s, v6.4s,  v23.s[2] // {t1,t2,t5,t6}
++        fmla            v27.4s, v7.4s,  v23.s[1] // {t1a,t2a,t5a,t6a}
++
++        zip1            v24.4s, v26.4s, v27.4s
++        zip2            v25.4s, v26.4s, v27.4s
++        fneg            v26.4s, v24.4s
++        fadd            v4.4s,  v25.4s, v24.4s
++        fsub            v6.4s,  v24.4s, v25.4s  // just the second half
++        fadd            v5.4s,  v25.4s, v26.4s  // just the first half
++
++        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
++        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
++
++        fadd            v17.4s, v1.4s, v4.4s    // {z[2], z[3]}
++        fsub            v21.4s, v1.4s, v4.4s    // {z[o2+2],z[o2+3]}
++        fadd            v19.4s, v3.4s, v5.4s    // {z[o1+2],z[o1+3]}
++        fsub            v23.4s, v3.4s, v5.4s    // {z[o3+2],z[o3+3]}
++
++        st1             {v16.4s,v17.4s}, [x1], #32
++        st1             {v18.4s,v19.4s}, [x1], #32
++        st1             {v20.4s,v21.4s}, [x1], #32
++        st1             {v22.4s,v23.4s}, [x1], #32
++
++        ret
++endfunc
++
++
++const  trans4_float, align=4
++        .byte    0,  1,  2,  3
++        .byte    8,  9, 10, 11
++        .byte    4,  5,  6,  7
++        .byte   12, 13, 14, 15
++endconst
++
++const  trans8_float, align=4
++        .byte   24, 25, 26, 27
++        .byte    0,  1,  2,  3
++        .byte   28, 29, 30, 31
++        .byte    4,  5,  6,  7
++endconst
++
++function fft_pass_neon
++        sub             x6,  x2,  #1            // n - 1, loop counter
++        lsl             x5,  x2,  #3            // 2 * n * sizeof FFTSample
++        lsl             x1,  x2,  #4            // 2 * n * sizeof FFTComplex
++        add             x5,  x4,  x5            // wim
++        add             x3,  x1,  x2,  lsl #5   // 4 * n * sizeof FFTComplex
++        add             x2,  x0,  x2,  lsl #5   // &z[o2]
++        add             x3,  x0,  x3            // &z[o3]
++        add             x1,  x0,  x1            // &z[o1]
++        ld1             {v20.4s},[x2]           // {z[o2],z[o2+1]}
++        ld1             {v22.4s},[x3]           // {z[o3],z[o3+1]}
++        ld1             {v4.2s},  [x4], #8      // {wre[0],wre[1]}
++        trn2            v25.2d, v20.2d, v22.2d
++        sub             x5,  x5,  #4            // wim--
++        trn1            v24.2d, v20.2d, v22.2d
++        ld1             {v5.s}[0],  [x5], x7    // d5[0] = wim[-1]
++        rev64           v7.4s,  v25.4s
++        fmul            v25.4s, v25.4s, v4.s[1]
++        ld1             {v16.4s}, [x0]          // {z[0],z[1]}
++        fmul            v7.4s,  v7.4s,  v29.4s
++        ld1             {v17.4s}, [x1]          // {z[o1],z[o1+1]}
++        prfm            pldl1keep, [x2, #16]
++        prfm            pldl1keep, [x3, #16]
++        fmla            v25.4s, v7.4s,  v5.s[0] // {t1a,t2a,t5a,t6a}
++        prfm            pldl1keep, [x0, #16]
++        prfm            pldl1keep, [x1, #16]
++
++        zip1            v20.4s, v24.4s, v25.4s
++        zip2            v21.4s, v24.4s, v25.4s
++        fneg            v22.4s, v20.4s
++        fadd            v4.4s,  v21.4s, v20.4s
++        fsub            v6.4s,  v20.4s, v21.4s  // just the second half
++        fadd            v5.4s,  v21.4s, v22.4s  // just the first half
++
++        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
++        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
++
++        fadd            v20.4s, v16.4s, v4.4s
++        fsub            v22.4s, v16.4s, v4.4s
++        fadd            v21.4s, v17.4s, v5.4s
++        st1             {v20.4s}, [x0], #16     // {z[0], z[1]}
++        fsub            v23.4s, v17.4s, v5.4s
++
++        st1             {v21.4s}, [x1], #16     // {z[o1],z[o1+1]}
++        st1             {v22.4s}, [x2], #16     // {z[o2],z[o2+1]}
++        st1             {v23.4s}, [x3], #16     // {z[o3],z[o3+1]}
++1:
++        ld1             {v20.4s},[x2]    // {z[o2],z[o2+1]}
++        ld1             {v22.4s},[x3]    // {z[o3],z[o3+1]}
++        ld1             {v4.2s}, [x4], #8       // {wre[0],wre[1]}
++        transpose       v26.2d, v27.2d, v20.2d, v22.2d
++        ld1             {v5.2s}, [x5], x7       // {wim[-1],wim[0]}
++        rev64           v6.4s,  v26.4s
++        fmul            v26.4s, v26.4s, v4.s[0]
++        rev64           v7.4s,  v27.4s
++        fmul            v27.4s, v27.4s, v4.s[1]
++        fmul            v6.4s,  v6.4s,  v29.4s
++        fmul            v7.4s,  v7.4s,  v29.4s
++        ld1             {v16.4s},[x0]           // {z[0],z[1]}
++        fmla            v26.4s, v6.4s,  v5.s[1] // {t1,t2,t5,t6}
++        fmla            v27.4s, v7.4s,  v5.s[0] // {t1a,t2a,t5a,t6a}
++        ld1             {v17.4s},[x1]           // {z[o1],z[o1+1]}
++
++        subs            x6,  x6,  #1            // n--
++
++        zip1            v20.4s, v26.4s, v27.4s
++        zip2            v21.4s, v26.4s, v27.4s
++        fneg            v22.4s, v20.4s
++        fadd            v4.4s,  v21.4s, v20.4s
++        fsub            v6.4s,  v20.4s, v21.4s  // just the second half
++        fadd            v5.4s,  v21.4s, v22.4s  // just the first half
++
++        tbl             v4.16b, {v4.16b},        v30.16b // trans4_float
++        tbl             v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
++
++        fadd            v20.4s, v16.4s, v4.4s
++        fsub            v22.4s, v16.4s, v4.4s
++        fadd            v21.4s, v17.4s, v5.4s
++        st1             {v20.4s}, [x0], #16     // {z[0], z[1]}
++        fsub            v23.4s, v17.4s, v5.4s
++
++        st1             {v21.4s}, [x1], #16     // {z[o1],z[o1+1]}
++        st1             {v22.4s}, [x2], #16     // {z[o2],z[o2+1]}
++        st1             {v23.4s}, [x3], #16     // {z[o3],z[o3+1]}
++        b.ne            1b
++
++        ret
++endfunc
++
++.macro  def_fft n, n2, n4
++function fft\n\()_neon, align=6
++        sub             sp,  sp,  #16
++        stp             x28, x30, [sp]
++        add             x28, x0,  #\n4*2*8
++        bl              fft\n2\()_neon
++        mov             x0,  x28
++        bl              fft\n4\()_neon
++        add             x0,  x28, #\n4*1*8
++        bl              fft\n4\()_neon
++        sub             x0,  x28, #\n4*2*8
++        ldp             x28, x30, [sp], #16
++        movrel          x4,  X(ff_cos_\n)
++        mov             x2,  #\n4>>1
++        b               fft_pass_neon
++endfunc
++.endm
++
++        def_fft    32,    16,     8
++        def_fft    64,    32,    16
++        def_fft   128,    64,    32
++        def_fft   256,   128,    64
++        def_fft   512,   256,   128
++        def_fft  1024,   512,   256
++        def_fft  2048,  1024,   512
++        def_fft  4096,  2048,  1024
++        def_fft  8192,  4096,  2048
++        def_fft 16384,  8192,  4096
++        def_fft 32768, 16384,  8192
++        def_fft 65536, 32768, 16384
++
++function ff_fft_calc_neon, export=1
++        prfm            pldl1keep, [x1]
++        movrel          x10, trans4_float
++        ldr             w2,  [x0]
++        movrel          x11, trans8_float
++        sub             w2,  w2,  #2
++        movrel          x3,  fft_tab_neon
++        ld1             {v30.16b}, [x10]
++        mov             x7,  #-8
++        movrel          x12, pmmp
++        ldr             x3,  [x3, x2, lsl #3]
++        movrel          x13, mppm
++        movrel          x14, X(ff_cos_16)
++        ld1             {v31.16b}, [x11]
++        mov             x0,  x1
++        ld1             {v29.4s},  [x12]         // pmmp
++        ld1             {v28.4s},  [x13]
++        br              x3
++endfunc
++
++function ff_fft_permute_neon, export=1
++        mov             x6,  #1
++        ldr             w2,  [x0]       // nbits
++        ldr             x3,  [x0, #16]  // tmp_buf
++        ldr             x0,  [x0, #8]   // revtab
++        lsl             x6,  x6, x2
++        mov             x2,  x6
++1:
++        ld1             {v0.2s,v1.2s}, [x1], #16
++        ldr             w4,  [x0], #4
++        uxth            w5,  w4
++        lsr             w4,  w4,  #16
++        add             x5,  x3,  x5,  lsl #3
++        add             x4,  x3,  x4,  lsl #3
++        st1             {v0.2s}, [x5]
++        st1             {v1.2s}, [x4]
++        subs            x6,  x6, #2
++        b.gt            1b
++
++        sub             x1,  x1,  x2,  lsl #3
++1:
++        ld1             {v0.4s,v1.4s}, [x3], #32
++        st1             {v0.4s,v1.4s}, [x1], #32
++        subs            x2,  x2,  #4
++        b.gt            1b
++
++        ret
++endfunc
++
++const   fft_tab_neon, relocate=1
++        .quad fft4_neon
++        .quad fft8_neon
++        .quad fft16_neon
++        .quad fft32_neon
++        .quad fft64_neon
++        .quad fft128_neon
++        .quad fft256_neon
++        .quad fft512_neon
++        .quad fft1024_neon
++        .quad fft2048_neon
++        .quad fft4096_neon
++        .quad fft8192_neon
++        .quad fft16384_neon
++        .quad fft32768_neon
++        .quad fft65536_neon
++endconst
++
++const   pmmp, align=4
++        .float          +1.0, -1.0, -1.0, +1.0
++endconst
++
++const   mppm, align=4
++        .float          -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
++endconst
+diff --git a/media/ffvpx/libavcodec/aarch64/h264chroma_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/h264chroma_init_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/h264chroma_init_aarch64.c
+@@ -0,0 +1,59 @@
++/*
++ * ARM NEON optimised H.264 chroma functions
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/aarch64/cpu.h"
++#include "libavcodec/h264chroma.h"
++
++#include "config.h"
++
++void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                 int h, int x, int y);
++void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                 int h, int x, int y);
++void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                 int h, int x, int y);
++
++void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                 int h, int x, int y);
++void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                 int h, int x, int y);
++void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                 int h, int x, int y);
++
++av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth)
++{
++    const int high_bit_depth = bit_depth > 8;
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags) && !high_bit_depth) {
++        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
++        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
++        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
++
++        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
++        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
++        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
++    }
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/h264cmc_neon.S b/media/ffvpx/libavcodec/aarch64/h264cmc_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/h264cmc_neon.S
+@@ -0,0 +1,450 @@
++/*
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++/* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
++.macro  h264_chroma_mc8 type, codec=h264
++function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
++  .ifc \type,avg
++        mov             x8,  x0
++  .endif
++        prfm            pldl1strm, [x1]
++        prfm            pldl1strm, [x1, x2]
++  .ifc \codec,rv40
++        movrel          x6,  rv40bias
++        lsr             w9,  w5,  #1
++        lsr             w10, w4,  #1
++        lsl             w9,  w9,  #3
++        lsl             w10, w10, #1
++        add             w9,  w9,  w10
++        add             x6,  x6,  w9, UXTW
++        ld1r            {v22.8H}, [x6]
++  .endif
++  .ifc \codec,vc1
++        movi            v22.8H,   #28
++  .endif
++        mul             w7,  w4,  w5
++        lsl             w14, w5,  #3
++        lsl             w13, w4,  #3
++        cmp             w7,  #0
++        sub             w6,  w14, w7
++        sub             w12, w13, w7
++        sub             w4,  w7,  w13
++        sub             w4,  w4,  w14
++        add             w4,  w4,  #64
++        b.eq            2f
++
++        dup             v0.8B,  w4
++        dup             v1.8B,  w12
++        ld1             {v4.8B, v5.8B}, [x1], x2
++        dup             v2.8B,  w6
++        dup             v3.8B,  w7
++        ext             v5.8B,  v4.8B,  v5.8B,  #1
++1:      ld1             {v6.8B, v7.8B}, [x1], x2
++        umull           v16.8H, v4.8B,  v0.8B
++        umlal           v16.8H, v5.8B,  v1.8B
++        ext             v7.8B,  v6.8B,  v7.8B,  #1
++        ld1             {v4.8B, v5.8B}, [x1], x2
++        umlal           v16.8H, v6.8B,  v2.8B
++        prfm            pldl1strm, [x1]
++        ext             v5.8B,  v4.8B,  v5.8B,  #1
++        umlal           v16.8H, v7.8B,  v3.8B
++        umull           v17.8H, v6.8B,  v0.8B
++        subs            w3,  w3,  #2
++        umlal           v17.8H, v7.8B, v1.8B
++        umlal           v17.8H, v4.8B, v2.8B
++        umlal           v17.8H, v5.8B, v3.8B
++        prfm            pldl1strm, [x1, x2]
++  .ifc \codec,h264
++        rshrn           v16.8B, v16.8H, #6
++        rshrn           v17.8B, v17.8H, #6
++  .else
++        add             v16.8H, v16.8H, v22.8H
++        add             v17.8H, v17.8H, v22.8H
++        shrn            v16.8B, v16.8H, #6
++        shrn            v17.8B, v17.8H, #6
++  .endif
++  .ifc \type,avg
++        ld1             {v20.8B}, [x8], x2
++        ld1             {v21.8B}, [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++        urhadd          v17.8B, v17.8B, v21.8B
++  .endif
++        st1             {v16.8B}, [x0], x2
++        st1             {v17.8B}, [x0], x2
++        b.gt            1b
++        ret
++
++2:      adds            w12, w12, w6
++        dup             v0.8B, w4
++        b.eq            5f
++        tst             w6,  w6
++        dup             v1.8B, w12
++        b.eq            4f
++
++        ld1             {v4.8B}, [x1], x2
++3:      ld1             {v6.8B}, [x1], x2
++        umull           v16.8H, v4.8B,  v0.8B
++        umlal           v16.8H, v6.8B,  v1.8B
++        ld1             {v4.8B}, [x1], x2
++        umull           v17.8H, v6.8B,  v0.8B
++        umlal           v17.8H, v4.8B,  v1.8B
++        prfm            pldl1strm, [x1]
++  .ifc \codec,h264
++        rshrn           v16.8B, v16.8H, #6
++        rshrn           v17.8B, v17.8H, #6
++  .else
++        add             v16.8H, v16.8H, v22.8H
++        add             v17.8H, v17.8H, v22.8H
++        shrn            v16.8B, v16.8H, #6
++        shrn            v17.8B, v17.8H, #6
++  .endif
++        prfm            pldl1strm, [x1, x2]
++  .ifc \type,avg
++        ld1             {v20.8B}, [x8], x2
++        ld1             {v21.8B}, [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++        urhadd          v17.8B, v17.8B, v21.8B
++  .endif
++        subs            w3,  w3,  #2
++        st1             {v16.8B}, [x0], x2
++        st1             {v17.8B}, [x0], x2
++        b.gt            3b
++        ret
++
++4:      ld1             {v4.8B, v5.8B}, [x1], x2
++        ld1             {v6.8B, v7.8B}, [x1], x2
++        ext             v5.8B,  v4.8B,  v5.8B,  #1
++        ext             v7.8B,  v6.8B,  v7.8B,  #1
++        prfm            pldl1strm, [x1]
++        subs            w3,  w3,  #2
++        umull           v16.8H, v4.8B, v0.8B
++        umlal           v16.8H, v5.8B, v1.8B
++        umull           v17.8H, v6.8B, v0.8B
++        umlal           v17.8H, v7.8B, v1.8B
++        prfm            pldl1strm, [x1, x2]
++  .ifc \codec,h264
++        rshrn           v16.8B, v16.8H, #6
++        rshrn           v17.8B, v17.8H, #6
++  .else
++        add             v16.8H, v16.8H, v22.8H
++        add             v17.8H, v17.8H, v22.8H
++        shrn            v16.8B, v16.8H, #6
++        shrn            v17.8B, v17.8H, #6
++  .endif
++  .ifc \type,avg
++        ld1             {v20.8B}, [x8], x2
++        ld1             {v21.8B}, [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++        urhadd          v17.8B, v17.8B, v21.8B
++  .endif
++        st1             {v16.8B}, [x0], x2
++        st1             {v17.8B}, [x0], x2
++        b.gt            4b
++        ret
++
++5:      ld1             {v4.8B}, [x1], x2
++        ld1             {v5.8B}, [x1], x2
++        prfm            pldl1strm, [x1]
++        subs            w3,  w3,  #2
++        umull           v16.8H, v4.8B, v0.8B
++        umull           v17.8H, v5.8B, v0.8B
++        prfm            pldl1strm, [x1, x2]
++  .ifc \codec,h264
++        rshrn           v16.8B, v16.8H, #6
++        rshrn           v17.8B, v17.8H, #6
++  .else
++        add             v16.8H, v16.8H, v22.8H
++        add             v17.8H, v17.8H, v22.8H
++        shrn            v16.8B, v16.8H, #6
++        shrn            v17.8B, v17.8H, #6
++  .endif
++  .ifc \type,avg
++        ld1             {v20.8B}, [x8], x2
++        ld1             {v21.8B}, [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++        urhadd          v17.8B, v17.8B, v21.8B
++  .endif
++        st1             {v16.8B}, [x0], x2
++        st1             {v17.8B}, [x0], x2
++        b.gt            5b
++        ret
++endfunc
++.endm
++
++/* chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
++.macro  h264_chroma_mc4 type, codec=h264
++function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
++  .ifc \type,avg
++        mov             x8,  x0
++  .endif
++        prfm            pldl1strm, [x1]
++        prfm            pldl1strm, [x1, x2]
++  .ifc \codec,rv40
++        movrel          x6,  rv40bias
++        lsr             w9,  w5,  #1
++        lsr             w10, w4,  #1
++        lsl             w9,  w9,  #3
++        lsl             w10, w10, #1
++        add             w9,  w9,  w10
++        add             x6,  x6,  w9, UXTW
++        ld1r            {v22.8H}, [x6]
++  .endif
++  .ifc \codec,vc1
++        movi            v22.8H,   #28
++  .endif
++        mul             w7,  w4,  w5
++        lsl             w14, w5,  #3
++        lsl             w13, w4,  #3
++        cmp             w7,  #0
++        sub             w6,  w14, w7
++        sub             w12, w13, w7
++        sub             w4,  w7,  w13
++        sub             w4,  w4,  w14
++        add             w4,  w4,  #64
++        b.eq            2f
++
++        dup             v24.8B,  w4
++        dup             v25.8B,  w12
++        ld1             {v4.8B}, [x1], x2
++        dup             v26.8B,  w6
++        dup             v27.8B,  w7
++        ext             v5.8B,  v4.8B,  v5.8B, #1
++        trn1            v0.2S,  v24.2S, v25.2S
++        trn1            v2.2S,  v26.2S, v27.2S
++        trn1            v4.2S,  v4.2S,  v5.2S
++1:      ld1             {v6.8B}, [x1], x2
++        ext             v7.8B,  v6.8B,  v7.8B, #1
++        trn1            v6.2S,  v6.2S,  v7.2S
++        umull           v18.8H, v4.8B,  v0.8B
++        umlal           v18.8H, v6.8B,  v2.8B
++        ld1             {v4.8B}, [x1], x2
++        ext             v5.8B,  v4.8B,  v5.8B, #1
++        trn1            v4.2S,  v4.2S,  v5.2S
++        prfm            pldl1strm, [x1]
++        umull           v19.8H, v6.8B,  v0.8B
++        umlal           v19.8H, v4.8B,  v2.8B
++        trn1            v30.2D, v18.2D, v19.2D
++        trn2            v31.2D, v18.2D, v19.2D
++        add             v18.8H, v30.8H, v31.8H
++  .ifc \codec,h264
++        rshrn           v16.8B, v18.8H, #6
++  .else
++        add             v18.8H, v18.8H, v22.8H
++        shrn            v16.8B, v18.8H, #6
++  .endif
++        subs            w3,  w3,  #2
++        prfm            pldl1strm, [x1, x2]
++  .ifc \type,avg
++        ld1             {v20.S}[0], [x8], x2
++        ld1             {v20.S}[1], [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++  .endif
++        st1             {v16.S}[0], [x0], x2
++        st1             {v16.S}[1], [x0], x2
++        b.gt            1b
++        ret
++
++2:      adds            w12, w12, w6
++        dup             v30.8B, w4
++        b.eq            5f
++        tst             w6,  w6
++        dup             v31.8B, w12
++        trn1            v0.2S,  v30.2S, v31.2S
++        trn2            v1.2S,  v30.2S, v31.2S
++        b.eq            4f
++
++        ext             v1.8B,  v0.8B,  v1.8B, #4
++        ld1             {v4.S}[0], [x1], x2
++3:      ld1             {v4.S}[1], [x1], x2
++        umull           v18.8H, v4.8B,  v0.8B
++        ld1             {v4.S}[0], [x1], x2
++        umull           v19.8H, v4.8B,  v1.8B
++        trn1            v30.2D, v18.2D, v19.2D
++        trn2            v31.2D, v18.2D, v19.2D
++        add             v18.8H, v30.8H, v31.8H
++        prfm            pldl1strm, [x1]
++  .ifc \codec,h264
++        rshrn           v16.8B, v18.8H, #6
++  .else
++        add             v18.8H, v18.8H, v22.8H
++        shrn            v16.8B, v18.8H, #6
++  .endif
++  .ifc \type,avg
++        ld1             {v20.S}[0], [x8], x2
++        ld1             {v20.S}[1], [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++  .endif
++        subs            w3,  w3,  #2
++        prfm            pldl1strm, [x1, x2]
++        st1             {v16.S}[0], [x0], x2
++        st1             {v16.S}[1], [x0], x2
++        b.gt            3b
++        ret
++
++4:      ld1             {v4.8B}, [x1], x2
++        ld1             {v6.8B}, [x1], x2
++        ext             v5.8B,  v4.8B,  v5.8B, #1
++        ext             v7.8B,  v6.8B,  v7.8B, #1
++        trn1            v4.2S,  v4.2S,  v5.2S
++        trn1            v6.2S,  v6.2S,  v7.2S
++        umull           v18.8H, v4.8B,  v0.8B
++        umull           v19.8H, v6.8B,  v0.8B
++        subs            w3,  w3,  #2
++        trn1            v30.2D, v18.2D, v19.2D
++        trn2            v31.2D, v18.2D, v19.2D
++        add             v18.8H, v30.8H, v31.8H
++        prfm            pldl1strm, [x1]
++  .ifc \codec,h264
++        rshrn           v16.8B, v18.8H, #6
++  .else
++        add             v18.8H, v18.8H, v22.8H
++        shrn            v16.8B, v18.8H, #6
++  .endif
++  .ifc \type,avg
++        ld1             {v20.S}[0], [x8], x2
++        ld1             {v20.S}[1], [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++  .endif
++        prfm            pldl1strm, [x1]
++        st1             {v16.S}[0], [x0], x2
++        st1             {v16.S}[1], [x0], x2
++        b.gt            4b
++        ret
++
++5:      ld1             {v4.S}[0], [x1], x2
++        ld1             {v4.S}[1], [x1], x2
++        umull           v18.8H, v4.8B,  v30.8B
++        subs            w3,  w3,  #2
++        prfm            pldl1strm, [x1]
++  .ifc \codec,h264
++        rshrn           v16.8B, v18.8H, #6
++  .else
++        add             v18.8H, v18.8H, v22.8H
++        shrn            v16.8B, v18.8H, #6
++  .endif
++  .ifc \type,avg
++        ld1             {v20.S}[0], [x8], x2
++        ld1             {v20.S}[1], [x8], x2
++        urhadd          v16.8B, v16.8B, v20.8B
++  .endif
++        prfm            pldl1strm, [x1]
++        st1             {v16.S}[0], [x0], x2
++        st1             {v16.S}[1], [x0], x2
++        b.gt            5b
++        ret
++endfunc
++.endm
++
++.macro  h264_chroma_mc2 type
++function ff_\type\()_h264_chroma_mc2_neon, export=1
++        prfm            pldl1strm, [x1]
++        prfm            pldl1strm, [x1, x2]
++        orr             w7,  w4,  w5
++        cbz             w7,  2f
++
++        mul             w7,  w4,  w5
++        lsl             w14, w5,  #3
++        lsl             w13, w4,  #3
++        sub             w6,  w14, w7
++        sub             w12, w13, w7
++        sub             w4,  w7,  w13
++        sub             w4,  w4,  w14
++        add             w4,  w4,  #64
++        dup             v0.8B,  w4
++        dup             v2.8B,  w12
++        dup             v1.8B,  w6
++        dup             v3.8B,  w7
++        trn1            v0.4H,  v0.4H,  v2.4H
++        trn1            v1.4H,  v1.4H,  v3.4H
++1:
++        ld1             {v4.S}[0],  [x1], x2
++        ld1             {v4.S}[1],  [x1], x2
++        rev64           v5.2S,  v4.2S
++        ld1             {v5.S}[1],  [x1]
++        ext             v6.8B,  v4.8B,  v5.8B,  #1
++        ext             v7.8B,  v5.8B,  v4.8B,  #1
++        trn1            v4.4H,  v4.4H,  v6.4H
++        trn1            v5.4H,  v5.4H,  v7.4H
++        umull           v16.8H, v4.8B,  v0.8B
++        umlal           v16.8H, v5.8B,  v1.8B
++  .ifc \type,avg
++        ld1             {v18.H}[0], [x0], x2
++        ld1             {v18.H}[2], [x0]
++        sub             x0,  x0,  x2
++  .endif
++        rev64           v17.4S, v16.4S
++        add             v16.8H, v16.8H, v17.8H
++        rshrn           v16.8B, v16.8H, #6
++  .ifc \type,avg
++        urhadd          v16.8B, v16.8B, v18.8B
++  .endif
++        st1             {v16.H}[0], [x0], x2
++        st1             {v16.H}[2], [x0], x2
++        subs            w3,  w3,  #2
++        b.gt            1b
++        ret
++
++2:
++        ld1             {v16.H}[0], [x1], x2
++        ld1             {v16.H}[1], [x1], x2
++  .ifc \type,avg
++        ld1             {v18.H}[0], [x0], x2
++        ld1             {v18.H}[1], [x0]
++        sub             x0,  x0,  x2
++        urhadd          v16.8B, v16.8B, v18.8B
++  .endif
++        st1             {v16.H}[0], [x0], x2
++        st1             {v16.H}[1], [x0], x2
++        subs            w3,  w3,  #2
++        b.gt            2b
++        ret
++endfunc
++.endm
++
++        h264_chroma_mc8 put
++        h264_chroma_mc8 avg
++        h264_chroma_mc4 put
++        h264_chroma_mc4 avg
++        h264_chroma_mc2 put
++        h264_chroma_mc2 avg
++
++#if CONFIG_RV40_DECODER
++const   rv40bias
++        .short           0, 16, 32, 16
++        .short          32, 28, 32, 28
++        .short           0, 32, 16, 32
++        .short          32, 28, 32, 28
++endconst
++
++        h264_chroma_mc8 put, rv40
++        h264_chroma_mc8 avg, rv40
++        h264_chroma_mc4 put, rv40
++        h264_chroma_mc4 avg, rv40
++#endif
++
++#if CONFIG_VC1DSP
++        h264_chroma_mc8 put, vc1
++        h264_chroma_mc8 avg, vc1
++        h264_chroma_mc4 put, vc1
++        h264_chroma_mc4 avg, vc1
++#endif
+diff --git a/media/ffvpx/libavcodec/aarch64/h264dsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/h264dsp_init_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/h264dsp_init_aarch64.c
+@@ -0,0 +1,102 @@
++/*
++ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/aarch64/cpu.h"
++#include "libavcodec/h264dsp.h"
++
++void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
++                                     int beta, int8_t *tc0);
++void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
++                                     int beta, int8_t *tc0);
++void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
++                                       int beta, int8_t *tc0);
++void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
++                                       int beta, int8_t *tc0);
++
++void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
++                                   int log2_den, int weight, int offset);
++void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
++                                  int log2_den, int weight, int offset);
++void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
++                                  int log2_den, int weight, int offset);
++
++void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
++                                     int height, int log2_den, int weightd,
++                                     int weights, int offset);
++void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
++                                    int height, int log2_den, int weightd,
++                                    int weights, int offset);
++void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
++                                    int height, int log2_den, int weightd,
++                                    int weights, int offset);
++
++void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
++void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
++void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
++                             int16_t *block, int stride,
++                             const uint8_t nnzc[6*8]);
++void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
++                                  int16_t *block, int stride,
++                                  const uint8_t nnzc[6*8]);
++void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
++                            int16_t *block, int stride,
++                            const uint8_t nnzc[6*8]);
++
++void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
++void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
++void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
++                             int16_t *block, int stride,
++                             const uint8_t nnzc[6*8]);
++
++av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
++                                     const int chroma_format_idc)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags) && bit_depth == 8) {
++        c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
++        c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
++        c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
++        if (chroma_format_idc <= 1)
++        c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
++
++        c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
++        c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
++        c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
++
++        c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
++        c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
++        c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
++
++        c->h264_idct_add        = ff_h264_idct_add_neon;
++        c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
++        c->h264_idct_add16      = ff_h264_idct_add16_neon;
++        c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
++        if (chroma_format_idc <= 1)
++            c->h264_idct_add8   = ff_h264_idct_add8_neon;
++        c->h264_idct8_add       = ff_h264_idct8_add_neon;
++        c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
++        c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
++    }
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/h264dsp_neon.S b/media/ffvpx/libavcodec/aarch64/h264dsp_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/h264dsp_neon.S
+@@ -0,0 +1,498 @@
++/*
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++#include "neon.S"
++
++.macro  h264_loop_filter_start
++        cmp             w2,  #0
++        ldr             w6,  [x4]
++        ccmp            w3,  #0, #0, ne
++        mov             v24.S[0], w6
++        and             w6,  w6,  w6,  lsl #16
++        b.eq            1f
++        ands            w6,  w6,  w6,  lsl #8
++        b.ge            2f
++1:
++        ret
++2:
++.endm
++
++.macro  h264_loop_filter_luma
++        dup             v22.16B, w2                     // alpha
++        uxtl            v24.8H,  v24.8B
++        uabd            v21.16B, v16.16B, v0.16B        // abs(p0 - q0)
++        uxtl            v24.4S,  v24.4H
++        uabd            v28.16B, v18.16B, v16.16B       // abs(p1 - p0)
++        sli             v24.8H,  v24.8H,  #8
++        uabd            v30.16B, v2.16B,  v0.16B        // abs(q1 - q0)
++        sli             v24.4S,  v24.4S,  #16
++        cmhi            v21.16B, v22.16B, v21.16B       // < alpha
++        dup             v22.16B, w3                     // beta
++        cmlt            v23.16B, v24.16B, #0
++        cmhi            v28.16B, v22.16B, v28.16B       // < beta
++        cmhi            v30.16B, v22.16B, v30.16B       // < beta
++        bic             v21.16B, v21.16B, v23.16B
++        uabd            v17.16B, v20.16B, v16.16B       // abs(p2 - p0)
++        and             v21.16B, v21.16B, v28.16B
++        uabd            v19.16B,  v4.16B,  v0.16B       // abs(q2 - q0)
++        cmhi            v17.16B, v22.16B, v17.16B       // < beta
++        and             v21.16B, v21.16B, v30.16B
++        cmhi            v19.16B, v22.16B, v19.16B       // < beta
++        and             v17.16B, v17.16B, v21.16B
++        and             v19.16B, v19.16B, v21.16B
++        and             v24.16B, v24.16B, v21.16B
++        urhadd          v28.16B, v16.16B,  v0.16B
++        sub             v21.16B, v24.16B, v17.16B
++        uqadd           v23.16B, v18.16B, v24.16B
++        uhadd           v20.16B, v20.16B, v28.16B
++        sub             v21.16B, v21.16B, v19.16B
++        uhadd           v28.16B,  v4.16B, v28.16B
++        umin            v23.16B, v23.16B, v20.16B
++        uqsub           v22.16B, v18.16B, v24.16B
++        uqadd           v4.16B,   v2.16B, v24.16B
++        umax            v23.16B, v23.16B, v22.16B
++        uqsub           v22.16B,  v2.16B, v24.16B
++        umin            v28.16B,  v4.16B, v28.16B
++        uxtl            v4.8H,    v0.8B
++        umax            v28.16B, v28.16B, v22.16B
++        uxtl2           v20.8H,   v0.16B
++        usubw           v4.8H,    v4.8H,  v16.8B
++        usubw2          v20.8H,  v20.8H,  v16.16B
++        shl             v4.8H,    v4.8H,  #2
++        shl             v20.8H,  v20.8H,  #2
++        uaddw           v4.8H,    v4.8H,  v18.8B
++        uaddw2          v20.8H,  v20.8H,  v18.16B
++        usubw           v4.8H,    v4.8H,   v2.8B
++        usubw2          v20.8H,  v20.8H,   v2.16B
++        rshrn           v4.8B,    v4.8H,  #3
++        rshrn2          v4.16B,  v20.8H,  #3
++        bsl             v17.16B, v23.16B, v18.16B
++        bsl             v19.16B, v28.16B,  v2.16B
++        neg             v23.16B, v21.16B
++        uxtl            v28.8H,  v16.8B
++        smin            v4.16B,   v4.16B, v21.16B
++        uxtl2           v21.8H,  v16.16B
++        smax            v4.16B,   v4.16B, v23.16B
++        uxtl            v22.8H,   v0.8B
++        uxtl2           v24.8H,   v0.16B
++        saddw           v28.8H,  v28.8H,  v4.8B
++        saddw2          v21.8H,  v21.8H,  v4.16B
++        ssubw           v22.8H,  v22.8H,  v4.8B
++        ssubw2          v24.8H,  v24.8H,  v4.16B
++        sqxtun          v16.8B,  v28.8H
++        sqxtun2         v16.16B, v21.8H
++        sqxtun          v0.8B,   v22.8H
++        sqxtun2         v0.16B,  v24.8H
++.endm
++
++function ff_h264_v_loop_filter_luma_neon, export=1
++        h264_loop_filter_start
++        sxtw            x1,  w1
++
++        ld1             {v0.16B},  [x0], x1
++        ld1             {v2.16B},  [x0], x1
++        ld1             {v4.16B},  [x0], x1
++        sub             x0,  x0,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #1
++        ld1             {v20.16B},  [x0], x1
++        ld1             {v18.16B},  [x0], x1
++        ld1             {v16.16B},  [x0], x1
++
++        h264_loop_filter_luma
++
++        sub             x0,  x0,  x1, lsl #1
++        st1             {v17.16B},  [x0], x1
++        st1             {v16.16B}, [x0], x1
++        st1             {v0.16B},  [x0], x1
++        st1             {v19.16B}, [x0]
++
++        ret
++endfunc
++
++function ff_h264_h_loop_filter_luma_neon, export=1
++        h264_loop_filter_start
++
++        sub             x0,  x0,  #4
++        ld1             {v6.8B},  [x0], x1
++        ld1             {v20.8B}, [x0], x1
++        ld1             {v18.8B}, [x0], x1
++        ld1             {v16.8B}, [x0], x1
++        ld1             {v0.8B},  [x0], x1
++        ld1             {v2.8B},  [x0], x1
++        ld1             {v4.8B},  [x0], x1
++        ld1             {v26.8B}, [x0], x1
++        ld1             {v6.D}[1],  [x0], x1
++        ld1             {v20.D}[1], [x0], x1
++        ld1             {v18.D}[1], [x0], x1
++        ld1             {v16.D}[1], [x0], x1
++        ld1             {v0.D}[1],  [x0], x1
++        ld1             {v2.D}[1],  [x0], x1
++        ld1             {v4.D}[1],  [x0], x1
++        ld1             {v26.D}[1], [x0], x1
++
++        transpose_8x16B v6, v20, v18, v16, v0, v2, v4, v26, v21, v23
++
++        h264_loop_filter_luma
++
++        transpose_4x16B v17, v16, v0, v19, v21, v23, v25, v27
++
++        sub             x0,  x0,  x1, lsl #4
++        add             x0,  x0,  #2
++        st1             {v17.S}[0],  [x0], x1
++        st1             {v16.S}[0], [x0], x1
++        st1             {v0.S}[0],  [x0], x1
++        st1             {v19.S}[0], [x0], x1
++        st1             {v17.S}[1],  [x0], x1
++        st1             {v16.S}[1], [x0], x1
++        st1             {v0.S}[1],  [x0], x1
++        st1             {v19.S}[1], [x0], x1
++        st1             {v17.S}[2],  [x0], x1
++        st1             {v16.S}[2], [x0], x1
++        st1             {v0.S}[2],  [x0], x1
++        st1             {v19.S}[2], [x0], x1
++        st1             {v17.S}[3],  [x0], x1
++        st1             {v16.S}[3], [x0], x1
++        st1             {v0.S}[3],  [x0], x1
++        st1             {v19.S}[3], [x0], x1
++
++        ret
++endfunc
++
++.macro  h264_loop_filter_chroma
++        dup             v22.8B, w2              // alpha
++        uxtl            v24.8H, v24.8B
++        uabd            v26.8B, v16.8B, v0.8B   // abs(p0 - q0)
++        uxtl            v4.8H,  v0.8B
++        uabd            v28.8B, v18.8B, v16.8B  // abs(p1 - p0)
++        usubw           v4.8H,  v4.8H,  v16.8B
++        sli             v24.8H, v24.8H, #8
++        shl             v4.8H,  v4.8H,  #2
++        uabd            v30.8B, v2.8B,  v0.8B   // abs(q1 - q0)
++        uaddw           v4.8H,  v4.8H,  v18.8B
++        cmhi            v26.8B, v22.8B, v26.8B  // < alpha
++        usubw           v4.8H,  v4.8H,  v2.8B
++        dup             v22.8B, w3              // beta
++        rshrn           v4.8B,  v4.8H,  #3
++        cmhi            v28.8B, v22.8B, v28.8B  // < beta
++        cmhi            v30.8B, v22.8B, v30.8B  // < beta
++        smin            v4.8B,  v4.8B,  v24.8B
++        neg             v25.8B, v24.8B
++        and             v26.8B, v26.8B, v28.8B
++        smax            v4.8B,  v4.8B,  v25.8B
++        and             v26.8B, v26.8B, v30.8B
++        uxtl            v22.8H, v0.8B
++        and             v4.8B,  v4.8B,  v26.8B
++        uxtl            v28.8H, v16.8B
++        saddw           v28.8H, v28.8H, v4.8B
++        ssubw           v22.8H, v22.8H, v4.8B
++        sqxtun          v16.8B, v28.8H
++        sqxtun          v0.8B,  v22.8H
++.endm
++
++function ff_h264_v_loop_filter_chroma_neon, export=1
++        h264_loop_filter_start
++
++        sub             x0,  x0,  x1, lsl #1
++        ld1             {v18.8B}, [x0], x1
++        ld1             {v16.8B}, [x0], x1
++        ld1             {v0.8B},  [x0], x1
++        ld1             {v2.8B},  [x0]
++
++        h264_loop_filter_chroma
++
++        sub             x0,  x0,  x1, lsl #1
++        st1             {v16.8B}, [x0], x1
++        st1             {v0.8B},  [x0], x1
++
++        ret
++endfunc
++
++function ff_h264_h_loop_filter_chroma_neon, export=1
++        h264_loop_filter_start
++
++        sub             x0,  x0,  #2
++        ld1             {v18.S}[0], [x0], x1
++        ld1             {v16.S}[0], [x0], x1
++        ld1             {v0.S}[0],  [x0], x1
++        ld1             {v2.S}[0],  [x0], x1
++        ld1             {v18.S}[1], [x0], x1
++        ld1             {v16.S}[1], [x0], x1
++        ld1             {v0.S}[1],  [x0], x1
++        ld1             {v2.S}[1],  [x0], x1
++
++        transpose_4x8B  v18, v16, v0, v2, v28, v29, v30, v31
++
++        h264_loop_filter_chroma
++
++        transpose_4x8B  v18, v16, v0, v2, v28, v29, v30, v31
++
++        sub             x0,  x0,  x1, lsl #3
++        st1             {v18.S}[0], [x0], x1
++        st1             {v16.S}[0], [x0], x1
++        st1             {v0.S}[0],  [x0], x1
++        st1             {v2.S}[0],  [x0], x1
++        st1             {v18.S}[1], [x0], x1
++        st1             {v16.S}[1], [x0], x1
++        st1             {v0.S}[1],  [x0], x1
++        st1             {v2.S}[1],  [x0], x1
++
++        ret
++endfunc
++
++.macro  biweight_16     macs, macd
++        dup             v0.16B,  w5
++        dup             v1.16B,  w6
++        mov             v4.16B,  v16.16B
++        mov             v6.16B,  v16.16B
++1:      subs            w3,  w3,  #2
++        ld1             {v20.16B}, [x0], x2
++        \macd           v4.8H,   v0.8B,  v20.8B
++        \macd\()2       v6.8H,   v0.16B, v20.16B
++        ld1             {v22.16B}, [x1], x2
++        \macs           v4.8H,   v1.8B,  v22.8B
++        \macs\()2       v6.8H,   v1.16B, v22.16B
++        mov             v24.16B, v16.16B
++        ld1             {v28.16B}, [x0], x2
++        mov             v26.16B, v16.16B
++        \macd           v24.8H,  v0.8B,  v28.8B
++        \macd\()2       v26.8H,  v0.16B, v28.16B
++        ld1             {v30.16B}, [x1], x2
++        \macs           v24.8H,  v1.8B,  v30.8B
++        \macs\()2       v26.8H,  v1.16B, v30.16B
++        sshl            v4.8H,   v4.8H,  v18.8H
++        sshl            v6.8H,   v6.8H,  v18.8H
++        sqxtun          v4.8B,   v4.8H
++        sqxtun2         v4.16B,  v6.8H
++        sshl            v24.8H,  v24.8H, v18.8H
++        sshl            v26.8H,  v26.8H, v18.8H
++        sqxtun          v24.8B,  v24.8H
++        sqxtun2         v24.16B, v26.8H
++        mov             v6.16B,  v16.16B
++        st1             {v4.16B},  [x7], x2
++        mov             v4.16B,  v16.16B
++        st1             {v24.16B}, [x7], x2
++        b.ne            1b
++        ret
++.endm
++
++.macro  biweight_8      macs, macd
++        dup             v0.8B,  w5
++        dup             v1.8B,  w6
++        mov             v2.16B,  v16.16B
++        mov             v20.16B, v16.16B
++1:      subs            w3,  w3,  #2
++        ld1             {v4.8B}, [x0], x2
++        \macd           v2.8H,  v0.8B,  v4.8B
++        ld1             {v5.8B}, [x1], x2
++        \macs           v2.8H,  v1.8B,  v5.8B
++        ld1             {v6.8B}, [x0], x2
++        \macd           v20.8H, v0.8B,  v6.8B
++        ld1             {v7.8B}, [x1], x2
++        \macs           v20.8H, v1.8B,  v7.8B
++        sshl            v2.8H,  v2.8H,  v18.8H
++        sqxtun          v2.8B,  v2.8H
++        sshl            v20.8H, v20.8H, v18.8H
++        sqxtun          v4.8B,  v20.8H
++        mov             v20.16B, v16.16B
++        st1             {v2.8B}, [x7], x2
++        mov             v2.16B,  v16.16B
++        st1             {v4.8B}, [x7], x2
++        b.ne            1b
++        ret
++.endm
++
++.macro  biweight_4      macs, macd
++        dup             v0.8B,  w5
++        dup             v1.8B,  w6
++        mov             v2.16B, v16.16B
++        mov             v20.16B,v16.16B
++1:      subs            w3,  w3,  #4
++        ld1             {v4.S}[0], [x0], x2
++        ld1             {v4.S}[1], [x0], x2
++        \macd           v2.8H,  v0.8B,  v4.8B
++        ld1             {v5.S}[0], [x1], x2
++        ld1             {v5.S}[1], [x1], x2
++        \macs           v2.8H,  v1.8B,  v5.8B
++        b.lt            2f
++        ld1             {v6.S}[0], [x0], x2
++        ld1             {v6.S}[1], [x0], x2
++        \macd           v20.8H, v0.8B,  v6.8B
++        ld1             {v7.S}[0], [x1], x2
++        ld1             {v7.S}[1], [x1], x2
++        \macs           v20.8H, v1.8B,  v7.8B
++        sshl            v2.8H,  v2.8H,  v18.8H
++        sqxtun          v2.8B,  v2.8H
++        sshl            v20.8H, v20.8H, v18.8H
++        sqxtun          v4.8B,  v20.8H
++        mov             v20.16B, v16.16B
++        st1             {v2.S}[0], [x7], x2
++        st1             {v2.S}[1], [x7], x2
++        mov             v2.16B,  v16.16B
++        st1             {v4.S}[0], [x7], x2
++        st1             {v4.S}[1], [x7], x2
++        b.ne            1b
++        ret
++2:      sshl            v2.8H,  v2.8H,  v18.8H
++        sqxtun          v2.8B,  v2.8H
++        st1             {v2.S}[0], [x7], x2
++        st1             {v2.S}[1], [x7], x2
++        ret
++.endm
++
++.macro  biweight_func   w
++function ff_biweight_h264_pixels_\w\()_neon, export=1
++        sxtw            x2,  w2
++        lsr             w8,  w5,  #31
++        add             w7,  w7,  #1
++        eor             w8,  w8,  w6,  lsr #30
++        orr             w7,  w7,  #1
++        dup             v18.8H,   w4
++        lsl             w7,  w7,  w4
++        not             v18.16B,  v18.16B
++        dup             v16.8H,   w7
++        mov             x7,  x0
++        cbz             w8,  10f
++        subs            w8,  w8,  #1
++        b.eq            20f
++        subs            w8,  w8,  #1
++        b.eq            30f
++        b               40f
++10:     biweight_\w     umlal, umlal
++20:     neg             w5, w5
++        biweight_\w     umlal, umlsl
++30:     neg             w5, w5
++        neg             w6, w6
++        biweight_\w     umlsl, umlsl
++40:     neg             w6, w6
++        biweight_\w     umlsl, umlal
++endfunc
++.endm
++
++        biweight_func   16
++        biweight_func   8
++        biweight_func   4
++
++.macro  weight_16       add
++        dup             v0.16B,  w4
++1:      subs            w2,  w2,  #2
++        ld1             {v20.16B}, [x0], x1
++        umull           v4.8H,   v0.8B,  v20.8B
++        umull2          v6.8H,   v0.16B, v20.16B
++        ld1             {v28.16B}, [x0], x1
++        umull           v24.8H,  v0.8B,  v28.8B
++        umull2          v26.8H,  v0.16B, v28.16B
++        \add            v4.8H,   v16.8H, v4.8H
++        srshl           v4.8H,   v4.8H,  v18.8H
++        \add            v6.8H,   v16.8H, v6.8H
++        srshl           v6.8H,   v6.8H,  v18.8H
++        sqxtun          v4.8B,   v4.8H
++        sqxtun2         v4.16B,  v6.8H
++        \add            v24.8H,  v16.8H, v24.8H
++        srshl           v24.8H,  v24.8H, v18.8H
++        \add            v26.8H,  v16.8H, v26.8H
++        srshl           v26.8H,  v26.8H, v18.8H
++        sqxtun          v24.8B,  v24.8H
++        sqxtun2         v24.16B, v26.8H
++        st1             {v4.16B},  [x5], x1
++        st1             {v24.16B}, [x5], x1
++        b.ne            1b
++        ret
++.endm
++
++.macro  weight_8        add
++        dup             v0.8B,  w4
++1:      subs            w2,  w2,  #2
++        ld1             {v4.8B}, [x0], x1
++        umull           v2.8H,  v0.8B,  v4.8B
++        ld1             {v6.8B}, [x0], x1
++        umull           v20.8H, v0.8B,  v6.8B
++        \add            v2.8H,  v16.8H,  v2.8H
++        srshl           v2.8H,  v2.8H,  v18.8H
++        sqxtun          v2.8B,  v2.8H
++        \add            v20.8H, v16.8H,  v20.8H
++        srshl           v20.8H, v20.8H, v18.8H
++        sqxtun          v4.8B,  v20.8H
++        st1             {v2.8B}, [x5], x1
++        st1             {v4.8B}, [x5], x1
++        b.ne            1b
++        ret
++.endm
++
++.macro  weight_4        add
++        dup             v0.8B,  w4
++1:      subs            w2,  w2,  #4
++        ld1             {v4.S}[0], [x0], x1
++        ld1             {v4.S}[1], [x0], x1
++        umull           v2.8H,  v0.8B,  v4.8B
++        b.lt            2f
++        ld1             {v6.S}[0], [x0], x1
++        ld1             {v6.S}[1], [x0], x1
++        umull           v20.8H, v0.8B,  v6.8B
++        \add            v2.8H,  v16.8H,  v2.8H
++        srshl           v2.8H,  v2.8H,  v18.8H
++        sqxtun          v2.8B,  v2.8H
++        \add            v20.8H, v16.8H,  v20.8H
++        srshl           v20.8H, v20.8h, v18.8H
++        sqxtun          v4.8B,  v20.8H
++        st1             {v2.S}[0], [x5], x1
++        st1             {v2.S}[1], [x5], x1
++        st1             {v4.S}[0], [x5], x1
++        st1             {v4.S}[1], [x5], x1
++        b.ne            1b
++        ret
++2:      \add            v2.8H,  v16.8H,  v2.8H
++        srshl           v2.8H,  v2.8H,  v18.8H
++        sqxtun          v2.8B,  v2.8H
++        st1             {v2.S}[0], [x5], x1
++        st1             {v2.S}[1], [x5], x1
++        ret
++.endm
++
++.macro  weight_func     w
++function ff_weight_h264_pixels_\w\()_neon, export=1
++        sxtw            x1,  w1
++        cmp             w3,  #1
++        mov             w6,  #1
++        lsl             w5,  w5,  w3
++        dup             v16.8H,  w5
++        mov             x5,  x0
++        b.le            20f
++        sub             w6,  w6,  w3
++        dup             v18.8H,  w6
++        cmp             w4, #0
++        b.lt            10f
++        weight_\w       shadd
++10:     neg             w4,  w4
++        weight_\w       shsub
++20:     neg             w6,  w3
++        dup             v18.8H,  w6
++        cmp             w4,  #0
++        b.lt            10f
++        weight_\w       add
++10:     neg             w4,  w4
++        weight_\w       sub
++endfunc
++.endm
++
++        weight_func     16
++        weight_func     8
++        weight_func     4
+diff --git a/media/ffvpx/libavcodec/aarch64/h264idct_neon.S b/media/ffvpx/libavcodec/aarch64/h264idct_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/h264idct_neon.S
+@@ -0,0 +1,409 @@
++/*
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++#include "neon.S"
++
++function ff_h264_idct_add_neon, export=1
++        ld1             {v0.4H, v1.4H, v2.4H, v3.4H},  [x1]
++        sxtw            x2,     w2
++        movi            v30.8H, #0
++
++        add             v4.4H,  v0.4H,  v2.4H
++        sshr            v16.4H, v1.4H,  #1
++        st1             {v30.8H},    [x1], #16
++        sshr            v17.4H, v3.4H,  #1
++        st1             {v30.8H},    [x1], #16
++        sub             v5.4H,  v0.4H,  v2.4H
++        sub             v6.4H,  v16.4H, v3.4H
++        add             v7.4H,  v1.4H,  v17.4H
++        add             v0.4H,  v4.4H,  v7.4H
++        add             v1.4H,  v5.4H,  v6.4H
++        sub             v2.4H,  v5.4H,  v6.4H
++        sub             v3.4H,  v4.4H,  v7.4H
++
++        transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
++
++        add             v4.4H,  v0.4H,  v2.4H
++        ld1             {v18.S}[0], [x0], x2
++        sshr            v16.4H,  v3.4H,  #1
++        sshr            v17.4H,  v1.4H,  #1
++        ld1             {v18.S}[1], [x0], x2
++        sub             v5.4H,  v0.4H,  v2.4H
++        ld1             {v19.S}[1], [x0], x2
++        add             v6.4H,  v16.4H, v1.4H
++        ins             v4.D[1],  v5.D[0]
++        sub             v7.4H,  v17.4H, v3.4H
++        ld1             {v19.S}[0], [x0], x2
++        ins             v6.D[1],  v7.D[0]
++        sub             x0,  x0,  x2, lsl #2
++        add             v0.8H,  v4.8H,  v6.8H
++        sub             v1.8H,  v4.8H,  v6.8H
++
++        srshr           v0.8H,  v0.8H,  #6
++        srshr           v1.8H,  v1.8H,  #6
++
++        uaddw           v0.8H,  v0.8H,  v18.8B
++        uaddw           v1.8H,  v1.8H,  v19.8B
++
++        sqxtun          v0.8B, v0.8H
++        sqxtun          v1.8B, v1.8H
++
++        st1             {v0.S}[0],  [x0], x2
++        st1             {v0.S}[1],  [x0], x2
++        st1             {v1.S}[1],  [x0], x2
++        st1             {v1.S}[0],  [x0], x2
++
++        sub             x1,  x1,  #32
++        ret
++endfunc
++
++function ff_h264_idct_dc_add_neon, export=1
++        sxtw            x2,  w2
++        mov             w3,       #0
++        ld1r            {v2.8H},  [x1]
++        strh            w3,       [x1]
++        srshr           v2.8H,  v2.8H,  #6
++        ld1             {v0.S}[0],  [x0], x2
++        ld1             {v0.S}[1],  [x0], x2
++        uaddw           v3.8H,  v2.8H,  v0.8B
++        ld1             {v1.S}[0],  [x0], x2
++        ld1             {v1.S}[1],  [x0], x2
++        uaddw           v4.8H,  v2.8H,  v1.8B
++        sqxtun          v0.8B,  v3.8H
++        sqxtun          v1.8B,  v4.8H
++        sub             x0,  x0,  x2, lsl #2
++        st1             {v0.S}[0],  [x0], x2
++        st1             {v0.S}[1],  [x0], x2
++        st1             {v1.S}[0],  [x0], x2
++        st1             {v1.S}[1],  [x0], x2
++        ret
++endfunc
++
++function ff_h264_idct_add16_neon, export=1
++        mov             x12, x30
++        mov             x6,  x0         // dest
++        mov             x5,  x1         // block_offset
++        mov             x1,  x2         // block
++        mov             w9,  w3         // stride
++        movrel          x7,  scan8
++        mov             x10, #16
++        movrel          x13, X(ff_h264_idct_dc_add_neon)
++        movrel          x14, X(ff_h264_idct_add_neon)
++1:      mov             w2,  w9
++        ldrb            w3,  [x7], #1
++        ldrsw           x0,  [x5], #4
++        ldrb            w3,  [x4,  w3,  uxtw]
++        subs            w3,  w3,  #1
++        b.lt            2f
++        ldrsh           w3,  [x1]
++        add             x0,  x0,  x6
++        ccmp            w3,  #0,  #4,  eq
++        csel            x15, x13, x14, ne
++        blr             x15
++2:      subs            x10, x10, #1
++        add             x1,  x1,  #32
++        b.ne            1b
++        ret             x12
++endfunc
++
++function ff_h264_idct_add16intra_neon, export=1
++        mov             x12, x30
++        mov             x6,  x0         // dest
++        mov             x5,  x1         // block_offset
++        mov             x1,  x2         // block
++        mov             w9,  w3         // stride
++        movrel          x7,  scan8
++        mov             x10, #16
++        movrel          x13, X(ff_h264_idct_dc_add_neon)
++        movrel          x14, X(ff_h264_idct_add_neon)
++1:      mov             w2,  w9
++        ldrb            w3,  [x7], #1
++        ldrsw           x0,  [x5], #4
++        ldrb            w3,  [x4,  w3,  uxtw]
++        add             x0,  x0,  x6
++        cmp             w3,  #0
++        ldrsh           w3,  [x1]
++        csel            x15, x13, x14, eq
++        ccmp            w3,  #0,  #0,  eq
++        b.eq            2f
++        blr             x15
++2:      subs            x10, x10, #1
++        add             x1,  x1,  #32
++        b.ne            1b
++        ret             x12
++endfunc
++
++function ff_h264_idct_add8_neon, export=1
++        sub             sp,  sp, #0x40
++        stp             x19, x20, [sp]
++        mov             x12, x30
++        ldp             x6,  x15, [x0]          // dest[0], dest[1]
++        add             x5,  x1,  #16*4         // block_offset
++        add             x9,  x2,  #16*32        // block
++        mov             w19, w3                 // stride
++        movrel          x13, X(ff_h264_idct_dc_add_neon)
++        movrel          x14, X(ff_h264_idct_add_neon)
++        movrel          x7,  scan8, 16
++        mov             x10, #0
++        mov             x11, #16
++1:      mov             w2,  w19
++        ldrb            w3,  [x7, x10]          // scan8[i]
++        ldrsw           x0,  [x5, x10, lsl #2]  // block_offset[i]
++        ldrb            w3,  [x4, w3,  uxtw]    // nnzc[ scan8[i] ]
++        add             x0,  x0,  x6            // block_offset[i] + dst[j-1]
++        add             x1,  x9,  x10, lsl #5   // block + i * 16
++        cmp             w3,  #0
++        ldrsh           w3,  [x1]               // block[i*16]
++        csel            x20, x13, x14, eq
++        ccmp            w3,  #0,  #0,  eq
++        b.eq            2f
++        blr             x20
++2:      add             x10, x10, #1
++        cmp             x10, #4
++        csel            x10, x11, x10, eq     // mov x10, #16
++        csel            x6,  x15, x6,  eq
++        cmp             x10, #20
++        b.lt            1b
++        ldp             x19, x20, [sp]
++        add             sp,  sp,  #0x40
++        ret             x12
++endfunc
++
++.macro  idct8x8_cols    pass
++  .if \pass == 0
++        va      .req    v18
++        vb      .req    v30
++        sshr            v18.8H, v26.8H, #1
++        add             v16.8H, v24.8H, v28.8H
++        ld1             {v30.8H, v31.8H}, [x1]
++        st1             {v19.8H}, [x1],  #16
++        st1             {v19.8H}, [x1],  #16
++        sub             v17.8H,  v24.8H, v28.8H
++        sshr            v19.8H,  v30.8H, #1
++        sub             v18.8H,  v18.8H,  v30.8H
++        add             v19.8H,  v19.8H,  v26.8H
++  .else
++        va      .req    v30
++        vb      .req    v18
++        sshr            v30.8H, v26.8H, #1
++        sshr            v19.8H, v18.8H, #1
++        add             v16.8H, v24.8H, v28.8H
++        sub             v17.8H, v24.8H, v28.8H
++        sub             v30.8H, v30.8H, v18.8H
++        add             v19.8H, v19.8H, v26.8H
++  .endif
++        add             v26.8H, v17.8H, va.8H
++        sub             v28.8H, v17.8H, va.8H
++        add             v24.8H, v16.8H, v19.8H
++        sub             vb.8H,  v16.8H, v19.8H
++        sub             v16.8H, v29.8H, v27.8H
++        add             v17.8H, v31.8H, v25.8H
++        sub             va.8H,  v31.8H, v25.8H
++        add             v19.8H, v29.8H, v27.8H
++        sub             v16.8H, v16.8H, v31.8H
++        sub             v17.8H, v17.8H, v27.8H
++        add             va.8H,  va.8H,  v29.8H
++        add             v19.8H, v19.8H, v25.8H
++        sshr            v25.8H, v25.8H, #1
++        sshr            v27.8H, v27.8H, #1
++        sshr            v29.8H, v29.8H, #1
++        sshr            v31.8H, v31.8H, #1
++        sub             v16.8H, v16.8H, v31.8H
++        sub             v17.8H, v17.8H, v27.8H
++        add             va.8H,  va.8H,  v29.8H
++        add             v19.8H, v19.8H, v25.8H
++        sshr            v25.8H, v16.8H, #2
++        sshr            v27.8H, v17.8H, #2
++        sshr            v29.8H, va.8H,  #2
++        sshr            v31.8H, v19.8H, #2
++        sub             v19.8H, v19.8H, v25.8H
++        sub             va.8H,  v27.8H, va.8H
++        add             v17.8H, v17.8H, v29.8H
++        add             v16.8H, v16.8H, v31.8H
++  .if \pass == 0
++        sub             v31.8H, v24.8H, v19.8H
++        add             v24.8H, v24.8H, v19.8H
++        add             v25.8H, v26.8H, v18.8H
++        sub             v18.8H, v26.8H, v18.8H
++        add             v26.8H, v28.8H, v17.8H
++        add             v27.8H, v30.8H, v16.8H
++        sub             v29.8H, v28.8H, v17.8H
++        sub             v28.8H, v30.8H, v16.8H
++  .else
++        sub             v31.8H, v24.8H, v19.8H
++        add             v24.8H, v24.8H, v19.8H
++        add             v25.8H, v26.8H, v30.8H
++        sub             v30.8H, v26.8H, v30.8H
++        add             v26.8H, v28.8H, v17.8H
++        sub             v29.8H, v28.8H, v17.8H
++        add             v27.8H, v18.8H, v16.8H
++        sub             v28.8H, v18.8H, v16.8H
++  .endif
++        .unreq          va
++        .unreq          vb
++.endm
++
++function ff_h264_idct8_add_neon, export=1
++        movi            v19.8H,   #0
++        sxtw            x2,       w2
++        ld1             {v24.8H, v25.8H}, [x1]
++        st1             {v19.8H},  [x1],   #16
++        st1             {v19.8H},  [x1],   #16
++        ld1             {v26.8H, v27.8H}, [x1]
++        st1             {v19.8H},  [x1],   #16
++        st1             {v19.8H},  [x1],   #16
++        ld1             {v28.8H, v29.8H}, [x1]
++        st1             {v19.8H},  [x1],   #16
++        st1             {v19.8H},  [x1],   #16
++
++        idct8x8_cols    0
++        transpose_8x8H  v24, v25, v26, v27, v28, v29, v18, v31, v6, v7
++        idct8x8_cols    1
++
++        mov             x3,  x0
++        srshr           v24.8H, v24.8H, #6
++        ld1             {v0.8B},     [x0], x2
++        srshr           v25.8H, v25.8H, #6
++        ld1             {v1.8B},     [x0], x2
++        srshr           v26.8H, v26.8H, #6
++        ld1             {v2.8B},     [x0], x2
++        srshr           v27.8H, v27.8H, #6
++        ld1             {v3.8B},     [x0], x2
++        srshr           v28.8H, v28.8H, #6
++        ld1             {v4.8B},     [x0], x2
++        srshr           v29.8H, v29.8H, #6
++        ld1             {v5.8B},     [x0], x2
++        srshr           v30.8H, v30.8H, #6
++        ld1             {v6.8B},     [x0], x2
++        srshr           v31.8H, v31.8H, #6
++        ld1             {v7.8B},     [x0], x2
++        uaddw           v24.8H, v24.8H, v0.8B
++        uaddw           v25.8H, v25.8H, v1.8B
++        uaddw           v26.8H, v26.8H, v2.8B
++        sqxtun          v0.8B,  v24.8H
++        uaddw           v27.8H, v27.8H, v3.8B
++        sqxtun          v1.8B,  v25.8H
++        uaddw           v28.8H, v28.8H, v4.8B
++        sqxtun          v2.8B,  v26.8H
++        st1             {v0.8B},     [x3], x2
++        uaddw           v29.8H, v29.8H, v5.8B
++        sqxtun          v3.8B,  v27.8H
++        st1             {v1.8B},     [x3], x2
++        uaddw           v30.8H, v30.8H, v6.8B
++        sqxtun          v4.8B,  v28.8H
++        st1             {v2.8B},     [x3], x2
++        uaddw           v31.8H, v31.8H, v7.8B
++        sqxtun          v5.8B,  v29.8H
++        st1             {v3.8B},     [x3], x2
++        sqxtun          v6.8B,  v30.8H
++        sqxtun          v7.8B,  v31.8H
++        st1             {v4.8B},     [x3], x2
++        st1             {v5.8B},     [x3], x2
++        st1             {v6.8B},     [x3], x2
++        st1             {v7.8B},     [x3], x2
++
++        sub             x1,  x1,  #128
++        ret
++endfunc
++
++function ff_h264_idct8_dc_add_neon, export=1
++        mov             w3,       #0
++        sxtw            x2,       w2
++        ld1r            {v31.8H}, [x1]
++        strh            w3,       [x1]
++        ld1             {v0.8B},  [x0], x2
++        srshr           v31.8H, v31.8H, #6
++        ld1             {v1.8B},     [x0], x2
++        ld1             {v2.8B},     [x0], x2
++        uaddw           v24.8H, v31.8H, v0.8B
++        ld1             {v3.8B},     [x0], x2
++        uaddw           v25.8H, v31.8H, v1.8B
++        ld1             {v4.8B},     [x0], x2
++        uaddw           v26.8H, v31.8H, v2.8B
++        ld1             {v5.8B},     [x0], x2
++        uaddw           v27.8H, v31.8H, v3.8B
++        ld1             {v6.8B},     [x0], x2
++        uaddw           v28.8H, v31.8H, v4.8B
++        ld1             {v7.8B},     [x0], x2
++        uaddw           v29.8H, v31.8H, v5.8B
++        uaddw           v30.8H, v31.8H, v6.8B
++        uaddw           v31.8H, v31.8H, v7.8B
++        sqxtun          v0.8B,  v24.8H
++        sqxtun          v1.8B,  v25.8H
++        sqxtun          v2.8B,  v26.8H
++        sqxtun          v3.8B,  v27.8H
++        sub             x0,  x0,  x2, lsl #3
++        st1             {v0.8B},     [x0], x2
++        sqxtun          v4.8B,  v28.8H
++        st1             {v1.8B},     [x0], x2
++        sqxtun          v5.8B,  v29.8H
++        st1             {v2.8B},     [x0], x2
++        sqxtun          v6.8B,  v30.8H
++        st1             {v3.8B},     [x0], x2
++        sqxtun          v7.8B,  v31.8H
++        st1             {v4.8B},     [x0], x2
++        st1             {v5.8B},     [x0], x2
++        st1             {v6.8B},     [x0], x2
++        st1             {v7.8B},     [x0], x2
++        ret
++endfunc
++
++function ff_h264_idct8_add4_neon, export=1
++        mov             x12, x30
++        mov             x6,  x0
++        mov             x5,  x1
++        mov             x1,  x2
++        mov             w2,  w3
++        movrel          x7,  scan8
++        mov             w10, #16
++        movrel          x13, X(ff_h264_idct8_dc_add_neon)
++        movrel          x14, X(ff_h264_idct8_add_neon)
++1:      ldrb            w9,  [x7], #4
++        ldrsw           x0,  [x5], #16
++        ldrb            w9,  [x4, w9, UXTW]
++        subs            w9,  w9,  #1
++        b.lt            2f
++        ldrsh           w11,  [x1]
++        add             x0,  x6,  x0
++        ccmp            w11, #0,  #4,  eq
++        csel            x15, x13, x14, ne
++        blr             x15
++2:      subs            w10, w10, #4
++        add             x1,  x1,  #128
++        b.ne            1b
++        ret             x12
++endfunc
++
++const   scan8
++        .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
++        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
++        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
++        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
++        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
++        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
++        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
++        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
++        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8
++        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
++        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
++        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8
++endconst
+diff --git a/media/ffvpx/libavcodec/aarch64/h264pred_init.c b/media/ffvpx/libavcodec/aarch64/h264pred_init.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/h264pred_init.c
+@@ -0,0 +1,93 @@
++/*
++ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/attributes.h"
++#include "libavutil/aarch64/cpu.h"
++#include "libavcodec/avcodec.h"
++#include "libavcodec/h264pred.h"
++
++void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
++
++void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
++void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
++
++static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
++                                        const int bit_depth,
++                                        const int chroma_format_idc)
++{
++    const int high_depth = bit_depth > 8;
++
++    if (high_depth)
++        return;
++
++    if (chroma_format_idc <= 1) {
++        h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
++        h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
++        if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
++            h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
++        h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon;
++        if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
++            codec_id != AV_CODEC_ID_VP8) {
++            h->pred8x8[DC_PRED8x8     ] = ff_pred8x8_dc_neon;
++            h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
++            h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
++            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
++            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
++            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
++            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
++        }
++    }
++
++    h->pred16x16[DC_PRED8x8     ] = ff_pred16x16_dc_neon;
++    h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vert_neon;
++    h->pred16x16[HOR_PRED8x8    ] = ff_pred16x16_hor_neon;
++    h->pred16x16[LEFT_DC_PRED8x8] = ff_pred16x16_left_dc_neon;
++    h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon;
++    h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_neon;
++    if (codec_id != AV_CODEC_ID_SVQ3 && codec_id != AV_CODEC_ID_RV40 &&
++        codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
++        h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
++}
++
++av_cold void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
++                                       int bit_depth, const int chroma_format_idc)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags))
++        h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/h264pred_neon.S b/media/ffvpx/libavcodec/aarch64/h264pred_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/h264pred_neon.S
+@@ -0,0 +1,361 @@
++/*
++ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++.macro ldcol.8  rd,  rs,  rt,  n=8,  hi=0
++.if \n >= 8 || \hi == 0
++        ld1             {\rd\().b}[0],  [\rs], \rt
++        ld1             {\rd\().b}[1],  [\rs], \rt
++        ld1             {\rd\().b}[2],  [\rs], \rt
++        ld1             {\rd\().b}[3],  [\rs], \rt
++.endif
++.if \n >= 8 || \hi == 1
++        ld1             {\rd\().b}[4],  [\rs], \rt
++        ld1             {\rd\().b}[5],  [\rs], \rt
++        ld1             {\rd\().b}[6],  [\rs], \rt
++        ld1             {\rd\().b}[7],  [\rs], \rt
++.endif
++.if \n == 16
++        ld1             {\rd\().b}[8],  [\rs], \rt
++        ld1             {\rd\().b}[9],  [\rs], \rt
++        ld1             {\rd\().b}[10], [\rs], \rt
++        ld1             {\rd\().b}[11], [\rs], \rt
++        ld1             {\rd\().b}[12], [\rs], \rt
++        ld1             {\rd\().b}[13], [\rs], \rt
++        ld1             {\rd\().b}[14], [\rs], \rt
++        ld1             {\rd\().b}[15], [\rs], \rt
++.endif
++.endm
++
++function ff_pred16x16_128_dc_neon, export=1
++        movi            v0.16b,  #128
++        b               .L_pred16x16_dc_end
++endfunc
++
++function ff_pred16x16_top_dc_neon, export=1
++        sub             x2,  x0,  x1
++        ld1             {v0.16b},  [x2]
++        uaddlv          h0,  v0.16b
++        rshrn           v0.8b,  v0.8h,  #4
++        dup             v0.16b, v0.b[0]
++        b               .L_pred16x16_dc_end
++endfunc
++
++function ff_pred16x16_left_dc_neon, export=1
++        sub             x2,  x0,  #1
++        ldcol.8         v0,  x2,  x1, 16
++        uaddlv          h0,  v0.16b
++        rshrn           v0.8b,  v0.8h,  #4
++        dup             v0.16b, v0.b[0]
++        b               .L_pred16x16_dc_end
++endfunc
++
++function ff_pred16x16_dc_neon, export=1
++        sub             x2,  x0,  x1
++        sub             x3,  x0,  #1
++        ld1             {v0.16b}, [x2]
++        ldcol.8         v1,  x3,  x1, 16
++        uaddlv          h0,  v0.16b
++        uaddlv          h1,  v1.16b
++        add             v0.4h,  v0.4h,  v1.4h
++        rshrn           v0.8b,  v0.8h,  #5
++        dup             v0.16b, v0.b[0]
++.L_pred16x16_dc_end:
++        mov             w3,  #8
++6:      st1             {v0.16b}, [x0], x1
++        st1             {v0.16b}, [x0], x1
++        subs            w3,  w3,  #1
++        b.ne            6b
++        ret
++endfunc
++
++function ff_pred16x16_hor_neon, export=1
++        sub             x2,  x0,  #1
++        mov             w3,  #16
++1:      ld1r            {v0.16b}, [x2], x1
++        st1             {v0.16b}, [x0], x1
++        subs            w3,  w3,  #1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_pred16x16_vert_neon, export=1
++        sub             x2,  x0,  x1
++        add             x1,  x1,  x1
++        ld1             {v0.16b}, [x2], x1
++        mov             w3,  #8
++1:      st1             {v0.16b}, [x0], x1
++        st1             {v0.16b}, [x2], x1
++        subs            w3,  w3,  #1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_pred16x16_plane_neon, export=1
++        sub             x3,  x0,  x1
++        movrel          x4,  p16weight
++        add             x2,  x3,  #8
++        sub             x3,  x3,  #1
++        ld1             {v0.8b},  [x3]
++        ld1             {v2.8b},  [x2], x1
++        ldcol.8         v1,  x3,  x1
++        add             x3,  x3,  x1
++        ldcol.8         v3,  x3,  x1
++        rev64           v0.8b,  v0.8b
++        rev64           v1.8b,  v1.8b
++        uaddl           v7.8h,  v2.8b,  v3.8b
++        usubl           v2.8h,  v2.8b,  v0.8b
++        usubl           v3.8h,  v3.8b,  v1.8b
++        ld1             {v0.8h},     [x4]
++        mul             v2.8h,  v2.8h,  v0.8h
++        mul             v3.8h,  v3.8h,  v0.8h
++        addp            v2.8h,  v2.8h,  v3.8h
++        addp            v2.8h,  v2.8h,  v2.8h
++        addp            v2.4h,  v2.4h,  v2.4h
++        sshll           v3.4s,  v2.4h,  #2
++        saddw           v2.4s,  v3.4s,  v2.4h
++        rshrn           v4.4h,  v2.4s,  #6
++        trn2            v5.4h,  v4.4h,  v4.4h
++        add             v2.4h,  v4.4h,  v5.4h
++        shl             v3.4h,  v2.4h,  #3
++        ext             v7.16b, v7.16b, v7.16b, #14
++        sub             v3.4h,  v3.4h,  v2.4h   // 7 * (b + c)
++        add             v7.4h,  v7.4h,  v0.4h
++        shl             v2.4h,  v7.4h,  #4
++        sub             v2.4h,  v2.4h,  v3.4h
++        shl             v3.4h,  v4.4h,  #4
++        ext             v0.16b, v0.16b, v0.16b, #14
++        sub             v6.4h,  v5.4h,  v3.4h
++        mov             v0.h[0],  wzr
++        mul             v0.8h,  v0.8h,  v4.h[0]
++        dup             v1.8h,  v2.h[0]
++        dup             v2.8h,  v4.h[0]
++        dup             v3.8h,  v6.h[0]
++        shl             v2.8h,  v2.8h,  #3
++        add             v1.8h,  v1.8h,  v0.8h
++        add             v3.8h,  v3.8h,  v2.8h
++        mov             w3,  #16
++1:
++        sqshrun         v0.8b,  v1.8h,  #5
++        add             v1.8h,  v1.8h,  v2.8h
++        sqshrun2        v0.16b, v1.8h,  #5
++        add             v1.8h,  v1.8h,  v3.8h
++        st1             {v0.16b}, [x0], x1
++        subs            w3,  w3,  #1
++        b.ne            1b
++        ret
++endfunc
++
++const   p16weight, align=4
++        .short          1,2,3,4,5,6,7,8
++endconst
++const   p8weight, align=4
++        .short          1,2,3,4,1,2,3,4
++endconst
++
++function ff_pred8x8_hor_neon, export=1
++        sub             x2,  x0,  #1
++        mov             w3,  #8
++1:      ld1r            {v0.8b},  [x2], x1
++        st1             {v0.8b},  [x0], x1
++        subs            w3,  w3,  #1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_pred8x8_vert_neon, export=1
++        sub             x2,  x0,  x1
++        lsl             x1,  x1,  #1
++        ld1             {v0.8b},  [x2], x1
++        mov             w3,  #4
++1:      st1             {v0.8b},  [x0], x1
++        st1             {v0.8b},  [x2], x1
++        subs            w3,  w3,  #1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_pred8x8_plane_neon, export=1
++        sub             x3,  x0,  x1
++        movrel          x4,  p8weight
++        movrel          x5,  p16weight
++        add             x2,  x3,  #4
++        sub             x3,  x3,  #1
++        ld1             {v0.s}[0],  [x3]
++        ld1             {v2.s}[0],  [x2], x1
++        ldcol.8         v0,  x3,  x1,  4,  hi=1
++        add             x3,  x3,  x1
++        ldcol.8         v3,  x3,  x1,  4
++        uaddl           v7.8h,  v2.8b,  v3.8b
++        rev32           v0.8b,  v0.8b
++        trn1            v2.2s,  v2.2s,  v3.2s
++        usubl           v2.8h,  v2.8b,  v0.8b
++        ld1             {v6.8h},  [x4]
++        mul             v2.8h,  v2.8h,  v6.8h
++        ld1             {v0.8h},  [x5]
++        saddlp          v2.4s,  v2.8h
++        addp            v2.4s,  v2.4s,  v2.4s
++        shl             v3.4s,  v2.4s,  #4
++        add             v2.4s,  v3.4s,  v2.4s
++        rshrn           v5.4h,  v2.4s,  #5
++        addp            v2.4h,  v5.4h,  v5.4h
++        shl             v3.4h,  v2.4h,  #1
++        add             v3.4h,  v3.4h,  v2.4h
++        rev64           v7.4h,  v7.4h
++        add             v7.4h,  v7.4h,  v0.4h
++        shl             v2.4h,  v7.4h,  #4
++        sub             v2.4h,  v2.4h,  v3.4h
++        ext             v0.16b, v0.16b, v0.16b, #14
++        mov             v0.h[0],  wzr
++        mul             v0.8h,  v0.8h,  v5.h[0]
++        dup             v1.8h,  v2.h[0]
++        dup             v2.8h,  v5.h[1]
++        add             v1.8h,  v1.8h,  v0.8h
++        mov             w3,  #8
++1:
++        sqshrun         v0.8b,  v1.8h,  #5
++        add             v1.8h,  v1.8h,  v2.8h
++        st1             {v0.8b},  [x0], x1
++        subs            w3,  w3,  #1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_pred8x8_128_dc_neon, export=1
++        movi            v0.8b,  #128
++        movi            v1.8b,  #128
++        b               .L_pred8x8_dc_end
++endfunc
++
++function ff_pred8x8_top_dc_neon, export=1
++        sub             x2,  x0,  x1
++        ld1             {v0.8b},  [x2]
++        uaddlp          v0.4h,  v0.8b
++        addp            v0.4h,  v0.4h,  v0.4h
++        zip1            v0.8h,  v0.8h,  v0.8h
++        rshrn           v2.8b,  v0.8h,  #2
++        zip1            v0.8b,  v2.8b,  v2.8b
++        zip1            v1.8b,  v2.8b,  v2.8b
++        b               .L_pred8x8_dc_end
++endfunc
++
++function ff_pred8x8_left_dc_neon, export=1
++        sub             x2,  x0,  #1
++        ldcol.8         v0,  x2,  x1
++        uaddlp          v0.4h,  v0.8b
++        addp            v0.4h,  v0.4h,  v0.4h
++        rshrn           v2.8b,  v0.8h,  #2
++        dup             v1.8b,  v2.b[1]
++        dup             v0.8b,  v2.b[0]
++        b               .L_pred8x8_dc_end
++endfunc
++
++function ff_pred8x8_dc_neon, export=1
++        sub             x2,  x0,  x1
++        sub             x3,  x0,  #1
++        ld1             {v0.8b}, [x2]
++        ldcol.8         v1,  x3,  x1
++        uaddlp          v0.4h,  v0.8b
++        uaddlp          v1.4h,  v1.8b
++        trn1            v2.2s,  v0.2s,  v1.2s
++        trn2            v3.2s,  v0.2s,  v1.2s
++        addp            v4.4h,  v2.4h,  v3.4h
++        addp            v5.4h,  v4.4h,  v4.4h
++        rshrn           v6.8b,  v5.8h,  #3
++        rshrn           v7.8b,  v4.8h,  #2
++        dup             v0.8b,  v6.b[0]
++        dup             v2.8b,  v7.b[2]
++        dup             v1.8b,  v7.b[3]
++        dup             v3.8b,  v6.b[1]
++        zip1            v0.2s,  v0.2s,  v2.2s
++        zip1            v1.2s,  v1.2s,  v3.2s
++.L_pred8x8_dc_end:
++        mov             w3,  #4
++        add             x2,  x0,  x1,  lsl #2
++6:      st1             {v0.8b},  [x0], x1
++        st1             {v1.8b},  [x2], x1
++        subs            w3,  w3,  #1
++        b.ne            6b
++        ret
++endfunc
++
++function ff_pred8x8_l0t_dc_neon, export=1
++        sub             x2,  x0,  x1
++        sub             x3,  x0,  #1
++        ld1             {v0.8b},  [x2]
++        ldcol.8         v1,  x3,  x1,  4
++        zip1            v0.4s,  v0.4s,  v1.4s
++        uaddlp          v0.8h,  v0.16b
++        addp            v0.8h,  v0.8h,  v0.8h
++        addp            v1.4h,  v0.4h,  v0.4h
++        rshrn           v2.8b,  v0.8h,  #2
++        rshrn           v3.8b,  v1.8h,  #3
++        dup             v4.8b,  v3.b[0]
++        dup             v6.8b,  v2.b[2]
++        dup             v5.8b,  v2.b[0]
++        zip1            v0.2s,  v4.2s,  v6.2s
++        zip1            v1.2s,  v5.2s,  v6.2s
++        b               .L_pred8x8_dc_end
++endfunc
++
++function ff_pred8x8_l00_dc_neon, export=1
++        sub             x2,  x0,  #1
++        ldcol.8         v0,  x2,  x1,  4
++        uaddlp          v0.4h,  v0.8b
++        addp            v0.4h,  v0.4h,  v0.4h
++        rshrn           v0.8b,  v0.8h,  #2
++        movi            v1.8b,  #128
++        dup             v0.8b,  v0.b[0]
++        b               .L_pred8x8_dc_end
++endfunc
++
++function ff_pred8x8_0lt_dc_neon, export=1
++        add             x3,  x0,  x1,  lsl #2
++        sub             x2,  x0,  x1
++        sub             x3,  x3,  #1
++        ld1             {v0.8b},  [x2]
++        ldcol.8         v1,  x3,  x1,  4,  hi=1
++        zip1            v0.4s,  v0.4s,  v1.4s
++        uaddlp          v0.8h,  v0.16b
++        addp            v0.8h,  v0.8h,  v0.8h
++        addp            v1.4h,  v0.4h,  v0.4h
++        rshrn           v2.8b,  v0.8h,  #2
++        rshrn           v3.8b,  v1.8h,  #3
++        dup             v4.8b,  v2.b[0]
++        dup             v5.8b,  v2.b[3]
++        dup             v6.8b,  v2.b[2]
++        dup             v7.8b,  v3.b[1]
++        zip1            v0.2s,  v4.2s,  v6.2s
++        zip1            v1.2s,  v5.2s,  v7.2s
++        b               .L_pred8x8_dc_end
++endfunc
++
++function ff_pred8x8_0l0_dc_neon, export=1
++        add             x2,  x0,  x1,  lsl #2
++        sub             x2,  x2,  #1
++        ldcol.8         v1,  x2,  x1,  4
++        uaddlp          v2.4h,  v1.8b
++        addp            v2.4h,  v2.4h,  v2.4h
++        rshrn           v1.8b,  v2.8h,  #2
++        movi            v0.8b,  #128
++        dup             v1.8b,  v1.b[0]
++        b               .L_pred8x8_dc_end
++endfunc
+diff --git a/media/ffvpx/libavcodec/aarch64/hpeldsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/hpeldsp_init_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/hpeldsp_init_aarch64.c
+@@ -0,0 +1,123 @@
++/*
++ * ARM NEON optimised DSP functions
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stddef.h>
++#include <stdint.h>
++
++#include "config.h"
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/aarch64/cpu.h"
++#include "libavcodec/hpeldsp.h"
++
++void     ff_put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void  ff_put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void  ff_put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void ff_put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void      ff_put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void   ff_put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void   ff_put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void  ff_put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++
++void  ff_put_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++void  ff_put_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++void   ff_put_pixels8_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++void   ff_put_pixels8_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++void  ff_put_pixels8_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++
++void     ff_avg_pixels16_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void  ff_avg_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void  ff_avg_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void ff_avg_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void      ff_avg_pixels8_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void   ff_avg_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void   ff_avg_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++void  ff_avg_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
++                              ptrdiff_t line_size, int h);
++
++void  ff_avg_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++void  ff_avg_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
++                                     ptrdiff_t line_size, int h);
++
++av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++        c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
++        c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
++        c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
++        c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
++        c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
++        c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
++        c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
++        c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
++
++        c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
++        c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
++        c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
++        c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
++        c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
++        c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
++        c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
++        c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
++
++        c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
++        c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
++        c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
++        c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
++        c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
++        c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
++        c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
++        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;
++
++        c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
++        c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
++        c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
++        c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
++    }
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/hpeldsp_neon.S b/media/ffvpx/libavcodec/aarch64/hpeldsp_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/hpeldsp_neon.S
+@@ -0,0 +1,397 @@
++/*
++ * ARM NEON optimised DSP functions
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++.macro  pixels16        rnd=1, avg=0
++  .if \avg
++        mov             x12, x0
++  .endif
++1:      ld1             {v0.16B},  [x1], x2
++        ld1             {v1.16B},  [x1], x2
++        ld1             {v2.16B},  [x1], x2
++        ld1             {v3.16B},  [x1], x2
++  .if \avg
++        ld1             {v4.16B},  [x12], x2
++        urhadd          v0.16B,  v0.16B,  v4.16B
++        ld1             {v5.16B},  [x12], x2
++        urhadd          v1.16B,  v1.16B,  v5.16B
++        ld1             {v6.16B},  [x12], x2
++        urhadd          v2.16B,  v2.16B,  v6.16B
++        ld1             {v7.16B},  [x12], x2
++        urhadd          v3.16B,  v3.16B,  v7.16B
++  .endif
++        subs            w3,  w3,  #4
++        st1             {v0.16B},  [x0], x2
++        st1             {v1.16B},  [x0], x2
++        st1             {v2.16B},  [x0], x2
++        st1             {v3.16B},  [x0], x2
++        b.ne            1b
++        ret
++.endm
++
++.macro  pixels16_x2     rnd=1, avg=0
++1:      ld1             {v0.16B, v1.16B}, [x1], x2
++        ld1             {v2.16B, v3.16B}, [x1], x2
++        subs            w3,  w3,  #2
++        ext             v1.16B,  v0.16B,  v1.16B,  #1
++        avg             v0.16B,  v0.16B,  v1.16B
++        ext             v3.16B,  v2.16B,  v3.16B,  #1
++        avg             v2.16B,  v2.16B,  v3.16B
++  .if \avg
++        ld1             {v1.16B}, [x0], x2
++        ld1             {v3.16B}, [x0]
++        urhadd          v0.16B,  v0.16B,  v1.16B
++        urhadd          v2.16B,  v2.16B,  v3.16B
++        sub             x0,  x0,  x2
++  .endif
++        st1             {v0.16B}, [x0], x2
++        st1             {v2.16B}, [x0], x2
++        b.ne            1b
++        ret
++.endm
++
++.macro  pixels16_y2     rnd=1, avg=0
++        sub             w3,  w3,  #2
++        ld1             {v0.16B}, [x1], x2
++        ld1             {v1.16B}, [x1], x2
++1:      subs            w3,  w3,  #2
++        avg             v2.16B,  v0.16B,  v1.16B
++        ld1             {v0.16B}, [x1], x2
++        avg             v3.16B,  v0.16B,  v1.16B
++        ld1             {v1.16B}, [x1], x2
++  .if \avg
++        ld1             {v4.16B}, [x0], x2
++        ld1             {v5.16B}, [x0]
++        urhadd          v2.16B,  v2.16B,  v4.16B
++        urhadd          v3.16B,  v3.16B,  v5.16B
++        sub             x0,  x0,  x2
++  .endif
++        st1             {v2.16B}, [x0], x2
++        st1             {v3.16B}, [x0], x2
++        b.ne            1b
++
++        avg             v2.16B,  v0.16B,  v1.16B
++        ld1             {v0.16B}, [x1], x2
++        avg             v3.16B,  v0.16B,  v1.16B
++  .if \avg
++        ld1             {v4.16B}, [x0], x2
++        ld1             {v5.16B}, [x0]
++        urhadd          v2.16B,  v2.16B,  v4.16B
++        urhadd          v3.16B,  v3.16B,  v5.16B
++        sub             x0,  x0,  x2
++  .endif
++        st1             {v2.16B},     [x0], x2
++        st1             {v3.16B},     [x0], x2
++
++        ret
++.endm
++
++.macro  pixels16_xy2    rnd=1, avg=0
++        sub             w3,  w3,  #2
++        ld1             {v0.16B, v1.16B}, [x1], x2
++        ld1             {v4.16B, v5.16B}, [x1], x2
++NRND    movi            v26.8H, #1
++        ext             v1.16B,  v0.16B,  v1.16B,  #1
++        ext             v5.16B,  v4.16B,  v5.16B,  #1
++        uaddl           v16.8H,  v0.8B,   v1.8B
++        uaddl2          v20.8H,  v0.16B,  v1.16B
++        uaddl           v18.8H,  v4.8B,   v5.8B
++        uaddl2          v22.8H,  v4.16B,  v5.16B
++1:      subs            w3,  w3,  #2
++        ld1             {v0.16B, v1.16B}, [x1], x2
++        add             v24.8H,  v16.8H,  v18.8H
++NRND    add             v24.8H,  v24.8H,  v26.8H
++        ext             v30.16B, v0.16B,  v1.16B,  #1
++        add             v1.8H,   v20.8H,  v22.8H
++        mshrn           v28.8B,  v24.8H,  #2
++NRND    add             v1.8H,   v1.8H,   v26.8H
++        mshrn2          v28.16B, v1.8H,   #2
++  .if \avg
++        ld1             {v16.16B},        [x0]
++        urhadd          v28.16B, v28.16B, v16.16B
++  .endif
++        uaddl           v16.8H,  v0.8B,   v30.8B
++        ld1             {v2.16B, v3.16B}, [x1], x2
++        uaddl2          v20.8H,  v0.16B,  v30.16B
++        st1             {v28.16B},        [x0], x2
++        add             v24.8H,  v16.8H,  v18.8H
++NRND    add             v24.8H,  v24.8H,  v26.8H
++        ext             v3.16B,  v2.16B,  v3.16B,  #1
++        add             v0.8H,   v20.8H,  v22.8H
++        mshrn           v30.8B,  v24.8H,  #2
++NRND    add             v0.8H,   v0.8H,   v26.8H
++        mshrn2          v30.16B, v0.8H,   #2
++  .if \avg
++        ld1             {v18.16B},        [x0]
++        urhadd          v30.16B, v30.16B, v18.16B
++  .endif
++        uaddl           v18.8H,   v2.8B,  v3.8B
++        uaddl2          v22.8H,   v2.16B, v3.16B
++        st1             {v30.16B},        [x0], x2
++        b.gt            1b
++
++        ld1             {v0.16B, v1.16B}, [x1], x2
++        add             v24.8H,  v16.8H,  v18.8H
++NRND    add             v24.8H,  v24.8H,  v26.8H
++        ext             v30.16B, v0.16B,  v1.16B,  #1
++        add             v1.8H,   v20.8H,  v22.8H
++        mshrn           v28.8B,  v24.8H,  #2
++NRND    add             v1.8H,   v1.8H,   v26.8H
++        mshrn2          v28.16B, v1.8H,   #2
++  .if \avg
++        ld1             {v16.16B},        [x0]
++        urhadd          v28.16B, v28.16B, v16.16B
++  .endif
++        uaddl           v16.8H,  v0.8B,   v30.8B
++        uaddl2          v20.8H,  v0.16B,  v30.16B
++        st1             {v28.16B},        [x0], x2
++        add             v24.8H,  v16.8H,  v18.8H
++NRND    add             v24.8H,  v24.8H,  v26.8H
++        add             v0.8H,   v20.8H,  v22.8H
++        mshrn           v30.8B,  v24.8H,  #2
++NRND    add             v0.8H,   v0.8H,   v26.8H
++        mshrn2          v30.16B, v0.8H,   #2
++  .if \avg
++        ld1             {v18.16B},        [x0]
++        urhadd          v30.16B, v30.16B, v18.16B
++  .endif
++        st1             {v30.16B},        [x0], x2
++
++        ret
++.endm
++
++.macro  pixels8         rnd=1, avg=0
++1:      ld1             {v0.8B}, [x1], x2
++        ld1             {v1.8B}, [x1], x2
++        ld1             {v2.8B}, [x1], x2
++        ld1             {v3.8B}, [x1], x2
++  .if \avg
++        ld1             {v4.8B}, [x0], x2
++        urhadd          v0.8B,  v0.8B,  v4.8B
++        ld1             {v5.8B}, [x0], x2
++        urhadd          v1.8B,  v1.8B,  v5.8B
++        ld1             {v6.8B}, [x0], x2
++        urhadd          v2.8B,  v2.8B,  v6.8B
++        ld1             {v7.8B}, [x0], x2
++        urhadd          v3.8B,  v3.8B,  v7.8B
++        sub             x0,  x0,  x2,  lsl #2
++  .endif
++        subs            w3,  w3,  #4
++        st1             {v0.8B}, [x0], x2
++        st1             {v1.8B}, [x0], x2
++        st1             {v2.8B}, [x0], x2
++        st1             {v3.8B}, [x0], x2
++        b.ne            1b
++        ret
++.endm
++
++.macro  pixels8_x2      rnd=1, avg=0
++1:      ld1             {v0.8B, v1.8B}, [x1], x2
++        ext             v1.8B,  v0.8B,  v1.8B,  #1
++        ld1             {v2.8B, v3.8B}, [x1], x2
++        ext             v3.8B,  v2.8B,  v3.8B,  #1
++        subs            w3,  w3,  #2
++        avg             v0.8B,   v0.8B,   v1.8B
++        avg             v2.8B,   v2.8B,   v3.8B
++  .if \avg
++        ld1             {v4.8B},     [x0], x2
++        ld1             {v5.8B},     [x0]
++        urhadd          v0.8B,   v0.8B,   v4.8B
++        urhadd          v2.8B,   v2.8B,   v5.8B
++        sub             x0,  x0,  x2
++  .endif
++        st1             {v0.8B}, [x0], x2
++        st1             {v2.8B}, [x0], x2
++        b.ne            1b
++        ret
++.endm
++
++.macro  pixels8_y2      rnd=1, avg=0
++        sub             w3,  w3,  #2
++        ld1             {v0.8B},  [x1], x2
++        ld1             {v1.8B},  [x1], x2
++1:      subs            w3,  w3,  #2
++        avg             v4.8B,  v0.8B,  v1.8B
++        ld1             {v0.8B},  [x1], x2
++        avg             v5.8B,  v0.8B,  v1.8B
++        ld1             {v1.8B},  [x1], x2
++  .if \avg
++        ld1             {v2.8B},     [x0], x2
++        ld1             {v3.8B},     [x0]
++        urhadd          v4.8B,  v4.8B,  v2.8B
++        urhadd          v5.8B,  v5.8B,  v3.8B
++        sub             x0,  x0,  x2
++  .endif
++        st1             {v4.8B},     [x0], x2
++        st1             {v5.8B},     [x0], x2
++        b.ne            1b
++
++        avg             v4.8B,  v0.8B,  v1.8B
++        ld1             {v0.8B},  [x1], x2
++        avg             v5.8B,  v0.8B,  v1.8B
++  .if \avg
++        ld1             {v2.8B},     [x0], x2
++        ld1             {v3.8B},     [x0]
++        urhadd          v4.8B,  v4.8B,  v2.8B
++        urhadd          v5.8B,  v5.8B,  v3.8B
++        sub             x0,  x0,  x2
++  .endif
++        st1             {v4.8B},     [x0], x2
++        st1             {v5.8B},     [x0], x2
++
++        ret
++.endm
++
++.macro  pixels8_xy2     rnd=1, avg=0
++        sub             w3,  w3,  #2
++        ld1             {v0.16B},     [x1], x2
++        ld1             {v1.16B},     [x1], x2
++NRND    movi            v19.8H, #1
++        ext             v4.16B,  v0.16B,  v4.16B,  #1
++        ext             v6.16B,  v1.16B,  v6.16B,  #1
++        uaddl           v16.8H,  v0.8B,  v4.8B
++        uaddl           v17.8H,  v1.8B,  v6.8B
++1:      subs            w3,  w3,  #2
++        ld1             {v0.16B},     [x1], x2
++        add             v18.8H, v16.8H,  v17.8H
++        ext             v4.16B,  v0.16B,  v4.16B,  #1
++NRND    add             v18.8H, v18.8H, v19.8H
++        uaddl           v16.8H,  v0.8B,  v4.8B
++        mshrn           v5.8B,  v18.8H, #2
++        ld1             {v1.16B},     [x1], x2
++        add             v18.8H, v16.8H,  v17.8H
++  .if \avg
++        ld1             {v7.8B},     [x0]
++        urhadd          v5.8B,  v5.8B,  v7.8B
++  .endif
++NRND    add             v18.8H, v18.8H, v19.8H
++        st1             {v5.8B},     [x0], x2
++        mshrn           v7.8B,  v18.8H, #2
++  .if \avg
++        ld1             {v5.8B},     [x0]
++        urhadd          v7.8B,  v7.8B,  v5.8B
++  .endif
++        ext             v6.16B,  v1.16B,  v6.16B,  #1
++        uaddl           v17.8H,  v1.8B,   v6.8B
++        st1             {v7.8B},     [x0], x2
++        b.gt            1b
++
++        ld1             {v0.16B},     [x1], x2
++        add             v18.8H, v16.8H, v17.8H
++        ext             v4.16B, v0.16B, v4.16B,  #1
++NRND    add             v18.8H, v18.8H, v19.8H
++        uaddl           v16.8H,  v0.8B, v4.8B
++        mshrn           v5.8B,  v18.8H, #2
++        add             v18.8H, v16.8H, v17.8H
++  .if \avg
++        ld1             {v7.8B},     [x0]
++        urhadd          v5.8B,  v5.8B,  v7.8B
++  .endif
++NRND    add             v18.8H, v18.8H, v19.8H
++        st1             {v5.8B},     [x0], x2
++        mshrn           v7.8B,  v18.8H, #2
++  .if \avg
++        ld1             {v5.8B},     [x0]
++        urhadd          v7.8B,  v7.8B,  v5.8B
++  .endif
++        st1             {v7.8B},     [x0], x2
++
++        ret
++.endm
++
++.macro  pixfunc         pfx, name, suf, rnd=1, avg=0
++  .if \rnd
++    .macro avg  rd, rn, rm
++        urhadd          \rd, \rn, \rm
++    .endm
++    .macro mshrn rd, rn, rm
++        rshrn           \rd, \rn, \rm
++    .endm
++    .macro mshrn2 rd, rn, rm
++        rshrn2          \rd, \rn, \rm
++    .endm
++    .macro NRND insn:vararg
++    .endm
++  .else
++    .macro avg  rd, rn, rm
++        uhadd           \rd, \rn, \rm
++    .endm
++    .macro mshrn rd, rn, rm
++        shrn            \rd, \rn, \rm
++    .endm
++    .macro mshrn2 rd, rn, rm
++        shrn2           \rd, \rn, \rm
++    .endm
++    .macro NRND insn:vararg
++        \insn
++    .endm
++  .endif
++function ff_\pfx\name\suf\()_neon, export=1
++        \name           \rnd, \avg
++endfunc
++        .purgem         avg
++        .purgem         mshrn
++        .purgem         mshrn2
++        .purgem         NRND
++.endm
++
++.macro  pixfunc2        pfx, name, avg=0
++        pixfunc         \pfx, \name,          rnd=1, avg=\avg
++        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg
++.endm
++
++function ff_put_h264_qpel16_mc00_neon, export=1
++        mov             w3,  #16
++endfunc
++
++        pixfunc         put_, pixels16,     avg=0
++        pixfunc2        put_, pixels16_x2,  avg=0
++        pixfunc2        put_, pixels16_y2,  avg=0
++        pixfunc2        put_, pixels16_xy2, avg=0
++
++function ff_avg_h264_qpel16_mc00_neon, export=1
++        mov             w3,  #16
++endfunc
++
++        pixfunc         avg_, pixels16,     avg=1
++        pixfunc2        avg_, pixels16_x2,  avg=1
++        pixfunc2        avg_, pixels16_y2,  avg=1
++        pixfunc2        avg_, pixels16_xy2, avg=1
++
++function ff_put_h264_qpel8_mc00_neon, export=1
++        mov             w3,  #8
++endfunc
++
++        pixfunc         put_, pixels8,     avg=0
++        pixfunc2        put_, pixels8_x2,  avg=0
++        pixfunc2        put_, pixels8_y2,  avg=0
++        pixfunc2        put_, pixels8_xy2, avg=0
++
++function ff_avg_h264_qpel8_mc00_neon, export=1
++        mov             w3,  #8
++endfunc
++
++        pixfunc         avg_, pixels8,     avg=1
++        pixfunc         avg_, pixels8_x2,  avg=1
++        pixfunc         avg_, pixels8_y2,  avg=1
++        pixfunc         avg_, pixels8_xy2, avg=1
+diff --git a/media/ffvpx/libavcodec/aarch64/idct.h b/media/ffvpx/libavcodec/aarch64/idct.h
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/idct.h
+@@ -0,0 +1,28 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_AARCH64_IDCT_H
++#define AVCODEC_AARCH64_IDCT_H
++
++#include <stdint.h>
++
++void ff_simple_idct_neon(int16_t *data);
++void ff_simple_idct_put_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
++void ff_simple_idct_add_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
++
++#endif /* AVCODEC_AARCH64_IDCT_H */
+diff --git a/media/ffvpx/libavcodec/aarch64/idctdsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/idctdsp_init_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/idctdsp_init_aarch64.c
+@@ -0,0 +1,41 @@
++/*
++ * ARM-NEON-optimized IDCT functions
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2017 Matthieu Bouron <matthieu.bouron@gmail.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/attributes.h"
++#include "libavcodec/avcodec.h"
++#include "libavcodec/idctdsp.h"
++#include "idct.h"
++
++av_cold void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx,
++                                     unsigned high_bit_depth)
++{
++    if (!avctx->lowres && !high_bit_depth) {
++        if (avctx->idct_algo == FF_IDCT_AUTO ||
++            avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
++            avctx->idct_algo == FF_IDCT_SIMPLENEON) {
++            c->idct_put  = ff_simple_idct_put_neon;
++            c->idct_add  = ff_simple_idct_add_neon;
++            c->idct      = ff_simple_idct_neon;
++            c->perm_type = FF_IDCT_PERM_PARTTRANS;
++        }
++    }
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/mdct_neon.S b/media/ffvpx/libavcodec/aarch64/mdct_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/mdct_neon.S
+@@ -0,0 +1,323 @@
++/*
++ * AArch64 NEON optimised MDCT
++ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++function ff_imdct_half_neon, export=1
++        sub             sp,  sp,  #32
++        stp             x19, x20, [sp]
++        str             x30, [sp, #16]
++        mov             x12, #1
++        ldr             w14, [x0, #28]          // mdct_bits
++        ldr             x4,  [x0, #32]          // tcos
++        ldr             x3,  [x0, #8]           // revtab
++        lsl             x12, x12, x14           // n  = 1 << nbits
++        lsr             x14, x12, #2            // n4 = n >> 2
++        add             x7,  x2,  x12,  lsl #1
++        mov             x12, #-16
++        sub             x7,  x7,  #16
++
++        ld2             {v16.2s,v17.2s}, [x7], x12 // d16=x,n1 d17=x,n0
++        ld2             {v0.2s,v1.2s},   [x2], #16 // d0 =m0,x d1 =m1,x
++        rev64           v17.2s, v17.2s
++        ld2             {v2.2s,v3.2s},   [x4], #16 // d2=c0,c1 d3=s0,s2
++        fmul            v6.2s,  v17.2s, v2.2s
++        fmul            v7.2s,  v0.2s,  v2.2s
++1:
++        subs            x14, x14, #2
++        ldr             w6,  [x3], #4
++        fmul            v4.2s,  v0.2s,  v3.2s
++        fmul            v5.2s,  v17.2s, v3.2s
++        fsub            v4.2s,  v6.2s,  v4.2s
++        fadd            v5.2s,  v5.2s,  v7.2s
++        ubfm            x8,  x6,  #16, #31
++        ubfm            x6,  x6,  #0,  #15
++        add             x8,  x1,  x8,  lsl #3
++        add             x6,  x1,  x6,  lsl #3
++        b.eq            2f
++        ld2             {v16.2s,v17.2s}, [x7], x12
++        ld2             {v0.2s,v1.2s},   [x2], #16
++        rev64           v17.2s, v17.2s
++        ld2             {v2.2s,v3.2s},   [x4], #16    // d2=c0,c1 d3=s0,s2
++        fmul            v6.2s,  v17.2s, v2.2s
++        fmul            v7.2s,  v0.2s,  v2.2s
++        st2             {v4.s,v5.s}[0], [x6]
++        st2             {v4.s,v5.s}[1], [x8]
++        b               1b
++2:
++        st2             {v4.s,v5.s}[0], [x6]
++        st2             {v4.s,v5.s}[1], [x8]
++
++        mov             x19, x0
++        mov             x20, x1
++        bl              X(ff_fft_calc_neon)
++
++        mov             x12, #1
++        ldr             w14, [x19, #28]          // mdct_bits
++        ldr             x4,  [x19, #32]          // tcos
++        lsl             x12, x12, x14            // n  = 1 << nbits
++        lsr             x14, x12, #3             // n8 = n >> 3
++
++        add             x4,  x4,  x14, lsl #3
++        add             x6,  x20, x14, lsl #3
++        sub             x1,  x4,  #16
++        sub             x3,  x6,  #16
++
++        mov             x7,  #-16
++        mov             x8,  x6
++        mov             x0,  x3
++
++        ld2             {v0.2s,v1.2s},  [x3], x7 // d0 =i1,r1 d1 =i0,r0
++        ld2             {v20.2s,v21.2s},[x6], #16 // d20=i2,r2 d21=i3,r3
++        ld2             {v16.2s,v17.2s},[x1], x7 // d16=c1,c0 d18=s1,s0
++3:
++        subs            x14, x14, #2
++        fmul            v7.2s,  v0.2s,  v17.2s
++        ld2             {v18.2s,v19.2s},[x4], #16    // d17=c2,c3 d19=s2,s3
++        fmul            v4.2s,  v1.2s,  v17.2s
++        fmul            v6.2s,  v21.2s, v19.2s
++        fmul            v5.2s,  v20.2s, v19.2s
++        fmul            v22.2s, v1.2s,  v16.2s
++        fmul            v23.2s, v21.2s, v18.2s
++        fmul            v24.2s, v0.2s,  v16.2s
++        fmul            v25.2s, v20.2s, v18.2s
++        fadd            v7.2s,  v7.2s,  v22.2s
++        fadd            v5.2s,  v5.2s,  v23.2s
++        fsub            v4.2s,  v4.2s,  v24.2s
++        fsub            v6.2s,  v6.2s,  v25.2s
++        b.eq            4f
++        ld2             {v0.2s,v1.2s},  [x3], x7
++        ld2             {v20.2s,v21.2s},[x6], #16
++        ld2             {v16.2s,v17.2s},[x1], x7 // d16=c1,c0 d18=s1,s0
++        rev64           v5.2s,  v5.2s
++        rev64           v7.2s,  v7.2s
++        st2             {v4.2s,v5.2s},  [x0], x7
++        st2             {v6.2s,v7.2s},  [x8], #16
++        b               3b
++4:
++        rev64           v5.2s,  v5.2s
++        rev64           v7.2s,  v7.2s
++        st2             {v4.2s,v5.2s},  [x0]
++        st2             {v6.2s,v7.2s},  [x8]
++
++        ldp             x19, x20, [sp]
++        ldr             x30, [sp, #16]
++        add             sp,  sp,  #32
++
++        ret
++endfunc
++
++function ff_imdct_calc_neon, export=1
++        sub             sp,  sp,  #32
++        stp             x19, x20, [sp]
++        str             x30, [sp, #16]
++        ldr             w3,  [x0, #28]          // mdct_bits
++        mov             x19, #1
++        mov             x20, x1
++        lsl             x19, x19, x3
++        add             x1,  x1,  x19
++
++        bl              X(ff_imdct_half_neon)
++
++        add             x0,  x20, x19,  lsl #2
++        add             x1,  x20, x19,  lsl #1
++        sub             x0,  x0,  #8
++        sub             x2,  x1,  #16
++        mov             x3,  #-16
++        mov             x6,  #-8
++1:
++        ld1             {v0.4s}, [x2], x3
++        prfum           pldl1keep, [x0, #-16]
++        rev64           v0.4s, v0.4s
++        ld1             {v2.2s,v3.2s}, [x1], #16
++        fneg            v4.4s,  v0.4s
++        prfum           pldl1keep, [x2, #-16]
++        rev64           v2.2s, v2.2s
++        rev64           v3.2s, v3.2s
++        ext             v4.16b, v4.16b, v4.16b, #8
++        st1             {v2.2s}, [x0], x6
++        st1             {v3.2s}, [x0], x6
++        st1             {v4.4s}, [x20], #16
++        subs            x19, x19,  #16
++        b.gt            1b
++
++        ldp             x19, x20, [sp], #16
++        ldr             x30, [sp], #16
++
++        ret
++endfunc
++
++
++function ff_mdct_calc_neon, export=1
++        sub             sp,  sp,  #32
++        stp             x19, x20, [sp]
++        str             x30, [sp, #16]
++
++        mov             x12, #1
++        ldr             w14, [x0, #28]          // mdct_bits
++        ldr             x4,  [x0, #32]          // tcos
++        ldr             x3,  [x0, #8]           // revtab
++        lsl             x14, x12, x14           // n  = 1 << nbits
++        add             x7,  x2,  x14           // in4u
++        sub             x9,  x7,  #16           // in4d
++        add             x2,  x7,  x14, lsl #1   // in3u
++        add             x8,  x9,  x14, lsl #1   // in3d
++        add             x5,  x4,  x14, lsl #1
++        sub             x5,  x5,  #16
++        sub             x3,  x3,  #4
++        mov             x12, #-16
++        lsr             x13, x14, #1
++
++        ld2             {v16.2s,v17.2s}, [x9], x12  // in0u0,in0u1 in4d1,in4d0
++        ld2             {v18.2s,v19.2s}, [x8], x12  // in2u0,in2u1 in3d1,in3d0
++        ld2             {v0.2s, v1.2s},  [x7], #16  // in4u0,in4u1 in2d1,in2d0
++        rev64           v17.2s, v17.2s              // in4d0,in4d1 in3d0,in3d1
++        rev64           v19.2s, v19.2s              // in4d0,in4d1 in3d0,in3d1
++        ld2             {v2.2s, v3.2s},  [x2], #16  // in3u0,in3u1 in1d1,in1d0
++        fsub            v0.2s,  v17.2s, v0.2s       // in4d-in4u      I
++        ld2             {v20.2s,v21.2s}, [x4], #16  // c0,c1 s0,s1
++        rev64           v1.2s,  v1.2s               // in2d0,in2d1 in1d0,in1d1
++        rev64           v3.2s,  v3.2s               // in2d0,in2d1 in1d0,in1d1
++        ld2             {v30.2s,v31.2s}, [x5], x12  // c2,c3 s2,s3
++        fadd            v2.2s,  v2.2s,  v19.2s      // in3u+in3d     -R
++        fsub            v16.2s, v16.2s, v1.2s       // in0u-in2d      R
++        fadd            v18.2s, v18.2s, v3.2s       // in2u+in1d     -I
++1:
++        fmul            v7.2s,  v0.2s,  v21.2s      //  I*s
++        ldr             w10, [x3, x13]
++        fmul            v6.2s,  v2.2s,  v20.2s      // -R*c
++        ldr             w6,  [x3, #4]!
++        fmul            v4.2s,  v2.2s,  v21.2s      // -R*s
++        fmul            v5.2s,  v0.2s,  v20.2s      //  I*c
++        fmul            v24.2s, v16.2s, v30.2s      //  R*c
++        fmul            v25.2s, v18.2s, v31.2s      // -I*s
++        fmul            v22.2s, v16.2s, v31.2s      //  R*s
++        fmul            v23.2s, v18.2s, v30.2s      //  I*c
++        subs            x14, x14, #16
++        subs            x13, x13, #8
++        fsub            v6.2s,  v6.2s,  v7.2s       // -R*c-I*s
++        fadd            v7.2s,  v4.2s,  v5.2s       // -R*s+I*c
++        fsub            v24.2s, v25.2s, v24.2s      // I*s-R*c
++        fadd            v25.2s, v22.2s, v23.2s      // R*s-I*c
++        b.eq            1f
++        mov             x12, #-16
++        ld2             {v16.2s,v17.2s}, [x9], x12  // in0u0,in0u1 in4d1,in4d0
++        ld2             {v18.2s,v19.2s}, [x8], x12  // in2u0,in2u1 in3d1,in3d0
++        fneg            v7.2s,  v7.2s               //  R*s-I*c
++        ld2             {v0.2s, v1.2s},  [x7], #16  // in4u0,in4u1 in2d1,in2d0
++        rev64           v17.2s, v17.2s              // in4d0,in4d1 in3d0,in3d1
++        rev64           v19.2s, v19.2s              // in4d0,in4d1 in3d0,in3d1
++        ld2             {v2.2s, v3.2s},  [x2], #16  // in3u0,in3u1 in1d1,in1d0
++        fsub            v0.2s,  v17.2s, v0.2s       // in4d-in4u      I
++        ld2             {v20.2s,v21.2s}, [x4], #16  // c0,c1 s0,s1
++        rev64           v1.2s,  v1.2s               // in2d0,in2d1 in1d0,in1d1
++        rev64           v3.2s,  v3.2s               // in2d0,in2d1 in1d0,in1d1
++        ld2             {v30.2s,v31.2s}, [x5], x12  // c2,c3 s2,s3
++        fadd            v2.2s,  v2.2s,  v19.2s      // in3u+in3d     -R
++        fsub            v16.2s, v16.2s, v1.2s       // in0u-in2d      R
++        fadd            v18.2s, v18.2s, v3.2s       // in2u+in1d     -I
++        ubfm            x12, x6,  #16, #31
++        ubfm            x6,  x6,  #0,  #15
++        add             x12, x1,  x12, lsl #3
++        add             x6,  x1,  x6,  lsl #3
++        st2             {v6.s,v7.s}[0],   [x6]
++        st2             {v6.s,v7.s}[1],   [x12]
++        ubfm            x6,  x10, #16, #31
++        ubfm            x10, x10, #0,  #15
++        add             x6 , x1,  x6,  lsl #3
++        add             x10, x1,  x10, lsl #3
++        st2             {v24.s,v25.s}[0], [x10]
++        st2             {v24.s,v25.s}[1], [x6]
++        b               1b
++1:
++        fneg            v7.2s,  v7.2s           //  R*s-I*c
++        ubfm            x12, x6,  #16, #31
++        ubfm            x6,  x6,  #0,  #15
++        add             x12, x1,  x12, lsl #3
++        add             x6,  x1,  x6,  lsl #3
++        st2             {v6.s,v7.s}[0],   [x6]
++        st2             {v6.s,v7.s}[1],   [x12]
++        ubfm            x6,  x10, #16, #31
++        ubfm            x10, x10, #0,  #15
++        add             x6 , x1,  x6,  lsl #3
++        add             x10, x1,  x10, lsl #3
++        st2             {v24.s,v25.s}[0], [x10]
++        st2             {v24.s,v25.s}[1], [x6]
++
++        mov             x19, x0
++        mov             x20, x1
++        bl              X(ff_fft_calc_neon)
++
++        mov             x12, #1
++        ldr             w14, [x19, #28]         // mdct_bits
++        ldr             x4,  [x19, #32]         // tcos
++        lsl             x12, x12, x14           // n  = 1 << nbits
++        lsr             x14, x12, #3            // n8 = n >> 3
++
++        add             x4,  x4,  x14, lsl #3
++        add             x6,  x20, x14, lsl #3
++        sub             x1,  x4,  #16
++        sub             x3,  x6,  #16
++
++        mov             x7,  #-16
++        mov             x8,  x6
++        mov             x0,  x3
++
++        ld2             {v0.2s,v1.2s},   [x3], x7   // d0 =r1,i1 d1 =r0,i0
++        ld2             {v20.2s,v21.2s}, [x6], #16  // d20=r2,i2 d21=r3,i3
++        ld2             {v16.2s,v17.2s}, [x1], x7   // c1,c0 s1,s0
++1:
++        subs            x14, x14, #2
++        fmul            v7.2s,  v0.2s,  v17.2s      // r1*s1,r0*s0
++        ld2             {v18.2s,v19.2s}, [x4], #16  // c2,c3 s2,s3
++        fmul            v4.2s,  v1.2s,  v17.2s      // i1*s1,i0*s0
++        fmul            v6.2s,  v21.2s, v19.2s      // i2*s2,i3*s3
++        fmul            v5.2s,  v20.2s, v19.2s      // r2*s2,r3*s3
++        fmul            v24.2s, v0.2s,  v16.2s      // r1*c1,r0*c0
++        fmul            v25.2s, v20.2s, v18.2s      // r2*c2,r3*c3
++        fmul            v22.2s, v21.2s, v18.2s      // i2*c2,i3*c3
++        fmul            v23.2s, v1.2s,  v16.2s      // i1*c1,i0*c0
++        fadd            v4.2s,  v4.2s,  v24.2s      // i1*s1+r1*c1,i0*s0+r0*c0
++        fadd            v6.2s,  v6.2s,  v25.2s      // i2*s2+r2*c2,i3*s3+r3*c3
++        fsub            v5.2s,  v22.2s, v5.2s       // i2*c2-r2*s2,i3*c3-r3*s3
++        fsub            v7.2s,  v23.2s, v7.2s       // i1*c1-r1*s1,i0*c0-r0*s0
++        fneg            v4.2s,  v4.2s
++        fneg            v6.2s,  v6.2s
++        b.eq            1f
++        ld2             {v0.2s, v1.2s},  [x3], x7
++        ld2             {v20.2s,v21.2s}, [x6], #16
++        ld2             {v16.2s,v17.2s}, [x1], x7   // c1,c0 s1,s0
++        rev64           v5.2s,  v5.2s
++        rev64           v7.2s,  v7.2s
++        st2             {v4.2s,v5.2s},  [x0], x7
++        st2             {v6.2s,v7.2s},  [x8], #16
++        b               1b
++1:
++        rev64           v5.2s,  v5.2s
++        rev64           v7.2s,  v7.2s
++        st2             {v4.2s,v5.2s},  [x0]
++        st2             {v6.2s,v7.2s},  [x8]
++
++        ldp             x19, x20, [sp], #16
++        ldr             x30, [sp], #16
++        ret
++endfunc
+diff --git a/media/ffvpx/libavcodec/aarch64/neon.S b/media/ffvpx/libavcodec/aarch64/neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/neon.S
+@@ -0,0 +1,149 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++.macro  transpose_8x8B  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
++        trn1            \r8\().8B,  \r0\().8B,  \r1\().8B
++        trn2            \r9\().8B,  \r0\().8B,  \r1\().8B
++        trn1            \r1\().8B,  \r2\().8B,  \r3\().8B
++        trn2            \r3\().8B,  \r2\().8B,  \r3\().8B
++        trn1            \r0\().8B,  \r4\().8B,  \r5\().8B
++        trn2            \r5\().8B,  \r4\().8B,  \r5\().8B
++        trn1            \r2\().8B,  \r6\().8B,  \r7\().8B
++        trn2            \r7\().8B,  \r6\().8B,  \r7\().8B
++
++        trn1            \r4\().4H,  \r0\().4H,  \r2\().4H
++        trn2            \r2\().4H,  \r0\().4H,  \r2\().4H
++        trn1            \r6\().4H,  \r5\().4H,  \r7\().4H
++        trn2            \r7\().4H,  \r5\().4H,  \r7\().4H
++        trn1            \r5\().4H,  \r9\().4H,  \r3\().4H
++        trn2            \r9\().4H,  \r9\().4H,  \r3\().4H
++        trn1            \r3\().4H,  \r8\().4H,  \r1\().4H
++        trn2            \r8\().4H,  \r8\().4H,  \r1\().4H
++
++        trn1            \r0\().2S,  \r3\().2S,  \r4\().2S
++        trn2            \r4\().2S,  \r3\().2S,  \r4\().2S
++
++        trn1            \r1\().2S,  \r5\().2S,  \r6\().2S
++        trn2            \r5\().2S,  \r5\().2S,  \r6\().2S
++
++        trn2            \r6\().2S,  \r8\().2S,  \r2\().2S
++        trn1            \r2\().2S,  \r8\().2S,  \r2\().2S
++
++        trn1            \r3\().2S,  \r9\().2S,  \r7\().2S
++        trn2            \r7\().2S,  \r9\().2S,  \r7\().2S
++.endm
++
++.macro  transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
++        trn1            \t0\().16B, \r0\().16B, \r1\().16B
++        trn2            \t1\().16B, \r0\().16B, \r1\().16B
++        trn1            \r1\().16B, \r2\().16B, \r3\().16B
++        trn2            \r3\().16B, \r2\().16B, \r3\().16B
++        trn1            \r0\().16B, \r4\().16B, \r5\().16B
++        trn2            \r5\().16B, \r4\().16B, \r5\().16B
++        trn1            \r2\().16B, \r6\().16B, \r7\().16B
++        trn2            \r7\().16B, \r6\().16B, \r7\().16B
++
++        trn1            \r4\().8H,  \r0\().8H,  \r2\().8H
++        trn2            \r2\().8H,  \r0\().8H,  \r2\().8H
++        trn1            \r6\().8H,  \r5\().8H,  \r7\().8H
++        trn2            \r7\().8H,  \r5\().8H,  \r7\().8H
++        trn1            \r5\().8H,  \t1\().8H,  \r3\().8H
++        trn2            \t1\().8H,  \t1\().8H,  \r3\().8H
++        trn1            \r3\().8H,  \t0\().8H,  \r1\().8H
++        trn2            \t0\().8H,  \t0\().8H,  \r1\().8H
++
++        trn1            \r0\().4S,  \r3\().4S,  \r4\().4S
++        trn2            \r4\().4S,  \r3\().4S,  \r4\().4S
++
++        trn1            \r1\().4S,  \r5\().4S,  \r6\().4S
++        trn2            \r5\().4S,  \r5\().4S,  \r6\().4S
++
++        trn2            \r6\().4S,  \t0\().4S,  \r2\().4S
++        trn1            \r2\().4S,  \t0\().4S,  \r2\().4S
++
++        trn1            \r3\().4S,  \t1\().4S,  \r7\().4S
++        trn2            \r7\().4S,  \t1\().4S,  \r7\().4S
++.endm
++
++.macro  transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
++        trn1            \t4\().16B, \r0\().16B,  \r1\().16B
++        trn2            \t5\().16B, \r0\().16B,  \r1\().16B
++        trn1            \t6\().16B, \r2\().16B,  \r3\().16B
++        trn2            \t7\().16B, \r2\().16B,  \r3\().16B
++
++        trn1            \r0\().8H,  \t4\().8H,  \t6\().8H
++        trn2            \r2\().8H,  \t4\().8H,  \t6\().8H
++        trn1            \r1\().8H,  \t5\().8H,  \t7\().8H
++        trn2            \r3\().8H,  \t5\().8H,  \t7\().8H
++.endm
++
++.macro  transpose_4x8B  r0, r1, r2, r3, t4, t5, t6, t7
++        trn1            \t4\().8B,  \r0\().8B,  \r1\().8B
++        trn2            \t5\().8B,  \r0\().8B,  \r1\().8B
++        trn1            \t6\().8B,  \r2\().8B,  \r3\().8B
++        trn2            \t7\().8B,  \r2\().8B,  \r3\().8B
++
++        trn1            \r0\().4H,  \t4\().4H,  \t6\().4H
++        trn2            \r2\().4H,  \t4\().4H,  \t6\().4H
++        trn1            \r1\().4H,  \t5\().4H,  \t7\().4H
++        trn2            \r3\().4H,  \t5\().4H,  \t7\().4H
++.endm
++
++.macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
++        trn1            \r4\().4H,  \r0\().4H,  \r1\().4H
++        trn2            \r5\().4H,  \r0\().4H,  \r1\().4H
++        trn1            \r6\().4H,  \r2\().4H,  \r3\().4H
++        trn2            \r7\().4H,  \r2\().4H,  \r3\().4H
++        trn1            \r0\().2S,  \r4\().2S,  \r6\().2S
++        trn2            \r2\().2S,  \r4\().2S,  \r6\().2S
++        trn1            \r1\().2S,  \r5\().2S,  \r7\().2S
++        trn2            \r3\().2S,  \r5\().2S,  \r7\().2S
++.endm
++
++.macro  transpose_8x8H  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
++        trn1            \r8\().8H,  \r0\().8H,  \r1\().8H
++        trn2            \r9\().8H,  \r0\().8H,  \r1\().8H
++        trn1            \r1\().8H,  \r2\().8H,  \r3\().8H
++        trn2            \r3\().8H,  \r2\().8H,  \r3\().8H
++        trn1            \r0\().8H,  \r4\().8H,  \r5\().8H
++        trn2            \r5\().8H,  \r4\().8H,  \r5\().8H
++        trn1            \r2\().8H,  \r6\().8H,  \r7\().8H
++        trn2            \r7\().8H,  \r6\().8H,  \r7\().8H
++
++        trn1            \r4\().4S,  \r0\().4S,  \r2\().4S
++        trn2            \r2\().4S,  \r0\().4S,  \r2\().4S
++        trn1            \r6\().4S,  \r5\().4S,  \r7\().4S
++        trn2            \r7\().4S,  \r5\().4S,  \r7\().4S
++        trn1            \r5\().4S,  \r9\().4S,  \r3\().4S
++        trn2            \r9\().4S,  \r9\().4S,  \r3\().4S
++        trn1            \r3\().4S,  \r8\().4S,  \r1\().4S
++        trn2            \r8\().4S,  \r8\().4S,  \r1\().4S
++
++        trn1            \r0\().2D,  \r3\().2D,  \r4\().2D
++        trn2            \r4\().2D,  \r3\().2D,  \r4\().2D
++
++        trn1            \r1\().2D,  \r5\().2D,  \r6\().2D
++        trn2            \r5\().2D,  \r5\().2D,  \r6\().2D
++
++        trn2            \r6\().2D,  \r8\().2D,  \r2\().2D
++        trn1            \r2\().2D,  \r8\().2D,  \r2\().2D
++
++        trn1            \r3\().2D,  \r9\().2D,  \r7\().2D
++        trn2            \r7\().2D,  \r9\().2D,  \r7\().2D
++
++.endm
+diff --git a/media/ffvpx/libavcodec/aarch64/simple_idct_neon.S b/media/ffvpx/libavcodec/aarch64/simple_idct_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/simple_idct_neon.S
+@@ -0,0 +1,362 @@
++/*
++ * ARM NEON IDCT
++ *
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2017 Matthieu Bouron <matthieu.bouron@gmail.com>
++ *
++ * Based on Simple IDCT
++ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++#define Z1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
++#define Z2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
++#define Z3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
++#define Z4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
++#define Z5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
++#define Z6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
++#define Z7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
++#define Z4c ((1<<(COL_SHIFT-1))/Z4)
++#define ROW_SHIFT 11
++#define COL_SHIFT 20
++
++#define z1 v0.H[0]
++#define z2 v0.H[1]
++#define z3 v0.H[2]
++#define z4 v0.H[3]
++#define z5 v0.H[4]
++#define z6 v0.H[5]
++#define z7 v0.H[6]
++#define z4c v0.H[7]
++
++const   idct_coeff_neon, align=4
++        .short Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z4c
++endconst
++
++.macro idct_start data
++        prfm            pldl1keep, [\data]
++        mov             x10, x30
++        movrel          x3, idct_coeff_neon
++        ld1             {v0.2D}, [x3]
++.endm
++
++.macro idct_end
++        br              x10
++.endm
++
++.macro smull1 a, b, c
++        smull           \a, \b, \c
++.endm
++
++.macro smlal1 a, b, c
++        smlal           \a, \b, \c
++.endm
++
++.macro smlsl1 a, b, c
++        smlsl           \a, \b, \c
++.endm
++
++.macro idct_col4_top y1, y2, y3, y4, i, l
++        smull\i         v7.4S,  \y3\l, z2
++        smull\i         v16.4S, \y3\l, z6
++        smull\i         v17.4S, \y2\l, z1
++        add             v19.4S, v23.4S, v7.4S
++        smull\i         v18.4S, \y2\l, z3
++        add             v20.4S, v23.4S, v16.4S
++        smull\i         v5.4S,  \y2\l, z5
++        sub             v21.4S, v23.4S, v16.4S
++        smull\i         v6.4S,  \y2\l, z7
++        sub             v22.4S, v23.4S, v7.4S
++
++        smlal\i         v17.4S, \y4\l, z3
++        smlsl\i         v18.4S, \y4\l, z7
++        smlsl\i         v5.4S,  \y4\l, z1
++        smlsl\i         v6.4S,  \y4\l, z5
++.endm
++
++.macro idct_row4_neon y1, y2, y3, y4, pass
++        ld1             {\y1\().2D,\y2\().2D}, [x2], #32
++        movi            v23.4S, #1<<2, lsl #8
++        orr             v5.16B, \y1\().16B, \y2\().16B
++        ld1             {\y3\().2D,\y4\().2D}, [x2], #32
++        orr             v6.16B, \y3\().16B, \y4\().16B
++        orr             v5.16B, v5.16B, v6.16B
++        mov             x3, v5.D[1]
++        smlal           v23.4S, \y1\().4H, z4
++
++        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
++
++        cmp             x3, #0
++        b.eq            \pass\()f
++
++        smull2          v7.4S, \y1\().8H, z4
++        smlal2          v17.4S, \y2\().8H, z5
++        smlsl2          v18.4S, \y2\().8H, z1
++        smull2          v16.4S, \y3\().8H, z2
++        smlal2          v5.4S, \y2\().8H, z7
++        add             v19.4S, v19.4S, v7.4S
++        sub             v20.4S, v20.4S, v7.4S
++        sub             v21.4S, v21.4S, v7.4S
++        add             v22.4S, v22.4S, v7.4S
++        smlal2          v6.4S, \y2\().8H, z3
++        smull2          v7.4S, \y3\().8H, z6
++        smlal2          v17.4S, \y4\().8H, z7
++        smlsl2          v18.4S, \y4\().8H, z5
++        smlal2          v5.4S, \y4\().8H, z3
++        smlsl2          v6.4S, \y4\().8H, z1
++        add             v19.4S, v19.4S, v7.4S
++        sub             v20.4S, v20.4S, v16.4S
++        add             v21.4S, v21.4S, v16.4S
++        sub             v22.4S, v22.4S, v7.4S
++
++\pass:  add             \y3\().4S, v19.4S, v17.4S
++        add             \y4\().4S, v20.4S, v18.4S
++        shrn            \y1\().4H, \y3\().4S, #ROW_SHIFT
++        shrn            \y2\().4H, \y4\().4S, #ROW_SHIFT
++        add             v7.4S, v21.4S, v5.4S
++        add             v16.4S, v22.4S, v6.4S
++        shrn            \y3\().4H, v7.4S, #ROW_SHIFT
++        shrn            \y4\().4H, v16.4S, #ROW_SHIFT
++        sub             v22.4S, v22.4S, v6.4S
++        sub             v19.4S, v19.4S, v17.4S
++        sub             v21.4S, v21.4S, v5.4S
++        shrn2           \y1\().8H, v22.4S, #ROW_SHIFT
++        sub             v20.4S, v20.4S, v18.4S
++        shrn2           \y2\().8H, v21.4S, #ROW_SHIFT
++        shrn2           \y3\().8H, v20.4S, #ROW_SHIFT
++        shrn2           \y4\().8H, v19.4S, #ROW_SHIFT
++
++        trn1            v16.8H, \y1\().8H, \y2\().8H
++        trn2            v17.8H, \y1\().8H, \y2\().8H
++        trn1            v18.8H, \y3\().8H, \y4\().8H
++        trn2            v19.8H, \y3\().8H, \y4\().8H
++        trn1            \y1\().4S, v16.4S, v18.4S
++        trn1            \y2\().4S, v17.4S, v19.4S
++        trn2            \y3\().4S, v16.4S, v18.4S
++        trn2            \y4\().4S, v17.4S, v19.4S
++.endm
++
++.macro declare_idct_col4_neon i, l
++function idct_col4_neon\i
++        dup             v23.4H, z4c
++.if \i == 1
++        add             v23.4H, v23.4H, v24.4H
++.else
++        mov             v5.D[0], v24.D[1]
++        add             v23.4H, v23.4H, v5.4H
++.endif
++        smull           v23.4S, v23.4H, z4
++
++        idct_col4_top   v24, v25, v26, v27, \i, \l
++
++        mov             x4, v28.D[\i - 1]
++        mov             x5, v29.D[\i - 1]
++        cmp             x4, #0
++        b.eq            1f
++
++        smull\i         v7.4S,  v28\l,  z4
++        add             v19.4S, v19.4S, v7.4S
++        sub             v20.4S, v20.4S, v7.4S
++        sub             v21.4S, v21.4S, v7.4S
++        add             v22.4S, v22.4S, v7.4S
++
++1:      mov             x4, v30.D[\i - 1]
++        cmp             x5, #0
++        b.eq            2f
++
++        smlal\i         v17.4S, v29\l, z5
++        smlsl\i         v18.4S, v29\l, z1
++        smlal\i         v5.4S,  v29\l, z7
++        smlal\i         v6.4S,  v29\l, z3
++
++2:      mov             x5, v31.D[\i - 1]
++        cmp             x4, #0
++        b.eq            3f
++
++        smull\i         v7.4S,  v30\l, z6
++        smull\i         v16.4S, v30\l, z2
++        add             v19.4S, v19.4S, v7.4S
++        sub             v22.4S, v22.4S, v7.4S
++        sub             v20.4S, v20.4S, v16.4S
++        add             v21.4S, v21.4S, v16.4S
++
++3:      cmp             x5, #0
++        b.eq            4f
++
++        smlal\i         v17.4S, v31\l, z7
++        smlsl\i         v18.4S, v31\l, z5
++        smlal\i         v5.4S,  v31\l, z3
++        smlsl\i         v6.4S,  v31\l, z1
++
++4:      addhn           v7.4H, v19.4S, v17.4S
++        addhn2          v7.8H, v20.4S, v18.4S
++        subhn           v18.4H, v20.4S, v18.4S
++        subhn2          v18.8H, v19.4S, v17.4S
++
++        addhn           v16.4H, v21.4S, v5.4S
++        addhn2          v16.8H, v22.4S, v6.4S
++        subhn           v17.4H, v22.4S, v6.4S
++        subhn2          v17.8H, v21.4S, v5.4S
++
++        ret
++endfunc
++.endm
++
++declare_idct_col4_neon 1, .4H
++declare_idct_col4_neon 2, .8H
++
++function ff_simple_idct_put_neon, export=1
++        idct_start      x2
++
++        idct_row4_neon  v24, v25, v26, v27, 1
++        idct_row4_neon  v28, v29, v30, v31, 2
++        bl              idct_col4_neon1
++
++        sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
++        sqshrun2        v1.16B, v16.8H, #COL_SHIFT-16
++        sqshrun         v3.8B,  v17.8H, #COL_SHIFT-16
++        sqshrun2        v3.16B, v18.8H, #COL_SHIFT-16
++
++        bl              idct_col4_neon2
++
++        sqshrun         v2.8B,  v7.8H, #COL_SHIFT-16
++        sqshrun2        v2.16B, v16.8H, #COL_SHIFT-16
++        sqshrun         v4.8B,  v17.8H, #COL_SHIFT-16
++        sqshrun2        v4.16B, v18.8H, #COL_SHIFT-16
++
++        zip1            v16.4S, v1.4S, v2.4S
++        zip2            v17.4S, v1.4S, v2.4S
++
++        st1             {v16.D}[0], [x0], x1
++        st1             {v16.D}[1], [x0], x1
++
++        zip1            v18.4S, v3.4S, v4.4S
++        zip2            v19.4S, v3.4S, v4.4S
++
++        st1             {v17.D}[0], [x0], x1
++        st1             {v17.D}[1], [x0], x1
++        st1             {v18.D}[0], [x0], x1
++        st1             {v18.D}[1], [x0], x1
++        st1             {v19.D}[0], [x0], x1
++        st1             {v19.D}[1], [x0], x1
++
++        idct_end
++endfunc
++
++function ff_simple_idct_add_neon, export=1
++        idct_start      x2
++
++        idct_row4_neon  v24, v25, v26, v27, 1
++        idct_row4_neon  v28, v29, v30, v31, 2
++        bl              idct_col4_neon1
++
++        sshr            v1.8H, v7.8H, #COL_SHIFT-16
++        sshr            v2.8H, v16.8H, #COL_SHIFT-16
++        sshr            v3.8H, v17.8H, #COL_SHIFT-16
++        sshr            v4.8H, v18.8H, #COL_SHIFT-16
++
++        bl              idct_col4_neon2
++
++        sshr            v7.8H, v7.8H, #COL_SHIFT-16
++        sshr            v16.8H, v16.8H, #COL_SHIFT-16
++        sshr            v17.8H, v17.8H, #COL_SHIFT-16
++        sshr            v18.8H, v18.8H, #COL_SHIFT-16
++
++        mov             x9,  x0
++        ld1             {v19.D}[0], [x0], x1
++        zip1            v23.2D, v1.2D, v7.2D
++        zip2            v24.2D, v1.2D, v7.2D
++        ld1             {v19.D}[1], [x0], x1
++        zip1            v25.2D, v2.2D, v16.2D
++        zip2            v26.2D, v2.2D, v16.2D
++        ld1             {v20.D}[0], [x0], x1
++        zip1            v27.2D, v3.2D, v17.2D
++        zip2            v28.2D, v3.2D, v17.2D
++        ld1             {v20.D}[1], [x0], x1
++        zip1            v29.2D, v4.2D, v18.2D
++        zip2            v30.2D, v4.2D, v18.2D
++        ld1             {v21.D}[0], [x0], x1
++        uaddw           v23.8H, v23.8H, v19.8B
++        uaddw2          v24.8H, v24.8H, v19.16B
++        ld1             {v21.D}[1], [x0], x1
++        sqxtun          v23.8B, v23.8H
++        sqxtun2         v23.16B, v24.8H
++        ld1             {v22.D}[0], [x0], x1
++        uaddw           v24.8H, v25.8H, v20.8B
++        uaddw2          v25.8H, v26.8H, v20.16B
++        ld1             {v22.D}[1], [x0], x1
++        sqxtun          v24.8B, v24.8H
++        sqxtun2         v24.16B, v25.8H
++        st1             {v23.D}[0], [x9], x1
++        uaddw           v25.8H, v27.8H, v21.8B
++        uaddw2          v26.8H, v28.8H, v21.16B
++        st1             {v23.D}[1], [x9], x1
++        sqxtun          v25.8B, v25.8H
++        sqxtun2         v25.16B, v26.8H
++        st1             {v24.D}[0], [x9], x1
++        uaddw           v26.8H, v29.8H, v22.8B
++        uaddw2          v27.8H, v30.8H, v22.16B
++        st1             {v24.D}[1], [x9], x1
++        sqxtun          v26.8B, v26.8H
++        sqxtun2         v26.16B, v27.8H
++        st1             {v25.D}[0], [x9], x1
++        st1             {v25.D}[1], [x9], x1
++        st1             {v26.D}[0], [x9], x1
++        st1             {v26.D}[1], [x9], x1
++
++        idct_end
++endfunc
++
++function ff_simple_idct_neon, export=1
++        idct_start      x0
++
++        mov             x2,  x0
++        idct_row4_neon  v24, v25, v26, v27, 1
++        idct_row4_neon  v28, v29, v30, v31, 2
++        sub             x2, x2, #128
++        bl              idct_col4_neon1
++
++        sshr            v1.8H, v7.8H, #COL_SHIFT-16
++        sshr            v2.8H, v16.8H, #COL_SHIFT-16
++        sshr            v3.8H, v17.8H, #COL_SHIFT-16
++        sshr            v4.8H, v18.8H, #COL_SHIFT-16
++
++        bl              idct_col4_neon2
++
++        sshr            v7.8H, v7.8H, #COL_SHIFT-16
++        sshr            v16.8H, v16.8H, #COL_SHIFT-16
++        sshr            v17.8H, v17.8H, #COL_SHIFT-16
++        sshr            v18.8H, v18.8H, #COL_SHIFT-16
++
++        zip1            v23.2D, v1.2D, v7.2D
++        zip2            v24.2D, v1.2D, v7.2D
++        st1             {v23.2D,v24.2D}, [x2], #32
++        zip1            v25.2D, v2.2D, v16.2D
++        zip2            v26.2D, v2.2D, v16.2D
++        st1             {v25.2D,v26.2D}, [x2], #32
++        zip1            v27.2D, v3.2D, v17.2D
++        zip2            v28.2D, v3.2D, v17.2D
++        st1             {v27.2D,v28.2D}, [x2], #32
++        zip1            v29.2D, v4.2D, v18.2D
++        zip2            v30.2D, v4.2D, v18.2D
++        st1             {v29.2D,v30.2D}, [x2], #32
++
++        idct_end
++endfunc
+diff --git a/media/ffvpx/libavcodec/aarch64/vc1dsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/vc1dsp_init_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vc1dsp_init_aarch64.c
+@@ -0,0 +1,47 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/aarch64/cpu.h"
++#include "libavcodec/vc1dsp.h"
++
++#include "config.h"
++
++void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                int h, int x, int y);
++void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                int h, int x, int y);
++void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                int h, int x, int y);
++void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
++                                int h, int x, int y);
++
++av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
++        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;
++        dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_vc1_chroma_mc4_neon;
++        dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_vc1_chroma_mc4_neon;
++    }
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/videodsp.S b/media/ffvpx/libavcodec/aarch64/videodsp.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/videodsp.S
+@@ -0,0 +1,28 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++function ff_prefetch_aarch64, export=1
++        subs            w2,  w2,  #2
++        prfm            pldl1strm, [x0]
++        prfm            pldl1strm, [x0,  x1]
++        add             x0,  x0,  x1,  lsl #1
++        b.gt            X(ff_prefetch_aarch64)
++        ret
++endfunc
+diff --git a/media/ffvpx/libavcodec/aarch64/videodsp_init.c b/media/ffvpx/libavcodec/aarch64/videodsp_init.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/videodsp_init.c
+@@ -0,0 +1,32 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/aarch64/cpu.h"
++#include "libavcodec/videodsp.h"
++
++void ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h);
++
++av_cold void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_armv8(cpu_flags))
++        ctx->prefetch = ff_prefetch_aarch64;
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init.h b/media/ffvpx/libavcodec/aarch64/vp9dsp_init.h
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init.h
+@@ -0,0 +1,29 @@
++/*
++ * Copyright (c) 2017 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_AARCH64_VP9DSP_INIT_H
++#define AVCODEC_AARCH64_VP9DSP_INIT_H
++
++#include "libavcodec/vp9dsp.h"
++
++void ff_vp9dsp_init_10bpp_aarch64(VP9DSPContext *dsp);
++void ff_vp9dsp_init_12bpp_aarch64(VP9DSPContext *dsp);
++
++#endif /* AVCODEC_AARCH64_VP9DSP_INIT_H */
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c
+@@ -0,0 +1,23 @@
++/*
++ * Copyright (c) 2017 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#define BPP 10
++#define INIT_FUNC ff_vp9dsp_init_10bpp_aarch64
++#include "vp9dsp_init_16bpp_aarch64_template.c"
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c
+@@ -0,0 +1,23 @@
++/*
++ * Copyright (c) 2017 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#define BPP 12
++#define INIT_FUNC ff_vp9dsp_init_12bpp_aarch64
++#include "vp9dsp_init_16bpp_aarch64_template.c"
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c
+@@ -0,0 +1,273 @@
++/*
++ * Copyright (c) 2017 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/attributes.h"
++#include "libavutil/internal.h"
++#include "libavutil/aarch64/cpu.h"
++#include "vp9dsp_init.h"
++
++#define declare_fpel(type, sz, suffix)                                          \
++void ff_vp9_##type##sz##suffix##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
++                                      const uint8_t *src, ptrdiff_t src_stride, \
++                                      int h, int mx, int my)
++
++#define decl_mc_func(op, filter, dir, sz, bpp)                                                   \
++void ff_vp9_##op##_##filter##sz##_##dir##_##bpp##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
++                                                       const uint8_t *src, ptrdiff_t src_stride, \
++                                                       int h, int mx, int my)
++
++#define define_8tap_2d_fn(op, filter, sz, bpp)                                      \
++static void op##_##filter##sz##_hv_##bpp##_neon(uint8_t *dst, ptrdiff_t dst_stride, \
++                                                const uint8_t *src,                 \
++                                                ptrdiff_t src_stride,               \
++                                                int h, int mx, int my)              \
++{                                                                                   \
++    LOCAL_ALIGNED_16(uint8_t, temp, [((1 + (sz < 64)) * sz + 8) * sz * 2]);         \
++    /* We only need h + 7 lines, but the horizontal filter assumes an               \
++     * even number of rows, so filter h + 8 lines here. */                          \
++    ff_vp9_put_##filter##sz##_h_##bpp##_neon(temp, 2 * sz,                          \
++                                             src - 3 * src_stride, src_stride,      \
++                                             h + 8, mx, 0);                         \
++    ff_vp9_##op##_##filter##sz##_v_##bpp##_neon(dst, dst_stride,                    \
++                                                temp + 3 * 2 * sz, 2 * sz,          \
++                                                h, 0, my);                          \
++}
++
++#define decl_filter_funcs(op, dir, sz, bpp)  \
++    decl_mc_func(op, regular, dir, sz, bpp); \
++    decl_mc_func(op, sharp,   dir, sz, bpp); \
++    decl_mc_func(op, smooth,  dir, sz, bpp)
++
++#define decl_mc_funcs(sz, bpp)           \
++    decl_filter_funcs(put, h,  sz, bpp); \
++    decl_filter_funcs(avg, h,  sz, bpp); \
++    decl_filter_funcs(put, v,  sz, bpp); \
++    decl_filter_funcs(avg, v,  sz, bpp); \
++    decl_filter_funcs(put, hv, sz, bpp); \
++    decl_filter_funcs(avg, hv, sz, bpp)
++
++#define ff_vp9_copy32_neon  ff_vp9_copy32_aarch64
++#define ff_vp9_copy64_neon  ff_vp9_copy64_aarch64
++#define ff_vp9_copy128_neon ff_vp9_copy128_aarch64
++
++declare_fpel(copy, 128, );
++declare_fpel(copy, 64,  );
++declare_fpel(copy, 32,  );
++declare_fpel(copy, 16,  );
++declare_fpel(copy, 8,   );
++declare_fpel(avg, 64, _16);
++declare_fpel(avg, 32, _16);
++declare_fpel(avg, 16, _16);
++declare_fpel(avg, 8,  _16);
++declare_fpel(avg, 4,  _16);
++
++decl_mc_funcs(64, BPP);
++decl_mc_funcs(32, BPP);
++decl_mc_funcs(16, BPP);
++decl_mc_funcs(8, BPP);
++decl_mc_funcs(4, BPP);
++
++#define define_8tap_2d_funcs(sz, bpp)        \
++    define_8tap_2d_fn(put, regular, sz, bpp) \
++    define_8tap_2d_fn(put, sharp,   sz, bpp) \
++    define_8tap_2d_fn(put, smooth,  sz, bpp) \
++    define_8tap_2d_fn(avg, regular, sz, bpp) \
++    define_8tap_2d_fn(avg, sharp,   sz, bpp) \
++    define_8tap_2d_fn(avg, smooth,  sz, bpp)
++
++define_8tap_2d_funcs(64, BPP)
++define_8tap_2d_funcs(32, BPP)
++define_8tap_2d_funcs(16, BPP)
++define_8tap_2d_funcs(8,  BPP)
++define_8tap_2d_funcs(4,  BPP)
++
++static av_cold void vp9dsp_mc_init_aarch64(VP9DSPContext *dsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++#define init_fpel(idx1, idx2, sz, type, suffix)      \
++    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
++    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
++    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
++    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##suffix
++
++#define init_copy(idx, sz, suffix) \
++    init_fpel(idx, 0, sz, copy, suffix)
++
++#define init_avg(idx, sz, suffix) \
++    init_fpel(idx, 1, sz, avg,  suffix)
++
++#define init_copy_avg(idx, sz1, sz2) \
++    init_copy(idx, sz2, _neon);      \
++    init_avg (idx, sz1, _16_neon)
++
++    if (have_armv8(cpu_flags)) {
++        init_copy(0, 128, _aarch64);
++        init_copy(1, 64,  _aarch64);
++        init_copy(2, 32,  _aarch64);
++    }
++
++    if (have_neon(cpu_flags)) {
++#define init_mc_func(idx1, idx2, op, filter, fname, dir, mx, my, sz, pfx, bpp) \
++    dsp->mc[idx1][filter][idx2][mx][my] = pfx##op##_##fname##sz##_##dir##_##bpp##_neon
++
++#define init_mc_funcs(idx, dir, mx, my, sz, pfx, bpp)                                   \
++    init_mc_func(idx, 0, put, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx, bpp); \
++    init_mc_func(idx, 0, put, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx, bpp); \
++    init_mc_func(idx, 0, put, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx, bpp); \
++    init_mc_func(idx, 1, avg, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx, bpp); \
++    init_mc_func(idx, 1, avg, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx, bpp); \
++    init_mc_func(idx, 1, avg, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx, bpp)
++
++#define init_mc_funcs_dirs(idx, sz, bpp)            \
++    init_mc_funcs(idx, v,  0, 1, sz, ff_vp9_, bpp); \
++    init_mc_funcs(idx, h,  1, 0, sz, ff_vp9_, bpp); \
++    init_mc_funcs(idx, hv, 1, 1, sz,        , bpp)
++
++
++        init_avg(0, 64, _16_neon);
++        init_avg(1, 32, _16_neon);
++        init_avg(2, 16, _16_neon);
++        init_copy_avg(3, 8, 16);
++        init_copy_avg(4, 4, 8);
++
++        init_mc_funcs_dirs(0, 64, BPP);
++        init_mc_funcs_dirs(1, 32, BPP);
++        init_mc_funcs_dirs(2, 16, BPP);
++        init_mc_funcs_dirs(3, 8,  BPP);
++        init_mc_funcs_dirs(4, 4,  BPP);
++    }
++}
++
++#define define_itxfm2(type_a, type_b, sz, bpp)                                     \
++void ff_vp9_##type_a##_##type_b##_##sz##x##sz##_add_##bpp##_neon(uint8_t *_dst,    \
++                                                                 ptrdiff_t stride, \
++                                                                 int16_t *_block, int eob)
++#define define_itxfm(type_a, type_b, sz, bpp) define_itxfm2(type_a, type_b, sz, bpp)
++
++#define define_itxfm_funcs(sz, bpp)      \
++    define_itxfm(idct,  idct,  sz, bpp); \
++    define_itxfm(iadst, idct,  sz, bpp); \
++    define_itxfm(idct,  iadst, sz, bpp); \
++    define_itxfm(iadst, iadst, sz, bpp)
++
++define_itxfm_funcs(4,  BPP);
++define_itxfm_funcs(8,  BPP);
++define_itxfm_funcs(16, BPP);
++define_itxfm(idct, idct, 32, BPP);
++define_itxfm(iwht, iwht, 4,  BPP);
++
++
++static av_cold void vp9dsp_itxfm_init_aarch64(VP9DSPContext *dsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++#define init_itxfm2(tx, sz, bpp)                                               \
++    dsp->itxfm_add[tx][DCT_DCT]   = ff_vp9_idct_idct_##sz##_add_##bpp##_neon;  \
++    dsp->itxfm_add[tx][DCT_ADST]  = ff_vp9_iadst_idct_##sz##_add_##bpp##_neon; \
++    dsp->itxfm_add[tx][ADST_DCT]  = ff_vp9_idct_iadst_##sz##_add_##bpp##_neon; \
++    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_iadst_iadst_##sz##_add_##bpp##_neon
++#define init_itxfm(tx, sz, bpp) init_itxfm2(tx, sz, bpp)
++
++#define init_idct2(tx, nm, bpp)     \
++    dsp->itxfm_add[tx][DCT_DCT]   = \
++    dsp->itxfm_add[tx][ADST_DCT]  = \
++    dsp->itxfm_add[tx][DCT_ADST]  = \
++    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_##nm##_add_##bpp##_neon
++#define init_idct(tx, nm, bpp) init_idct2(tx, nm, bpp)
++
++        init_itxfm(TX_4X4,   4x4,   BPP);
++        init_itxfm(TX_8X8,   8x8,   BPP);
++        init_itxfm(TX_16X16, 16x16, BPP);
++        init_idct(TX_32X32, idct_idct_32x32, BPP);
++        init_idct(4,        iwht_iwht_4x4,   BPP);
++    }
++}
++
++#define define_loop_filter(dir, wd, size, bpp) \
++void ff_vp9_loop_filter_##dir##_##wd##_##size##_##bpp##_neon(uint8_t *dst, ptrdiff_t stride, int E, int I, int H)
++
++#define define_loop_filters(wd, size, bpp) \
++    define_loop_filter(h, wd, size, bpp);  \
++    define_loop_filter(v, wd, size, bpp)
++
++define_loop_filters(4,  8,  BPP);
++define_loop_filters(8,  8,  BPP);
++define_loop_filters(16, 8,  BPP);
++
++define_loop_filters(16, 16, BPP);
++
++define_loop_filters(44, 16, BPP);
++define_loop_filters(48, 16, BPP);
++define_loop_filters(84, 16, BPP);
++define_loop_filters(88, 16, BPP);
++
++static av_cold void vp9dsp_loopfilter_init_aarch64(VP9DSPContext *dsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++#define init_lpf_func_8(idx1, idx2, dir, wd, bpp) \
++    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_8_##bpp##_neon
++
++#define init_lpf_func_16(idx, dir, bpp) \
++    dsp->loop_filter_16[idx] = ff_vp9_loop_filter_##dir##_16_16_##bpp##_neon
++
++#define init_lpf_func_mix2(idx1, idx2, idx3, dir, wd, bpp) \
++    dsp->loop_filter_mix2[idx1][idx2][idx3] = ff_vp9_loop_filter_##dir##_##wd##_16_##bpp##_neon
++
++#define init_lpf_funcs_8_wd(idx, wd, bpp) \
++    init_lpf_func_8(idx, 0, h, wd, bpp);  \
++    init_lpf_func_8(idx, 1, v, wd, bpp)
++
++#define init_lpf_funcs_16(bpp)   \
++    init_lpf_func_16(0, h, bpp); \
++    init_lpf_func_16(1, v, bpp)
++
++#define init_lpf_funcs_mix2_wd(idx1, idx2, wd, bpp) \
++    init_lpf_func_mix2(idx1, idx2, 0, h, wd, bpp);  \
++    init_lpf_func_mix2(idx1, idx2, 1, v, wd, bpp)
++
++#define init_lpf_funcs_8(bpp)        \
++    init_lpf_funcs_8_wd(0, 4,  bpp); \
++    init_lpf_funcs_8_wd(1, 8,  bpp); \
++    init_lpf_funcs_8_wd(2, 16, bpp)
++
++#define init_lpf_funcs_mix2(bpp)           \
++    init_lpf_funcs_mix2_wd(0, 0, 44, bpp); \
++    init_lpf_funcs_mix2_wd(0, 1, 48, bpp); \
++    init_lpf_funcs_mix2_wd(1, 0, 84, bpp); \
++    init_lpf_funcs_mix2_wd(1, 1, 88, bpp)
++
++        init_lpf_funcs_8(BPP);
++        init_lpf_funcs_16(BPP);
++        init_lpf_funcs_mix2(BPP);
++    }
++}
++
++av_cold void INIT_FUNC(VP9DSPContext *dsp)
++{
++    vp9dsp_mc_init_aarch64(dsp);
++    vp9dsp_loopfilter_init_aarch64(dsp);
++    vp9dsp_itxfm_init_aarch64(dsp);
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9dsp_init_aarch64.c b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_aarch64.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9dsp_init_aarch64.c
+@@ -0,0 +1,258 @@
++/*
++ * Copyright (c) 2016 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/attributes.h"
++#include "libavutil/internal.h"
++#include "libavutil/aarch64/cpu.h"
++#include "libavcodec/vp9dsp.h"
++#include "vp9dsp_init.h"
++
++#define declare_fpel(type, sz)                                          \
++void ff_vp9_##type##sz##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
++                              const uint8_t *src, ptrdiff_t src_stride, \
++                              int h, int mx, int my)
++
++#define declare_copy_avg(sz) \
++    declare_fpel(copy, sz);  \
++    declare_fpel(avg , sz)
++
++#define decl_mc_func(op, filter, dir, sz)                                                \
++void ff_vp9_##op##_##filter##sz##_##dir##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
++                                               const uint8_t *src, ptrdiff_t src_stride, \
++                                               int h, int mx, int my)
++
++#define define_8tap_2d_fn(op, filter, sz)                                         \
++static void op##_##filter##sz##_hv_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
++                                        const uint8_t *src, ptrdiff_t src_stride, \
++                                        int h, int mx, int my)                    \
++{                                                                                 \
++    LOCAL_ALIGNED_16(uint8_t, temp, [((1 + (sz < 64)) * sz + 8) * sz]);           \
++    /* We only need h + 7 lines, but the horizontal filter assumes an             \
++     * even number of rows, so filter h + 8 lines here. */                        \
++    ff_vp9_put_##filter##sz##_h_neon(temp, sz,                                    \
++                                     src - 3 * src_stride, src_stride,            \
++                                     h + 8, mx, 0);                               \
++    ff_vp9_##op##_##filter##sz##_v_neon(dst, dst_stride,                          \
++                                        temp + 3 * sz, sz,                        \
++                                        h, 0, my);                                \
++}
++
++#define decl_filter_funcs(op, dir, sz)  \
++    decl_mc_func(op, regular, dir, sz); \
++    decl_mc_func(op, sharp,   dir, sz); \
++    decl_mc_func(op, smooth,  dir, sz)
++
++#define decl_mc_funcs(sz)           \
++    decl_filter_funcs(put, h,  sz); \
++    decl_filter_funcs(avg, h,  sz); \
++    decl_filter_funcs(put, v,  sz); \
++    decl_filter_funcs(avg, v,  sz); \
++    decl_filter_funcs(put, hv, sz); \
++    decl_filter_funcs(avg, hv, sz)
++
++#define ff_vp9_copy32_neon ff_vp9_copy32_aarch64
++#define ff_vp9_copy64_neon ff_vp9_copy64_aarch64
++
++declare_copy_avg(64);
++declare_copy_avg(32);
++declare_copy_avg(16);
++declare_copy_avg(8);
++declare_copy_avg(4);
++
++decl_mc_funcs(64);
++decl_mc_funcs(32);
++decl_mc_funcs(16);
++decl_mc_funcs(8);
++decl_mc_funcs(4);
++
++#define define_8tap_2d_funcs(sz)        \
++    define_8tap_2d_fn(put, regular, sz) \
++    define_8tap_2d_fn(put, sharp,   sz) \
++    define_8tap_2d_fn(put, smooth,  sz) \
++    define_8tap_2d_fn(avg, regular, sz) \
++    define_8tap_2d_fn(avg, sharp,   sz) \
++    define_8tap_2d_fn(avg, smooth,  sz)
++
++define_8tap_2d_funcs(64)
++define_8tap_2d_funcs(32)
++define_8tap_2d_funcs(16)
++define_8tap_2d_funcs(8)
++define_8tap_2d_funcs(4)
++
++static av_cold void vp9dsp_mc_init_aarch64(VP9DSPContext *dsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++#define init_fpel(idx1, idx2, sz, type, suffix)      \
++    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
++    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
++    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
++    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##suffix
++
++#define init_copy(idx, sz, suffix) \
++    init_fpel(idx, 0, sz, copy, suffix)
++
++#define init_avg(idx, sz, suffix) \
++    init_fpel(idx, 1, sz, avg,  suffix)
++
++#define init_copy_avg(idx, sz) \
++    init_copy(idx, sz, _neon); \
++    init_avg (idx, sz, _neon)
++
++    if (have_armv8(cpu_flags)) {
++        init_copy(0, 64, _aarch64);
++        init_copy(1, 32, _aarch64);
++    }
++
++    if (have_neon(cpu_flags)) {
++#define init_mc_func(idx1, idx2, op, filter, fname, dir, mx, my, sz, pfx) \
++    dsp->mc[idx1][filter][idx2][mx][my] = pfx##op##_##fname##sz##_##dir##_neon
++
++#define init_mc_funcs(idx, dir, mx, my, sz, pfx)                                   \
++    init_mc_func(idx, 0, put, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx); \
++    init_mc_func(idx, 0, put, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx); \
++    init_mc_func(idx, 0, put, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx); \
++    init_mc_func(idx, 1, avg, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx); \
++    init_mc_func(idx, 1, avg, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx); \
++    init_mc_func(idx, 1, avg, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx)
++
++#define init_mc_funcs_dirs(idx, sz)            \
++    init_mc_funcs(idx, h,  1, 0, sz, ff_vp9_); \
++    init_mc_funcs(idx, v,  0, 1, sz, ff_vp9_); \
++    init_mc_funcs(idx, hv, 1, 1, sz,)
++
++        init_avg(0, 64, _neon);
++        init_avg(1, 32, _neon);
++        init_copy_avg(2, 16);
++        init_copy_avg(3, 8);
++        init_copy_avg(4, 4);
++
++        init_mc_funcs_dirs(0, 64);
++        init_mc_funcs_dirs(1, 32);
++        init_mc_funcs_dirs(2, 16);
++        init_mc_funcs_dirs(3, 8);
++        init_mc_funcs_dirs(4, 4);
++    }
++}
++
++#define define_itxfm(type_a, type_b, sz)                                   \
++void ff_vp9_##type_a##_##type_b##_##sz##x##sz##_add_neon(uint8_t *_dst,    \
++                                                         ptrdiff_t stride, \
++                                                         int16_t *_block, int eob)
++
++#define define_itxfm_funcs(sz)      \
++    define_itxfm(idct,  idct,  sz); \
++    define_itxfm(iadst, idct,  sz); \
++    define_itxfm(idct,  iadst, sz); \
++    define_itxfm(iadst, iadst, sz)
++
++define_itxfm_funcs(4);
++define_itxfm_funcs(8);
++define_itxfm_funcs(16);
++define_itxfm(idct, idct, 32);
++define_itxfm(iwht, iwht, 4);
++
++
++static av_cold void vp9dsp_itxfm_init_aarch64(VP9DSPContext *dsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++#define init_itxfm(tx, sz)                                             \
++    dsp->itxfm_add[tx][DCT_DCT]   = ff_vp9_idct_idct_##sz##_add_neon;  \
++    dsp->itxfm_add[tx][DCT_ADST]  = ff_vp9_iadst_idct_##sz##_add_neon; \
++    dsp->itxfm_add[tx][ADST_DCT]  = ff_vp9_idct_iadst_##sz##_add_neon; \
++    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_iadst_iadst_##sz##_add_neon
++
++#define init_idct(tx, nm)           \
++    dsp->itxfm_add[tx][DCT_DCT]   = \
++    dsp->itxfm_add[tx][ADST_DCT]  = \
++    dsp->itxfm_add[tx][DCT_ADST]  = \
++    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_##nm##_add_neon
++
++        init_itxfm(TX_4X4, 4x4);
++        init_itxfm(TX_8X8, 8x8);
++        init_itxfm(TX_16X16, 16x16);
++        init_idct(TX_32X32, idct_idct_32x32);
++        init_idct(4, iwht_iwht_4x4);
++    }
++}
++
++#define define_loop_filter(dir, wd, len) \
++void ff_vp9_loop_filter_##dir##_##wd##_##len##_neon(uint8_t *dst, ptrdiff_t stride, int E, int I, int H)
++
++#define define_loop_filters(wd, len) \
++    define_loop_filter(h, wd, len);  \
++    define_loop_filter(v, wd, len)
++
++define_loop_filters(4, 8);
++define_loop_filters(8, 8);
++define_loop_filters(16, 8);
++
++define_loop_filters(16, 16);
++
++define_loop_filters(44, 16);
++define_loop_filters(48, 16);
++define_loop_filters(84, 16);
++define_loop_filters(88, 16);
++
++static av_cold void vp9dsp_loopfilter_init_aarch64(VP9DSPContext *dsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++        dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_neon;
++        dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_neon;
++        dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_neon;
++        dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_neon;
++        dsp->loop_filter_8[2][1] = ff_vp9_loop_filter_v_16_8_neon;
++        dsp->loop_filter_8[2][0] = ff_vp9_loop_filter_h_16_8_neon;
++
++        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_neon;
++        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_neon;
++
++        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_neon;
++        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_neon;
++        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_neon;
++        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_neon;
++        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_neon;
++        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_neon;
++        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_neon;
++        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_neon;
++    }
++}
++
++av_cold void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp)
++{
++    if (bpp == 10) {
++        ff_vp9dsp_init_10bpp_aarch64(dsp);
++        return;
++    } else if (bpp == 12) {
++        ff_vp9dsp_init_12bpp_aarch64(dsp);
++        return;
++    } else if (bpp != 8)
++        return;
++
++    vp9dsp_mc_init_aarch64(dsp);
++    vp9dsp_loopfilter_init_aarch64(dsp);
++    vp9dsp_itxfm_init_aarch64(dsp);
++}
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9itxfm_16bpp_neon.S b/media/ffvpx/libavcodec/aarch64/vp9itxfm_16bpp_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9itxfm_16bpp_neon.S
+@@ -0,0 +1,2017 @@
++/*
++ * Copyright (c) 2017 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++#include "neon.S"
++
++const itxfm4_coeffs, align=4
++        .short  11585, 0, 6270, 15137
++iadst4_coeffs:
++        .short  5283, 15212, 9929, 13377
++endconst
++
++const iadst8_coeffs, align=4
++        .short  16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679
++idct_coeffs:
++        .short  11585, 0, 6270, 15137, 3196, 16069, 13623, 9102
++        .short  1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756
++        .short  804, 16364, 12140, 11003, 7005, 14811, 15426, 5520
++        .short  3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404
++endconst
++
++const iadst16_coeffs, align=4
++        .short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
++        .short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
++endconst
++
++.macro transpose_4x4s r0, r1, r2, r3, r4, r5, r6, r7
++        trn1            \r4\().4s,  \r0\().4s,  \r1\().4s
++        trn2            \r5\().4s,  \r0\().4s,  \r1\().4s
++        trn1            \r6\().4s,  \r2\().4s,  \r3\().4s
++        trn2            \r7\().4s,  \r2\().4s,  \r3\().4s
++        trn1            \r0\().2d,  \r4\().2d,  \r6\().2d
++        trn2            \r2\().2d,  \r4\().2d,  \r6\().2d
++        trn1            \r1\().2d,  \r5\().2d,  \r7\().2d
++        trn2            \r3\().2d,  \r5\().2d,  \r7\().2d
++.endm
++
++// Transpose a 8x8 matrix of 32 bit elements, where each row is spread out
++// over two registers.
++.macro transpose_8x8s r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, t0, t1, t2, t3
++        transpose_4x4s  \r0,  \r2,  \r4,  \r6,  \t0, \t1, \t2, \t3
++        transpose_4x4s  \r9,  \r11, \r13, \r15, \t0, \t1, \t2, \t3
++
++        // Do 4x4 transposes of r1,r3,r5,r7 and r8,r10,r12,r14
++        // while swapping the two 4x4 matrices between each other
++
++        // First step of the 4x4 transpose of r1-r7, into t0-t3
++        trn1            \t0\().4s,  \r1\().4s,  \r3\().4s
++        trn2            \t1\().4s,  \r1\().4s,  \r3\().4s
++        trn1            \t2\().4s,  \r5\().4s,  \r7\().4s
++        trn2            \t3\().4s,  \r5\().4s,  \r7\().4s
++
++        // First step of the 4x4 transpose of r8-r12, into r1-r7
++        trn1            \r1\().4s,  \r8\().4s,  \r10\().4s
++        trn2            \r3\().4s,  \r8\().4s,  \r10\().4s
++        trn1            \r5\().4s,  \r12\().4s, \r14\().4s
++        trn2            \r7\().4s,  \r12\().4s, \r14\().4s
++
++        // Second step of the 4x4 transpose of r1-r7 (now in t0-t3), into r8-r12
++        trn1            \r8\().2d,  \t0\().2d,  \t2\().2d
++        trn2            \r12\().2d, \t0\().2d,  \t2\().2d
++        trn1            \r10\().2d, \t1\().2d,  \t3\().2d
++        trn2            \r14\().2d, \t1\().2d,  \t3\().2d
++
++        // Second step of the 4x4 transpose of r8-r12 (now in r1-r7), in place as far as possible
++        trn1            \t0\().2d,  \r1\().2d,  \r5\().2d
++        trn2            \r5\().2d,  \r1\().2d,  \r5\().2d
++        trn1            \t1\().2d,  \r3\().2d,  \r7\().2d
++        trn2            \r7\().2d,  \r3\().2d,  \r7\().2d
++
++        // Move the outputs of trn1 back in place
++        mov             \r1\().16b,  \t0\().16b
++        mov             \r3\().16b,  \t1\().16b
++.endm
++
++// out1 = ((in1 + in2) * d0[0] + (1 << 13)) >> 14
++// out2 = ((in1 - in2) * d0[0] + (1 << 13)) >> 14
++// in/out are .4s registers; this can do with 4 temp registers, but is
++// more efficient if 6 temp registers are available.
++.macro dmbutterfly0 out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, neg=0
++.if \neg > 0
++        neg             \tmp4\().4s, v0.4s
++.endif
++        add             \tmp1\().4s, \in1\().4s,  \in2\().4s
++        sub             \tmp2\().4s, \in1\().4s,  \in2\().4s
++.if \neg > 0
++        smull           \tmp3\().2d, \tmp1\().2s, \tmp4\().s[0]
++        smull2          \tmp4\().2d, \tmp1\().4s, \tmp4\().s[0]
++.else
++        smull           \tmp3\().2d, \tmp1\().2s, v0.s[0]
++        smull2          \tmp4\().2d, \tmp1\().4s, v0.s[0]
++.endif
++.ifb \tmp5
++        rshrn           \out1\().2s, \tmp3\().2d, #14
++        rshrn2          \out1\().4s, \tmp4\().2d, #14
++        smull           \tmp3\().2d, \tmp2\().2s, v0.s[0]
++        smull2          \tmp4\().2d, \tmp2\().4s, v0.s[0]
++        rshrn           \out2\().2s, \tmp3\().2d, #14
++        rshrn2          \out2\().4s, \tmp4\().2d, #14
++.else
++        smull           \tmp5\().2d, \tmp2\().2s, v0.s[0]
++        smull2          \tmp6\().2d, \tmp2\().4s, v0.s[0]
++        rshrn           \out1\().2s, \tmp3\().2d, #14
++        rshrn2          \out1\().4s, \tmp4\().2d, #14
++        rshrn           \out2\().2s, \tmp5\().2d, #14
++        rshrn2          \out2\().4s, \tmp6\().2d, #14
++.endif
++.endm
++
++// Same as dmbutterfly0 above, but treating the input in in2 as zero,
++// writing the same output into both out1 and out2.
++.macro dmbutterfly0_h out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6
++        smull           \tmp1\().2d, \in1\().2s,  v0.s[0]
++        smull2          \tmp2\().2d, \in1\().4s,  v0.s[0]
++        rshrn           \out1\().2s, \tmp1\().2d, #14
++        rshrn2          \out1\().4s, \tmp2\().2d, #14
++        rshrn           \out2\().2s, \tmp1\().2d, #14
++        rshrn2          \out2\().4s, \tmp2\().2d, #14
++.endm
++
++// out1,out2 = in1 * coef1 - in2 * coef2
++// out3,out4 = in1 * coef2 + in2 * coef1
++// out are 4 x .2d registers, in are 2 x .4s registers
++.macro dmbutterfly_l out1, out2, out3, out4, in1, in2, coef1, coef2
++        smull           \out1\().2d, \in1\().2s, \coef1
++        smull2          \out2\().2d, \in1\().4s, \coef1
++        smull           \out3\().2d, \in1\().2s, \coef2
++        smull2          \out4\().2d, \in1\().4s, \coef2
++        smlsl           \out1\().2d, \in2\().2s, \coef2
++        smlsl2          \out2\().2d, \in2\().4s, \coef2
++        smlal           \out3\().2d, \in2\().2s, \coef1
++        smlal2          \out4\().2d, \in2\().4s, \coef1
++.endm
++
++// inout1 = (inout1 * coef1 - inout2 * coef2 + (1 << 13)) >> 14
++// inout2 = (inout1 * coef2 + inout2 * coef1 + (1 << 13)) >> 14
++// inout are 2 x .4s registers
++.macro dmbutterfly inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4, neg=0
++        dmbutterfly_l   \tmp1, \tmp2, \tmp3, \tmp4, \inout1, \inout2, \coef1, \coef2
++.if \neg > 0
++        neg             \tmp3\().2d, \tmp3\().2d
++        neg             \tmp4\().2d, \tmp4\().2d
++.endif
++        rshrn           \inout1\().2s, \tmp1\().2d,  #14
++        rshrn2          \inout1\().4s, \tmp2\().2d,  #14
++        rshrn           \inout2\().2s, \tmp3\().2d,  #14
++        rshrn2          \inout2\().4s, \tmp4\().2d,  #14
++.endm
++
++// Same as dmbutterfly above, but treating the input in inout2 as zero
++.macro dmbutterfly_h1 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
++        smull           \tmp1\().2d, \inout1\().2s, \coef1
++        smull2          \tmp2\().2d, \inout1\().4s, \coef1
++        smull           \tmp3\().2d, \inout1\().2s, \coef2
++        smull2          \tmp4\().2d, \inout1\().4s, \coef2
++        rshrn           \inout1\().2s, \tmp1\().2d, #14
++        rshrn2          \inout1\().4s, \tmp2\().2d, #14
++        rshrn           \inout2\().2s, \tmp3\().2d, #14
++        rshrn2          \inout2\().4s, \tmp4\().2d, #14
++.endm
++
++// Same as dmbutterfly above, but treating the input in inout1 as zero
++.macro dmbutterfly_h2 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
++        smull           \tmp1\().2d, \inout2\().2s, \coef2
++        smull2          \tmp2\().2d, \inout2\().4s, \coef2
++        smull           \tmp3\().2d, \inout2\().2s, \coef1
++        smull2          \tmp4\().2d, \inout2\().4s, \coef1
++        neg             \tmp1\().2d, \tmp1\().2d
++        neg             \tmp2\().2d, \tmp2\().2d
++        rshrn           \inout2\().2s, \tmp3\().2d, #14
++        rshrn2          \inout2\().4s, \tmp4\().2d, #14
++        rshrn           \inout1\().2s, \tmp1\().2d, #14
++        rshrn2          \inout1\().4s, \tmp2\().2d, #14
++.endm
++
++.macro dsmull_h out1, out2, in, coef
++        smull           \out1\().2d, \in\().2s, \coef
++        smull2          \out2\().2d, \in\().4s, \coef
++.endm
++
++.macro drshrn_h out, in1, in2, shift
++        rshrn           \out\().2s, \in1\().2d, \shift
++        rshrn2          \out\().4s, \in2\().2d, \shift
++.endm
++
++
++// out1 = in1 + in2
++// out2 = in1 - in2
++.macro butterfly_4s out1, out2, in1, in2
++        add             \out1\().4s, \in1\().4s, \in2\().4s
++        sub             \out2\().4s, \in1\().4s, \in2\().4s
++.endm
++
++// out1 = in1 - in2
++// out2 = in1 + in2
++.macro butterfly_4s_r out1, out2, in1, in2
++        sub             \out1\().4s, \in1\().4s, \in2\().4s
++        add             \out2\().4s, \in1\().4s, \in2\().4s
++.endm
++
++// out1 = (in1,in2 + in3,in4 + (1 << 13)) >> 14
++// out2 = (in1,in2 - in3,in4 + (1 << 13)) >> 14
++// out are 2 x .4s registers, in are 4 x .2d registers
++.macro dbutterfly_n out1, out2, in1, in2, in3, in4, tmp1, tmp2, tmp3, tmp4
++        add             \tmp1\().2d, \in1\().2d, \in3\().2d
++        add             \tmp2\().2d, \in2\().2d, \in4\().2d
++        sub             \tmp3\().2d, \in1\().2d, \in3\().2d
++        sub             \tmp4\().2d, \in2\().2d, \in4\().2d
++        rshrn           \out1\().2s, \tmp1\().2d,  #14
++        rshrn2          \out1\().4s, \tmp2\().2d,  #14
++        rshrn           \out2\().2s, \tmp3\().2d,  #14
++        rshrn2          \out2\().4s, \tmp4\().2d,  #14
++.endm
++
++.macro iwht4_10 c0, c1, c2, c3
++        add             \c0\().4s, \c0\().4s, \c1\().4s
++        sub             v17.4s,    \c2\().4s, \c3\().4s
++        sub             v16.4s,    \c0\().4s, v17.4s
++        sshr            v16.4s,    v16.4s,    #1
++        sub             \c2\().4s, v16.4s,    \c1\().4s
++        sub             \c1\().4s, v16.4s,    \c3\().4s
++        add             \c3\().4s, v17.4s,    \c2\().4s
++        sub             \c0\().4s, \c0\().4s, \c1\().4s
++.endm
++
++.macro iwht4_12 c0, c1, c2, c3
++        iwht4_10        \c0, \c1, \c2, \c3
++.endm
++
++.macro idct4_10 c0, c1, c2, c3
++        mul             v22.4s,    \c1\().4s, v0.s[3]
++        mul             v20.4s,    \c1\().4s, v0.s[2]
++        add             v16.4s,    \c0\().4s, \c2\().4s
++        sub             v17.4s,    \c0\().4s, \c2\().4s
++        mla             v22.4s,    \c3\().4s, v0.s[2]
++        mul             v18.4s,    v16.4s,    v0.s[0]
++        mul             v24.4s,    v17.4s,    v0.s[0]
++        mls             v20.4s,    \c3\().4s, v0.s[3]
++        srshr           v22.4s,    v22.4s,    #14
++        srshr           v18.4s,    v18.4s,    #14
++        srshr           v24.4s,    v24.4s,    #14
++        srshr           v20.4s,    v20.4s,    #14
++        add             \c0\().4s, v18.4s,    v22.4s
++        sub             \c3\().4s, v18.4s,    v22.4s
++        add             \c1\().4s, v24.4s,    v20.4s
++        sub             \c2\().4s, v24.4s,    v20.4s
++.endm
++
++.macro idct4_12 c0, c1, c2, c3
++        smull           v22.2d,    \c1\().2s, v0.s[3]
++        smull2          v23.2d,    \c1\().4s, v0.s[3]
++        smull           v20.2d,    \c1\().2s, v0.s[2]
++        smull2          v21.2d,    \c1\().4s, v0.s[2]
++        add             v16.4s,    \c0\().4s, \c2\().4s
++        sub             v17.4s,    \c0\().4s, \c2\().4s
++        smlal           v22.2d,    \c3\().2s, v0.s[2]
++        smlal2          v23.2d,    \c3\().4s, v0.s[2]
++        smull           v18.2d,    v16.2s,    v0.s[0]
++        smull2          v19.2d,    v16.4s,    v0.s[0]
++        smull           v24.2d,    v17.2s,    v0.s[0]
++        smull2          v25.2d,    v17.4s,    v0.s[0]
++        smlsl           v20.2d,    \c3\().2s, v0.s[3]
++        smlsl2          v21.2d,    \c3\().4s, v0.s[3]
++        rshrn           v22.2s,    v22.2d,    #14
++        rshrn2          v22.4s,    v23.2d,    #14
++        rshrn           v18.2s,    v18.2d,    #14
++        rshrn2          v18.4s,    v19.2d,    #14
++        rshrn           v24.2s,    v24.2d,    #14
++        rshrn2          v24.4s,    v25.2d,    #14
++        rshrn           v20.2s,    v20.2d,    #14
++        rshrn2          v20.4s,    v21.2d,    #14
++        add             \c0\().4s, v18.4s,    v22.4s
++        sub             \c3\().4s, v18.4s,    v22.4s
++        add             \c1\().4s, v24.4s,    v20.4s
++        sub             \c2\().4s, v24.4s,    v20.4s
++.endm
++
++.macro iadst4_10 c0, c1, c2, c3
++        mul             v16.4s,    \c0\().4s, v1.s[0]
++        mla             v16.4s,    \c2\().4s, v1.s[1]
++        mla             v16.4s,    \c3\().4s, v1.s[2]
++        mul             v18.4s,    \c0\().4s, v1.s[2]
++        mls             v18.4s,    \c2\().4s, v1.s[0]
++        sub             \c0\().4s, \c0\().4s, \c2\().4s
++        mls             v18.4s,    \c3\().4s, v1.s[1]
++        add             \c0\().4s, \c0\().4s, \c3\().4s
++        mul             v22.4s,    \c1\().4s, v1.s[3]
++        mul             v20.4s,    \c0\().4s, v1.s[3]
++        add             v24.4s,    v16.4s,    v22.4s
++        add             v26.4s,    v18.4s,    v22.4s
++        srshr           \c0\().4s, v24.4s,    #14
++        add             v16.4s,    v16.4s,    v18.4s
++        srshr           \c1\().4s, v26.4s,    #14
++        sub             v16.4s,    v16.4s,    v22.4s
++        srshr           \c2\().4s, v20.4s,    #14
++        srshr           \c3\().4s, v16.4s,    #14
++.endm
++
++.macro iadst4_12 c0, c1, c2, c3
++        smull           v16.2d,    \c0\().2s, v1.s[0]
++        smull2          v17.2d,    \c0\().4s, v1.s[0]
++        smlal           v16.2d,    \c2\().2s, v1.s[1]
++        smlal2          v17.2d,    \c2\().4s, v1.s[1]
++        smlal           v16.2d,    \c3\().2s, v1.s[2]
++        smlal2          v17.2d,    \c3\().4s, v1.s[2]
++        smull           v18.2d,    \c0\().2s, v1.s[2]
++        smull2          v19.2d,    \c0\().4s, v1.s[2]
++        smlsl           v18.2d,    \c2\().2s, v1.s[0]
++        smlsl2          v19.2d,    \c2\().4s, v1.s[0]
++        sub             \c0\().4s, \c0\().4s, \c2\().4s
++        smlsl           v18.2d,    \c3\().2s, v1.s[1]
++        smlsl2          v19.2d,    \c3\().4s, v1.s[1]
++        add             \c0\().4s, \c0\().4s, \c3\().4s
++        smull           v22.2d,    \c1\().2s, v1.s[3]
++        smull2          v23.2d,    \c1\().4s, v1.s[3]
++        smull           v20.2d,    \c0\().2s, v1.s[3]
++        smull2          v21.2d,    \c0\().4s, v1.s[3]
++        add             v24.2d,    v16.2d,    v22.2d
++        add             v25.2d,    v17.2d,    v23.2d
++        add             v26.2d,    v18.2d,    v22.2d
++        add             v27.2d,    v19.2d,    v23.2d
++        rshrn           \c0\().2s, v24.2d,    #14
++        rshrn2          \c0\().4s, v25.2d,    #14
++        add             v16.2d,    v16.2d,    v18.2d
++        add             v17.2d,    v17.2d,    v19.2d
++        rshrn           \c1\().2s, v26.2d,    #14
++        rshrn2          \c1\().4s, v27.2d,    #14
++        sub             v16.2d,    v16.2d,    v22.2d
++        sub             v17.2d,    v17.2d,    v23.2d
++        rshrn           \c2\().2s, v20.2d,    #14
++        rshrn2          \c2\().4s, v21.2d,    #14
++        rshrn           \c3\().2s, v16.2d,    #14
++        rshrn2          \c3\().4s, v17.2d,    #14
++.endm
++
++// The public functions in this file have got the following signature:
++// void itxfm_add(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
++
++.macro itxfm_func4x4 txfm1, txfm2, bpp
++function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_\bpp\()_neon, export=1
++.ifc \txfm1,\txfm2
++.ifc \txfm1,idct
++        movrel          x4,  itxfm4_coeffs
++        ld1             {v0.4h}, [x4]
++        sxtl            v0.4s,  v0.4h
++.endif
++.ifc \txfm1,iadst
++        movrel          x4,  iadst4_coeffs
++        ld1             {v0.d}[1], [x4]
++        sxtl2           v1.4s,  v0.8h
++.endif
++.else
++        movrel          x4,  itxfm4_coeffs
++        ld1             {v0.8h}, [x4]
++        sxtl2           v1.4s,  v0.8h
++        sxtl            v0.4s,  v0.4h
++.endif
++
++        movi            v30.4s, #0
++        movi            v31.4s, #0
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #1
++        b.ne            1f
++        // DC-only for idct/idct
++        ld1             {v2.s}[0],  [x2]
++        smull           v2.2d,  v2.2s, v0.s[0]
++        rshrn           v2.2s,  v2.2d, #14
++        smull           v2.2d,  v2.2s, v0.s[0]
++        rshrn           v2.2s,  v2.2d, #14
++        st1             {v31.s}[0], [x2]
++        dup             v4.4s,  v2.s[0]
++        mov             v5.16b, v4.16b
++        mov             v6.16b, v4.16b
++        mov             v7.16b, v4.16b
++        b               2f
++.endif
++
++1:
++        ld1             {v4.4s,v5.4s,v6.4s,v7.4s},  [x2]
++        st1             {v30.4s,v31.4s}, [x2], #32
++
++.ifc \txfm1,iwht
++        sshr            v4.4s,  v4.4s,  #2
++        sshr            v5.4s,  v5.4s,  #2
++        sshr            v6.4s,  v6.4s,  #2
++        sshr            v7.4s,  v7.4s,  #2
++.endif
++
++        \txfm1\()4_\bpp v4,  v5,  v6,  v7
++
++        st1             {v30.4s,v31.4s}, [x2], #32
++        // Transpose 4x4 with 32 bit elements
++        transpose_4x4s  v4,  v5,  v6,  v7,  v16, v17, v18, v19
++
++        \txfm2\()4_\bpp v4,  v5,  v6,  v7
++2:
++        mvni            v31.8h, #((0xff << (\bpp - 8)) & 0xff), lsl #8
++        ld1             {v0.4h},   [x0], x1
++        ld1             {v1.4h},   [x0], x1
++.ifnc \txfm1,iwht
++        srshr           v4.4s,  v4.4s,  #4
++        srshr           v5.4s,  v5.4s,  #4
++        srshr           v6.4s,  v6.4s,  #4
++        srshr           v7.4s,  v7.4s,  #4
++.endif
++        uaddw           v4.4s,  v4.4s,  v0.4h
++        uaddw           v5.4s,  v5.4s,  v1.4h
++        ld1             {v2.4h},   [x0], x1
++        ld1             {v3.4h},   [x0], x1
++        sqxtun          v0.4h,  v4.4s
++        sqxtun2         v0.8h,  v5.4s
++        sub             x0,  x0,  x1, lsl #2
++
++        uaddw           v6.4s,  v6.4s,  v2.4h
++        umin            v0.8h,  v0.8h,  v31.8h
++        uaddw           v7.4s,  v7.4s,  v3.4h
++        st1             {v0.4h},   [x0], x1
++        sqxtun          v2.4h,  v6.4s
++        sqxtun2         v2.8h,  v7.4s
++        umin            v2.8h,  v2.8h,  v31.8h
++
++        st1             {v0.d}[1], [x0], x1
++        st1             {v2.4h},   [x0], x1
++        st1             {v2.d}[1], [x0], x1
++
++        ret
++endfunc
++.endm
++
++.macro itxfm_funcs4x4 bpp
++itxfm_func4x4 idct,  idct,  \bpp
++itxfm_func4x4 iadst, idct,  \bpp
++itxfm_func4x4 idct,  iadst, \bpp
++itxfm_func4x4 iadst, iadst, \bpp
++itxfm_func4x4 iwht,  iwht,  \bpp
++.endm
++
++itxfm_funcs4x4 10
++itxfm_funcs4x4 12
++
++function idct8x8_dc_add_neon
++        movrel          x4,  idct_coeffs
++        ld1             {v0.4h}, [x4]
++
++        movi            v1.4h,  #0
++        sxtl            v0.4s,  v0.4h
++
++        ld1             {v2.s}[0],  [x2]
++        smull           v2.2d,  v2.2s,  v0.s[0]
++        rshrn           v2.2s,  v2.2d,  #14
++        smull           v2.2d,  v2.2s,  v0.s[0]
++        rshrn           v2.2s,  v2.2d,  #14
++        st1             {v1.s}[0],  [x2]
++        dup             v2.4s,  v2.s[0]
++
++        srshr           v2.4s,  v2.4s,  #5
++
++        mov             x4,  #8
++        mov             x3,  x0
++        dup             v31.8h, w5
++1:
++        // Loop to add the constant from v2 into all 8x8 outputs
++        subs            x4,  x4,  #2
++        ld1             {v3.8h},  [x0], x1
++        ld1             {v4.8h},  [x0], x1
++        uaddw           v16.4s, v2.4s,  v3.4h
++        uaddw2          v17.4s, v2.4s,  v3.8h
++        uaddw           v18.4s, v2.4s,  v4.4h
++        uaddw2          v19.4s, v2.4s,  v4.8h
++        sqxtun          v3.4h,  v16.4s
++        sqxtun2         v3.8h,  v17.4s
++        sqxtun          v4.4h,  v18.4s
++        sqxtun2         v4.8h,  v19.4s
++        umin            v3.8h,  v3.8h,  v31.8h
++        umin            v4.8h,  v4.8h,  v31.8h
++        st1             {v3.8h},  [x3], x1
++        st1             {v4.8h},  [x3], x1
++        b.ne            1b
++
++        ret
++endfunc
++
++.macro idct8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1, t2, t3, t4, t5
++        dmbutterfly0    \r0, \r4, \r0, \r4, \t0, \t1, \t2, \t3, \t4, \t5 // r0 = t0a, r4 = t1a
++        dmbutterfly     \r2, \r6, v0.s[2], v0.s[3], \t0, \t1, \t2, \t3   // r2 = t2a, r6 = t3a
++        dmbutterfly     \r1, \r7, v1.s[0], v1.s[1], \t0, \t1, \t2, \t3   // r1 = t4a, r7 = t7a
++        dmbutterfly     \r5, \r3, v1.s[2], v1.s[3], \t0, \t1, \t2, \t3   // r5 = t5a, r3 = t6a
++
++        butterfly_4s    \t0, \t1, \r0, \r6 // t0 = t0, t1 = t3
++        butterfly_4s    \t2, \r5, \r1, \r5 // t2 = t4, r5 = t5a
++        butterfly_4s    \t3, \r6, \r7, \r3 // t3 = t7, r6 = t6a
++        butterfly_4s    \r7, \r4, \r4, \r2 // r7 = t1, r4 = t2
++
++        dmbutterfly0    \r6, \r5, \r6, \r5, \r0, \r1, \r2, \r3, \t4, \t5 // r6 = t6, r5 = t5
++
++        butterfly_4s    \r1, \r6, \r7, \r6 // r1 = out[1], r6 = out[6]
++        butterfly_4s    \r0, \r7, \t0, \t3 // r0 = out[0], r7 = out[7]
++        butterfly_4s    \r2, \r5, \r4, \r5 // r2 = out[2], r5 = out[5]
++        butterfly_4s    \r3, \r4, \t1, \t2 // r3 = out[3], r4 = out[4]
++.endm
++
++.macro iadst8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1, t2, t3, t4, t5
++        dmbutterfly_l   \t2, \t3, \t0, \t1, \r7, \r0, v2.s[1], v2.s[0]   // t2,t3 = t1a, t0,t1 = t0a
++        dmbutterfly_l   \r0, \r7, \t4, \t5, \r3, \r4, v3.s[1], v3.s[0]   // r0,r7 = t5a, t4,t5 = t4a
++
++        dbutterfly_n    \r3, \t0, \t0, \t1, \t4, \t5, \r3, \r4, \t0, \t1 // r3 = t0, t0 = t4
++        dbutterfly_n    \r4, \t1, \t2, \t3, \r0, \r7, \r4, \t1, \t4, \t5 // r4 = t1, t1 = t5
++
++        dmbutterfly_l   \t4, \t5, \t2, \t3, \r5, \r2, v2.s[3], v2.s[2]   // t4,t5 = t3a, t2,t3 = t2a
++        dmbutterfly_l   \r2, \r5, \r0, \r7, \r1, \r6, v3.s[3], v3.s[2]   // r2,r5 = t7a, r0,r7 = t6a
++
++        dbutterfly_n    \r1, \t2, \t2, \t3, \r0, \r7, \r1, \r6, \t2, \t3 // r1 = t2, t2 = t6
++        dbutterfly_n    \r0, \t4, \t4, \t5, \r2, \r5, \r0, \r7, \t4, \t5 // r0 = t3, t4 = t7
++
++        butterfly_4s    \r7, \r4, \r4, \r0   // r7 = -out[7], r4 = t3
++        neg             \r7\().4s, \r7\().4s // r7 = out[7]
++        butterfly_4s    \r0, \r1, \r3, \r1   // r0 = out[0],  r1 = t2
++
++        dmbutterfly_l   \r2, \r3, \t3, \t5, \t0, \t1, v0.s[2], v0.s[3]   // r2,r3 = t5a, t3,t5 = t4a
++        dmbutterfly_l   \t0, \t1, \r5, \r6, \t4, \t2, v0.s[3], v0.s[2]   // t0,t1 = t6a, r5,r6 = t7a
++
++        dbutterfly_n    \r6, \t2, \r2, \r3, \r5, \r6, \t2, \t4, \r2, \r3 // r6 = out[6],  t2 = t7
++
++        dmbutterfly0    \r3, \r4, \r1, \r4, \t4, \r5, \r1, \r2           // r3 = -out[3], r4 = out[4]
++        neg             \r3\().4s, \r3\().4s  // r3 = out[3]
++
++        dbutterfly_n    \r1, \t0, \t3, \t5, \t0, \t1, \r1, \r2, \t0, \t1 // r1 = -out[1], t0 = t6
++        neg             \r1\().4s, \r1\().4s  // r1 = out[1]
++
++        dmbutterfly0    \r2, \r5, \t0, \t2, \t1, \t3, \t4, \t5           // r2 = out[2],  r5 = -out[5]
++        neg             \r5\().4s, \r5\().4s  // r5 = out[5]
++.endm
++
++
++.macro itxfm_func8x8 txfm1, txfm2
++function vp9_\txfm1\()_\txfm2\()_8x8_add_16_neon
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #1
++        b.eq            idct8x8_dc_add_neon
++.endif
++        // The iadst also uses a few coefficients from
++        // idct, so those always need to be loaded.
++.ifc \txfm1\()_\txfm2,idct_idct
++        movrel          x4,  idct_coeffs
++.else
++        movrel          x4,  iadst8_coeffs
++        ld1             {v1.8h}, [x4], #16
++        stp             d8,  d9,  [sp, #-0x10]!
++        sxtl2           v3.4s,  v1.8h
++        sxtl            v2.4s,  v1.4h
++.endif
++        ld1             {v0.8h}, [x4]
++        sxtl2           v1.4s,  v0.8h
++        sxtl            v0.4s,  v0.4h
++
++        movi            v4.4s, #0
++        movi            v5.4s, #0
++        movi            v6.4s, #0
++        movi            v7.4s, #0
++
++1:
++        ld1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x2], #64
++        ld1             {v20.4s,v21.4s,v22.4s,v23.4s},  [x2], #64
++        ld1             {v24.4s,v25.4s,v26.4s,v27.4s},  [x2], #64
++        ld1             {v28.4s,v29.4s,v30.4s,v31.4s},  [x2], #64
++        sub             x2,  x2,  #256
++        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
++        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
++        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
++        st1             {v4.4s,v5.4s,v6.4s,v7.4s},      [x2], #64
++
++.ifc \txfm1\()_\txfm2,idct_idct
++        idct8           v16, v18, v20, v22, v24, v26, v28, v30, v2,  v3,  v4,  v5,  v6,  v7
++        idct8           v17, v19, v21, v23, v25, v27, v29, v31, v2,  v3,  v4,  v5,  v6,  v7
++.else
++        \txfm1\()8      v16, v18, v20, v22, v24, v26, v28, v30, v4,  v5,  v6,  v7,  v8,  v9
++        \txfm1\()8      v17, v19, v21, v23, v25, v27, v29, v31, v4,  v5,  v6,  v7,  v8,  v9
++.endif
++
++        // Transpose 8x8 with 32 bit elements
++        transpose_8x8s  v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v4, v5, v6, v7
++
++.ifc \txfm1\()_\txfm2,idct_idct
++        idct8           v16, v18, v20, v22, v24, v26, v28, v30, v2,  v3,  v4,  v5,  v6,  v7
++        idct8           v17, v19, v21, v23, v25, v27, v29, v31, v2,  v3,  v4,  v5,  v6,  v7
++.else
++        \txfm2\()8      v16, v18, v20, v22, v24, v26, v28, v30, v4,  v5,  v6,  v7,  v8,  v9
++        \txfm2\()8      v17, v19, v21, v23, v25, v27, v29, v31, v4,  v5,  v6,  v7,  v8,  v9
++.endif
++2:
++        mov             x3,  x0
++        // Add into the destination
++        ld1             {v0.8h},  [x0], x1
++        srshr           v16.4s, v16.4s, #5
++        srshr           v17.4s, v17.4s, #5
++        ld1             {v1.8h},  [x0], x1
++        srshr           v18.4s, v18.4s, #5
++        srshr           v19.4s, v19.4s, #5
++        ld1             {v2.8h},  [x0], x1
++        srshr           v20.4s, v20.4s, #5
++        srshr           v21.4s, v21.4s, #5
++        uaddw           v16.4s, v16.4s, v0.4h
++        uaddw2          v17.4s, v17.4s, v0.8h
++        ld1             {v3.8h},  [x0], x1
++        srshr           v22.4s, v22.4s, #5
++        srshr           v23.4s, v23.4s, #5
++        uaddw           v18.4s, v18.4s, v1.4h
++        uaddw2          v19.4s, v19.4s, v1.8h
++        ld1             {v4.8h},  [x0], x1
++        srshr           v24.4s, v24.4s, #5
++        srshr           v25.4s, v25.4s, #5
++        uaddw           v20.4s, v20.4s, v2.4h
++        uaddw2          v21.4s, v21.4s, v2.8h
++        sqxtun          v0.4h,  v16.4s
++        sqxtun2         v0.8h,  v17.4s
++        dup             v16.8h, w5
++        ld1             {v5.8h},  [x0], x1
++        srshr           v26.4s, v26.4s, #5
++        srshr           v27.4s, v27.4s, #5
++        uaddw           v22.4s, v22.4s, v3.4h
++        uaddw2          v23.4s, v23.4s, v3.8h
++        sqxtun          v1.4h,  v18.4s
++        sqxtun2         v1.8h,  v19.4s
++        umin            v0.8h,  v0.8h,  v16.8h
++        ld1             {v6.8h},  [x0], x1
++        srshr           v28.4s, v28.4s, #5
++        srshr           v29.4s, v29.4s, #5
++        uaddw           v24.4s, v24.4s, v4.4h
++        uaddw2          v25.4s, v25.4s, v4.8h
++        sqxtun          v2.4h,  v20.4s
++        sqxtun2         v2.8h,  v21.4s
++        umin            v1.8h,  v1.8h,  v16.8h
++        ld1             {v7.8h},  [x0], x1
++        srshr           v30.4s, v30.4s, #5
++        srshr           v31.4s, v31.4s, #5
++        uaddw           v26.4s, v26.4s, v5.4h
++        uaddw2          v27.4s, v27.4s, v5.8h
++        sqxtun          v3.4h,  v22.4s
++        sqxtun2         v3.8h,  v23.4s
++        umin            v2.8h,  v2.8h,  v16.8h
++
++        st1             {v0.8h},  [x3], x1
++        uaddw           v28.4s, v28.4s, v6.4h
++        uaddw2          v29.4s, v29.4s, v6.8h
++        st1             {v1.8h},  [x3], x1
++        sqxtun          v4.4h,  v24.4s
++        sqxtun2         v4.8h,  v25.4s
++        umin            v3.8h,  v3.8h,  v16.8h
++        st1             {v2.8h},  [x3], x1
++        uaddw           v30.4s, v30.4s, v7.4h
++        uaddw2          v31.4s, v31.4s, v7.8h
++        st1             {v3.8h},  [x3], x1
++        sqxtun          v5.4h,  v26.4s
++        sqxtun2         v5.8h,  v27.4s
++        umin            v4.8h,  v4.8h,  v16.8h
++        st1             {v4.8h},  [x3], x1
++        sqxtun          v6.4h,  v28.4s
++        sqxtun2         v6.8h,  v29.4s
++        umin            v5.8h,  v5.8h,  v16.8h
++        st1             {v5.8h},  [x3], x1
++        sqxtun          v7.4h,  v30.4s
++        sqxtun2         v7.8h,  v31.4s
++        umin            v6.8h,  v6.8h,  v16.8h
++
++        st1             {v6.8h},  [x3], x1
++        umin            v7.8h,  v7.8h,  v16.8h
++        st1             {v7.8h},  [x3], x1
++
++.ifnc \txfm1\()_\txfm2,idct_idct
++        ldp             d8,  d9,  [sp], 0x10
++.endif
++        ret
++endfunc
++
++function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_10_neon, export=1
++        mov             x5,  #0x03ff
++        b               vp9_\txfm1\()_\txfm2\()_8x8_add_16_neon
++endfunc
++
++function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_12_neon, export=1
++        mov             x5,  #0x0fff
++        b               vp9_\txfm1\()_\txfm2\()_8x8_add_16_neon
++endfunc
++.endm
++
++itxfm_func8x8 idct,  idct
++itxfm_func8x8 iadst, idct
++itxfm_func8x8 idct,  iadst
++itxfm_func8x8 iadst, iadst
++
++
++function idct16x16_dc_add_neon
++        movrel          x4,  idct_coeffs
++        ld1             {v0.4h}, [x4]
++        sxtl            v0.4s,  v0.4h
++
++        movi            v1.4h,  #0
++
++        ld1             {v2.s}[0],  [x2]
++        smull           v2.2d,  v2.2s,  v0.s[0]
++        rshrn           v2.2s,  v2.2d,  #14
++        smull           v2.2d,  v2.2s,  v0.s[0]
++        rshrn           v2.2s,  v2.2d,  #14
++        st1             {v1.s}[0],  [x2]
++        dup             v2.4s,  v2.s[0]
++
++        srshr           v0.4s,  v2.4s,  #6
++
++        mov             x3, x0
++        mov             x4, #16
++        dup             v31.8h, w13
++1:
++        // Loop to add the constant from v2 into all 16x16 outputs
++        subs            x4,  x4,  #2
++        ld1             {v1.8h,v2.8h},  [x0], x1
++        uaddw           v16.4s, v0.4s,  v1.4h
++        uaddw2          v17.4s, v0.4s,  v1.8h
++        ld1             {v3.8h,v4.8h},  [x0], x1
++        uaddw           v18.4s, v0.4s,  v2.4h
++        uaddw2          v19.4s, v0.4s,  v2.8h
++        uaddw           v20.4s, v0.4s,  v3.4h
++        uaddw2          v21.4s, v0.4s,  v3.8h
++        uaddw           v22.4s, v0.4s,  v4.4h
++        uaddw2          v23.4s, v0.4s,  v4.8h
++        sqxtun          v1.4h,  v16.4s
++        sqxtun2         v1.8h,  v17.4s
++        sqxtun          v2.4h,  v18.4s
++        sqxtun2         v2.8h,  v19.4s
++        sqxtun          v3.4h,  v20.4s
++        sqxtun2         v3.8h,  v21.4s
++        sqxtun          v4.4h,  v22.4s
++        sqxtun2         v4.8h,  v23.4s
++        umin            v1.8h,  v1.8h,  v31.8h
++        umin            v2.8h,  v2.8h,  v31.8h
++        st1             {v1.8h,v2.8h},  [x3], x1
++        umin            v3.8h,  v3.8h,  v31.8h
++        umin            v4.8h,  v4.8h,  v31.8h
++        st1             {v3.8h,v4.8h},  [x3], x1
++        b.ne            1b
++
++        ret
++endfunc
++
++.macro idct16_end
++        butterfly_4s    v18, v7,  v4,  v7                // v18 = t0a,  v7  = t7a
++        butterfly_4s    v19, v22, v5,  v22               // v19 = t1a,  v22 = t6
++        butterfly_4s    v4,  v26, v20, v26               // v4  = t2a,  v26 = t5
++        butterfly_4s    v5,  v6,  v28, v6                // v5  = t3a,  v6  = t4
++        butterfly_4s    v20, v28, v16, v24               // v20 = t8a,  v28 = t11a
++        butterfly_4s    v24, v21, v23, v21               // v24 = t9,   v21 = t10
++        butterfly_4s    v23, v27, v25, v27               // v23 = t14,  v27 = t13
++        butterfly_4s    v25, v29, v29, v17               // v25 = t15a, v29 = t12a
++
++        dmbutterfly0    v8,  v9,  v27, v21, v8,  v9,  v16, v17, v30, v31 // v8  = t13a, v9  = t10a
++        dmbutterfly0    v28, v27, v29, v28, v21, v29, v16, v17, v30, v31 // v28 = t12,  v27 = t11
++
++        butterfly_4s    v16, v31, v18, v25               // v16 = out[0], v31 = out[15]
++        butterfly_4s    v17, v30, v19, v23               // v17 = out[1], v30 = out[14]
++        butterfly_4s_r  v25, v22, v22, v24               // v25 = out[9], v22 = out[6]
++        butterfly_4s    v23, v24, v7,  v20               // v23 = out[7], v24 = out[8]
++        butterfly_4s    v18, v29, v4,  v8                // v18 = out[2], v29 = out[13]
++        butterfly_4s    v19, v28, v5,  v28               // v19 = out[3], v28 = out[12]
++        butterfly_4s    v20, v27, v6,  v27               // v20 = out[4], v27 = out[11]
++        butterfly_4s    v21, v26, v26, v9                // v21 = out[5], v26 = out[10]
++        ret
++.endm
++
++function idct16
++        dmbutterfly0    v16, v24, v16, v24, v4, v5, v6, v7, v8, v9 // v16 = t0a,  v24 = t1a
++        dmbutterfly     v20, v28, v0.s[2], v0.s[3], v4, v5, v6, v7 // v20 = t2a,  v28 = t3a
++        dmbutterfly     v18, v30, v1.s[0], v1.s[1], v4, v5, v6, v7 // v18 = t4a,  v30 = t7a
++        dmbutterfly     v26, v22, v1.s[2], v1.s[3], v4, v5, v6, v7 // v26 = t5a,  v22 = t6a
++        dmbutterfly     v17, v31, v2.s[0], v2.s[1], v4, v5, v6, v7 // v17 = t8a,  v31 = t15a
++        dmbutterfly     v25, v23, v2.s[2], v2.s[3], v4, v5, v6, v7 // v25 = t9a,  v23 = t14a
++        dmbutterfly     v21, v27, v3.s[0], v3.s[1], v4, v5, v6, v7 // v21 = t10a, v27 = t13a
++        dmbutterfly     v29, v19, v3.s[2], v3.s[3], v4, v5, v6, v7 // v29 = t11a, v19 = t12a
++
++        butterfly_4s    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
++        butterfly_4s    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
++        butterfly_4s    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
++        butterfly_4s    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
++        butterfly_4s    v16, v25, v17, v25               // v16 = t8,   v25 = t9
++        butterfly_4s    v24, v21, v29, v21               // v24 = t11,  v21 = t10
++        butterfly_4s    v17, v27, v19, v27               // v17 = t12,  v27 = t13
++        butterfly_4s    v29, v23, v31, v23               // v29 = t15,  v23 = t14
++
++        dmbutterfly0    v22, v26, v22, v26, v8, v9, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
++        dmbutterfly     v23, v25, v0.s[2], v0.s[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
++        dmbutterfly     v27, v21, v0.s[2], v0.s[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
++        idct16_end
++endfunc
++
++function idct16_half
++        dmbutterfly0_h  v16, v24, v16, v24, v4, v5, v6, v7, v8, v9 // v16 = t0a,  v24 = t1a
++        dmbutterfly_h1  v20, v28, v0.s[2], v0.s[3], v4, v5, v6, v7 // v20 = t2a,  v28 = t3a
++        dmbutterfly_h1  v18, v30, v1.s[0], v1.s[1], v4, v5, v6, v7 // v18 = t4a,  v30 = t7a
++        dmbutterfly_h2  v26, v22, v1.s[2], v1.s[3], v4, v5, v6, v7 // v26 = t5a,  v22 = t6a
++        dmbutterfly_h1  v17, v31, v2.s[0], v2.s[1], v4, v5, v6, v7 // v17 = t8a,  v31 = t15a
++        dmbutterfly_h2  v25, v23, v2.s[2], v2.s[3], v4, v5, v6, v7 // v25 = t9a,  v23 = t14a
++        dmbutterfly_h1  v21, v27, v3.s[0], v3.s[1], v4, v5, v6, v7 // v21 = t10a, v27 = t13a
++        dmbutterfly_h2  v29, v19, v3.s[2], v3.s[3], v4, v5, v6, v7 // v29 = t11a, v19 = t12a
++
++        butterfly_4s    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
++        butterfly_4s    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
++        butterfly_4s    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
++        butterfly_4s    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
++        butterfly_4s    v16, v25, v17, v25               // v16 = t8,   v25 = t9
++        butterfly_4s    v24, v21, v29, v21               // v24 = t11,  v21 = t10
++        butterfly_4s    v17, v27, v19, v27               // v17 = t12,  v27 = t13
++        butterfly_4s    v29, v23, v31, v23               // v29 = t15,  v23 = t14
++
++        dmbutterfly0    v22, v26, v22, v26, v8, v9, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
++        dmbutterfly     v23, v25, v0.s[2], v0.s[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
++        dmbutterfly     v27, v21, v0.s[2], v0.s[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
++        idct16_end
++endfunc
++
++function idct16_quarter
++        dsmull_h        v24, v25, v19, v3.s[3]
++        dsmull_h        v4,  v5,  v17, v2.s[0]
++        dsmull_h        v7,  v6,  v18, v1.s[1]
++        dsmull_h        v30, v31, v18, v1.s[0]
++        neg             v24.2d,  v24.2d
++        neg             v25.2d,  v25.2d
++        dsmull_h        v29, v28, v17, v2.s[1]
++        dsmull_h        v26, v27, v19, v3.s[2]
++        dsmull_h        v22, v23, v16, v0.s[0]
++        drshrn_h        v24, v24, v25, #14
++        drshrn_h        v16, v4,  v5,  #14
++        drshrn_h        v7,  v7,  v6,  #14
++        drshrn_h        v6,  v30, v31, #14
++        drshrn_h        v29, v29, v28, #14
++        drshrn_h        v17, v26, v27, #14
++        drshrn_h        v28, v22, v23, #14
++
++        dmbutterfly_l   v20, v21, v22, v23, v17, v24, v0.s[2], v0.s[3]
++        dmbutterfly_l   v18, v19, v30, v31, v29, v16, v0.s[2], v0.s[3]
++        neg             v22.2d,  v22.2d
++        neg             v23.2d,  v23.2d
++        drshrn_h        v27, v20, v21, #14
++        drshrn_h        v21, v22, v23, #14
++        drshrn_h        v23, v18, v19, #14
++        drshrn_h        v25, v30, v31, #14
++        mov             v4.16b,  v28.16b
++        mov             v5.16b,  v28.16b
++        dmbutterfly0    v22, v26, v7,  v6,  v18, v19, v30, v31
++        mov             v20.16b, v28.16b
++        idct16_end
++endfunc
++
++function iadst16
++        ld1             {v0.8h,v1.8h}, [x11]
++        sxtl            v2.4s,  v1.4h
++        sxtl2           v3.4s,  v1.8h
++        sxtl2           v1.4s,  v0.8h
++        sxtl            v0.4s,  v0.4h
++
++        dmbutterfly_l   v6,  v7,  v4,  v5,  v31, v16, v0.s[1], v0.s[0]   // v6,v7   = t1,   v4,v5   = t0
++        dmbutterfly_l   v10, v11, v8,  v9,  v23, v24, v1.s[1], v1.s[0]   // v10,v11 = t9,   v8,v9   = t8
++        dbutterfly_n    v31, v24, v6,  v7,  v10, v11, v12, v13, v10, v11 // v31     = t1a,  v24     = t9a
++        dmbutterfly_l   v14, v15, v12, v13, v29, v18, v0.s[3], v0.s[2]   // v14,v15 = t3,   v12,v13 = t2
++        dbutterfly_n    v16, v23, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v16     = t0a,  v23     = t8a
++
++        dmbutterfly_l   v6,  v7,  v4,  v5,  v21, v26, v1.s[3], v1.s[2]   // v6,v7   = t11,  v4,v5   = t10
++        dbutterfly_n    v29, v26, v14, v15, v6,  v7,  v8,  v9,  v6,  v7  // v29     = t3a,  v26     = t11a
++        dmbutterfly_l   v10, v11, v8,  v9,  v27, v20, v2.s[1], v2.s[0]   // v10,v11 = t5,   v8,v9   = t4
++        dbutterfly_n    v18, v21, v12, v13, v4,  v5,  v6,  v7,  v4,  v5  // v18     = t2a,  v21     = t10a
++
++        dmbutterfly_l   v14, v15, v12, v13, v19, v28, v3.s[1], v3.s[0]   // v14,v15 = t13,  v12,v13 = t12
++        dbutterfly_n    v20, v28, v10, v11, v14, v15, v4,  v5,  v14, v15 // v20     = t5a,  v28     = t13a
++        dmbutterfly_l   v6,  v7,  v4,  v5,  v25, v22, v2.s[3], v2.s[2]   // v6,v7   = t7,   v4,v5   = t6
++        dbutterfly_n    v27, v19, v8,  v9,  v12, v13, v10, v11, v12, v13 // v27     = t4a,  v19     = t12a
++
++        dmbutterfly_l   v10, v11, v8,  v9,  v17, v30, v3.s[3], v3.s[2]   // v10,v11 = t15,  v8,v9   = t14
++        ld1             {v0.8h}, [x10]
++        dbutterfly_n    v22, v30, v6,  v7,  v10, v11, v12, v13, v10, v11 // v22     = t7a,  v30     = t15a
++        sxtl2           v1.4s,  v0.8h
++        sxtl            v0.4s,  v0.4h
++        dmbutterfly_l   v14, v15, v12, v13, v23, v24, v1.s[0], v1.s[1]   // v14,v15 = t9,   v12,v13 = t8
++        dbutterfly_n    v25, v17, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v25     = t6a,  v17     = t14a
++
++        dmbutterfly_l   v4,  v5,  v6,  v7,  v28, v19, v1.s[1], v1.s[0]   // v4,v5   = t12,  v6,v7   = t13
++        dbutterfly_n    v23, v19, v12, v13, v4,  v5,  v8,  v9,  v4,  v5  // v23     = t8a,  v19     = t12a
++        dmbutterfly_l   v10, v11, v8,  v9,  v21, v26, v1.s[2], v1.s[3]   // v10,v11 = t11,  v8,v9   = t10
++        butterfly_4s_r  v4,  v27, v16, v27               // v4  = t4,   v27 = t0
++        dbutterfly_n    v24, v28, v14, v15, v6,  v7,  v12, v13, v6,  v7  // v24     = t9a,  v28     = t13a
++
++        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v1.s[3], v1.s[2]   // v12,v13 = t14,  v14,v15 = t15
++        butterfly_4s_r  v5,  v20, v31, v20               // v5  = t5, v20 = t1
++        dbutterfly_n    v21, v17, v8,  v9,  v12, v13, v6,  v7,  v12, v13 // v21     = t10a, v17     = t14a
++        dbutterfly_n    v26, v30, v10, v11, v14, v15, v8,  v9,  v14, v15 // v26     = t11a, v30     = t15a
++
++        butterfly_4s_r  v6,  v25, v18, v25               // v6  = t6, v25 = t2
++        butterfly_4s_r  v7,  v22, v29, v22               // v7  = t7, v22 = t3
++
++        dmbutterfly_l   v10, v11, v8,  v9,  v19, v28, v0.s[2], v0.s[3]   // v10,v11 = t13,  v8,v9   = t12
++        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v0.s[3], v0.s[2]   // v12,v13 = t14,  v14,v15 = t15
++
++        dbutterfly_n    v18, v30, v8,  v9,  v12, v13, v16, v17, v12, v13 // v18   = out[2], v30     = t14a
++        dbutterfly_n    v29, v17, v10, v11, v14, v15, v12, v13, v14, v15 // v29 = -out[13], v17     = t15a
++        neg             v29.4s, v29.4s                   // v29 = out[13]
++
++        dmbutterfly_l   v10, v11, v8,  v9,  v4,  v5,  v0.s[2], v0.s[3]   // v10,v11 = t5a,  v8,v9   = t4a
++        dmbutterfly_l   v12, v13, v14, v15, v7,  v6,  v0.s[3], v0.s[2]   // v12,v13 = t6a,  v14,v15 = t7a
++
++        butterfly_4s    v2,  v6,  v27, v25               // v2 = out[0], v6 = t2a
++        butterfly_4s    v3,  v7,  v23, v21               // v3 =-out[1], v7 = t10
++
++        dbutterfly_n    v19, v31, v8,  v9,  v12, v13, v4,  v5,  v8,  v9  // v19 = -out[3],  v31 = t6
++        neg             v19.4s, v19.4s                   // v19 = out[3]
++        dbutterfly_n    v28, v16, v10, v11, v14, v15, v4,  v5,  v10, v11 // v28 = out[12],  v16 = t7
++
++        butterfly_4s    v5,  v8,  v20, v22               // v5 =-out[15],v8 = t3a
++        butterfly_4s    v4,  v9,  v24, v26               // v4 = out[14],v9 = t11
++
++        dmbutterfly0    v23, v24, v6,  v8,  v10, v11, v12, v13, v14, v15, 1 // v23 = out[7], v24 = out[8]
++        dmbutterfly0    v21, v26, v30, v17, v10, v11, v12, v13, v14, v15, 1 // v21 = out[5], v26 = out[10]
++        dmbutterfly0    v20, v27, v16, v31, v10, v11, v12, v13, v14, v15    // v20 = out[4], v27 = out[11]
++        dmbutterfly0    v22, v25, v9,  v7,  v10, v11, v12, v13, v14, v15    // v22 = out[6], v25 = out[9]
++
++        neg             v31.4s,  v5.4s                    // v31 = out[15]
++        neg             v17.4s,  v3.4s                    // v17 = out[1]
++
++        mov             v16.16b, v2.16b
++        mov             v30.16b, v4.16b
++        ret
++endfunc
++
++// Helper macros; we can't use these expressions directly within
++// e.g. .irp due to the extra concatenation \(). Therefore wrap
++// them in macros to allow using .irp below.
++.macro load i, src, inc
++        ld1             {v\i\().4s},  [\src], \inc
++.endm
++.macro store i, dst, inc
++        st1             {v\i\().4s},  [\dst], \inc
++.endm
++.macro movi_v i, size, imm
++        movi            v\i\()\size,  \imm
++.endm
++.macro load_clear i, src, inc
++        ld1             {v\i\().4s}, [\src]
++        st1             {v4.4s},  [\src], \inc
++.endm
++
++.macro load_add_store coef0, coef1, coef2, coef3, coef4, coef5, coef6, coef7
++        srshr           \coef0, \coef0, #6
++        ld1             {v4.4h},   [x0], x1
++        srshr           \coef1, \coef1, #6
++        ld1             {v4.d}[1], [x3], x1
++        srshr           \coef2, \coef2, #6
++        ld1             {v5.4h},   [x0], x1
++        srshr           \coef3, \coef3, #6
++        uaddw           \coef0, \coef0, v4.4h
++        ld1             {v5.d}[1], [x3], x1
++        srshr           \coef4, \coef4, #6
++        uaddw2          \coef1, \coef1, v4.8h
++        ld1             {v6.4h},   [x0], x1
++        srshr           \coef5, \coef5, #6
++        uaddw           \coef2, \coef2, v5.4h
++        ld1             {v6.d}[1], [x3], x1
++        sqxtun          v4.4h,  \coef0
++        srshr           \coef6, \coef6, #6
++        uaddw2          \coef3, \coef3, v5.8h
++        ld1             {v7.4h},   [x0], x1
++        sqxtun2         v4.8h,  \coef1
++        srshr           \coef7, \coef7, #6
++        uaddw           \coef4, \coef4, v6.4h
++        ld1             {v7.d}[1], [x3], x1
++        umin            v4.8h,  v4.8h,  v8.8h
++        sub             x0,  x0,  x1, lsl #2
++        sub             x3,  x3,  x1, lsl #2
++        sqxtun          v5.4h,  \coef2
++        uaddw2          \coef5, \coef5, v6.8h
++        st1             {v4.4h},   [x0], x1
++        sqxtun2         v5.8h,  \coef3
++        uaddw           \coef6, \coef6, v7.4h
++        st1             {v4.d}[1], [x3], x1
++        umin            v5.8h,  v5.8h,  v8.8h
++        sqxtun          v6.4h,  \coef4
++        uaddw2          \coef7, \coef7, v7.8h
++        st1             {v5.4h},   [x0], x1
++        sqxtun2         v6.8h,  \coef5
++        st1             {v5.d}[1], [x3], x1
++        umin            v6.8h,  v6.8h,  v8.8h
++        sqxtun          v7.4h,  \coef6
++        st1             {v6.4h},   [x0], x1
++        sqxtun2         v7.8h,  \coef7
++        st1             {v6.d}[1], [x3], x1
++        umin            v7.8h,  v7.8h,  v8.8h
++        st1             {v7.4h},   [x0], x1
++        st1             {v7.d}[1], [x3], x1
++.endm
++
++// Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
++// transpose into a horizontal 16x4 slice and store.
++// x0 = dst (temp buffer)
++// x1 = slice offset
++// x2 = src
++// x9 = input stride
++.macro itxfm16_1d_funcs txfm
++function \txfm\()16_1d_4x16_pass1_neon
++        mov             x14, x30
++
++        movi            v4.4s, #0
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load_clear      \i,  x2,  x9
++.endr
++
++        bl              \txfm\()16
++
++        // Do four 4x4 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
++        // contain the four transposed 4x4 blocks.
++        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
++        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
++        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
++        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
++
++        // Store the transposed 4x4 blocks horizontally.
++        cmp             x1,  #12
++        b.eq            1f
++.irp i, 16, 20, 24, 28, 17, 21, 25, 29, 18, 22, 26, 30, 19, 23, 27, 31
++        store           \i,  x0,  #16
++.endr
++        br              x14
++1:
++        // Special case: For the last input column (x1 == 12),
++        // which would be stored as the last row in the temp buffer,
++        // don't store the first 4x4 block, but keep it in registers
++        // for the first slice of the second pass (where it is the
++        // last 4x4 block).
++        add             x0,  x0,  #16
++        st1             {v20.4s},  [x0], #16
++        st1             {v24.4s},  [x0], #16
++        st1             {v28.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v21.4s},  [x0], #16
++        st1             {v25.4s},  [x0], #16
++        st1             {v29.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v22.4s},  [x0], #16
++        st1             {v26.4s},  [x0], #16
++        st1             {v30.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v23.4s},  [x0], #16
++        st1             {v27.4s},  [x0], #16
++        st1             {v31.4s},  [x0], #16
++
++        mov             v28.16b, v16.16b
++        mov             v29.16b, v17.16b
++        mov             v30.16b, v18.16b
++        mov             v31.16b, v19.16b
++        br              x14
++endfunc
++
++// Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
++// load the destination pixels (from a similar 4x16 slice), add and store back.
++// x0 = dst
++// x1 = dst stride
++// x2 = src (temp buffer)
++// x3 = slice offset
++// x9 = temp buffer stride
++function \txfm\()16_1d_4x16_pass2_neon
++        mov             x14, x30
++
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
++        load            \i,  x2,  x9
++.endr
++        cbz             x3,  1f
++.irp i, 28, 29, 30, 31
++        load            \i,  x2,  x9
++.endr
++1:
++
++        add             x3,  x0,  x1
++        lsl             x1,  x1,  #1
++        bl              \txfm\()16
++
++        dup             v8.8h, w13
++        load_add_store  v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
++        load_add_store  v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
++
++        br              x14
++endfunc
++.endm
++
++itxfm16_1d_funcs idct
++itxfm16_1d_funcs iadst
++
++// This is the minimum eob value for each subpartition, in increments of 4
++const min_eob_idct_idct_16, align=4
++        .short  0, 10, 38, 89
++endconst
++
++.macro itxfm_func16x16 txfm1, txfm2
++function vp9_\txfm1\()_\txfm2\()_16x16_add_16_neon
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #1
++        b.eq            idct16x16_dc_add_neon
++.endif
++        mov             x15, x30
++        // iadst16 requires clobbering v8-v15, idct16 only clobbers v8-v9.
++.ifnc \txfm1\()_\txfm2,idct_idct
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++.endif
++        stp             d8,  d9,  [sp, #-0x10]!
++
++        sub             sp,  sp,  #1024
++
++        mov             x4,  x0
++        mov             x5,  x1
++        mov             x6,  x2
++
++        movrel          x10, idct_coeffs
++.ifnc \txfm1\()_\txfm2,idct_idct
++        movrel          x11, iadst16_coeffs
++.endif
++.ifc \txfm1,idct
++        ld1             {v0.8h,v1.8h}, [x10]
++        sxtl            v2.4s,  v1.4h
++        sxtl2           v3.4s,  v1.8h
++        sxtl2           v1.4s,  v0.8h
++        sxtl            v0.4s,  v0.4h
++.endif
++        mov             x9,  #64
++
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #10
++        b.le            idct16x16_quarter_add_16_neon
++        cmp             w3,  #38
++        b.le            idct16x16_half_add_16_neon
++
++        movrel          x12, min_eob_idct_idct_16, 2
++.endif
++
++.irp i, 0, 4, 8, 12
++        add             x0,  sp,  #(\i*64)
++.ifc \txfm1\()_\txfm2,idct_idct
++.if \i > 0
++        ldrh            w1,  [x12], #2
++        cmp             w3,  w1
++        mov             x1,  #(16 - \i)/4
++        b.le            1f
++.endif
++.endif
++        mov             x1,  #\i
++        add             x2,  x6,  #(\i*4)
++        bl              \txfm1\()16_1d_4x16_pass1_neon
++.endr
++.ifc \txfm1\()_\txfm2,iadst_idct
++        ld1             {v0.8h,v1.8h}, [x10]
++        sxtl            v2.4s,  v1.4h
++        sxtl2           v3.4s,  v1.8h
++        sxtl2           v1.4s,  v0.8h
++        sxtl            v0.4s,  v0.4h
++.endif
++
++.ifc \txfm1\()_\txfm2,idct_idct
++        b               3f
++1:
++        // Set v28-v31 to zero, for the in-register passthrough of
++        // coefficients to pass 2.
++        movi            v28.4s,  #0
++        movi            v29.4s,  #0
++        movi            v30.4s,  #0
++        movi            v31.4s,  #0
++2:
++        subs            x1,  x1,  #1
++.rept 4
++        st1             {v28.4s,v29.4s,v30.4s,v31.4s}, [x0], x9
++.endr
++        b.ne            2b
++3:
++.endif
++
++.irp i, 0, 4, 8, 12
++        add             x0,  x4,  #(\i*2)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*4)
++        mov             x3,  #\i
++        bl              \txfm2\()16_1d_4x16_pass2_neon
++.endr
++
++        add             sp,  sp,  #1024
++        ldp             d8,  d9,  [sp], 0x10
++.ifnc \txfm1\()_\txfm2,idct_idct
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++.endif
++        br              x15
++endfunc
++
++function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_10_neon, export=1
++        mov             x13, #0x03ff
++        b               vp9_\txfm1\()_\txfm2\()_16x16_add_16_neon
++endfunc
++
++function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_12_neon, export=1
++        mov             x13, #0x0fff
++        b               vp9_\txfm1\()_\txfm2\()_16x16_add_16_neon
++endfunc
++.endm
++
++itxfm_func16x16 idct,  idct
++itxfm_func16x16 iadst, idct
++itxfm_func16x16 idct,  iadst
++itxfm_func16x16 iadst, iadst
++
++function idct16_1d_4x16_pass1_quarter_neon
++        mov             x14, x30
++
++        movi            v4.4s, #0
++.irp i, 16, 17, 18, 19
++        load_clear      \i,  x2,  x9
++.endr
++
++        bl              idct16_quarter
++
++        // Do four 4x4 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
++        // contain the four transposed 4x4 blocks.
++        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
++        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
++        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
++        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
++
++        // Store the transposed 4x4 blocks horizontally.
++        // The first 4x4 block is kept in registers for the second pass,
++        // store the rest in the temp buffer.
++        add             x0,  x0,  #16
++        st1             {v20.4s},  [x0], #16
++        st1             {v24.4s},  [x0], #16
++        st1             {v28.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v21.4s},  [x0], #16
++        st1             {v25.4s},  [x0], #16
++        st1             {v29.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v22.4s},  [x0], #16
++        st1             {v26.4s},  [x0], #16
++        st1             {v30.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v23.4s},  [x0], #16
++        st1             {v27.4s},  [x0], #16
++        st1             {v31.4s},  [x0], #16
++        br              x14
++endfunc
++
++function idct16_1d_4x16_pass2_quarter_neon
++        mov             x14, x30
++
++        // Only load the top 4 lines, and only do it for the later slices.
++        // For the first slice, v16-v19 are kept in registers from the first pass.
++        cbz             x3,  1f
++.irp i, 16, 17, 18, 19
++        load            \i,  x2,  x9
++.endr
++1:
++
++        add             x3,  x0,  x1
++        lsl             x1,  x1,  #1
++        bl              idct16_quarter
++
++        dup             v8.8h, w13
++        load_add_store  v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
++        load_add_store  v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
++
++        br              x14
++endfunc
++
++function idct16_1d_4x16_pass1_half_neon
++        mov             x14, x30
++
++        movi            v4.4s, #0
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load_clear      \i,  x2,  x9
++.endr
++
++        bl              idct16_half
++
++        // Do four 4x4 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
++        // contain the four transposed 4x4 blocks.
++        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
++        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
++        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
++        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
++
++        // Store the transposed 4x4 blocks horizontally.
++        cmp             x1,  #4
++        b.eq            1f
++.irp i, 16, 20, 24, 28, 17, 21, 25, 29, 18, 22, 26, 30, 19, 23, 27, 31
++        store           \i,  x0,  #16
++.endr
++        br              x14
++1:
++        // Special case: For the second input column (x1 == 4),
++        // which would be stored as the second row in the temp buffer,
++        // don't store the first 4x4 block, but keep it in registers
++        // for the first slice of the second pass (where it is the
++        // second 4x4 block).
++        add             x0,  x0,  #16
++        st1             {v20.4s},  [x0], #16
++        st1             {v24.4s},  [x0], #16
++        st1             {v28.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v21.4s},  [x0], #16
++        st1             {v25.4s},  [x0], #16
++        st1             {v29.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v22.4s},  [x0], #16
++        st1             {v26.4s},  [x0], #16
++        st1             {v30.4s},  [x0], #16
++        add             x0,  x0,  #16
++        st1             {v23.4s},  [x0], #16
++        st1             {v27.4s},  [x0], #16
++        st1             {v31.4s},  [x0], #16
++
++        mov             v20.16b, v16.16b
++        mov             v21.16b, v17.16b
++        mov             v22.16b, v18.16b
++        mov             v23.16b, v19.16b
++        br              x14
++endfunc
++
++function idct16_1d_4x16_pass2_half_neon
++        mov             x14, x30
++
++.irp i, 16, 17, 18, 19
++        load            \i,  x2,  x9
++.endr
++        cbz             x3,  1f
++.irp i, 20, 21, 22, 23
++        load            \i,  x2,  x9
++.endr
++1:
++
++        add             x3,  x0,  x1
++        lsl             x1,  x1,  #1
++        bl              idct16_half
++
++        dup             v8.8h, w13
++        load_add_store  v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
++        load_add_store  v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
++
++        br              x14
++endfunc
++
++.macro idct16_partial size
++function idct16x16_\size\()_add_16_neon
++        add             x0,  sp,  #(0*64)
++        mov             x1,  #0
++        add             x2,  x6,  #(0*4)
++        bl              idct16_1d_4x16_pass1_\size\()_neon
++.ifc \size,half
++        add             x0,  sp,  #(4*64)
++        mov             x1,  #4
++        add             x2,  x6,  #(4*4)
++        bl              idct16_1d_4x16_pass1_\size\()_neon
++.endif
++
++.irp i, 0, 4, 8, 12
++        add             x0,  x4,  #(\i*2)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*4)
++        mov             x3,  #\i
++        bl              idct16_1d_4x16_pass2_\size\()_neon
++.endr
++
++        add             sp,  sp,  #1024
++        ldp             d8,  d9,  [sp], 0x10
++        br              x15
++endfunc
++.endm
++
++idct16_partial quarter
++idct16_partial half
++
++function idct32x32_dc_add_neon
++        movrel          x4,  idct_coeffs
++        ld1             {v0.4h}, [x4]
++        sxtl            v0.4s,  v0.4h
++
++        movi            v1.4h,  #0
++
++        ld1             {v2.s}[0],  [x2]
++        smull           v2.2d,  v2.2s,  v0.s[0]
++        rshrn           v2.2s,  v2.2d,  #14
++        smull           v2.2d,  v2.2s,  v0.s[0]
++        rshrn           v2.2s,  v2.2d,  #14
++        st1             {v1.s}[0],  [x2]
++        dup             v2.4s,  v2.s[0]
++
++        srshr           v0.4s,  v2.4s,  #6
++
++        mov             x3,  x0
++        mov             x4,  #32
++        sub             x1,  x1,  #32
++        dup             v31.8h, w13
++1:
++        // Loop to add the constant v0 into all 32x32 outputs
++        subs            x4,  x4,  #1
++        ld1             {v1.8h,v2.8h},  [x0], #32
++        uaddw           v16.4s, v0.4s,  v1.4h
++        uaddw2          v17.4s, v0.4s,  v1.8h
++        ld1             {v3.8h,v4.8h},  [x0], x1
++        uaddw           v18.4s, v0.4s,  v2.4h
++        uaddw2          v19.4s, v0.4s,  v2.8h
++        uaddw           v20.4s, v0.4s,  v3.4h
++        uaddw2          v21.4s, v0.4s,  v3.8h
++        uaddw           v22.4s, v0.4s,  v4.4h
++        uaddw2          v23.4s, v0.4s,  v4.8h
++        sqxtun          v1.4h,  v16.4s
++        sqxtun2         v1.8h,  v17.4s
++        sqxtun          v2.4h,  v18.4s
++        sqxtun2         v2.8h,  v19.4s
++        sqxtun          v3.4h,  v20.4s
++        sqxtun2         v3.8h,  v21.4s
++        sqxtun          v4.4h,  v22.4s
++        sqxtun2         v4.8h,  v23.4s
++        umin            v1.8h,  v1.8h,  v31.8h
++        umin            v2.8h,  v2.8h,  v31.8h
++        st1             {v1.8h,v2.8h},  [x3], #32
++        umin            v3.8h,  v3.8h,  v31.8h
++        umin            v4.8h,  v4.8h,  v31.8h
++        st1             {v3.8h,v4.8h},  [x3], x1
++        b.ne            1b
++
++        ret
++endfunc
++
++.macro idct32_end
++        butterfly_4s    v16, v5,  v4,  v5  // v16 = t16a, v5  = t19a
++        butterfly_4s    v17, v20, v23, v20 // v17 = t17,  v20 = t18
++        butterfly_4s    v18, v6,  v7,  v6  // v18 = t23a, v6  = t20a
++        butterfly_4s    v19, v21, v22, v21 // v19 = t22,  v21 = t21
++        butterfly_4s    v4,  v28, v28, v30 // v4  = t24a, v28 = t27a
++        butterfly_4s    v23, v26, v25, v26 // v23 = t25,  v26 = t26
++        butterfly_4s    v7,  v8,  v29, v31 // v7  = t31a, v8  = t28a
++        butterfly_4s    v22, v27, v24, v27 // v22 = t30,  v27 = t29
++
++        dmbutterfly     v27, v20, v0.s[2], v0.s[3], v24, v25, v30, v31        // v27 = t18a, v20 = t29a
++        dmbutterfly     v8,  v5,  v0.s[2], v0.s[3], v24, v25, v30, v31        // v8  = t19,  v5  = t28
++        dmbutterfly     v28, v6,  v0.s[2], v0.s[3], v24, v25, v30, v31, neg=1 // v28 = t27,  v6  = t20
++        dmbutterfly     v26, v21, v0.s[2], v0.s[3], v24, v25, v30, v31, neg=1 // v26 = t26a, v21 = t21a
++
++        butterfly_4s    v31, v24, v7,  v4  // v31 = t31,  v24 = t24
++        butterfly_4s    v30, v25, v22, v23 // v30 = t30a, v25 = t25a
++        butterfly_4s_r  v23, v16, v16, v18 // v23 = t23,  v16 = t16
++        butterfly_4s_r  v22, v17, v17, v19 // v22 = t22a, v17 = t17a
++        butterfly_4s    v18, v21, v27, v21 // v18 = t18,  v21 = t21
++        butterfly_4s_r  v27, v28, v5,  v28 // v27 = t27a, v28 = t28a
++        butterfly_4s    v29, v26, v20, v26 // v29 = t29,  v26 = t26
++        butterfly_4s    v19, v20, v8,  v6  // v19 = t19a, v20 = t20
++
++        dmbutterfly0    v27, v20, v27, v20, v4, v5, v6, v7, v8, v9 // v27 = t27,  v20 = t20
++        dmbutterfly0    v26, v21, v26, v21, v4, v5, v6, v7, v8, v9 // v26 = t26a, v21 = t21a
++        dmbutterfly0    v25, v22, v25, v22, v4, v5, v6, v7, v8, v9 // v25 = t25,  v22 = t22
++        dmbutterfly0    v24, v23, v24, v23, v4, v5, v6, v7, v8, v9 // v24 = t24a, v23 = t23a
++        ret
++.endm
++
++function idct32_odd
++        dmbutterfly     v16, v31, v10.s[0], v10.s[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
++        dmbutterfly     v24, v23, v10.s[2], v10.s[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
++        dmbutterfly     v20, v27, v11.s[0], v11.s[1], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
++        dmbutterfly     v28, v19, v11.s[2], v11.s[3], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
++        dmbutterfly     v18, v29, v12.s[0], v12.s[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
++        dmbutterfly     v26, v21, v12.s[2], v12.s[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
++        dmbutterfly     v22, v25, v13.s[0], v13.s[1], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
++        dmbutterfly     v30, v17, v13.s[2], v13.s[3], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
++
++        butterfly_4s    v4,  v24, v16, v24 // v4  = t16, v24 = t17
++        butterfly_4s    v5,  v20, v28, v20 // v5  = t19, v20 = t18
++        butterfly_4s    v6,  v26, v18, v26 // v6  = t20, v26 = t21
++        butterfly_4s    v7,  v22, v30, v22 // v7  = t23, v22 = t22
++        butterfly_4s    v28, v25, v17, v25 // v28 = t24, v25 = t25
++        butterfly_4s    v30, v21, v29, v21 // v30 = t27, v21 = t26
++        butterfly_4s    v29, v23, v31, v23 // v29 = t31, v23 = t30
++        butterfly_4s    v31, v27, v19, v27 // v31 = t28, v27 = t29
++
++        dmbutterfly     v23, v24, v1.s[0], v1.s[1], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
++        dmbutterfly     v27, v20, v1.s[0], v1.s[1], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
++        dmbutterfly     v21, v26, v1.s[2], v1.s[3], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
++        dmbutterfly     v25, v22, v1.s[2], v1.s[3], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
++        idct32_end
++endfunc
++
++function idct32_odd_half
++        dmbutterfly_h1  v16, v31, v10.s[0], v10.s[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
++        dmbutterfly_h2  v24, v23, v10.s[2], v10.s[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
++        dmbutterfly_h1  v20, v27, v11.s[0], v11.s[1], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
++        dmbutterfly_h2  v28, v19, v11.s[2], v11.s[3], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
++        dmbutterfly_h1  v18, v29, v12.s[0], v12.s[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
++        dmbutterfly_h2  v26, v21, v12.s[2], v12.s[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
++        dmbutterfly_h1  v22, v25, v13.s[0], v13.s[1], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
++        dmbutterfly_h2  v30, v17, v13.s[2], v13.s[3], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
++
++        butterfly_4s    v4,  v24, v16, v24 // v4  = t16, v24 = t17
++        butterfly_4s    v5,  v20, v28, v20 // v5  = t19, v20 = t18
++        butterfly_4s    v6,  v26, v18, v26 // v6  = t20, v26 = t21
++        butterfly_4s    v7,  v22, v30, v22 // v7  = t23, v22 = t22
++        butterfly_4s    v28, v25, v17, v25 // v28 = t24, v25 = t25
++        butterfly_4s    v30, v21, v29, v21 // v30 = t27, v21 = t26
++        butterfly_4s    v29, v23, v31, v23 // v29 = t31, v23 = t30
++        butterfly_4s    v31, v27, v19, v27 // v31 = t28, v27 = t29
++
++        dmbutterfly     v23, v24, v1.s[0], v1.s[1], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
++        dmbutterfly     v27, v20, v1.s[0], v1.s[1], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
++        dmbutterfly     v21, v26, v1.s[2], v1.s[3], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
++        dmbutterfly     v25, v22, v1.s[2], v1.s[3], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
++        idct32_end
++endfunc
++
++function idct32_odd_quarter
++        dsmull_h        v4,  v5,  v16, v10.s[0]
++        dsmull_h        v28, v29, v19, v11.s[3]
++        dsmull_h        v30, v31, v16, v10.s[1]
++        dsmull_h        v22, v23, v17, v13.s[2]
++        dsmull_h        v7,  v6,  v17, v13.s[3]
++        dsmull_h        v26, v27, v19, v11.s[2]
++        dsmull_h        v20, v21, v18, v12.s[0]
++        dsmull_h        v24, v25, v18, v12.s[1]
++
++        neg             v28.2d, v28.2d
++        neg             v29.2d, v29.2d
++        neg             v7.2d,  v7.2d
++        neg             v6.2d,  v6.2d
++
++        drshrn_h        v4,  v4,  v5,  #14
++        drshrn_h        v5,  v28, v29, #14
++        drshrn_h        v29, v30, v31, #14
++        drshrn_h        v28, v22, v23, #14
++        drshrn_h        v7,  v7,  v6,  #14
++        drshrn_h        v31, v26, v27, #14
++        drshrn_h        v6,  v20, v21, #14
++        drshrn_h        v30, v24, v25, #14
++
++        dmbutterfly_l   v16, v17, v18, v19, v29, v4,  v1.s[0], v1.s[1]
++        dmbutterfly_l   v27, v26, v20, v21, v31, v5,  v1.s[0], v1.s[1]
++        drshrn_h        v23, v16, v17, #14
++        drshrn_h        v24, v18, v19, #14
++        neg             v20.2d, v20.2d
++        neg             v21.2d, v21.2d
++        drshrn_h        v27, v27, v26, #14
++        drshrn_h        v20, v20, v21, #14
++        dmbutterfly_l   v16, v17, v18, v19, v30, v6,  v1.s[2], v1.s[3]
++        drshrn_h        v21, v16, v17, #14
++        drshrn_h        v26, v18, v19, #14
++        dmbutterfly_l   v16, v17, v18, v19, v28, v7,  v1.s[2], v1.s[3]
++        drshrn_h        v25, v16, v17, #14
++        neg             v18.2d, v18.2d
++        neg             v19.2d, v19.2d
++        drshrn_h        v22, v18, v19, #14
++
++        idct32_end
++endfunc
++
++.macro idct32_funcs suffix
++// Do a 32-point IDCT of a 4x32 slice out of a 32x32 matrix.
++// The 32-point IDCT can be decomposed into two 16-point IDCTs;
++// a normal IDCT16 with every other input component (the even ones, with
++// each output written twice), followed by a separate 16-point IDCT
++// of the odd inputs, added/subtracted onto the outputs of the first idct16.
++// x0 = dst (temp buffer)
++// x1 = unused
++// x2 = src
++// x9 = double input stride
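++// (Roughly, each pass-1 call handles one such 4x32 slice: it runs the 1-D
++// transform down the columns, transposes, and writes the result to the
++// temp buffer; pass 2 below then does the second 1-D transform row-wise.)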
++function idct32_1d_4x32_pass1\suffix\()_neon
++        mov             x14, x30
++
++        movi            v4.4s,  #0
++
++        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load_clear      \i, x2, x9
++.endr
++.endif
++
++        bl              idct16\suffix
++
++        // Do four 4x4 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v19, v20-v23, v24-v27 and v28-v31
++        // contain the four transposed 4x4 blocks.
++        transpose_4x4s  v16, v17, v18, v19, v4, v5, v6, v7
++        transpose_4x4s  v20, v21, v22, v23, v4, v5, v6, v7
++        transpose_4x4s  v24, v25, v26, v27, v4, v5, v6, v7
++        transpose_4x4s  v28, v29, v30, v31, v4, v5, v6, v7
++
++        // Store the registers a, b, c, d horizontally, followed by the
++        // same registers d, c, b, a mirrored.
++.macro store_rev a, b, c, d
++        // There's no rev128 instruction, but we reverse each 64 bit
++        // half, and then flip them using an ext with 8 bytes offset.
++        rev64           v7.4s, \d
++        st1             {\a},  [x0], #16
++        ext             v7.16b, v7.16b, v7.16b, #8
++        st1             {\b},  [x0], #16
++        rev64           v6.4s, \c
++        st1             {\c},  [x0], #16
++        ext             v6.16b, v6.16b, v6.16b, #8
++        st1             {\d},  [x0], #16
++        rev64           v5.4s, \b
++        st1             {v7.4s},  [x0], #16
++        ext             v5.16b, v5.16b, v5.16b, #8
++        st1             {v6.4s},  [x0], #16
++        rev64           v4.4s, \a
++        st1             {v5.4s},  [x0], #16
++        ext             v4.16b, v4.16b, v4.16b, #8
++        st1             {v4.4s},  [x0], #16
++.endm
++        store_rev       v16.4s, v20.4s, v24.4s, v28.4s
++        store_rev       v17.4s, v21.4s, v25.4s, v29.4s
++        store_rev       v18.4s, v22.4s, v26.4s, v30.4s
++        store_rev       v19.4s, v23.4s, v27.4s, v31.4s
++        sub             x0,  x0,  #512
++.purgem store_rev
++
++        // Move x2 back to the start of the input, and move
++        // to the first odd row
++.ifb \suffix
++        sub             x2,  x2,  x9, lsl #4
++.endif
++.ifc \suffix,_quarter
++        sub             x2,  x2,  x9, lsl #2
++.endif
++.ifc \suffix,_half
++        sub             x2,  x2,  x9, lsl #3
++.endif
++        add             x2,  x2,  #128
++
++        movi            v4.4s,  #0
++        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load_clear      \i, x2, x9
++.endr
++.endif
++
++        bl              idct32_odd\suffix
++
++        transpose_4x4s  v31, v30, v29, v28, v4, v5, v6, v7
++        transpose_4x4s  v27, v26, v25, v24, v4, v5, v6, v7
++        transpose_4x4s  v23, v22, v21, v20, v4, v5, v6, v7
++        transpose_4x4s  v19, v18, v17, v16, v4, v5, v6, v7
++
++        // Store the registers a, b, c, d horizontally,
++        // first adding them into the output, and then the mirrored
++        // d, c, b, a, subtracted from the output.
++.macro store_rev a, b, c, d, a16b, b16b
++        ld1             {v4.4s},  [x0]
++        rev64           v9.4s, \d
++        add             v4.4s, v4.4s, \a
++        st1             {v4.4s},  [x0], #16
++        rev64           v8.4s, \c
++        ld1             {v4.4s},  [x0]
++        ext             v9.16b, v9.16b, v9.16b, #8
++        add             v4.4s, v4.4s, \b
++        st1             {v4.4s},  [x0], #16
++        ext             v8.16b, v8.16b, v8.16b, #8
++        ld1             {v4.4s},  [x0]
++        rev64           \b, \b
++        add             v4.4s, v4.4s, \c
++        st1             {v4.4s},  [x0], #16
++        rev64           \a, \a
++        ld1             {v4.4s},  [x0]
++        ext             \b16b, \b16b, \b16b, #8
++        add             v4.4s, v4.4s, \d
++        st1             {v4.4s},  [x0], #16
++        ext             \a16b, \a16b, \a16b, #8
++        ld1             {v4.4s},  [x0]
++        sub             v4.4s, v4.4s, v9.4s
++        st1             {v4.4s},  [x0], #16
++        ld1             {v4.4s},  [x0]
++        sub             v4.4s, v4.4s, v8.4s
++        st1             {v4.4s},  [x0], #16
++        ld1             {v4.4s},  [x0]
++        sub             v4.4s, v4.4s, \b
++        st1             {v4.4s},  [x0], #16
++        ld1             {v4.4s},  [x0]
++        sub             v4.4s, v4.4s, \a
++        st1             {v4.4s},  [x0], #16
++.endm
++
++        store_rev       v31.4s, v27.4s, v23.4s, v19.4s, v31.16b, v27.16b
++        store_rev       v30.4s, v26.4s, v22.4s, v18.4s, v30.16b, v26.16b
++        store_rev       v29.4s, v25.4s, v21.4s, v17.4s, v29.16b, v25.16b
++        store_rev       v28.4s, v24.4s, v20.4s, v16.4s, v28.16b, v24.16b
++.purgem store_rev
++        br              x14
++endfunc
++
++// This is mostly the same as 4x32_pass1, but without the transpose; it
++// uses the source as a temp buffer between the two idct passes, and
++// adds into the destination.
++// x0 = dst
++// x1 = dst stride
++// x2 = src (temp buffer)
++// x7 = negative double temp buffer stride
++// x9 = double temp buffer stride
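++// (x7, the negated stride, is used by the neg=1 variant of load_acc_store
++// below to walk the temp buffer backwards while the mirrored second half
++// of the outputs is subtracted rather than added.)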
++function idct32_1d_4x32_pass2\suffix\()_neon
++        mov             x14, x30
++
++        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #4
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #2
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #3
++.endif
++
++        bl              idct16\suffix
++
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        store           \i, x2, x9
++.endr
++
++        sub             x2,  x2,  x9, lsl #4
++        add             x2,  x2,  #128
++
++        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #4
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #2
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #3
++.endif
++        sub             x2,  x2,  #128
++
++        bl              idct32_odd\suffix
++
++.macro load_acc_store a, b, c, d, neg=0
++.if \neg == 0
++        ld1             {v4.4s},  [x2], x9
++        ld1             {v5.4s},  [x2], x9
++        add             v4.4s, v4.4s, \a
++        ld1             {v6.4s},  [x2], x9
++        add             v5.4s, v5.4s, \b
++        ld1             {v7.4s},  [x2], x9
++        add             v6.4s, v6.4s, \c
++        add             v7.4s, v7.4s, \d
++.else
++        ld1             {v4.4s},  [x2], x7
++        ld1             {v5.4s},  [x2], x7
++        sub             v4.4s, v4.4s, \a
++        ld1             {v6.4s},  [x2], x7
++        sub             v5.4s, v5.4s, \b
++        ld1             {v7.4s},  [x2], x7
++        sub             v6.4s, v6.4s, \c
++        sub             v7.4s, v7.4s, \d
++.endif
++        ld1             {v8.4h},   [x0], x1
++        ld1             {v8.d}[1], [x0], x1
++        srshr           v4.4s, v4.4s, #6
++        ld1             {v9.4h},   [x0], x1
++        srshr           v5.4s, v5.4s, #6
++        uaddw           v4.4s, v4.4s, v8.4h
++        ld1             {v9.d}[1], [x0], x1
++        srshr           v6.4s, v6.4s, #6
++        uaddw2          v5.4s, v5.4s, v8.8h
++        srshr           v7.4s, v7.4s, #6
++        sub             x0,  x0,  x1, lsl #2
++        uaddw           v6.4s, v6.4s, v9.4h
++        sqxtun          v4.4h, v4.4s
++        uaddw2          v7.4s, v7.4s, v9.8h
++        sqxtun2         v4.8h, v5.4s
++        umin            v4.8h, v4.8h, v15.8h
++        st1             {v4.4h},   [x0], x1
++        sqxtun          v5.4h, v6.4s
++        st1             {v4.d}[1], [x0], x1
++        sqxtun2         v5.8h, v7.4s
++        umin            v5.8h, v5.8h, v15.8h
++        st1             {v5.4h},   [x0], x1
++        st1             {v5.d}[1], [x0], x1
++.endm
++        load_acc_store  v31.4s, v30.4s, v29.4s, v28.4s
++        load_acc_store  v27.4s, v26.4s, v25.4s, v24.4s
++        load_acc_store  v23.4s, v22.4s, v21.4s, v20.4s
++        load_acc_store  v19.4s, v18.4s, v17.4s, v16.4s
++        sub             x2,  x2,  x9
++        load_acc_store  v16.4s, v17.4s, v18.4s, v19.4s, 1
++        load_acc_store  v20.4s, v21.4s, v22.4s, v23.4s, 1
++        load_acc_store  v24.4s, v25.4s, v26.4s, v27.4s, 1
++        load_acc_store  v28.4s, v29.4s, v30.4s, v31.4s, 1
++.purgem load_acc_store
++        br              x14
++endfunc
++.endm
++
++idct32_funcs
++idct32_funcs _quarter
++idct32_funcs _half
++
++const min_eob_idct_idct_32, align=4
++        .short  0, 9, 34, 70, 135, 240, 336, 448
++endconst
++
++function vp9_idct_idct_32x32_add_16_neon
++        cmp             w3,  #1
++        b.eq            idct32x32_dc_add_neon
++
++        movrel          x10, idct_coeffs
++
++        mov             x15, x30
++        stp             d8,  d9,  [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d14, d15, [sp, #-0x10]!
++
++        sub             sp,  sp,  #4096
++
++        mov             x4,  x0
++        mov             x5,  x1
++        mov             x6,  x2
++
++        // Double stride of the input, since we only read every other line
++        mov             x9,  #256
++        neg             x7,  x9
++
++        ld1             {v0.8h,v1.8h},   [x10], #32
++        sxtl            v2.4s,  v1.4h
++        sxtl2           v3.4s,  v1.8h
++        sxtl2           v1.4s,  v0.8h
++        sxtl            v0.4s,  v0.4h
++        ld1             {v10.8h,v11.8h}, [x10]
++        sxtl            v12.4s, v11.4h
++        sxtl2           v13.4s, v11.8h
++        sxtl2           v11.4s, v10.8h
++        sxtl            v10.4s, v10.4h
++
++        dup             v15.8h, w13
++
++        cmp             w3,  #34
++        b.le            idct32x32_quarter_add_16_neon
++        cmp             w3,  #135
++        b.le            idct32x32_half_add_16_neon
++
++        movrel          x12, min_eob_idct_idct_32, 2
++
++.irp i, 0, 4, 8, 12, 16, 20, 24, 28
++        add             x0,  sp,  #(\i*128)
++.if \i > 0
++        ldrh            w1,  [x12], #2
++        cmp             w3,  w1
++        mov             x1,  #(32 - \i)/4
++        b.le            1f
++.endif
++        add             x2,  x6,  #(\i*4)
++        bl              idct32_1d_4x32_pass1_neon
++.endr
++        b               3f
++
++1:
++        // Write zeros to the temp buffer for pass 2
++        movi            v16.4s,  #0
++        movi            v17.4s,  #0
++        movi            v18.4s,  #0
++        movi            v19.4s,  #0
++2:
++        subs            x1,  x1,  #1
++.rept 4
++        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64
++        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64
++.endr
++        b.ne            2b
++3:
++.irp i, 0, 4, 8, 12, 16, 20, 24, 28
++        add             x0,  x4,  #(\i*2)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*4)
++        bl              idct32_1d_4x32_pass2_neon
++.endr
++
++        add             sp,  sp,  #4096
++        ldp             d14, d15, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d8,  d9,  [sp], 0x10
++
++        br              x15
++endfunc
++
++function ff_vp9_idct_idct_32x32_add_10_neon, export=1
++        mov             x13, #0x03ff
++        b               vp9_idct_idct_32x32_add_16_neon
++endfunc
++
++function ff_vp9_idct_idct_32x32_add_12_neon, export=1
++        mov             x13, #0x0fff
++        b               vp9_idct_idct_32x32_add_16_neon
++endfunc
++
++.macro idct32_partial size
++function idct32x32_\size\()_add_16_neon
++.irp i, 0, 4
++        add             x0,  sp,  #(\i*128)
++.ifc \size,quarter
++.if \i == 4
++        cmp             w3,  #9
++        b.le            1f
++.endif
++.endif
++        add             x2,  x6,  #(\i*4)
++        bl              idct32_1d_4x32_pass1_\size\()_neon
++.endr
++
++.ifc \size,half
++.irp i, 8, 12
++        add             x0,  sp,  #(\i*128)
++.if \i == 12
++        cmp             w3,  #70
++        b.le            1f
++.endif
++        add             x2,  x6,  #(\i*4)
++        bl              idct32_1d_4x32_pass1_\size\()_neon
++.endr
++.endif
++        b               3f
++
++1:
++        // Write zeros to the temp buffer for pass 2
++        movi            v16.4s,  #0
++        movi            v17.4s,  #0
++        movi            v18.4s,  #0
++        movi            v19.4s,  #0
++
++.rept 4
++        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64
++        st1             {v16.4s,v17.4s,v18.4s,v19.4s},  [x0], #64
++.endr
++
++3:
++.irp i, 0, 4, 8, 12, 16, 20, 24, 28
++        add             x0,  x4,  #(\i*2)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*4)
++        bl              idct32_1d_4x32_pass2_\size\()_neon
++.endr
++
++        add             sp,  sp,  #4096
++        ldp             d14, d15, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d8,  d9,  [sp], 0x10
++
++        br              x15
++endfunc
++.endm
++
++idct32_partial quarter
++idct32_partial half
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9itxfm_neon.S b/media/ffvpx/libavcodec/aarch64/vp9itxfm_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9itxfm_neon.S
+@@ -0,0 +1,1580 @@
++/*
++ * Copyright (c) 2016 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++#include "neon.S"
++
++const itxfm4_coeffs, align=4
++        .short  11585, 0, 6270, 15137
++iadst4_coeffs:
++        .short  5283, 15212, 9929, 13377
++endconst
++
++const iadst8_coeffs, align=4
++        .short  16305, 1606, 14449, 7723, 10394, 12665, 4756, 15679
++idct_coeffs:
++        .short  11585, 0, 6270, 15137, 3196, 16069, 13623, 9102
++        .short  1606, 16305, 12665, 10394, 7723, 14449, 15679, 4756
++        .short  804, 16364, 12140, 11003, 7005, 14811, 15426, 5520
++        .short  3981, 15893, 14053, 8423, 9760, 13160, 16207, 2404
++endconst
++
++const iadst16_coeffs, align=4
++        .short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
++        .short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
++endconst
++
++// out1 = ((in1 + in2) * v0[0] + (1 << 13)) >> 14
++// out2 = ((in1 - in2) * v0[0] + (1 << 13)) >> 14
++// in/out are .8h registers; this can do with 4 temp registers, but is
++// more efficient if 6 temp registers are available.
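++// (v0.h[0] holds 11585, roughly cos(pi/4) * 2^14, from the coefficient
++// tables above; adding (1 << 13) before the >> 14 makes the shift a
++// round-to-nearest rescale back to the input's scale.)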
++.macro dmbutterfly0 out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, neg=0
++.if \neg > 0
++        neg             \tmp4\().4h, v0.4h
++.endif
++        add             \tmp1\().8h, \in1\().8h,  \in2\().8h
++        sub             \tmp2\().8h, \in1\().8h,  \in2\().8h
++.if \neg > 0
++        smull           \tmp3\().4s, \tmp1\().4h, \tmp4\().h[0]
++        smull2          \tmp4\().4s, \tmp1\().8h, \tmp4\().h[0]
++.else
++        smull           \tmp3\().4s, \tmp1\().4h, v0.h[0]
++        smull2          \tmp4\().4s, \tmp1\().8h, v0.h[0]
++.endif
++.ifb \tmp5
++        rshrn           \out1\().4h, \tmp3\().4s, #14
++        rshrn2          \out1\().8h, \tmp4\().4s, #14
++        smull           \tmp3\().4s, \tmp2\().4h, v0.h[0]
++        smull2          \tmp4\().4s, \tmp2\().8h, v0.h[0]
++        rshrn           \out2\().4h, \tmp3\().4s, #14
++        rshrn2          \out2\().8h, \tmp4\().4s, #14
++.else
++        smull           \tmp5\().4s, \tmp2\().4h, v0.h[0]
++        smull2          \tmp6\().4s, \tmp2\().8h, v0.h[0]
++        rshrn           \out1\().4h, \tmp3\().4s, #14
++        rshrn2          \out1\().8h, \tmp4\().4s, #14
++        rshrn           \out2\().4h, \tmp5\().4s, #14
++        rshrn2          \out2\().8h, \tmp6\().4s, #14
++.endif
++.endm
++
++// Same as dmbutterfly0 above, but treating the input in in2 as zero,
++// writing the same output into both out1 and out2.
++.macro dmbutterfly0_h out1, out2, in1, in2, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6
++        smull           \tmp1\().4s,  \in1\().4h,  v0.h[0]
++        smull2          \tmp2\().4s,  \in1\().8h,  v0.h[0]
++        rshrn           \out1\().4h,  \tmp1\().4s, #14
++        rshrn2          \out1\().8h,  \tmp2\().4s, #14
++        rshrn           \out2\().4h,  \tmp1\().4s, #14
++        rshrn2          \out2\().8h,  \tmp2\().4s, #14
++.endm
++
++// out1,out2 = in1 * coef1 - in2 * coef2
++// out3,out4 = in1 * coef2 + in2 * coef1
++// out are 4 x .4s registers, in are 2 x .8h registers
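++// (The _l suffix marks the "long" form: the products stay in 32-bit lanes
++// without the rounding shift, so callers can combine them first, e.g. via
++// dbutterfly_n below, before narrowing back to 16 bits.)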
++.macro dmbutterfly_l out1, out2, out3, out4, in1, in2, coef1, coef2
++        smull           \out1\().4s, \in1\().4h, \coef1
++        smull2          \out2\().4s, \in1\().8h, \coef1
++        smull           \out3\().4s, \in1\().4h, \coef2
++        smull2          \out4\().4s, \in1\().8h, \coef2
++        smlsl           \out1\().4s, \in2\().4h, \coef2
++        smlsl2          \out2\().4s, \in2\().8h, \coef2
++        smlal           \out3\().4s, \in2\().4h, \coef1
++        smlal2          \out4\().4s, \in2\().8h, \coef1
++.endm
++
++// inout1 = (inout1 * coef1 - inout2 * coef2 + (1 << 13)) >> 14
++// inout2 = (inout1 * coef2 + inout2 * coef1 + (1 << 13)) >> 14
++// inout are 2 x .8h registers
++.macro dmbutterfly inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4, neg=0
++        dmbutterfly_l   \tmp1, \tmp2, \tmp3, \tmp4, \inout1, \inout2, \coef1, \coef2
++.if \neg > 0
++        neg             \tmp3\().4s, \tmp3\().4s
++        neg             \tmp4\().4s, \tmp4\().4s
++.endif
++        rshrn           \inout1\().4h, \tmp1\().4s,  #14
++        rshrn2          \inout1\().8h, \tmp2\().4s,  #14
++        rshrn           \inout2\().4h, \tmp3\().4s,  #14
++        rshrn2          \inout2\().8h, \tmp4\().4s,  #14
++.endm
++
++// Same as dmbutterfly above, but treating the input in inout2 as zero
++.macro dmbutterfly_h1 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
++        smull           \tmp1\().4s, \inout1\().4h, \coef1
++        smull2          \tmp2\().4s, \inout1\().8h, \coef1
++        smull           \tmp3\().4s, \inout1\().4h, \coef2
++        smull2          \tmp4\().4s, \inout1\().8h, \coef2
++        rshrn           \inout1\().4h, \tmp1\().4s, #14
++        rshrn2          \inout1\().8h, \tmp2\().4s, #14
++        rshrn           \inout2\().4h, \tmp3\().4s, #14
++        rshrn2          \inout2\().8h, \tmp4\().4s, #14
++.endm
++
++// Same as dmbutterfly above, but treating the input in inout1 as zero
++.macro dmbutterfly_h2 inout1, inout2, coef1, coef2, tmp1, tmp2, tmp3, tmp4
++        smull           \tmp1\().4s, \inout2\().4h, \coef2
++        smull2          \tmp2\().4s, \inout2\().8h, \coef2
++        smull           \tmp3\().4s, \inout2\().4h, \coef1
++        smull2          \tmp4\().4s, \inout2\().8h, \coef1
++        neg             \tmp1\().4s, \tmp1\().4s
++        neg             \tmp2\().4s, \tmp2\().4s
++        rshrn           \inout2\().4h, \tmp3\().4s, #14
++        rshrn2          \inout2\().8h, \tmp4\().4s, #14
++        rshrn           \inout1\().4h, \tmp1\().4s, #14
++        rshrn2          \inout1\().8h, \tmp2\().4s, #14
++.endm
++
++.macro dsmull_h out1, out2, in, coef
++        smull           \out1\().4s, \in\().4h, \coef
++        smull2          \out2\().4s, \in\().8h, \coef
++.endm
++
++.macro drshrn_h out, in1, in2, shift
++        rshrn           \out\().4h, \in1\().4s, \shift
++        rshrn2          \out\().8h, \in2\().4s, \shift
++.endm
++
++
++// out1 = in1 + in2
++// out2 = in1 - in2
++.macro butterfly_8h out1, out2, in1, in2
++        add             \out1\().8h, \in1\().8h, \in2\().8h
++        sub             \out2\().8h, \in1\().8h, \in2\().8h
++.endm
++
++// out1 = in1 - in2
++// out2 = in1 + in2
++.macro butterfly_8h_r out1, out2, in1, in2
++        sub             \out1\().8h, \in1\().8h, \in2\().8h
++        add             \out2\().8h, \in1\().8h, \in2\().8h
++.endm
++
++// out1 = (in1,in2 + in3,in4 + (1 << 13)) >> 14
++// out2 = (in1,in2 - in3,in4 + (1 << 13)) >> 14
++// out are 2 x .8h registers, in are 4 x .4s registers
++.macro dbutterfly_n out1, out2, in1, in2, in3, in4, tmp1, tmp2, tmp3, tmp4
++        add             \tmp1\().4s, \in1\().4s, \in3\().4s
++        add             \tmp2\().4s, \in2\().4s, \in4\().4s
++        sub             \tmp3\().4s, \in1\().4s, \in3\().4s
++        sub             \tmp4\().4s, \in2\().4s, \in4\().4s
++        rshrn           \out1\().4h, \tmp1\().4s,  #14
++        rshrn2          \out1\().8h, \tmp2\().4s,  #14
++        rshrn           \out2\().4h, \tmp3\().4s,  #14
++        rshrn2          \out2\().8h, \tmp4\().4s,  #14
++.endm
++
++.macro iwht4 c0, c1, c2, c3
++        add             \c0\().4h, \c0\().4h, \c1\().4h
++        sub             v17.4h,    \c2\().4h, \c3\().4h
++        sub             v16.4h,    \c0\().4h, v17.4h
++        sshr            v16.4h,    v16.4h,    #1
++        sub             \c2\().4h, v16.4h,    \c1\().4h
++        sub             \c1\().4h, v16.4h,    \c3\().4h
++        add             \c3\().4h, v17.4h,    \c2\().4h
++        sub             \c0\().4h, \c0\().4h, \c1\().4h
++.endm
++
++.macro idct4 c0, c1, c2, c3
++        smull           v22.4s,    \c1\().4h, v0.h[3]
++        smull           v20.4s,    \c1\().4h, v0.h[2]
++        add             v16.4h,    \c0\().4h, \c2\().4h
++        sub             v17.4h,    \c0\().4h, \c2\().4h
++        smlal           v22.4s,    \c3\().4h, v0.h[2]
++        smull           v18.4s,    v16.4h,    v0.h[0]
++        smull           v19.4s,    v17.4h,    v0.h[0]
++        smlsl           v20.4s,    \c3\().4h, v0.h[3]
++        rshrn           v22.4h,    v22.4s,    #14
++        rshrn           v18.4h,    v18.4s,    #14
++        rshrn           v19.4h,    v19.4s,    #14
++        rshrn           v20.4h,    v20.4s,    #14
++        add             \c0\().4h, v18.4h,    v22.4h
++        sub             \c3\().4h, v18.4h,    v22.4h
++        add             \c1\().4h, v19.4h,    v20.4h
++        sub             \c2\().4h, v19.4h,    v20.4h
++.endm
++
++.macro iadst4 c0, c1, c2, c3
++        smull           v16.4s,    \c0\().4h, v0.h[4]
++        smlal           v16.4s,    \c2\().4h, v0.h[5]
++        smlal           v16.4s,    \c3\().4h, v0.h[6]
++        smull           v17.4s,    \c0\().4h, v0.h[6]
++        smlsl           v17.4s,    \c2\().4h, v0.h[4]
++        sub             \c0\().4h, \c0\().4h, \c2\().4h
++        smlsl           v17.4s,    \c3\().4h, v0.h[5]
++        add             \c0\().4h, \c0\().4h, \c3\().4h
++        smull           v19.4s,    \c1\().4h, v0.h[7]
++        smull           v18.4s,    \c0\().4h, v0.h[7]
++        add             v20.4s,    v16.4s,    v19.4s
++        add             v21.4s,    v17.4s,    v19.4s
++        rshrn           \c0\().4h, v20.4s,    #14
++        add             v16.4s,    v16.4s,    v17.4s
++        rshrn           \c1\().4h, v21.4s,    #14
++        sub             v16.4s,    v16.4s,    v19.4s
++        rshrn           \c2\().4h, v18.4s,    #14
++        rshrn           \c3\().4h, v16.4s,    #14
++.endm
++
++// The public functions in this file have the following signature:
++// void itxfm_add(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
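++// dst/stride describe the destination, "block" holds the dequantized
++// coefficients (cleared to zero as they are consumed), and eob is the
++// end-of-block count; the idct_idct variants use eob == 1 as a DC-only
++// fast path.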
++
++.macro itxfm_func4x4 txfm1, txfm2
++function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
++.ifc \txfm1,\txfm2
++.ifc \txfm1,idct
++        movrel          x4,  itxfm4_coeffs
++        ld1             {v0.4h}, [x4]
++.endif
++.ifc \txfm1,iadst
++        movrel          x4,  iadst4_coeffs
++        ld1             {v0.d}[1], [x4]
++.endif
++.else
++        movrel          x4,  itxfm4_coeffs
++        ld1             {v0.8h}, [x4]
++.endif
++
++        movi            v31.8h, #0
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #1
++        b.ne            1f
++        // DC-only for idct/idct
++        ld1             {v2.h}[0], [x2]
++        smull           v2.4s,  v2.4h, v0.h[0]
++        rshrn           v2.4h,  v2.4s, #14
++        smull           v2.4s,  v2.4h, v0.h[0]
++        rshrn           v2.4h,  v2.4s, #14
++        st1             {v31.h}[0], [x2]
++        dup             v4.4h,  v2.h[0]
++        mov             v5.16b, v4.16b
++        mov             v6.16b, v4.16b
++        mov             v7.16b, v4.16b
++        b               2f
++.endif
++
++1:
++        ld1             {v4.4h,v5.4h,v6.4h,v7.4h},  [x2]
++        st1             {v31.8h}, [x2], #16
++
++.ifc \txfm1,iwht
++        sshr            v4.4h,  v4.4h,  #2
++        sshr            v5.4h,  v5.4h,  #2
++        sshr            v6.4h,  v6.4h,  #2
++        sshr            v7.4h,  v7.4h,  #2
++.endif
++
++        \txfm1\()4      v4,  v5,  v6,  v7
++
++        st1             {v31.8h}, [x2], #16
++        // Transpose 4x4 with 16 bit elements
++        transpose_4x4H  v4,  v5,  v6,  v7,  v16, v17, v18, v19
++
++        \txfm2\()4      v4,  v5,  v6,  v7
++2:
++        ld1             {v0.s}[0],   [x0], x1
++        ld1             {v1.s}[0],   [x0], x1
++.ifnc \txfm1,iwht
++        srshr           v4.4h,  v4.4h,  #4
++        srshr           v5.4h,  v5.4h,  #4
++        srshr           v6.4h,  v6.4h,  #4
++        srshr           v7.4h,  v7.4h,  #4
++.endif
++        uaddw           v4.8h,  v4.8h,  v0.8b
++        uaddw           v5.8h,  v5.8h,  v1.8b
++        ld1             {v2.s}[0],   [x0], x1
++        ld1             {v3.s}[0],   [x0], x1
++        sqxtun          v0.8b,  v4.8h
++        sqxtun          v1.8b,  v5.8h
++        sub             x0,  x0,  x1, lsl #2
++
++        uaddw           v6.8h,  v6.8h,  v2.8b
++        uaddw           v7.8h,  v7.8h,  v3.8b
++        st1             {v0.s}[0],  [x0], x1
++        sqxtun          v2.8b,  v6.8h
++        sqxtun          v3.8b,  v7.8h
++
++        st1             {v1.s}[0],  [x0], x1
++        st1             {v2.s}[0],  [x0], x1
++        st1             {v3.s}[0],  [x0], x1
++
++        ret
++endfunc
++.endm
++
++itxfm_func4x4 idct,  idct
++itxfm_func4x4 iadst, idct
++itxfm_func4x4 idct,  iadst
++itxfm_func4x4 iadst, iadst
++itxfm_func4x4 iwht,  iwht
++
++
++.macro idct8
++        dmbutterfly0    v16, v20, v16, v20, v2, v3, v4, v5, v6, v7 // v16 = t0a, v20 = t1a
++        dmbutterfly     v18, v22, v0.h[2], v0.h[3], v2, v3, v4, v5 // v18 = t2a, v22 = t3a
++        dmbutterfly     v17, v23, v0.h[4], v0.h[5], v2, v3, v4, v5 // v17 = t4a, v23 = t7a
++        dmbutterfly     v21, v19, v0.h[6], v0.h[7], v2, v3, v4, v5 // v21 = t5a, v19 = t6a
++
++        butterfly_8h    v24, v25, v16, v22 // v24 = t0, v25 = t3
++        butterfly_8h    v28, v29, v17, v21 // v28 = t4, v29 = t5a
++        butterfly_8h    v30, v31, v23, v19 // v30 = t7, v31 = t6a
++        butterfly_8h    v26, v27, v20, v18 // v26 = t1, v27 = t2
++
++        dmbutterfly0    v31, v29, v31, v29, v2, v3, v4, v5, v6, v7 // v31 = t6, v29 = t5
++
++        butterfly_8h    v16, v23, v24, v30 // v16 = out[0], v23 = out[7]
++        butterfly_8h    v17, v22, v26, v31 // v17 = out[1], v22 = out[6]
++        butterfly_8h    v18, v21, v27, v29 // v18 = out[2], v21 = out[5]
++        butterfly_8h    v19, v20, v25, v28 // v19 = out[3], v20 = out[4]
++.endm
++
++.macro iadst8
++        dmbutterfly_l   v24, v25, v26, v27, v23, v16, v1.h[1], v1.h[0]   // v24,v25 = t1a, v26,v27 = t0a
++        dmbutterfly_l   v28, v29, v30, v31, v21, v18, v1.h[3], v1.h[2]   // v28,v29 = t3a, v30,v31 = t2a
++        dmbutterfly_l   v2,  v3,  v4,  v5,  v19, v20, v1.h[5], v1.h[4]   // v2,v3   = t5a, v4,v5   = t4a
++        dmbutterfly_l   v16, v18, v21, v23, v17, v22, v1.h[7], v1.h[6]   // v16,v18 = t7a, v21,v23 = t6a
++
++        dbutterfly_n    v4,  v5,  v26, v27, v4,  v5,  v6,  v7, v26, v27  // v4  = t0, v5  = t4
++        dbutterfly_n    v2,  v3,  v24, v25, v2,  v3,  v6,  v7, v26, v27  // v2  = t1, v3  = t5
++        dbutterfly_n    v24, v25, v30, v31, v21, v23, v6,  v7, v26, v27  // v24 = t2, v25 = t6
++        dbutterfly_n    v30, v31, v28, v29, v16, v18, v6,  v7, v26, v27  // v30 = t3, v31 = t7
++
++        butterfly_8h    v16, v6,  v4, v24 // v16 = out[0],  v6 = t2
++        butterfly_8h    v23, v7,  v2, v30 // v23 = -out[7], v7 = t3
++        neg             v23.8h,   v23.8h  // v23 = out[7]
++
++        dmbutterfly0    v19, v20, v6, v7, v24, v26, v27, v28, v29, v30   // v19 = -out[3], v20 = out[4]
++        neg             v19.8h,   v19.8h  // v19 = out[3]
++
++        dmbutterfly_l   v26, v27, v28, v29, v5,  v3,  v0.h[2], v0.h[3]   // v26,v27 = t5a, v28,v29 = t4a
++        dmbutterfly_l   v2,  v3,  v4,  v5,  v31, v25, v0.h[3], v0.h[2]   // v2,v3   = t6a, v4,v5   = t7a
++
++        dbutterfly_n    v17, v30, v28, v29, v2,  v3,  v6,  v7,  v24, v25 // v17 = -out[1], v30 = t6
++        dbutterfly_n    v22, v31, v26, v27, v4,  v5,  v6,  v7,  v24, v25 // v22 = out[6],  v31 = t7
++        neg             v17.8h,   v17.8h  // v17 = out[1]
++
++        dmbutterfly0    v18, v21, v30, v31, v2,  v3,  v4,  v5,  v6,  v7  // v18 = out[2], v21 = -out[5]
++        neg             v21.8h,   v21.8h  // v21 = out[5]
++.endm
++
++
++.macro itxfm_func8x8 txfm1, txfm2
++function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
++        // The iadst also uses a few coefficients from
++        // idct, so those always need to be loaded.
++.ifc \txfm1\()_\txfm2,idct_idct
++        movrel          x4,  idct_coeffs
++.else
++        movrel          x4,  iadst8_coeffs
++        ld1             {v1.8h}, [x4], #16
++.endif
++        ld1             {v0.8h}, [x4]
++
++        movi            v2.8h, #0
++        movi            v3.8h, #0
++        movi            v4.8h, #0
++        movi            v5.8h, #0
++
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #1
++        b.ne            1f
++        // DC-only for idct/idct
++        ld1             {v2.h}[0],  [x2]
++        smull           v2.4s,  v2.4h, v0.h[0]
++        rshrn           v2.4h,  v2.4s, #14
++        smull           v2.4s,  v2.4h, v0.h[0]
++        rshrn           v2.4h,  v2.4s, #14
++        st1             {v3.h}[0],  [x2]
++        dup             v16.8h,  v2.h[0]
++        mov             v17.16b, v16.16b
++        mov             v18.16b, v16.16b
++        mov             v19.16b, v16.16b
++        mov             v20.16b, v16.16b
++        mov             v21.16b, v16.16b
++        mov             v22.16b, v16.16b
++        mov             v23.16b, v16.16b
++        b               2f
++.endif
++1:
++        ld1             {v16.8h,v17.8h,v18.8h,v19.8h},  [x2], #64
++        ld1             {v20.8h,v21.8h,v22.8h,v23.8h},  [x2], #64
++        sub             x2,  x2,  #128
++        st1             {v2.8h,v3.8h,v4.8h,v5.8h},      [x2], #64
++        st1             {v2.8h,v3.8h,v4.8h,v5.8h},      [x2], #64
++
++        \txfm1\()8
++
++        // Transpose 8x8 with 16 bit elements
++        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
++
++        \txfm2\()8
++2:
++        mov             x3,  x0
++        // Add into the destination
++        ld1             {v0.8b},  [x0], x1
++        srshr           v16.8h, v16.8h, #5
++        ld1             {v1.8b},  [x0], x1
++        srshr           v17.8h, v17.8h, #5
++        ld1             {v2.8b},  [x0], x1
++        srshr           v18.8h, v18.8h, #5
++        uaddw           v16.8h, v16.8h, v0.8b
++        ld1             {v3.8b},  [x0], x1
++        srshr           v19.8h, v19.8h, #5
++        uaddw           v17.8h, v17.8h, v1.8b
++        ld1             {v4.8b},  [x0], x1
++        srshr           v20.8h, v20.8h, #5
++        uaddw           v18.8h, v18.8h, v2.8b
++        sqxtun          v0.8b,  v16.8h
++        ld1             {v5.8b},  [x0], x1
++        srshr           v21.8h, v21.8h, #5
++        uaddw           v19.8h, v19.8h, v3.8b
++        sqxtun          v1.8b,  v17.8h
++        ld1             {v6.8b},  [x0], x1
++        srshr           v22.8h, v22.8h, #5
++        uaddw           v20.8h, v20.8h, v4.8b
++        sqxtun          v2.8b,  v18.8h
++        ld1             {v7.8b},  [x0], x1
++        srshr           v23.8h, v23.8h, #5
++        uaddw           v21.8h, v21.8h, v5.8b
++        sqxtun          v3.8b,  v19.8h
++
++        st1             {v0.8b},  [x3], x1
++        uaddw           v22.8h, v22.8h, v6.8b
++        st1             {v1.8b},  [x3], x1
++        sqxtun          v4.8b,  v20.8h
++        st1             {v2.8b},  [x3], x1
++        uaddw           v23.8h, v23.8h, v7.8b
++        st1             {v3.8b},  [x3], x1
++        sqxtun          v5.8b,  v21.8h
++        st1             {v4.8b},  [x3], x1
++        sqxtun          v6.8b,  v22.8h
++        st1             {v5.8b},  [x3], x1
++        sqxtun          v7.8b,  v23.8h
++
++        st1             {v6.8b},  [x3], x1
++        st1             {v7.8b},  [x3], x1
++
++        ret
++endfunc
++.endm
++
++itxfm_func8x8 idct,  idct
++itxfm_func8x8 iadst, idct
++itxfm_func8x8 idct,  iadst
++itxfm_func8x8 iadst, iadst
++
++
++function idct16x16_dc_add_neon
++        movrel          x4,  idct_coeffs
++        ld1             {v0.4h}, [x4]
++
++        movi            v1.4h,  #0
++
++        ld1             {v2.h}[0], [x2]
++        smull           v2.4s,  v2.4h,  v0.h[0]
++        rshrn           v2.4h,  v2.4s,  #14
++        smull           v2.4s,  v2.4h,  v0.h[0]
++        rshrn           v2.4h,  v2.4s,  #14
++        dup             v2.8h,  v2.h[0]
++        st1             {v1.h}[0], [x2]
++
++        srshr           v2.8h,  v2.8h,  #6
++
++        mov             x3,  x0
++        mov             x4,  #16
++1:
++        // Loop to add the constant from v2 into all 16x16 outputs
++        subs            x4,  x4,  #2
++        ld1             {v3.16b},  [x0], x1
++        ld1             {v4.16b},  [x0], x1
++        uaddw           v16.8h, v2.8h,  v3.8b
++        uaddw2          v17.8h, v2.8h,  v3.16b
++        uaddw           v18.8h, v2.8h,  v4.8b
++        uaddw2          v19.8h, v2.8h,  v4.16b
++        sqxtun          v3.8b,  v16.8h
++        sqxtun2         v3.16b, v17.8h
++        sqxtun          v4.8b,  v18.8h
++        sqxtun2         v4.16b, v19.8h
++        st1             {v3.16b},  [x3], x1
++        st1             {v4.16b},  [x3], x1
++        b.ne            1b
++
++        ret
++endfunc
++
++.macro idct16_end
++        butterfly_8h    v18, v7,  v4,  v7                // v18 = t0a,  v7  = t7a
++        butterfly_8h    v19, v22, v5,  v22               // v19 = t1a,  v22 = t6
++        butterfly_8h    v4,  v26, v20, v26               // v4  = t2a,  v26 = t5
++        butterfly_8h    v5,  v6,  v28, v6                // v5  = t3a,  v6  = t4
++        butterfly_8h    v20, v28, v16, v24               // v20 = t8a,  v28 = t11a
++        butterfly_8h    v24, v21, v23, v21               // v24 = t9,   v21 = t10
++        butterfly_8h    v23, v27, v25, v27               // v23 = t14,  v27 = t13
++        butterfly_8h    v25, v29, v29, v17               // v25 = t15a, v29 = t12a
++
++        dmbutterfly0    v2,  v3,  v27, v21, v2,  v3,  v16, v17, v30, v31 // v2  = t13a, v3  = t10a
++        dmbutterfly0    v28, v27, v29, v28, v21, v29, v16, v17, v30, v31 // v28 = t12,  v27 = t11
++
++        butterfly_8h    v16, v31, v18, v25               // v16 = out[0], v31 = out[15]
++        butterfly_8h    v17, v30, v19, v23               // v17 = out[1], v30 = out[14]
++        butterfly_8h_r  v25, v22, v22, v24               // v25 = out[9], v22 = out[6]
++        butterfly_8h    v23, v24, v7,  v20               // v23 = out[7], v24 = out[8]
++        butterfly_8h    v18, v29, v4,  v2                // v18 = out[2], v29 = out[13]
++        butterfly_8h    v19, v28, v5,  v28               // v19 = out[3], v28 = out[12]
++        butterfly_8h    v20, v27, v6,  v27               // v20 = out[4], v27 = out[11]
++        butterfly_8h    v21, v26, v26, v3                // v21 = out[5], v26 = out[10]
++        ret
++.endm
++
++function idct16
++        dmbutterfly0    v16, v24, v16, v24, v2, v3, v4, v5, v6, v7 // v16 = t0a,  v24 = t1a
++        dmbutterfly     v20, v28, v0.h[2], v0.h[3], v2, v3, v4, v5 // v20 = t2a,  v28 = t3a
++        dmbutterfly     v18, v30, v0.h[4], v0.h[5], v2, v3, v4, v5 // v18 = t4a,  v30 = t7a
++        dmbutterfly     v26, v22, v0.h[6], v0.h[7], v2, v3, v4, v5 // v26 = t5a,  v22 = t6a
++        dmbutterfly     v17, v31, v1.h[0], v1.h[1], v2, v3, v4, v5 // v17 = t8a,  v31 = t15a
++        dmbutterfly     v25, v23, v1.h[2], v1.h[3], v2, v3, v4, v5 // v25 = t9a,  v23 = t14a
++        dmbutterfly     v21, v27, v1.h[4], v1.h[5], v2, v3, v4, v5 // v21 = t10a, v27 = t13a
++        dmbutterfly     v29, v19, v1.h[6], v1.h[7], v2, v3, v4, v5 // v29 = t11a, v19 = t12a
++
++        butterfly_8h    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
++        butterfly_8h    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
++        butterfly_8h    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
++        butterfly_8h    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
++        butterfly_8h    v16, v25, v17, v25               // v16 = t8,   v25 = t9
++        butterfly_8h    v24, v21, v29, v21               // v24 = t11,  v21 = t10
++        butterfly_8h    v17, v27, v19, v27               // v17 = t12,  v27 = t13
++        butterfly_8h    v29, v23, v31, v23               // v29 = t15,  v23 = t14
++
++        dmbutterfly0    v22, v26, v22, v26, v2, v3, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
++        dmbutterfly     v23, v25, v0.h[2], v0.h[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
++        dmbutterfly     v27, v21, v0.h[2], v0.h[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
++        idct16_end
++endfunc
++
++function idct16_half
++        dmbutterfly0_h  v16, v24, v16, v24, v2, v3, v4, v5, v6, v7 // v16 = t0a,  v24 = t1a
++        dmbutterfly_h1  v20, v28, v0.h[2], v0.h[3], v2, v3, v4, v5 // v20 = t2a,  v28 = t3a
++        dmbutterfly_h1  v18, v30, v0.h[4], v0.h[5], v2, v3, v4, v5 // v18 = t4a,  v30 = t7a
++        dmbutterfly_h2  v26, v22, v0.h[6], v0.h[7], v2, v3, v4, v5 // v26 = t5a,  v22 = t6a
++        dmbutterfly_h1  v17, v31, v1.h[0], v1.h[1], v2, v3, v4, v5 // v17 = t8a,  v31 = t15a
++        dmbutterfly_h2  v25, v23, v1.h[2], v1.h[3], v2, v3, v4, v5 // v25 = t9a,  v23 = t14a
++        dmbutterfly_h1  v21, v27, v1.h[4], v1.h[5], v2, v3, v4, v5 // v21 = t10a, v27 = t13a
++        dmbutterfly_h2  v29, v19, v1.h[6], v1.h[7], v2, v3, v4, v5 // v29 = t11a, v19 = t12a
++
++        butterfly_8h    v4,  v28, v16, v28               // v4  = t0,   v28 = t3
++        butterfly_8h    v5,  v20, v24, v20               // v5  = t1,   v20 = t2
++        butterfly_8h    v6,  v26, v18, v26               // v6  = t4,   v26 = t5
++        butterfly_8h    v7,  v22, v30, v22               // v7  = t7,   v22 = t6
++        butterfly_8h    v16, v25, v17, v25               // v16 = t8,   v25 = t9
++        butterfly_8h    v24, v21, v29, v21               // v24 = t11,  v21 = t10
++        butterfly_8h    v17, v27, v19, v27               // v17 = t12,  v27 = t13
++        butterfly_8h    v29, v23, v31, v23               // v29 = t15,  v23 = t14
++
++        dmbutterfly0    v22, v26, v22, v26, v2, v3, v18, v19, v30, v31        // v22 = t6a,  v26 = t5a
++        dmbutterfly     v23, v25, v0.h[2], v0.h[3], v18, v19, v30, v31        // v23 = t9a,  v25 = t14a
++        dmbutterfly     v27, v21, v0.h[2], v0.h[3], v18, v19, v30, v31, neg=1 // v27 = t13a, v21 = t10a
++        idct16_end
++endfunc
++
++function idct16_quarter
++        dsmull_h        v24, v25, v19, v1.h[7]
++        dsmull_h        v4,  v5,  v17, v1.h[0]
++        dsmull_h        v7,  v6,  v18, v0.h[5]
++        dsmull_h        v30, v31, v18, v0.h[4]
++        neg             v24.4s,  v24.4s
++        neg             v25.4s,  v25.4s
++        dsmull_h        v29, v28, v17, v1.h[1]
++        dsmull_h        v26, v27, v19, v1.h[6]
++        dsmull_h        v22, v23, v16, v0.h[0]
++        drshrn_h        v24, v24, v25, #14
++        drshrn_h        v16, v4,  v5,  #14
++        drshrn_h        v7,  v7,  v6,  #14
++        drshrn_h        v6,  v30, v31, #14
++        drshrn_h        v29, v29, v28, #14
++        drshrn_h        v17, v26, v27, #14
++        drshrn_h        v28, v22, v23, #14
++
++        dmbutterfly_l   v20, v21, v22, v23, v17, v24, v0.h[2], v0.h[3]
++        dmbutterfly_l   v18, v19, v30, v31, v29, v16, v0.h[2], v0.h[3]
++        neg             v22.4s,  v22.4s
++        neg             v23.4s,  v23.4s
++        drshrn_h        v27, v20, v21, #14
++        drshrn_h        v21, v22, v23, #14
++        drshrn_h        v23, v18, v19, #14
++        drshrn_h        v25, v30, v31, #14
++        mov             v4.16b,  v28.16b
++        mov             v5.16b,  v28.16b
++        dmbutterfly0    v22, v26, v7,  v6,  v18, v19, v30, v31
++        mov             v20.16b, v28.16b
++        idct16_end
++endfunc
++
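++// 16-point inverse ADST. Unlike idct16 it clobbers v8-v15, so the
++// callers save d8-d15 around it.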
++function iadst16
++        ld1             {v0.8h,v1.8h}, [x11]
++
++        dmbutterfly_l   v6,  v7,  v4,  v5,  v31, v16, v0.h[1], v0.h[0]   // v6,v7   = t1,   v4,v5   = t0
++        dmbutterfly_l   v10, v11, v8,  v9,  v23, v24, v0.h[5], v0.h[4]   // v10,v11 = t9,   v8,v9   = t8
++        dbutterfly_n    v31, v24, v6,  v7,  v10, v11, v12, v13, v10, v11 // v31     = t1a,  v24     = t9a
++        dmbutterfly_l   v14, v15, v12, v13, v29, v18, v0.h[3], v0.h[2]   // v14,v15 = t3,   v12,v13 = t2
++        dbutterfly_n    v16, v23, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v16     = t0a,  v23     = t8a
++
++        dmbutterfly_l   v6,  v7,  v4,  v5,  v21, v26, v0.h[7], v0.h[6]   // v6,v7   = t11,  v4,v5   = t10
++        dbutterfly_n    v29, v26, v14, v15, v6,  v7,  v8,  v9,  v6,  v7  // v29     = t3a,  v26     = t11a
++        dmbutterfly_l   v10, v11, v8,  v9,  v27, v20, v1.h[1], v1.h[0]   // v10,v11 = t5,   v8,v9   = t4
++        dbutterfly_n    v18, v21, v12, v13, v4,  v5,  v6,  v7,  v4,  v5  // v18     = t2a,  v21     = t10a
++
++        dmbutterfly_l   v14, v15, v12, v13, v19, v28, v1.h[5], v1.h[4]   // v14,v15 = t13,  v12,v13 = t12
++        dbutterfly_n    v20, v28, v10, v11, v14, v15, v4,  v5,  v14, v15 // v20     = t5a,  v28     = t13a
++        dmbutterfly_l   v6,  v7,  v4,  v5,  v25, v22, v1.h[3], v1.h[2]   // v6,v7   = t7,   v4,v5   = t6
++        dbutterfly_n    v27, v19, v8,  v9,  v12, v13, v10, v11, v12, v13 // v27     = t4a,  v19     = t12a
++
++        dmbutterfly_l   v10, v11, v8,  v9,  v17, v30, v1.h[7], v1.h[6]   // v10,v11 = t15,  v8,v9   = t14
++        ld1             {v0.8h}, [x10]
++        dbutterfly_n    v22, v30, v6,  v7,  v10, v11, v12, v13, v10, v11 // v22     = t7a,  v30     = t15a
++        dmbutterfly_l   v14, v15, v12, v13, v23, v24, v0.h[4], v0.h[5]   // v14,v15 = t9,   v12,v13 = t8
++        dbutterfly_n    v25, v17, v4,  v5,  v8,  v9,  v6,  v7,  v8,  v9  // v25     = t6a,  v17     = t14a
++
++        dmbutterfly_l   v4,  v5,  v6,  v7,  v28, v19, v0.h[5], v0.h[4]   // v4,v5   = t12,  v6,v7   = t13
++        dbutterfly_n    v23, v19, v12, v13, v4,  v5,  v8,  v9,  v4,  v5  // v23     = t8a,  v19     = t12a
++        dmbutterfly_l   v10, v11, v8,  v9,  v21, v26, v0.h[6], v0.h[7]   // v10,v11 = t11,  v8,v9   = t10
++        butterfly_8h_r  v4,  v27, v16, v27               // v4  = t4,   v27 = t0
++        dbutterfly_n    v24, v28, v14, v15, v6,  v7,  v12, v13, v6,  v7  // v24     = t9a,  v28     = t13a
++
++        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v0.h[7], v0.h[6]   // v12,v13 = t14,  v14,v15 = t15
++        butterfly_8h_r  v5,  v20, v31, v20               // v5  = t5, v20 = t1
++        dbutterfly_n    v21, v17, v8,  v9,  v12, v13, v6,  v7,  v12, v13 // v21     = t10a, v17     = t14a
++        dbutterfly_n    v26, v30, v10, v11, v14, v15, v8,  v9,  v14, v15 // v26     = t11a, v30     = t15a
++
++        butterfly_8h_r  v6,  v25, v18, v25               // v6  = t6, v25 = t2
++        butterfly_8h_r  v7,  v22, v29, v22               // v7  = t7, v22 = t3
++
++        dmbutterfly_l   v10, v11, v8,  v9,  v19, v28, v0.h[2], v0.h[3]   // v10,v11 = t13,  v8,v9   = t12
++        dmbutterfly_l   v12, v13, v14, v15, v30, v17, v0.h[3], v0.h[2]   // v12,v13 = t14,  v14,v15 = t15
++
++        dbutterfly_n    v18, v30, v8,  v9,  v12, v13, v16, v17, v12, v13 // v18   = out[2], v30     = t14a
++        dbutterfly_n    v29, v17, v10, v11, v14, v15, v12, v13, v14, v15 // v29 = -out[13], v17     = t15a
++        neg             v29.8h, v29.8h                   // v29 = out[13]
++
++        dmbutterfly_l   v10, v11, v8,  v9,  v4,  v5,  v0.h[2], v0.h[3]   // v10,v11 = t5a,  v8,v9   = t4a
++        dmbutterfly_l   v12, v13, v14, v15, v7,  v6,  v0.h[3], v0.h[2]   // v12,v13 = t6a,  v14,v15 = t7a
++
++        butterfly_8h    v2,  v6,  v27, v25               // v2 = out[0], v6 = t2a
++        butterfly_8h    v3,  v7,  v23, v21               // v3 =-out[1], v7 = t10
++
++        dbutterfly_n    v19, v31, v8,  v9,  v12, v13, v4,  v5,  v8,  v9  // v19 = -out[3],  v31 = t6
++        neg             v19.8h, v19.8h                   // v19 = out[3]
++        dbutterfly_n    v28, v16, v10, v11, v14, v15, v4,  v5,  v10, v11 // v28 = out[12],  v16 = t7
++
++        butterfly_8h    v5,  v8,  v20, v22               // v5 =-out[15],v8 = t3a
++        butterfly_8h    v4,  v9,  v24, v26               // v4 = out[14],v9 = t11
++
++        dmbutterfly0    v23, v24, v6,  v8,  v10, v11, v12, v13, v14, v15, 1 // v23 = out[7], v24 = out[8]
++        dmbutterfly0    v21, v26, v30, v17, v10, v11, v12, v13, v14, v15, 1 // v21 = out[5], v26 = out[10]
++        dmbutterfly0    v20, v27, v16, v31, v10, v11, v12, v13, v14, v15    // v20 = out[4], v27 = out[11]
++        dmbutterfly0    v22, v25, v9,  v7,  v10, v11, v12, v13, v14, v15    // v22 = out[6], v25 = out[9]
++
++        neg             v31.8h,  v5.8h                    // v31 = out[15]
++        neg             v17.8h,  v3.8h                    // v17 = out[1]
++
++        mov             v16.16b, v2.16b
++        mov             v30.16b, v4.16b
++        ret
++endfunc
++
++// Helper macros; we can't use these expressions directly within
++// e.g. .irp due to the extra concatenation \(). Therefore wrap
++// them in macros to allow using .irp below.
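++// load_clear also writes zeros (from v2, which the callers zero
++// beforehand) back to the row it just read, clearing the coefficient
++// buffer as it is consumed.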
++.macro load i, src, inc
++        ld1             {v\i\().8h},  [\src], \inc
++.endm
++.macro store i, dst, inc
++        st1             {v\i\().8h},  [\dst], \inc
++.endm
++.macro movi_v i, size, imm
++        movi            v\i\()\size,  \imm
++.endm
++.macro load_clear i, src, inc
++        ld1             {v\i\().8h}, [\src]
++        st1             {v2.8h},  [\src], \inc
++.endm
++
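++// Round-shift eight rows of coefficients down by 6, add them to the
++// destination rows loaded through x0 and x3, saturate back to 8 bit
++// and store the result, with loads, arithmetic and stores interleaved.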
++.macro load_add_store coef0, coef1, coef2, coef3, coef4, coef5, coef6, coef7, tmp1, tmp2
++        srshr           \coef0, \coef0, #6
++        ld1             {v2.8b},  [x0], x1
++        srshr           \coef1, \coef1, #6
++        ld1             {v3.8b},  [x3], x1
++        srshr           \coef2, \coef2, #6
++        ld1             {v4.8b},  [x0], x1
++        srshr           \coef3, \coef3, #6
++        uaddw           \coef0, \coef0, v2.8b
++        ld1             {v5.8b},  [x3], x1
++        uaddw           \coef1, \coef1, v3.8b
++        srshr           \coef4, \coef4, #6
++        ld1             {v6.8b},  [x0], x1
++        srshr           \coef5, \coef5, #6
++        ld1             {v7.8b},  [x3], x1
++        sqxtun          v2.8b,  \coef0
++        srshr           \coef6, \coef6, #6
++        sqxtun          v3.8b,  \coef1
++        srshr           \coef7, \coef7, #6
++        uaddw           \coef2, \coef2, v4.8b
++        ld1             {\tmp1},  [x0], x1
++        uaddw           \coef3, \coef3, v5.8b
++        ld1             {\tmp2},  [x3], x1
++        sqxtun          v4.8b,  \coef2
++        sub             x0,  x0,  x1, lsl #2
++        sub             x3,  x3,  x1, lsl #2
++        sqxtun          v5.8b,  \coef3
++        uaddw           \coef4, \coef4, v6.8b
++        st1             {v2.8b},  [x0], x1
++        uaddw           \coef5, \coef5, v7.8b
++        st1             {v3.8b},  [x3], x1
++        sqxtun          v6.8b,  \coef4
++        st1             {v4.8b},  [x0], x1
++        sqxtun          v7.8b,  \coef5
++        st1             {v5.8b},  [x3], x1
++        uaddw           \coef6, \coef6, \tmp1
++        st1             {v6.8b},  [x0], x1
++        uaddw           \coef7, \coef7, \tmp2
++        st1             {v7.8b},  [x3], x1
++        sqxtun          \tmp1,  \coef6
++        sqxtun          \tmp2,  \coef7
++        st1             {\tmp1},  [x0], x1
++        st1             {\tmp2},  [x3], x1
++.endm
++
++// Read a vertical 8x16 slice out of a 16x16 matrix, do a transform on it,
++// transpose into a horizontal 16x8 slice and store.
++// x0 = dst (temp buffer)
++// x1 = slice offset
++// x2 = src
++// x9 = input stride
++.macro itxfm16_1d_funcs txfm
++function \txfm\()16_1d_8x16_pass1_neon
++        mov             x14, x30
++
++        movi            v2.8h, #0
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load_clear      \i,  x2,  x9
++.endr
++
++        bl              \txfm\()16
++
++        // Do two 8x8 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the two
++        // transposed 8x8 blocks.
++        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
++        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
++
++        // Store the transposed 8x8 blocks horizontally.
++        cmp             x1,  #8
++        b.eq            1f
++.irp i, 16, 24, 17, 25, 18, 26, 19, 27, 20, 28, 21, 29, 22, 30, 23, 31
++        store           \i,  x0,  #16
++.endr
++        br              x14
++1:
++        // Special case: For the last input column (x1 == 8),
++        // which would be stored as the last row in the temp buffer,
++        // don't store the first 8x8 block, but keep it in registers
++        // for the first slice of the second pass (where it is the
++        // last 8x8 block).
++.irp i, 24, 25, 26, 27, 28, 29, 30, 31
++        add             x0,  x0,  #16
++        store           \i,  x0,  #16
++.endr
++        mov             v24.16b, v16.16b
++        mov             v25.16b, v17.16b
++        mov             v26.16b, v18.16b
++        mov             v27.16b, v19.16b
++        mov             v28.16b, v20.16b
++        mov             v29.16b, v21.16b
++        mov             v30.16b, v22.16b
++        mov             v31.16b, v23.16b
++        br              x14
++endfunc
++
++// Read a vertical 8x16 slice out of a 16x16 matrix, do a transform on it,
++// load the destination pixels (from a similar 8x16 slice), add and store back.
++// x0 = dst
++// x1 = dst stride
++// x2 = src (temp buffer)
++// x3 = slice offset
++// x9 = temp buffer stride
++function \txfm\()16_1d_8x16_pass2_neon
++        mov             x14, x30
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load            \i,  x2,  x9
++.endr
++        cbz             x3,  1f
++.irp i, 24, 25, 26, 27, 28, 29, 30, 31
++        load            \i,  x2,  x9
++.endr
++1:
++
++        add             x3,  x0,  x1
++        lsl             x1,  x1,  #1
++        bl              \txfm\()16
++
++        load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
++        load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
++
++        br              x14
++endfunc
++.endm
++
++itxfm16_1d_funcs idct
++itxfm16_1d_funcs iadst
++
++.macro itxfm_func16x16 txfm1, txfm2
++function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #1
++        b.eq            idct16x16_dc_add_neon
++.endif
++        mov             x15, x30
++        // iadst16 requires clobbering v8-v15, but idct16 doesn't need to.
++.ifnc \txfm1\()_\txfm2,idct_idct
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++.endif
++
++        sub             sp,  sp,  #512
++
++        mov             x4,  x0
++        mov             x5,  x1
++        mov             x6,  x2
++
++        movrel          x10, idct_coeffs
++.ifnc \txfm1\()_\txfm2,idct_idct
++        movrel          x11, iadst16_coeffs
++.endif
++.ifc \txfm1,idct
++        ld1             {v0.8h,v1.8h}, [x10]
++.endif
++        mov             x9,  #32
++
++.ifc \txfm1\()_\txfm2,idct_idct
++        cmp             w3,  #10
++        b.le            idct16x16_quarter_add_neon
++        cmp             w3,  #38
++        b.le            idct16x16_half_add_neon
++.endif
++
++.irp i, 0, 8
++        add             x0,  sp,  #(\i*32)
++.ifc \txfm1\()_\txfm2,idct_idct
++.if \i == 8
++        cmp             w3,  #38
++        b.le            1f
++.endif
++.endif
++        mov             x1,  #\i
++        add             x2,  x6,  #(\i*2)
++        bl              \txfm1\()16_1d_8x16_pass1_neon
++.endr
++.ifc \txfm1\()_\txfm2,iadst_idct
++        ld1             {v0.8h,v1.8h}, [x10]
++.endif
++
++.ifc \txfm1\()_\txfm2,idct_idct
++        b               3f
++1:
++        // Set v24-v31 to zero, for the in-register passthrough of
++        // coefficients to pass 2. Since we only do two slices, this can
++        // only ever happen for the second slice. So we only need to store
++        // zeros to the temp buffer for the second half of the buffer.
++        // Move x0 to the second half, and use x9 == 32 as increment.
++        add             x0,  x0,  #16
++.irp i, 24, 25, 26, 27, 28, 29, 30, 31
++        movi_v          \i,  .16b, #0
++        st1             {v24.8h},  [x0], x9
++.endr
++3:
++.endif
++
++.irp i, 0, 8
++        add             x0,  x4,  #(\i)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*2)
++        mov             x3,  #\i
++        bl              \txfm2\()16_1d_8x16_pass2_neon
++.endr
++
++        add             sp,  sp,  #512
++.ifnc \txfm1\()_\txfm2,idct_idct
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++.endif
++        br              x15
++endfunc
++.endm
++
++itxfm_func16x16 idct,  idct
++itxfm_func16x16 iadst, idct
++itxfm_func16x16 idct,  iadst
++itxfm_func16x16 iadst, iadst
++
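++// Partial 16x16 transforms: for small eob values (checked in the full
++// function above) only the first 4 (quarter) or 8 (half) input rows can
++// hold nonzero coefficients, so only those rows are loaded and transformed.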
++function idct16_1d_8x16_pass1_quarter_neon
++        mov             x14, x30
++        movi            v2.8h, #0
++.irp i, 16, 17, 18, 19
++        load_clear      \i,  x2,  x9
++.endr
++
++        bl              idct16_quarter
++
++        // Do two 8x8 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the two
++        // transposed 8x8 blocks.
++        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
++        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
++
++        // Store the transposed 8x8 blocks horizontally.
++        // The first 8x8 block is kept in registers for the second pass,
++        // store the rest in the temp buffer.
++        // Since only a 4x4 part of the input was nonzero, this means that
++        // only 4 rows are nonzero after transposing, and the second pass
++        // only reads the topmost 4 rows. Therefore only store the topmost
++        // 4 rows.
++        add             x0,  x0,  #16
++.irp i, 24, 25, 26, 27
++        store           \i,  x0,  x9
++.endr
++        br              x14
++endfunc
++
++function idct16_1d_8x16_pass2_quarter_neon
++        mov             x14, x30
++        cbz             x3,  1f
++.irp i, 16, 17, 18, 19
++        load            \i,  x2,  x9
++.endr
++1:
++
++        add             x3,  x0,  x1
++        lsl             x1,  x1,  #1
++        bl              idct16_quarter
++
++        load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
++        load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
++
++        br              x14
++endfunc
++
++function idct16_1d_8x16_pass1_half_neon
++        mov             x14, x30
++        movi            v2.8h, #0
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load_clear      \i,  x2,  x9
++.endr
++
++        bl              idct16_half
++
++        // Do two 8x8 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the two
++        // transposed 8x8 blocks.
++        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
++        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
++
++        // Store the transposed 8x8 blocks horizontally.
++        // The first 8x8 block is kept in registers for the second pass,
++        // store the rest in the temp buffer.
++        add             x0,  x0,  #16
++.irp i, 24, 25, 26, 27, 28, 29, 30, 31
++        store           \i,  x0,  x9
++.endr
++        br              x14
++endfunc
++
++function idct16_1d_8x16_pass2_half_neon
++        mov             x14, x30
++        cbz             x3,  1f
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load            \i,  x2,  x9
++.endr
++1:
++
++        add             x3,  x0,  x1
++        lsl             x1,  x1,  #1
++        bl              idct16_half
++
++        load_add_store  v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v16.8b, v17.8b
++        load_add_store  v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h, v16.8b, v17.8b
++
++        br              x14
++endfunc
++
++.macro idct16_partial size
++function idct16x16_\size\()_add_neon
++        add             x0,  sp,  #(0*32)
++        add             x2,  x6,  #(0*2)
++        bl              idct16_1d_8x16_pass1_\size\()_neon
++.irp i, 0, 8
++        add             x0,  x4,  #(\i)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*2)
++        mov             x3,  #\i
++        bl              idct16_1d_8x16_pass2_\size\()_neon
++.endr
++
++        add             sp,  sp,  #512
++        br              x15
++endfunc
++.endm
++
++idct16_partial quarter
++idct16_partial half
++
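++// dc-only special case (eob == 1): the single DC coefficient is scaled
++// twice by the first idct constant (the row and column passes reduce to
++// a multiply), and the resulting constant is added to all 32x32 outputs.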
++function idct32x32_dc_add_neon
++        movrel          x4,  idct_coeffs
++        ld1             {v0.4h}, [x4]
++
++        movi            v1.4h,  #0
++
++        ld1             {v2.h}[0], [x2]
++        smull           v2.4s,  v2.4h,  v0.h[0]
++        rshrn           v2.4h,  v2.4s,  #14
++        smull           v2.4s,  v2.4h,  v0.h[0]
++        rshrn           v2.4h,  v2.4s,  #14
++        dup             v2.8h,  v2.h[0]
++        st1             {v1.h}[0], [x2]
++
++        srshr           v0.8h,  v2.8h,  #6
++
++        mov             x3,  x0
++        mov             x4,  #32
++1:
++        // Loop to add the constant v0 into all 32x32 outputs
++        subs            x4,  x4,  #2
++        ld1             {v1.16b,v2.16b},  [x0], x1
++        uaddw           v16.8h, v0.8h,  v1.8b
++        uaddw2          v17.8h, v0.8h,  v1.16b
++        ld1             {v3.16b,v4.16b},  [x0], x1
++        uaddw           v18.8h, v0.8h,  v2.8b
++        uaddw2          v19.8h, v0.8h,  v2.16b
++        uaddw           v20.8h, v0.8h,  v3.8b
++        uaddw2          v21.8h, v0.8h,  v3.16b
++        uaddw           v22.8h, v0.8h,  v4.8b
++        uaddw2          v23.8h, v0.8h,  v4.16b
++        sqxtun          v1.8b,  v16.8h
++        sqxtun2         v1.16b, v17.8h
++        sqxtun          v2.8b,  v18.8h
++        sqxtun2         v2.16b, v19.8h
++        sqxtun          v3.8b,  v20.8h
++        sqxtun2         v3.16b, v21.8h
++        st1             {v1.16b,v2.16b},  [x3], x1
++        sqxtun          v4.8b,  v22.8h
++        sqxtun2         v4.16b, v23.8h
++        st1             {v3.16b,v4.16b},  [x3], x1
++        b.ne            1b
++
++        ret
++endfunc
++
++.macro idct32_end
++        butterfly_8h    v16, v5,  v4,  v5  // v16 = t16a, v5  = t19a
++        butterfly_8h    v17, v20, v23, v20 // v17 = t17,  v20 = t18
++        butterfly_8h    v18, v6,  v7,  v6  // v18 = t23a, v6  = t20a
++        butterfly_8h    v19, v21, v22, v21 // v19 = t22,  v21 = t21
++        butterfly_8h    v4,  v28, v28, v30 // v4  = t24a, v28 = t27a
++        butterfly_8h    v23, v26, v25, v26 // v23 = t25,  v26 = t26
++        butterfly_8h    v7,  v3,  v29, v31 // v7  = t31a, v3  = t28a
++        butterfly_8h    v22, v27, v24, v27 // v22 = t30,  v27 = t29
++
++        dmbutterfly     v27, v20, v0.h[2], v0.h[3], v24, v25, v30, v31        // v27 = t18a, v20 = t29a
++        dmbutterfly     v3,  v5,  v0.h[2], v0.h[3], v24, v25, v30, v31        // v3  = t19,  v5  = t28
++        dmbutterfly     v28, v6,  v0.h[2], v0.h[3], v24, v25, v30, v31, neg=1 // v28 = t27,  v6  = t20
++        dmbutterfly     v26, v21, v0.h[2], v0.h[3], v24, v25, v30, v31, neg=1 // v26 = t26a, v21 = t21a
++
++        butterfly_8h    v31, v24, v7,  v4  // v31 = t31,  v24 = t24
++        butterfly_8h    v30, v25, v22, v23 // v30 = t30a, v25 = t25a
++        butterfly_8h_r  v23, v16, v16, v18 // v23 = t23,  v16 = t16
++        butterfly_8h_r  v22, v17, v17, v19 // v22 = t22a, v17 = t17a
++        butterfly_8h    v18, v21, v27, v21 // v18 = t18,  v21 = t21
++        butterfly_8h_r  v27, v28, v5,  v28 // v27 = t27a, v28 = t28a
++        butterfly_8h    v29, v26, v20, v26 // v29 = t29,  v26 = t26
++        butterfly_8h    v19, v20, v3,  v6  // v19 = t19a, v20 = t20
++
++        dmbutterfly0    v27, v20, v27, v20, v2, v3, v4, v5, v6, v7 // v27 = t27,  v20 = t20
++        dmbutterfly0    v26, v21, v26, v21, v2, v3, v4, v5, v6, v7 // v26 = t26a, v21 = t21a
++        dmbutterfly0    v25, v22, v25, v22, v2, v3, v4, v5, v6, v7 // v25 = t25,  v22 = t22
++        dmbutterfly0    v24, v23, v24, v23, v2, v3, v4, v5, v6, v7 // v24 = t24a, v23 = t23a
++        ret
++.endm
++
++function idct32_odd
++        dmbutterfly     v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
++        dmbutterfly     v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
++        dmbutterfly     v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
++        dmbutterfly     v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
++        dmbutterfly     v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
++        dmbutterfly     v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
++        dmbutterfly     v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
++        dmbutterfly     v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
++
++        butterfly_8h    v4,  v24, v16, v24 // v4  = t16, v24 = t17
++        butterfly_8h    v5,  v20, v28, v20 // v5  = t19, v20 = t18
++        butterfly_8h    v6,  v26, v18, v26 // v6  = t20, v26 = t21
++        butterfly_8h    v7,  v22, v30, v22 // v7  = t23, v22 = t22
++        butterfly_8h    v28, v25, v17, v25 // v28 = t24, v25 = t25
++        butterfly_8h    v30, v21, v29, v21 // v30 = t27, v21 = t26
++        butterfly_8h    v29, v23, v31, v23 // v29 = t31, v23 = t30
++        butterfly_8h    v31, v27, v19, v27 // v31 = t28, v27 = t29
++
++        dmbutterfly     v23, v24, v0.h[4], v0.h[5], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
++        dmbutterfly     v27, v20, v0.h[4], v0.h[5], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
++        dmbutterfly     v21, v26, v0.h[6], v0.h[7], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
++        dmbutterfly     v25, v22, v0.h[6], v0.h[7], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
++        idct32_end
++endfunc
++
++function idct32_odd_half
++        dmbutterfly_h1  v16, v31, v8.h[0], v8.h[1], v4, v5, v6, v7 // v16 = t16a, v31 = t31a
++        dmbutterfly_h2  v24, v23, v8.h[2], v8.h[3], v4, v5, v6, v7 // v24 = t17a, v23 = t30a
++        dmbutterfly_h1  v20, v27, v8.h[4], v8.h[5], v4, v5, v6, v7 // v20 = t18a, v27 = t29a
++        dmbutterfly_h2  v28, v19, v8.h[6], v8.h[7], v4, v5, v6, v7 // v28 = t19a, v19 = t28a
++        dmbutterfly_h1  v18, v29, v9.h[0], v9.h[1], v4, v5, v6, v7 // v18 = t20a, v29 = t27a
++        dmbutterfly_h2  v26, v21, v9.h[2], v9.h[3], v4, v5, v6, v7 // v26 = t21a, v21 = t26a
++        dmbutterfly_h1  v22, v25, v9.h[4], v9.h[5], v4, v5, v6, v7 // v22 = t22a, v25 = t25a
++        dmbutterfly_h2  v30, v17, v9.h[6], v9.h[7], v4, v5, v6, v7 // v30 = t23a, v17 = t24a
++
++        butterfly_8h    v4,  v24, v16, v24 // v4  = t16, v24 = t17
++        butterfly_8h    v5,  v20, v28, v20 // v5  = t19, v20 = t18
++        butterfly_8h    v6,  v26, v18, v26 // v6  = t20, v26 = t21
++        butterfly_8h    v7,  v22, v30, v22 // v7  = t23, v22 = t22
++        butterfly_8h    v28, v25, v17, v25 // v28 = t24, v25 = t25
++        butterfly_8h    v30, v21, v29, v21 // v30 = t27, v21 = t26
++        butterfly_8h    v29, v23, v31, v23 // v29 = t31, v23 = t30
++        butterfly_8h    v31, v27, v19, v27 // v31 = t28, v27 = t29
++
++        dmbutterfly     v23, v24, v0.h[4], v0.h[5], v16, v17, v18, v19        // v23 = t17a, v24 = t30a
++        dmbutterfly     v27, v20, v0.h[4], v0.h[5], v16, v17, v18, v19, neg=1 // v27 = t29a, v20 = t18a
++        dmbutterfly     v21, v26, v0.h[6], v0.h[7], v16, v17, v18, v19        // v21 = t21a, v26 = t26a
++        dmbutterfly     v25, v22, v0.h[6], v0.h[7], v16, v17, v18, v19, neg=1 // v25 = t25a, v22 = t22a
++        idct32_end
++endfunc
++
++function idct32_odd_quarter
++        dsmull_h        v4,  v5,  v16, v8.h[0]
++        dsmull_h        v28, v29, v19, v8.h[7]
++        dsmull_h        v30, v31, v16, v8.h[1]
++        dsmull_h        v22, v23, v17, v9.h[6]
++        dsmull_h        v7,  v6,  v17, v9.h[7]
++        dsmull_h        v26, v27, v19, v8.h[6]
++        dsmull_h        v20, v21, v18, v9.h[0]
++        dsmull_h        v24, v25, v18, v9.h[1]
++
++        neg             v28.4s, v28.4s
++        neg             v29.4s, v29.4s
++        neg             v7.4s,  v7.4s
++        neg             v6.4s,  v6.4s
++
++        drshrn_h        v4,  v4,  v5,  #14
++        drshrn_h        v5,  v28, v29, #14
++        drshrn_h        v29, v30, v31, #14
++        drshrn_h        v28, v22, v23, #14
++        drshrn_h        v7,  v7,  v6,  #14
++        drshrn_h        v31, v26, v27, #14
++        drshrn_h        v6,  v20, v21, #14
++        drshrn_h        v30, v24, v25, #14
++
++        dmbutterfly_l   v16, v17, v18, v19, v29, v4,  v0.h[4], v0.h[5]
++        dmbutterfly_l   v27, v26, v20, v21, v31, v5,  v0.h[4], v0.h[5]
++        drshrn_h        v23, v16, v17, #14
++        drshrn_h        v24, v18, v19, #14
++        neg             v20.4s, v20.4s
++        neg             v21.4s, v21.4s
++        drshrn_h        v27, v27, v26, #14
++        drshrn_h        v20, v20, v21, #14
++        dmbutterfly_l   v16, v17, v18, v19, v30, v6,  v0.h[6], v0.h[7]
++        drshrn_h        v21, v16, v17, #14
++        drshrn_h        v26, v18, v19, #14
++        dmbutterfly_l   v16, v17, v18, v19, v28, v7,  v0.h[6], v0.h[7]
++        drshrn_h        v25, v16, v17, #14
++        neg             v18.4s, v18.4s
++        neg             v19.4s, v19.4s
++        drshrn_h        v22, v18, v19, #14
++
++        idct32_end
++endfunc
++
++.macro idct32_funcs suffix
++// Do a 32-point IDCT of an 8x32 slice out of a 32x32 matrix.
++// The 32-point IDCT can be decomposed into two 16-point IDCTs;
++// a normal IDCT16 with every other input component (the even ones, with
++// each output written twice), followed by a separate 16-point IDCT
++// of the odd inputs, added/subtracted onto the outputs of the first idct16.
++// x0 = dst (temp buffer)
++// x1 = unused
++// x2 = src
++// x9 = double input stride
++function idct32_1d_8x32_pass1\suffix\()_neon
++        mov             x14, x30
++        movi            v2.8h,  #0
++
++        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load_clear      \i, x2, x9
++.endr
++.endif
++
++        bl              idct16\suffix
++
++        // Do two 8x8 transposes. Originally, v16-v31 contain the
++        // 16 rows. Afterwards, v16-v23 and v24-v31 contain the
++        // two transposed 8x8 blocks.
++        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
++        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
++
++        // Store the registers a, b horizontally, followed by the
++        // same registers b, a mirrored.
++.macro store_rev a, b
++        // There's no rev128 instruction, but we reverse each 64-bit
++        // half, and then flip them using an ext with an 8-byte offset.
++        rev64           v3.8h, \b
++        st1             {\a},  [x0], #16
++        rev64           v2.8h, \a
++        ext             v3.16b, v3.16b, v3.16b, #8
++        st1             {\b},  [x0], #16
++        ext             v2.16b, v2.16b, v2.16b, #8
++        st1             {v3.8h},  [x0], #16
++        st1             {v2.8h},  [x0], #16
++.endm
++        store_rev       v16.8h, v24.8h
++        store_rev       v17.8h, v25.8h
++        store_rev       v18.8h, v26.8h
++        store_rev       v19.8h, v27.8h
++        store_rev       v20.8h, v28.8h
++        store_rev       v21.8h, v29.8h
++        store_rev       v22.8h, v30.8h
++        store_rev       v23.8h, v31.8h
++        sub             x0,  x0,  #512
++.purgem store_rev
++
++        // Move x2 back to the start of the input, and move
++        // to the first odd row
++.ifb \suffix
++        sub             x2,  x2,  x9, lsl #4
++.endif
++.ifc \suffix,_quarter
++        sub             x2,  x2,  x9, lsl #2
++.endif
++.ifc \suffix,_half
++        sub             x2,  x2,  x9, lsl #3
++.endif
++        add             x2,  x2,  #64
++
++        movi            v2.8h,  #0
++        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load_clear      \i, x2, x9
++.endr
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load_clear      \i, x2, x9
++.endr
++.endif
++
++        bl              idct32_odd\suffix
++
++        transpose_8x8H  v31, v30, v29, v28, v27, v26, v25, v24, v2, v3
++        transpose_8x8H  v23, v22, v21, v20, v19, v18, v17, v16, v2, v3
++
++        // Store the registers a, b horizontally,
++        // adding into the output first, and the mirrored,
++        // subtracted from the output.
++.macro store_rev a, b
++        ld1             {v4.8h},  [x0]
++        rev64           v3.8h, \b
++        add             v4.8h, v4.8h, \a
++        rev64           v2.8h, \a
++        st1             {v4.8h},  [x0], #16
++        ext             v3.16b, v3.16b, v3.16b, #8
++        ld1             {v5.8h},  [x0]
++        ext             v2.16b, v2.16b, v2.16b, #8
++        add             v5.8h, v5.8h, \b
++        st1             {v5.8h},  [x0], #16
++        ld1             {v6.8h},  [x0]
++        sub             v6.8h, v6.8h, v3.8h
++        st1             {v6.8h},  [x0], #16
++        ld1             {v7.8h},  [x0]
++        sub             v7.8h, v7.8h, v2.8h
++        st1             {v7.8h},  [x0], #16
++.endm
++
++        store_rev       v31.8h, v23.8h
++        store_rev       v30.8h, v22.8h
++        store_rev       v29.8h, v21.8h
++        store_rev       v28.8h, v20.8h
++        store_rev       v27.8h, v19.8h
++        store_rev       v26.8h, v18.8h
++        store_rev       v25.8h, v17.8h
++        store_rev       v24.8h, v16.8h
++.purgem store_rev
++        br              x14
++endfunc
++
++// This is mostly the same as 8x32_pass1, but without the transpose; it
++// uses the source as a temp buffer between the two idct passes, and
++// adds into the destination.
++// x0 = dst
++// x1 = dst stride
++// x2 = src (temp buffer)
++// x7 = negative double temp buffer stride
++// x9 = double temp buffer stride
++function idct32_1d_8x32_pass2\suffix\()_neon
++        mov             x14, x30
++        // v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #4
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #2
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #3
++.endif
++
++        bl              idct16\suffix
++
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        store           \i, x2, x9
++.endr
++
++        sub             x2,  x2,  x9, lsl #4
++        add             x2,  x2,  #64
++
++        // v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
++.ifb \suffix
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #4
++.endif
++.ifc \suffix,_quarter
++.irp i, 16, 17, 18, 19
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #2
++.endif
++.ifc \suffix,_half
++.irp i, 16, 17, 18, 19, 20, 21, 22, 23
++        load            \i, x2, x9
++.endr
++        sub             x2,  x2,  x9, lsl #3
++.endif
++        sub             x2,  x2,  #64
++
++        bl              idct32_odd\suffix
++
++.macro load_acc_store a, b, c, d, neg=0
++.if \neg == 0
++        ld1             {v4.8h},  [x2], x9
++        ld1             {v5.8h},  [x2], x9
++        add             v4.8h, v4.8h, \a
++        ld1             {v6.8h},  [x2], x9
++        add             v5.8h, v5.8h, \b
++        ld1             {v7.8h},  [x2], x9
++        add             v6.8h, v6.8h, \c
++        add             v7.8h, v7.8h, \d
++.else
++        ld1             {v4.8h},  [x2], x7
++        ld1             {v5.8h},  [x2], x7
++        sub             v4.8h, v4.8h, \a
++        ld1             {v6.8h},  [x2], x7
++        sub             v5.8h, v5.8h, \b
++        ld1             {v7.8h},  [x2], x7
++        sub             v6.8h, v6.8h, \c
++        sub             v7.8h, v7.8h, \d
++.endif
++        ld1             {v10.8b}, [x0], x1
++        ld1             {v11.8b}, [x0], x1
++        srshr           v4.8h, v4.8h, #6
++        ld1             {v2.8b}, [x0], x1
++        srshr           v5.8h, v5.8h, #6
++        uaddw           v4.8h, v4.8h, v10.8b
++        ld1             {v3.8b}, [x0], x1
++        srshr           v6.8h, v6.8h, #6
++        uaddw           v5.8h, v5.8h, v11.8b
++        srshr           v7.8h, v7.8h, #6
++        sub             x0,  x0,  x1, lsl #2
++        uaddw           v6.8h, v6.8h, v2.8b
++        sqxtun          v4.8b, v4.8h
++        uaddw           v7.8h, v7.8h, v3.8b
++        sqxtun          v5.8b, v5.8h
++        st1             {v4.8b}, [x0], x1
++        sqxtun          v6.8b, v6.8h
++        st1             {v5.8b}, [x0], x1
++        sqxtun          v7.8b, v7.8h
++        st1             {v6.8b}, [x0], x1
++        st1             {v7.8b}, [x0], x1
++.endm
++        load_acc_store  v31.8h, v30.8h, v29.8h, v28.8h
++        load_acc_store  v27.8h, v26.8h, v25.8h, v24.8h
++        load_acc_store  v23.8h, v22.8h, v21.8h, v20.8h
++        load_acc_store  v19.8h, v18.8h, v17.8h, v16.8h
++        sub             x2,  x2,  x9
++        load_acc_store  v16.8h, v17.8h, v18.8h, v19.8h, 1
++        load_acc_store  v20.8h, v21.8h, v22.8h, v23.8h, 1
++        load_acc_store  v24.8h, v25.8h, v26.8h, v27.8h, 1
++        load_acc_store  v28.8h, v29.8h, v30.8h, v31.8h, 1
++.purgem load_acc_store
++        br              x14
++endfunc
++.endm
++
++idct32_funcs
++idct32_funcs _quarter
++idct32_funcs _half
++
++const min_eob_idct_idct_32, align=4
++        .short  0, 34, 135, 336
++endconst
++
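++// Full 32x32 add function. eob == 1 goes to the dc-only path above,
++// eob <= 34 and eob <= 135 take the quarter and half paths below, and the
++// min_eob table lets pass 1 skip 8-column slices whose coefficients are
++// already known to be zero.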
++function ff_vp9_idct_idct_32x32_add_neon, export=1
++        cmp             w3,  #1
++        b.eq            idct32x32_dc_add_neon
++
++        movrel          x10, idct_coeffs
++
++        mov             x15, x30
++
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++
++        sub             sp,  sp,  #2048
++
++        mov             x4,  x0
++        mov             x5,  x1
++        mov             x6,  x2
++
++        // Double stride of the input, since we only read every other line
++        mov             x9,  #128
++        neg             x7,  x9
++
++        ld1             {v0.8h,v1.8h}, [x10], #32
++        ld1             {v8.8h,v9.8h}, [x10]
++
++        cmp             w3,  #34
++        b.le            idct32x32_quarter_add_neon
++        cmp             w3,  #135
++        b.le            idct32x32_half_add_neon
++
++        movrel          x12, min_eob_idct_idct_32, 2
++
++.irp i, 0, 8, 16, 24
++        add             x0,  sp,  #(\i*64)
++.if \i > 0
++        ldrh            w1,  [x12], #2
++        cmp             w3,  w1
++        mov             x1,  #(32 - \i)/4
++        b.le            1f
++.endif
++        add             x2,  x6,  #(\i*2)
++        bl              idct32_1d_8x32_pass1_neon
++.endr
++        b               3f
++
++1:
++        // Write zeros to the temp buffer for pass 2
++        movi            v16.8h,  #0
++        movi            v17.8h,  #0
++        movi            v18.8h,  #0
++        movi            v19.8h,  #0
++2:
++        subs            x1,  x1,  #1
++.rept 4
++        st1             {v16.8h,v17.8h,v18.8h,v19.8h},  [x0], #64
++.endr
++        b.ne            2b
++3:
++.irp i, 0, 8, 16, 24
++        add             x0,  x4,  #(\i)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*2)
++        bl              idct32_1d_8x32_pass2_neon
++.endr
++
++        add             sp,  sp,  #2048
++
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++
++        br              x15
++endfunc
++
++.macro idct32_partial size
++function idct32x32_\size\()_add_neon
++        add             x0,  sp,  #(0*64)
++        add             x2,  x6,  #(0*2)
++        bl              idct32_1d_8x32_pass1_\size\()_neon
++.ifc \size,half
++        add             x0,  sp,  #(8*64)
++        add             x2,  x6,  #(8*2)
++        bl              idct32_1d_8x32_pass1_\size\()_neon
++.endif
++.irp i, 0, 8, 16, 24
++        add             x0,  x4,  #(\i)
++        mov             x1,  x5
++        add             x2,  sp,  #(\i*2)
++        bl              idct32_1d_8x32_pass2_\size\()_neon
++.endr
++
++        add             sp,  sp,  #2048
++
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++
++        br              x15
++endfunc
++.endm
++
++idct32_partial quarter
++idct32_partial half
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9lpf_16bpp_neon.S b/media/ffvpx/libavcodec/aarch64/vp9lpf_16bpp_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9lpf_16bpp_neon.S
+@@ -0,0 +1,873 @@
++/*
++ * Copyright (c) 2017 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++#include "neon.S"
++
++
++.macro transpose_4x8H r0, r1, r2, r3, t4, t5, t6, t7
++        trn1            \t4\().8h,  \r0\().8h,  \r1\().8h
++        trn2            \t5\().8h,  \r0\().8h,  \r1\().8h
++        trn1            \t6\().8h,  \r2\().8h,  \r3\().8h
++        trn2            \t7\().8h,  \r2\().8h,  \r3\().8h
++
++        trn1            \r0\().4s,  \t4\().4s,  \t6\().4s
++        trn2            \r2\().4s,  \t4\().4s,  \t6\().4s
++        trn1            \r1\().4s,  \t5\().4s,  \t7\().4s
++        trn2            \r3\().4s,  \t5\().4s,  \t7\().4s
++.endm
++
++// The input to and output from this macro is in the registers v16-v31,
++// and v0-v7 are used as scratch registers.
++// p7 = v16 .. p3 = v20, p0 = v23, q0 = v24, q3 = v27, q7 = v31
++// Depending on the width of the loop filter, we either use v16-v19
++// and v28-v31 as temp registers, or v8-v15.
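++// w2, w3 and w4 hold the E, I and H thresholds (pre-scaled to the bit
++// depth by the frontends at the end of this file), w5 the flat threshold,
++// w6 the saturation shift amount and w7 the maximum pixel value.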
++.macro loop_filter wd, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8
++        dup             v0.8h,  w2                   // E
++        dup             v2.8h,  w3                   // I
++        dup             v3.8h,  w4                   // H
++
++        uabd            v4.8h,  v20.8h, v21.8h       // abs(p3 - p2)
++        uabd            v5.8h,  v21.8h, v22.8h       // abs(p2 - p1)
++        uabd            v6.8h,  v22.8h, v23.8h       // abs(p1 - p0)
++        uabd            v7.8h,  v24.8h, v25.8h       // abs(q0 - q1)
++        uabd            \tmp1\().8h,  v25.8h, v26.8h // abs(q1 - q2)
++        uabd            \tmp2\().8h,  v26.8h, v27.8h // abs(q2 - q3)
++        umax            v4.8h,  v4.8h,  v5.8h
++        umax            v5.8h,  v6.8h,  v7.8h
++        umax            \tmp1\().8h,  \tmp1\().8h, \tmp2\().8h
++        uabd            v6.8h,  v23.8h, v24.8h       // abs(p0 - q0)
++        umax            v4.8h,  v4.8h,  v5.8h
++        add             v6.8h,  v6.8h,  v6.8h        // abs(p0 - q0) * 2
++        uabd            v5.8h,  v22.8h, v25.8h       // abs(p1 - q1)
++        umax            v4.8h,  v4.8h,  \tmp1\().8h  // max(abs(p3 - p2), ..., abs(q2 - q3))
++        ushr            v5.8h,  v5.8h,  #1
++        cmhs            v4.8h,  v2.8h,  v4.8h        // max(abs()) <= I
++        add             v6.8h,  v6.8h,  v5.8h        // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1
++        cmhs            v6.8h,  v0.8h,  v6.8h
++        and             v4.16b, v4.16b, v6.16b       // fm
++
++        // If no pixels need filtering, just exit as soon as possible
++        mov             x11, v4.d[0]
++        mov             x12, v4.d[1]
++        adds            x11, x11, x12
++        b.ne            1f
++        br              x10
++1:
++
++.if \wd >= 8
++        dup             v0.8h,  w5
++
++        uabd            v6.8h,  v20.8h, v23.8h       // abs(p3 - p0)
++        uabd            v2.8h,  v21.8h, v23.8h       // abs(p2 - p0)
++        uabd            v1.8h,  v22.8h, v23.8h       // abs(p1 - p0)
++        uabd            \tmp1\().8h,  v25.8h, v24.8h // abs(q1 - q0)
++        uabd            \tmp2\().8h,  v26.8h, v24.8h // abs(q2 - q0)
++        uabd            \tmp3\().8h,  v27.8h, v24.8h // abs(q3 - q0)
++        umax            v6.8h,  v6.8h,  v2.8h
++        umax            v1.8h,  v1.8h,  \tmp1\().8h
++        umax            \tmp2\().8h,  \tmp2\().8h,  \tmp3\().8h
++.if \wd == 16
++        uabd            v7.8h,  v16.8h, v23.8h       // abs(p7 - p0)
++        umax            v6.8h,  v6.8h,  v1.8h
++        uabd            v2.8h,  v17.8h, v23.8h       // abs(p6 - p0)
++        umax            v6.8h,  v6.8h,  \tmp2\().8h
++        uabd            v1.8h,  v18.8h, v23.8h       // abs(p5 - p0)
++        cmhs            v6.8h,  v0.8h,  v6.8h        // flat8in
++        uabd            v8.8h,  v19.8h, v23.8h       // abs(p4 - p0)
++        and             v6.16b, v6.16b, v4.16b       // flat8in && fm
++        uabd            v9.8h,  v28.8h, v24.8h       // abs(q4 - q0)
++        bic             v4.16b, v4.16b, v6.16b       // fm && !flat8in
++        uabd            v10.8h, v29.8h, v24.8h       // abs(q5 - q0)
++        uabd            v11.8h, v30.8h, v24.8h       // abs(q6 - q0)
++        uabd            v12.8h, v31.8h, v24.8h       // abs(q7 - q0)
++
++        umax            v7.8h,  v7.8h,  v2.8h
++        umax            v1.8h,  v1.8h,  v8.8h
++        umax            v9.8h,  v9.8h,  v10.8h
++        umax            v11.8h, v11.8h, v12.8h
++        // The rest of the calculation of flat8out is interleaved below
++.else
++        // The rest of the calculation of flat8in is interleaved below
++.endif
++.endif
++
++        // Calculate the normal inner loop filter for 2 or 4 pixels
++        uabd            v5.8h,  v22.8h, v23.8h                  // abs(p1 - p0)
++.if \wd == 16
++        umax            v7.8h,  v7.8h,  v1.8h
++        umax            v9.8h,  v9.8h,  v11.8h
++.elseif \wd == 8
++        umax            v6.8h,  v6.8h,  v1.8h
++.endif
++        uabd            v1.8h,  v25.8h, v24.8h                  // abs(q1 - q0)
++.if \wd == 16
++        umax            v7.8h,  v7.8h,  v9.8h
++.elseif \wd == 8
++        umax            v6.8h,  v6.8h,  \tmp2\().8h
++.endif
++        dup             \tmp2\().8h,  w6                        // left shift for saturation
++        sub             \tmp1\().8h,  v22.8h,  v25.8h           // p1 - q1
++        neg             \tmp6\().8h,  \tmp2\().8h               // negative left shift after saturation
++        umax            v5.8h,  v5.8h,  v1.8h                   // max(abs(p1 - p0), abs(q1 - q0))
++        sub             \tmp3\().8h,  v24.8h,  v23.8h           // q0 - p0
++        movi            \tmp5\().8h,  #3
++.if \wd == 8
++        cmhs            v6.8h,  v0.8h,  v6.8h                   // flat8in
++.endif
++        cmhs            v5.8h,  v3.8h,  v5.8h                   // !hev
++.if \wd == 8
++        and             v6.16b, v6.16b, v4.16b                  // flat8in && fm
++.endif
++        sqshl           \tmp1\().8h,  \tmp1\().8h,  \tmp2\().8h
++.if \wd == 16
++        cmhs            v7.8h,  v0.8h,  v7.8h                   // flat8out
++.elseif \wd == 8
++        bic             v4.16b, v4.16b, v6.16b                  // fm && !flat8in
++.endif
++        and             v5.16b,  v5.16b,  v4.16b                // !hev && fm && !flat8in
++.if \wd == 16
++        and             v7.16b, v7.16b, v6.16b                  // flat8out && flat8in && fm
++.endif
++        sshl            \tmp1\().8h,  \tmp1\().8h,  \tmp6\().8h // av_clip_int2p(p1 - q1, BIT_DEPTH - 1)
++
++        mul             \tmp3\().8h,  \tmp3\().8h,  \tmp5\().8h // 3 * (q0 - p0)
++        bic             \tmp1\().16b, \tmp1\().16b, v5.16b      // if (!hev) av_clip_int8 = 0
++        movi            v2.8h,  #4
++        add             \tmp3\().8h,  \tmp3\().8h,  \tmp1\().8h // 3 * (q0 - p0) [+ av_clip_int8(p1 - q1)]
++        movi            v3.8h,  #3
++        sqshl           \tmp1\().8h,  \tmp3\().8h,  \tmp2\().8h
++        movi            \tmp5\().8h,  #0
++        sshl            \tmp1\().8h,  \tmp1\().8h,  \tmp6\().8h // av_clip_int2p(3 * (q0 - p0) [+ av_clip_int2p(p1 - q1)], BIT_DEPTH - 1) = f
++        dup             \tmp6\().8h,  w7                        // max pixel value
++.if \wd == 16
++        bic             v6.16b, v6.16b, v7.16b                  // fm && flat8in && !flat8out
++.endif
++
++        ushr            \tmp2\().8h,  \tmp6\().8h,  #1          // (1 << (BIT_DEPTH - 1)) - 1
++
++        add             \tmp3\().8h,  \tmp1\().8h,  v2.8h       // f + 4
++        add             \tmp4\().8h,  \tmp1\().8h,  v3.8h       // f + 3
++        smin            \tmp3\().8h,  \tmp3\().8h,  \tmp2\().8h // FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1)
++        smin            \tmp4\().8h,  \tmp4\().8h,  \tmp2\().8h // FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1)
++        sshr            \tmp3\().8h,  \tmp3\().8h,  #3          // f1
++        sshr            \tmp4\().8h,  \tmp4\().8h,  #3          // f2
++
++        add             v0.8h,   v23.8h,  \tmp4\().8h           // p0 + f2
++        sub             v2.8h,   v24.8h,  \tmp3\().8h           // q0 - f1
++        smin            v0.8h,   v0.8h,   \tmp6\().8h
++        smin            v2.8h,   v2.8h,   \tmp6\().8h
++        srshr           \tmp3\().8h, \tmp3\().8h, #1            // f = (f1 + 1) >> 1
++        smax            v0.8h,   v0.8h,   \tmp5\().8h           // out p0
++        smax            v2.8h,   v2.8h,   \tmp5\().8h           // out q0
++        bit             v23.16b, v0.16b,  v4.16b                // if (fm && !flat8in)
++        bit             v24.16b, v2.16b,  v4.16b
++
++        add             v0.8h,  v22.8h,  \tmp3\().8h            // p1 + f
++        sub             v2.8h,  v25.8h,  \tmp3\().8h            // q1 - f
++.if \wd >= 8
++        mov             x11, v6.d[0]
++.endif
++        smin            v0.8h,  v0.8h,  \tmp6\().8h
++        smin            v2.8h,  v2.8h,  \tmp6\().8h
++.if \wd >= 8
++        mov             x12, v6.d[1]
++.endif
++        smax            v0.8h,  v0.8h,  \tmp5\().8h             // out p1
++        smax            v2.8h,  v2.8h,  \tmp5\().8h             // out q1
++.if \wd >= 8
++        adds            x11, x11, x12
++.endif
++        bit             v22.16b, v0.16b,  v5.16b                // if (!hev && fm && !flat8in)
++        bit             v25.16b, v2.16b,  v5.16b
++
++        // If no pixels need flat8in, jump to flat8out
++        // (or to a writeout of the inner 4 pixels, for wd=8)
++.if \wd >= 8
++.if \wd == 16
++        b.eq            6f
++.else
++        b.ne            1f
++        br              x13
++1:
++.endif
++
++        // flat8in
++        add             \tmp1\().8h, v20.8h, v21.8h
++        add             \tmp3\().8h, v22.8h, v25.8h
++        add             \tmp5\().8h, v20.8h, v22.8h
++        add             \tmp7\().8h, v23.8h, v26.8h
++        add             v0.8h,  \tmp1\().8h, \tmp1\().8h
++        add             v0.8h,  v0.8h,  v23.8h
++        add             v0.8h,  v0.8h,  v24.8h
++        add             v0.8h,  v0.8h,  \tmp5\().8h
++        sub             \tmp3\().8h, \tmp3\().8h, \tmp1\().8h
++        sub             \tmp7\().8h, \tmp7\().8h, \tmp5\().8h
++        urshr           v2.8h,  v0.8h,  #3                      // out p2
++
++        add             v0.8h,  v0.8h,  \tmp3\().8h
++        add             \tmp1\().8h, v20.8h,  v23.8h
++        add             \tmp3\().8h, v24.8h,  v27.8h
++        urshr           v3.8h,  v0.8h,  #3                      // out p1
++
++        add             v0.8h,  v0.8h,  \tmp7\().8h
++        sub             \tmp3\().8h, \tmp3\().8h, \tmp1\().8h
++        add             \tmp5\().8h, v21.8h,  v24.8h
++        add             \tmp7\().8h, v25.8h,  v27.8h
++        urshr           v4.8h,  v0.8h,  #3                      // out p0
++
++        add             v0.8h,  v0.8h,  \tmp3\().8h
++        sub             \tmp7\().8h, \tmp7\().8h, \tmp5\().8h
++        add             \tmp1\().8h, v22.8h,  v25.8h
++        add             \tmp3\().8h, v26.8h,  v27.8h
++        urshr           v5.8h,  v0.8h,  #3                      // out q0
++
++        add             v0.8h,  v0.8h,  \tmp7\().8h
++        sub             \tmp3\().8h, \tmp3\().8h, \tmp1\().8h
++        urshr           \tmp5\().8h, v0.8h,  #3                 // out q1
++
++        add             v0.8h,  v0.8h,  \tmp3\().8h
++        // The output here is written back into the input registers. This doesn't
++        // matter for the flat8part below, since we only update those pixels
++        // which won't be touched below.
++        bit             v21.16b, v2.16b,  v6.16b
++        bit             v22.16b, v3.16b,  v6.16b
++        bit             v23.16b, v4.16b,  v6.16b
++        urshr           \tmp6\().8h,  v0.8h,  #3                // out q2
++        bit             v24.16b, v5.16b,  v6.16b
++        bit             v25.16b, \tmp5\().16b,  v6.16b
++        bit             v26.16b, \tmp6\().16b,  v6.16b
++.endif
++.if \wd == 16
++6:
++        orr             v2.16b,  v6.16b,  v7.16b
++        mov             x11, v2.d[0]
++        mov             x12, v2.d[1]
++        adds            x11, x11, x12
++        b.ne            1f
++        // If no pixels need either flat8in or flat8out, jump to a
++        // writeout of the inner 4 pixels
++        br              x14
++1:
++
++        mov             x11, v7.d[0]
++        mov             x12, v7.d[1]
++        adds            x11, x11, x12
++        b.ne            1f
++        // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
++        br              x15
++
++1:
++        // flat8out
++        // This writes all outputs into v2-v17 (skipping v6 and v16).
++        // If this part is skipped, the output is read from v21-v26 (which is the input
++        // to this section).
++        shl             v0.8h,   v16.8h,  #3     // 8 * v16
++        sub             v0.8h,   v0.8h,   v16.8h // 7 * v16
++        add             v0.8h,   v0.8h,   v17.8h
++        add             v8.8h,   v17.8h,  v18.8h
++        add             v10.8h,  v19.8h,  v20.8h
++        add             v0.8h,   v0.8h,   v8.8h
++        add             v8.8h,   v16.8h,  v17.8h
++        add             v12.8h,  v21.8h,  v22.8h
++        add             v0.8h,   v0.8h,   v10.8h
++        add             v10.8h,  v18.8h,  v25.8h
++        add             v14.8h,  v23.8h,  v24.8h
++        sub             v10.8h,  v10.8h,  v8.8h
++        add             v0.8h,   v0.8h,   v12.8h
++        add             v0.8h,   v0.8h,   v14.8h
++        add             v12.8h,  v16.8h,  v18.8h
++        add             v14.8h,  v19.8h,  v26.8h
++        urshr           v2.8h,   v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v10.8h
++        add             v8.8h,   v16.8h,  v19.8h
++        add             v10.8h,  v20.8h,  v27.8h
++        sub             v14.8h,  v14.8h,  v12.8h
++        bif             v2.16b,  v17.16b, v7.16b
++        urshr           v3.8h ,  v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v14.8h
++        add             v12.8h,  v16.8h,  v20.8h
++        add             v14.8h,  v21.8h,  v28.8h
++        sub             v10.8h,  v10.8h,  v8.8h
++        bif             v3.16b,  v18.16b, v7.16b
++        urshr           v4.8h,   v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v10.8h
++        add             v8.8h,   v16.8h,  v21.8h
++        add             v10.8h,  v22.8h,  v29.8h
++        sub             v14.8h,  v14.8h,  v12.8h
++        bif             v4.16b,  v19.16b, v7.16b
++        urshr           v5.8h,   v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v14.8h
++        add             v12.8h,  v16.8h,  v22.8h
++        add             v14.8h,  v23.8h,  v30.8h
++        sub             v10.8h,  v10.8h,  v8.8h
++        bif             v5.16b,  v20.16b, v7.16b
++        urshr           v6.8h,   v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v10.8h
++        add             v10.8h,  v16.8h,  v23.8h
++        sub             v14.8h,  v14.8h,  v12.8h
++        add             v12.8h,  v24.8h,  v31.8h
++        bif             v6.16b,  v21.16b, v7.16b
++        urshr           v8.8h,   v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v14.8h
++        sub             v10.8h,  v12.8h,  v10.8h
++        add             v12.8h,  v17.8h,  v24.8h
++        add             v14.8h,  v25.8h,  v31.8h
++        bif             v8.16b,  v22.16b, v7.16b
++        urshr           v9.8h,   v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v10.8h
++        sub             v14.8h,  v14.8h,  v12.8h
++        add             v12.8h,  v26.8h,  v31.8h
++        bif             v9.16b,  v23.16b, v7.16b
++        urshr           v10.8h,  v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v14.8h
++        add             v14.8h,  v18.8h,  v25.8h
++        add             v18.8h,  v19.8h,  v26.8h
++        sub             v12.8h,  v12.8h,  v14.8h
++        add             v14.8h,  v27.8h,  v31.8h
++        bif             v10.16b, v24.16b, v7.16b
++        urshr           v11.8h,  v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v12.8h
++        add             v12.8h,  v20.8h,  v27.8h
++        sub             v14.8h,  v14.8h,  v18.8h
++        add             v18.8h,  v28.8h,  v31.8h
++        bif             v11.16b, v25.16b, v7.16b
++        sub             v18.8h,  v18.8h,  v12.8h
++        urshr           v12.8h,  v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v14.8h
++        add             v14.8h,  v21.8h,  v28.8h
++        add             v20.8h,  v29.8h,  v31.8h
++        bif             v12.16b, v26.16b, v7.16b
++        urshr           v13.8h,  v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v18.8h
++        sub             v20.8h,  v20.8h,  v14.8h
++        add             v18.8h,  v22.8h,  v29.8h
++        add             v22.8h,  v30.8h,  v31.8h
++        bif             v13.16b, v27.16b, v7.16b
++        urshr           v14.8h,  v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v20.8h
++        sub             v22.8h,  v22.8h,  v18.8h
++        bif             v14.16b, v28.16b, v7.16b
++        urshr           v15.8h,  v0.8h,   #4
++
++        add             v0.8h,   v0.8h,   v22.8h
++        bif             v15.16b, v29.16b, v7.16b
++        urshr           v17.8h,  v0.8h,   #4
++        bif             v17.16b, v30.16b, v7.16b
++.endif
++.endm
++
++// For wd <= 8, we use v16-v19 and v28-v31 for temp registers,
++// while we need those for inputs/outputs in wd=16 and use v8-v15
++// for temp registers there instead.
++function vp9_loop_filter_4
++        loop_filter     4,  v16, v17, v18, v19, v28, v29, v30, v31
++        ret
++endfunc
++
++function vp9_loop_filter_8
++        loop_filter     8,  v16, v17, v18, v19, v28, v29, v30, v31
++        ret
++endfunc
++
++function vp9_loop_filter_16
++        loop_filter     16, v8,  v9,  v10, v11, v12, v13, v14, v15
++        ret
++endfunc
++
++.macro loop_filter_4
++        bl              vp9_loop_filter_4
++.endm
++
++.macro loop_filter_8
++        // calculate alternative 'return' targets
++        adr             x13, 6f
++        bl              vp9_loop_filter_8
++.endm
++
++.macro loop_filter_16
++        // calculate alternative 'return' targets
++        adr             x14, 7f
++        adr             x15, 8f
++        bl              vp9_loop_filter_16
++.endm
++
++
++// The public functions in this file have got the following signature:
++// void loop_filter(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr);
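++//
++// As an illustration, each "bpp_frontends" invocation further down (e.g.
++// "bpp_frontends vp9_loop_filter_v_4_8") emits one such entry point per bit
++// depth, ff_vp9_loop_filter_v_4_8_10_neon and ff_vp9_loop_filter_v_4_8_12_neon,
++// which scale the mb_lim/lim/hev_thr thresholds up by (bpp - 8) bits and then
++// call the shared \func\()_16_neon implementation.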
++
++.macro bpp_frontend func, bpp, push
++function ff_\func\()_\bpp\()_neon, export=1
++.if \push
++        mov             x16, x30
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++.endif
++        lsl             w2,  w2,  #\bpp - 8
++        lsl             w3,  w3,  #\bpp - 8
++        lsl             w4,  w4,  #\bpp - 8
++        mov             x5,  #1 << (\bpp - 8)
++        mov             x6,  #16 - \bpp
++        mov             x7,  #((1 << \bpp) - 1)
++.if \push
++        bl              \func\()_16_neon
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++        br              x16
++.else
++        b               \func\()_16_neon
++.endif
++endfunc
++.endm
++
++.macro bpp_frontends func, push=0
++        bpp_frontend    \func, 10, \push
++        bpp_frontend    \func, 12, \push
++.endm
++
++.macro bpp_frontend_rep func, suffix, int_suffix, dir, bpp, push
++function ff_\func\()_\suffix\()_\bpp\()_neon, export=1
++        mov             x16, x30
++.if \push
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++.endif
++        lsl             w2,  w2,  #\bpp - 8
++        lsl             w3,  w3,  #\bpp - 8
++        lsl             w4,  w4,  #\bpp - 8
++        mov             x5,  #1 << (\bpp - 8)
++        mov             x6,  #16 - \bpp
++        mov             x7,  #((1 << \bpp) - 1)
++        bl              \func\()_\int_suffix\()_16_neon
++.ifc \dir,h
++        add             x0,  x0,  x1, lsl #3
++.else
++        add             x0,  x0,  #16
++.endif
++        bl              \func\()_\int_suffix\()_16_neon
++.if \push
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++.endif
++        br              x16
++endfunc
++.endm
++
++.macro bpp_frontends_rep func, suffix, int_suffix, dir, push=0
++        bpp_frontend_rep \func, \suffix, \int_suffix, \dir, 10, \push
++        bpp_frontend_rep \func, \suffix, \int_suffix, \dir, 12, \push
++.endm
++
++.macro bpp_frontend_mix2 wd1, wd2, dir, bpp
++function ff_vp9_loop_filter_\dir\()_\wd1\()\wd2\()_16_\bpp\()_neon, export=1
++        mov             x16, x30
++        lsr             w8,  w2,  #8
++        lsr             w14, w3,  #8
++        lsr             w15, w4,  #8
++        and             w2,  w2,  #0xff
++        and             w3,  w3,  #0xff
++        and             w4,  w4,  #0xff
++        lsl             w2,  w2,  #\bpp - 8
++        lsl             w3,  w3,  #\bpp - 8
++        lsl             w4,  w4,  #\bpp - 8
++        mov             x5,  #1 << (\bpp - 8)
++        mov             x6,  #16 - \bpp
++        mov             x7,  #((1 << \bpp) - 1)
++        bl              vp9_loop_filter_\dir\()_\wd1\()_8_16_neon
++.ifc \dir,h
++        add             x0,  x0,  x1, lsl #3
++.else
++        add             x0,  x0,  #16
++.endif
++        lsl             w2,  w8,  #\bpp - 8
++        lsl             w3,  w14, #\bpp - 8
++        lsl             w4,  w15, #\bpp - 8
++        bl              vp9_loop_filter_\dir\()_\wd2\()_8_16_neon
++        br              x16
++endfunc
++.endm
++
++.macro bpp_frontends_mix2 wd1, wd2
++        bpp_frontend_mix2 \wd1, \wd2, v, 10
++        bpp_frontend_mix2 \wd1, \wd2, v, 12
++        bpp_frontend_mix2 \wd1, \wd2, h, 10
++        bpp_frontend_mix2 \wd1, \wd2, h, 12
++.endm
++
++function vp9_loop_filter_v_4_8_16_neon
++        mov             x10, x30
++        sub             x9,  x0,  x1, lsl #2
++        ld1             {v20.8h}, [x9], x1 // p3
++        ld1             {v24.8h}, [x0], x1 // q0
++        ld1             {v21.8h}, [x9], x1 // p2
++        ld1             {v25.8h}, [x0], x1 // q1
++        ld1             {v22.8h}, [x9], x1 // p1
++        ld1             {v26.8h}, [x0], x1 // q2
++        ld1             {v23.8h}, [x9], x1 // p0
++        ld1             {v27.8h}, [x0], x1 // q3
++        sub             x0,  x0,  x1, lsl #2
++        sub             x9,  x9,  x1, lsl #1
++
++        loop_filter_4
++
++        st1             {v22.8h}, [x9], x1
++        st1             {v24.8h}, [x0], x1
++        st1             {v23.8h}, [x9], x1
++        st1             {v25.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #1
++
++        br              x10
++endfunc
++
++bpp_frontends vp9_loop_filter_v_4_8
++
++function vp9_loop_filter_h_4_8_16_neon
++        mov             x10, x30
++        sub             x9,  x0,  #8
++        add             x0,  x9,  x1, lsl #2
++        ld1             {v20.8h}, [x9], x1
++        ld1             {v24.8h}, [x0], x1
++        ld1             {v21.8h}, [x9], x1
++        ld1             {v25.8h}, [x0], x1
++        ld1             {v22.8h}, [x9], x1
++        ld1             {v26.8h}, [x0], x1
++        ld1             {v23.8h}, [x9], x1
++        ld1             {v27.8h}, [x0], x1
++
++        sub             x9,  x9,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  #8
++
++        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        loop_filter_4
++
++        // Move x9 forward by 2 pixels; we don't need to rewrite the
++        // outermost 2 pixels since they aren't changed.
++        add             x9,  x9,  #4
++        add             x0,  x9,  x1, lsl #2
++
++        // We only will write the mid 4 pixels back; after the loop filter,
++        // these are in v22, v23, v24, v25, ordered as rows (8x4 pixels).
++        // We need to transpose them to columns, done with a 4x8 transpose
++        // (which in practice is two 4x4 transposes of the two 4x4 halves
++        // of the 8x4 pixels; into 4x8 pixels).
++        transpose_4x8H  v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.d}[0], [x9], x1
++        st1             {v22.d}[1], [x0], x1
++        st1             {v23.d}[0], [x9], x1
++        st1             {v23.d}[1], [x0], x1
++        st1             {v24.d}[0], [x9], x1
++        st1             {v24.d}[1], [x0], x1
++        st1             {v25.d}[0], [x9], x1
++        st1             {v25.d}[1], [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  #4
++
++        br              x10
++endfunc
++
++bpp_frontends vp9_loop_filter_h_4_8
++
++function vp9_loop_filter_v_8_8_16_neon
++        mov             x10, x30
++        sub             x9,  x0,  x1, lsl #2
++        ld1             {v20.8h}, [x9], x1 // p3
++        ld1             {v24.8h}, [x0], x1 // q0
++        ld1             {v21.8h}, [x9], x1 // p2
++        ld1             {v25.8h}, [x0], x1 // q1
++        ld1             {v22.8h}, [x9], x1 // p1
++        ld1             {v26.8h}, [x0], x1 // q2
++        ld1             {v23.8h}, [x9], x1 // p0
++        ld1             {v27.8h}, [x0], x1 // q3
++        sub             x9,  x9,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #2
++        add             x9,  x9,  x1
++
++        loop_filter_8
++
++        st1             {v21.8h}, [x9], x1
++        st1             {v24.8h}, [x0], x1
++        st1             {v22.8h}, [x9], x1
++        st1             {v25.8h}, [x0], x1
++        st1             {v23.8h}, [x9], x1
++        st1             {v26.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #1
++        sub             x0,  x0,  x1
++
++        br              x10
++6:
++        sub             x9,  x0,  x1, lsl #1
++        st1             {v22.8h}, [x9], x1
++        st1             {v24.8h}, [x0], x1
++        st1             {v23.8h}, [x9], x1
++        st1             {v25.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #1
++        br              x10
++endfunc
++
++bpp_frontends vp9_loop_filter_v_8_8
++
++function vp9_loop_filter_h_8_8_16_neon
++        mov             x10, x30
++        sub             x9,  x0,  #8
++        add             x0,  x9,  x1, lsl #2
++        ld1             {v20.8h}, [x9], x1
++        ld1             {v24.8h}, [x0], x1
++        ld1             {v21.8h}, [x9], x1
++        ld1             {v25.8h}, [x0], x1
++        ld1             {v22.8h}, [x9], x1
++        ld1             {v26.8h}, [x0], x1
++        ld1             {v23.8h}, [x9], x1
++        ld1             {v27.8h}, [x0], x1
++
++        sub             x9,  x9,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  #8
++
++        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        loop_filter_8
++
++        add             x0,  x9,  x1, lsl #2
++
++        // Even though only 6 pixels per row have been changed, we write the
++        // full 8 pixel registers.
++        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        st1             {v20.8h}, [x9], x1
++        st1             {v24.8h}, [x0], x1
++        st1             {v21.8h}, [x9], x1
++        st1             {v25.8h}, [x0], x1
++        st1             {v22.8h}, [x9], x1
++        st1             {v26.8h}, [x0], x1
++        st1             {v23.8h}, [x9], x1
++        st1             {v27.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  #8
++
++        br              x10
++6:
++        // If we didn't need to do the flat8in part, we use the same writeback
++        // as in loop_filter_h_4_8.
++        add             x9,  x9,  #4
++        add             x0,  x9,  x1, lsl #2
++        transpose_4x8H  v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.d}[0], [x9], x1
++        st1             {v22.d}[1], [x0], x1
++        st1             {v23.d}[0], [x9], x1
++        st1             {v23.d}[1], [x0], x1
++        st1             {v24.d}[0], [x9], x1
++        st1             {v24.d}[1], [x0], x1
++        st1             {v25.d}[0], [x9], x1
++        st1             {v25.d}[1], [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  #4
++        br              x10
++endfunc
++
++bpp_frontends vp9_loop_filter_h_8_8
++
++bpp_frontends_mix2 4, 4
++bpp_frontends_mix2 4, 8
++bpp_frontends_mix2 8, 4
++bpp_frontends_mix2 8, 8
++
++function vp9_loop_filter_v_16_8_16_neon
++        mov             x10, x30
++        sub             x9,  x0,  x1, lsl #3
++        ld1             {v16.8h}, [x9], x1 // p7
++        ld1             {v24.8h}, [x0], x1 // q0
++        ld1             {v17.8h}, [x9], x1 // p6
++        ld1             {v25.8h}, [x0], x1 // q1
++        ld1             {v18.8h}, [x9], x1 // p5
++        ld1             {v26.8h}, [x0], x1 // q2
++        ld1             {v19.8h}, [x9], x1 // p4
++        ld1             {v27.8h}, [x0], x1 // q3
++        ld1             {v20.8h}, [x9], x1 // p3
++        ld1             {v28.8h}, [x0], x1 // q4
++        ld1             {v21.8h}, [x9], x1 // p2
++        ld1             {v29.8h}, [x0], x1 // q5
++        ld1             {v22.8h}, [x9], x1 // p1
++        ld1             {v30.8h}, [x0], x1 // q6
++        ld1             {v23.8h}, [x9], x1 // p0
++        ld1             {v31.8h}, [x0], x1 // q7
++        sub             x9,  x9,  x1, lsl #3
++        sub             x0,  x0,  x1, lsl #3
++        add             x9,  x9,  x1
++
++        loop_filter_16
++
++        // If we did the flat8out part, we get the output in
++        // v2-v17 (skipping v7 and v16). x9 points to x0 - 7 * stride,
++        // store v2-v9 there, and v10-v17 into x0.
++        st1             {v2.8h},  [x9], x1
++        st1             {v10.8h}, [x0], x1
++        st1             {v3.8h},  [x9], x1
++        st1             {v11.8h}, [x0], x1
++        st1             {v4.8h},  [x9], x1
++        st1             {v12.8h}, [x0], x1
++        st1             {v5.8h},  [x9], x1
++        st1             {v13.8h}, [x0], x1
++        st1             {v6.8h},  [x9], x1
++        st1             {v14.8h}, [x0], x1
++        st1             {v8.8h},  [x9], x1
++        st1             {v15.8h}, [x0], x1
++        st1             {v9.8h},  [x9], x1
++        st1             {v17.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  x1
++
++        br              x10
++8:
++        add             x9,  x9,  x1, lsl #2
++        // If we didn't do the flat8out part, the output is left in the
++        // input registers.
++        st1             {v21.8h}, [x9], x1
++        st1             {v24.8h}, [x0], x1
++        st1             {v22.8h}, [x9], x1
++        st1             {v25.8h}, [x0], x1
++        st1             {v23.8h}, [x9], x1
++        st1             {v26.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #1
++        sub             x0,  x0,  x1
++        br              x10
++7:
++        sub             x9,  x0,  x1, lsl #1
++        st1             {v22.8h}, [x9], x1
++        st1             {v24.8h}, [x0], x1
++        st1             {v23.8h}, [x9], x1
++        st1             {v25.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #1
++        br              x10
++endfunc
++
++bpp_frontends vp9_loop_filter_v_16_8, push=1
++bpp_frontends_rep vp9_loop_filter_v_16, 16, 8, v, push=1
++
++function vp9_loop_filter_h_16_8_16_neon
++        mov             x10, x30
++        sub             x9,  x0,  #16
++        ld1             {v16.8h}, [x9], x1
++        ld1             {v24.8h}, [x0], x1
++        ld1             {v17.8h}, [x9], x1
++        ld1             {v25.8h}, [x0], x1
++        ld1             {v18.8h}, [x9], x1
++        ld1             {v26.8h}, [x0], x1
++        ld1             {v19.8h}, [x9], x1
++        ld1             {v27.8h}, [x0], x1
++        ld1             {v20.8h}, [x9], x1
++        ld1             {v28.8h}, [x0], x1
++        ld1             {v21.8h}, [x9], x1
++        ld1             {v29.8h}, [x0], x1
++        ld1             {v22.8h}, [x9], x1
++        ld1             {v30.8h}, [x0], x1
++        ld1             {v23.8h}, [x9], x1
++        ld1             {v31.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        sub             x9,  x9,  x1, lsl #3
++
++        // The 16x8 pixels read above are in two 8x8 blocks; the left
++        // half in v16-v23, and the right half in v24-v31. Do two 8x8 transposes
++        // of this, to get one column per register.
++        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
++        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
++
++        loop_filter_16
++
++        transpose_8x8H  v16, v2,  v3,  v4,  v5,  v6,  v8,  v9,  v0, v1
++        transpose_8x8H  v10, v11, v12, v13, v14, v15, v17, v31, v0, v1
++
++        st1             {v16.8h}, [x9], x1
++        st1             {v10.8h}, [x0], x1
++        st1             {v2.8h},  [x9], x1
++        st1             {v11.8h}, [x0], x1
++        st1             {v3.8h},  [x9], x1
++        st1             {v12.8h}, [x0], x1
++        st1             {v4.8h},  [x9], x1
++        st1             {v13.8h}, [x0], x1
++        st1             {v5.8h},  [x9], x1
++        st1             {v14.8h}, [x0], x1
++        st1             {v6.8h},  [x9], x1
++        st1             {v15.8h}, [x0], x1
++        st1             {v8.8h},  [x9], x1
++        st1             {v17.8h}, [x0], x1
++        st1             {v9.8h},  [x9], x1
++        st1             {v31.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++
++        br              x10
++8:
++        // The same writeback as in loop_filter_h_8_8
++        sub             x9,  x0,  #8
++        add             x0,  x9,  x1, lsl #2
++        transpose_8x8H  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        st1             {v20.8h}, [x9], x1
++        st1             {v24.8h}, [x0], x1
++        st1             {v21.8h}, [x9], x1
++        st1             {v25.8h}, [x0], x1
++        st1             {v22.8h}, [x9], x1
++        st1             {v26.8h}, [x0], x1
++        st1             {v23.8h}, [x9], x1
++        st1             {v27.8h}, [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  #8
++        br              x10
++7:
++        // The same writeback as in loop_filter_h_4_8
++        sub             x9,  x0,  #4
++        add             x0,  x9,  x1, lsl #2
++        transpose_4x8H  v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.d}[0], [x9], x1
++        st1             {v22.d}[1], [x0], x1
++        st1             {v23.d}[0], [x9], x1
++        st1             {v23.d}[1], [x0], x1
++        st1             {v24.d}[0], [x9], x1
++        st1             {v24.d}[1], [x0], x1
++        st1             {v25.d}[0], [x9], x1
++        st1             {v25.d}[1], [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        add             x0,  x0,  #4
++        br              x10
++endfunc
++
++bpp_frontends vp9_loop_filter_h_16_8, push=1
++bpp_frontends_rep vp9_loop_filter_h_16, 16, 8, h, push=1
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9lpf_neon.S b/media/ffvpx/libavcodec/aarch64/vp9lpf_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9lpf_neon.S
+@@ -0,0 +1,1334 @@
++/*
++ * Copyright (c) 2016 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++#include "neon.S"
++
++
++// The main loop filter macro is templated and can produce filters for
++// vectors of 8 or 16 bytes. The register mapping throughout the filter
++// is close to identical to the arm version (please try to maintain this,
++// if either is changed!). When the arm version uses e.g. d20 for the
++// input variable p3, the aarch64 version uses v20.8b or v20.16b, depending
++// on vector length.
++//
++// The number of elements in the vector is passed in via the macro parameter
++// \sz, which is either .8b or .16b. For simple instructions that don't
++// lengthen or narrow things, this can easily be templated like this:
++//      uabd            v4\sz,  v20\sz, v21\sz
++//
++// For instructions that lengthen or narrow content, the arm version would
++// have used q registers. For these instructions, we have macros that expand
++// into either a single e.g. uaddl instruction, or into a uaddl + uaddl2
++// pair, depending on the \sz parameter. Wherever the arm version would have
++// used a q register, these macros instead take two v registers, i.e. q3
++// is mapped to v6+v7. For the case with 8 byte input vectors, such a
++// lengthening operation is only stored in v6.8h (what was in q3 in the arm
++// case), while the 16 byte input vectors will use v6.8h + v7.8h.
++// Such a macro invocation would look like this:
++//      uaddl_sz        v8.8h,  v9.8h,  v17, v18, \sz
++//
++// That is, in the 8 byte input vector case, the second register in these
++// register pairs will be unused.
++// Unfortunately, this makes the code quite hard to read. For readability,
++// see the arm version instead.
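++//
++// Concretely, with \sz == .16b the example invocation above expands to
++//      uaddl           v8.8h,  v17.8b,  v18.8b
++//      uaddl2          v9.8h,  v17.16b, v18.16b
++// while with \sz == .8b only the first uaddl is emitted and v9 is left
++// untouched (cf. the uaddl_sz macro below).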
++
++
++.macro add_sz dst1, dst2, in1, in2, in3, in4, sz
++        add             \dst1,  \in1,  \in3
++.ifc \sz, .16b
++        add             \dst2,  \in2,  \in4
++.endif
++.endm
++
++.macro sub_sz dst1, dst2, in1, in2, in3, in4, sz
++        sub             \dst1,  \in1,  \in3
++.ifc \sz, .16b
++        sub             \dst2,  \in2,  \in4
++.endif
++.endm
++
++.macro uaddw_sz dst1, dst2, in1, in2, in3, sz
++        uaddw           \dst1,  \in1, \in3\().8b
++.ifc \sz, .16b
++        uaddw2          \dst2,  \in2, \in3\().16b
++.endif
++.endm
++
++.macro usubw_sz dst1, dst2, in1, in2, in3, sz
++        usubw           \dst1,  \in1, \in3\().8b
++.ifc \sz, .16b
++        usubw2          \dst2,  \in2, \in3\().16b
++.endif
++.endm
++
++.macro usubl_sz dst1, dst2, in1, in2, sz
++        usubl           \dst1,  \in1\().8b,  \in2\().8b
++.ifc \sz, .16b
++        usubl2          \dst2,  \in1\().16b, \in2\().16b
++.endif
++.endm
++
++.macro sqxtn_sz dst, in1, in2, sz
++        sqxtn           \dst\().8b,  \in1
++.ifc \sz, .16b
++        sqxtn2          \dst\().16b, \in2
++.endif
++.endm
++
++.macro sqxtun_sz dst, in1, in2, sz
++        sqxtun          \dst\().8b,  \in1
++.ifc \sz, .16b
++        sqxtun2         \dst\().16b, \in2
++.endif
++.endm
++
++.macro mul_sz dst1, dst2, in1, in2, in3, in4, sz
++        mul             \dst1,  \in1,  \in3
++.ifc \sz, .16b
++        mul             \dst2,  \in2,  \in4
++.endif
++.endm
++
++.macro saddw_sz dst1, dst2, in1, in2, in3, sz
++        saddw           \dst1,  \in1, \in3\().8b
++.ifc \sz, .16b
++        saddw2          \dst2,  \in2, \in3\().16b
++.endif
++.endm
++
++.macro ssubw_sz dst1, dst2, in1, in2, in3, sz
++        ssubw           \dst1,  \in1, \in3\().8b
++.ifc \sz, .16b
++        ssubw2          \dst2,  \in2, \in3\().16b
++.endif
++.endm
++
++.macro uxtl_sz dst1, dst2, in, sz
++        uxtl            \dst1,  \in\().8b
++.ifc \sz, .16b
++        uxtl2           \dst2,  \in\().16b
++.endif
++.endm
++
++.macro uaddl_sz dst1, dst2, in1, in2, sz
++        uaddl           \dst1,  \in1\().8b,  \in2\().8b
++.ifc \sz, .16b
++        uaddl2          \dst2,  \in1\().16b, \in2\().16b
++.endif
++.endm
++
++.macro rshrn_sz dst, in1, in2, shift, sz
++        rshrn           \dst\().8b,  \in1, \shift
++.ifc \sz, .16b
++        rshrn2          \dst\().16b, \in2, \shift
++.endif
++.endm
++
++.macro ushll_sz dst1, dst2, in, shift, sz
++        ushll           \dst1,  \in\().8b,  \shift
++.ifc \sz, .16b
++        ushll2          \dst2,  \in\().16b, \shift
++.endif
++.endm
++
++// The input to and output from this macro is in the registers v16-v31,
++// and v0-v7 are used as scratch registers.
++// p7 = v16 .. p3 = v20, p0 = v23, q0 = v24, q3 = v27, q7 = v31
++// Depending on the width of the loop filter, we either use v16-v19
++// and v28-v31 as temp registers, or v8-v15.
++// When comparing to the arm version, tmpq1 == tmp1 + tmp2,
++// tmpq2 == tmp3 + tmp4, etc.
++.macro loop_filter wd, sz, mix, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8
++.if \mix == 0
++        dup             v0\sz,  w2        // E
++        dup             v2\sz,  w3        // I
++        dup             v3\sz,  w4        // H
++.else
++        dup             v0.8h,  w2        // E
++        dup             v2.8h,  w3        // I
++        dup             v3.8h,  w4        // H
++        rev16           v1.16b, v0.16b    // E
++        rev16           v4.16b, v2.16b    // I
++        rev16           v5.16b, v3.16b    // H
++        uzp1            v0.16b, v0.16b, v1.16b
++        uzp1            v2.16b, v2.16b, v4.16b
++        uzp1            v3.16b, v3.16b, v5.16b
++.endif
++
++        uabd            v4\sz,  v20\sz, v21\sz        // abs(p3 - p2)
++        uabd            v5\sz,  v21\sz, v22\sz        // abs(p2 - p1)
++        uabd            v6\sz,  v22\sz, v23\sz        // abs(p1 - p0)
++        uabd            v7\sz,  v24\sz, v25\sz        // abs(q0 - q1)
++        uabd            \tmp1\sz,  v25\sz, v26\sz     // abs(q1 - q2)
++        uabd            \tmp2\sz,  v26\sz, v27\sz     // abs(q2 - q3)
++        umax            v4\sz,  v4\sz,  v5\sz
++        umax            v5\sz,  v6\sz,  v7\sz
++        umax            \tmp1\sz, \tmp1\sz, \tmp2\sz
++        uabd            v6\sz,  v23\sz, v24\sz        // abs(p0 - q0)
++        umax            v4\sz,  v4\sz,  v5\sz
++        uqadd           v6\sz,  v6\sz,  v6\sz         // abs(p0 - q0) * 2
++        uabd            v5\sz,  v22\sz, v25\sz        // abs(p1 - q1)
++        umax            v4\sz,  v4\sz,  \tmp1\sz      // max(abs(p3 - p2), ..., abs(q2 - q3))
++        ushr            v5\sz,  v5\sz,  #1
++        cmhs            v4\sz,  v2\sz,  v4\sz         // max(abs()) <= I
++        uqadd           v6\sz,  v6\sz,  v5\sz         // abs(p0 - q0) * 2 + abs(p1 - q1) >> 1
++        cmhs            v5\sz,  v0\sz,  v6\sz
++        and             v4\sz,  v4\sz,  v5\sz         // fm
++
++        // If no pixels need filtering, just exit as soon as possible
++        mov             x5,  v4.d[0]
++.ifc \sz, .16b
++        mov             x6,  v4.d[1]
++        adds            x5,  x5,  x6
++        b.eq            9f
++.else
++        cbz             x5,  9f
++.endif
++
++.if \wd >= 8
++        movi            v0\sz,  #1
++
++        uabd            v6\sz,  v20\sz, v23\sz    // abs(p3 - p0)
++        uabd            v2\sz,  v21\sz, v23\sz    // abs(p2 - p0)
++        uabd            v1\sz,  v22\sz, v23\sz    // abs(p1 - p0)
++        uabd            \tmp1\sz,  v25\sz, v24\sz // abs(q1 - q0)
++        uabd            \tmp2\sz,  v26\sz, v24\sz // abs(q2 - q0)
++        uabd            \tmp3\sz,  v27\sz, v24\sz // abs(q3 - q0)
++        umax            v6\sz,  v6\sz,  v2\sz
++        umax            v1\sz,  v1\sz,  \tmp1\sz
++        umax            \tmp2\sz,  \tmp2\sz,  \tmp3\sz
++.if \wd == 16
++        uabd            v7\sz,  v16\sz, v23\sz    // abs(p7 - p0)
++        umax            v6\sz,  v6\sz,  v1\sz
++        uabd            v2\sz,  v17\sz, v23\sz    // abs(p6 - p0)
++        umax            v6\sz,  v6\sz,  \tmp2\sz
++        uabd            v1\sz,  v18\sz, v23\sz    // abs(p5 - p0)
++        cmhs            v6\sz,  v0\sz,  v6\sz     // flat8in
++        uabd            v8\sz,  v19\sz, v23\sz    // abs(p4 - p0)
++        and             v6\sz,  v6\sz,  v4\sz     // flat8in && fm
++        uabd            v9\sz,  v28\sz, v24\sz    // abs(q4 - q0)
++        bic             v4\sz,  v4\sz,  v6\sz     // fm && !flat8in
++        uabd            v10\sz, v29\sz, v24\sz    // abs(q5 - q0)
++        uabd            v11\sz, v30\sz, v24\sz    // abs(q6 - q0)
++        uabd            v12\sz, v31\sz, v24\sz    // abs(q7 - q0)
++
++        umax            v7\sz,  v7\sz,  v2\sz
++        umax            v1\sz,  v1\sz,  v8\sz
++        umax            v9\sz,  v9\sz,  v10\sz
++        umax            v11\sz, v11\sz, v12\sz
++        // The rest of the calculation of flat8out is interleaved below
++.else
++        // The rest of the calculation of flat8in is interleaved below
++.endif
++.endif
++
++        // Calculate the normal inner loop filter for 2 or 4 pixels
++        uabd            v5\sz,  v22\sz, v23\sz // abs(p1 - p0)
++.if \wd == 16
++        umax            v7\sz,  v7\sz,  v1\sz
++        umax            v9\sz,  v9\sz,  v11\sz
++.elseif \wd == 8
++        umax            v6\sz,  v6\sz,  v1\sz
++.endif
++        uabd            v1\sz,  v25\sz, v24\sz // abs(q1 - q0)
++.if \wd == 16
++        umax            v7\sz,  v7\sz,  v9\sz
++.elseif \wd == 8
++        umax            v6\sz,  v6\sz,  \tmp2\sz
++.endif
++        usubl_sz        \tmp1\().8h,  \tmp2\().8h,  v22,  v25, \sz // p1 - q1
++        umax            v5\sz,  v5\sz,  v1\sz  // max(abs(p1 - p0), abs(q1 - q0))
++.if \mix != 0
++        mov             v1.d[0], x11
++.endif
++        usubl_sz        \tmp3\().8h,  \tmp4\().8h,  v24,  v23, \sz // q0 - p0
++        movi            \tmp5\().8h,  #3
++.if \wd == 8
++        cmhs            v6\sz,  v0\sz,  v6\sz  // flat8in
++.endif
++.if \mix != 0
++        sxtl            v1.8h,  v1.8b
++.endif
++        cmhs            v5\sz,  v3\sz,  v5\sz  // !hev
++.if \wd == 8
++        // If a 4/8 or 8/4 mix is used, clear the relevant half of v6
++.if \mix != 0
++        and             v6\sz,  v6\sz,  v1.16b
++.endif
++        and             v6\sz,  v6\sz,  v4\sz  // flat8in && fm
++.endif
++        sqxtn_sz        \tmp1,        \tmp1\().8h,  \tmp2\().8h, \sz // av_clip_int8(p1 - q1)
++.if \wd == 16
++        cmhs            v7\sz,  v0\sz,  v7\sz  // flat8out
++.elseif \wd == 8
++        bic             v4\sz,  v4\sz,  v6\sz  // fm && !flat8in
++.endif
++        and             v5\sz,  v5\sz,  v4\sz  // !hev && fm && !flat8in
++.if \wd == 16
++        and             v7\sz,  v7\sz,  v6\sz  // flat8out && flat8in && fm
++.endif
++
++        mul_sz          \tmp3\().8h,  \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp5\().8h,  \tmp5\().8h, \sz // 3 * (q0 - p0)
++        bic             \tmp1\sz,  \tmp1\sz,  v5\sz    // if (!hev) av_clip_int8 = 0
++        movi            v2\sz,  #4
++        saddw_sz        \tmp3\().8h,  \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1, \sz // 3 * (q0 - p0) [+ av_clip_int8(p1 - q1)]
++        movi            v3\sz,  #3
++        sqxtn_sz        \tmp1,        \tmp3\().8h,  \tmp4\().8h, \sz       // f
++.if \wd == 16
++        bic             v6\sz,  v6\sz,  v7\sz  // fm && flat8in && !flat8out
++.endif
++
++        sqadd           \tmp3\sz,  \tmp1\sz,  v2\sz // FFMIN(f + 4, 127)
++        sqadd           \tmp4\sz,  \tmp1\sz,  v3\sz // FFMIN(f + 3, 127)
++        uxtl_sz         v0.8h,  v1.8h,  v23, \sz    // p0
++        sshr            \tmp3\sz,  \tmp3\sz,  #3    // f1
++        sshr            \tmp4\sz,  \tmp4\sz,  #3    // f2
++
++        uxtl_sz         v2.8h,  v3.8h,  v24, \sz    // q0
++        saddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp4, \sz // p0 + f2
++        ssubw_sz        v2.8h,  v3.8h,  v2.8h,  v3.8h,  \tmp3, \sz // q0 - f1
++        sqxtun_sz       v0,  v0.8h,  v1.8h,  \sz    // out p0
++        sqxtun_sz       v1,  v2.8h,  v3.8h,  \sz    // out q0
++        srshr           \tmp3\sz, \tmp3\sz, #1      // f = (f1 + 1) >> 1
++        bit             v23\sz, v0\sz,  v4\sz       // if (fm && !flat8in)
++        bit             v24\sz, v1\sz,  v4\sz
++
++        uxtl_sz         v0.8h,  v1.8h,  v22, \sz    // p1
++        uxtl_sz         v2.8h,  v3.8h,  v25, \sz    // q1
++.if \wd >= 8
++        mov             x5,  v6.d[0]
++.ifc \sz, .16b
++        mov             x6,  v6.d[1]
++.endif
++.endif
++        saddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3, \sz // p1 + f
++        ssubw_sz        v2.8h,  v3.8h,  v2.8h,  v3.8h,  \tmp3, \sz // q1 - f
++        sqxtun_sz       v0,  v0.8h,  v1.8h, \sz     // out p1
++        sqxtun_sz       v2,  v2.8h,  v3.8h, \sz     // out q1
++.if \wd >= 8
++.ifc \sz, .16b
++        adds            x5,  x5,  x6
++.endif
++.endif
++        bit             v22\sz, v0\sz,  v5\sz       // if (!hev && fm && !flat8in)
++        bit             v25\sz, v2\sz,  v5\sz
++
++        // If no pixels need flat8in, jump to flat8out
++        // (or to a writeout of the inner 4 pixels, for wd=8)
++.if \wd >= 8
++.ifc \sz, .16b
++        b.eq            6f
++.else
++        cbz             x5,  6f
++.endif
++
++        // flat8in
++        uaddl_sz        \tmp1\().8h, \tmp2\().8h,  v20, v21, \sz
++        uaddl_sz        \tmp3\().8h, \tmp4\().8h,  v22, v25, \sz
++        uaddl_sz        \tmp5\().8h, \tmp6\().8h,  v20, v22, \sz
++        uaddl_sz        \tmp7\().8h, \tmp8\().8h,  v23, v26, \sz
++        add_sz          v0.8h,  v1.8h,  \tmp1\().8h, \tmp2\().8h, \tmp1\().8h, \tmp2\().8h, \sz
++        uaddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v23, \sz
++        uaddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v24, \sz
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp5\().8h, \tmp6\().8h, \sz
++        sub_sz          \tmp3\().8h, \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1\().8h, \tmp2\().8h, \sz
++        sub_sz          \tmp7\().8h, \tmp8\().8h,  \tmp7\().8h, \tmp8\().8h,  \tmp5\().8h, \tmp6\().8h, \sz
++        rshrn_sz        v2,  v0.8h,  v1.8h,  #3,  \sz // out p2
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3\().8h, \tmp4\().8h, \sz
++        uaddl_sz        \tmp1\().8h, \tmp2\().8h,  v20,  v23, \sz
++        uaddl_sz        \tmp3\().8h, \tmp4\().8h,  v24,  v27, \sz
++        rshrn_sz        v3,  v0.8h,  v1.8h,  #3,  \sz // out p1
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp7\().8h, \tmp8\().8h, \sz
++        sub_sz          \tmp3\().8h, \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1\().8h, \tmp2\().8h, \sz
++        uaddl_sz        \tmp5\().8h, \tmp6\().8h,  v21,  v24, \sz
++        uaddl_sz        \tmp7\().8h, \tmp8\().8h,  v25,  v27, \sz
++        rshrn_sz        v4,  v0.8h,  v1.8h,  #3,  \sz // out p0
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3\().8h, \tmp4\().8h, \sz
++        sub_sz          \tmp7\().8h, \tmp8\().8h,  \tmp7\().8h, \tmp8\().8h,  \tmp5\().8h, \tmp6\().8h, \sz
++        uaddl_sz        \tmp1\().8h, \tmp2\().8h,  v22,  v25, \sz
++        uaddl_sz        \tmp3\().8h, \tmp4\().8h,  v26,  v27, \sz
++        rshrn_sz        v5,  v0.8h,  v1.8h,  #3,  \sz // out q0
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp7\().8h, \tmp8\().8h, \sz
++        sub_sz          \tmp3\().8h, \tmp4\().8h,  \tmp3\().8h, \tmp4\().8h,  \tmp1\().8h, \tmp2\().8h, \sz
++        rshrn_sz        \tmp5,  v0.8h,  v1.8h,  #3,  \sz // out q1
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  \tmp3\().8h, \tmp4\().8h, \sz
++        // The output here is written back into the input registers. This doesn't
++        // matter for the flat8part below, since we only update those pixels
++        // which won't be touched below.
++        bit             v21\sz, v2\sz,  v6\sz
++        bit             v22\sz, v3\sz,  v6\sz
++        bit             v23\sz, v4\sz,  v6\sz
++        rshrn_sz        \tmp6,  v0.8h,  v1.8h,  #3,  \sz // out q2
++        bit             v24\sz, v5\sz,  v6\sz
++        bit             v25\sz, \tmp5\sz,  v6\sz
++        bit             v26\sz, \tmp6\sz,  v6\sz
++.endif
++.if \wd == 16
++6:
++        orr             v2\sz,  v6\sz,  v7\sz
++        mov             x5,  v2.d[0]
++.ifc \sz, .16b
++        mov             x6,  v2.d[1]
++        adds            x5,  x5,  x6
++        b.ne            1f
++.else
++        cbnz            x5,  1f
++.endif
++        // If no pixels needed flat8in nor flat8out, jump to a
++        // writeout of the inner 4 pixels
++        br              x14
++1:
++
++        mov             x5,  v7.d[0]
++.ifc \sz, .16b
++        mov             x6,  v7.d[1]
++        adds            x5,  x5,  x6
++        b.ne            1f
++.else
++        cbnz            x5,  1f
++.endif
++        // If no pixels need flat8out, jump to a writeout of the inner 6 pixels
++        br              x15
++
++1:
++        // flat8out
++        // This writes all outputs into v2-v17 (skipping v6 and v16).
++        // If this part is skipped, the output is read from v21-v26 (which is the input
++        // to this section).
++        ushll_sz        v0.8h,  v1.8h,  v16,  #3,  \sz           // 8 * v16
++        usubw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v16, \sz // 7 * v16
++        uaddw_sz        v0.8h,  v1.8h,  v0.8h,  v1.8h,  v17, \sz
++        uaddl_sz        v8.8h,  v9.8h,  v17, v18, \sz
++        uaddl_sz        v10.8h, v11.8h, v19, v20, \sz
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v8.8h,  v9.8h,  \sz
++        uaddl_sz        v8.8h,  v9.8h,  v16, v17, \sz
++        uaddl_sz        v12.8h, v13.8h, v21, v22, \sz
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
++        uaddl_sz        v10.8h, v11.8h, v18, v25, \sz
++        uaddl_sz        v14.8h, v15.8h, v23, v24, \sz
++        sub_sz          v10.8h, v11.8h, v10.8h, v11.8h, v8.8h,  v9.8h,  \sz
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v12.8h, v13.8h, \sz
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
++        uaddl_sz        v12.8h, v13.8h, v16, v18, \sz
++        uaddl_sz        v14.8h, v15.8h, v19, v26, \sz
++        rshrn_sz        v2,  v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
++        uaddl_sz        v8.8h,  v9.8h,  v16, v19, \sz
++        uaddl_sz        v10.8h, v11.8h, v20, v27, \sz
++        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
++        bif             v2\sz,  v17\sz, v7\sz
++        rshrn_sz        v3,  v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
++        uaddl_sz        v12.8h, v13.8h, v16, v20, \sz
++        uaddl_sz        v14.8h, v15.8h, v21, v28, \sz
++        sub_sz          v10.8h, v11.8h, v10.8h, v11.8h, v8.8h,  v9.8h,  \sz
++        bif             v3\sz,  v18\sz, v7\sz
++        rshrn_sz        v4,  v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
++        uaddl_sz        v8.8h,  v9.8h,  v16, v21, \sz
++        uaddl_sz        v10.8h, v11.8h, v22, v29, \sz
++        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
++        bif             v4\sz,  v19\sz, v7\sz
++        rshrn_sz        v5,  v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
++        uaddl_sz        v12.8h, v13.8h, v16, v22, \sz
++        uaddl_sz        v14.8h, v15.8h, v23, v30, \sz
++        sub_sz          v10.8h, v11.8h, v10.8h, v11.8h, v8.8h,  v9.8h,  \sz
++        bif             v5\sz,  v20\sz, v7\sz
++        rshrn_sz        v6,  v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
++        uaddl_sz        v10.8h, v11.8h, v16, v23, \sz
++        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
++        uaddl_sz        v12.8h, v13.8h, v24, v31, \sz
++        bif             v6\sz,  v21\sz, v7\sz
++        rshrn_sz        v8,  v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
++        sub_sz          v10.8h, v11.8h, v12.8h, v13.8h, v10.8h, v11.8h, \sz
++        uaddl_sz        v12.8h, v13.8h, v17, v24, \sz
++        uaddl_sz        v14.8h, v15.8h, v25, v31, \sz
++        bif             v8\sz,  v22\sz, v7\sz
++        rshrn_sz        v9,  v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v10.8h, v11.8h, \sz
++        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v12.8h, v13.8h, \sz
++        uaddl_sz        v12.8h, v13.8h, v26, v31, \sz
++        bif             v9\sz,  v23\sz, v7\sz
++        rshrn_sz        v10, v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
++        uaddl_sz        v14.8h, v15.8h, v18, v25, \sz
++        uaddl_sz        v18.8h, v19.8h, v19, v26, \sz
++        sub_sz          v12.8h, v13.8h, v12.8h, v13.8h, v14.8h, v15.8h, \sz
++        uaddl_sz        v14.8h, v15.8h, v27, v31, \sz
++        bif             v10\sz, v24\sz, v7\sz
++        rshrn_sz        v11, v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v12.8h, v13.8h, \sz
++        uaddl_sz        v12.8h, v13.8h, v20, v27, \sz
++        sub_sz          v14.8h, v15.8h, v14.8h, v15.8h, v18.8h, v19.8h, \sz
++        uaddl_sz        v18.8h, v19.8h, v28, v31, \sz
++        bif             v11\sz, v25\sz, v7\sz
++        sub_sz          v18.8h, v19.8h, v18.8h, v19.8h, v12.8h, v13.8h, \sz
++        rshrn_sz        v12, v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v14.8h, v15.8h, \sz
++        uaddl_sz        v14.8h, v15.8h, v21, v28, \sz
++        uaddl_sz        v20.8h, v21.8h, v29, v31, \sz
++        bif             v12\sz, v26\sz, v7\sz
++        rshrn_sz        v13, v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v18.8h, v19.8h, \sz
++        sub_sz          v20.8h, v21.8h, v20.8h, v21.8h, v14.8h, v15.8h, \sz
++        uaddl_sz        v18.8h, v19.8h, v22, v29, \sz
++        uaddl_sz        v22.8h, v23.8h, v30, v31, \sz
++        bif             v13\sz, v27\sz, v7\sz
++        rshrn_sz        v14, v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v20.8h, v21.8h, \sz
++        sub_sz          v22.8h, v23.8h, v22.8h, v23.8h, v18.8h, v19.8h, \sz
++        bif             v14\sz, v28\sz, v7\sz
++        rshrn_sz        v15, v0.8h,  v1.8h,  #4,  \sz
++
++        add_sz          v0.8h,  v1.8h,  v0.8h,  v1.8h,  v22.8h, v23.8h, \sz
++        bif             v15\sz, v29\sz, v7\sz
++        rshrn_sz        v17, v0.8h,  v1.8h,  #4,  \sz
++        bif             v17\sz, v30\sz, v7\sz
++.endif
++.endm
++
++// For wd <= 8, we use v16-v19 and v28-v31 for temp registers,
++// while we need those for inputs/outputs in wd=16 and use v8-v15
++// for temp registers there instead.
++function vp9_loop_filter_4
++        loop_filter     4,  .8b,  0,    v16, v17, v18, v19, v28, v29, v30, v31
++        ret
++9:
++        br              x10
++endfunc
++
++function vp9_loop_filter_4_16b_mix_44
++        loop_filter     4,  .16b, 44,   v16, v17, v18, v19, v28, v29, v30, v31
++        ret
++9:
++        br              x10
++endfunc
++
++function vp9_loop_filter_8
++        loop_filter     8,  .8b,  0,    v16, v17, v18, v19, v28, v29, v30, v31
++        ret
++6:
++        br              x13
++9:
++        br              x10
++endfunc
++
++function vp9_loop_filter_8_16b_mix
++        loop_filter     8,  .16b, 88,   v16, v17, v18, v19, v28, v29, v30, v31
++        ret
++6:
++        br              x13
++9:
++        br              x10
++endfunc
++
++function vp9_loop_filter_16
++        loop_filter     16, .8b,  0,    v8,  v9,  v10, v11, v12, v13, v14, v15
++        ret
++9:
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++        br              x10
++endfunc
++
++function vp9_loop_filter_16_16b
++        loop_filter     16, .16b, 0,    v8,  v9,  v10, v11, v12, v13, v14, v15
++        ret
++9:
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++        br              x10
++endfunc
++
++.macro loop_filter_4
++        bl              vp9_loop_filter_4
++.endm
++
++.macro loop_filter_4_16b_mix mix
++        bl              vp9_loop_filter_4_16b_mix_\mix
++.endm
++
++.macro loop_filter_8
++        // calculate alternative 'return' targets
++        adr             x13, 6f
++        bl              vp9_loop_filter_8
++.endm
++
++.macro loop_filter_8_16b_mix mix
++        // calculate alternative 'return' targets
++        adr             x13, 6f
++.if \mix == 48
++        mov             x11, #0xffffffff00000000
++.elseif \mix == 84
++        mov             x11, #0x00000000ffffffff
++.else
++        mov             x11, #0xffffffffffffffff
++.endif
++        bl              vp9_loop_filter_8_16b_mix
++.endm
++
++.macro loop_filter_16
++        // calculate alternative 'return' targets
++        adr             x14, 7f
++        adr             x15, 8f
++        bl              vp9_loop_filter_16
++.endm
++
++.macro loop_filter_16_16b
++        // calculate alternative 'return' targets
++        adr             x14, 7f
++        adr             x15, 8f
++        bl              vp9_loop_filter_16_16b
++.endm
++
++
++// The public functions in this file have got the following signature:
++// void loop_filter(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr);
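++//
++// The _8 variants below filter a single 8 pixel edge per call, while the _16
++// variants (e.g. ff_vp9_loop_filter_v_44_16_neon) handle two adjacent 8 pixel
++// edges in one pass; the two digits in their names give the loop filter width
++// applied to each 8 pixel half.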
++
++function ff_vp9_loop_filter_v_4_8_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  x1, lsl #2
++        ld1             {v20.8b}, [x9], x1 // p3
++        ld1             {v24.8b}, [x0], x1 // q0
++        ld1             {v21.8b}, [x9], x1 // p2
++        ld1             {v25.8b}, [x0], x1 // q1
++        ld1             {v22.8b}, [x9], x1 // p1
++        ld1             {v26.8b}, [x0], x1 // q2
++        ld1             {v23.8b}, [x9], x1 // p0
++        ld1             {v27.8b}, [x0], x1 // q3
++        sub             x0,  x0,  x1, lsl #2
++        sub             x9,  x9,  x1, lsl #1
++
++        loop_filter_4
++
++        st1             {v22.8b}, [x9], x1
++        st1             {v24.8b}, [x0], x1
++        st1             {v23.8b}, [x9], x1
++        st1             {v25.8b}, [x0], x1
++
++        br              x10
++endfunc
++
++function ff_vp9_loop_filter_v_44_16_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  x1, lsl #2
++        ld1             {v20.16b}, [x9], x1 // p3
++        ld1             {v24.16b}, [x0], x1 // q0
++        ld1             {v21.16b}, [x9], x1 // p2
++        ld1             {v25.16b}, [x0], x1 // q1
++        ld1             {v22.16b}, [x9], x1 // p1
++        ld1             {v26.16b}, [x0], x1 // q2
++        ld1             {v23.16b}, [x9], x1 // p0
++        ld1             {v27.16b}, [x0], x1 // q3
++        sub             x0,  x0,  x1, lsl #2
++        sub             x9,  x9,  x1, lsl #1
++
++        loop_filter_4_16b_mix 44
++
++        st1             {v22.16b}, [x9], x1
++        st1             {v24.16b}, [x0], x1
++        st1             {v23.16b}, [x9], x1
++        st1             {v25.16b}, [x0], x1
++
++        br              x10
++endfunc
++
++function ff_vp9_loop_filter_h_4_8_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  #4
++        add             x0,  x9,  x1, lsl #2
++        ld1             {v20.8b}, [x9], x1
++        ld1             {v24.8b}, [x0], x1
++        ld1             {v21.8b}, [x9], x1
++        ld1             {v25.8b}, [x0], x1
++        ld1             {v22.8b}, [x9], x1
++        ld1             {v26.8b}, [x0], x1
++        ld1             {v23.8b}, [x9], x1
++        ld1             {v27.8b}, [x0], x1
++
++        sub             x9,  x9,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #2
++        // Move x0/x9 forward by 2 pixels; we don't need to rewrite the
++        // outermost 2 pixels since they aren't changed.
++        add             x9,  x9,  #2
++        add             x0,  x0,  #2
++
++        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        loop_filter_4
++
++        // We only will write the mid 4 pixels back; after the loop filter,
++        // these are in v22, v23, v24, v25, ordered as rows (8x4 pixels).
++        // We need to transpose them to columns, done with a 4x8 transpose
++        // (which in practice is two 4x4 transposes of the two 4x4 halves
++        // of the 8x4 pixels; into 4x8 pixels).
++        transpose_4x8B  v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.s}[0], [x9], x1
++        st1             {v22.s}[1], [x0], x1
++        st1             {v23.s}[0], [x9], x1
++        st1             {v23.s}[1], [x0], x1
++        st1             {v24.s}[0], [x9], x1
++        st1             {v24.s}[1], [x0], x1
++        st1             {v25.s}[0], [x9], x1
++        st1             {v25.s}[1], [x0], x1
++
++        br              x10
++endfunc
++
++function ff_vp9_loop_filter_h_44_16_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  #4
++        add             x0,  x9,  x1, lsl #3
++        ld1             {v20.8b},   [x9], x1
++        ld1             {v20.d}[1], [x0], x1
++        ld1             {v21.8b},   [x9], x1
++        ld1             {v21.d}[1], [x0], x1
++        ld1             {v22.8b},   [x9], x1
++        ld1             {v22.d}[1], [x0], x1
++        ld1             {v23.8b},   [x9], x1
++        ld1             {v23.d}[1], [x0], x1
++        ld1             {v24.8b},   [x9], x1
++        ld1             {v24.d}[1], [x0], x1
++        ld1             {v25.8b},   [x9], x1
++        ld1             {v25.d}[1], [x0], x1
++        ld1             {v26.8b},   [x9], x1
++        ld1             {v26.d}[1], [x0], x1
++        ld1             {v27.8b},   [x9], x1
++        ld1             {v27.d}[1], [x0], x1
++
++        sub             x9,  x9,  x1, lsl #3
++        sub             x0,  x0,  x1, lsl #3
++        add             x9,  x9,  #2
++        add             x0,  x0,  #2
++
++        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        loop_filter_4_16b_mix 44
++
++        transpose_4x16B v22, v23, v24, v25, v26, v27, v28, v29
++
++        st1             {v22.s}[0], [x9], x1
++        st1             {v22.s}[2], [x0], x1
++        st1             {v23.s}[0], [x9], x1
++        st1             {v23.s}[2], [x0], x1
++        st1             {v24.s}[0], [x9], x1
++        st1             {v24.s}[2], [x0], x1
++        st1             {v25.s}[0], [x9], x1
++        st1             {v25.s}[2], [x0], x1
++        st1             {v22.s}[1], [x9], x1
++        st1             {v22.s}[3], [x0], x1
++        st1             {v23.s}[1], [x9], x1
++        st1             {v23.s}[3], [x0], x1
++        st1             {v24.s}[1], [x9], x1
++        st1             {v24.s}[3], [x0], x1
++        st1             {v25.s}[1], [x9], x1
++        st1             {v25.s}[3], [x0], x1
++
++        br              x10
++endfunc
++
++function ff_vp9_loop_filter_v_8_8_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  x1, lsl #2
++        ld1             {v20.8b}, [x9], x1 // p3
++        ld1             {v24.8b}, [x0], x1 // q0
++        ld1             {v21.8b}, [x9], x1 // p2
++        ld1             {v25.8b}, [x0], x1 // q1
++        ld1             {v22.8b}, [x9], x1 // p1
++        ld1             {v26.8b}, [x0], x1 // q2
++        ld1             {v23.8b}, [x9], x1 // p0
++        ld1             {v27.8b}, [x0], x1 // q3
++        sub             x9,  x9,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #2
++        add             x9,  x9,  x1
++
++        loop_filter_8
++
++        st1             {v21.8b}, [x9], x1
++        st1             {v24.8b}, [x0], x1
++        st1             {v22.8b}, [x9], x1
++        st1             {v25.8b}, [x0], x1
++        st1             {v23.8b}, [x9], x1
++        st1             {v26.8b}, [x0], x1
++
++        br              x10
++6:
++        sub             x9,  x0,  x1, lsl #1
++        st1             {v22.8b}, [x9], x1
++        st1             {v24.8b}, [x0], x1
++        st1             {v23.8b}, [x9], x1
++        st1             {v25.8b}, [x0], x1
++        br              x10
++endfunc
++
++.macro mix_v_16 mix
++function ff_vp9_loop_filter_v_\mix\()_16_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  x1, lsl #2
++        ld1             {v20.16b}, [x9], x1 // p3
++        ld1             {v24.16b}, [x0], x1 // q0
++        ld1             {v21.16b}, [x9], x1 // p2
++        ld1             {v25.16b}, [x0], x1 // q1
++        ld1             {v22.16b}, [x9], x1 // p1
++        ld1             {v26.16b}, [x0], x1 // q2
++        ld1             {v23.16b}, [x9], x1 // p0
++        ld1             {v27.16b}, [x0], x1 // q3
++        sub             x9,  x9,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #2
++        add             x9,  x9,  x1
++
++        loop_filter_8_16b_mix \mix
++
++        st1             {v21.16b}, [x9], x1
++        st1             {v24.16b}, [x0], x1
++        st1             {v22.16b}, [x9], x1
++        st1             {v25.16b}, [x0], x1
++        st1             {v23.16b}, [x9], x1
++        st1             {v26.16b}, [x0], x1
++
++        br              x10
++6:
++        sub             x9,  x0,  x1, lsl #1
++        st1             {v22.16b}, [x9], x1
++        st1             {v24.16b}, [x0], x1
++        st1             {v23.16b}, [x9], x1
++        st1             {v25.16b}, [x0], x1
++        br              x10
++endfunc
++.endm
++
++mix_v_16 48
++mix_v_16 84
++mix_v_16 88
++
++function ff_vp9_loop_filter_h_8_8_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  #4
++        add             x0,  x9,  x1, lsl #2
++        ld1             {v20.8b}, [x9], x1
++        ld1             {v24.8b}, [x0], x1
++        ld1             {v21.8b}, [x9], x1
++        ld1             {v25.8b}, [x0], x1
++        ld1             {v22.8b}, [x9], x1
++        ld1             {v26.8b}, [x0], x1
++        ld1             {v23.8b}, [x9], x1
++        ld1             {v27.8b}, [x0], x1
++
++        sub             x9,  x9,  x1, lsl #2
++        sub             x0,  x0,  x1, lsl #2
++
++        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        loop_filter_8
++
++        // Even though only 6 pixels per row have been changed, we write the
++        // full 8 pixel registers.
++        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        st1             {v20.8b}, [x9], x1
++        st1             {v24.8b}, [x0], x1
++        st1             {v21.8b}, [x9], x1
++        st1             {v25.8b}, [x0], x1
++        st1             {v22.8b}, [x9], x1
++        st1             {v26.8b}, [x0], x1
++        st1             {v23.8b}, [x9], x1
++        st1             {v27.8b}, [x0], x1
++
++        br              x10
++6:
++        // If we didn't need to do the flat8in part, we use the same writeback
++        // as in loop_filter_h_4_8.
++        add             x9,  x9,  #2
++        add             x0,  x0,  #2
++        transpose_4x8B  v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.s}[0], [x9], x1
++        st1             {v22.s}[1], [x0], x1
++        st1             {v23.s}[0], [x9], x1
++        st1             {v23.s}[1], [x0], x1
++        st1             {v24.s}[0], [x9], x1
++        st1             {v24.s}[1], [x0], x1
++        st1             {v25.s}[0], [x9], x1
++        st1             {v25.s}[1], [x0], x1
++        br              x10
++endfunc
++
++.macro mix_h_16 mix
++function ff_vp9_loop_filter_h_\mix\()_16_neon, export=1
++        mov             x10, x30
++        sub             x9,  x0,  #4
++        add             x0,  x9,  x1, lsl #3
++        ld1             {v20.8b},   [x9], x1
++        ld1             {v20.d}[1], [x0], x1
++        ld1             {v21.8b},   [x9], x1
++        ld1             {v21.d}[1], [x0], x1
++        ld1             {v22.8b},   [x9], x1
++        ld1             {v22.d}[1], [x0], x1
++        ld1             {v23.8b},   [x9], x1
++        ld1             {v23.d}[1], [x0], x1
++        ld1             {v24.8b},   [x9], x1
++        ld1             {v24.d}[1], [x0], x1
++        ld1             {v25.8b},   [x9], x1
++        ld1             {v25.d}[1], [x0], x1
++        ld1             {v26.8b},   [x9], x1
++        ld1             {v26.d}[1], [x0], x1
++        ld1             {v27.8b},   [x9], x1
++        ld1             {v27.d}[1], [x0], x1
++
++        sub             x9,  x9,  x1, lsl #3
++        sub             x0,  x0,  x1, lsl #3
++
++        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        loop_filter_8_16b_mix \mix
++
++        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        st1             {v20.8b},   [x9], x1
++        st1             {v20.d}[1], [x0], x1
++        st1             {v21.8b},   [x9], x1
++        st1             {v21.d}[1], [x0], x1
++        st1             {v22.8b},   [x9], x1
++        st1             {v22.d}[1], [x0], x1
++        st1             {v23.8b},   [x9], x1
++        st1             {v23.d}[1], [x0], x1
++        st1             {v24.8b},   [x9], x1
++        st1             {v24.d}[1], [x0], x1
++        st1             {v25.8b},   [x9], x1
++        st1             {v25.d}[1], [x0], x1
++        st1             {v26.8b},   [x9], x1
++        st1             {v26.d}[1], [x0], x1
++        st1             {v27.8b},   [x9], x1
++        st1             {v27.d}[1], [x0], x1
++
++        br              x10
++6:
++        add             x9,  x9,  #2
++        add             x0,  x0,  #2
++        transpose_4x16B v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.s}[0], [x9], x1
++        st1             {v22.s}[2], [x0], x1
++        st1             {v23.s}[0], [x9], x1
++        st1             {v23.s}[2], [x0], x1
++        st1             {v24.s}[0], [x9], x1
++        st1             {v24.s}[2], [x0], x1
++        st1             {v25.s}[0], [x9], x1
++        st1             {v25.s}[2], [x0], x1
++        st1             {v22.s}[1], [x9], x1
++        st1             {v22.s}[3], [x0], x1
++        st1             {v23.s}[1], [x9], x1
++        st1             {v23.s}[3], [x0], x1
++        st1             {v24.s}[1], [x9], x1
++        st1             {v24.s}[3], [x0], x1
++        st1             {v25.s}[1], [x9], x1
++        st1             {v25.s}[3], [x0], x1
++        br              x10
++endfunc
++.endm
++
++mix_h_16 48
++mix_h_16 84
++mix_h_16 88
++
++function ff_vp9_loop_filter_v_16_8_neon, export=1
++        mov             x10, x30
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++        sub             x9,  x0,  x1, lsl #3
++        ld1             {v16.8b}, [x9], x1 // p7
++        ld1             {v24.8b}, [x0], x1 // q0
++        ld1             {v17.8b}, [x9], x1 // p6
++        ld1             {v25.8b}, [x0], x1 // q1
++        ld1             {v18.8b}, [x9], x1 // p5
++        ld1             {v26.8b}, [x0], x1 // q2
++        ld1             {v19.8b}, [x9], x1 // p4
++        ld1             {v27.8b}, [x0], x1 // q3
++        ld1             {v20.8b}, [x9], x1 // p3
++        ld1             {v28.8b}, [x0], x1 // q4
++        ld1             {v21.8b}, [x9], x1 // p2
++        ld1             {v29.8b}, [x0], x1 // q5
++        ld1             {v22.8b}, [x9], x1 // p1
++        ld1             {v30.8b}, [x0], x1 // q6
++        ld1             {v23.8b}, [x9], x1 // p0
++        ld1             {v31.8b}, [x0], x1 // q7
++        sub             x9,  x9,  x1, lsl #3
++        sub             x0,  x0,  x1, lsl #3
++        add             x9,  x9,  x1
++
++        loop_filter_16
++
++        // If we did the flat8out part, we get the output in
++        // v2-v17 (skipping v7 and v16). x9 points to x0 - 7 * stride,
++        // store v2-v9 there, and v10-v17 into x0.
++        st1             {v2.8b},  [x9], x1
++        st1             {v10.8b}, [x0], x1
++        st1             {v3.8b},  [x9], x1
++        st1             {v11.8b}, [x0], x1
++        st1             {v4.8b},  [x9], x1
++        st1             {v12.8b}, [x0], x1
++        st1             {v5.8b},  [x9], x1
++        st1             {v13.8b}, [x0], x1
++        st1             {v6.8b},  [x9], x1
++        st1             {v14.8b}, [x0], x1
++        st1             {v8.8b},  [x9], x1
++        st1             {v15.8b}, [x0], x1
++        st1             {v9.8b},  [x9], x1
++        st1             {v17.8b}, [x0], x1
++9:
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++        br              x10
++8:
++        add             x9,  x9,  x1, lsl #2
++        // If we didn't do the flat8out part, the output is left in the
++        // input registers.
++        st1             {v21.8b}, [x9], x1
++        st1             {v24.8b}, [x0], x1
++        st1             {v22.8b}, [x9], x1
++        st1             {v25.8b}, [x0], x1
++        st1             {v23.8b}, [x9], x1
++        st1             {v26.8b}, [x0], x1
++        b               9b
++7:
++        sub             x9,  x0,  x1, lsl #1
++        st1             {v22.8b}, [x9], x1
++        st1             {v24.8b}, [x0], x1
++        st1             {v23.8b}, [x9], x1
++        st1             {v25.8b}, [x0], x1
++        b               9b
++endfunc
++
++function ff_vp9_loop_filter_v_16_16_neon, export=1
++        mov             x10, x30
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++        sub             x9,  x0,  x1, lsl #3
++        ld1             {v16.16b}, [x9], x1 // p7
++        ld1             {v24.16b}, [x0], x1 // q0
++        ld1             {v17.16b}, [x9], x1 // p6
++        ld1             {v25.16b}, [x0], x1 // q1
++        ld1             {v18.16b}, [x9], x1 // p5
++        ld1             {v26.16b}, [x0], x1 // q2
++        ld1             {v19.16b}, [x9], x1 // p4
++        ld1             {v27.16b}, [x0], x1 // q3
++        ld1             {v20.16b}, [x9], x1 // p3
++        ld1             {v28.16b}, [x0], x1 // q4
++        ld1             {v21.16b}, [x9], x1 // p2
++        ld1             {v29.16b}, [x0], x1 // q5
++        ld1             {v22.16b}, [x9], x1 // p1
++        ld1             {v30.16b}, [x0], x1 // q6
++        ld1             {v23.16b}, [x9], x1 // p0
++        ld1             {v31.16b}, [x0], x1 // q7
++        sub             x9,  x9,  x1, lsl #3
++        sub             x0,  x0,  x1, lsl #3
++        add             x9,  x9,  x1
++
++        loop_filter_16_16b
++
++        st1             {v2.16b},  [x9], x1
++        st1             {v10.16b}, [x0], x1
++        st1             {v3.16b},  [x9], x1
++        st1             {v11.16b}, [x0], x1
++        st1             {v4.16b},  [x9], x1
++        st1             {v12.16b}, [x0], x1
++        st1             {v5.16b},  [x9], x1
++        st1             {v13.16b}, [x0], x1
++        st1             {v6.16b},  [x9], x1
++        st1             {v14.16b}, [x0], x1
++        st1             {v8.16b},  [x9], x1
++        st1             {v15.16b}, [x0], x1
++        st1             {v9.16b},  [x9], x1
++        st1             {v17.16b}, [x0], x1
++9:
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++        br              x10
++8:
++        add             x9,  x9,  x1, lsl #2
++        st1             {v21.16b}, [x9], x1
++        st1             {v24.16b}, [x0], x1
++        st1             {v22.16b}, [x9], x1
++        st1             {v25.16b}, [x0], x1
++        st1             {v23.16b}, [x9], x1
++        st1             {v26.16b}, [x0], x1
++        b               9b
++7:
++        sub             x9,  x0,  x1, lsl #1
++        st1             {v22.16b}, [x9], x1
++        st1             {v24.16b}, [x0], x1
++        st1             {v23.16b}, [x9], x1
++        st1             {v25.16b}, [x0], x1
++        b               9b
++endfunc
++
++function ff_vp9_loop_filter_h_16_8_neon, export=1
++        mov             x10, x30
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++        sub             x9,  x0,  #8
++        ld1             {v16.8b}, [x9], x1
++        ld1             {v24.8b}, [x0], x1
++        ld1             {v17.8b}, [x9], x1
++        ld1             {v25.8b}, [x0], x1
++        ld1             {v18.8b}, [x9], x1
++        ld1             {v26.8b}, [x0], x1
++        ld1             {v19.8b}, [x9], x1
++        ld1             {v27.8b}, [x0], x1
++        ld1             {v20.8b}, [x9], x1
++        ld1             {v28.8b}, [x0], x1
++        ld1             {v21.8b}, [x9], x1
++        ld1             {v29.8b}, [x0], x1
++        ld1             {v22.8b}, [x9], x1
++        ld1             {v30.8b}, [x0], x1
++        ld1             {v23.8b}, [x9], x1
++        ld1             {v31.8b}, [x0], x1
++        sub             x0,  x0,  x1, lsl #3
++        sub             x9,  x9,  x1, lsl #3
++
++        // The 16x8 pixels read above is in two 8x8 blocks; the left
++        // half in v16-v23, and the right half in v24-v31. Do two 8x8 transposes
++        // of this, to get one column per register.
++        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
++        transpose_8x8B  v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
++
++        loop_filter_16
++
++        transpose_8x8B  v16, v2,  v3,  v4,  v5,  v6,  v8,  v9,  v0, v1
++        transpose_8x8B  v10, v11, v12, v13, v14, v15, v17, v31, v0, v1
++
++        st1             {v16.8b}, [x9], x1
++        st1             {v10.8b}, [x0], x1
++        st1             {v2.8b},  [x9], x1
++        st1             {v11.8b}, [x0], x1
++        st1             {v3.8b},  [x9], x1
++        st1             {v12.8b}, [x0], x1
++        st1             {v4.8b},  [x9], x1
++        st1             {v13.8b}, [x0], x1
++        st1             {v5.8b},  [x9], x1
++        st1             {v14.8b}, [x0], x1
++        st1             {v6.8b},  [x9], x1
++        st1             {v15.8b}, [x0], x1
++        st1             {v8.8b},  [x9], x1
++        st1             {v17.8b}, [x0], x1
++        st1             {v9.8b},  [x9], x1
++        st1             {v31.8b}, [x0], x1
++9:
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++        br              x10
++8:
++        // The same writeback as in loop_filter_h_8_8
++        sub             x9,  x0,  #4
++        add             x0,  x9,  x1, lsl #2
++        transpose_8x8B  v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        st1             {v20.8b}, [x9], x1
++        st1             {v24.8b}, [x0], x1
++        st1             {v21.8b}, [x9], x1
++        st1             {v25.8b}, [x0], x1
++        st1             {v22.8b}, [x9], x1
++        st1             {v26.8b}, [x0], x1
++        st1             {v23.8b}, [x9], x1
++        st1             {v27.8b}, [x0], x1
++        b               9b
++7:
++        // The same writeback as in loop_filter_h_4_8
++        sub             x9,  x0,  #2
++        add             x0,  x9,  x1, lsl #2
++        transpose_4x8B  v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.s}[0], [x9], x1
++        st1             {v22.s}[1], [x0], x1
++        st1             {v23.s}[0], [x9], x1
++        st1             {v23.s}[1], [x0], x1
++        st1             {v24.s}[0], [x9], x1
++        st1             {v24.s}[1], [x0], x1
++        st1             {v25.s}[0], [x9], x1
++        st1             {v25.s}[1], [x0], x1
++        b               9b
++endfunc
++
++function ff_vp9_loop_filter_h_16_16_neon, export=1
++        mov             x10, x30
++        stp             d14, d15, [sp, #-0x10]!
++        stp             d12, d13, [sp, #-0x10]!
++        stp             d10, d11, [sp, #-0x10]!
++        stp             d8,  d9,  [sp, #-0x10]!
++        sub             x9,  x0,  #8
++        ld1             {v16.8b},   [x9], x1
++        ld1             {v24.8b},   [x0], x1
++        ld1             {v17.8b},   [x9], x1
++        ld1             {v25.8b},   [x0], x1
++        ld1             {v18.8b},   [x9], x1
++        ld1             {v26.8b},   [x0], x1
++        ld1             {v19.8b},   [x9], x1
++        ld1             {v27.8b},   [x0], x1
++        ld1             {v20.8b},   [x9], x1
++        ld1             {v28.8b},   [x0], x1
++        ld1             {v21.8b},   [x9], x1
++        ld1             {v29.8b},   [x0], x1
++        ld1             {v22.8b},   [x9], x1
++        ld1             {v30.8b},   [x0], x1
++        ld1             {v23.8b},   [x9], x1
++        ld1             {v31.8b},   [x0], x1
++        ld1             {v16.d}[1], [x9], x1
++        ld1             {v24.d}[1], [x0], x1
++        ld1             {v17.d}[1], [x9], x1
++        ld1             {v25.d}[1], [x0], x1
++        ld1             {v18.d}[1], [x9], x1
++        ld1             {v26.d}[1], [x0], x1
++        ld1             {v19.d}[1], [x9], x1
++        ld1             {v27.d}[1], [x0], x1
++        ld1             {v20.d}[1], [x9], x1
++        ld1             {v28.d}[1], [x0], x1
++        ld1             {v21.d}[1], [x9], x1
++        ld1             {v29.d}[1], [x0], x1
++        ld1             {v22.d}[1], [x9], x1
++        ld1             {v30.d}[1], [x0], x1
++        ld1             {v23.d}[1], [x9], x1
++        ld1             {v31.d}[1], [x0], x1
++        sub             x0,  x0,  x1, lsl #4
++        sub             x9,  x9,  x1, lsl #4
++
++        transpose_8x16B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
++        transpose_8x16B v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
++
++        loop_filter_16_16b
++
++        transpose_8x16B v16, v2,  v3,  v4,  v5,  v6,  v8,  v9,  v0, v1
++        transpose_8x16B v10, v11, v12, v13, v14, v15, v17, v31, v0, v1
++
++        st1             {v16.8b},   [x9], x1
++        st1             {v10.8b},   [x0], x1
++        st1             {v2.8b},    [x9], x1
++        st1             {v11.8b},   [x0], x1
++        st1             {v3.8b},    [x9], x1
++        st1             {v12.8b},   [x0], x1
++        st1             {v4.8b},    [x9], x1
++        st1             {v13.8b},   [x0], x1
++        st1             {v5.8b},    [x9], x1
++        st1             {v14.8b},   [x0], x1
++        st1             {v6.8b},    [x9], x1
++        st1             {v15.8b},   [x0], x1
++        st1             {v8.8b},    [x9], x1
++        st1             {v17.8b},   [x0], x1
++        st1             {v9.8b},    [x9], x1
++        st1             {v31.8b},   [x0], x1
++        st1             {v16.d}[1], [x9], x1
++        st1             {v10.d}[1], [x0], x1
++        st1             {v2.d}[1],  [x9], x1
++        st1             {v11.d}[1], [x0], x1
++        st1             {v3.d}[1],  [x9], x1
++        st1             {v12.d}[1], [x0], x1
++        st1             {v4.d}[1],  [x9], x1
++        st1             {v13.d}[1], [x0], x1
++        st1             {v5.d}[1],  [x9], x1
++        st1             {v14.d}[1], [x0], x1
++        st1             {v6.d}[1],  [x9], x1
++        st1             {v15.d}[1], [x0], x1
++        st1             {v8.d}[1],  [x9], x1
++        st1             {v17.d}[1], [x0], x1
++        st1             {v9.d}[1],  [x9], x1
++        st1             {v31.d}[1], [x0], x1
++9:
++        ldp             d8,  d9,  [sp], 0x10
++        ldp             d10, d11, [sp], 0x10
++        ldp             d12, d13, [sp], 0x10
++        ldp             d14, d15, [sp], 0x10
++        br              x10
++8:
++        sub             x9,  x0,  #4
++        add             x0,  x9,  x1, lsl #3
++        transpose_8x16B v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
++
++        st1             {v20.8b},   [x9], x1
++        st1             {v20.d}[1], [x0], x1
++        st1             {v21.8b},   [x9], x1
++        st1             {v21.d}[1], [x0], x1
++        st1             {v22.8b},   [x9], x1
++        st1             {v22.d}[1], [x0], x1
++        st1             {v23.8b},   [x9], x1
++        st1             {v23.d}[1], [x0], x1
++        st1             {v24.8b},   [x9], x1
++        st1             {v24.d}[1], [x0], x1
++        st1             {v25.8b},   [x9], x1
++        st1             {v25.d}[1], [x0], x1
++        st1             {v26.8b},   [x9], x1
++        st1             {v26.d}[1], [x0], x1
++        st1             {v27.8b},   [x9], x1
++        st1             {v27.d}[1], [x0], x1
++        b               9b
++7:
++        sub             x9,  x0,  #2
++        add             x0,  x9,  x1, lsl #3
++        transpose_4x16B v22, v23, v24, v25, v26, v27, v28, v29
++        st1             {v22.s}[0], [x9], x1
++        st1             {v22.s}[2], [x0], x1
++        st1             {v23.s}[0], [x9], x1
++        st1             {v23.s}[2], [x0], x1
++        st1             {v24.s}[0], [x9], x1
++        st1             {v24.s}[2], [x0], x1
++        st1             {v25.s}[0], [x9], x1
++        st1             {v25.s}[2], [x0], x1
++        st1             {v22.s}[1], [x9], x1
++        st1             {v22.s}[3], [x0], x1
++        st1             {v23.s}[1], [x9], x1
++        st1             {v23.s}[3], [x0], x1
++        st1             {v24.s}[1], [x9], x1
++        st1             {v24.s}[3], [x0], x1
++        st1             {v25.s}[1], [x9], x1
++        st1             {v25.s}[3], [x0], x1
++        b               9b
++endfunc
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9mc_16bpp_neon.S b/media/ffvpx/libavcodec/aarch64/vp9mc_16bpp_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9mc_16bpp_neon.S
+@@ -0,0 +1,631 @@
++/*
++ * Copyright (c) 2017 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++// All public functions in this file have the following signature:
++// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
++//                            const uint8_t *ref, ptrdiff_t ref_stride,
++//                            int h, int mx, int my);
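For orientation, a minimal C sketch of how an entry point with this signature is declared and called; only the prototype comes from the comment above, while the wrapper, block size and strides below are illustrative assumptions.

    #include <stdint.h>
    #include <stddef.h>

    /* Shared prototype documented above; strides are in bytes. */
    void ff_vp9_avg16_16_neon(uint8_t *dst, ptrdiff_t dst_stride,
                              const uint8_t *ref, ptrdiff_t ref_stride,
                              int h, int mx, int my);

    /* Hypothetical wrapper: average a packed 16x16 block of 16-bit pixels.
     * The copy/avg entry points ignore the subpel phases mx/my. */
    static void avg_16x16_block(uint16_t *dst, const uint16_t *ref)
    {
        ff_vp9_avg16_16_neon((uint8_t *)dst, 16 * sizeof(*dst),
                             (const uint8_t *)ref, 16 * sizeof(*ref),
                             16, 0, 0);
    }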
++
++function ff_vp9_copy128_aarch64, export=1
++1:
++        ldp             x5,  x6,  [x2]
++        ldp             x7,  x8,  [x2, #16]
++        stp             x5,  x6,  [x0]
++        ldp             x9,  x10, [x2, #32]
++        stp             x7,  x8,  [x0, #16]
++        subs            w4,  w4,  #1
++        ldp             x11, x12, [x2, #48]
++        stp             x9,  x10, [x0, #32]
++        stp             x11, x12, [x0, #48]
++        ldp             x5,  x6,  [x2, #64]
++        ldp             x7,  x8,  [x2, #80]
++        stp             x5,  x6,  [x0, #64]
++        ldp             x9,  x10, [x2, #96]
++        stp             x7,  x8,  [x0, #80]
++        ldp             x11, x12, [x2, #112]
++        stp             x9,  x10, [x0, #96]
++        stp             x11, x12, [x0, #112]
++        add             x2,  x2,  x3
++        add             x0,  x0,  x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg64_16_neon, export=1
++        mov             x5,  x0
++        sub             x1,  x1,  #64
++        sub             x3,  x3,  #64
++1:
++        ld1             {v4.8h,  v5.8h,  v6.8h,  v7.8h},  [x2], #64
++        ld1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x0], #64
++        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x2], x3
++        urhadd          v0.8h,  v0.8h,  v4.8h
++        urhadd          v1.8h,  v1.8h,  v5.8h
++        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], x1
++        urhadd          v2.8h,  v2.8h,  v6.8h
++        urhadd          v3.8h,  v3.8h,  v7.8h
++        subs            w4,  w4,  #1
++        urhadd          v16.8h, v16.8h, v20.8h
++        urhadd          v17.8h, v17.8h, v21.8h
++        st1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x5], #64
++        urhadd          v18.8h, v18.8h, v22.8h
++        urhadd          v19.8h, v19.8h, v23.8h
++        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg32_16_neon, export=1
++        mov             x5,  x0
++1:
++        ld1             {v4.8h,  v5.8h,  v6.8h,  v7.8h},  [x2], x3
++        ld1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x0], x1
++        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x2], x3
++        urhadd          v0.8h,  v0.8h,  v4.8h
++        urhadd          v1.8h,  v1.8h,  v5.8h
++        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], x1
++        urhadd          v2.8h,  v2.8h,  v6.8h
++        urhadd          v3.8h,  v3.8h,  v7.8h
++        subs            w4,  w4,  #2
++        urhadd          v16.8h, v16.8h, v20.8h
++        urhadd          v17.8h, v17.8h, v21.8h
++        st1             {v0.8h,  v1.8h,  v2.8h,  v3.8h},  [x5], x1
++        urhadd          v18.8h, v18.8h, v22.8h
++        urhadd          v19.8h, v19.8h, v23.8h
++        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg16_16_neon, export=1
++1:
++        ld1             {v2.8h, v3.8h},  [x2], x3
++        ld1             {v0.8h, v1.8h},  [x0]
++        urhadd          v0.8h,  v0.8h,  v2.8h
++        urhadd          v1.8h,  v1.8h,  v3.8h
++        subs            w4,  w4,  #1
++        st1             {v0.8h, v1.8h},  [x0], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg8_16_neon, export=1
++        mov             x5,  x0
++1:
++        ld1             {v2.8h},  [x2], x3
++        ld1             {v0.8h},  [x0], x1
++        ld1             {v3.8h},  [x2], x3
++        urhadd          v0.8h,  v0.8h,  v2.8h
++        ld1             {v1.8h},  [x0], x1
++        urhadd          v1.8h,  v1.8h,  v3.8h
++        subs            w4,  w4,  #2
++        st1             {v0.8h},  [x5], x1
++        st1             {v1.8h},  [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg4_16_neon, export=1
++        mov             x5,  x0
++1:
++        ld1             {v2.4h},  [x2], x3
++        ld1             {v0.4h},  [x0], x1
++        ld1             {v3.4h},  [x2], x3
++        urhadd          v0.4h,  v0.4h,  v2.4h
++        ld1             {v1.4h},  [x0], x1
++        urhadd          v1.4h,  v1.4h,  v3.4h
++        subs            w4,  w4,  #2
++        st1             {v0.4h},  [x5], x1
++        st1             {v1.8b},  [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++
++// Extract a vector from src1-src2 and src4-src5 (src1-src3 and src4-src6
++// for size >= 16), and multiply-accumulate into dst1 and dst5 (or
++// dst1-dst2 and dst5-dst6 for size >= 8 and dst1-dst4 and dst5-dst8
++// for size >= 16)
++.macro extmlal dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8, src1, src2, src3, src4, src5, src6, offset, size
++        ext             v20.16b, \src1\().16b, \src2\().16b, #(2*\offset)
++        ext             v22.16b, \src4\().16b, \src5\().16b, #(2*\offset)
++        smlal           \dst1\().4s, v20.4h, v0.h[\offset]
++        smlal           \dst5\().4s, v22.4h, v0.h[\offset]
++.if \size >= 16
++        ext             v21.16b, \src2\().16b, \src3\().16b, #(2*\offset)
++        ext             v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
++.endif
++.if \size >= 8
++        smlal2          \dst2\().4s, v20.8h, v0.h[\offset]
++        smlal2          \dst6\().4s, v22.8h, v0.h[\offset]
++.endif
++.if \size >= 16
++        smlal           \dst3\().4s, v21.4h, v0.h[\offset]
++        smlal           \dst7\().4s, v23.4h, v0.h[\offset]
++        smlal2          \dst4\().4s, v21.8h, v0.h[\offset]
++        smlal2          \dst8\().4s, v23.8h, v0.h[\offset]
++.endif
++.endm
++
++
++// Instantiate a horizontal filter function for the given size.
++// This can work on 4, 8 or 16 pixels in parallel; for larger
++// widths it will do 16 pixels at a time and loop horizontally.
++// The actual width (in bytes) is passed in x5, the height in w4 and
++// the filter coefficients in x9.
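As a scalar reference for what each instantiated function computes per output sample, a sketch follows; it assumes filter[] holds the eight signed taps selected from ff_vp9_subpel_filters, and the helper name is illustrative.

    #include <stdint.h>

    /* One 8-tap output sample at high bit depth: accumulate in 32 bits,
     * round and shift right by 7, clamp negatives to zero (sqrshrun) and
     * clamp to the maximum pixel value (the umin against the bpp mask). */
    static uint16_t filter8_sample(const uint16_t *src, const int16_t *filter, int bpp)
    {
        int32_t sum = 0;
        for (int k = 0; k < 8; k++)
            sum += (int32_t)src[k] * filter[k];
        sum = (sum + 64) >> 7;
        if (sum < 0)
            sum = 0;
        if (sum > (1 << bpp) - 1)
            sum = (1 << bpp) - 1;
        return (uint16_t)sum;
    }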
++.macro do_8tap_h type, size
++function \type\()_8tap_\size\()h
++        sub             x2,  x2,  #6
++        add             x6,  x0,  x1
++        add             x7,  x2,  x3
++        add             x1,  x1,  x1
++        add             x3,  x3,  x3
++        // Only size >= 16 loops horizontally and needs
++        // reduced dst stride
++.if \size >= 16
++        sub             x1,  x1,  x5
++.endif
++        // size >= 16 loads two qwords and increments x2,
++        // for size 4/8 it's enough with one qword and no
++        // postincrement
++.if \size >= 16
++        sub             x3,  x3,  x5
++        sub             x3,  x3,  #16
++.endif
++        // Load the filter vector
++        ld1             {v0.8h},  [x9]
++1:
++.if \size >= 16
++        mov             x9,  x5
++.endif
++        // Load src
++.if \size >= 16
++        ld1             {v5.8h,  v6.8h,  v7.8h},  [x2], #48
++        ld1             {v16.8h, v17.8h, v18.8h}, [x7], #48
++.else
++        ld1             {v5.8h,  v6.8h},  [x2]
++        ld1             {v16.8h, v17.8h}, [x7]
++.endif
++2:
++
++        smull           v1.4s,  v5.4h,  v0.h[0]
++        smull           v24.4s, v16.4h, v0.h[0]
++.if \size >= 8
++        smull2          v2.4s,  v5.8h,  v0.h[0]
++        smull2          v25.4s, v16.8h, v0.h[0]
++.endif
++.if \size >= 16
++        smull           v3.4s,  v6.4h,  v0.h[0]
++        smull           v26.4s, v17.4h, v0.h[0]
++        smull2          v4.4s,  v6.8h,  v0.h[0]
++        smull2          v27.4s, v17.8h, v0.h[0]
++.endif
++        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 1, \size
++        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 2, \size
++        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 3, \size
++        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 4, \size
++        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 5, \size
++        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 6, \size
++        extmlal         v1,  v2,  v3,  v4,  v24, v25, v26, v27, v5,  v6,  v7,  v16, v17, v18, 7, \size
++
++        // Round, shift and saturate
++        // The sqrshrun takes care of clamping negative values to zero, but
++        // we manually need to do umin with the max pixel value.
++        sqrshrun        v1.4h,  v1.4s,  #7
++        sqrshrun        v24.4h, v24.4s, #7
++.if \size >= 8
++        sqrshrun2       v1.8h,  v2.4s,  #7
++        sqrshrun2       v24.8h, v25.4s, #7
++        umin            v1.8h,  v1.8h,  v31.8h
++        umin            v24.8h, v24.8h, v31.8h
++.if \size >= 16
++        sqrshrun        v2.4h,  v3.4s,  #7
++        sqrshrun        v25.4h, v26.4s, #7
++        sqrshrun2       v2.8h,  v4.4s,  #7
++        sqrshrun2       v25.8h, v27.4s, #7
++        umin            v2.8h,  v2.8h,  v31.8h
++        umin            v25.8h, v25.8h, v31.8h
++.endif
++.else
++        umin            v1.4h,  v1.4h,  v31.4h
++        umin            v24.4h, v24.4h, v31.4h
++.endif
++        // Average
++.ifc \type,avg
++.if \size >= 16
++        ld1             {v3.8h,  v4.8h},  [x0]
++        ld1             {v29.8h, v30.8h}, [x6]
++        urhadd          v1.8h,  v1.8h,  v3.8h
++        urhadd          v2.8h,  v2.8h,  v4.8h
++        urhadd          v24.8h, v24.8h, v29.8h
++        urhadd          v25.8h, v25.8h, v30.8h
++.elseif \size >= 8
++        ld1             {v3.8h},  [x0]
++        ld1             {v4.8h},  [x6]
++        urhadd          v1.8h,  v1.8h,  v3.8h
++        urhadd          v24.8h, v24.8h, v4.8h
++.else
++        ld1             {v3.4h},  [x0]
++        ld1             {v4.4h},  [x6]
++        urhadd          v1.4h,  v1.4h,  v3.4h
++        urhadd          v24.4h, v24.4h, v4.4h
++.endif
++.endif
++        // Store and loop horizontally (for size >= 16)
++.if \size >= 16
++        subs            x9,  x9,  #32
++        st1             {v1.8h,  v2.8h},  [x0], #32
++        st1             {v24.8h, v25.8h}, [x6], #32
++        b.eq            3f
++        mov             v5.16b,  v7.16b
++        mov             v16.16b, v18.16b
++        ld1             {v6.8h,  v7.8h},  [x2], #32
++        ld1             {v17.8h, v18.8h}, [x7], #32
++        b               2b
++.elseif \size == 8
++        st1             {v1.8h},  [x0]
++        st1             {v24.8h}, [x6]
++.else // \size == 4
++        st1             {v1.4h},  [x0]
++        st1             {v24.4h}, [x6]
++.endif
++3:
++        // Loop vertically
++        add             x0,  x0,  x1
++        add             x6,  x6,  x1
++        add             x2,  x2,  x3
++        add             x7,  x7,  x3
++        subs            w4,  w4,  #2
++        b.ne            1b
++        ret
++endfunc
++.endm
++
++.macro do_8tap_h_size size
++do_8tap_h put, \size
++do_8tap_h avg, \size
++.endm
++
++do_8tap_h_size 4
++do_8tap_h_size 8
++do_8tap_h_size 16
++
++.macro do_8tap_h_func type, filter, offset, size, bpp
++function ff_vp9_\type\()_\filter\()\size\()_h_\bpp\()_neon, export=1
++        mvni            v31.8h, #((0xff << (\bpp - 8)) & 0xff), lsl #8
++        movrel          x6,  X(ff_vp9_subpel_filters), 256*\offset
++        cmp             w5,  #8
++        add             x9,  x6,  w5, uxtw #4
++        mov             x5,  #2*\size
++.if \size >= 16
++        b               \type\()_8tap_16h
++.else
++        b               \type\()_8tap_\size\()h
++.endif
++endfunc
++.endm
++
++.macro do_8tap_h_filters size, bpp
++do_8tap_h_func put, regular, 1, \size, \bpp
++do_8tap_h_func avg, regular, 1, \size, \bpp
++do_8tap_h_func put, sharp,   2, \size, \bpp
++do_8tap_h_func avg, sharp,   2, \size, \bpp
++do_8tap_h_func put, smooth,  0, \size, \bpp
++do_8tap_h_func avg, smooth,  0, \size, \bpp
++.endm
++
++.macro do_8tap_h_filters_bpp bpp
++do_8tap_h_filters 64, \bpp
++do_8tap_h_filters 32, \bpp
++do_8tap_h_filters 16, \bpp
++do_8tap_h_filters 8,  \bpp
++do_8tap_h_filters 4,  \bpp
++.endm
++
++do_8tap_h_filters_bpp 10
++do_8tap_h_filters_bpp 12
++
++
++// Vertical filters
++
++// Round, shift and saturate and store reg1-reg4
++.macro do_store4 reg1, reg2, reg3, reg4, tmp1, tmp2, tmp3, tmp4, minreg, type
++        sqrshrun        \reg1\().4h,  \reg1\().4s, #7
++        sqrshrun        \reg2\().4h,  \reg2\().4s, #7
++        sqrshrun        \reg3\().4h,  \reg3\().4s, #7
++        sqrshrun        \reg4\().4h,  \reg4\().4s, #7
++.ifc \type,avg
++        ld1             {\tmp1\().4h},  [x7], x1
++        ld1             {\tmp2\().4h},  [x7], x1
++        ld1             {\tmp3\().4h},  [x7], x1
++        ld1             {\tmp4\().4h},  [x7], x1
++.endif
++        umin            \reg1\().4h,  \reg1\().4h,  \minreg\().4h
++        umin            \reg2\().4h,  \reg2\().4h,  \minreg\().4h
++        umin            \reg3\().4h,  \reg3\().4h,  \minreg\().4h
++        umin            \reg4\().4h,  \reg4\().4h,  \minreg\().4h
++.ifc \type,avg
++        urhadd          \reg1\().4h,  \reg1\().4h,  \tmp1\().4h
++        urhadd          \reg2\().4h,  \reg2\().4h,  \tmp2\().4h
++        urhadd          \reg3\().4h,  \reg3\().4h,  \tmp3\().4h
++        urhadd          \reg4\().4h,  \reg4\().4h,  \tmp4\().4h
++.endif
++        st1             {\reg1\().4h},  [x0], x1
++        st1             {\reg2\().4h},  [x0], x1
++        st1             {\reg3\().4h},  [x0], x1
++        st1             {\reg4\().4h},  [x0], x1
++.endm
++
++// Round, shift and saturate and store reg1-8, where
++// reg1-2, reg3-4 etc pairwise correspond to 4 rows.
++.macro do_store8 reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, minreg, type
++        sqrshrun        \reg1\().4h,  \reg1\().4s, #7
++        sqrshrun2       \reg1\().8h,  \reg2\().4s, #7
++        sqrshrun        \reg2\().4h,  \reg3\().4s, #7
++        sqrshrun2       \reg2\().8h,  \reg4\().4s, #7
++        sqrshrun        \reg3\().4h,  \reg5\().4s, #7
++        sqrshrun2       \reg3\().8h,  \reg6\().4s, #7
++        sqrshrun        \reg4\().4h,  \reg7\().4s, #7
++        sqrshrun2       \reg4\().8h,  \reg8\().4s, #7
++.ifc \type,avg
++        ld1             {\reg5\().8h},  [x7], x1
++        ld1             {\reg6\().8h},  [x7], x1
++        ld1             {\reg7\().8h},  [x7], x1
++        ld1             {\reg8\().8h},  [x7], x1
++.endif
++        umin            \reg1\().8h,  \reg1\().8h,  \minreg\().8h
++        umin            \reg2\().8h,  \reg2\().8h,  \minreg\().8h
++        umin            \reg3\().8h,  \reg3\().8h,  \minreg\().8h
++        umin            \reg4\().8h,  \reg4\().8h,  \minreg\().8h
++.ifc \type,avg
++        urhadd          \reg1\().8h,  \reg1\().8h,  \reg5\().8h
++        urhadd          \reg2\().8h,  \reg2\().8h,  \reg6\().8h
++        urhadd          \reg3\().8h,  \reg3\().8h,  \reg7\().8h
++        urhadd          \reg4\().8h,  \reg4\().8h,  \reg8\().8h
++.endif
++        st1             {\reg1\().8h},  [x0], x1
++        st1             {\reg2\().8h},  [x0], x1
++        st1             {\reg3\().8h},  [x0], x1
++        st1             {\reg4\().8h},  [x0], x1
++.endm
++
++// Evaluate the filter twice in parallel, from the inputs src1-src9 into dst1-dst2
++// (src1-src8 into dst1, src2-src9 into dst2).
++.macro convolve4 dst1, dst2, src1, src2, src3, src4, src5, src6, src7, src8, src9, tmp1, tmp2
++        smull           \dst1\().4s, \src1\().4h, v0.h[0]
++        smull           \dst2\().4s, \src2\().4h, v0.h[0]
++        smull           \tmp1\().4s, \src2\().4h, v0.h[1]
++        smull           \tmp2\().4s, \src3\().4h, v0.h[1]
++        smlal           \dst1\().4s, \src3\().4h, v0.h[2]
++        smlal           \dst2\().4s, \src4\().4h, v0.h[2]
++        smlal           \tmp1\().4s, \src4\().4h, v0.h[3]
++        smlal           \tmp2\().4s, \src5\().4h, v0.h[3]
++        smlal           \dst1\().4s, \src5\().4h, v0.h[4]
++        smlal           \dst2\().4s, \src6\().4h, v0.h[4]
++        smlal           \tmp1\().4s, \src6\().4h, v0.h[5]
++        smlal           \tmp2\().4s, \src7\().4h, v0.h[5]
++        smlal           \dst1\().4s, \src7\().4h, v0.h[6]
++        smlal           \dst2\().4s, \src8\().4h, v0.h[6]
++        smlal           \tmp1\().4s, \src8\().4h, v0.h[7]
++        smlal           \tmp2\().4s, \src9\().4h, v0.h[7]
++        add             \dst1\().4s, \dst1\().4s, \tmp1\().4s
++        add             \dst2\().4s, \dst2\().4s, \tmp2\().4s
++.endm
++
++// Evaluate the filter twice in parallel, from the inputs src1-src9 into dst1-dst4
++// (src1-src8 into dst1-dst2, src2-src9 into dst3-dst4).
++.macro convolve8 dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, src7, src8, src9
++        smull           \dst1\().4s, \src1\().4h, v0.h[0]
++        smull2          \dst2\().4s, \src1\().8h, v0.h[0]
++        smull           \dst3\().4s, \src2\().4h, v0.h[0]
++        smull2          \dst4\().4s, \src2\().8h, v0.h[0]
++        smlal           \dst1\().4s, \src2\().4h, v0.h[1]
++        smlal2          \dst2\().4s, \src2\().8h, v0.h[1]
++        smlal           \dst3\().4s, \src3\().4h, v0.h[1]
++        smlal2          \dst4\().4s, \src3\().8h, v0.h[1]
++        smlal           \dst1\().4s, \src3\().4h, v0.h[2]
++        smlal2          \dst2\().4s, \src3\().8h, v0.h[2]
++        smlal           \dst3\().4s, \src4\().4h, v0.h[2]
++        smlal2          \dst4\().4s, \src4\().8h, v0.h[2]
++        smlal           \dst1\().4s, \src4\().4h, v0.h[3]
++        smlal2          \dst2\().4s, \src4\().8h, v0.h[3]
++        smlal           \dst3\().4s, \src5\().4h, v0.h[3]
++        smlal2          \dst4\().4s, \src5\().8h, v0.h[3]
++        smlal           \dst1\().4s, \src5\().4h, v0.h[4]
++        smlal2          \dst2\().4s, \src5\().8h, v0.h[4]
++        smlal           \dst3\().4s, \src6\().4h, v0.h[4]
++        smlal2          \dst4\().4s, \src6\().8h, v0.h[4]
++        smlal           \dst1\().4s, \src6\().4h, v0.h[5]
++        smlal2          \dst2\().4s, \src6\().8h, v0.h[5]
++        smlal           \dst3\().4s, \src7\().4h, v0.h[5]
++        smlal2          \dst4\().4s, \src7\().8h, v0.h[5]
++        smlal           \dst1\().4s, \src7\().4h, v0.h[6]
++        smlal2          \dst2\().4s, \src7\().8h, v0.h[6]
++        smlal           \dst3\().4s, \src8\().4h, v0.h[6]
++        smlal2          \dst4\().4s, \src8\().8h, v0.h[6]
++        smlal           \dst1\().4s, \src8\().4h, v0.h[7]
++        smlal2          \dst2\().4s, \src8\().8h, v0.h[7]
++        smlal           \dst3\().4s, \src9\().4h, v0.h[7]
++        smlal2          \dst4\().4s, \src9\().8h, v0.h[7]
++.endm
++
++// Instantiate a vertical filter function for filtering 8 pixels at a time.
++// The height is passed in x4, the width in x5 and the filter coefficients
++// in x6.
++.macro do_8tap_8v type
++function \type\()_8tap_8v
++        sub             x2,  x2,  x3, lsl #1
++        sub             x2,  x2,  x3
++        ld1             {v0.8h},  [x6]
++1:
++.ifc \type,avg
++        mov             x7,  x0
++.endif
++        mov             x6,  x4
++
++        ld1             {v17.8h}, [x2], x3
++        ld1             {v18.8h}, [x2], x3
++        ld1             {v19.8h}, [x2], x3
++        ld1             {v20.8h}, [x2], x3
++        ld1             {v21.8h}, [x2], x3
++        ld1             {v22.8h}, [x2], x3
++        ld1             {v23.8h}, [x2], x3
++2:
++        ld1             {v24.8h}, [x2], x3
++        ld1             {v25.8h}, [x2], x3
++        ld1             {v26.8h}, [x2], x3
++        ld1             {v27.8h}, [x2], x3
++
++        convolve8       v2,  v3,  v4,  v5,  v17, v18, v19, v20, v21, v22, v23, v24, v25
++        convolve8       v6,  v7,  v30, v31, v19, v20, v21, v22, v23, v24, v25, v26, v27
++        do_store8       v2,  v3,  v4,  v5,  v6,  v7,  v30, v31, v1,  \type
++
++        subs            x6,  x6,  #4
++        b.eq            8f
++
++        ld1             {v16.8h}, [x2], x3
++        ld1             {v17.8h}, [x2], x3
++        ld1             {v18.8h}, [x2], x3
++        ld1             {v19.8h}, [x2], x3
++        convolve8       v2,  v3,  v4,  v5,  v21, v22, v23, v24, v25, v26, v27, v16, v17
++        convolve8       v6,  v7,  v20, v21, v23, v24, v25, v26, v27, v16, v17, v18, v19
++        do_store8       v2,  v3,  v4,  v5,  v6,  v7,  v20, v21, v1,  \type
++
++        subs            x6,  x6,  #4
++        b.eq            8f
++
++        ld1             {v20.8h}, [x2], x3
++        ld1             {v21.8h}, [x2], x3
++        ld1             {v22.8h}, [x2], x3
++        ld1             {v23.8h}, [x2], x3
++        convolve8       v2,  v3,  v4,  v5,  v25, v26, v27, v16, v17, v18, v19, v20, v21
++        convolve8       v6,  v7,  v24, v25, v27, v16, v17, v18, v19, v20, v21, v22, v23
++        do_store8       v2,  v3,  v4,  v5,  v6,  v7,  v24, v25, v1,  \type
++
++        subs            x6,  x6,  #4
++        b.ne            2b
++
++8:
++        subs            x5,  x5,  #8
++        b.eq            9f
++        // x0 -= h * dst_stride
++        msub            x0,  x1,  x4, x0
++        // x2 -= h * src_stride
++        msub            x2,  x3,  x4, x2
++        // x2 -= 8 * src_stride
++        sub             x2,  x2,  x3, lsl #3
++        // x2 += 1 * src_stride
++        add             x2,  x2,  x3
++        add             x2,  x2,  #16
++        add             x0,  x0,  #16
++        b               1b
++9:
++        ret
++endfunc
++.endm
++
++do_8tap_8v put
++do_8tap_8v avg
++
++
++// Instantiate a vertical filter function for filtering a 4 pixels wide
++// slice. This only is designed to work for 4 or 8 output lines.
++.macro do_8tap_4v type
++function \type\()_8tap_4v
++        sub             x2,  x2,  x3, lsl #1
++        sub             x2,  x2,  x3
++        ld1             {v0.8h},  [x6]
++.ifc \type,avg
++        mov             x7,  x0
++.endif
++
++        ld1             {v16.4h}, [x2], x3
++        ld1             {v17.4h}, [x2], x3
++        ld1             {v18.4h}, [x2], x3
++        ld1             {v19.4h}, [x2], x3
++        ld1             {v20.4h}, [x2], x3
++        ld1             {v21.4h}, [x2], x3
++        ld1             {v22.4h}, [x2], x3
++        ld1             {v23.4h}, [x2], x3
++        ld1             {v24.4h}, [x2], x3
++        ld1             {v25.4h}, [x2], x3
++        ld1             {v26.4h}, [x2], x3
++
++        convolve4       v2,  v3,  v16, v17, v18, v19, v20, v21, v22, v23, v24, v30, v31
++        convolve4       v4,  v5,  v18, v19, v20, v21, v22, v23, v24, v25, v26, v30, v31
++        do_store4       v2,  v3,  v4,  v5,  v28, v29, v30, v31, v1,  \type
++
++        subs            x4,  x4,  #4
++        b.eq            9f
++
++        ld1             {v27.4h}, [x2], x3
++        ld1             {v28.4h}, [x2], x3
++        ld1             {v29.4h}, [x2], x3
++        ld1             {v30.4h}, [x2], x3
++
++        convolve4       v2,  v3,  v20, v21, v22, v23, v24, v25, v26, v27, v28, v16, v17
++        convolve4       v4,  v5,  v22, v23, v24, v25, v26, v27, v28, v29, v30, v16, v17
++        do_store4       v2,  v3,  v4,  v5,  v16, v17, v18, v19, v1,  \type
++
++9:
++        ret
++endfunc
++.endm
++
++do_8tap_4v put
++do_8tap_4v avg
++
++
++.macro do_8tap_v_func type, filter, offset, size, bpp
++function ff_vp9_\type\()_\filter\()\size\()_v_\bpp\()_neon, export=1
++        uxtw            x4,  w4
++        mvni            v1.8h, #((0xff << (\bpp - 8)) & 0xff), lsl #8
++        movrel          x5,  X(ff_vp9_subpel_filters), 256*\offset
++        add             x6,  x5,  w6, uxtw #4
++        mov             x5,  #\size
++.if \size >= 8
++        b               \type\()_8tap_8v
++.else
++        b               \type\()_8tap_4v
++.endif
++endfunc
++.endm
++
++.macro do_8tap_v_filters size, bpp
++do_8tap_v_func put, regular, 1, \size, \bpp
++do_8tap_v_func avg, regular, 1, \size, \bpp
++do_8tap_v_func put, sharp,   2, \size, \bpp
++do_8tap_v_func avg, sharp,   2, \size, \bpp
++do_8tap_v_func put, smooth,  0, \size, \bpp
++do_8tap_v_func avg, smooth,  0, \size, \bpp
++.endm
++
++.macro do_8tap_v_filters_bpp bpp
++do_8tap_v_filters 64, \bpp
++do_8tap_v_filters 32, \bpp
++do_8tap_v_filters 16, \bpp
++do_8tap_v_filters 8,  \bpp
++do_8tap_v_filters 4,  \bpp
++.endm
++
++do_8tap_v_filters_bpp 10
++do_8tap_v_filters_bpp 12
+diff --git a/media/ffvpx/libavcodec/aarch64/vp9mc_neon.S b/media/ffvpx/libavcodec/aarch64/vp9mc_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/vp9mc_neon.S
+@@ -0,0 +1,687 @@
++/*
++ * Copyright (c) 2016 Google Inc.
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/aarch64/asm.S"
++
++// All public functions in this file have the following signature:
++// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
++//                            const uint8_t *ref, ptrdiff_t ref_stride,
++//                            int h, int mx, int my);
++
++function ff_vp9_copy64_aarch64, export=1
++1:
++        ldp             x5,  x6,  [x2]
++        ldp             x7,  x8,  [x2, #16]
++        stp             x5,  x6,  [x0]
++        ldp             x9,  x10, [x2, #32]
++        stp             x7,  x8,  [x0, #16]
++        subs            w4,  w4,  #1
++        ldp             x11, x12, [x2, #48]
++        stp             x9,  x10, [x0, #32]
++        stp             x11, x12, [x0, #48]
++        add             x2,  x2,  x3
++        add             x0,  x0,  x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg64_neon, export=1
++        mov             x5,  x0
++1:
++        ld1             {v4.16b,  v5.16b,  v6.16b,  v7.16b},  [x2], x3
++        ld1             {v0.16b,  v1.16b,  v2.16b,  v3.16b},  [x0], x1
++        ld1             {v20.16b, v21.16b, v22.16b, v23.16b}, [x2], x3
++        urhadd          v0.16b,  v0.16b,  v4.16b
++        urhadd          v1.16b,  v1.16b,  v5.16b
++        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x0], x1
++        urhadd          v2.16b,  v2.16b,  v6.16b
++        urhadd          v3.16b,  v3.16b,  v7.16b
++        subs            w4,  w4,  #2
++        urhadd          v16.16b, v16.16b, v20.16b
++        urhadd          v17.16b, v17.16b, v21.16b
++        st1             {v0.16b,  v1.16b,  v2.16b,  v3.16b},  [x5], x1
++        urhadd          v18.16b, v18.16b, v22.16b
++        urhadd          v19.16b, v19.16b, v23.16b
++        st1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_copy32_aarch64, export=1
++1:
++        ldp             x5,  x6,  [x2]
++        ldp             x7,  x8,  [x2, #16]
++        stp             x5,  x6,  [x0]
++        subs            w4,  w4,  #1
++        stp             x7,  x8,  [x0, #16]
++        add             x2,  x2,  x3
++        add             x0,  x0,  x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg32_neon, export=1
++1:
++        ld1             {v2.16b, v3.16b},  [x2], x3
++        ld1             {v0.16b, v1.16b},  [x0]
++        urhadd          v0.16b,  v0.16b,  v2.16b
++        urhadd          v1.16b,  v1.16b,  v3.16b
++        subs            w4,  w4,  #1
++        st1             {v0.16b, v1.16b},  [x0], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_copy16_neon, export=1
++        add             x5,  x0,  x1
++        lsl             x1,  x1,  #1
++        add             x6,  x2,  x3
++        lsl             x3,  x3,  #1
++1:
++        ld1             {v0.16b},  [x2], x3
++        ld1             {v1.16b},  [x6], x3
++        ld1             {v2.16b},  [x2], x3
++        ld1             {v3.16b},  [x6], x3
++        subs            w4,  w4,  #4
++        st1             {v0.16b},  [x0], x1
++        st1             {v1.16b},  [x5], x1
++        st1             {v2.16b},  [x0], x1
++        st1             {v3.16b},  [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg16_neon, export=1
++        mov             x5,  x0
++1:
++        ld1             {v2.16b},  [x2], x3
++        ld1             {v0.16b},  [x0], x1
++        ld1             {v3.16b},  [x2], x3
++        urhadd          v0.16b,  v0.16b,  v2.16b
++        ld1             {v1.16b},  [x0], x1
++        urhadd          v1.16b,  v1.16b,  v3.16b
++        subs            w4,  w4,  #2
++        st1             {v0.16b},  [x5], x1
++        st1             {v1.16b},  [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_copy8_neon, export=1
++1:
++        ld1             {v0.8b},  [x2], x3
++        ld1             {v1.8b},  [x2], x3
++        subs            w4,  w4,  #2
++        st1             {v0.8b},  [x0], x1
++        st1             {v1.8b},  [x0], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg8_neon, export=1
++        mov             x5,  x0
++1:
++        ld1             {v2.8b},  [x2], x3
++        ld1             {v0.8b},  [x0], x1
++        ld1             {v3.8b},  [x2], x3
++        urhadd          v0.8b,  v0.8b,  v2.8b
++        ld1             {v1.8b},  [x0], x1
++        urhadd          v1.8b,  v1.8b,  v3.8b
++        subs            w4,  w4,  #2
++        st1             {v0.8b},  [x5], x1
++        st1             {v1.8b},  [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_copy4_neon, export=1
++1:
++        ld1             {v0.s}[0], [x2], x3
++        ld1             {v1.s}[0], [x2], x3
++        st1             {v0.s}[0], [x0], x1
++        ld1             {v2.s}[0], [x2], x3
++        st1             {v1.s}[0], [x0], x1
++        ld1             {v3.s}[0], [x2], x3
++        subs            w4,  w4,  #4
++        st1             {v2.s}[0], [x0], x1
++        st1             {v3.s}[0], [x0], x1
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vp9_avg4_neon, export=1
++        mov             x5,  x0
++1:
++        ld1             {v2.s}[0], [x2], x3
++        ld1             {v0.s}[0], [x0], x1
++        ld1             {v2.s}[1], [x2], x3
++        ld1             {v0.s}[1], [x0], x1
++        ld1             {v3.s}[0], [x2], x3
++        ld1             {v1.s}[0], [x0], x1
++        ld1             {v3.s}[1], [x2], x3
++        ld1             {v1.s}[1], [x0], x1
++        subs            w4,  w4,  #4
++        urhadd          v0.8b,  v0.8b,  v2.8b
++        urhadd          v1.8b,  v1.8b,  v3.8b
++        st1             {v0.s}[0], [x5], x1
++        st1             {v0.s}[1], [x5], x1
++        st1             {v1.s}[0], [x5], x1
++        st1             {v1.s}[1], [x5], x1
++        b.ne            1b
++        ret
++endfunc
++
++
++// Extract a vector from src1-src2 and src4-src5 (src1-src3 and src4-src6
++// for size >= 16), and multiply-accumulate into dst1 and dst3 (or
++// dst1-dst2 and dst3-dst4 for size >= 16)
++.macro extmla dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, offset, size
++        ext             v20.16b, \src1\().16b, \src2\().16b, #(2*\offset)
++        ext             v22.16b, \src4\().16b, \src5\().16b, #(2*\offset)
++.if \size >= 16
++        mla             \dst1\().8h, v20.8h, v0.h[\offset]
++        ext             v21.16b, \src2\().16b, \src3\().16b, #(2*\offset)
++        mla             \dst3\().8h, v22.8h, v0.h[\offset]
++        ext             v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
++        mla             \dst2\().8h, v21.8h, v0.h[\offset]
++        mla             \dst4\().8h, v23.8h, v0.h[\offset]
++.elseif \size == 8
++        mla             \dst1\().8h, v20.8h, v0.h[\offset]
++        mla             \dst3\().8h, v22.8h, v0.h[\offset]
++.else
++        mla             \dst1\().4h, v20.4h, v0.h[\offset]
++        mla             \dst3\().4h, v22.4h, v0.h[\offset]
++.endif
++.endm
++// The same as above, but don't accumulate straight into the
++// destination, but use a temp register and accumulate with saturation.
++.macro extmulqadd dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, offset, size
++        ext             v20.16b, \src1\().16b, \src2\().16b, #(2*\offset)
++        ext             v22.16b, \src4\().16b, \src5\().16b, #(2*\offset)
++.if \size >= 16
++        mul             v20.8h, v20.8h, v0.h[\offset]
++        ext             v21.16b, \src2\().16b, \src3\().16b, #(2*\offset)
++        mul             v22.8h, v22.8h, v0.h[\offset]
++        ext             v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
++        mul             v21.8h, v21.8h, v0.h[\offset]
++        mul             v23.8h, v23.8h, v0.h[\offset]
++.elseif \size == 8
++        mul             v20.8h, v20.8h, v0.h[\offset]
++        mul             v22.8h, v22.8h, v0.h[\offset]
++.else
++        mul             v20.4h, v20.4h, v0.h[\offset]
++        mul             v22.4h, v22.4h, v0.h[\offset]
++.endif
++.if \size == 4
++        sqadd           \dst1\().4h, \dst1\().4h, v20.4h
++        sqadd           \dst3\().4h, \dst3\().4h, v22.4h
++.else
++        sqadd           \dst1\().8h, \dst1\().8h, v20.8h
++        sqadd           \dst3\().8h, \dst3\().8h, v22.8h
++.if \size >= 16
++        sqadd           \dst2\().8h, \dst2\().8h, v21.8h
++        sqadd           \dst4\().8h, \dst4\().8h, v23.8h
++.endif
++.endif
++.endm
++
++
++// Instantiate a horizontal filter function for the given size.
++// This can work on 4, 8 or 16 pixels in parallel; for larger
++// widths it will do 16 pixels at a time and loop horizontally.
++// The actual width is passed in x5, the height in w4 and the
++// filter coefficients in x9. idx2 is the index of the largest
++// filter coefficient (3 or 4) and idx1 is the other one of them.
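A scalar sketch of the overflow handling this relies on, mirroring the extmla/extmulqadd macros above; the helper names are illustrative.

    #include <stdint.h>

    static int16_t sat_add_s16(int32_t a, int32_t b)      /* models sqadd */
    {
        int32_t s = a + b;
        return s > 32767 ? 32767 : (s < -32768 ? -32768 : (int16_t)s);
    }

    /* Seven of the taps are accumulated with plain 16-bit multiply-adds
     * (safe because their positive sum stays below 127, and 127 * 255
     * still fits in int16_t); the largest tap idx2 is added last with
     * saturation, then rounded, shifted by 7 and clamped to 8 bits. */
    static uint8_t filter8_sample_8bpp(const uint8_t *src, const int16_t *filter, int idx2)
    {
        int16_t acc = 0;                 /* 16-bit, like the .8h mla lanes */
        for (int k = 0; k < 8; k++)
            if (k != idx2)
                acc += src[k] * filter[k];
        int32_t sum = sat_add_s16(acc, src[idx2] * filter[idx2]);
        sum = (sum + 64) >> 7;
        return sum < 0 ? 0 : (sum > 255 ? 255 : (uint8_t)sum);
    }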
++.macro do_8tap_h type, size, idx1, idx2
++function \type\()_8tap_\size\()h_\idx1\idx2
++        sub             x2,  x2,  #3
++        add             x6,  x0,  x1
++        add             x7,  x2,  x3
++        add             x1,  x1,  x1
++        add             x3,  x3,  x3
++        // Only size >= 16 loops horizontally and needs
++        // reduced dst stride
++.if \size >= 16
++        sub             x1,  x1,  x5
++.endif
++        // size >= 16 loads two qwords and increments x2,
++        // for size 4/8 it's enough with one qword and no
++        // postincrement
++.if \size >= 16
++        sub             x3,  x3,  x5
++        sub             x3,  x3,  #8
++.endif
++        // Load the filter vector
++        ld1             {v0.8h},  [x9]
++1:
++.if \size >= 16
++        mov             x9,  x5
++.endif
++        // Load src
++.if \size >= 16
++        ld1             {v4.8b,  v5.8b,  v6.8b},  [x2], #24
++        ld1             {v16.8b, v17.8b, v18.8b}, [x7], #24
++.else
++        ld1             {v4.8b,  v5.8b},  [x2]
++        ld1             {v16.8b, v17.8b}, [x7]
++.endif
++        uxtl            v4.8h,  v4.8b
++        uxtl            v5.8h,  v5.8b
++        uxtl            v16.8h, v16.8b
++        uxtl            v17.8h, v17.8b
++.if \size >= 16
++        uxtl            v6.8h,  v6.8b
++        uxtl            v18.8h, v18.8b
++.endif
++2:
++
++        // Accumulate, adding idx2 last with a separate
++        // saturating add. The positive filter coefficients
++        // for all indices except idx2 must add up to less
++        // than 127 for this not to overflow.
++        mul             v1.8h,  v4.8h,  v0.h[0]
++        mul             v24.8h, v16.8h, v0.h[0]
++.if \size >= 16
++        mul             v2.8h,  v5.8h,  v0.h[0]
++        mul             v25.8h, v17.8h, v0.h[0]
++.endif
++        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 1,     \size
++        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 2,     \size
++        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, \idx1, \size
++        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 5,     \size
++        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 6,     \size
++        extmla          v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, 7,     \size
++        extmulqadd      v1,  v2,  v24, v25, v4,  v5,  v6,  v16, v17, v18, \idx2, \size
++
++        // Round, shift and saturate
++        sqrshrun        v1.8b,   v1.8h,  #7
++        sqrshrun        v24.8b,  v24.8h, #7
++.if \size >= 16
++        sqrshrun2       v1.16b,  v2.8h,  #7
++        sqrshrun2       v24.16b, v25.8h, #7
++.endif
++        // Average
++.ifc \type,avg
++.if \size >= 16
++        ld1             {v2.16b}, [x0]
++        ld1             {v3.16b}, [x6]
++        urhadd          v1.16b,  v1.16b,  v2.16b
++        urhadd          v24.16b, v24.16b, v3.16b
++.elseif \size == 8
++        ld1             {v2.8b},  [x0]
++        ld1             {v3.8b},  [x6]
++        urhadd          v1.8b,  v1.8b,  v2.8b
++        urhadd          v24.8b, v24.8b, v3.8b
++.else
++        ld1             {v2.s}[0], [x0]
++        ld1             {v3.s}[0], [x6]
++        urhadd          v1.8b,  v1.8b,  v2.8b
++        urhadd          v24.8b, v24.8b, v3.8b
++.endif
++.endif
++        // Store and loop horizontally (for size >= 16)
++.if \size >= 16
++        subs            x9,  x9,  #16
++        st1             {v1.16b},  [x0], #16
++        st1             {v24.16b}, [x6], #16
++        b.eq            3f
++        mov             v4.16b,  v6.16b
++        mov             v16.16b, v18.16b
++        ld1             {v6.16b},  [x2], #16
++        ld1             {v18.16b}, [x7], #16
++        uxtl            v5.8h,  v6.8b
++        uxtl2           v6.8h,  v6.16b
++        uxtl            v17.8h, v18.8b
++        uxtl2           v18.8h, v18.16b
++        b               2b
++.elseif \size == 8
++        st1             {v1.8b},    [x0]
++        st1             {v24.8b},   [x6]
++.else // \size == 4
++        st1             {v1.s}[0],  [x0]
++        st1             {v24.s}[0], [x6]
++.endif
++3:
++        // Loop vertically
++        add             x0,  x0,  x1
++        add             x6,  x6,  x1
++        add             x2,  x2,  x3
++        add             x7,  x7,  x3
++        subs            w4,  w4,  #2
++        b.ne            1b
++        ret
++endfunc
++.endm
++
++.macro do_8tap_h_size size
++do_8tap_h put, \size, 3, 4
++do_8tap_h avg, \size, 3, 4
++do_8tap_h put, \size, 4, 3
++do_8tap_h avg, \size, 4, 3
++.endm
++
++do_8tap_h_size 4
++do_8tap_h_size 8
++do_8tap_h_size 16
++
++.macro do_8tap_h_func type, filter, offset, size
++function ff_vp9_\type\()_\filter\()\size\()_h_neon, export=1
++        movrel          x6,  X(ff_vp9_subpel_filters), 256*\offset
++        cmp             w5,  #8
++        add             x9,  x6,  w5, uxtw #4
++        mov             x5,  #\size
++.if \size >= 16
++        b.ge            \type\()_8tap_16h_34
++        b               \type\()_8tap_16h_43
++.else
++        b.ge            \type\()_8tap_\size\()h_34
++        b               \type\()_8tap_\size\()h_43
++.endif
++endfunc
++.endm
++
++.macro do_8tap_h_filters size
++do_8tap_h_func put, regular, 1, \size
++do_8tap_h_func avg, regular, 1, \size
++do_8tap_h_func put, sharp,   2, \size
++do_8tap_h_func avg, sharp,   2, \size
++do_8tap_h_func put, smooth,  0, \size
++do_8tap_h_func avg, smooth,  0, \size
++.endm
++
++do_8tap_h_filters 64
++do_8tap_h_filters 32
++do_8tap_h_filters 16
++do_8tap_h_filters 8
++do_8tap_h_filters 4
++
++
++// Vertical filters
++
++// Round, shift and saturate and store reg1-reg2 over 4 lines
++.macro do_store4 reg1, reg2, tmp1, tmp2, type
++        sqrshrun        \reg1\().8b,  \reg1\().8h, #7
++        sqrshrun        \reg2\().8b,  \reg2\().8h, #7
++.ifc \type,avg
++        ld1             {\tmp1\().s}[0],  [x7], x1
++        ld1             {\tmp2\().s}[0],  [x7], x1
++        ld1             {\tmp1\().s}[1],  [x7], x1
++        ld1             {\tmp2\().s}[1],  [x7], x1
++        urhadd          \reg1\().8b,  \reg1\().8b,  \tmp1\().8b
++        urhadd          \reg2\().8b,  \reg2\().8b,  \tmp2\().8b
++.endif
++        st1             {\reg1\().s}[0],  [x0], x1
++        st1             {\reg2\().s}[0],  [x0], x1
++        st1             {\reg1\().s}[1],  [x0], x1
++        st1             {\reg2\().s}[1],  [x0], x1
++.endm
++
++// Round, shift and saturate and store reg1-4
++.macro do_store reg1, reg2, reg3, reg4, tmp1, tmp2, tmp3, tmp4, type
++        sqrshrun        \reg1\().8b,  \reg1\().8h, #7
++        sqrshrun        \reg2\().8b,  \reg2\().8h, #7
++        sqrshrun        \reg3\().8b,  \reg3\().8h, #7
++        sqrshrun        \reg4\().8b,  \reg4\().8h, #7
++.ifc \type,avg
++        ld1             {\tmp1\().8b},  [x7], x1
++        ld1             {\tmp2\().8b},  [x7], x1
++        ld1             {\tmp3\().8b},  [x7], x1
++        ld1             {\tmp4\().8b},  [x7], x1
++        urhadd          \reg1\().8b,  \reg1\().8b,  \tmp1\().8b
++        urhadd          \reg2\().8b,  \reg2\().8b,  \tmp2\().8b
++        urhadd          \reg3\().8b,  \reg3\().8b,  \tmp3\().8b
++        urhadd          \reg4\().8b,  \reg4\().8b,  \tmp4\().8b
++.endif
++        st1             {\reg1\().8b},  [x0], x1
++        st1             {\reg2\().8b},  [x0], x1
++        st1             {\reg3\().8b},  [x0], x1
++        st1             {\reg4\().8b},  [x0], x1
++.endm
++
++// Evaluate the filter twice in parallel, from the inputs src1-src9 into dst1-dst2
++// (src1-src8 into dst1, src2-src9 into dst2), adding idx2 separately
++// at the end with saturation. Indices 0 and 7 always have negative or zero
++// coefficients, so they can be accumulated into tmp1-tmp2 together with the
++// largest coefficient.
++.macro convolve dst1, dst2, src1, src2, src3, src4, src5, src6, src7, src8, src9, idx1, idx2, tmp1, tmp2
++        mul             \dst1\().8h, \src2\().8h, v0.h[1]
++        mul             \dst2\().8h, \src3\().8h, v0.h[1]
++        mul             \tmp1\().8h, \src1\().8h, v0.h[0]
++        mul             \tmp2\().8h, \src2\().8h, v0.h[0]
++        mla             \dst1\().8h, \src3\().8h, v0.h[2]
++        mla             \dst2\().8h, \src4\().8h, v0.h[2]
++.if \idx1 == 3
++        mla             \dst1\().8h, \src4\().8h, v0.h[3]
++        mla             \dst2\().8h, \src5\().8h, v0.h[3]
++.else
++        mla             \dst1\().8h, \src5\().8h, v0.h[4]
++        mla             \dst2\().8h, \src6\().8h, v0.h[4]
++.endif
++        mla             \dst1\().8h, \src6\().8h, v0.h[5]
++        mla             \dst2\().8h, \src7\().8h, v0.h[5]
++        mla             \tmp1\().8h, \src8\().8h, v0.h[7]
++        mla             \tmp2\().8h, \src9\().8h, v0.h[7]
++        mla             \dst1\().8h, \src7\().8h, v0.h[6]
++        mla             \dst2\().8h, \src8\().8h, v0.h[6]
++.if \idx2 == 3
++        mla             \tmp1\().8h, \src4\().8h, v0.h[3]
++        mla             \tmp2\().8h, \src5\().8h, v0.h[3]
++.else
++        mla             \tmp1\().8h, \src5\().8h, v0.h[4]
++        mla             \tmp2\().8h, \src6\().8h, v0.h[4]
++.endif
++        sqadd           \dst1\().8h, \dst1\().8h, \tmp1\().8h
++        sqadd           \dst2\().8h, \dst2\().8h, \tmp2\().8h
++.endm
++
++// Load pixels and extend them to 16 bit
++.macro loadl dst1, dst2, dst3, dst4
++        ld1             {v1.8b}, [x2], x3
++        ld1             {v2.8b}, [x2], x3
++        ld1             {v3.8b}, [x2], x3
++.ifnb \dst4
++        ld1             {v4.8b}, [x2], x3
++.endif
++        uxtl            \dst1\().8h, v1.8b
++        uxtl            \dst2\().8h, v2.8b
++        uxtl            \dst3\().8h, v3.8b
++.ifnb \dst4
++        uxtl            \dst4\().8h, v4.8b
++.endif
++.endm
++
++// Instantiate a vertical filter function for filtering 8 pixels at a time.
++// The height is passed in x4, the width in x5 and the filter coefficients
++// in x6. idx2 is the index of the largest filter coefficient (3 or 4)
++// and idx1 is the other one of them.
++.macro do_8tap_8v type, idx1, idx2
++function \type\()_8tap_8v_\idx1\idx2
++        sub             x2,  x2,  x3, lsl #1
++        sub             x2,  x2,  x3
++        ld1             {v0.8h},  [x6]
++1:
++.ifc \type,avg
++        mov             x7,  x0
++.endif
++        mov             x6,  x4
++
++        loadl           v17, v18, v19
++
++        loadl           v20, v21, v22, v23
++2:
++        loadl           v24, v25, v26, v27
++        convolve        v1,  v2,  v17, v18, v19, v20, v21, v22, v23, v24, v25, \idx1, \idx2, v5,  v6
++        convolve        v3,  v4,  v19, v20, v21, v22, v23, v24, v25, v26, v27, \idx1, \idx2, v5,  v6
++        do_store        v1,  v2,  v3,  v4,  v5,  v6,  v7,  v28, \type
++
++        subs            x6,  x6,  #4
++        b.eq            8f
++
++        loadl           v16, v17, v18, v19
++        convolve        v1,  v2,  v21, v22, v23, v24, v25, v26, v27, v16, v17, \idx1, \idx2, v5,  v6
++        convolve        v3,  v4,  v23, v24, v25, v26, v27, v16, v17, v18, v19, \idx1, \idx2, v5,  v6
++        do_store        v1,  v2,  v3,  v4,  v5,  v6,  v7,  v28, \type
++
++        subs            x6,  x6,  #4
++        b.eq            8f
++
++        loadl           v20, v21, v22, v23
++        convolve        v1,  v2,  v25, v26, v27, v16, v17, v18, v19, v20, v21, \idx1, \idx2, v5,  v6
++        convolve        v3,  v4,  v27, v16, v17, v18, v19, v20, v21, v22, v23, \idx1, \idx2, v5,  v6
++        do_store        v1,  v2,  v3,  v4,  v5,  v6,  v7,  v28, \type
++
++        subs            x6,  x6,  #4
++        b.ne            2b
++
++8:
++        subs            x5,  x5,  #8
++        b.eq            9f
++        // x0 -= h * dst_stride
++        msub            x0,  x1,  x4, x0
++        // x2 -= h * src_stride
++        msub            x2,  x3,  x4, x2
++        // x2 -= 8 * src_stride
++        sub             x2,  x2,  x3, lsl #3
++        // x2 += 1 * src_stride
++        add             x2,  x2,  x3
++        add             x2,  x2,  #8
++        add             x0,  x0,  #8
++        b               1b
++9:
++        ret
++endfunc
++.endm
++
++do_8tap_8v put, 3, 4
++do_8tap_8v put, 4, 3
++do_8tap_8v avg, 3, 4
++do_8tap_8v avg, 4, 3
++
++
++// Instantiate a vertical filter function for filtering a 4 pixels wide
++// slice. The first half of the registers contain one row, while the second
++// half of a register contains the second-next row (also stored in the first
++// half of the register two steps ahead). The convolution does two outputs
++// at a time; the output of v17-v24 into one, and v18-v25 into another one.
++// The first half of first output is the first output row, the first half
++// of the other output is the second output row. The second halves of the
++// registers are rows 3 and 4.
++// This only is designed to work for 4 or 8 output lines.
++.macro do_8tap_4v type, idx1, idx2
++function \type\()_8tap_4v_\idx1\idx2
++        sub             x2,  x2,  x3, lsl #1
++        sub             x2,  x2,  x3
++        ld1             {v0.8h},  [x6]
++.ifc \type,avg
++        mov             x7,  x0
++.endif
++
++        ld1             {v1.s}[0],  [x2], x3
++        ld1             {v2.s}[0],  [x2], x3
++        ld1             {v3.s}[0],  [x2], x3
++        ld1             {v4.s}[0],  [x2], x3
++        ld1             {v5.s}[0],  [x2], x3
++        ld1             {v6.s}[0],  [x2], x3
++        trn1            v1.2s,  v1.2s,  v3.2s
++        ld1             {v7.s}[0],  [x2], x3
++        trn1            v2.2s,  v2.2s,  v4.2s
++        ld1             {v26.s}[0], [x2], x3
++        uxtl            v17.8h, v1.8b
++        trn1            v3.2s,  v3.2s,  v5.2s
++        ld1             {v27.s}[0], [x2], x3
++        uxtl            v18.8h, v2.8b
++        trn1            v4.2s,  v4.2s,  v6.2s
++        ld1             {v28.s}[0], [x2], x3
++        uxtl            v19.8h, v3.8b
++        trn1            v5.2s,  v5.2s,  v7.2s
++        ld1             {v29.s}[0], [x2], x3
++        uxtl            v20.8h, v4.8b
++        trn1            v6.2s,  v6.2s,  v26.2s
++        uxtl            v21.8h, v5.8b
++        trn1            v7.2s,  v7.2s,  v27.2s
++        uxtl            v22.8h, v6.8b
++        trn1            v26.2s, v26.2s, v28.2s
++        uxtl            v23.8h, v7.8b
++        trn1            v27.2s, v27.2s, v29.2s
++        uxtl            v24.8h, v26.8b
++        uxtl            v25.8h, v27.8b
++
++        convolve        v1,  v2,  v17, v18, v19, v20, v21, v22, v23, v24, v25, \idx1, \idx2, v3,  v4
++        do_store4       v1,  v2,  v5,  v6,  \type
++
++        subs            x4,  x4,  #4
++        b.eq            9f
++
++        ld1             {v1.s}[0],  [x2], x3
++        ld1             {v2.s}[0],  [x2], x3
++        trn1            v28.2s, v28.2s, v1.2s
++        trn1            v29.2s, v29.2s, v2.2s
++        ld1             {v1.s}[1],  [x2], x3
++        uxtl            v26.8h, v28.8b
++        ld1             {v2.s}[1],  [x2], x3
++        uxtl            v27.8h, v29.8b
++        uxtl            v28.8h, v1.8b
++        uxtl            v29.8h, v2.8b
++
++        convolve        v1,  v2,  v21, v22, v23, v24, v25, v26, v27, v28, v29, \idx1, \idx2, v3,  v4
++        do_store4       v1,  v2,  v5,  v6,  \type
++
++9:
++        ret
++endfunc
++.endm
++
++do_8tap_4v put, 3, 4
++do_8tap_4v put, 4, 3
++do_8tap_4v avg, 3, 4
++do_8tap_4v avg, 4, 3
++
++
++.macro do_8tap_v_func type, filter, offset, size
++function ff_vp9_\type\()_\filter\()\size\()_v_neon, export=1
++        uxtw            x4,  w4
++        movrel          x5,  X(ff_vp9_subpel_filters), 256*\offset
++        cmp             w6,  #8
++        add             x6,  x5,  w6, uxtw #4
++        mov             x5,  #\size
++.if \size >= 8
++        b.ge            \type\()_8tap_8v_34
++        b               \type\()_8tap_8v_43
++.else
++        b.ge            \type\()_8tap_4v_34
++        b               \type\()_8tap_4v_43
++.endif
++endfunc
++.endm
++
++.macro do_8tap_v_filters size
++do_8tap_v_func put, regular, 1, \size
++do_8tap_v_func avg, regular, 1, \size
++do_8tap_v_func put, sharp,   2, \size
++do_8tap_v_func avg, sharp,   2, \size
++do_8tap_v_func put, smooth,  0, \size
++do_8tap_v_func avg, smooth,  0, \size
++.endm
++
++do_8tap_v_filters 64
++do_8tap_v_filters 32
++do_8tap_v_filters 16
++do_8tap_v_filters 8
++do_8tap_v_filters 4
+diff --git a/media/ffvpx/libavutil/aarch64/asm.S b/media/ffvpx/libavutil/aarch64/asm.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/asm.S
+@@ -0,0 +1,104 @@
++/*
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "config.h"
++
++#ifdef __ELF__
++#   define ELF
++#else
++#   define ELF #
++#endif
++
++#if HAVE_AS_FUNC
++#   define FUNC
++#else
++#   define FUNC #
++#endif
++
++.macro  function name, export=0, align=2
++    .macro endfunc
++ELF     .size   \name, . - \name
++FUNC    .endfunc
++        .purgem endfunc
++    .endm
++        .text
++        .align          \align
++    .if \export
++        .global EXTERN_ASM\name
++ELF     .type   EXTERN_ASM\name, %function
++FUNC    .func   EXTERN_ASM\name
++EXTERN_ASM\name:
++    .else
++ELF     .type   \name, %function
++FUNC    .func   \name
++\name:
++    .endif
++.endm
++
++.macro  const   name, align=2, relocate=0
++    .macro endconst
++ELF     .size   \name, . - \name
++        .purgem endconst
++    .endm
++#if HAVE_SECTION_DATA_REL_RO
++.if \relocate
++        .section        .data.rel.ro
++.else
++        .section        .rodata
++.endif
++#elif !defined(__MACH__)
++        .section        .rodata
++#else
++        .const_data
++#endif
++        .align          \align
++\name:
++.endm
++
++.macro  movrel rd, val, offset=0
++#if CONFIG_PIC && defined(__APPLE__)
++    .if \offset < 0
++        adrp            \rd, \val@PAGE
++        add             \rd, \rd, \val@PAGEOFF
++        sub             \rd, \rd, -(\offset)
++    .else
++        adrp            \rd, \val+(\offset)@PAGE
++        add             \rd, \rd, \val+(\offset)@PAGEOFF
++    .endif
++#elif CONFIG_PIC && defined(_WIN32)
++    .if \offset < 0
++        adrp            \rd, \val
++        add             \rd, \rd, :lo12:\val
++        sub             \rd, \rd, -(\offset)
++    .else
++        adrp            \rd, \val+(\offset)
++        add             \rd, \rd, :lo12:\val+(\offset)
++    .endif
++#elif CONFIG_PIC
++        adrp            \rd, \val+(\offset)
++        add             \rd, \rd, :lo12:\val+(\offset)
++#else
++        ldr             \rd, =\val+\offset
++#endif
++.endm
++
++#define GLUE(a, b) a ## b
++#define JOIN(a, b) GLUE(a, b)
++#define X(s) JOIN(EXTERN_ASM, s)
+diff --git a/media/ffvpx/libavutil/aarch64/bswap.h b/media/ffvpx/libavutil/aarch64/bswap.h
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/bswap.h
+@@ -0,0 +1,51 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVUTIL_AARCH64_BSWAP_H
++#define AVUTIL_AARCH64_BSWAP_H
++
++#include <stdint.h>
++#include "config.h"
++#include "libavutil/attributes.h"
++
++#if HAVE_INLINE_ASM
++
++#define av_bswap16 av_bswap16
++static av_always_inline av_const unsigned av_bswap16(unsigned x)
++{
++    __asm__("rev16 %w0, %w0" : "+r"(x));
++    return x;
++}
++
++#define av_bswap32 av_bswap32
++static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
++{
++    __asm__("rev %w0, %w0" : "+r"(x));
++    return x;
++}
++
++#define av_bswap64 av_bswap64
++static av_always_inline av_const uint64_t av_bswap64(uint64_t x)
++{
++    __asm__("rev %0, %0" : "+r"(x));
++    return x;
++}
++
++#endif /* HAVE_INLINE_ASM */
++
++#endif /* AVUTIL_AARCH64_BSWAP_H */
+diff --git a/media/ffvpx/libavutil/aarch64/cpu.c b/media/ffvpx/libavutil/aarch64/cpu.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/cpu.c
+@@ -0,0 +1,38 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/cpu.h"
++#include "libavutil/cpu_internal.h"
++#include "config.h"
++
++int ff_get_cpu_flags_aarch64(void)
++{
++    return AV_CPU_FLAG_ARMV8 * HAVE_ARMV8 |
++           AV_CPU_FLAG_NEON  * HAVE_NEON  |
++           AV_CPU_FLAG_VFP   * HAVE_VFP;
++}
++
++size_t ff_get_cpu_max_align_aarch64(void)
++{
++    int flags = av_get_cpu_flags();
++
++    if (flags & AV_CPU_FLAG_NEON)
++        return 16;
++
++    return 8;
++}
+diff --git a/media/ffvpx/libavutil/aarch64/cpu.h b/media/ffvpx/libavutil/aarch64/cpu.h
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/cpu.h
+@@ -0,0 +1,29 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVUTIL_AARCH64_CPU_H
++#define AVUTIL_AARCH64_CPU_H
++
++#include "libavutil/cpu.h"
++#include "libavutil/cpu_internal.h"
++
++#define have_armv8(flags) CPUEXT(flags, ARMV8)
++#define have_neon(flags) CPUEXT(flags, NEON)
++#define have_vfp(flags)  CPUEXT(flags, VFP)
++
++#endif /* AVUTIL_AARCH64_CPU_H */
+diff --git a/media/ffvpx/libavutil/aarch64/float_dsp_init.c b/media/ffvpx/libavutil/aarch64/float_dsp_init.c
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/float_dsp_init.c
+@@ -0,0 +1,69 @@
++/*
++ * ARM NEON optimised Float DSP functions
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/float_dsp.h"
++#include "cpu.h"
++
++void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1,
++                         int len);
++
++void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
++                                int len);
++
++void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
++                                int len);
++
++void ff_vector_dmul_scalar_neon(double *dst, const double *src, double mul,
++                                int len);
++
++void ff_vector_fmul_window_neon(float *dst, const float *src0,
++                                const float *src1, const float *win, int len);
++
++void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
++                             const float *src2, int len);
++
++void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
++                                 const float *src1, int len);
++
++void ff_butterflies_float_neon(float *v1, float *v2, int len);
++
++float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
++
++av_cold void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp)
++{
++    int cpu_flags = av_get_cpu_flags();
++
++    if (have_neon(cpu_flags)) {
++        fdsp->butterflies_float   = ff_butterflies_float_neon;
++        fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
++        fdsp->vector_dmul_scalar  = ff_vector_dmul_scalar_neon;
++        fdsp->vector_fmul         = ff_vector_fmul_neon;
++        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_neon;
++        fdsp->vector_fmul_add     = ff_vector_fmul_add_neon;
++        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
++        fdsp->vector_fmul_scalar  = ff_vector_fmul_scalar_neon;
++        fdsp->vector_fmul_window  = ff_vector_fmul_window_neon;
++    }
++}
+diff --git a/media/ffvpx/libavutil/aarch64/float_dsp_neon.S b/media/ffvpx/libavutil/aarch64/float_dsp_neon.S
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/float_dsp_neon.S
+@@ -0,0 +1,202 @@
++/*
++ * ARM NEON optimised Float DSP functions
++ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
++ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "config.h"
++#include "asm.S"
++
++function ff_vector_fmul_neon, export=1
++1:      subs            w3,  w3,  #16
++        ld1             {v0.4S, v1.4S}, [x1], #32
++        ld1             {v2.4S, v3.4S}, [x1], #32
++        ld1             {v4.4S, v5.4S}, [x2], #32
++        ld1             {v6.4S, v7.4S}, [x2], #32
++        fmul            v16.4S, v0.4S,  v4.4S
++        fmul            v17.4S, v1.4S,  v5.4S
++        fmul            v18.4S, v2.4S,  v6.4S
++        fmul            v19.4S, v3.4S,  v7.4S
++        st1             {v16.4S, v17.4S}, [x0], #32
++        st1             {v18.4S, v19.4S}, [x0], #32
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vector_fmac_scalar_neon, export=1
++        mov             x3,  #-32
++1:      subs            w2,  w2,  #16
++        ld1             {v16.4S, v17.4S}, [x0], #32
++        ld1             {v18.4S, v19.4S}, [x0], x3
++        ld1             {v4.4S,  v5.4S},  [x1], #32
++        ld1             {v6.4S,  v7.4S},  [x1], #32
++        fmla            v16.4S, v4.4S,  v0.S[0]
++        fmla            v17.4S, v5.4S,  v0.S[0]
++        fmla            v18.4S, v6.4S,  v0.S[0]
++        fmla            v19.4S, v7.4S,  v0.S[0]
++        st1             {v16.4S, v17.4S}, [x0], #32
++        st1             {v18.4S, v19.4S}, [x0], #32
++        b.ne            1b
++        ret
++endfunc
++
++function ff_vector_fmul_scalar_neon, export=1
++        mov             w4,  #15
++        bics            w3,  w2,  w4
++        dup             v16.4S, v0.S[0]
++        b.eq            3f
++        ld1             {v0.4S, v1.4S}, [x1], #32
++1:      subs            w3,  w3,  #16
++        fmul            v0.4S,  v0.4S,  v16.4S
++        ld1             {v2.4S, v3.4S}, [x1], #32
++        fmul            v1.4S,  v1.4S,  v16.4S
++        fmul            v2.4S,  v2.4S,  v16.4S
++        st1             {v0.4S, v1.4S}, [x0], #32
++        fmul            v3.4S,  v3.4S,  v16.4S
++        b.eq            2f
++        ld1             {v0.4S, v1.4S}, [x1], #32
++        st1             {v2.4S, v3.4S}, [x0], #32
++        b               1b
++2:      ands            w2,  w2,  #15
++        st1             {v2.4S, v3.4S}, [x0], #32
++        b.eq            4f
++3:      ld1             {v0.4S}, [x1], #16
++        fmul            v0.4S,  v0.4S,  v16.4S
++        st1             {v0.4S}, [x0], #16
++        subs            w2,  w2,  #4
++        b.gt            3b
++4:      ret
++endfunc
++
++function ff_vector_dmul_scalar_neon, export=1
++        dup             v16.2D, v0.D[0]
++        ld1             {v0.2D, v1.2D}, [x1], #32
++1:      subs            w2,  w2,  #8
++        fmul            v0.2D,  v0.2D,  v16.2D
++        ld1             {v2.2D, v3.2D}, [x1], #32
++        fmul            v1.2D,  v1.2D,  v16.2D
++        fmul            v2.2D,  v2.2D,  v16.2D
++        st1             {v0.2D, v1.2D}, [x0], #32
++        fmul            v3.2D,  v3.2D,  v16.2D
++        ld1             {v0.2D, v1.2D}, [x1], #32
++        st1             {v2.2D, v3.2D}, [x0], #32
++        b.gt            1b
++        ret
++endfunc
++
++function ff_vector_fmul_window_neon, export=1
++        sxtw            x4,  w4                 // len
++        sub             x2,  x2,  #8
++        sub             x5,  x4,  #2
++        add             x2,  x2,  x5, lsl #2    // src1 + 4 * (len - 4)
++        add             x6,  x3,  x5, lsl #3    // win  + 8 * (len - 2)
++        add             x5,  x0,  x5, lsl #3    // dst  + 8 * (len - 2)
++        mov             x7,  #-16
++        ld1             {v0.4S},  [x1], #16     // s0
++        ld1             {v2.4S},  [x3], #16     // wi
++        ld1             {v1.4S},  [x2], x7      // s1
++1:      ld1             {v3.4S},  [x6], x7      // wj
++        subs            x4,  x4,  #4
++        fmul            v17.4S, v0.4S,  v2.4S   // s0 * wi
++        rev64           v4.4S,  v1.4S
++        rev64           v5.4S,  v3.4S
++        rev64           v17.4S, v17.4S
++        ext             v4.16B,  v4.16B,  v4.16B,  #8 // s1_r
++        ext             v5.16B,  v5.16B,  v5.16B,  #8 // wj_r
++        ext             v17.16B, v17.16B, v17.16B, #8 // (s0 * wi)_rev
++        fmul            v16.4S, v0.4S,  v5.4S  // s0 * wj_r
++        fmla            v17.4S, v1.4S,  v3.4S  // (s0 * wi)_rev + s1 * wj
++        b.eq            2f
++        ld1             {v0.4S},  [x1], #16
++        fmls            v16.4S, v4.4S,  v2.4S  // s0 * wj_r - s1_r * wi
++        st1             {v17.4S}, [x5], x7
++        ld1             {v2.4S},  [x3], #16
++        ld1             {v1.4S},  [x2], x7
++        st1             {v16.4S}, [x0], #16
++        b               1b
++2:
++        fmls            v16.4S, v4.4S,  v2.4S  // s0 * wj_r - s1_r * wi
++        st1             {v17.4S}, [x5], x7
++        st1             {v16.4S}, [x0], #16
++        ret
++endfunc
++
++function ff_vector_fmul_add_neon, export=1
++        ld1             {v0.4S, v1.4S},  [x1], #32
++        ld1             {v2.4S, v3.4S},  [x2], #32
++        ld1             {v4.4S, v5.4S},  [x3], #32
++1:      subs            w4,  w4,  #8
++        fmla            v4.4S,  v0.4S,  v2.4S
++        fmla            v5.4S,  v1.4S,  v3.4S
++        b.eq            2f
++        ld1             {v0.4S, v1.4S},  [x1], #32
++        ld1             {v2.4S, v3.4S},  [x2], #32
++        st1             {v4.4S, v5.4S},  [x0], #32
++        ld1             {v4.4S, v5.4S},  [x3], #32
++        b               1b
++2:      st1             {v4.4S, v5.4S},  [x0], #32
++        ret
++endfunc
++
++function ff_vector_fmul_reverse_neon, export=1
++        sxtw            x3,  w3
++        add             x2,  x2,  x3,  lsl #2
++        sub             x2,  x2,  #32
++        mov             x4, #-32
++        ld1             {v2.4S, v3.4S},  [x2], x4
++        ld1             {v0.4S, v1.4S},  [x1], #32
++1:      subs            x3,  x3,  #8
++        rev64           v3.4S,  v3.4S
++        rev64           v2.4S,  v2.4S
++        ext             v3.16B, v3.16B, v3.16B,  #8
++        ext             v2.16B, v2.16B, v2.16B,  #8
++        fmul            v16.4S, v0.4S,  v3.4S
++        fmul            v17.4S, v1.4S,  v2.4S
++        b.eq            2f
++        ld1             {v2.4S, v3.4S},  [x2], x4
++        ld1             {v0.4S, v1.4S},  [x1], #32
++        st1             {v16.4S, v17.4S},  [x0], #32
++        b               1b
++2:      st1             {v16.4S, v17.4S},  [x0], #32
++        ret
++endfunc
++
++function ff_butterflies_float_neon, export=1
++1:      ld1             {v0.4S}, [x0]
++        ld1             {v1.4S}, [x1]
++        subs            w2,  w2,  #4
++        fsub            v2.4S,   v0.4S,  v1.4S
++        fadd            v3.4S,   v0.4S,  v1.4S
++        st1             {v2.4S}, [x1],   #16
++        st1             {v3.4S}, [x0],   #16
++        b.gt            1b
++        ret
++endfunc
++
++function ff_scalarproduct_float_neon, export=1
++        movi            v2.4S,  #0
++1:      ld1             {v0.4S}, [x0],   #16
++        ld1             {v1.4S}, [x1],   #16
++        subs            w2,      w2,     #4
++        fmla            v2.4S,   v0.4S,  v1.4S
++        b.gt            1b
++        faddp           v0.4S,   v2.4S,  v2.4S
++        faddp           s0,      v0.2S
++        ret
++endfunc
+diff --git a/media/ffvpx/libavutil/aarch64/timer.h b/media/ffvpx/libavutil/aarch64/timer.h
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/timer.h
+@@ -0,0 +1,44 @@
++/*
++ * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVUTIL_AARCH64_TIMER_H
++#define AVUTIL_AARCH64_TIMER_H
++
++#include <stdint.h>
++#include "config.h"
++
++#if HAVE_INLINE_ASM
++
++#define AV_READ_TIME read_time
++
++static inline uint64_t read_time(void)
++{
++    uint64_t cycle_counter;
++    __asm__ volatile(
++        "isb                   \t\n"
++        "mrs %0, pmccntr_el0       "
++        : "=r"(cycle_counter) :: "memory" );
++
++    return cycle_counter;
++}
++
++#endif /* HAVE_INLINE_ASM */
++
++#endif /* AVUTIL_AARCH64_TIMER_H */
+

+ 1016 - 0
mozilla-release/patches/1540760-6-68a1.patch

@@ -0,0 +1,1016 @@
+# HG changeset patch
+# User Dan Minor <dminor@mozilla.com>
+# Date 1556723090 0
+# Node ID d000d40067de32c45c46b39a413ad6a9d2949411
+# Parent  a73351520d7b4e2532abbf7ea81767b1c778c0ce
+Bug 1540760 - Build system changes for aarch64-win64 support in ffvpx; r=jya
+
+Differential Revision: https://phabricator.services.mozilla.com/D27790
+
+diff --git a/media/ffvpx/config.h b/media/ffvpx/config.h
+--- a/media/ffvpx/config.h
++++ b/media/ffvpx/config.h
+@@ -14,21 +14,25 @@
+ #endif
+ #else // MOZ_FFVPX_FLACONLY
+ #if defined(XP_WIN)
+ // Avoid conflicts with mozilla-config.h
+ #if !defined(_MSC_VER)
+ #undef HAVE_DIRENT_H
+ #undef HAVE_UNISTD_H
+ #endif
++#if defined(_ARM64_)
++#include "config_aarch64_win64.h"
++#else
+ #if defined(HAVE_64BIT_BUILD)
+ #include "config_win64.h"
+ #else
+ #include "config_win32.h"
+ #endif
++#endif
+ // Adjust configure defines for GCC
+ #if !defined(_MSC_VER)
+ #if !defined(HAVE_64BIT_BUILD)
+ #undef HAVE_MM_EMPTY
+ #define HAVE_MM_EMPTY 0
+ #endif
+ #undef HAVE_LIBC_MSVCRT
+ #define HAVE_LIBC_MSVCRT 0
+diff --git a/media/ffvpx/config_aarch64_win64.h b/media/ffvpx/config_aarch64_win64.h
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/config_aarch64_win64.h
+@@ -0,0 +1,665 @@
++/* Automatically generated by configure - do not modify! */
++#ifndef FFMPEG_CONFIG_H
++#define FFMPEG_CONFIG_H
++#define FFMPEG_CONFIGURATION "--disable-everything --disable-protocols --disable-demuxers --disable-muxers --disable-filters --disable-programs --disable-doc --disable-parsers --enable-parser=vp8 --enable-parser=vp9 --enable-decoder=vp8 --enable-decoder=vp9 --disable-static --enable-shared --disable-debug --disable-sdl2 --disable-libxcb --disable-securetransport --disable-iconv --disable-swresample --disable-swscale --disable-avdevice --disable-avfilter --disable-avformat --disable-d3d11va --disable-dxva2 --disable-vaapi --disable-vdpau --disable-videotoolbox --enable-decoder=flac --enable-asm --enable-x86asm --toolchain=msvc --disable-cuda --disable-cuvid"
++#define FFMPEG_LICENSE "LGPL version 2.1 or later"
++#define CONFIG_THIS_YEAR 2018
++#define FFMPEG_DATADIR "/usr/local/share/ffmpeg"
++#define AVCONV_DATADIR "/usr/local/share/ffmpeg"
++#define CC_IDENT "Microsoft (R) C/C++ Optimizing Compiler Version 19.15.26726 for x64"
++#define av_restrict __restrict
++#define EXTERN_PREFIX ""
++#define EXTERN_ASM
++#define BUILDSUF ""
++#define SLIBSUF ".dll"
++#define HAVE_MMX2 HAVE_MMXEXT
++#define SWS_MAX_FILTER_SIZE 256
++#define ARCH_AARCH64 1
++#define ARCH_ALPHA 0
++#define ARCH_ARM 0
++#define ARCH_AVR32 0
++#define ARCH_AVR32_AP 0
++#define ARCH_AVR32_UC 0
++#define ARCH_BFIN 0
++#define ARCH_IA64 0
++#define ARCH_M68K 0
++#define ARCH_MIPS 0
++#define ARCH_MIPS64 0
++#define ARCH_PARISC 0
++#define ARCH_PPC 0
++#define ARCH_PPC64 0
++#define ARCH_S390 0
++#define ARCH_SH4 0
++#define ARCH_SPARC 0
++#define ARCH_SPARC64 0
++#define ARCH_TILEGX 0
++#define ARCH_TILEPRO 0
++#define ARCH_TOMI 0
++#define ARCH_X86 0
++#define ARCH_X86_32 0
++#define ARCH_X86_64 0
++#define HAVE_ARMV5TE 0
++#define HAVE_ARMV6 0
++#define HAVE_ARMV6T2 0
++#define HAVE_ARMV8 0
++#define HAVE_NEON 0
++#define HAVE_VFP 0
++#define HAVE_VFPV3 0
++#define HAVE_SETEND 0
++#define HAVE_ALTIVEC 0
++#define HAVE_DCBZL 0
++#define HAVE_LDBRX 0
++#define HAVE_POWER8 0
++#define HAVE_PPC4XX 0
++#define HAVE_VSX 0
++#define HAVE_AESNI 0
++#define HAVE_AMD3DNOW 0
++#define HAVE_AMD3DNOWEXT 0
++#define HAVE_AVX 1
++#define HAVE_AVX2 1
++#define HAVE_AVX512 1
++#define HAVE_FMA3 1
++#define HAVE_FMA4 1
++#define HAVE_MMX 1
++#define HAVE_MMXEXT 1
++#define HAVE_SSE 1
++#define HAVE_SSE2 1
++#define HAVE_SSE3 1
++#define HAVE_SSE4 1
++#define HAVE_SSE42 1
++#define HAVE_SSSE3 1
++#define HAVE_XOP 1
++#define HAVE_CPUNOP 0
++#define HAVE_I686 1
++#define HAVE_MIPSFPU 0
++#define HAVE_MIPS32R2 0
++#define HAVE_MIPS32R5 0
++#define HAVE_MIPS64R2 0
++#define HAVE_MIPS32R6 0
++#define HAVE_MIPS64R6 0
++#define HAVE_MIPSDSP 0
++#define HAVE_MIPSDSPR2 0
++#define HAVE_MSA 0
++#define HAVE_LOONGSON2 0
++#define HAVE_LOONGSON3 0
++#define HAVE_MMI 0
++#define HAVE_ARMV5TE_EXTERNAL 0
++#define HAVE_ARMV6_EXTERNAL 0
++#define HAVE_ARMV6T2_EXTERNAL 0
++#define HAVE_ARMV8_EXTERNAL 0
++#define HAVE_NEON_EXTERNAL 0
++#define HAVE_VFP_EXTERNAL 0
++#define HAVE_VFPV3_EXTERNAL 0
++#define HAVE_SETEND_EXTERNAL 0
++#define HAVE_ALTIVEC_EXTERNAL 0
++#define HAVE_DCBZL_EXTERNAL 0
++#define HAVE_LDBRX_EXTERNAL 0
++#define HAVE_POWER8_EXTERNAL 0
++#define HAVE_PPC4XX_EXTERNAL 0
++#define HAVE_VSX_EXTERNAL 0
++#define HAVE_AESNI_EXTERNAL 0
++#define HAVE_AMD3DNOW_EXTERNAL 0
++#define HAVE_AMD3DNOWEXT_EXTERNAL 0
++#define HAVE_AVX_EXTERNAL 0
++#define HAVE_AVX2_EXTERNAL 0
++#define HAVE_AVX512_EXTERNAL 0
++#define HAVE_FMA3_EXTERNAL 0
++#define HAVE_FMA4_EXTERNAL 0
++#define HAVE_MMX_EXTERNAL 0 
++#define HAVE_MMXEXT_EXTERNAL 0
++#define HAVE_SSE_EXTERNAL 0
++#define HAVE_SSE2_EXTERNAL 0
++#define HAVE_SSE3_EXTERNAL 0
++#define HAVE_SSE4_EXTERNAL 0
++#define HAVE_SSE42_EXTERNAL 0
++#define HAVE_SSSE3_EXTERNAL 0
++#define HAVE_XOP_EXTERNAL 0
++#define HAVE_CPUNOP_EXTERNAL 0
++#define HAVE_I686_EXTERNAL 0
++#define HAVE_MIPSFPU_EXTERNAL 0
++#define HAVE_MIPS32R2_EXTERNAL 0
++#define HAVE_MIPS32R5_EXTERNAL 0
++#define HAVE_MIPS64R2_EXTERNAL 0
++#define HAVE_MIPS32R6_EXTERNAL 0
++#define HAVE_MIPS64R6_EXTERNAL 0
++#define HAVE_MIPSDSP_EXTERNAL 0
++#define HAVE_MIPSDSPR2_EXTERNAL 0
++#define HAVE_MSA_EXTERNAL 0
++#define HAVE_LOONGSON2_EXTERNAL 0
++#define HAVE_LOONGSON3_EXTERNAL 0
++#define HAVE_MMI_EXTERNAL 0
++#define HAVE_ARMV5TE_INLINE 0
++#define HAVE_ARMV6_INLINE 0
++#define HAVE_ARMV6T2_INLINE 0
++#define HAVE_ARMV8_INLINE 0
++#define HAVE_NEON_INLINE 1
++#define HAVE_VFP_INLINE 0
++#define HAVE_VFPV3_INLINE 0
++#define HAVE_SETEND_INLINE 0
++#define HAVE_ALTIVEC_INLINE 0
++#define HAVE_DCBZL_INLINE 0
++#define HAVE_LDBRX_INLINE 0
++#define HAVE_POWER8_INLINE 0
++#define HAVE_PPC4XX_INLINE 0
++#define HAVE_VSX_INLINE 0
++#define HAVE_AESNI_INLINE 0
++#define HAVE_AMD3DNOW_INLINE 0
++#define HAVE_AMD3DNOWEXT_INLINE 0
++#define HAVE_AVX_INLINE 0
++#define HAVE_AVX2_INLINE 0
++#define HAVE_AVX512_INLINE 0
++#define HAVE_FMA3_INLINE 0
++#define HAVE_FMA4_INLINE 0
++#define HAVE_MMX_INLINE 0
++#define HAVE_MMXEXT_INLINE 0
++#define HAVE_SSE_INLINE 0
++#define HAVE_SSE2_INLINE 0
++#define HAVE_SSE3_INLINE 0
++#define HAVE_SSE4_INLINE 0
++#define HAVE_SSE42_INLINE 0
++#define HAVE_SSSE3_INLINE 0
++#define HAVE_XOP_INLINE 0
++#define HAVE_CPUNOP_INLINE 0
++#define HAVE_I686_INLINE 0
++#define HAVE_MIPSFPU_INLINE 0
++#define HAVE_MIPS32R2_INLINE 0
++#define HAVE_MIPS32R5_INLINE 0
++#define HAVE_MIPS64R2_INLINE 0
++#define HAVE_MIPS32R6_INLINE 0
++#define HAVE_MIPS64R6_INLINE 0
++#define HAVE_MIPSDSP_INLINE 0
++#define HAVE_MIPSDSPR2_INLINE 0
++#define HAVE_MSA_INLINE 0
++#define HAVE_LOONGSON2_INLINE 0
++#define HAVE_LOONGSON3_INLINE 0
++#define HAVE_MMI_INLINE 0
++#define HAVE_ALIGNED_STACK 1
++#define HAVE_FAST_64BIT 1
++#define HAVE_FAST_CLZ 0
++#define HAVE_FAST_CMOV 1
++#define HAVE_LOCAL_ALIGNED 1
++#define HAVE_SIMD_ALIGN_16 1
++#define HAVE_SIMD_ALIGN_32 1
++#define HAVE_SIMD_ALIGN_64 1
++#define HAVE_ATOMIC_CAS_PTR 0
++#define HAVE_MACHINE_RW_BARRIER 0
++#define HAVE_MEMORYBARRIER 1
++#define HAVE_MM_EMPTY 0
++#define HAVE_RDTSC 1
++#define HAVE_SEM_TIMEDWAIT 0
++#define HAVE_SYNC_VAL_COMPARE_AND_SWAP 0
++#define HAVE_CABS 0
++#define HAVE_CEXP 0
++#define HAVE_INLINE_ASM 0
++#define HAVE_SYMVER 0
++#define HAVE_X86ASM 0
++#define HAVE_BIGENDIAN 0
++#define HAVE_FAST_UNALIGNED 1
++#define HAVE_ARPA_INET_H 0
++#define HAVE_ASM_TYPES_H 0
++#define HAVE_CDIO_PARANOIA_H 0
++#define HAVE_CDIO_PARANOIA_PARANOIA_H 0
++#define HAVE_CUDA_H 0
++#define HAVE_DISPATCH_DISPATCH_H 0
++#define HAVE_DEV_BKTR_IOCTL_BT848_H 0
++#define HAVE_DEV_BKTR_IOCTL_METEOR_H 0
++#define HAVE_DEV_IC_BT8XX_H 0
++#define HAVE_DEV_VIDEO_BKTR_IOCTL_BT848_H 0
++#define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0
++#define HAVE_DIRECT_H 1
++#define HAVE_DIRENT_H 0
++#define HAVE_DXGIDEBUG_H 1
++#define HAVE_DXVA_H 1
++#define HAVE_ES2_GL_H 0
++#define HAVE_GSM_H 0
++#define HAVE_IO_H 1
++#define HAVE_LINUX_PERF_EVENT_H 0
++#define HAVE_MACHINE_IOCTL_BT848_H 0
++#define HAVE_MACHINE_IOCTL_METEOR_H 0
++#define HAVE_OPENCV2_CORE_CORE_C_H 0
++#define HAVE_OPENGL_GL3_H 0
++#define HAVE_POLL_H 0
++#define HAVE_SYS_PARAM_H 0
++#define HAVE_SYS_RESOURCE_H 0
++#define HAVE_SYS_SELECT_H 0
++#define HAVE_SYS_SOUNDCARD_H 0
++#define HAVE_SYS_TIME_H 0
++#define HAVE_SYS_UN_H 0
++#define HAVE_SYS_VIDEOIO_H 0
++#define HAVE_TERMIOS_H 0
++#define HAVE_UDPLITE_H 0
++#define HAVE_UNISTD_H 0
++#define HAVE_VALGRIND_VALGRIND_H 0
++#define HAVE_WINDOWS_H 1
++#define HAVE_WINSOCK2_H 1
++#define HAVE_INTRINSICS_NEON 0
++#define HAVE_ATANF 1
++#define HAVE_ATAN2F 1
++#define HAVE_CBRT 1
++#define HAVE_CBRTF 1
++#define HAVE_COPYSIGN 1
++#define HAVE_COSF 1
++#define HAVE_ERF 1
++#define HAVE_EXP2 1
++#define HAVE_EXP2F 1
++#define HAVE_EXPF 1
++#define HAVE_HYPOT 1
++#define HAVE_ISFINITE 1
++#define HAVE_ISINF 1
++#define HAVE_ISNAN 1
++#define HAVE_LDEXPF 1
++#define HAVE_LLRINT 1
++#define HAVE_LLRINTF 1
++#define HAVE_LOG2 1
++#define HAVE_LOG2F 1
++#define HAVE_LOG10F 1
++#define HAVE_LRINT 1
++#define HAVE_LRINTF 1
++#define HAVE_POWF 1
++#define HAVE_RINT 1
++#define HAVE_ROUND 1
++#define HAVE_ROUNDF 1
++#define HAVE_SINF 1
++#define HAVE_TRUNC 1
++#define HAVE_TRUNCF 1
++#define HAVE_DOS_PATHS 1
++#define HAVE_LIBC_MSVCRT 1
++#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0
++#define HAVE_SECTION_DATA_REL_RO 0
++#define HAVE_THREADS 1
++#define HAVE_UWP 0
++#define HAVE_WINRT 0
++#define HAVE_ACCESS 1
++#define HAVE_ALIGNED_MALLOC 1
++#define HAVE_CLOCK_GETTIME 0
++#define HAVE_CLOSESOCKET 1
++#define HAVE_COMMANDLINETOARGVW 1
++#define HAVE_FCNTL 0
++#define HAVE_GETADDRINFO 1
++#define HAVE_GETHRTIME 0
++#define HAVE_GETOPT 0
++#define HAVE_GETPROCESSAFFINITYMASK 1
++#define HAVE_GETPROCESSMEMORYINFO 1
++#define HAVE_GETPROCESSTIMES 1
++#define HAVE_GETRUSAGE 0
++#define HAVE_GETSYSTEMTIMEASFILETIME 1
++#define HAVE_GETTIMEOFDAY 0
++#define HAVE_GLOB 0
++#define HAVE_GLXGETPROCADDRESS 0
++#define HAVE_GMTIME_R 0
++#define HAVE_INET_ATON 0
++#define HAVE_ISATTY 1
++#define HAVE_KBHIT 1
++#define HAVE_LSTAT 0
++#define HAVE_LZO1X_999_COMPRESS 0
++#define HAVE_MACH_ABSOLUTE_TIME 0
++#define HAVE_MAPVIEWOFFILE 1
++#define HAVE_MKSTEMP 0
++#define HAVE_MMAP 0
++#define HAVE_MPROTECT 0
++#define HAVE_NANOSLEEP 0
++#define HAVE_PEEKNAMEDPIPE 1
++#define HAVE_PTHREAD_CANCEL 0
++#define HAVE_SCHED_GETAFFINITY 0
++#define HAVE_SECITEMIMPORT 0
++#define HAVE_SETCONSOLETEXTATTRIBUTE 1
++#define HAVE_SETCONSOLECTRLHANDLER 1
++#define HAVE_SETMODE 1
++#define HAVE_SETRLIMIT 0
++#define HAVE_SLEEP 1
++#define HAVE_STRERROR_R 0
++#define HAVE_SYSCONF 0
++#define HAVE_SYSCTL 0
++#define HAVE_USLEEP 0
++#define HAVE_UTGETOSTYPEFROMSTRING 0
++#define HAVE_VIRTUALALLOC 1
++#define HAVE_WGLGETPROCADDRESS 0
++#define HAVE_BCRYPT 1
++#define HAVE_VAAPI_DRM 0
++#define HAVE_VAAPI_X11 0
++#define HAVE_VDPAU_X11 0
++#define HAVE_PTHREADS 0
++#define HAVE_OS2THREADS 0
++#define HAVE_W32THREADS 1
++#define HAVE_AS_ARCH_DIRECTIVE 0
++#define HAVE_AS_DN_DIRECTIVE 0
++#define HAVE_AS_FPU_DIRECTIVE 0
++#define HAVE_AS_FUNC 0
++#define HAVE_AS_OBJECT_ARCH 0
++#define HAVE_ASM_MOD_Q 0
++#define HAVE_BLOCKS_EXTENSION 0
++#define HAVE_EBP_AVAILABLE 0
++#define HAVE_EBX_AVAILABLE 0
++#define HAVE_GNU_AS 0
++#define HAVE_GNU_WINDRES 0
++#define HAVE_IBM_ASM 0
++#define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0
++#define HAVE_INLINE_ASM_LABELS 0
++#define HAVE_INLINE_ASM_NONLOCAL_LABELS 0
++#define HAVE_PRAGMA_DEPRECATED 1
++#define HAVE_RSYNC_CONTIMEOUT 0
++#define HAVE_SYMVER_ASM_LABEL 0
++#define HAVE_SYMVER_GNU_ASM 0
++#define HAVE_VFP_ARGS 0
++#define HAVE_XFORM_ASM 0
++#define HAVE_XMM_CLOBBERS 0
++#define HAVE_KCMVIDEOCODECTYPE_HEVC 0
++#define HAVE_SOCKLEN_T 1
++#define HAVE_STRUCT_ADDRINFO 1
++#define HAVE_STRUCT_GROUP_SOURCE_REQ 1
++#define HAVE_STRUCT_IP_MREQ_SOURCE 1
++#define HAVE_STRUCT_IPV6_MREQ 1
++#define HAVE_STRUCT_MSGHDR_MSG_FLAGS 0
++#define HAVE_STRUCT_POLLFD 1
++#define HAVE_STRUCT_RUSAGE_RU_MAXRSS 0
++#define HAVE_STRUCT_SCTP_EVENT_SUBSCRIBE 0
++#define HAVE_STRUCT_SOCKADDR_IN6 1
++#define HAVE_STRUCT_SOCKADDR_SA_LEN 0
++#define HAVE_STRUCT_SOCKADDR_STORAGE 1
++#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 0
++#define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 0
++#define HAVE_MAKEINFO 1
++#define HAVE_MAKEINFO_HTML 0
++#define HAVE_OPENCL_D3D11 0
++#define HAVE_OPENCL_DRM_ARM 0
++#define HAVE_OPENCL_DRM_BEIGNET 0
++#define HAVE_OPENCL_DXVA2 0
++#define HAVE_OPENCL_VAAPI_BEIGNET 0
++#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0
++#define HAVE_PERL 1
++#define HAVE_POD2MAN 1
++#define HAVE_TEXI2HTML 0
++#define CONFIG_DOC 0
++#define CONFIG_HTMLPAGES 0
++#define CONFIG_MANPAGES 1
++#define CONFIG_PODPAGES 1
++#define CONFIG_TXTPAGES 1
++#define CONFIG_AVIO_DIR_CMD_EXAMPLE 1
++#define CONFIG_AVIO_READING_EXAMPLE 1
++#define CONFIG_DECODE_AUDIO_EXAMPLE 1
++#define CONFIG_DECODE_VIDEO_EXAMPLE 1
++#define CONFIG_DEMUXING_DECODING_EXAMPLE 0
++#define CONFIG_ENCODE_AUDIO_EXAMPLE 1
++#define CONFIG_ENCODE_VIDEO_EXAMPLE 1
++#define CONFIG_EXTRACT_MVS_EXAMPLE 0
++#define CONFIG_FILTER_AUDIO_EXAMPLE 0
++#define CONFIG_FILTERING_AUDIO_EXAMPLE 0
++#define CONFIG_FILTERING_VIDEO_EXAMPLE 0
++#define CONFIG_HTTP_MULTICLIENT_EXAMPLE 0
++#define CONFIG_HW_DECODE_EXAMPLE 0
++#define CONFIG_METADATA_EXAMPLE 0
++#define CONFIG_MUXING_EXAMPLE 0
++#define CONFIG_QSVDEC_EXAMPLE 0
++#define CONFIG_REMUXING_EXAMPLE 0
++#define CONFIG_RESAMPLING_AUDIO_EXAMPLE 0
++#define CONFIG_SCALING_VIDEO_EXAMPLE 0
++#define CONFIG_TRANSCODE_AAC_EXAMPLE 0
++#define CONFIG_TRANSCODING_EXAMPLE 0
++#define CONFIG_VAAPI_ENCODE_EXAMPLE 0
++#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0
++#define CONFIG_AVISYNTH 0
++#define CONFIG_FREI0R 0
++#define CONFIG_LIBCDIO 0
++#define CONFIG_LIBDAVS2 0
++#define CONFIG_LIBRUBBERBAND 0
++#define CONFIG_LIBVIDSTAB 0
++#define CONFIG_LIBX264 0
++#define CONFIG_LIBX265 0
++#define CONFIG_LIBXAVS 0
++#define CONFIG_LIBXVID 0
++#define CONFIG_DECKLINK 0
++#define CONFIG_LIBNDI_NEWTEK 0
++#define CONFIG_LIBFDK_AAC 0
++#define CONFIG_OPENSSL 0
++#define CONFIG_LIBTLS 0
++#define CONFIG_GMP 0
++#define CONFIG_LIBLENSFUN 0
++#define CONFIG_LIBOPENCORE_AMRNB 0
++#define CONFIG_LIBOPENCORE_AMRWB 0
++#define CONFIG_LIBVMAF 0
++#define CONFIG_LIBVO_AMRWBENC 0
++#define CONFIG_MBEDTLS 0
++#define CONFIG_RKMPP 0
++#define CONFIG_LIBSMBCLIENT 0
++#define CONFIG_CHROMAPRINT 0
++#define CONFIG_GCRYPT 0
++#define CONFIG_GNUTLS 0
++#define CONFIG_JNI 0
++#define CONFIG_LADSPA 0
++#define CONFIG_LIBAOM 0
++#define CONFIG_LIBASS 0
++#define CONFIG_LIBBLURAY 0
++#define CONFIG_LIBBS2B 0
++#define CONFIG_LIBCACA 0
++#define CONFIG_LIBCELT 0
++#define CONFIG_LIBCODEC2 0
++#define CONFIG_LIBDC1394 0
++#define CONFIG_LIBDRM 0
++#define CONFIG_LIBFLITE 0
++#define CONFIG_LIBFONTCONFIG 0
++#define CONFIG_LIBFREETYPE 0
++#define CONFIG_LIBFRIBIDI 0
++#define CONFIG_LIBGME 0
++#define CONFIG_LIBGSM 0
++#define CONFIG_LIBIEC61883 0
++#define CONFIG_LIBILBC 0
++#define CONFIG_LIBJACK 0
++#define CONFIG_LIBKVAZAAR 0
++#define CONFIG_LIBMODPLUG 0
++#define CONFIG_LIBMP3LAME 0
++#define CONFIG_LIBMYSOFA 0
++#define CONFIG_LIBOPENCV 0
++#define CONFIG_LIBOPENH264 0
++#define CONFIG_LIBOPENJPEG 0
++#define CONFIG_LIBOPENMPT 0
++#define CONFIG_LIBOPUS 0
++#define CONFIG_LIBPULSE 0
++#define CONFIG_LIBRSVG 0
++#define CONFIG_LIBRTMP 0
++#define CONFIG_LIBSHINE 0
++#define CONFIG_LIBSMBCLIENT 0
++#define CONFIG_LIBSNAPPY 0
++#define CONFIG_LIBSOXR 0
++#define CONFIG_LIBSPEEX 0
++#define CONFIG_LIBSRT 0
++#define CONFIG_LIBSSH 0
++#define CONFIG_LIBTENSORFLOW 0
++#define CONFIG_LIBTESSERACT 0
++#define CONFIG_LIBTHEORA 0
++#define CONFIG_LIBTWOLAME 0
++#define CONFIG_LIBV4L2 0
++#define CONFIG_LIBVORBIS 0
++#define CONFIG_LIBVPX 0
++#define CONFIG_LIBWAVPACK 0
++#define CONFIG_LIBWEBP 0
++#define CONFIG_LIBXML2 0
++#define CONFIG_LIBZIMG 0
++#define CONFIG_LIBZMQ 0
++#define CONFIG_LIBZVBI 0
++#define CONFIG_LV2 0
++#define CONFIG_MEDIACODEC 0
++#define CONFIG_OPENAL 0
++#define CONFIG_OPENGL 0
++#define CONFIG_VAPOURSYNTH 0
++#define CONFIG_ALSA 0
++#define CONFIG_APPKIT 0
++#define CONFIG_AVFOUNDATION 0
++#define CONFIG_BZLIB 0
++#define CONFIG_COREIMAGE 0
++#define CONFIG_ICONV 0
++#define CONFIG_LIBXCB 0
++#define CONFIG_LIBXCB_SHM 0
++#define CONFIG_LIBXCB_SHAPE 0
++#define CONFIG_LIBXCB_XFIXES 0
++#define CONFIG_LZMA 0
++#define CONFIG_SCHANNEL 1
++#define CONFIG_SDL2 0
++#define CONFIG_SECURETRANSPORT 0
++#define CONFIG_SNDIO 0
++#define CONFIG_XLIB 0
++#define CONFIG_ZLIB 0
++#define CONFIG_CUDA_SDK 0
++#define CONFIG_LIBNPP 0
++#define CONFIG_LIBMFX 0
++#define CONFIG_MMAL 0
++#define CONFIG_OMX 0
++#define CONFIG_OPENCL 0
++#define CONFIG_AMF 0
++#define CONFIG_AUDIOTOOLBOX 0
++#define CONFIG_CRYSTALHD 0
++#define CONFIG_CUDA 0
++#define CONFIG_CUVID 0
++#define CONFIG_D3D11VA 0
++#define CONFIG_DXVA2 0
++#define CONFIG_FFNVCODEC 0
++#define CONFIG_NVDEC 0
++#define CONFIG_NVENC 0
++#define CONFIG_VAAPI 0
++#define CONFIG_VDPAU 0
++#define CONFIG_VIDEOTOOLBOX 0
++#define CONFIG_V4L2_M2M 0
++#define CONFIG_XVMC 0
++#define CONFIG_FTRAPV 0
++#define CONFIG_GRAY 0
++#define CONFIG_HARDCODED_TABLES 0
++#define CONFIG_OMX_RPI 0
++#define CONFIG_RUNTIME_CPUDETECT 1
++#define CONFIG_SAFE_BITSTREAM_READER 1
++#define CONFIG_SHARED 1
++#define CONFIG_SMALL 0
++#define CONFIG_STATIC 0
++#define CONFIG_SWSCALE_ALPHA 1
++#define CONFIG_GPL 0
++#define CONFIG_NONFREE 0
++#define CONFIG_VERSION3 0
++#define CONFIG_AVDEVICE 0
++#define CONFIG_AVFILTER 0
++#define CONFIG_SWSCALE 0
++#define CONFIG_POSTPROC 0
++#define CONFIG_AVFORMAT 0
++#define CONFIG_AVCODEC 1
++#define CONFIG_SWRESAMPLE 0
++#define CONFIG_AVRESAMPLE 0
++#define CONFIG_AVUTIL 1
++#define CONFIG_FFPLAY 0
++#define CONFIG_FFPROBE 0
++#define CONFIG_FFMPEG 0
++#define CONFIG_DCT 0
++#define CONFIG_DWT 0
++#define CONFIG_ERROR_RESILIENCE 0
++#define CONFIG_FAAN 1
++#define CONFIG_FAST_UNALIGNED 1
++#define CONFIG_FFT 0
++#define CONFIG_LSP 0
++#define CONFIG_LZO 0
++#define CONFIG_MDCT 0
++#define CONFIG_PIXELUTILS 0
++#define CONFIG_NETWORK 0
++#define CONFIG_RDFT 0
++#define CONFIG_AUTODETECT 0
++#define CONFIG_FONTCONFIG 0
++#define CONFIG_LINUX_PERF 0
++#define CONFIG_MEMORY_POISONING 0
++#define CONFIG_NEON_CLOBBER_TEST 0
++#define CONFIG_OSSFUZZ 0
++#define CONFIG_PIC 1
++#define CONFIG_THUMB 0
++#define CONFIG_VALGRIND_BACKTRACE 0
++#define CONFIG_XMM_CLOBBER_TEST 0
++#define CONFIG_BSFS 1
++#define CONFIG_DECODERS 1
++#define CONFIG_PARSERS 1
++#define CONFIG_AANDCTTABLES 0
++#define CONFIG_AC3DSP 0
++#define CONFIG_ADTS_HEADER 0
++#define CONFIG_AUDIO_FRAME_QUEUE 0
++#define CONFIG_AUDIODSP 0
++#define CONFIG_BLOCKDSP 0
++#define CONFIG_BSWAPDSP 0
++#define CONFIG_CABAC 0
++#define CONFIG_CBS 0
++#define CONFIG_CBS_H264 0
++#define CONFIG_CBS_H265 0
++#define CONFIG_CBS_MPEG2 0
++#define CONFIG_CBS_VP9 0
++#define CONFIG_DIRAC_PARSE 0
++#define CONFIG_DNN 0
++#define CONFIG_DVPROFILE 0
++#define CONFIG_EXIF 0
++#define CONFIG_FAANDCT 1
++#define CONFIG_FAANIDCT 1
++#define CONFIG_FDCTDSP 1
++#define CONFIG_FLACDSP 1
++#define CONFIG_FMTCONVERT 0
++#define CONFIG_G722DSP 0
++#define CONFIG_GOLOMB 0
++#define CONFIG_GPLV3 0
++#define CONFIG_H263DSP 0
++#define CONFIG_H264CHROMA 0
++#define CONFIG_H264DSP 0
++#define CONFIG_H264PARSE 0
++#define CONFIG_H264PRED 1
++#define CONFIG_H264QPEL 0
++#define CONFIG_HEVCPARSE 0
++#define CONFIG_HPELDSP 0
++#define CONFIG_HUFFMAN 0
++#define CONFIG_HUFFYUVDSP 0
++#define CONFIG_HUFFYUVENCDSP 0
++#define CONFIG_IDCTDSP 1
++#define CONFIG_IIRFILTER 0
++#define CONFIG_MDCT15 0
++#define CONFIG_INTRAX8 0
++#define CONFIG_ISO_MEDIA 0
++#define CONFIG_IVIDSP 0
++#define CONFIG_JPEGTABLES 0
++#define CONFIG_LGPLV3 0
++#define CONFIG_LIBX262 0
++#define CONFIG_LLAUDDSP 0
++#define CONFIG_LLVIDDSP 0
++#define CONFIG_LLVIDENCDSP 0
++#define CONFIG_LPC 0
++#define CONFIG_LZF 0
++#define CONFIG_ME_CMP 0
++#define CONFIG_MPEG_ER 0
++#define CONFIG_MPEGAUDIO 0
++#define CONFIG_MPEGAUDIODSP 0
++#define CONFIG_MPEGAUDIOHEADER 0
++#define CONFIG_MPEGVIDEO 0
++#define CONFIG_MPEGVIDEOENC 0
++#define CONFIG_MSS34DSP 0
++#define CONFIG_PIXBLOCKDSP 0
++#define CONFIG_QPELDSP 0
++#define CONFIG_QSV 0
++#define CONFIG_QSVDEC 0
++#define CONFIG_QSVENC 0
++#define CONFIG_QSVVPP 0
++#define CONFIG_RANGECODER 0
++#define CONFIG_RIFFDEC 0
++#define CONFIG_RIFFENC 0
++#define CONFIG_RTPDEC 0
++#define CONFIG_RTPENC_CHAIN 0
++#define CONFIG_RV34DSP 0
++#define CONFIG_SINEWIN 0
++#define CONFIG_SNAPPY 0
++#define CONFIG_SRTP 0
++#define CONFIG_STARTCODE 0
++#define CONFIG_TEXTUREDSP 0
++#define CONFIG_TEXTUREDSPENC 0
++#define CONFIG_TPELDSP 0
++#define CONFIG_VAAPI_1 0
++#define CONFIG_VAAPI_ENCODE 0
++#define CONFIG_VC1DSP 0
++#define CONFIG_VIDEODSP 1
++#define CONFIG_VP3DSP 0
++#define CONFIG_VP56DSP 0
++#define CONFIG_VP8DSP 1
++#define CONFIG_WMA_FREQS 0
++#define CONFIG_WMV2DSP 0
++#define CONFIG_NULL_BSF 1
++#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 1
++#define CONFIG_VP8_DECODER 1
++#define CONFIG_VP9_DECODER 1
++#define CONFIG_FLAC_DECODER 1
++#define CONFIG_VP8_PARSER 1
++#define CONFIG_VP9_PARSER 1
++#endif /* FFMPEG_CONFIG_H */
+diff --git a/media/ffvpx/ffvpxcommon.mozbuild b/media/ffvpx/ffvpxcommon.mozbuild
+--- a/media/ffvpx/ffvpxcommon.mozbuild
++++ b/media/ffvpx/ffvpxcommon.mozbuild
+@@ -1,19 +1,20 @@
+ # -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+ # vim: set filetype=python:
+ # This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+ # Add assembler flags and includes
+-ASFLAGS += CONFIG['FFVPX_ASFLAGS']
+-ASFLAGS += ['-I%s/media/ffvpx/' % TOPSRCDIR]
+-ASFLAGS += ['-I%s/media/ffvpx/libavcodec/x86/' % TOPSRCDIR]
+-ASFLAGS += ['-I%s/media/ffvpx/libavutil/x86/' % TOPSRCDIR]
++if CONFIG['CPU_ARCH'] != 'aarch64':
++    ASFLAGS += CONFIG['FFVPX_ASFLAGS']
++    ASFLAGS += ['-I%s/media/ffvpx/' % TOPSRCDIR]
++    ASFLAGS += ['-I%s/media/ffvpx/libavcodec/x86/' % TOPSRCDIR]
++    ASFLAGS += ['-I%s/media/ffvpx/libavutil/x86/' % TOPSRCDIR]
+ 
+ if CONFIG['FFVPX_ASFLAGS']:
+     if CONFIG['FFVPX_USE_YASM']:
+         USE_YASM = True
+ 
+     if CONFIG['OS_ARCH'] == 'WINNT':
+        # Fix inline symbols and math defines for windows.
+         DEFINES['_USE_MATH_DEFINES'] = True
+diff --git a/media/ffvpx/libavcodec/aarch64/moz.build b/media/ffvpx/libavcodec/aarch64/moz.build
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavcodec/aarch64/moz.build
+@@ -0,0 +1,47 @@
++## -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
++## vim: set filetype=python:
++## This Source Code Form is subject to the terms of the Mozilla Public
++## License, v. 2.0. If a copy of the MPL was not distributed with this
++## file, You can obtain one at http://mozilla.org/MPL/2.0/.
++
++SOURCES += [
++    'h264chroma_init_aarch64.c',
++    'h264cmc_neon.S',
++    'h264dsp_init_aarch64.c',
++    'h264dsp_neon.S',
++    'h264idct_neon.S',
++    'h264pred_init.c',
++    'h264pred_neon.S',
++    'hpeldsp_init_aarch64.c',
++    'hpeldsp_neon.S',
++    'idctdsp_init_aarch64.c',
++    'mdct_neon.S',
++    'neon.S',
++    'simple_idct_neon.S',
++    'videodsp.S',
++    'videodsp_init.c',
++    'vp9dsp_init_10bpp_aarch64.c',
++    'vp9dsp_init_12bpp_aarch64.c',
++    'vp9dsp_init_16bpp_aarch64_template.c',
++    'vp9dsp_init_aarch64.c',
++    'vp9itxfm_16bpp_neon.S',
++    'vp9itxfm_neon.S',
++    'vp9lpf_16bpp_neon.S',
++    'vp9lpf_neon.S',
++    'vp9mc_16bpp_neon.S',
++    'vp9mc_neon.S',
++]
++
++if CONFIG['OS_ARCH'] == 'WINNT':
++    USE_INTEGRATED_CLANGCL_AS = True
++    DEFINES['EXTERN_ASM'] = ''
++
++if CONFIG['MOZ_LIBAV_FFT']:
++    SOURCES += [
++        'fft_init_aarch64.c',
++        'fft_neon.S',
++    ]
++
++FINAL_LIBRARY = 'mozavcodec'
++
++include('/media/ffvpx/ffvpxcommon.mozbuild')
+diff --git a/media/ffvpx/libavcodec/dummy_funcs.c b/media/ffvpx/libavcodec/dummy_funcs.c
+--- a/media/ffvpx/libavcodec/dummy_funcs.c
++++ b/media/ffvpx/libavcodec/dummy_funcs.c
+@@ -814,43 +814,37 @@ AVBitStreamFilter ff_mjpeg2jpeg_bsf;
+ AVBitStreamFilter ff_mjpega_dump_header_bsf;
+ AVBitStreamFilter ff_mp3_header_decompress_bsf;
+ AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
+ AVBitStreamFilter ff_mov2textsub_bsf;
+ AVBitStreamFilter ff_noise_bsf;
+ AVBitStreamFilter ff_remove_extradata_bsf;
+ AVBitStreamFilter ff_text2movsub_bsf;
+ 
+-void ff_fft_init_aarch64(FFTContext *s) {}
+ void ff_fft_init_arm(FFTContext *s) {}
+ void ff_fft_init_mips(FFTContext *s) {}
+ void ff_fft_init_ppc(FFTContext *s) {}
+ void ff_rdft_init_arm(RDFTContext *s) {}
+-void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+-                               const int bit_depth,
+-                               const int chroma_format_idc) {}
+ void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
+                            const int bit_depth, const int chroma_format_idc) {}
+ void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
+                             const int bit_depth, const int chroma_format_idc) {}
+ void ff_me_cmp_init_static(void) {}
+ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options) { return 0; }
+ void ff_frame_thread_encoder_free(AVCodecContext *avctx) {}
+ int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr) { return 0; }
+-void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc) {}
+ void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc) {}
+ void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc) {}
+ void ff_videodsp_init_mips(VideoDSPContext *ctx, int bpc) {}
+ void ff_vp7dsp_init(VP8DSPContext *c) {}
+ void ff_vp78dsp_init_arm(VP8DSPContext *c) {}
+ void ff_vp78dsp_init_ppc(VP8DSPContext *c) {}
+ void ff_vp8dsp_init_arm(VP8DSPContext *c) {}
+ void ff_vp8dsp_init_mips(VP8DSPContext *c) {}
+ void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp) {}
+-void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp) {}
+ void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp) {}
+ #if !defined(__arm__)
+ void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps) {}
+ #endif
+ #if !defined(HAVE_64BIT_BUILD)
+ void ff_flac_decorrelate_indep8_16_sse2(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
+ void ff_flac_decorrelate_indep8_32_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
+ void ff_flac_decorrelate_indep8_16_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
+diff --git a/media/ffvpx/libavcodec/moz.build b/media/ffvpx/libavcodec/moz.build
+--- a/media/ffvpx/libavcodec/moz.build
++++ b/media/ffvpx/libavcodec/moz.build
+@@ -4,16 +4,18 @@
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+ # Due to duplicate file names, we compile libavutil/x86 in its own
+ # moz.build file.
+ if CONFIG['FFVPX_ASFLAGS']:
+     if CONFIG['CPU_ARCH'] == 'x86' or CONFIG['CPU_ARCH'] == 'x86_64':
+         DIRS += ['x86']
++    elif CONFIG['CPU_ARCH'] == 'aarch64':
++        DIRS += ['aarch64']
+     elif CONFIG['CPU_ARCH'] == 'arm':
+         DIRS += ['arm']
+ 
+ SharedLibrary('mozavcodec')
+ SOURCES += [
+     'allcodecs.c',
+     'avpacket.c',
+     'bitstream_filters.c',
+diff --git a/media/ffvpx/libavutil/aarch64/moz.build b/media/ffvpx/libavutil/aarch64/moz.build
+new file mode 100644
+--- /dev/null
++++ b/media/ffvpx/libavutil/aarch64/moz.build
+@@ -0,0 +1,19 @@
++# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
++# vim: set filetype=python:
++# This Source Code Form is subject to the terms of the Mozilla Public
++# License, v. 2.0. If a copy of the MPL was not distributed with this
++# file, You can obtain one at http://mozilla.org/MPL/2.0/.
++
++SOURCES += [
++    'cpu.c',
++    'float_dsp_init.c',
++    'float_dsp_neon.S',
++]
++
++if CONFIG['OS_ARCH'] == 'WINNT':
++    USE_INTEGRATED_CLANGCL_AS = True
++    DEFINES['EXTERN_ASM'] = ''
++
++FINAL_LIBRARY = 'mozavutil'
++
++include('/media/ffvpx/ffvpxcommon.mozbuild')
+diff --git a/media/ffvpx/libavutil/avutil.symbols b/media/ffvpx/libavutil/avutil.symbols
+--- a/media/ffvpx/libavutil/avutil.symbols
++++ b/media/ffvpx/libavutil/avutil.symbols
+@@ -306,17 +306,17 @@ avpriv_alloc_fixed_dsp
+ avpriv_float_dsp_alloc
+ avpriv_report_missing_feature
+ avpriv_request_sample
+ avpriv_scalarproduct_float_c
+ avpriv_set_systematic_pal2
+ avutil_configuration
+ avutil_license
+ avutil_version
+-#ifdef XP_WIN
++#if defined(XP_WIN) && !defined(_ARM64_)
+ avpriv_emms_asm
+ #endif
+ avpriv_slicethread_create
+ avpriv_slicethread_execute
+ avpriv_slicethread_free
+ av_hwdevice_get_type_name
+ av_hwframe_ctx_alloc
+ av_hwframe_ctx_init
+diff --git a/media/ffvpx/libavutil/dummy_funcs.c b/media/ffvpx/libavutil/dummy_funcs.c
+--- a/media/ffvpx/libavutil/dummy_funcs.c
++++ b/media/ffvpx/libavutil/dummy_funcs.c
+@@ -3,29 +3,35 @@
+ /* This Source Code Form is subject to the terms of the Mozilla Public
+  * License, v. 2.0. If a copy of the MPL was not distributed with this
+  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+ 
+ #include "avutil.h"
+ #include "hwcontext.h"
+ 
+ // cpu_internal.c
++#if !defined(_ARM64_)
+ int ff_get_cpu_flags_aarch64(void) { return 0; }
++#endif
+ #if !defined(__arm__)
+ int ff_get_cpu_flags_arm(void) { return 0; }
+ #endif
+ int ff_get_cpu_flags_ppc(void) { return 0; }
+ 
+ // float_dsp.c
+ #include "float_dsp.h"
++#if !defined(_ARM64_)
+ void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp) {}
++#endif
+ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict) {}
+ void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp) {}
+ #if !defined(__arm__)
+ void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp) {}
+ #endif
+ 
+ // cpu.c
++#if !defined(_ARM64_)
+ size_t ff_get_cpu_max_align_aarch64() { return 0; }
++#endif
+ size_t ff_get_cpu_max_align_ppc() { return 0; }
+ #if !defined(__arm__)
+ size_t ff_get_cpu_max_align_arm() { return 0; }
+ #endif
+diff --git a/media/ffvpx/libavutil/moz.build b/media/ffvpx/libavutil/moz.build
+--- a/media/ffvpx/libavutil/moz.build
++++ b/media/ffvpx/libavutil/moz.build
+@@ -6,16 +6,18 @@
+ 
+ # Due to duplicate file names, we compile libavutil/x86 in its own
+ # moz.build file.
+ if CONFIG['FFVPX_ASFLAGS']:
+     if CONFIG['CPU_ARCH'] == 'x86' or CONFIG['CPU_ARCH'] == 'x86_64':
+         DIRS += ['x86']
+     elif CONFIG['CPU_ARCH'] == 'arm':
+         DIRS += ['arm']
++    elif CONFIG['CPU_ARCH'] == 'aarch64':
++        DIRS += ['aarch64']
+ 
+ SharedLibrary('mozavutil')
+ SOURCES += [
+     'avstring.c',
+     'bprint.c',
+     'buffer.c',
+     'channel_layout.c',
+     'cpu.c',
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1432,16 +1432,18 @@ with only_when(compile_environment):
+ # Libav-fft Support
+ # ==============================================================
+ with only_when(compile_environment):
+     @depends(target)
+     def libav_fft(target):
+         flags = None
+         if target.kernel == 'WINNT' and target.cpu == 'x86':
+             flags = ['-DPIC', '-DWIN32']
++        elif target.kernel == 'WINNT' and target.cpu == 'aarch64':
++            flags = ['-DPIC', '-DWIN64']
+         elif target.cpu == 'x86_64':
+             if target.kernel == 'Darwin':
+                 flags = ['-D__x86_64__', '-DPIC', '-DMACHO']
+             elif target.kernel == 'WINNT':
+                 flags = ['-D__x86_64__', '-DPIC', '-DWIN64', '-DMSVC']
+             else:
+                 flags = ['-D__x86_64__', '-DPIC', '-DELF']
+         if flags:
+@@ -1467,25 +1469,28 @@ with only_when(compile_environment):
+ 
+     set_config('YASM_HAS_AVX2', yasm_has_avx2)
+ 
+ 
+     @depends(yasm_has_avx2, libav_fft, vpx_as_flags, target)
+     def ffvpx(yasm_has_avx2, libav_fft, vpx_as_flags, target):
+         enable = flac_only = use_yasm = False
+         flags = []
+-        if target.cpu in ('x86', 'x86_64'):
++        if target.cpu in ('x86', 'x86_64') or \
++                target.cpu == 'aarch64' and target.kernel == 'WINNT':
+             enable = True
+             if libav_fft and libav_fft.flags:
+                 use_yasm = True
+                 flags.extend(libav_fft.flags)
+                 if target.kernel == 'WINNT':
+                     if target.cpu == 'x86':
+                         # 32-bit windows need to prefix symbols with an underscore.
+                         flags.extend(('-DPREFIX', '-Pconfig_win32.asm'))
++                    elif target.cpu == 'aarch64':
++                        use_yasm = False
+                     else:
+                         flags.append('-Pconfig_win64.asm')
+                 elif target.kernel == 'Darwin':
+                     # 32/64-bit macosx assemblers need to prefix symbols with an
+                     # underscore.
+                     flags.extend(('-DPREFIX', '-Pconfig_darwin64.asm'))
+                 else:
+                     # Default to unix.

+ 2 - 2
mozilla-release/patches/1585358-71a1.patch

@@ -2,7 +2,7 @@
 # User Tom Ritter <tom@mozilla.com>
 # Date 1570732256 0
 # Node ID a02ea11484ab8ca20eab416d14527fcd2c1cfd8f
-# Parent  03909ce40c67b3e86d9e0d5f1ecb36268bb4bec7
+# Parent  5abcb2db682734c9aa8d61033d248d56dec4bce4
 Bug 1585358 - Remove mingw-gcc configuration stuf from libvpx r=jya
 
 mingw-gcc is no longer supported.
@@ -4184,7 +4184,7 @@ diff --git a/media/libvpx/generate_sources_mozbuild.sh b/media/libvpx/generate_s
  gen_config_files win/x64 "--target=x86_64-win64-vs12 ${all_platforms} ${x86_platforms}"
  gen_config_files win/ia32 "--target=x86-win32-gcc ${all_platforms} ${x86_platforms}"
 -gen_config_files win/mingw32 "--target=x86-win32-gcc ${all_platforms} ${x86_platforms}"
--gen_config_files win/aarch64 "--target=aarch64-win64-vs12 ${all_platforms}"
+-gen_config_files win/aarch64 "--target=aarch64-win64-vs12 ${all_platforms} ${arm64_platforms}"
  
  gen_config_files linux/arm "--target=armv7-linux-gcc ${all_platforms} ${arm_platforms}"
  gen_config_files linux/arm64 "--target=arm64-linux-gcc ${all_platforms} ${arm64_platforms}"

+ 6250 - 0
mozilla-release/patches/1585359-71a1.patch

@@ -0,0 +1,6250 @@
+# HG changeset patch
+# User Tom Ritter <tom@mozilla.com>
+# Date 1570732256 0
+# Node ID b86bb62c23b1495ebfe4a2a78508408bfd1b722e
+# Parent  de69b48bbeb0bc9623fb257c45b663fc67c405a3
+Bug 1585359 - Remove mingw-gcc configuration stuff from libaom r=jya
+
+mingw-gcc is no longer supported.
+
+Differential Revision: https://phabricator.services.mozilla.com/D48578
+
+diff --git a/media/libaom/config/win/mingw32/config/aom_config.asm b/media/libaom/config/win/mingw32/config/aom_config.asm
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw32/config/aom_config.asm
++++ /dev/null
+@@ -1,76 +0,0 @@
+-;
+-; Copyright (c) 2018, Alliance for Open Media. All rights reserved
+-;
+-; This source code is subject to the terms of the BSD 2 Clause License and
+-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+-; was not distributed with this source code in the LICENSE file, you can
+-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
+-; Media Patent License 1.0 was not distributed with this source code in the
+-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+-;
+-
+-ARCH_ARM equ 0
+-ARCH_MIPS equ 0
+-ARCH_PPC equ 0
+-ARCH_X86 equ 1
+-ARCH_X86_64 equ 0
+-CONFIG_2PASS_PARTITION_SEARCH_LVL equ 1
+-CONFIG_ACCOUNTING equ 0
+-CONFIG_ANALYZER equ 0
+-CONFIG_AV1_DECODER equ 1
+-CONFIG_AV1_ENCODER equ 0
+-CONFIG_BIG_ENDIAN equ 0
+-CONFIG_BITSTREAM_DEBUG equ 0
+-CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
+-CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
+-CONFIG_COLLECT_RD_STATS equ 0
+-CONFIG_DEBUG equ 0
+-CONFIG_DENOISE equ 1
+-CONFIG_DIST_8X8 equ 0
+-CONFIG_ENTROPY_STATS equ 0
+-CONFIG_FILEOPTIONS equ 1
+-CONFIG_FIX_GF_LENGTH equ 1
+-CONFIG_FP_MB_STATS equ 0
+-CONFIG_GCC equ 1
+-CONFIG_GCOV equ 0
+-CONFIG_GLOBAL_MOTION_SEARCH equ 1
+-CONFIG_GPROF equ 0
+-CONFIG_INSPECTION equ 0
+-CONFIG_INTERNAL_STATS equ 0
+-CONFIG_INTER_STATS_ONLY equ 0
+-CONFIG_LIBYUV equ 0
+-CONFIG_LOWBITDEPTH equ 1
+-CONFIG_MAX_DECODE_PROFILE equ 2
+-CONFIG_MISMATCH_DEBUG equ 0
+-CONFIG_MULTITHREAD equ 1
+-CONFIG_NORMAL_TILE_MODE equ 0
+-CONFIG_OS_SUPPORT equ 1
+-CONFIG_PIC equ 0
+-CONFIG_RD_DEBUG equ 0
+-CONFIG_REDUCED_ENCODER_BORDER equ 0
+-CONFIG_RUNTIME_CPU_DETECT equ 1
+-CONFIG_SHARED equ 0
+-CONFIG_SHARP_SETTINGS equ 0
+-CONFIG_SIZE_LIMIT equ 0
+-CONFIG_SPATIAL_RESAMPLING equ 1
+-CONFIG_STATIC equ 1
+-CONFIG_WEBM_IO equ 0
+-DECODE_HEIGHT_LIMIT equ 0
+-DECODE_WIDTH_LIMIT equ 0
+-HAVE_AVX equ 1
+-HAVE_AVX2 equ 1
+-HAVE_DSPR2 equ 0
+-HAVE_FEXCEPT equ 1
+-HAVE_MIPS32 equ 0
+-HAVE_MIPS64 equ 0
+-HAVE_MMX equ 1
+-HAVE_MSA equ 0
+-HAVE_NEON equ 0
+-HAVE_SSE equ 1
+-HAVE_SSE2 equ 1
+-HAVE_SSE3 equ 1
+-HAVE_SSE4_1 equ 1
+-HAVE_SSE4_2 equ 1
+-HAVE_SSSE3 equ 1
+-HAVE_VSX equ 0
+-HAVE_WXWIDGETS equ 0
+diff --git a/media/libaom/config/win/mingw32/config/aom_config.h b/media/libaom/config/win/mingw32/config/aom_config.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw32/config/aom_config.h
++++ /dev/null
+@@ -1,82 +0,0 @@
+-/*
+- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+- *
+- * This source code is subject to the terms of the BSD 2 Clause License and
+- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+- * was not distributed with this source code in the LICENSE file, you can
+- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+- * Media Patent License 1.0 was not distributed with this source code in the
+- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+- */
+-#ifndef AOM_CONFIG_H_
+-#define AOM_CONFIG_H_
+-
+-#define ARCH_ARM 0
+-#define ARCH_MIPS 0
+-#define ARCH_PPC 0
+-#define ARCH_X86 1
+-#define ARCH_X86_64 0
+-#define CONFIG_2PASS_PARTITION_SEARCH_LVL 1
+-#define CONFIG_ACCOUNTING 0
+-#define CONFIG_ANALYZER 0
+-#define CONFIG_AV1_DECODER 1
+-#define CONFIG_AV1_ENCODER 0
+-#define CONFIG_BIG_ENDIAN 0
+-#define CONFIG_BITSTREAM_DEBUG 0
+-#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
+-#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
+-#define CONFIG_COLLECT_RD_STATS 0
+-#define CONFIG_DEBUG 0
+-#define CONFIG_DENOISE 1
+-#define CONFIG_DIST_8X8 0
+-#define CONFIG_ENTROPY_STATS 0
+-#define CONFIG_FILEOPTIONS 1
+-#define CONFIG_FIX_GF_LENGTH 1
+-#define CONFIG_FP_MB_STATS 0
+-#define CONFIG_GCC 1
+-#define CONFIG_GCOV 0
+-#define CONFIG_GLOBAL_MOTION_SEARCH 1
+-#define CONFIG_GPROF 0
+-#define CONFIG_INSPECTION 0
+-#define CONFIG_INTERNAL_STATS 0
+-#define CONFIG_INTER_STATS_ONLY 0
+-#define CONFIG_LIBYUV 0
+-#define CONFIG_LOWBITDEPTH 1
+-#define CONFIG_MAX_DECODE_PROFILE 2
+-#define CONFIG_MISMATCH_DEBUG 0
+-#define CONFIG_MULTITHREAD 1
+-#define CONFIG_NORMAL_TILE_MODE 0
+-#define CONFIG_OS_SUPPORT 1
+-#define CONFIG_PIC 0
+-#define CONFIG_RD_DEBUG 0
+-#define CONFIG_REDUCED_ENCODER_BORDER 0
+-#define CONFIG_RUNTIME_CPU_DETECT 1
+-#define CONFIG_SHARED 0
+-#define CONFIG_SHARP_SETTINGS 0
+-#define CONFIG_SIZE_LIMIT 0
+-#define CONFIG_SPATIAL_RESAMPLING 1
+-#define CONFIG_STATIC 1
+-#define CONFIG_WEBM_IO 0
+-#define DECODE_HEIGHT_LIMIT 0
+-#define DECODE_WIDTH_LIMIT 0
+-#define HAVE_AVX 1
+-#define HAVE_AVX2 1
+-#define HAVE_DSPR2 0
+-#define HAVE_FEXCEPT 1
+-#define HAVE_MIPS32 0
+-#define HAVE_MIPS64 0
+-#define HAVE_MMX 1
+-#define HAVE_MSA 0
+-#define HAVE_NEON 0
+-#define HAVE_SSE 1
+-#define HAVE_SSE2 1
+-#define HAVE_SSE3 1
+-#define HAVE_SSE4_1 1
+-#define HAVE_SSE4_2 1
+-#define HAVE_SSSE3 1
+-#define HAVE_VSX 0
+-#define HAVE_WXWIDGETS 0
+-#define INCLUDE_INSTALL_DIR INSTALLDIR/include
+-#define INLINE inline
+-#define LIB_INSTALL_DIR INSTALLDIR/lib
+-#endif /* AOM_CONFIG_H_ */
+diff --git a/media/libaom/config/win/mingw32/config/aom_dsp_rtcd.h b/media/libaom/config/win/mingw32/config/aom_dsp_rtcd.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw32/config/aom_dsp_rtcd.h
++++ /dev/null
+@@ -1,2379 +0,0 @@
+-// This file is generated. Do not edit.
+-#ifndef AOM_DSP_RTCD_H_
+-#define AOM_DSP_RTCD_H_
+-
+-#ifdef RTCD_C
+-#define RTCD_EXTERN
+-#else
+-#define RTCD_EXTERN extern
+-#endif
+-
+-/*
+- * DSP
+- */
+-
+-#include "aom/aom_integer.h"
+-#include "aom_dsp/aom_dsp_common.h"
+-#include "av1/common/enums.h"
+-#include "av1/common/blockd.h"
+-
+-
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-
+-void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-void aom_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-void aom_blend_a64_mask_avx2(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-RTCD_EXTERN void (*aom_blend_a64_mask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-
+-void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-RTCD_EXTERN void (*aom_blend_a64_vmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-
+-void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-RTCD_EXTERN void (*aom_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-
+-void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-RTCD_EXTERN void (*aom_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-
+-void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-RTCD_EXTERN void (*aom_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-
+-void aom_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
+-
+-void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
+-
+-void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
+-
+-void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
+-
+-void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
+-
+-void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_highbd_blend_a64_d16_mask_c(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params, const int bd);
+-#define aom_highbd_blend_a64_d16_mask aom_highbd_blend_a64_d16_mask_c
+-
+-void aom_highbd_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-void aom_highbd_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-RTCD_EXTERN void (*aom_highbd_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-
+-void aom_highbd_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd);
+-void aom_highbd_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd);
+-RTCD_EXTERN void (*aom_highbd_blend_a64_mask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd);
+-
+-void aom_highbd_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-void aom_highbd_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-RTCD_EXTERN void (*aom_highbd_blend_a64_vmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-
+-void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-RTCD_EXTERN void (*aom_highbd_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-
+-void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-RTCD_EXTERN void (*aom_highbd_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-
+-void aom_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-
+-void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_16x4 aom_highbd_dc_128_predictor_16x4_c
+-
+-void aom_highbd_dc_128_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_16x64 aom_highbd_dc_128_predictor_16x64_c
+-
+-void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c
+-
+-void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_32x64 aom_highbd_dc_128_predictor_32x64_c
+-
+-void aom_highbd_dc_128_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_32x8 aom_highbd_dc_128_predictor_32x8_c
+-
+-void aom_highbd_dc_128_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_4x16 aom_highbd_dc_128_predictor_4x16_c
+-
+-void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_64x16 aom_highbd_dc_128_predictor_64x16_c
+-
+-void aom_highbd_dc_128_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_64x32 aom_highbd_dc_128_predictor_64x32_c
+-
+-void aom_highbd_dc_128_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_64x64 aom_highbd_dc_128_predictor_64x64_c
+-
+-void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_8x32 aom_highbd_dc_128_predictor_8x32_c
+-
+-void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_128_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_16x4 aom_highbd_dc_left_predictor_16x4_c
+-
+-void aom_highbd_dc_left_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_16x64 aom_highbd_dc_left_predictor_16x64_c
+-
+-void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c
+-
+-void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_32x64 aom_highbd_dc_left_predictor_32x64_c
+-
+-void aom_highbd_dc_left_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_32x8 aom_highbd_dc_left_predictor_32x8_c
+-
+-void aom_highbd_dc_left_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_4x16 aom_highbd_dc_left_predictor_4x16_c
+-
+-void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_64x16 aom_highbd_dc_left_predictor_64x16_c
+-
+-void aom_highbd_dc_left_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_64x32 aom_highbd_dc_left_predictor_64x32_c
+-
+-void aom_highbd_dc_left_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_64x64 aom_highbd_dc_left_predictor_64x64_c
+-
+-void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_8x32 aom_highbd_dc_left_predictor_8x32_c
+-
+-void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_left_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_16x4 aom_highbd_dc_predictor_16x4_c
+-
+-void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_16x64 aom_highbd_dc_predictor_16x64_c
+-
+-void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c
+-
+-void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_32x64 aom_highbd_dc_predictor_32x64_c
+-
+-void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_32x8 aom_highbd_dc_predictor_32x8_c
+-
+-void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_4x16 aom_highbd_dc_predictor_4x16_c
+-
+-void aom_highbd_dc_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_64x16 aom_highbd_dc_predictor_64x16_c
+-
+-void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_64x32 aom_highbd_dc_predictor_64x32_c
+-
+-void aom_highbd_dc_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_64x64 aom_highbd_dc_predictor_64x64_c
+-
+-void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_8x32 aom_highbd_dc_predictor_8x32_c
+-
+-void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_16x4 aom_highbd_dc_top_predictor_16x4_c
+-
+-void aom_highbd_dc_top_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_16x64 aom_highbd_dc_top_predictor_16x64_c
+-
+-void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c
+-
+-void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_32x64 aom_highbd_dc_top_predictor_32x64_c
+-
+-void aom_highbd_dc_top_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_32x8 aom_highbd_dc_top_predictor_32x8_c
+-
+-void aom_highbd_dc_top_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_4x16 aom_highbd_dc_top_predictor_4x16_c
+-
+-void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_64x16 aom_highbd_dc_top_predictor_64x16_c
+-
+-void aom_highbd_dc_top_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_64x32 aom_highbd_dc_top_predictor_64x32_c
+-
+-void aom_highbd_dc_top_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_64x64 aom_highbd_dc_top_predictor_64x64_c
+-
+-void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_8x32 aom_highbd_dc_top_predictor_8x32_c
+-
+-void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_dc_top_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_16x4 aom_highbd_h_predictor_16x4_c
+-
+-void aom_highbd_h_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_16x64 aom_highbd_h_predictor_16x64_c
+-
+-void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c
+-
+-void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_32x64 aom_highbd_h_predictor_32x64_c
+-
+-void aom_highbd_h_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_32x8 aom_highbd_h_predictor_32x8_c
+-
+-void aom_highbd_h_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_4x16 aom_highbd_h_predictor_4x16_c
+-
+-void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_64x16 aom_highbd_h_predictor_64x16_c
+-
+-void aom_highbd_h_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_64x32 aom_highbd_h_predictor_64x32_c
+-
+-void aom_highbd_h_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_64x64 aom_highbd_h_predictor_64x64_c
+-
+-void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_8x32 aom_highbd_h_predictor_8x32_c
+-
+-void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_h_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_lpf_horizontal_14_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_14_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_14)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_horizontal_14_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-void aom_highbd_lpf_horizontal_14_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-void aom_highbd_lpf_horizontal_14_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_14_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-
+-void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_6_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_6)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_horizontal_6_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_6_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_6_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_vertical_14_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_14_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_14)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_vertical_14_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_14_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_14_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_14_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_4)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_6_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_6)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_vertical_6_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_6_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_6_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_8)(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-
+-void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_paeth_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x16 aom_highbd_paeth_predictor_16x16_c
+-
+-void aom_highbd_paeth_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x32 aom_highbd_paeth_predictor_16x32_c
+-
+-void aom_highbd_paeth_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x4 aom_highbd_paeth_predictor_16x4_c
+-
+-void aom_highbd_paeth_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x64 aom_highbd_paeth_predictor_16x64_c
+-
+-void aom_highbd_paeth_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x8 aom_highbd_paeth_predictor_16x8_c
+-
+-void aom_highbd_paeth_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_2x2 aom_highbd_paeth_predictor_2x2_c
+-
+-void aom_highbd_paeth_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x16 aom_highbd_paeth_predictor_32x16_c
+-
+-void aom_highbd_paeth_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x32 aom_highbd_paeth_predictor_32x32_c
+-
+-void aom_highbd_paeth_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x64 aom_highbd_paeth_predictor_32x64_c
+-
+-void aom_highbd_paeth_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x8 aom_highbd_paeth_predictor_32x8_c
+-
+-void aom_highbd_paeth_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_4x16 aom_highbd_paeth_predictor_4x16_c
+-
+-void aom_highbd_paeth_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_4x4 aom_highbd_paeth_predictor_4x4_c
+-
+-void aom_highbd_paeth_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_4x8 aom_highbd_paeth_predictor_4x8_c
+-
+-void aom_highbd_paeth_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_64x16 aom_highbd_paeth_predictor_64x16_c
+-
+-void aom_highbd_paeth_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_64x32 aom_highbd_paeth_predictor_64x32_c
+-
+-void aom_highbd_paeth_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_64x64 aom_highbd_paeth_predictor_64x64_c
+-
+-void aom_highbd_paeth_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x16 aom_highbd_paeth_predictor_8x16_c
+-
+-void aom_highbd_paeth_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x32 aom_highbd_paeth_predictor_8x32_c
+-
+-void aom_highbd_paeth_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x4 aom_highbd_paeth_predictor_8x4_c
+-
+-void aom_highbd_paeth_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x8 aom_highbd_paeth_predictor_8x8_c
+-
+-void aom_highbd_smooth_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x16 aom_highbd_smooth_h_predictor_16x16_c
+-
+-void aom_highbd_smooth_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x32 aom_highbd_smooth_h_predictor_16x32_c
+-
+-void aom_highbd_smooth_h_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x4 aom_highbd_smooth_h_predictor_16x4_c
+-
+-void aom_highbd_smooth_h_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x64 aom_highbd_smooth_h_predictor_16x64_c
+-
+-void aom_highbd_smooth_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x8 aom_highbd_smooth_h_predictor_16x8_c
+-
+-void aom_highbd_smooth_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_2x2 aom_highbd_smooth_h_predictor_2x2_c
+-
+-void aom_highbd_smooth_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x16 aom_highbd_smooth_h_predictor_32x16_c
+-
+-void aom_highbd_smooth_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x32 aom_highbd_smooth_h_predictor_32x32_c
+-
+-void aom_highbd_smooth_h_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x64 aom_highbd_smooth_h_predictor_32x64_c
+-
+-void aom_highbd_smooth_h_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x8 aom_highbd_smooth_h_predictor_32x8_c
+-
+-void aom_highbd_smooth_h_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_4x16 aom_highbd_smooth_h_predictor_4x16_c
+-
+-void aom_highbd_smooth_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_4x4 aom_highbd_smooth_h_predictor_4x4_c
+-
+-void aom_highbd_smooth_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_4x8 aom_highbd_smooth_h_predictor_4x8_c
+-
+-void aom_highbd_smooth_h_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_64x16 aom_highbd_smooth_h_predictor_64x16_c
+-
+-void aom_highbd_smooth_h_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_64x32 aom_highbd_smooth_h_predictor_64x32_c
+-
+-void aom_highbd_smooth_h_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_64x64 aom_highbd_smooth_h_predictor_64x64_c
+-
+-void aom_highbd_smooth_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x16 aom_highbd_smooth_h_predictor_8x16_c
+-
+-void aom_highbd_smooth_h_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x32 aom_highbd_smooth_h_predictor_8x32_c
+-
+-void aom_highbd_smooth_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x4 aom_highbd_smooth_h_predictor_8x4_c
+-
+-void aom_highbd_smooth_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x8 aom_highbd_smooth_h_predictor_8x8_c
+-
+-void aom_highbd_smooth_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x16 aom_highbd_smooth_predictor_16x16_c
+-
+-void aom_highbd_smooth_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x32 aom_highbd_smooth_predictor_16x32_c
+-
+-void aom_highbd_smooth_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x4 aom_highbd_smooth_predictor_16x4_c
+-
+-void aom_highbd_smooth_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x64 aom_highbd_smooth_predictor_16x64_c
+-
+-void aom_highbd_smooth_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x8 aom_highbd_smooth_predictor_16x8_c
+-
+-void aom_highbd_smooth_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_2x2 aom_highbd_smooth_predictor_2x2_c
+-
+-void aom_highbd_smooth_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x16 aom_highbd_smooth_predictor_32x16_c
+-
+-void aom_highbd_smooth_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x32 aom_highbd_smooth_predictor_32x32_c
+-
+-void aom_highbd_smooth_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x64 aom_highbd_smooth_predictor_32x64_c
+-
+-void aom_highbd_smooth_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x8 aom_highbd_smooth_predictor_32x8_c
+-
+-void aom_highbd_smooth_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_4x16 aom_highbd_smooth_predictor_4x16_c
+-
+-void aom_highbd_smooth_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_4x4 aom_highbd_smooth_predictor_4x4_c
+-
+-void aom_highbd_smooth_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_4x8 aom_highbd_smooth_predictor_4x8_c
+-
+-void aom_highbd_smooth_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_64x16 aom_highbd_smooth_predictor_64x16_c
+-
+-void aom_highbd_smooth_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_64x32 aom_highbd_smooth_predictor_64x32_c
+-
+-void aom_highbd_smooth_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_64x64 aom_highbd_smooth_predictor_64x64_c
+-
+-void aom_highbd_smooth_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x16 aom_highbd_smooth_predictor_8x16_c
+-
+-void aom_highbd_smooth_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x32 aom_highbd_smooth_predictor_8x32_c
+-
+-void aom_highbd_smooth_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x4 aom_highbd_smooth_predictor_8x4_c
+-
+-void aom_highbd_smooth_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x8 aom_highbd_smooth_predictor_8x8_c
+-
+-void aom_highbd_smooth_v_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x16 aom_highbd_smooth_v_predictor_16x16_c
+-
+-void aom_highbd_smooth_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x32 aom_highbd_smooth_v_predictor_16x32_c
+-
+-void aom_highbd_smooth_v_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x4 aom_highbd_smooth_v_predictor_16x4_c
+-
+-void aom_highbd_smooth_v_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x64 aom_highbd_smooth_v_predictor_16x64_c
+-
+-void aom_highbd_smooth_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x8 aom_highbd_smooth_v_predictor_16x8_c
+-
+-void aom_highbd_smooth_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_2x2 aom_highbd_smooth_v_predictor_2x2_c
+-
+-void aom_highbd_smooth_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x16 aom_highbd_smooth_v_predictor_32x16_c
+-
+-void aom_highbd_smooth_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x32 aom_highbd_smooth_v_predictor_32x32_c
+-
+-void aom_highbd_smooth_v_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x64 aom_highbd_smooth_v_predictor_32x64_c
+-
+-void aom_highbd_smooth_v_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x8 aom_highbd_smooth_v_predictor_32x8_c
+-
+-void aom_highbd_smooth_v_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_4x16 aom_highbd_smooth_v_predictor_4x16_c
+-
+-void aom_highbd_smooth_v_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_4x4 aom_highbd_smooth_v_predictor_4x4_c
+-
+-void aom_highbd_smooth_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_4x8 aom_highbd_smooth_v_predictor_4x8_c
+-
+-void aom_highbd_smooth_v_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_64x16 aom_highbd_smooth_v_predictor_64x16_c
+-
+-void aom_highbd_smooth_v_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_64x32 aom_highbd_smooth_v_predictor_64x32_c
+-
+-void aom_highbd_smooth_v_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_64x64 aom_highbd_smooth_v_predictor_64x64_c
+-
+-void aom_highbd_smooth_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x16 aom_highbd_smooth_v_predictor_8x16_c
+-
+-void aom_highbd_smooth_v_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x32 aom_highbd_smooth_v_predictor_8x32_c
+-
+-void aom_highbd_smooth_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x4 aom_highbd_smooth_v_predictor_8x4_c
+-
+-void aom_highbd_smooth_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x8 aom_highbd_smooth_v_predictor_8x8_c
+-
+-void aom_highbd_v_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_16x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_16x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_16x4 aom_highbd_v_predictor_16x4_c
+-
+-void aom_highbd_v_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_16x64 aom_highbd_v_predictor_16x64_c
+-
+-void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_16x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c
+-
+-void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_32x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_32x32)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_32x64 aom_highbd_v_predictor_32x64_c
+-
+-void aom_highbd_v_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_32x8 aom_highbd_v_predictor_32x8_c
+-
+-void aom_highbd_v_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_4x16 aom_highbd_v_predictor_4x16_c
+-
+-void aom_highbd_v_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_4x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_4x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_64x16 aom_highbd_v_predictor_64x16_c
+-
+-void aom_highbd_v_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_64x32 aom_highbd_v_predictor_64x32_c
+-
+-void aom_highbd_v_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_64x64 aom_highbd_v_predictor_64x64_c
+-
+-void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_8x16)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_8x32 aom_highbd_v_predictor_8x32_c
+-
+-void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_8x4)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-RTCD_EXTERN void (*aom_highbd_v_predictor_8x8)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-
+-void aom_lowbd_blend_a64_d16_mask_c(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-void aom_lowbd_blend_a64_d16_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-void aom_lowbd_blend_a64_d16_mask_avx2(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*aom_lowbd_blend_a64_d16_mask)(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-
+-void aom_lpf_horizontal_14_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_14_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_horizontal_14)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_horizontal_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_14_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_horizontal_14_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_lpf_horizontal_6_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_6_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_horizontal_6)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_horizontal_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_6_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_horizontal_6_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_lpf_vertical_14_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_14_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_vertical_14)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_vertical_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_14_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_vertical_14_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_6_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_vertical_6)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_vertical_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_6_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_vertical_6_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-RTCD_EXTERN void (*aom_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-
+-void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-RTCD_EXTERN void (*aom_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-
+-void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+-
+-void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+-
+-void aom_smooth_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
+-
+-void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+-
+-void aom_smooth_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
+-
+-void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void av1_round_shift_array_c(int32_t *arr, int size, int bit);
+-void av1_round_shift_array_sse4_1(int32_t *arr, int size, int bit);
+-RTCD_EXTERN void (*av1_round_shift_array)(int32_t *arr, int size, int bit);
+-
+-void aom_dsp_rtcd(void);
+-
+-#ifdef RTCD_C
+-#include "aom_ports/x86.h"
+-static void setup_rtcd_internal(void)
+-{
+-    int flags = x86_simd_caps();
+-
+-    (void)flags;
+-
+-    aom_blend_a64_hmask = aom_blend_a64_hmask_c;
+-    if (flags & HAS_SSE4_1) aom_blend_a64_hmask = aom_blend_a64_hmask_sse4_1;
+-    aom_blend_a64_mask = aom_blend_a64_mask_c;
+-    if (flags & HAS_SSE4_1) aom_blend_a64_mask = aom_blend_a64_mask_sse4_1;
+-    if (flags & HAS_AVX2) aom_blend_a64_mask = aom_blend_a64_mask_avx2;
+-    aom_blend_a64_vmask = aom_blend_a64_vmask_c;
+-    if (flags & HAS_SSE4_1) aom_blend_a64_vmask = aom_blend_a64_vmask_sse4_1;
+-    aom_convolve8_horiz = aom_convolve8_horiz_c;
+-    if (flags & HAS_SSE2) aom_convolve8_horiz = aom_convolve8_horiz_sse2;
+-    if (flags & HAS_SSSE3) aom_convolve8_horiz = aom_convolve8_horiz_ssse3;
+-    if (flags & HAS_AVX2) aom_convolve8_horiz = aom_convolve8_horiz_avx2;
+-    aom_convolve8_vert = aom_convolve8_vert_c;
+-    if (flags & HAS_SSE2) aom_convolve8_vert = aom_convolve8_vert_sse2;
+-    if (flags & HAS_SSSE3) aom_convolve8_vert = aom_convolve8_vert_ssse3;
+-    if (flags & HAS_AVX2) aom_convolve8_vert = aom_convolve8_vert_avx2;
+-    aom_convolve_copy = aom_convolve_copy_c;
+-    if (flags & HAS_SSE2) aom_convolve_copy = aom_convolve_copy_sse2;
+-    aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_16x16 = aom_dc_128_predictor_16x16_sse2;
+-    aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_16x32 = aom_dc_128_predictor_16x32_sse2;
+-    aom_dc_128_predictor_16x4 = aom_dc_128_predictor_16x4_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_16x4 = aom_dc_128_predictor_16x4_sse2;
+-    aom_dc_128_predictor_16x64 = aom_dc_128_predictor_16x64_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_16x64 = aom_dc_128_predictor_16x64_sse2;
+-    aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_16x8 = aom_dc_128_predictor_16x8_sse2;
+-    aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2;
+-    aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2;
+-    aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_avx2;
+-    aom_dc_128_predictor_32x8 = aom_dc_128_predictor_32x8_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_32x8 = aom_dc_128_predictor_32x8_sse2;
+-    aom_dc_128_predictor_4x16 = aom_dc_128_predictor_4x16_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_4x16 = aom_dc_128_predictor_4x16_sse2;
+-    aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_4x4 = aom_dc_128_predictor_4x4_sse2;
+-    aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_4x8 = aom_dc_128_predictor_4x8_sse2;
+-    aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_avx2;
+-    aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_avx2;
+-    aom_dc_128_predictor_64x64 = aom_dc_128_predictor_64x64_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_64x64 = aom_dc_128_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_64x64 = aom_dc_128_predictor_64x64_avx2;
+-    aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_8x16 = aom_dc_128_predictor_8x16_sse2;
+-    aom_dc_128_predictor_8x32 = aom_dc_128_predictor_8x32_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_8x32 = aom_dc_128_predictor_8x32_sse2;
+-    aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_8x4 = aom_dc_128_predictor_8x4_sse2;
+-    aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_dc_128_predictor_8x8 = aom_dc_128_predictor_8x8_sse2;
+-    aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_16x16 = aom_dc_left_predictor_16x16_sse2;
+-    aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_16x32 = aom_dc_left_predictor_16x32_sse2;
+-    aom_dc_left_predictor_16x4 = aom_dc_left_predictor_16x4_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_16x4 = aom_dc_left_predictor_16x4_sse2;
+-    aom_dc_left_predictor_16x64 = aom_dc_left_predictor_16x64_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_16x64 = aom_dc_left_predictor_16x64_sse2;
+-    aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_16x8 = aom_dc_left_predictor_16x8_sse2;
+-    aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2;
+-    aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2;
+-    aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_avx2;
+-    aom_dc_left_predictor_32x8 = aom_dc_left_predictor_32x8_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_32x8 = aom_dc_left_predictor_32x8_sse2;
+-    aom_dc_left_predictor_4x16 = aom_dc_left_predictor_4x16_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_4x16 = aom_dc_left_predictor_4x16_sse2;
+-    aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_4x4 = aom_dc_left_predictor_4x4_sse2;
+-    aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_4x8 = aom_dc_left_predictor_4x8_sse2;
+-    aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_avx2;
+-    aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_avx2;
+-    aom_dc_left_predictor_64x64 = aom_dc_left_predictor_64x64_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_64x64 = aom_dc_left_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_64x64 = aom_dc_left_predictor_64x64_avx2;
+-    aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_8x16 = aom_dc_left_predictor_8x16_sse2;
+-    aom_dc_left_predictor_8x32 = aom_dc_left_predictor_8x32_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_8x32 = aom_dc_left_predictor_8x32_sse2;
+-    aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_8x4 = aom_dc_left_predictor_8x4_sse2;
+-    aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_dc_left_predictor_8x8 = aom_dc_left_predictor_8x8_sse2;
+-    aom_dc_predictor_16x16 = aom_dc_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_16x16 = aom_dc_predictor_16x16_sse2;
+-    aom_dc_predictor_16x32 = aom_dc_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_16x32 = aom_dc_predictor_16x32_sse2;
+-    aom_dc_predictor_16x4 = aom_dc_predictor_16x4_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_16x4 = aom_dc_predictor_16x4_sse2;
+-    aom_dc_predictor_16x64 = aom_dc_predictor_16x64_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_16x64 = aom_dc_predictor_16x64_sse2;
+-    aom_dc_predictor_16x8 = aom_dc_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_16x8 = aom_dc_predictor_16x8_sse2;
+-    aom_dc_predictor_32x16 = aom_dc_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2;
+-    aom_dc_predictor_32x32 = aom_dc_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2;
+-    aom_dc_predictor_32x64 = aom_dc_predictor_32x64_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_32x64 = aom_dc_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_32x64 = aom_dc_predictor_32x64_avx2;
+-    aom_dc_predictor_32x8 = aom_dc_predictor_32x8_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_32x8 = aom_dc_predictor_32x8_sse2;
+-    aom_dc_predictor_4x16 = aom_dc_predictor_4x16_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_4x16 = aom_dc_predictor_4x16_sse2;
+-    aom_dc_predictor_4x4 = aom_dc_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_4x4 = aom_dc_predictor_4x4_sse2;
+-    aom_dc_predictor_4x8 = aom_dc_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_4x8 = aom_dc_predictor_4x8_sse2;
+-    aom_dc_predictor_64x16 = aom_dc_predictor_64x16_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_64x16 = aom_dc_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_64x16 = aom_dc_predictor_64x16_avx2;
+-    aom_dc_predictor_64x32 = aom_dc_predictor_64x32_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_64x32 = aom_dc_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_64x32 = aom_dc_predictor_64x32_avx2;
+-    aom_dc_predictor_64x64 = aom_dc_predictor_64x64_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_64x64 = aom_dc_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_64x64 = aom_dc_predictor_64x64_avx2;
+-    aom_dc_predictor_8x16 = aom_dc_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_8x16 = aom_dc_predictor_8x16_sse2;
+-    aom_dc_predictor_8x32 = aom_dc_predictor_8x32_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_8x32 = aom_dc_predictor_8x32_sse2;
+-    aom_dc_predictor_8x4 = aom_dc_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_8x4 = aom_dc_predictor_8x4_sse2;
+-    aom_dc_predictor_8x8 = aom_dc_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_dc_predictor_8x8 = aom_dc_predictor_8x8_sse2;
+-    aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_16x16 = aom_dc_top_predictor_16x16_sse2;
+-    aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_16x32 = aom_dc_top_predictor_16x32_sse2;
+-    aom_dc_top_predictor_16x4 = aom_dc_top_predictor_16x4_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_16x4 = aom_dc_top_predictor_16x4_sse2;
+-    aom_dc_top_predictor_16x64 = aom_dc_top_predictor_16x64_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_16x64 = aom_dc_top_predictor_16x64_sse2;
+-    aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_16x8 = aom_dc_top_predictor_16x8_sse2;
+-    aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2;
+-    aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2;
+-    aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_avx2;
+-    aom_dc_top_predictor_32x8 = aom_dc_top_predictor_32x8_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_32x8 = aom_dc_top_predictor_32x8_sse2;
+-    aom_dc_top_predictor_4x16 = aom_dc_top_predictor_4x16_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_4x16 = aom_dc_top_predictor_4x16_sse2;
+-    aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_4x4 = aom_dc_top_predictor_4x4_sse2;
+-    aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_4x8 = aom_dc_top_predictor_4x8_sse2;
+-    aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_avx2;
+-    aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_avx2;
+-    aom_dc_top_predictor_64x64 = aom_dc_top_predictor_64x64_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_64x64 = aom_dc_top_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_64x64 = aom_dc_top_predictor_64x64_avx2;
+-    aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_8x16 = aom_dc_top_predictor_8x16_sse2;
+-    aom_dc_top_predictor_8x32 = aom_dc_top_predictor_8x32_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_8x32 = aom_dc_top_predictor_8x32_sse2;
+-    aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_8x4 = aom_dc_top_predictor_8x4_sse2;
+-    aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_dc_top_predictor_8x8 = aom_dc_top_predictor_8x8_sse2;
+-    aom_h_predictor_16x16 = aom_h_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_16x16 = aom_h_predictor_16x16_sse2;
+-    aom_h_predictor_16x32 = aom_h_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_16x32 = aom_h_predictor_16x32_sse2;
+-    aom_h_predictor_16x4 = aom_h_predictor_16x4_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_16x4 = aom_h_predictor_16x4_sse2;
+-    aom_h_predictor_16x64 = aom_h_predictor_16x64_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_16x64 = aom_h_predictor_16x64_sse2;
+-    aom_h_predictor_16x8 = aom_h_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_16x8 = aom_h_predictor_16x8_sse2;
+-    aom_h_predictor_32x16 = aom_h_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_32x16 = aom_h_predictor_32x16_sse2;
+-    aom_h_predictor_32x32 = aom_h_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2;
+-    aom_h_predictor_32x64 = aom_h_predictor_32x64_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_32x64 = aom_h_predictor_32x64_sse2;
+-    aom_h_predictor_32x8 = aom_h_predictor_32x8_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_32x8 = aom_h_predictor_32x8_sse2;
+-    aom_h_predictor_4x16 = aom_h_predictor_4x16_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_4x16 = aom_h_predictor_4x16_sse2;
+-    aom_h_predictor_4x4 = aom_h_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_4x4 = aom_h_predictor_4x4_sse2;
+-    aom_h_predictor_4x8 = aom_h_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_4x8 = aom_h_predictor_4x8_sse2;
+-    aom_h_predictor_64x16 = aom_h_predictor_64x16_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_64x16 = aom_h_predictor_64x16_sse2;
+-    aom_h_predictor_64x32 = aom_h_predictor_64x32_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_64x32 = aom_h_predictor_64x32_sse2;
+-    aom_h_predictor_64x64 = aom_h_predictor_64x64_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_64x64 = aom_h_predictor_64x64_sse2;
+-    aom_h_predictor_8x16 = aom_h_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_8x16 = aom_h_predictor_8x16_sse2;
+-    aom_h_predictor_8x32 = aom_h_predictor_8x32_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_8x32 = aom_h_predictor_8x32_sse2;
+-    aom_h_predictor_8x4 = aom_h_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_8x4 = aom_h_predictor_8x4_sse2;
+-    aom_h_predictor_8x8 = aom_h_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_h_predictor_8x8 = aom_h_predictor_8x8_sse2;
+-    aom_highbd_blend_a64_hmask = aom_highbd_blend_a64_hmask_c;
+-    if (flags & HAS_SSE4_1) aom_highbd_blend_a64_hmask = aom_highbd_blend_a64_hmask_sse4_1;
+-    aom_highbd_blend_a64_mask = aom_highbd_blend_a64_mask_c;
+-    if (flags & HAS_SSE4_1) aom_highbd_blend_a64_mask = aom_highbd_blend_a64_mask_sse4_1;
+-    aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_c;
+-    if (flags & HAS_SSE4_1) aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_sse4_1;
+-    aom_highbd_convolve8_horiz = aom_highbd_convolve8_horiz_c;
+-    if (flags & HAS_AVX2) aom_highbd_convolve8_horiz = aom_highbd_convolve8_horiz_avx2;
+-    aom_highbd_convolve8_vert = aom_highbd_convolve8_vert_c;
+-    if (flags & HAS_AVX2) aom_highbd_convolve8_vert = aom_highbd_convolve8_vert_avx2;
+-    aom_highbd_convolve_copy = aom_highbd_convolve_copy_c;
+-    if (flags & HAS_SSE2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2;
+-    aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x16 = aom_highbd_dc_128_predictor_16x16_sse2;
+-    aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x32 = aom_highbd_dc_128_predictor_16x32_sse2;
+-    aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_16x8 = aom_highbd_dc_128_predictor_16x8_sse2;
+-    aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x16 = aom_highbd_dc_128_predictor_32x16_sse2;
+-    aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_32x32 = aom_highbd_dc_128_predictor_32x32_sse2;
+-    aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x4 = aom_highbd_dc_128_predictor_4x4_sse2;
+-    aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_4x8 = aom_highbd_dc_128_predictor_4x8_sse2;
+-    aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x16 = aom_highbd_dc_128_predictor_8x16_sse2;
+-    aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x4 = aom_highbd_dc_128_predictor_8x4_sse2;
+-    aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_128_predictor_8x8 = aom_highbd_dc_128_predictor_8x8_sse2;
+-    aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x16 = aom_highbd_dc_left_predictor_16x16_sse2;
+-    aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x32 = aom_highbd_dc_left_predictor_16x32_sse2;
+-    aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_16x8 = aom_highbd_dc_left_predictor_16x8_sse2;
+-    aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x16 = aom_highbd_dc_left_predictor_32x16_sse2;
+-    aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_32x32 = aom_highbd_dc_left_predictor_32x32_sse2;
+-    aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x4 = aom_highbd_dc_left_predictor_4x4_sse2;
+-    aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_4x8 = aom_highbd_dc_left_predictor_4x8_sse2;
+-    aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x16 = aom_highbd_dc_left_predictor_8x16_sse2;
+-    aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x4 = aom_highbd_dc_left_predictor_8x4_sse2;
+-    aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_left_predictor_8x8 = aom_highbd_dc_left_predictor_8x8_sse2;
+-    aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x16 = aom_highbd_dc_predictor_16x16_sse2;
+-    aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x32 = aom_highbd_dc_predictor_16x32_sse2;
+-    aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_16x8 = aom_highbd_dc_predictor_16x8_sse2;
+-    aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x16 = aom_highbd_dc_predictor_32x16_sse2;
+-    aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_32x32 = aom_highbd_dc_predictor_32x32_sse2;
+-    aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x4 = aom_highbd_dc_predictor_4x4_sse2;
+-    aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_4x8 = aom_highbd_dc_predictor_4x8_sse2;
+-    aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x16 = aom_highbd_dc_predictor_8x16_sse2;
+-    aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x4 = aom_highbd_dc_predictor_8x4_sse2;
+-    aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_sse2;
+-    aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x16 = aom_highbd_dc_top_predictor_16x16_sse2;
+-    aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x32 = aom_highbd_dc_top_predictor_16x32_sse2;
+-    aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_16x8 = aom_highbd_dc_top_predictor_16x8_sse2;
+-    aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x16 = aom_highbd_dc_top_predictor_32x16_sse2;
+-    aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_32x32 = aom_highbd_dc_top_predictor_32x32_sse2;
+-    aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x4 = aom_highbd_dc_top_predictor_4x4_sse2;
+-    aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_4x8 = aom_highbd_dc_top_predictor_4x8_sse2;
+-    aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x16 = aom_highbd_dc_top_predictor_8x16_sse2;
+-    aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x4 = aom_highbd_dc_top_predictor_8x4_sse2;
+-    aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_dc_top_predictor_8x8 = aom_highbd_dc_top_predictor_8x8_sse2;
+-    aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_16x16 = aom_highbd_h_predictor_16x16_sse2;
+-    aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_16x32 = aom_highbd_h_predictor_16x32_sse2;
+-    aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_16x8 = aom_highbd_h_predictor_16x8_sse2;
+-    aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_32x16 = aom_highbd_h_predictor_32x16_sse2;
+-    aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_32x32 = aom_highbd_h_predictor_32x32_sse2;
+-    aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_4x4 = aom_highbd_h_predictor_4x4_sse2;
+-    aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_4x8 = aom_highbd_h_predictor_4x8_sse2;
+-    aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_8x16 = aom_highbd_h_predictor_8x16_sse2;
+-    aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_8x4 = aom_highbd_h_predictor_8x4_sse2;
+-    aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_h_predictor_8x8 = aom_highbd_h_predictor_8x8_sse2;
+-    aom_highbd_lpf_horizontal_14 = aom_highbd_lpf_horizontal_14_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_14 = aom_highbd_lpf_horizontal_14_sse2;
+-    aom_highbd_lpf_horizontal_14_dual = aom_highbd_lpf_horizontal_14_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_14_dual = aom_highbd_lpf_horizontal_14_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_14_dual = aom_highbd_lpf_horizontal_14_dual_avx2;
+-    aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4 = aom_highbd_lpf_horizontal_4_sse2;
+-    aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2;
+-    aom_highbd_lpf_horizontal_6 = aom_highbd_lpf_horizontal_6_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_6 = aom_highbd_lpf_horizontal_6_sse2;
+-    aom_highbd_lpf_horizontal_6_dual = aom_highbd_lpf_horizontal_6_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_6_dual = aom_highbd_lpf_horizontal_6_dual_sse2;
+-    aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8 = aom_highbd_lpf_horizontal_8_sse2;
+-    aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2;
+-    aom_highbd_lpf_vertical_14 = aom_highbd_lpf_vertical_14_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_14 = aom_highbd_lpf_vertical_14_sse2;
+-    aom_highbd_lpf_vertical_14_dual = aom_highbd_lpf_vertical_14_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_14_dual = aom_highbd_lpf_vertical_14_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_vertical_14_dual = aom_highbd_lpf_vertical_14_dual_avx2;
+-    aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4 = aom_highbd_lpf_vertical_4_sse2;
+-    aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2;
+-    aom_highbd_lpf_vertical_6 = aom_highbd_lpf_vertical_6_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_6 = aom_highbd_lpf_vertical_6_sse2;
+-    aom_highbd_lpf_vertical_6_dual = aom_highbd_lpf_vertical_6_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_6_dual = aom_highbd_lpf_vertical_6_dual_sse2;
+-    aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8 = aom_highbd_lpf_vertical_8_sse2;
+-    aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_c;
+-    if (flags & HAS_SSE2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2;
+-    aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_16x16 = aom_highbd_v_predictor_16x16_sse2;
+-    aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_16x32 = aom_highbd_v_predictor_16x32_sse2;
+-    aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_16x8 = aom_highbd_v_predictor_16x8_sse2;
+-    aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_32x16 = aom_highbd_v_predictor_32x16_sse2;
+-    aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_32x32 = aom_highbd_v_predictor_32x32_sse2;
+-    aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_4x4 = aom_highbd_v_predictor_4x4_sse2;
+-    aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_4x8 = aom_highbd_v_predictor_4x8_sse2;
+-    aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_8x16 = aom_highbd_v_predictor_8x16_sse2;
+-    aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_8x4 = aom_highbd_v_predictor_8x4_sse2;
+-    aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_highbd_v_predictor_8x8 = aom_highbd_v_predictor_8x8_sse2;
+-    aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
+-    if (flags & HAS_SSE4_1) aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_sse4_1;
+-    if (flags & HAS_AVX2) aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_avx2;
+-    aom_lpf_horizontal_14 = aom_lpf_horizontal_14_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_14 = aom_lpf_horizontal_14_sse2;
+-    aom_lpf_horizontal_14_dual = aom_lpf_horizontal_14_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_14_dual = aom_lpf_horizontal_14_dual_sse2;
+-    aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_4 = aom_lpf_horizontal_4_sse2;
+-    aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_sse2;
+-    aom_lpf_horizontal_6 = aom_lpf_horizontal_6_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_6 = aom_lpf_horizontal_6_sse2;
+-    aom_lpf_horizontal_6_dual = aom_lpf_horizontal_6_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_6_dual = aom_lpf_horizontal_6_dual_sse2;
+-    aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_8 = aom_lpf_horizontal_8_sse2;
+-    aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_sse2;
+-    aom_lpf_vertical_14 = aom_lpf_vertical_14_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_14 = aom_lpf_vertical_14_sse2;
+-    aom_lpf_vertical_14_dual = aom_lpf_vertical_14_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_14_dual = aom_lpf_vertical_14_dual_sse2;
+-    aom_lpf_vertical_4 = aom_lpf_vertical_4_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_4 = aom_lpf_vertical_4_sse2;
+-    aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_sse2;
+-    aom_lpf_vertical_6 = aom_lpf_vertical_6_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_6 = aom_lpf_vertical_6_sse2;
+-    aom_lpf_vertical_6_dual = aom_lpf_vertical_6_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_6_dual = aom_lpf_vertical_6_dual_sse2;
+-    aom_lpf_vertical_8 = aom_lpf_vertical_8_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_8 = aom_lpf_vertical_8_sse2;
+-    aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_c;
+-    if (flags & HAS_SSE2) aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_sse2;
+-    aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2;
+-    aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2;
+-    aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_ssse3;
+-    aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_avx2;
+-    aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2;
+-    aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2;
+-    aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2;
+-    aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_avx2;
+-    aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_ssse3;
+-    aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_ssse3;
+-    aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3;
+-    aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3;
+-    aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_avx2;
+-    aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_avx2;
+-    aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_avx2;
+-    aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3;
+-    aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_ssse3;
+-    aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3;
+-    aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3;
+-    aom_smooth_h_predictor_16x16 = aom_smooth_h_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x16 = aom_smooth_h_predictor_16x16_ssse3;
+-    aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_ssse3;
+-    aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_ssse3;
+-    aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_ssse3;
+-    aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_ssse3;
+-    aom_smooth_h_predictor_32x16 = aom_smooth_h_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x16 = aom_smooth_h_predictor_32x16_ssse3;
+-    aom_smooth_h_predictor_32x32 = aom_smooth_h_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x32 = aom_smooth_h_predictor_32x32_ssse3;
+-    aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_ssse3;
+-    aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_ssse3;
+-    aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_ssse3;
+-    aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_ssse3;
+-    aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_ssse3;
+-    aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_ssse3;
+-    aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_ssse3;
+-    aom_smooth_h_predictor_64x64 = aom_smooth_h_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_64x64 = aom_smooth_h_predictor_64x64_ssse3;
+-    aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_ssse3;
+-    aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_ssse3;
+-    aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_ssse3;
+-    aom_smooth_h_predictor_8x8 = aom_smooth_h_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x8 = aom_smooth_h_predictor_8x8_ssse3;
+-    aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3;
+-    aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3;
+-    aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_ssse3;
+-    aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_ssse3;
+-    aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3;
+-    aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3;
+-    aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3;
+-    aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_ssse3;
+-    aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_ssse3;
+-    aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_ssse3;
+-    aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3;
+-    aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3;
+-    aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_ssse3;
+-    aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_ssse3;
+-    aom_smooth_predictor_64x64 = aom_smooth_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_64x64 = aom_smooth_predictor_64x64_ssse3;
+-    aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3;
+-    aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_ssse3;
+-    aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3;
+-    aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3;
+-    aom_smooth_v_predictor_16x16 = aom_smooth_v_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x16 = aom_smooth_v_predictor_16x16_ssse3;
+-    aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_ssse3;
+-    aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_ssse3;
+-    aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_ssse3;
+-    aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_ssse3;
+-    aom_smooth_v_predictor_32x16 = aom_smooth_v_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x16 = aom_smooth_v_predictor_32x16_ssse3;
+-    aom_smooth_v_predictor_32x32 = aom_smooth_v_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x32 = aom_smooth_v_predictor_32x32_ssse3;
+-    aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_ssse3;
+-    aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_ssse3;
+-    aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_ssse3;
+-    aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_ssse3;
+-    aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_ssse3;
+-    aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_ssse3;
+-    aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_ssse3;
+-    aom_smooth_v_predictor_64x64 = aom_smooth_v_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_64x64 = aom_smooth_v_predictor_64x64_ssse3;
+-    aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_ssse3;
+-    aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_ssse3;
+-    aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_ssse3;
+-    aom_smooth_v_predictor_8x8 = aom_smooth_v_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x8 = aom_smooth_v_predictor_8x8_ssse3;
+-    aom_v_predictor_16x16 = aom_v_predictor_16x16_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_16x16 = aom_v_predictor_16x16_sse2;
+-    aom_v_predictor_16x32 = aom_v_predictor_16x32_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_16x32 = aom_v_predictor_16x32_sse2;
+-    aom_v_predictor_16x4 = aom_v_predictor_16x4_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_16x4 = aom_v_predictor_16x4_sse2;
+-    aom_v_predictor_16x64 = aom_v_predictor_16x64_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_16x64 = aom_v_predictor_16x64_sse2;
+-    aom_v_predictor_16x8 = aom_v_predictor_16x8_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_16x8 = aom_v_predictor_16x8_sse2;
+-    aom_v_predictor_32x16 = aom_v_predictor_32x16_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2;
+-    aom_v_predictor_32x32 = aom_v_predictor_32x32_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2;
+-    aom_v_predictor_32x64 = aom_v_predictor_32x64_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_32x64 = aom_v_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_32x64 = aom_v_predictor_32x64_avx2;
+-    aom_v_predictor_32x8 = aom_v_predictor_32x8_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_32x8 = aom_v_predictor_32x8_sse2;
+-    aom_v_predictor_4x16 = aom_v_predictor_4x16_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_4x16 = aom_v_predictor_4x16_sse2;
+-    aom_v_predictor_4x4 = aom_v_predictor_4x4_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_4x4 = aom_v_predictor_4x4_sse2;
+-    aom_v_predictor_4x8 = aom_v_predictor_4x8_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_4x8 = aom_v_predictor_4x8_sse2;
+-    aom_v_predictor_64x16 = aom_v_predictor_64x16_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_64x16 = aom_v_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_64x16 = aom_v_predictor_64x16_avx2;
+-    aom_v_predictor_64x32 = aom_v_predictor_64x32_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_64x32 = aom_v_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_64x32 = aom_v_predictor_64x32_avx2;
+-    aom_v_predictor_64x64 = aom_v_predictor_64x64_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_64x64 = aom_v_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_64x64 = aom_v_predictor_64x64_avx2;
+-    aom_v_predictor_8x16 = aom_v_predictor_8x16_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_8x16 = aom_v_predictor_8x16_sse2;
+-    aom_v_predictor_8x32 = aom_v_predictor_8x32_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_8x32 = aom_v_predictor_8x32_sse2;
+-    aom_v_predictor_8x4 = aom_v_predictor_8x4_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_8x4 = aom_v_predictor_8x4_sse2;
+-    aom_v_predictor_8x8 = aom_v_predictor_8x8_c;
+-    if (flags & HAS_SSE2) aom_v_predictor_8x8 = aom_v_predictor_8x8_sse2;
+-    av1_round_shift_array = av1_round_shift_array_c;
+-    if (flags & HAS_SSE4_1) av1_round_shift_array = av1_round_shift_array_sse4_1;
+-}
+-#endif
+-
+-#ifdef __cplusplus
+-}  // extern "C"
+-#endif
+-
+-#endif
+diff --git a/media/libaom/config/win/mingw32/config/aom_scale_rtcd.h b/media/libaom/config/win/mingw32/config/aom_scale_rtcd.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw32/config/aom_scale_rtcd.h
++++ /dev/null
+@@ -1,88 +0,0 @@
+-// This file is generated. Do not edit.
+-#ifndef AOM_SCALE_RTCD_H_
+-#define AOM_SCALE_RTCD_H_
+-
+-#ifdef RTCD_C
+-#define RTCD_EXTERN
+-#else
+-#define RTCD_EXTERN extern
+-#endif
+-
+-struct yv12_buffer_config;
+-
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-void aom_extend_frame_borders_c(struct yv12_buffer_config *ybf, const int num_planes);
+-#define aom_extend_frame_borders aom_extend_frame_borders_c
+-
+-void aom_extend_frame_borders_y_c(struct yv12_buffer_config *ybf);
+-#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
+-
+-void aom_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf, const int num_planes);
+-#define aom_extend_frame_inner_borders aom_extend_frame_inner_borders_c
+-
+-void aom_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+-#define aom_horizontal_line_2_1_scale aom_horizontal_line_2_1_scale_c
+-
+-void aom_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+-#define aom_horizontal_line_5_3_scale aom_horizontal_line_5_3_scale_c
+-
+-void aom_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+-#define aom_horizontal_line_5_4_scale aom_horizontal_line_5_4_scale_c
+-
+-void aom_vertical_band_2_1_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_2_1_scale aom_vertical_band_2_1_scale_c
+-
+-void aom_vertical_band_2_1_scale_i_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_2_1_scale_i aom_vertical_band_2_1_scale_i_c
+-
+-void aom_vertical_band_5_3_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_5_3_scale aom_vertical_band_5_3_scale_c
+-
+-void aom_vertical_band_5_4_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_5_4_scale aom_vertical_band_5_4_scale_c
+-
+-void aom_yv12_copy_frame_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, const int num_planes);
+-#define aom_yv12_copy_frame aom_yv12_copy_frame_c
+-
+-void aom_yv12_copy_u_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc);
+-#define aom_yv12_copy_u aom_yv12_copy_u_c
+-
+-void aom_yv12_copy_v_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc);
+-#define aom_yv12_copy_v aom_yv12_copy_v_c
+-
+-void aom_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
+-#define aom_yv12_copy_y aom_yv12_copy_y_c
+-
+-void aom_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf, const int num_planes);
+-#define aom_yv12_extend_frame_borders aom_yv12_extend_frame_borders_c
+-
+-void aom_yv12_partial_copy_u_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend);
+-#define aom_yv12_partial_copy_u aom_yv12_partial_copy_u_c
+-
+-void aom_yv12_partial_copy_v_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend);
+-#define aom_yv12_partial_copy_v aom_yv12_partial_copy_v_c
+-
+-void aom_yv12_partial_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int hstart, int hend, int vstart, int vend);
+-#define aom_yv12_partial_copy_y aom_yv12_partial_copy_y_c
+-
+-void aom_scale_rtcd(void);
+-
+-#ifdef RTCD_C
+-#include "aom_ports/x86.h"
+-static void setup_rtcd_internal(void)
+-{
+-    int flags = x86_simd_caps();
+-
+-    (void)flags;
+-
+-}
+-#endif
+-
+-#ifdef __cplusplus
+-}  // extern "C"
+-#endif
+-
+-#endif
+diff --git a/media/libaom/config/win/mingw32/config/av1_rtcd.h b/media/libaom/config/win/mingw32/config/av1_rtcd.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw32/config/av1_rtcd.h
++++ /dev/null
+@@ -1,605 +0,0 @@
+-// This file is generated. Do not edit.
+-#ifndef AV1_RTCD_H_
+-#define AV1_RTCD_H_
+-
+-#ifdef RTCD_C
+-#define RTCD_EXTERN
+-#else
+-#define RTCD_EXTERN extern
+-#endif
+-
+-/*
+- * AV1
+- */
+-
+-#include "aom/aom_integer.h"
+-#include "aom_dsp/txfm_common.h"
+-#include "av1/common/common.h"
+-#include "av1/common/enums.h"
+-#include "av1/common/quant_common.h"
+-#include "av1/common/filter.h"
+-#include "av1/common/convolve.h"
+-#include "av1/common/av1_txfm.h"
+-#include "av1/common/odintrin.h"
+-#include "av1/common/restoration.h"
+-
+-struct macroblockd;
+-
+-/* Encoder forward decls */
+-struct macroblock;
+-struct txfm_param;
+-struct aom_variance_vtable;
+-struct search_site_config;
+-struct yv12_buffer_config;
+-
+-/* Function pointers return by CfL functions */
+-typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
+-                                     uint16_t *output_q3);
+-
+-typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
+-                                     uint16_t *output_q3);
+-
+-typedef void (*cfl_subtract_average_fn)(const uint16_t *src, int16_t *dst);
+-
+-typedef void (*cfl_predict_lbd_fn)(const int16_t *src, uint8_t *dst,
+-                                   int dst_stride, int alpha_q3);
+-
+-typedef void (*cfl_predict_hbd_fn)(const int16_t *src, uint16_t *dst,
+-                                   int dst_stride, int alpha_q3, int bd);
+-
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-void apply_selfguided_restoration_sse4_1(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-void apply_selfguided_restoration_avx2(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-RTCD_EXTERN void (*apply_selfguided_restoration)(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-
+-void av1_build_compound_diffwtd_mask_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-RTCD_EXTERN void (*av1_build_compound_diffwtd_mask)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-
+-void av1_build_compound_diffwtd_mask_d16_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-void av1_build_compound_diffwtd_mask_d16_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-void av1_build_compound_diffwtd_mask_d16_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-
+-void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-
+-void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
+-void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
+-
+-void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
+-void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
+-RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
+-
+-void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
+-#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
+-
+-void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int upsample_left, int dx, int dy);
+-#define av1_dr_prediction_z2 av1_dr_prediction_z2_c
+-
+-void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_left, int dx, int dy);
+-#define av1_dr_prediction_z3 av1_dr_prediction_z3_c
+-
+-void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength);
+-void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength);
+-RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength);
+-
+-void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength);
+-void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength);
+-RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength);
+-
+-void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
+-void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
+-RTCD_EXTERN void (*av1_filter_intra_predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
+-
+-void av1_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve8 av1_highbd_convolve8_c
+-
+-void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve8_horiz av1_highbd_convolve8_horiz_c
+-
+-void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
+-
+-void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
+-
+-void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve_copy av1_highbd_convolve_copy_c
+-
+-void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
+-void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
+-
+-void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
+-#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
+-
+-void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd);
+-#define av1_highbd_dr_prediction_z2 av1_highbd_dr_prediction_z2_c
+-
+-void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd);
+-#define av1_highbd_dr_prediction_z3 av1_highbd_dr_prediction_z3_c
+-
+-void av1_highbd_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_32x32)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x4)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
+-#define av1_highbd_iwht4x4_16_add av1_highbd_iwht4x4_16_add_c
+-
+-void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
+-#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
+-
+-void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-
+-void av1_highbd_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-void av1_highbd_wiener_convolve_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-void av1_highbd_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-RTCD_EXTERN void (*av1_highbd_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-
+-void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x16 av1_inv_txfm2d_add_16x16_c
+-
+-void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c
+-
+-void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x4 av1_inv_txfm2d_add_16x4_c
+-
+-void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x64 av1_inv_txfm2d_add_16x64_c
+-
+-void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c
+-
+-void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c
+-
+-void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x32 av1_inv_txfm2d_add_32x32_c
+-
+-void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x64 av1_inv_txfm2d_add_32x64_c
+-
+-void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x8 av1_inv_txfm2d_add_32x8_c
+-
+-void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_4x16 av1_inv_txfm2d_add_4x16_c
+-
+-void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-
+-void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c
+-
+-void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_64x16 av1_inv_txfm2d_add_64x16_c
+-
+-void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_64x32 av1_inv_txfm2d_add_64x32_c
+-
+-void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c
+-
+-void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c
+-
+-void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_8x32 av1_inv_txfm2d_add_8x32_c
+-
+-void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c
+-
+-void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-
+-void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-int av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-int av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-
+-void av1_upsample_intra_edge_c(uint8_t *p, int sz);
+-void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz);
+-RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz);
+-
+-void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd);
+-void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd);
+-RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd);
+-
+-void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-RTCD_EXTERN void (*av1_warp_affine)(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-
+-void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-void av1_wiener_convolve_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-
+-void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-
+-int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_c(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_420_hbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_420_lbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_c(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_422_hbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_c(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_422_lbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_c(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_444_hbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_c(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_444_lbd)(TX_SIZE tx_size);
+-
+-void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-
+-void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-
+-cfl_predict_hbd_fn get_predict_hbd_fn_c(TX_SIZE tx_size);
+-cfl_predict_hbd_fn get_predict_hbd_fn_ssse3(TX_SIZE tx_size);
+-cfl_predict_hbd_fn get_predict_hbd_fn_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_predict_hbd_fn (*get_predict_hbd_fn)(TX_SIZE tx_size);
+-
+-cfl_predict_lbd_fn get_predict_lbd_fn_c(TX_SIZE tx_size);
+-cfl_predict_lbd_fn get_predict_lbd_fn_ssse3(TX_SIZE tx_size);
+-cfl_predict_lbd_fn get_predict_lbd_fn_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_predict_lbd_fn (*get_predict_lbd_fn)(TX_SIZE tx_size);
+-
+-cfl_subtract_average_fn get_subtract_average_fn_c(TX_SIZE tx_size);
+-cfl_subtract_average_fn get_subtract_average_fn_sse2(TX_SIZE tx_size);
+-cfl_subtract_average_fn get_subtract_average_fn_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subtract_average_fn (*get_subtract_average_fn)(TX_SIZE tx_size);
+-
+-void av1_rtcd(void);
+-
+-#ifdef RTCD_C
+-#include "aom_ports/x86.h"
+-static void setup_rtcd_internal(void)
+-{
+-    int flags = x86_simd_caps();
+-
+-    (void)flags;
+-
+-    apply_selfguided_restoration = apply_selfguided_restoration_c;
+-    if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1;
+-    if (flags & HAS_AVX2) apply_selfguided_restoration = apply_selfguided_restoration_avx2;
+-    av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
+-    if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_sse4_1;
+-    if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_avx2;
+-    av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
+-    if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_sse4_1;
+-    if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_avx2;
+-    av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_c;
+-    if (flags & HAS_SSSE3) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_ssse3;
+-    if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_avx2;
+-    av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_c;
+-    if (flags & HAS_SSE2) av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_avx2;
+-    av1_convolve_2d_scale = av1_convolve_2d_scale_c;
+-    if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1;
+-    av1_convolve_2d_sr = av1_convolve_2d_sr_c;
+-    if (flags & HAS_SSE2) av1_convolve_2d_sr = av1_convolve_2d_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_2d_sr = av1_convolve_2d_sr_avx2;
+-    av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
+-    if (flags & HAS_SSE4_1) av1_convolve_horiz_rs = av1_convolve_horiz_rs_sse4_1;
+-    av1_convolve_x_sr = av1_convolve_x_sr_c;
+-    if (flags & HAS_SSE2) av1_convolve_x_sr = av1_convolve_x_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_x_sr = av1_convolve_x_sr_avx2;
+-    av1_convolve_y_sr = av1_convolve_y_sr_c;
+-    if (flags & HAS_SSE2) av1_convolve_y_sr = av1_convolve_y_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_y_sr = av1_convolve_y_sr_avx2;
+-    av1_filter_intra_edge = av1_filter_intra_edge_c;
+-    if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1;
+-    av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
+-    if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1;
+-    av1_filter_intra_predictor = av1_filter_intra_predictor_c;
+-    if (flags & HAS_SSE4_1) av1_filter_intra_predictor = av1_filter_intra_predictor_sse4_1;
+-    av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_c;
+-    if (flags & HAS_SSE2) av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_sse2;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_avx2;
+-    av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1;
+-    av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
+-    if (flags & HAS_SSSE3) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_avx2;
+-    av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_sse4_1;
+-    av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
+-    if (flags & HAS_SSSE3) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_avx2;
+-    av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
+-    if (flags & HAS_SSSE3) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_avx2;
+-    av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_avx2;
+-    av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_sse4_1;
+-    av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_sse4_1;
+-    av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_avx2;
+-    av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_sse4_1;
+-    av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_sse4_1;
+-    av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_sse4_1;
+-    av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_avx2;
+-    av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_avx2;
+-    av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_avx2;
+-    av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_avx2;
+-    av1_highbd_warp_affine = av1_highbd_warp_affine_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_warp_affine = av1_highbd_warp_affine_sse4_1;
+-    av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
+-    if (flags & HAS_SSSE3) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_avx2;
+-    av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
+-    if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_sse4_1;
+-    av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
+-    if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1;
+-    av1_inv_txfm_add = av1_inv_txfm_add_c;
+-    if (flags & HAS_SSSE3) av1_inv_txfm_add = av1_inv_txfm_add_ssse3;
+-    if (flags & HAS_AVX2) av1_inv_txfm_add = av1_inv_txfm_add_avx2;
+-    av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
+-    if (flags & HAS_SSSE3) av1_jnt_convolve_2d = av1_jnt_convolve_2d_ssse3;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_2d = av1_jnt_convolve_2d_avx2;
+-    av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_c;
+-    if (flags & HAS_SSE2) av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_sse2;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_avx2;
+-    av1_jnt_convolve_x = av1_jnt_convolve_x_c;
+-    if (flags & HAS_SSE2) av1_jnt_convolve_x = av1_jnt_convolve_x_sse2;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_x = av1_jnt_convolve_x_avx2;
+-    av1_jnt_convolve_y = av1_jnt_convolve_y_c;
+-    if (flags & HAS_SSE2) av1_jnt_convolve_y = av1_jnt_convolve_y_sse2;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_y = av1_jnt_convolve_y_avx2;
+-    av1_selfguided_restoration = av1_selfguided_restoration_c;
+-    if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
+-    if (flags & HAS_AVX2) av1_selfguided_restoration = av1_selfguided_restoration_avx2;
+-    av1_upsample_intra_edge = av1_upsample_intra_edge_c;
+-    if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1;
+-    av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
+-    if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
+-    av1_warp_affine = av1_warp_affine_c;
+-    if (flags & HAS_SSE4_1) av1_warp_affine = av1_warp_affine_sse4_1;
+-    av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
+-    if (flags & HAS_SSE2) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_sse2;
+-    if (flags & HAS_AVX2) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_avx2;
+-    cdef_filter_block = cdef_filter_block_c;
+-    if (flags & HAS_SSE2) cdef_filter_block = cdef_filter_block_sse2;
+-    if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3;
+-    if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1;
+-    if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2;
+-    cdef_find_dir = cdef_find_dir_c;
+-    if (flags & HAS_SSE2) cdef_find_dir = cdef_find_dir_sse2;
+-    if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3;
+-    if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1;
+-    if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2;
+-    cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_avx2;
+-    cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_avx2;
+-    cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_avx2;
+-    cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_avx2;
+-    cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_avx2;
+-    cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_avx2;
+-    copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c;
+-    if (flags & HAS_SSE2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2;
+-    if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3;
+-    if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1;
+-    if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2;
+-    copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c;
+-    if (flags & HAS_SSE2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2;
+-    if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3;
+-    if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1;
+-    if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2;
+-    get_predict_hbd_fn = get_predict_hbd_fn_c;
+-    if (flags & HAS_SSSE3) get_predict_hbd_fn = get_predict_hbd_fn_ssse3;
+-    if (flags & HAS_AVX2) get_predict_hbd_fn = get_predict_hbd_fn_avx2;
+-    get_predict_lbd_fn = get_predict_lbd_fn_c;
+-    if (flags & HAS_SSSE3) get_predict_lbd_fn = get_predict_lbd_fn_ssse3;
+-    if (flags & HAS_AVX2) get_predict_lbd_fn = get_predict_lbd_fn_avx2;
+-    get_subtract_average_fn = get_subtract_average_fn_c;
+-    if (flags & HAS_SSE2) get_subtract_average_fn = get_subtract_average_fn_sse2;
+-    if (flags & HAS_AVX2) get_subtract_average_fn = get_subtract_average_fn_avx2;
+-}
+-#endif
+-
+-#ifdef __cplusplus
+-}  // extern "C"
+-#endif
+-
+-#endif
+diff --git a/media/libaom/config/win/mingw64/config/aom_config.asm b/media/libaom/config/win/mingw64/config/aom_config.asm
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw64/config/aom_config.asm
++++ /dev/null
+@@ -1,76 +0,0 @@
+-;
+-; Copyright (c) 2018, Alliance for Open Media. All rights reserved
+-;
+-; This source code is subject to the terms of the BSD 2 Clause License and
+-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+-; was not distributed with this source code in the LICENSE file, you can
+-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
+-; Media Patent License 1.0 was not distributed with this source code in the
+-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+-;
+-
+-ARCH_ARM equ 0
+-ARCH_MIPS equ 0
+-ARCH_PPC equ 0
+-ARCH_X86 equ 0
+-ARCH_X86_64 equ 1
+-CONFIG_2PASS_PARTITION_SEARCH_LVL equ 1
+-CONFIG_ACCOUNTING equ 0
+-CONFIG_ANALYZER equ 0
+-CONFIG_AV1_DECODER equ 1
+-CONFIG_AV1_ENCODER equ 0
+-CONFIG_BIG_ENDIAN equ 0
+-CONFIG_BITSTREAM_DEBUG equ 0
+-CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
+-CONFIG_COLLECT_INTER_MODE_RD_STATS equ 0
+-CONFIG_COLLECT_RD_STATS equ 0
+-CONFIG_DEBUG equ 0
+-CONFIG_DENOISE equ 1
+-CONFIG_DIST_8X8 equ 0
+-CONFIG_ENTROPY_STATS equ 0
+-CONFIG_FILEOPTIONS equ 1
+-CONFIG_FIX_GF_LENGTH equ 1
+-CONFIG_FP_MB_STATS equ 0
+-CONFIG_GCC equ 1
+-CONFIG_GCOV equ 0
+-CONFIG_GLOBAL_MOTION_SEARCH equ 1
+-CONFIG_GPROF equ 0
+-CONFIG_INSPECTION equ 0
+-CONFIG_INTERNAL_STATS equ 0
+-CONFIG_INTER_STATS_ONLY equ 0
+-CONFIG_LIBYUV equ 0
+-CONFIG_LOWBITDEPTH equ 1
+-CONFIG_MAX_DECODE_PROFILE equ 2
+-CONFIG_MISMATCH_DEBUG equ 0
+-CONFIG_MULTITHREAD equ 1
+-CONFIG_NORMAL_TILE_MODE equ 0
+-CONFIG_OS_SUPPORT equ 1
+-CONFIG_PIC equ 0
+-CONFIG_RD_DEBUG equ 0
+-CONFIG_REDUCED_ENCODER_BORDER equ 0
+-CONFIG_RUNTIME_CPU_DETECT equ 1
+-CONFIG_SHARED equ 0
+-CONFIG_SHARP_SETTINGS equ 0
+-CONFIG_SIZE_LIMIT equ 0
+-CONFIG_SPATIAL_RESAMPLING equ 1
+-CONFIG_STATIC equ 1
+-CONFIG_WEBM_IO equ 0
+-DECODE_HEIGHT_LIMIT equ 0
+-DECODE_WIDTH_LIMIT equ 0
+-HAVE_AVX equ 1
+-HAVE_AVX2 equ 1
+-HAVE_DSPR2 equ 0
+-HAVE_FEXCEPT equ 1
+-HAVE_MIPS32 equ 0
+-HAVE_MIPS64 equ 0
+-HAVE_MMX equ 1
+-HAVE_MSA equ 0
+-HAVE_NEON equ 0
+-HAVE_SSE equ 1
+-HAVE_SSE2 equ 1
+-HAVE_SSE3 equ 1
+-HAVE_SSE4_1 equ 1
+-HAVE_SSE4_2 equ 1
+-HAVE_SSSE3 equ 1
+-HAVE_VSX equ 0
+-HAVE_WXWIDGETS equ 0
+diff --git a/media/libaom/config/win/mingw64/config/aom_config.h b/media/libaom/config/win/mingw64/config/aom_config.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw64/config/aom_config.h
++++ /dev/null
+@@ -1,82 +0,0 @@
+-/*
+- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+- *
+- * This source code is subject to the terms of the BSD 2 Clause License and
+- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+- * was not distributed with this source code in the LICENSE file, you can
+- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+- * Media Patent License 1.0 was not distributed with this source code in the
+- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+- */
+-#ifndef AOM_CONFIG_H_
+-#define AOM_CONFIG_H_
+-
+-#define ARCH_ARM 0
+-#define ARCH_MIPS 0
+-#define ARCH_PPC 0
+-#define ARCH_X86 0
+-#define ARCH_X86_64 1
+-#define CONFIG_2PASS_PARTITION_SEARCH_LVL 1
+-#define CONFIG_ACCOUNTING 0
+-#define CONFIG_ANALYZER 0
+-#define CONFIG_AV1_DECODER 1
+-#define CONFIG_AV1_ENCODER 0
+-#define CONFIG_BIG_ENDIAN 0
+-#define CONFIG_BITSTREAM_DEBUG 0
+-#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
+-#define CONFIG_COLLECT_INTER_MODE_RD_STATS 0
+-#define CONFIG_COLLECT_RD_STATS 0
+-#define CONFIG_DEBUG 0
+-#define CONFIG_DENOISE 1
+-#define CONFIG_DIST_8X8 0
+-#define CONFIG_ENTROPY_STATS 0
+-#define CONFIG_FILEOPTIONS 1
+-#define CONFIG_FIX_GF_LENGTH 1
+-#define CONFIG_FP_MB_STATS 0
+-#define CONFIG_GCC 1
+-#define CONFIG_GCOV 0
+-#define CONFIG_GLOBAL_MOTION_SEARCH 1
+-#define CONFIG_GPROF 0
+-#define CONFIG_INSPECTION 0
+-#define CONFIG_INTERNAL_STATS 0
+-#define CONFIG_INTER_STATS_ONLY 0
+-#define CONFIG_LIBYUV 0
+-#define CONFIG_LOWBITDEPTH 1
+-#define CONFIG_MAX_DECODE_PROFILE 2
+-#define CONFIG_MISMATCH_DEBUG 0
+-#define CONFIG_MULTITHREAD 1
+-#define CONFIG_NORMAL_TILE_MODE 0
+-#define CONFIG_OS_SUPPORT 1
+-#define CONFIG_PIC 0
+-#define CONFIG_RD_DEBUG 0
+-#define CONFIG_REDUCED_ENCODER_BORDER 0
+-#define CONFIG_RUNTIME_CPU_DETECT 1
+-#define CONFIG_SHARED 0
+-#define CONFIG_SHARP_SETTINGS 0
+-#define CONFIG_SIZE_LIMIT 0
+-#define CONFIG_SPATIAL_RESAMPLING 1
+-#define CONFIG_STATIC 1
+-#define CONFIG_WEBM_IO 0
+-#define DECODE_HEIGHT_LIMIT 0
+-#define DECODE_WIDTH_LIMIT 0
+-#define HAVE_AVX 1
+-#define HAVE_AVX2 1
+-#define HAVE_DSPR2 0
+-#define HAVE_FEXCEPT 1
+-#define HAVE_MIPS32 0
+-#define HAVE_MIPS64 0
+-#define HAVE_MMX 1
+-#define HAVE_MSA 0
+-#define HAVE_NEON 0
+-#define HAVE_SSE 1
+-#define HAVE_SSE2 1
+-#define HAVE_SSE3 1
+-#define HAVE_SSE4_1 1
+-#define HAVE_SSE4_2 1
+-#define HAVE_SSSE3 1
+-#define HAVE_VSX 0
+-#define HAVE_WXWIDGETS 0
+-#define INCLUDE_INSTALL_DIR INSTALLDIR/include
+-#define INLINE inline
+-#define LIB_INSTALL_DIR INSTALLDIR/lib
+-#endif /* AOM_CONFIG_H_ */
+diff --git a/media/libaom/config/win/mingw64/config/aom_dsp_rtcd.h b/media/libaom/config/win/mingw64/config/aom_dsp_rtcd.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw64/config/aom_dsp_rtcd.h
++++ /dev/null
+@@ -1,2001 +0,0 @@
+-// This file is generated. Do not edit.
+-#ifndef AOM_DSP_RTCD_H_
+-#define AOM_DSP_RTCD_H_
+-
+-#ifdef RTCD_C
+-#define RTCD_EXTERN
+-#else
+-#define RTCD_EXTERN extern
+-#endif
+-
+-/*
+- * DSP
+- */
+-
+-#include "aom/aom_integer.h"
+-#include "aom_dsp/aom_dsp_common.h"
+-#include "av1/common/enums.h"
+-#include "av1/common/blockd.h"
+-
+-
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-RTCD_EXTERN void (*aom_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-
+-void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-void aom_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-void aom_blend_a64_mask_avx2(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-RTCD_EXTERN void (*aom_blend_a64_mask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby);
+-
+-void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-RTCD_EXTERN void (*aom_blend_a64_vmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h);
+-
+-void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-RTCD_EXTERN void (*aom_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-
+-void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-RTCD_EXTERN void (*aom_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-
+-void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-void aom_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+-#define aom_convolve_copy aom_convolve_copy_sse2
+-
+-void aom_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_16x16 aom_dc_128_predictor_16x16_sse2
+-
+-void aom_dc_128_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2
+-
+-void aom_dc_128_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_sse2
+-
+-void aom_dc_128_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_sse2
+-
+-void aom_dc_128_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2
+-
+-void aom_dc_128_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
+-
+-void aom_dc_128_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_sse2
+-
+-void aom_dc_128_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_sse2
+-
+-void aom_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_4x4 aom_dc_128_predictor_4x4_sse2
+-
+-void aom_dc_128_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2
+-
+-void aom_dc_128_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_128_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_128_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2
+-
+-void aom_dc_128_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_sse2
+-
+-void aom_dc_128_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_8x4 aom_dc_128_predictor_8x4_sse2
+-
+-void aom_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_128_predictor_8x8 aom_dc_128_predictor_8x8_sse2
+-
+-void aom_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_16x16 aom_dc_left_predictor_16x16_sse2
+-
+-void aom_dc_left_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2
+-
+-void aom_dc_left_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_sse2
+-
+-void aom_dc_left_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_sse2
+-
+-void aom_dc_left_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2
+-
+-void aom_dc_left_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
+-
+-void aom_dc_left_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_sse2
+-
+-void aom_dc_left_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_sse2
+-
+-void aom_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_4x4 aom_dc_left_predictor_4x4_sse2
+-
+-void aom_dc_left_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2
+-
+-void aom_dc_left_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_left_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_left_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2
+-
+-void aom_dc_left_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_sse2
+-
+-void aom_dc_left_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_8x4 aom_dc_left_predictor_8x4_sse2
+-
+-void aom_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_left_predictor_8x8 aom_dc_left_predictor_8x8_sse2
+-
+-void aom_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_16x16 aom_dc_predictor_16x16_sse2
+-
+-void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2
+-
+-void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_sse2
+-
+-void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_sse2
+-
+-void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2
+-
+-void aom_dc_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
+-
+-void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_sse2
+-
+-void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_sse2
+-
+-void aom_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_4x4 aom_dc_predictor_4x4_sse2
+-
+-void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2
+-
+-void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2
+-
+-void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_sse2
+-
+-void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_8x4 aom_dc_predictor_8x4_sse2
+-
+-void aom_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_predictor_8x8 aom_dc_predictor_8x8_sse2
+-
+-void aom_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_16x16 aom_dc_top_predictor_16x16_sse2
+-
+-void aom_dc_top_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2
+-
+-void aom_dc_top_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_sse2
+-
+-void aom_dc_top_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_sse2
+-
+-void aom_dc_top_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2
+-
+-void aom_dc_top_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
+-
+-void aom_dc_top_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_sse2
+-
+-void aom_dc_top_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_sse2
+-
+-void aom_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_4x4 aom_dc_top_predictor_4x4_sse2
+-
+-void aom_dc_top_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2
+-
+-void aom_dc_top_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_dc_top_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_dc_top_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2
+-
+-void aom_dc_top_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_sse2
+-
+-void aom_dc_top_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_8x4 aom_dc_top_predictor_8x4_sse2
+-
+-void aom_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_dc_top_predictor_8x8 aom_dc_top_predictor_8x8_sse2
+-
+-void aom_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_16x16 aom_h_predictor_16x16_sse2
+-
+-void aom_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2
+-
+-void aom_h_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_16x4 aom_h_predictor_16x4_sse2
+-
+-void aom_h_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_16x64 aom_h_predictor_16x64_sse2
+-
+-void aom_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2
+-
+-void aom_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
+-
+-void aom_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_32x16 aom_h_predictor_32x16_sse2
+-
+-void aom_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_h_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_32x64 aom_h_predictor_32x64_sse2
+-
+-void aom_h_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_32x8 aom_h_predictor_32x8_sse2
+-
+-void aom_h_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_4x16 aom_h_predictor_4x16_sse2
+-
+-void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_4x4 aom_h_predictor_4x4_sse2
+-
+-void aom_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2
+-
+-void aom_h_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_64x16 aom_h_predictor_64x16_sse2
+-
+-void aom_h_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_64x32 aom_h_predictor_64x32_sse2
+-
+-void aom_h_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_64x64 aom_h_predictor_64x64_sse2
+-
+-void aom_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2
+-
+-void aom_h_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_8x32 aom_h_predictor_8x32_sse2
+-
+-void aom_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_8x4 aom_h_predictor_8x4_sse2
+-
+-void aom_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_h_predictor_8x8 aom_h_predictor_8x8_sse2
+-
+-void aom_highbd_blend_a64_d16_mask_c(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params, const int bd);
+-#define aom_highbd_blend_a64_d16_mask aom_highbd_blend_a64_d16_mask_c
+-
+-void aom_highbd_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-void aom_highbd_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-RTCD_EXTERN void (*aom_highbd_blend_a64_hmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-
+-void aom_highbd_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd);
+-void aom_highbd_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd);
+-RTCD_EXTERN void (*aom_highbd_blend_a64_mask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd);
+-
+-void aom_highbd_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-void aom_highbd_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-RTCD_EXTERN void (*aom_highbd_blend_a64_vmask)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd);
+-
+-void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-RTCD_EXTERN void (*aom_highbd_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-
+-void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-RTCD_EXTERN void (*aom_highbd_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-
+-void aom_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void aom_highbd_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-RTCD_EXTERN void (*aom_highbd_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-
+-void aom_highbd_dc_128_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_16x16 aom_highbd_dc_128_predictor_16x16_sse2
+-
+-void aom_highbd_dc_128_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_16x32 aom_highbd_dc_128_predictor_16x32_sse2
+-
+-void aom_highbd_dc_128_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_16x4 aom_highbd_dc_128_predictor_16x4_c
+-
+-void aom_highbd_dc_128_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_16x64 aom_highbd_dc_128_predictor_16x64_c
+-
+-void aom_highbd_dc_128_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_16x8 aom_highbd_dc_128_predictor_16x8_sse2
+-
+-void aom_highbd_dc_128_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_2x2 aom_highbd_dc_128_predictor_2x2_c
+-
+-void aom_highbd_dc_128_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_32x16 aom_highbd_dc_128_predictor_32x16_sse2
+-
+-void aom_highbd_dc_128_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_32x32 aom_highbd_dc_128_predictor_32x32_sse2
+-
+-void aom_highbd_dc_128_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_32x64 aom_highbd_dc_128_predictor_32x64_c
+-
+-void aom_highbd_dc_128_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_32x8 aom_highbd_dc_128_predictor_32x8_c
+-
+-void aom_highbd_dc_128_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_4x16 aom_highbd_dc_128_predictor_4x16_c
+-
+-void aom_highbd_dc_128_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_4x4 aom_highbd_dc_128_predictor_4x4_sse2
+-
+-void aom_highbd_dc_128_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_4x8 aom_highbd_dc_128_predictor_4x8_sse2
+-
+-void aom_highbd_dc_128_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_64x16 aom_highbd_dc_128_predictor_64x16_c
+-
+-void aom_highbd_dc_128_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_64x32 aom_highbd_dc_128_predictor_64x32_c
+-
+-void aom_highbd_dc_128_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_64x64 aom_highbd_dc_128_predictor_64x64_c
+-
+-void aom_highbd_dc_128_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_8x16 aom_highbd_dc_128_predictor_8x16_sse2
+-
+-void aom_highbd_dc_128_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_8x32 aom_highbd_dc_128_predictor_8x32_c
+-
+-void aom_highbd_dc_128_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_8x4 aom_highbd_dc_128_predictor_8x4_sse2
+-
+-void aom_highbd_dc_128_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_128_predictor_8x8 aom_highbd_dc_128_predictor_8x8_sse2
+-
+-void aom_highbd_dc_left_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_16x16 aom_highbd_dc_left_predictor_16x16_sse2
+-
+-void aom_highbd_dc_left_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_16x32 aom_highbd_dc_left_predictor_16x32_sse2
+-
+-void aom_highbd_dc_left_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_16x4 aom_highbd_dc_left_predictor_16x4_c
+-
+-void aom_highbd_dc_left_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_16x64 aom_highbd_dc_left_predictor_16x64_c
+-
+-void aom_highbd_dc_left_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_16x8 aom_highbd_dc_left_predictor_16x8_sse2
+-
+-void aom_highbd_dc_left_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_2x2 aom_highbd_dc_left_predictor_2x2_c
+-
+-void aom_highbd_dc_left_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_32x16 aom_highbd_dc_left_predictor_32x16_sse2
+-
+-void aom_highbd_dc_left_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_32x32 aom_highbd_dc_left_predictor_32x32_sse2
+-
+-void aom_highbd_dc_left_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_32x64 aom_highbd_dc_left_predictor_32x64_c
+-
+-void aom_highbd_dc_left_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_32x8 aom_highbd_dc_left_predictor_32x8_c
+-
+-void aom_highbd_dc_left_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_4x16 aom_highbd_dc_left_predictor_4x16_c
+-
+-void aom_highbd_dc_left_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_4x4 aom_highbd_dc_left_predictor_4x4_sse2
+-
+-void aom_highbd_dc_left_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_4x8 aom_highbd_dc_left_predictor_4x8_sse2
+-
+-void aom_highbd_dc_left_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_64x16 aom_highbd_dc_left_predictor_64x16_c
+-
+-void aom_highbd_dc_left_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_64x32 aom_highbd_dc_left_predictor_64x32_c
+-
+-void aom_highbd_dc_left_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_64x64 aom_highbd_dc_left_predictor_64x64_c
+-
+-void aom_highbd_dc_left_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_8x16 aom_highbd_dc_left_predictor_8x16_sse2
+-
+-void aom_highbd_dc_left_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_8x32 aom_highbd_dc_left_predictor_8x32_c
+-
+-void aom_highbd_dc_left_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_8x4 aom_highbd_dc_left_predictor_8x4_sse2
+-
+-void aom_highbd_dc_left_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_left_predictor_8x8 aom_highbd_dc_left_predictor_8x8_sse2
+-
+-void aom_highbd_dc_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_16x16 aom_highbd_dc_predictor_16x16_sse2
+-
+-void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_16x32 aom_highbd_dc_predictor_16x32_sse2
+-
+-void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_16x4 aom_highbd_dc_predictor_16x4_c
+-
+-void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_16x64 aom_highbd_dc_predictor_16x64_c
+-
+-void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_16x8 aom_highbd_dc_predictor_16x8_sse2
+-
+-void aom_highbd_dc_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_2x2 aom_highbd_dc_predictor_2x2_c
+-
+-void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_32x16 aom_highbd_dc_predictor_32x16_sse2
+-
+-void aom_highbd_dc_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_32x32 aom_highbd_dc_predictor_32x32_sse2
+-
+-void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_32x64 aom_highbd_dc_predictor_32x64_c
+-
+-void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_32x8 aom_highbd_dc_predictor_32x8_c
+-
+-void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_4x16 aom_highbd_dc_predictor_4x16_c
+-
+-void aom_highbd_dc_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_4x4 aom_highbd_dc_predictor_4x4_sse2
+-
+-void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_4x8 aom_highbd_dc_predictor_4x8_sse2
+-
+-void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_64x16 aom_highbd_dc_predictor_64x16_c
+-
+-void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_64x32 aom_highbd_dc_predictor_64x32_c
+-
+-void aom_highbd_dc_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_64x64 aom_highbd_dc_predictor_64x64_c
+-
+-void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_8x16 aom_highbd_dc_predictor_8x16_sse2
+-
+-void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_8x32 aom_highbd_dc_predictor_8x32_c
+-
+-void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_8x4 aom_highbd_dc_predictor_8x4_sse2
+-
+-void aom_highbd_dc_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_predictor_8x8 aom_highbd_dc_predictor_8x8_sse2
+-
+-void aom_highbd_dc_top_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_16x16 aom_highbd_dc_top_predictor_16x16_sse2
+-
+-void aom_highbd_dc_top_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_16x32 aom_highbd_dc_top_predictor_16x32_sse2
+-
+-void aom_highbd_dc_top_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_16x4 aom_highbd_dc_top_predictor_16x4_c
+-
+-void aom_highbd_dc_top_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_16x64 aom_highbd_dc_top_predictor_16x64_c
+-
+-void aom_highbd_dc_top_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_16x8 aom_highbd_dc_top_predictor_16x8_sse2
+-
+-void aom_highbd_dc_top_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_2x2 aom_highbd_dc_top_predictor_2x2_c
+-
+-void aom_highbd_dc_top_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_32x16 aom_highbd_dc_top_predictor_32x16_sse2
+-
+-void aom_highbd_dc_top_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_32x32 aom_highbd_dc_top_predictor_32x32_sse2
+-
+-void aom_highbd_dc_top_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_32x64 aom_highbd_dc_top_predictor_32x64_c
+-
+-void aom_highbd_dc_top_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_32x8 aom_highbd_dc_top_predictor_32x8_c
+-
+-void aom_highbd_dc_top_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_4x16 aom_highbd_dc_top_predictor_4x16_c
+-
+-void aom_highbd_dc_top_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_4x4 aom_highbd_dc_top_predictor_4x4_sse2
+-
+-void aom_highbd_dc_top_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_4x8 aom_highbd_dc_top_predictor_4x8_sse2
+-
+-void aom_highbd_dc_top_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_64x16 aom_highbd_dc_top_predictor_64x16_c
+-
+-void aom_highbd_dc_top_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_64x32 aom_highbd_dc_top_predictor_64x32_c
+-
+-void aom_highbd_dc_top_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_64x64 aom_highbd_dc_top_predictor_64x64_c
+-
+-void aom_highbd_dc_top_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_8x16 aom_highbd_dc_top_predictor_8x16_sse2
+-
+-void aom_highbd_dc_top_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_8x32 aom_highbd_dc_top_predictor_8x32_c
+-
+-void aom_highbd_dc_top_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_8x4 aom_highbd_dc_top_predictor_8x4_sse2
+-
+-void aom_highbd_dc_top_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_dc_top_predictor_8x8 aom_highbd_dc_top_predictor_8x8_sse2
+-
+-void aom_highbd_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_16x16 aom_highbd_h_predictor_16x16_sse2
+-
+-void aom_highbd_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_16x32 aom_highbd_h_predictor_16x32_sse2
+-
+-void aom_highbd_h_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_16x4 aom_highbd_h_predictor_16x4_c
+-
+-void aom_highbd_h_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_16x64 aom_highbd_h_predictor_16x64_c
+-
+-void aom_highbd_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_16x8 aom_highbd_h_predictor_16x8_sse2
+-
+-void aom_highbd_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_2x2 aom_highbd_h_predictor_2x2_c
+-
+-void aom_highbd_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_32x16 aom_highbd_h_predictor_32x16_sse2
+-
+-void aom_highbd_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_32x32 aom_highbd_h_predictor_32x32_sse2
+-
+-void aom_highbd_h_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_32x64 aom_highbd_h_predictor_32x64_c
+-
+-void aom_highbd_h_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_32x8 aom_highbd_h_predictor_32x8_c
+-
+-void aom_highbd_h_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_4x16 aom_highbd_h_predictor_4x16_c
+-
+-void aom_highbd_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_4x4 aom_highbd_h_predictor_4x4_sse2
+-
+-void aom_highbd_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_4x8 aom_highbd_h_predictor_4x8_sse2
+-
+-void aom_highbd_h_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_64x16 aom_highbd_h_predictor_64x16_c
+-
+-void aom_highbd_h_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_64x32 aom_highbd_h_predictor_64x32_c
+-
+-void aom_highbd_h_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_64x64 aom_highbd_h_predictor_64x64_c
+-
+-void aom_highbd_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_8x16 aom_highbd_h_predictor_8x16_sse2
+-
+-void aom_highbd_h_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_8x32 aom_highbd_h_predictor_8x32_c
+-
+-void aom_highbd_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_8x4 aom_highbd_h_predictor_8x4_sse2
+-
+-void aom_highbd_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_h_predictor_8x8 aom_highbd_h_predictor_8x8_sse2
+-
+-void aom_highbd_lpf_horizontal_14_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_14_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_horizontal_14 aom_highbd_lpf_horizontal_14_sse2
+-
+-void aom_highbd_lpf_horizontal_14_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-void aom_highbd_lpf_horizontal_14_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-void aom_highbd_lpf_horizontal_14_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_14_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd);
+-
+-void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_horizontal_4 aom_highbd_lpf_horizontal_4_sse2
+-
+-void aom_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_6_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_horizontal_6 aom_highbd_lpf_horizontal_6_sse2
+-
+-void aom_highbd_lpf_horizontal_6_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_6_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-#define aom_highbd_lpf_horizontal_6_dual aom_highbd_lpf_horizontal_6_dual_sse2
+-
+-void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_horizontal_8 aom_highbd_lpf_horizontal_8_sse2
+-
+-void aom_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_horizontal_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_horizontal_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_vertical_14_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_14_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_vertical_14 aom_highbd_lpf_vertical_14_sse2
+-
+-void aom_highbd_lpf_vertical_14_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_14_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_14_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_14_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_vertical_4 aom_highbd_lpf_vertical_4_sse2
+-
+-void aom_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_4_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_4_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_6_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_vertical_6 aom_highbd_lpf_vertical_6_sse2
+-
+-void aom_highbd_lpf_vertical_6_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_6_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-#define aom_highbd_lpf_vertical_6_dual aom_highbd_lpf_vertical_6_dual_sse2
+-
+-void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd);
+-#define aom_highbd_lpf_vertical_8 aom_highbd_lpf_vertical_8_sse2
+-
+-void aom_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-void aom_highbd_lpf_vertical_8_dual_avx2(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-RTCD_EXTERN void (*aom_highbd_lpf_vertical_8_dual)(uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd);
+-
+-void aom_highbd_paeth_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x16 aom_highbd_paeth_predictor_16x16_c
+-
+-void aom_highbd_paeth_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x32 aom_highbd_paeth_predictor_16x32_c
+-
+-void aom_highbd_paeth_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x4 aom_highbd_paeth_predictor_16x4_c
+-
+-void aom_highbd_paeth_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x64 aom_highbd_paeth_predictor_16x64_c
+-
+-void aom_highbd_paeth_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_16x8 aom_highbd_paeth_predictor_16x8_c
+-
+-void aom_highbd_paeth_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_2x2 aom_highbd_paeth_predictor_2x2_c
+-
+-void aom_highbd_paeth_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x16 aom_highbd_paeth_predictor_32x16_c
+-
+-void aom_highbd_paeth_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x32 aom_highbd_paeth_predictor_32x32_c
+-
+-void aom_highbd_paeth_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x64 aom_highbd_paeth_predictor_32x64_c
+-
+-void aom_highbd_paeth_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_32x8 aom_highbd_paeth_predictor_32x8_c
+-
+-void aom_highbd_paeth_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_4x16 aom_highbd_paeth_predictor_4x16_c
+-
+-void aom_highbd_paeth_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_4x4 aom_highbd_paeth_predictor_4x4_c
+-
+-void aom_highbd_paeth_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_4x8 aom_highbd_paeth_predictor_4x8_c
+-
+-void aom_highbd_paeth_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_64x16 aom_highbd_paeth_predictor_64x16_c
+-
+-void aom_highbd_paeth_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_64x32 aom_highbd_paeth_predictor_64x32_c
+-
+-void aom_highbd_paeth_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_64x64 aom_highbd_paeth_predictor_64x64_c
+-
+-void aom_highbd_paeth_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x16 aom_highbd_paeth_predictor_8x16_c
+-
+-void aom_highbd_paeth_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x32 aom_highbd_paeth_predictor_8x32_c
+-
+-void aom_highbd_paeth_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x4 aom_highbd_paeth_predictor_8x4_c
+-
+-void aom_highbd_paeth_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_paeth_predictor_8x8 aom_highbd_paeth_predictor_8x8_c
+-
+-void aom_highbd_smooth_h_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x16 aom_highbd_smooth_h_predictor_16x16_c
+-
+-void aom_highbd_smooth_h_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x32 aom_highbd_smooth_h_predictor_16x32_c
+-
+-void aom_highbd_smooth_h_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x4 aom_highbd_smooth_h_predictor_16x4_c
+-
+-void aom_highbd_smooth_h_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x64 aom_highbd_smooth_h_predictor_16x64_c
+-
+-void aom_highbd_smooth_h_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_16x8 aom_highbd_smooth_h_predictor_16x8_c
+-
+-void aom_highbd_smooth_h_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_2x2 aom_highbd_smooth_h_predictor_2x2_c
+-
+-void aom_highbd_smooth_h_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x16 aom_highbd_smooth_h_predictor_32x16_c
+-
+-void aom_highbd_smooth_h_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x32 aom_highbd_smooth_h_predictor_32x32_c
+-
+-void aom_highbd_smooth_h_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x64 aom_highbd_smooth_h_predictor_32x64_c
+-
+-void aom_highbd_smooth_h_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_32x8 aom_highbd_smooth_h_predictor_32x8_c
+-
+-void aom_highbd_smooth_h_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_4x16 aom_highbd_smooth_h_predictor_4x16_c
+-
+-void aom_highbd_smooth_h_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_4x4 aom_highbd_smooth_h_predictor_4x4_c
+-
+-void aom_highbd_smooth_h_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_4x8 aom_highbd_smooth_h_predictor_4x8_c
+-
+-void aom_highbd_smooth_h_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_64x16 aom_highbd_smooth_h_predictor_64x16_c
+-
+-void aom_highbd_smooth_h_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_64x32 aom_highbd_smooth_h_predictor_64x32_c
+-
+-void aom_highbd_smooth_h_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_64x64 aom_highbd_smooth_h_predictor_64x64_c
+-
+-void aom_highbd_smooth_h_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x16 aom_highbd_smooth_h_predictor_8x16_c
+-
+-void aom_highbd_smooth_h_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x32 aom_highbd_smooth_h_predictor_8x32_c
+-
+-void aom_highbd_smooth_h_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x4 aom_highbd_smooth_h_predictor_8x4_c
+-
+-void aom_highbd_smooth_h_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_h_predictor_8x8 aom_highbd_smooth_h_predictor_8x8_c
+-
+-void aom_highbd_smooth_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x16 aom_highbd_smooth_predictor_16x16_c
+-
+-void aom_highbd_smooth_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x32 aom_highbd_smooth_predictor_16x32_c
+-
+-void aom_highbd_smooth_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x4 aom_highbd_smooth_predictor_16x4_c
+-
+-void aom_highbd_smooth_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x64 aom_highbd_smooth_predictor_16x64_c
+-
+-void aom_highbd_smooth_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_16x8 aom_highbd_smooth_predictor_16x8_c
+-
+-void aom_highbd_smooth_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_2x2 aom_highbd_smooth_predictor_2x2_c
+-
+-void aom_highbd_smooth_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x16 aom_highbd_smooth_predictor_32x16_c
+-
+-void aom_highbd_smooth_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x32 aom_highbd_smooth_predictor_32x32_c
+-
+-void aom_highbd_smooth_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x64 aom_highbd_smooth_predictor_32x64_c
+-
+-void aom_highbd_smooth_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_32x8 aom_highbd_smooth_predictor_32x8_c
+-
+-void aom_highbd_smooth_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_4x16 aom_highbd_smooth_predictor_4x16_c
+-
+-void aom_highbd_smooth_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_4x4 aom_highbd_smooth_predictor_4x4_c
+-
+-void aom_highbd_smooth_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_4x8 aom_highbd_smooth_predictor_4x8_c
+-
+-void aom_highbd_smooth_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_64x16 aom_highbd_smooth_predictor_64x16_c
+-
+-void aom_highbd_smooth_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_64x32 aom_highbd_smooth_predictor_64x32_c
+-
+-void aom_highbd_smooth_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_64x64 aom_highbd_smooth_predictor_64x64_c
+-
+-void aom_highbd_smooth_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x16 aom_highbd_smooth_predictor_8x16_c
+-
+-void aom_highbd_smooth_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x32 aom_highbd_smooth_predictor_8x32_c
+-
+-void aom_highbd_smooth_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x4 aom_highbd_smooth_predictor_8x4_c
+-
+-void aom_highbd_smooth_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_predictor_8x8 aom_highbd_smooth_predictor_8x8_c
+-
+-void aom_highbd_smooth_v_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x16 aom_highbd_smooth_v_predictor_16x16_c
+-
+-void aom_highbd_smooth_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x32 aom_highbd_smooth_v_predictor_16x32_c
+-
+-void aom_highbd_smooth_v_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x4 aom_highbd_smooth_v_predictor_16x4_c
+-
+-void aom_highbd_smooth_v_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x64 aom_highbd_smooth_v_predictor_16x64_c
+-
+-void aom_highbd_smooth_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_16x8 aom_highbd_smooth_v_predictor_16x8_c
+-
+-void aom_highbd_smooth_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_2x2 aom_highbd_smooth_v_predictor_2x2_c
+-
+-void aom_highbd_smooth_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x16 aom_highbd_smooth_v_predictor_32x16_c
+-
+-void aom_highbd_smooth_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x32 aom_highbd_smooth_v_predictor_32x32_c
+-
+-void aom_highbd_smooth_v_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x64 aom_highbd_smooth_v_predictor_32x64_c
+-
+-void aom_highbd_smooth_v_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_32x8 aom_highbd_smooth_v_predictor_32x8_c
+-
+-void aom_highbd_smooth_v_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_4x16 aom_highbd_smooth_v_predictor_4x16_c
+-
+-void aom_highbd_smooth_v_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_4x4 aom_highbd_smooth_v_predictor_4x4_c
+-
+-void aom_highbd_smooth_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_4x8 aom_highbd_smooth_v_predictor_4x8_c
+-
+-void aom_highbd_smooth_v_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_64x16 aom_highbd_smooth_v_predictor_64x16_c
+-
+-void aom_highbd_smooth_v_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_64x32 aom_highbd_smooth_v_predictor_64x32_c
+-
+-void aom_highbd_smooth_v_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_64x64 aom_highbd_smooth_v_predictor_64x64_c
+-
+-void aom_highbd_smooth_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x16 aom_highbd_smooth_v_predictor_8x16_c
+-
+-void aom_highbd_smooth_v_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x32 aom_highbd_smooth_v_predictor_8x32_c
+-
+-void aom_highbd_smooth_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x4 aom_highbd_smooth_v_predictor_8x4_c
+-
+-void aom_highbd_smooth_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_smooth_v_predictor_8x8 aom_highbd_smooth_v_predictor_8x8_c
+-
+-void aom_highbd_v_predictor_16x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_16x16 aom_highbd_v_predictor_16x16_sse2
+-
+-void aom_highbd_v_predictor_16x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_16x32 aom_highbd_v_predictor_16x32_sse2
+-
+-void aom_highbd_v_predictor_16x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_16x4 aom_highbd_v_predictor_16x4_c
+-
+-void aom_highbd_v_predictor_16x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_16x64 aom_highbd_v_predictor_16x64_c
+-
+-void aom_highbd_v_predictor_16x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_16x8 aom_highbd_v_predictor_16x8_sse2
+-
+-void aom_highbd_v_predictor_2x2_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_2x2 aom_highbd_v_predictor_2x2_c
+-
+-void aom_highbd_v_predictor_32x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_32x16 aom_highbd_v_predictor_32x16_sse2
+-
+-void aom_highbd_v_predictor_32x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_32x32 aom_highbd_v_predictor_32x32_sse2
+-
+-void aom_highbd_v_predictor_32x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_32x64 aom_highbd_v_predictor_32x64_c
+-
+-void aom_highbd_v_predictor_32x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_32x8 aom_highbd_v_predictor_32x8_c
+-
+-void aom_highbd_v_predictor_4x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_4x16 aom_highbd_v_predictor_4x16_c
+-
+-void aom_highbd_v_predictor_4x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_4x4 aom_highbd_v_predictor_4x4_sse2
+-
+-void aom_highbd_v_predictor_4x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_4x8 aom_highbd_v_predictor_4x8_sse2
+-
+-void aom_highbd_v_predictor_64x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_64x16 aom_highbd_v_predictor_64x16_c
+-
+-void aom_highbd_v_predictor_64x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_64x32 aom_highbd_v_predictor_64x32_c
+-
+-void aom_highbd_v_predictor_64x64_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_64x64 aom_highbd_v_predictor_64x64_c
+-
+-void aom_highbd_v_predictor_8x16_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_8x16 aom_highbd_v_predictor_8x16_sse2
+-
+-void aom_highbd_v_predictor_8x32_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_8x32 aom_highbd_v_predictor_8x32_c
+-
+-void aom_highbd_v_predictor_8x4_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_8x4 aom_highbd_v_predictor_8x4_sse2
+-
+-void aom_highbd_v_predictor_8x8_c(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-void aom_highbd_v_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd);
+-#define aom_highbd_v_predictor_8x8 aom_highbd_v_predictor_8x8_sse2
+-
+-void aom_lowbd_blend_a64_d16_mask_c(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-void aom_lowbd_blend_a64_d16_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-void aom_lowbd_blend_a64_d16_mask_avx2(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*aom_lowbd_blend_a64_d16_mask)(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
+-
+-void aom_lpf_horizontal_14_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_14_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_horizontal_14 aom_lpf_horizontal_14_sse2
+-
+-void aom_lpf_horizontal_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_14_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_horizontal_14_dual aom_lpf_horizontal_14_dual_sse2
+-
+-void aom_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_horizontal_4 aom_lpf_horizontal_4_sse2
+-
+-void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_sse2
+-
+-void aom_lpf_horizontal_6_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_6_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_horizontal_6 aom_lpf_horizontal_6_sse2
+-
+-void aom_lpf_horizontal_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_6_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_horizontal_6_dual aom_lpf_horizontal_6_dual_sse2
+-
+-void aom_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_horizontal_8 aom_lpf_horizontal_8_sse2
+-
+-void aom_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_horizontal_8_dual aom_lpf_horizontal_8_dual_sse2
+-
+-void aom_lpf_vertical_14_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_14_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_vertical_14 aom_lpf_vertical_14_sse2
+-
+-void aom_lpf_vertical_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_14_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_vertical_14_dual aom_lpf_vertical_14_dual_sse2
+-
+-void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_vertical_4 aom_lpf_vertical_4_sse2
+-
+-void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_sse2
+-
+-void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_6_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_vertical_6 aom_lpf_vertical_6_sse2
+-
+-void aom_lpf_vertical_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_6_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_vertical_6_dual aom_lpf_vertical_6_dual_sse2
+-
+-void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-void aom_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+-#define aom_lpf_vertical_8 aom_lpf_vertical_8_sse2
+-
+-void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+-#define aom_lpf_vertical_8_dual aom_lpf_vertical_8_dual_sse2
+-
+-void aom_paeth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+-
+-void aom_paeth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_paeth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+-
+-void aom_smooth_h_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
+-
+-void aom_smooth_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_16x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+-
+-void aom_smooth_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_32x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_4x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_4x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_smooth_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_smooth_v_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_smooth_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_16x16 aom_v_predictor_16x16_sse2
+-
+-void aom_v_predictor_16x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2
+-
+-void aom_v_predictor_16x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_16x4 aom_v_predictor_16x4_sse2
+-
+-void aom_v_predictor_16x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_16x64 aom_v_predictor_16x64_sse2
+-
+-void aom_v_predictor_16x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2
+-
+-void aom_v_predictor_2x2_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
+-
+-void aom_v_predictor_32x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_32x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_32x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_32x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_32x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_32x8 aom_v_predictor_32x8_sse2
+-
+-void aom_v_predictor_4x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_4x16 aom_v_predictor_4x16_sse2
+-
+-void aom_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_4x4 aom_v_predictor_4x4_sse2
+-
+-void aom_v_predictor_4x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2
+-
+-void aom_v_predictor_64x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_64x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_64x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_64x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_64x64_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-RTCD_EXTERN void (*aom_v_predictor_64x64)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-
+-void aom_v_predictor_8x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2
+-
+-void aom_v_predictor_8x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_8x32 aom_v_predictor_8x32_sse2
+-
+-void aom_v_predictor_8x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_8x4 aom_v_predictor_8x4_sse2
+-
+-void aom_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-void aom_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+-#define aom_v_predictor_8x8 aom_v_predictor_8x8_sse2
+-
+-void av1_round_shift_array_c(int32_t *arr, int size, int bit);
+-void av1_round_shift_array_sse4_1(int32_t *arr, int size, int bit);
+-RTCD_EXTERN void (*av1_round_shift_array)(int32_t *arr, int size, int bit);
+-
+-void aom_dsp_rtcd(void);
+-
+-#ifdef RTCD_C
+-#include "aom_ports/x86.h"
+-static void setup_rtcd_internal(void)
+-{
+-    int flags = x86_simd_caps();
+-
+-    (void)flags;
+-
+-    aom_blend_a64_hmask = aom_blend_a64_hmask_c;
+-    if (flags & HAS_SSE4_1) aom_blend_a64_hmask = aom_blend_a64_hmask_sse4_1;
+-    aom_blend_a64_mask = aom_blend_a64_mask_c;
+-    if (flags & HAS_SSE4_1) aom_blend_a64_mask = aom_blend_a64_mask_sse4_1;
+-    if (flags & HAS_AVX2) aom_blend_a64_mask = aom_blend_a64_mask_avx2;
+-    aom_blend_a64_vmask = aom_blend_a64_vmask_c;
+-    if (flags & HAS_SSE4_1) aom_blend_a64_vmask = aom_blend_a64_vmask_sse4_1;
+-    aom_convolve8_horiz = aom_convolve8_horiz_sse2;
+-    if (flags & HAS_SSSE3) aom_convolve8_horiz = aom_convolve8_horiz_ssse3;
+-    if (flags & HAS_AVX2) aom_convolve8_horiz = aom_convolve8_horiz_avx2;
+-    aom_convolve8_vert = aom_convolve8_vert_sse2;
+-    if (flags & HAS_SSSE3) aom_convolve8_vert = aom_convolve8_vert_ssse3;
+-    if (flags & HAS_AVX2) aom_convolve8_vert = aom_convolve8_vert_avx2;
+-    aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_32x16 = aom_dc_128_predictor_32x16_avx2;
+-    aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_32x32 = aom_dc_128_predictor_32x32_avx2;
+-    aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_avx2;
+-    aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_avx2;
+-    aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_avx2;
+-    aom_dc_128_predictor_64x64 = aom_dc_128_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_128_predictor_64x64 = aom_dc_128_predictor_64x64_avx2;
+-    aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_32x16 = aom_dc_left_predictor_32x16_avx2;
+-    aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_32x32 = aom_dc_left_predictor_32x32_avx2;
+-    aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_avx2;
+-    aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_avx2;
+-    aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_avx2;
+-    aom_dc_left_predictor_64x64 = aom_dc_left_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_left_predictor_64x64 = aom_dc_left_predictor_64x64_avx2;
+-    aom_dc_predictor_32x16 = aom_dc_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_32x16 = aom_dc_predictor_32x16_avx2;
+-    aom_dc_predictor_32x32 = aom_dc_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_32x32 = aom_dc_predictor_32x32_avx2;
+-    aom_dc_predictor_32x64 = aom_dc_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_32x64 = aom_dc_predictor_32x64_avx2;
+-    aom_dc_predictor_64x16 = aom_dc_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_64x16 = aom_dc_predictor_64x16_avx2;
+-    aom_dc_predictor_64x32 = aom_dc_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_64x32 = aom_dc_predictor_64x32_avx2;
+-    aom_dc_predictor_64x64 = aom_dc_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_predictor_64x64 = aom_dc_predictor_64x64_avx2;
+-    aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_32x16 = aom_dc_top_predictor_32x16_avx2;
+-    aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_32x32 = aom_dc_top_predictor_32x32_avx2;
+-    aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_avx2;
+-    aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_avx2;
+-    aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_avx2;
+-    aom_dc_top_predictor_64x64 = aom_dc_top_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_dc_top_predictor_64x64 = aom_dc_top_predictor_64x64_avx2;
+-    aom_h_predictor_32x32 = aom_h_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_h_predictor_32x32 = aom_h_predictor_32x32_avx2;
+-    aom_highbd_blend_a64_hmask = aom_highbd_blend_a64_hmask_c;
+-    if (flags & HAS_SSE4_1) aom_highbd_blend_a64_hmask = aom_highbd_blend_a64_hmask_sse4_1;
+-    aom_highbd_blend_a64_mask = aom_highbd_blend_a64_mask_c;
+-    if (flags & HAS_SSE4_1) aom_highbd_blend_a64_mask = aom_highbd_blend_a64_mask_sse4_1;
+-    aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_c;
+-    if (flags & HAS_SSE4_1) aom_highbd_blend_a64_vmask = aom_highbd_blend_a64_vmask_sse4_1;
+-    aom_highbd_convolve8_horiz = aom_highbd_convolve8_horiz_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_convolve8_horiz = aom_highbd_convolve8_horiz_avx2;
+-    aom_highbd_convolve8_vert = aom_highbd_convolve8_vert_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_convolve8_vert = aom_highbd_convolve8_vert_avx2;
+-    aom_highbd_convolve_copy = aom_highbd_convolve_copy_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_convolve_copy = aom_highbd_convolve_copy_avx2;
+-    aom_highbd_lpf_horizontal_14_dual = aom_highbd_lpf_horizontal_14_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_14_dual = aom_highbd_lpf_horizontal_14_dual_avx2;
+-    aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_4_dual = aom_highbd_lpf_horizontal_4_dual_avx2;
+-    aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_horizontal_8_dual = aom_highbd_lpf_horizontal_8_dual_avx2;
+-    aom_highbd_lpf_vertical_14_dual = aom_highbd_lpf_vertical_14_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_vertical_14_dual = aom_highbd_lpf_vertical_14_dual_avx2;
+-    aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_vertical_4_dual = aom_highbd_lpf_vertical_4_dual_avx2;
+-    aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_sse2;
+-    if (flags & HAS_AVX2) aom_highbd_lpf_vertical_8_dual = aom_highbd_lpf_vertical_8_dual_avx2;
+-    aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
+-    if (flags & HAS_SSE4_1) aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_sse4_1;
+-    if (flags & HAS_AVX2) aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_avx2;
+-    aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_avx2;
+-    aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2;
+-    aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_ssse3;
+-    aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_avx2;
+-    aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_avx2;
+-    aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_avx2;
+-    aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_avx2;
+-    aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_avx2;
+-    aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_ssse3;
+-    aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_ssse3;
+-    aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3;
+-    aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3;
+-    aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_avx2;
+-    aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_avx2;
+-    aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_ssse3;
+-    if (flags & HAS_AVX2) aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_avx2;
+-    aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3;
+-    aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_ssse3;
+-    aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3;
+-    aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_ssse3;
+-    aom_smooth_h_predictor_16x16 = aom_smooth_h_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x16 = aom_smooth_h_predictor_16x16_ssse3;
+-    aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_ssse3;
+-    aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_ssse3;
+-    aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_ssse3;
+-    aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_ssse3;
+-    aom_smooth_h_predictor_32x16 = aom_smooth_h_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x16 = aom_smooth_h_predictor_32x16_ssse3;
+-    aom_smooth_h_predictor_32x32 = aom_smooth_h_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x32 = aom_smooth_h_predictor_32x32_ssse3;
+-    aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_ssse3;
+-    aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_ssse3;
+-    aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_ssse3;
+-    aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_ssse3;
+-    aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_ssse3;
+-    aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_ssse3;
+-    aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_ssse3;
+-    aom_smooth_h_predictor_64x64 = aom_smooth_h_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_64x64 = aom_smooth_h_predictor_64x64_ssse3;
+-    aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_ssse3;
+-    aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_ssse3;
+-    aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_ssse3;
+-    aom_smooth_h_predictor_8x8 = aom_smooth_h_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_h_predictor_8x8 = aom_smooth_h_predictor_8x8_ssse3;
+-    aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_ssse3;
+-    aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3;
+-    aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_ssse3;
+-    aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_ssse3;
+-    aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3;
+-    aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x16 = aom_smooth_predictor_32x16_ssse3;
+-    aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x32 = aom_smooth_predictor_32x32_ssse3;
+-    aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_ssse3;
+-    aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_ssse3;
+-    aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_ssse3;
+-    aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3;
+-    aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3;
+-    aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_ssse3;
+-    aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_ssse3;
+-    aom_smooth_predictor_64x64 = aom_smooth_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_64x64 = aom_smooth_predictor_64x64_ssse3;
+-    aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3;
+-    aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_ssse3;
+-    aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3;
+-    aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_ssse3;
+-    aom_smooth_v_predictor_16x16 = aom_smooth_v_predictor_16x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x16 = aom_smooth_v_predictor_16x16_ssse3;
+-    aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_ssse3;
+-    aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_ssse3;
+-    aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_ssse3;
+-    aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_ssse3;
+-    aom_smooth_v_predictor_32x16 = aom_smooth_v_predictor_32x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x16 = aom_smooth_v_predictor_32x16_ssse3;
+-    aom_smooth_v_predictor_32x32 = aom_smooth_v_predictor_32x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x32 = aom_smooth_v_predictor_32x32_ssse3;
+-    aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_ssse3;
+-    aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_ssse3;
+-    aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_ssse3;
+-    aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_ssse3;
+-    aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_ssse3;
+-    aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_ssse3;
+-    aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_ssse3;
+-    aom_smooth_v_predictor_64x64 = aom_smooth_v_predictor_64x64_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_64x64 = aom_smooth_v_predictor_64x64_ssse3;
+-    aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_ssse3;
+-    aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_ssse3;
+-    aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_ssse3;
+-    aom_smooth_v_predictor_8x8 = aom_smooth_v_predictor_8x8_c;
+-    if (flags & HAS_SSSE3) aom_smooth_v_predictor_8x8 = aom_smooth_v_predictor_8x8_ssse3;
+-    aom_v_predictor_32x16 = aom_v_predictor_32x16_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_32x16 = aom_v_predictor_32x16_avx2;
+-    aom_v_predictor_32x32 = aom_v_predictor_32x32_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_32x32 = aom_v_predictor_32x32_avx2;
+-    aom_v_predictor_32x64 = aom_v_predictor_32x64_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_32x64 = aom_v_predictor_32x64_avx2;
+-    aom_v_predictor_64x16 = aom_v_predictor_64x16_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_64x16 = aom_v_predictor_64x16_avx2;
+-    aom_v_predictor_64x32 = aom_v_predictor_64x32_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_64x32 = aom_v_predictor_64x32_avx2;
+-    aom_v_predictor_64x64 = aom_v_predictor_64x64_sse2;
+-    if (flags & HAS_AVX2) aom_v_predictor_64x64 = aom_v_predictor_64x64_avx2;
+-    av1_round_shift_array = av1_round_shift_array_c;
+-    if (flags & HAS_SSE4_1) av1_round_shift_array = av1_round_shift_array_sse4_1;
+-}
+-#endif
+-
+-#ifdef __cplusplus
+-}  // extern "C"
+-#endif
+-
+-#endif
+diff --git a/media/libaom/config/win/mingw64/config/aom_scale_rtcd.h b/media/libaom/config/win/mingw64/config/aom_scale_rtcd.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw64/config/aom_scale_rtcd.h
++++ /dev/null
+@@ -1,88 +0,0 @@
+-// This file is generated. Do not edit.
+-#ifndef AOM_SCALE_RTCD_H_
+-#define AOM_SCALE_RTCD_H_
+-
+-#ifdef RTCD_C
+-#define RTCD_EXTERN
+-#else
+-#define RTCD_EXTERN extern
+-#endif
+-
+-struct yv12_buffer_config;
+-
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-void aom_extend_frame_borders_c(struct yv12_buffer_config *ybf, const int num_planes);
+-#define aom_extend_frame_borders aom_extend_frame_borders_c
+-
+-void aom_extend_frame_borders_y_c(struct yv12_buffer_config *ybf);
+-#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
+-
+-void aom_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf, const int num_planes);
+-#define aom_extend_frame_inner_borders aom_extend_frame_inner_borders_c
+-
+-void aom_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+-#define aom_horizontal_line_2_1_scale aom_horizontal_line_2_1_scale_c
+-
+-void aom_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+-#define aom_horizontal_line_5_3_scale aom_horizontal_line_5_3_scale_c
+-
+-void aom_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+-#define aom_horizontal_line_5_4_scale aom_horizontal_line_5_4_scale_c
+-
+-void aom_vertical_band_2_1_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_2_1_scale aom_vertical_band_2_1_scale_c
+-
+-void aom_vertical_band_2_1_scale_i_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_2_1_scale_i aom_vertical_band_2_1_scale_i_c
+-
+-void aom_vertical_band_5_3_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_5_3_scale aom_vertical_band_5_3_scale_c
+-
+-void aom_vertical_band_5_4_scale_c(unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width);
+-#define aom_vertical_band_5_4_scale aom_vertical_band_5_4_scale_c
+-
+-void aom_yv12_copy_frame_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, const int num_planes);
+-#define aom_yv12_copy_frame aom_yv12_copy_frame_c
+-
+-void aom_yv12_copy_u_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc);
+-#define aom_yv12_copy_u aom_yv12_copy_u_c
+-
+-void aom_yv12_copy_v_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc);
+-#define aom_yv12_copy_v aom_yv12_copy_v_c
+-
+-void aom_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
+-#define aom_yv12_copy_y aom_yv12_copy_y_c
+-
+-void aom_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf, const int num_planes);
+-#define aom_yv12_extend_frame_borders aom_yv12_extend_frame_borders_c
+-
+-void aom_yv12_partial_copy_u_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend);
+-#define aom_yv12_partial_copy_u aom_yv12_partial_copy_u_c
+-
+-void aom_yv12_partial_copy_v_c(const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend);
+-#define aom_yv12_partial_copy_v aom_yv12_partial_copy_v_c
+-
+-void aom_yv12_partial_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int hstart, int hend, int vstart, int vend);
+-#define aom_yv12_partial_copy_y aom_yv12_partial_copy_y_c
+-
+-void aom_scale_rtcd(void);
+-
+-#ifdef RTCD_C
+-#include "aom_ports/x86.h"
+-static void setup_rtcd_internal(void)
+-{
+-    int flags = x86_simd_caps();
+-
+-    (void)flags;
+-
+-}
+-#endif
+-
+-#ifdef __cplusplus
+-}  // extern "C"
+-#endif
+-
+-#endif
+diff --git a/media/libaom/config/win/mingw64/config/av1_rtcd.h b/media/libaom/config/win/mingw64/config/av1_rtcd.h
+deleted file mode 100644
+--- a/media/libaom/config/win/mingw64/config/av1_rtcd.h
++++ /dev/null
+@@ -1,594 +0,0 @@
+-// This file is generated. Do not edit.
+-#ifndef AV1_RTCD_H_
+-#define AV1_RTCD_H_
+-
+-#ifdef RTCD_C
+-#define RTCD_EXTERN
+-#else
+-#define RTCD_EXTERN extern
+-#endif
+-
+-/*
+- * AV1
+- */
+-
+-#include "aom/aom_integer.h"
+-#include "aom_dsp/txfm_common.h"
+-#include "av1/common/common.h"
+-#include "av1/common/enums.h"
+-#include "av1/common/quant_common.h"
+-#include "av1/common/filter.h"
+-#include "av1/common/convolve.h"
+-#include "av1/common/av1_txfm.h"
+-#include "av1/common/odintrin.h"
+-#include "av1/common/restoration.h"
+-
+-struct macroblockd;
+-
+-/* Encoder forward decls */
+-struct macroblock;
+-struct txfm_param;
+-struct aom_variance_vtable;
+-struct search_site_config;
+-struct yv12_buffer_config;
+-
+-/* Function pointers return by CfL functions */
+-typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
+-                                     uint16_t *output_q3);
+-
+-typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
+-                                     uint16_t *output_q3);
+-
+-typedef void (*cfl_subtract_average_fn)(const uint16_t *src, int16_t *dst);
+-
+-typedef void (*cfl_predict_lbd_fn)(const int16_t *src, uint8_t *dst,
+-                                   int dst_stride, int alpha_q3);
+-
+-typedef void (*cfl_predict_hbd_fn)(const int16_t *src, uint16_t *dst,
+-                                   int dst_stride, int alpha_q3, int bd);
+-
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-void apply_selfguided_restoration_sse4_1(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-void apply_selfguided_restoration_avx2(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-RTCD_EXTERN void (*apply_selfguided_restoration)(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
+-
+-void av1_build_compound_diffwtd_mask_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-RTCD_EXTERN void (*av1_build_compound_diffwtd_mask)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
+-
+-void av1_build_compound_diffwtd_mask_d16_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-void av1_build_compound_diffwtd_mask_d16_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-void av1_build_compound_diffwtd_mask_d16_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
+-
+-void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
+-
+-void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
+-void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
+-
+-void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
+-void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
+-RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
+-
+-void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
+-#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
+-
+-void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int upsample_left, int dx, int dy);
+-#define av1_dr_prediction_z2 av1_dr_prediction_z2_c
+-
+-void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_left, int dx, int dy);
+-#define av1_dr_prediction_z3 av1_dr_prediction_z3_c
+-
+-void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength);
+-void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength);
+-RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength);
+-
+-void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength);
+-void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength);
+-RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength);
+-
+-void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
+-void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
+-RTCD_EXTERN void (*av1_filter_intra_predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
+-
+-void av1_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void av1_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve8 av1_highbd_convolve8_sse2
+-
+-void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void av1_highbd_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve8_horiz av1_highbd_convolve8_horiz_sse2
+-
+-void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
+-
+-void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
+-
+-void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
+-#define av1_highbd_convolve_copy av1_highbd_convolve_copy_c
+-
+-void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
+-void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
+-
+-void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
+-#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
+-
+-void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd);
+-#define av1_highbd_dr_prediction_z2 av1_highbd_dr_prediction_z2_c
+-
+-void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd);
+-#define av1_highbd_dr_prediction_z3 av1_highbd_dr_prediction_z3_c
+-
+-void av1_highbd_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_32x32)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x4)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
+-#define av1_highbd_iwht4x4_16_add av1_highbd_iwht4x4_16_add_c
+-
+-void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
+-#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
+-
+-void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+-
+-void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-
+-void av1_highbd_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-void av1_highbd_wiener_convolve_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-void av1_highbd_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-RTCD_EXTERN void (*av1_highbd_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
+-
+-void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x16 av1_inv_txfm2d_add_16x16_c
+-
+-void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c
+-
+-void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x4 av1_inv_txfm2d_add_16x4_c
+-
+-void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x64 av1_inv_txfm2d_add_16x64_c
+-
+-void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c
+-
+-void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c
+-
+-void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x32 av1_inv_txfm2d_add_32x32_c
+-
+-void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x64 av1_inv_txfm2d_add_32x64_c
+-
+-void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_32x8 av1_inv_txfm2d_add_32x8_c
+-
+-void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_4x16 av1_inv_txfm2d_add_4x16_c
+-
+-void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-
+-void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c
+-
+-void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_64x16 av1_inv_txfm2d_add_64x16_c
+-
+-void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_64x32 av1_inv_txfm2d_add_64x32_c
+-
+-void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c
+-
+-void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c
+-
+-void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_8x32 av1_inv_txfm2d_add_8x32_c
+-
+-void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-#define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c
+-
+-void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
+-
+-void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
+-
+-void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
+-
+-int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-int av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-int av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t *dgd8, int width, int height,
+-                                 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+-                                 int sgr_params_idx, int bit_depth, int highbd);
+-
+-void av1_upsample_intra_edge_c(uint8_t *p, int sz);
+-void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz);
+-RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz);
+-
+-void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd);
+-void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd);
+-RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd);
+-
+-void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-RTCD_EXTERN void (*av1_warp_affine)(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
+-
+-void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-void av1_wiener_convolve_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-RTCD_EXTERN void (*av1_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
+-
+-void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
+-
+-int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
+-
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_c(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_420_hbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_420_lbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_c(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_422_hbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_c(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_422_lbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_c(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_444_hbd)(TX_SIZE tx_size);
+-
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_c(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_ssse3(TX_SIZE tx_size);
+-cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_444_lbd)(TX_SIZE tx_size);
+-
+-void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
+-
+-void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
+-
+-cfl_predict_hbd_fn get_predict_hbd_fn_c(TX_SIZE tx_size);
+-cfl_predict_hbd_fn get_predict_hbd_fn_ssse3(TX_SIZE tx_size);
+-cfl_predict_hbd_fn get_predict_hbd_fn_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_predict_hbd_fn (*get_predict_hbd_fn)(TX_SIZE tx_size);
+-
+-cfl_predict_lbd_fn get_predict_lbd_fn_c(TX_SIZE tx_size);
+-cfl_predict_lbd_fn get_predict_lbd_fn_ssse3(TX_SIZE tx_size);
+-cfl_predict_lbd_fn get_predict_lbd_fn_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_predict_lbd_fn (*get_predict_lbd_fn)(TX_SIZE tx_size);
+-
+-cfl_subtract_average_fn get_subtract_average_fn_c(TX_SIZE tx_size);
+-cfl_subtract_average_fn get_subtract_average_fn_sse2(TX_SIZE tx_size);
+-cfl_subtract_average_fn get_subtract_average_fn_avx2(TX_SIZE tx_size);
+-RTCD_EXTERN cfl_subtract_average_fn (*get_subtract_average_fn)(TX_SIZE tx_size);
+-
+-void av1_rtcd(void);
+-
+-#ifdef RTCD_C
+-#include "aom_ports/x86.h"
+-static void setup_rtcd_internal(void)
+-{
+-    int flags = x86_simd_caps();
+-
+-    (void)flags;
+-
+-    apply_selfguided_restoration = apply_selfguided_restoration_c;
+-    if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1;
+-    if (flags & HAS_AVX2) apply_selfguided_restoration = apply_selfguided_restoration_avx2;
+-    av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
+-    if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_sse4_1;
+-    if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_avx2;
+-    av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
+-    if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_sse4_1;
+-    if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_avx2;
+-    av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_c;
+-    if (flags & HAS_SSSE3) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_ssse3;
+-    if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_avx2;
+-    av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_avx2;
+-    av1_convolve_2d_scale = av1_convolve_2d_scale_c;
+-    if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1;
+-    av1_convolve_2d_sr = av1_convolve_2d_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_2d_sr = av1_convolve_2d_sr_avx2;
+-    av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
+-    if (flags & HAS_SSE4_1) av1_convolve_horiz_rs = av1_convolve_horiz_rs_sse4_1;
+-    av1_convolve_x_sr = av1_convolve_x_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_x_sr = av1_convolve_x_sr_avx2;
+-    av1_convolve_y_sr = av1_convolve_y_sr_sse2;
+-    if (flags & HAS_AVX2) av1_convolve_y_sr = av1_convolve_y_sr_avx2;
+-    av1_filter_intra_edge = av1_filter_intra_edge_c;
+-    if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1;
+-    av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
+-    if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1;
+-    av1_filter_intra_predictor = av1_filter_intra_predictor_c;
+-    if (flags & HAS_SSE4_1) av1_filter_intra_predictor = av1_filter_intra_predictor_sse4_1;
+-    av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_sse2;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_avx2;
+-    av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1;
+-    av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
+-    if (flags & HAS_SSSE3) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_avx2;
+-    av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_sse4_1;
+-    av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
+-    if (flags & HAS_SSSE3) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_avx2;
+-    av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
+-    if (flags & HAS_SSSE3) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_avx2;
+-    av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_avx2;
+-    av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_sse4_1;
+-    av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_sse4_1;
+-    av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_avx2;
+-    av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_sse4_1;
+-    av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_sse4_1;
+-    av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_sse4_1;
+-    av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_avx2;
+-    av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_avx2;
+-    av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_avx2;
+-    av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_sse4_1;
+-    if (flags & HAS_AVX2) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_avx2;
+-    av1_highbd_warp_affine = av1_highbd_warp_affine_c;
+-    if (flags & HAS_SSE4_1) av1_highbd_warp_affine = av1_highbd_warp_affine_sse4_1;
+-    av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
+-    if (flags & HAS_SSSE3) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_ssse3;
+-    if (flags & HAS_AVX2) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_avx2;
+-    av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
+-    if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_sse4_1;
+-    av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
+-    if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1;
+-    av1_inv_txfm_add = av1_inv_txfm_add_c;
+-    if (flags & HAS_SSSE3) av1_inv_txfm_add = av1_inv_txfm_add_ssse3;
+-    if (flags & HAS_AVX2) av1_inv_txfm_add = av1_inv_txfm_add_avx2;
+-    av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
+-    if (flags & HAS_SSSE3) av1_jnt_convolve_2d = av1_jnt_convolve_2d_ssse3;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_2d = av1_jnt_convolve_2d_avx2;
+-    av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_sse2;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_avx2;
+-    av1_jnt_convolve_x = av1_jnt_convolve_x_sse2;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_x = av1_jnt_convolve_x_avx2;
+-    av1_jnt_convolve_y = av1_jnt_convolve_y_sse2;
+-    if (flags & HAS_AVX2) av1_jnt_convolve_y = av1_jnt_convolve_y_avx2;
+-    av1_selfguided_restoration = av1_selfguided_restoration_c;
+-    if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
+-    if (flags & HAS_AVX2) av1_selfguided_restoration = av1_selfguided_restoration_avx2;
+-    av1_upsample_intra_edge = av1_upsample_intra_edge_c;
+-    if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1;
+-    av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
+-    if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
+-    av1_warp_affine = av1_warp_affine_c;
+-    if (flags & HAS_SSE4_1) av1_warp_affine = av1_warp_affine_sse4_1;
+-    av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_sse2;
+-    if (flags & HAS_AVX2) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_avx2;
+-    cdef_filter_block = cdef_filter_block_sse2;
+-    if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3;
+-    if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1;
+-    if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2;
+-    cdef_find_dir = cdef_find_dir_sse2;
+-    if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3;
+-    if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1;
+-    if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2;
+-    cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_avx2;
+-    cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_avx2;
+-    cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_avx2;
+-    cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_avx2;
+-    cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_avx2;
+-    cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
+-    if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_ssse3;
+-    if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_avx2;
+-    copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2;
+-    if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3;
+-    if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1;
+-    if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2;
+-    copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2;
+-    if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3;
+-    if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1;
+-    if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2;
+-    get_predict_hbd_fn = get_predict_hbd_fn_c;
+-    if (flags & HAS_SSSE3) get_predict_hbd_fn = get_predict_hbd_fn_ssse3;
+-    if (flags & HAS_AVX2) get_predict_hbd_fn = get_predict_hbd_fn_avx2;
+-    get_predict_lbd_fn = get_predict_lbd_fn_c;
+-    if (flags & HAS_SSSE3) get_predict_lbd_fn = get_predict_lbd_fn_ssse3;
+-    if (flags & HAS_AVX2) get_predict_lbd_fn = get_predict_lbd_fn_avx2;
+-    get_subtract_average_fn = get_subtract_average_fn_sse2;
+-    if (flags & HAS_AVX2) get_subtract_average_fn = get_subtract_average_fn_avx2;
+-}
+-#endif
+-
+-#ifdef __cplusplus
+-}  // extern "C"
+-#endif
+-
+-#endif
+diff --git a/media/libaom/generate_sources_mozbuild.py b/media/libaom/generate_sources_mozbuild.py
+--- a/media/libaom/generate_sources_mozbuild.py
++++ b/media/libaom/generate_sources_mozbuild.py
+@@ -84,36 +84,34 @@ if __name__ == '__main__':
+     f = open('sources.mozbuild', 'w')
+     f.write('# This file is generated. Do not edit.\n\n')
+     f.write('files = {\n')
+ 
+     platforms = [
+         ('armv7', 'linux', 'arm', True),
+         ('generic', '', 'generic', True),
+         ('x86', 'linux', 'ia32', True),
+-        ('x86', 'win', 'mingw32', False),
+         ('x86', 'win', 'ia32', False),
+         ('x86_64', 'linux', 'x64', True),
+         ('x86_64', 'mac', 'x64', False),
+         ('x86_64', 'win', 'x64', False),
+-        ('x86_64', 'win', 'mingw64', False),
+     ]
+     for cpu, system, arch, generate_sources in platforms:
+         print('Running CMake for %s (%s)' % (cpu, system))
+         variables = shared_variables.copy()
+         variables['AOM_TARGET_CPU'] = cpu
+ 
+         # We skip compiling test programs that detect these
+         variables['HAVE_FEXCEPT'] = 1
+         variables['INLINE'] = 'inline'
+         if cpu == 'x86' and system == 'linux':
+             variables['CONFIG_PIC'] = 1
+         if cpu == 'armv7':
+             variables['CONFIG_PIC'] = 1
+-        if system == 'win' and not arch.startswith('mingw'):
++        if system == 'win':
+             variables['MSVC'] = 1
+ 
+         cache_variables = []
+         sources = cp.parse(variables, cache_variables,
+                            os.path.join(AOM_DIR, 'CMakeLists.txt'))
+ 
+         # Disable HAVE_UNISTD_H.
+         cache_variables.remove('HAVE_UNISTD_H')
+diff --git a/media/libaom/generate_sources_mozbuild.sh b/media/libaom/generate_sources_mozbuild.sh
+--- a/media/libaom/generate_sources_mozbuild.sh
++++ b/media/libaom/generate_sources_mozbuild.sh
+@@ -63,18 +63,16 @@ python generate_sources_mozbuild.py
+ # Copy aom_version.h once. The file is the same for all platforms.
+ cp aom_version.h $BASE_DIR/$LIBAOM_CONFIG_DIR
+ 
+ gen_rtcd_header linux/x64 x86_64
+ gen_rtcd_header linux/ia32 x86
+ gen_rtcd_header mac/x64 x86_64
+ gen_rtcd_header win/x64 x86_64
+ gen_rtcd_header win/ia32 x86
+-gen_rtcd_header win/mingw32 x86
+-gen_rtcd_header win/mingw64 x86_64
+ 
+ gen_rtcd_header linux/arm armv7
+ 
+ gen_rtcd_header generic generic
+ 
+ cd $BASE_DIR/$LIBAOM_SRC_DIR
+ 
+ cd $BASE_DIR
+diff --git a/media/libaom/moz.build b/media/libaom/moz.build
+--- a/media/libaom/moz.build
++++ b/media/libaom/moz.build
+@@ -10,45 +10,35 @@ with Files('*'):
+ include('sources.mozbuild')
+ 
+ # Linux, Mac and Win share file lists for x86* but not configurations.
+ if CONFIG['CPU_ARCH'] == 'x86_64':
+     EXPORTS.aom += files['X64_EXPORTS']
+     SOURCES += files['X64_SOURCES']
+     USE_YASM = True
+     if CONFIG['OS_TARGET'] == 'WINNT':
+-        if CONFIG['CC_TYPE'] == 'gcc':
+-            ASFLAGS += [ '-I%s/media/libaom/config/win/mingw64/' % TOPSRCDIR ]
+-            LOCAL_INCLUDES += [ '/media/libaom/config/win/mingw64/' ]
+-            EXPORTS.aom += [ 'config/win/mingw64/config/aom_config.h' ]
+-        else:
+-            ASFLAGS += [ '-I%s/media/libaom/config/win/x64/' % TOPSRCDIR ]
+-            LOCAL_INCLUDES += [ '/media/libaom/config/win/x64/' ]
+-            EXPORTS.aom += [ 'config/win/x64/config/aom_config.h' ]
++        ASFLAGS += [ '-I%s/media/libaom/config/win/x64/' % TOPSRCDIR ]
++        LOCAL_INCLUDES += [ '/media/libaom/config/win/x64/' ]
++        EXPORTS.aom += [ 'config/win/x64/config/aom_config.h' ]
+     elif CONFIG['OS_TARGET'] == 'Darwin':
+         ASFLAGS += [ '-I%s/media/libaom/config/mac/x64/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/mac/x64/' ]
+         EXPORTS.aom += [ 'config/mac/x64/config/aom_config.h' ]
+     else: # Android, Linux, BSDs, etc.
+         ASFLAGS += [ '-I%s/media/libaom/config/linux/x64/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/linux/x64/' ]
+         EXPORTS.aom += [ 'config/linux/x64/config/aom_config.h' ]
+ elif CONFIG['CPU_ARCH'] == 'x86':
+     EXPORTS.aom += files['IA32_EXPORTS']
+     SOURCES += files['IA32_SOURCES']
+     USE_YASM = True
+     if CONFIG['OS_TARGET'] == 'WINNT':
+-        if CONFIG['CC_TYPE'] == 'gcc':
+-            ASFLAGS += [ '-I%s/media/libaom/config/win/mingw32/' % TOPSRCDIR ]
+-            LOCAL_INCLUDES += [ '/media/libaom/config/win/mingw32/' ]
+-            EXPORTS.aom += [ 'config/win/mingw32/config/aom_config.h' ]
+-        else:
+-            ASFLAGS += [ '-I%s/media/libaom/config/win/ia32/' % TOPSRCDIR ]
+-            LOCAL_INCLUDES += [ '/media/libaom/config/win/ia32/' ]
+-            EXPORTS.aom += [ 'config/win/ia32/config/aom_config.h' ]
++        ASFLAGS += [ '-I%s/media/libaom/config/win/ia32/' % TOPSRCDIR ]
++        LOCAL_INCLUDES += [ '/media/libaom/config/win/ia32/' ]
++        EXPORTS.aom += [ 'config/win/ia32/config/aom_config.h' ]
+     else: # Android, Linux, BSDs, etc.
+         ASFLAGS += [ '-I%s/media/libaom/config/linux/ia32/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/linux/ia32/' ]
+         EXPORTS.aom += [ 'config/linux/ia32/config/aom_config.h' ]
+ elif CONFIG['CPU_ARCH'] == 'arm':
+     EXPORTS.aom += files['ARM_EXPORTS']
+     ASFLAGS += [
+         '-I%s/media/libaom/config/linux/arm/' % TOPSRCDIR,

+ 204 - 0
mozilla-release/patches/1650299-80a1.patch

@@ -0,0 +1,204 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1594076273 0
+# Node ID 1773b1745a8c4b938fde6d3dea9a58cf941cff73
+# Parent  c6000c544a4c7cc3a7048ca08bcb540c22e2310c
+Bug 1650299 - Unify the inclusion of the ICU data file. r=froydnj
+
+All the supported compilers support a GNU AS-like syntax, with only a
+few details varying. It means we can use a single, simpler, way to
+include the ICU data file, instead of 3 different ways, including one
+that uses armasm64.exe, possibly wrapped with Wine.
+
+Differential Revision: https://phabricator.services.mozilla.com/D82144
+
+Changed for keeping intel msvc support.
+
+diff --git a/config/external/icu/data/genicudata.py b/config/external/icu/data/genicudata.py
+deleted file mode 100644
+--- a/config/external/icu/data/genicudata.py
++++ /dev/null
+@@ -1,16 +0,0 @@
+-# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+-# vim: set filetype=python:
+-# This Source Code Form is subject to the terms of the Mozilla Public
+-# License, v. 2.0. If a copy of the MPL was not distributed with this
+-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+-
+-from __future__ import absolute_import
+-
+-
+-def main(output, data_file, data_symbol):
+-    output.write('''    AREA |.rdata|,ALIGN=4,DATA,READONLY
+-    EXPORT |{data_symbol}|[DATA]
+-|{data_symbol}|
+-    INCBIN {data_file}
+-    END
+-'''.format(data_file=data_file, data_symbol=data_symbol))
+diff --git a/config/external/icu/data/icudata.c b/config/external/icu/data/icudata.c
+new file mode 100644
+--- /dev/null
++++ b/config/external/icu/data/icudata.c
+@@ -0,0 +1,21 @@
++/* This Source Code Form is subject to the terms of the Mozilla Public
++ * License, v. 2.0. If a copy of the MPL was not distributed with this
++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
++
++#ifdef __APPLE__
++#  define RODATA ".data\n.const"
++#else
++#  define RODATA ".section .rodata"
++#endif
++
++#define DATA(sym, file) DATA2(sym, file)
++// clang-format off
++#define DATA2(sym, file)              \
++  __asm__(".global " #sym "\n"        \
++          RODATA "\n"                 \
++          ".balign 16\n"              \
++          #sym ":\n"                  \
++          "    .incbin " #file "\n")
++// clang-format on
++
++DATA(ICU_DATA_SYMBOL, ICU_DATA_FILE);
+diff --git a/config/external/icu/data/icudata.s b/config/external/icu/data/icudata.s
+--- a/config/external/icu/data/icudata.s
++++ b/config/external/icu/data/icudata.s
+@@ -1,31 +1,11 @@
+ ;; This Source Code Form is subject to the terms of the Mozilla Public
+ ;; License, v. 2.0. If a copy of the MPL was not distributed with this
+ ;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+-%ifdef PREFIX
+-    %define DATA_SYMBOL _ %+ ICU_DATA_SYMBOL
+-%else
+     %define DATA_SYMBOL ICU_DATA_SYMBOL
+-%endif
+ 
+-%ifidn __OUTPUT_FORMAT__,elf
+-    %define FORMAT_ELF 1
+-%elifidn __OUTPUT_FORMAT__,elf32
+-    %define FORMAT_ELF 1
+-%elifidn __OUTPUT_FORMAT__,elf64
+-    %define FORMAT_ELF 1
+-%else
+-    %define FORMAT_ELF 0
+-%endif
+-
+-%if FORMAT_ELF
+-    global DATA_SYMBOL:data hidden
+-    ; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+-    [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
+-%else
+     global DATA_SYMBOL
+-%endif
+ 
+ SECTION .rodata align=16
+ DATA_SYMBOL:
+         incbin ICU_DATA_FILE
+diff --git a/config/external/icu/data/icudata_gas.S b/config/external/icu/data/icudata_gas.S
+deleted file mode 100644
+--- a/config/external/icu/data/icudata_gas.S
++++ /dev/null
+@@ -1,12 +0,0 @@
+-# This Source Code Form is subject to the terms of the Mozilla Public
+-# License, v. 2.0. If a copy of the MPL was not distributed with this
+-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+-
+-#if defined(__linux__) && defined(__ELF__)
+-.section .note.GNU-stack,"",%progbits
+-#endif
+-.global ICU_DATA_SYMBOL
+-.data
+-.balign 16
+-ICU_DATA_SYMBOL:
+-        .incbin ICU_DATA_FILE
+diff --git a/config/external/icu/data/moz.build b/config/external/icu/data/moz.build
+--- a/config/external/icu/data/moz.build
++++ b/config/external/icu/data/moz.build
+@@ -3,34 +3,34 @@
+ # This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+ # Build a library containing the ICU data for use in the JS shell, so that
+ # JSAPI consumers don't have to deal with setting ICU's data path.
+ Library('icudata')
+ 
+-if CONFIG['OS_ARCH'] == 'WINNT':
+-    if CONFIG['CPU_ARCH'] == 'x86':
+-        ASFLAGS += ['-DPREFIX']
+-elif CONFIG['OS_ARCH'] == 'Darwin':
+-    ASFLAGS += ['-DPREFIX']
+-
+-data_symbol = 'icudt%s_dat' % CONFIG['MOZ_ICU_VERSION']
+-asflags = [
+-    '-I%s/config/external/icu/data/' % TOPSRCDIR,
+-    '-DICU_DATA_FILE="%s"' % CONFIG['ICU_DATA_FILE'],
+-    '-DICU_DATA_SYMBOL=%s' % data_symbol,
+-]
+ LOCAL_INCLUDES += ['.']
+ 
+-if CONFIG['OS_TARGET'] == 'WINNT' and CONFIG['CPU_ARCH'] == 'aarch64':
+-    icudata = 'icudata.asm'
+-    GeneratedFile(icudata, script='genicudata.py',
+-                  inputs=[CONFIG['ICU_DATA_FILE']], flags=[data_symbol])
+-    SOURCES += ['!%s' % icudata]
+-elif CONFIG['HAVE_YASM']:
+-    USE_YASM = True
+-    SOURCES += ['icudata.s']
+-    ASFLAGS += asflags
+-elif CONFIG['GNU_AS']:
+-    SOURCES += ['icudata_gas.S']
+-    ASFLAGS += asflags
++prefix = ''
++if (CONFIG['OS_ARCH'] == 'WINNT' and CONFIG['CPU_ARCH'] == 'x86') or CONFIG['OS_ARCH'] == 'Darwin':
++    prefix = '_'
++
++data_file = '"%s/icudt%sl.dat"' % (SRCDIR, CONFIG['MOZ_ICU_VERSION'])
++data_symbol = '%sicudt%s_dat' % (prefix, CONFIG['MOZ_ICU_VERSION'])
++
++if CONFIG['OS_TARGET'] == 'WINNT' and CONFIG['CC_TYPE'] == 'msvc' and CONFIG['INTEL_ARCHITECTURE']:
++    USE_NASM = True
++    ASFLAGS += [
++        '-I%s/config/external/icu/data/' % TOPSRCDIR,
++        '-DICU_DATA_FILE=%s' % data_file,
++        '-DICU_DATA_SYMBOL=%s' % data_symbol,
++    ]
++    SOURCES += [
++        'icudata.s',
++    ]
++else:
++    DEFINES['ICU_DATA_FILE'] = data_file
++    DEFINES['ICU_DATA_SYMBOL'] = data_symbol
++
++    SOURCES += [
++        'icudata.c',
++    ]
+diff --git a/js/moz.configure b/js/moz.configure
+--- a/js/moz.configure
++++ b/js/moz.configure
+@@ -517,23 +517,16 @@ def icu_version(build_env):
+                     try:
+                         return str(int(define[2]))
+                     except ValueError:
+                         pass
+     die('Cannot determine ICU version number from uvernum.h header file')
+ 
+ set_config('MOZ_ICU_VERSION', icu_version)
+ 
+-@depends(icu_version, target, when='--with-intl-api')
+-def icu_data_file(version, target):
+-    # target.endianness is always 'big' or 'little'
+-    return 'icudt%s%s.dat' % (version, target.endianness[0])
+-
+-set_config('ICU_DATA_FILE', icu_data_file)
+-
+ # Source files that use ICU should have control over which parts of the ICU
+ # namespace they want to use.
+ set_define('U_USING_ICU_NAMESPACE', '0', when='--with-intl-api')
+ 
+ # We build ICU as a static library.
+ set_define('U_STATIC_IMPLEMENTATION', True, when=depends(system_icu)(lambda x: not x))
+ 
+ @depends(yasm, gnu_as, target, compile_environment)

+ 33 - 0
mozilla-release/patches/1656063-81a1.patch

@@ -0,0 +1,33 @@
+# HG changeset patch
+# User Paul Adenot <paul@paul.cx>
+# Date 1596198541 0
+# Node ID af3620fc2747c1b4dbb72504fd731e9e4e3779d3
+# Parent  a73351520d7b4e2532abbf7ea81767b1c778c0ce
+Bug 1656063 - Only load function pointer in FFTBlock once. r=karlt
+
+Differential Revision: https://phabricator.services.mozilla.com/D85389
+
+diff --git a/dom/media/webaudio/FFTBlock.h b/dom/media/webaudio/FFTBlock.h
+--- a/dom/media/webaudio/FFTBlock.h
++++ b/dom/media/webaudio/FFTBlock.h
+@@ -40,17 +40,19 @@ class FFTBlock final
+     };
+   };
+ 
+ public:
+   static void MainThreadInit()
+   {
+ #ifdef MOZ_LIBAV_FFT
+     FFVPXRuntimeLinker::Init();
+-    FFVPXRuntimeLinker::GetRDFTFuncs(&sRDFTFuncs);
++    if (!sRDFTFuncs.init) {
++      FFVPXRuntimeLinker::GetRDFTFuncs(&sRDFTFuncs);
++    }
+ #endif
+   }
+ 
+   explicit FFTBlock(uint32_t aFFTSize)
+ #if defined(MOZ_LIBAV_FFT)
+     : mAvRDFT(nullptr)
+     , mAvIRDFT(nullptr)
+ #else

+ 57 - 0
mozilla-release/patches/1669888-83a1.patch

@@ -0,0 +1,57 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1602134908 0
+# Node ID 28f9b51522350fb69977f08869eee9636e586bee
+# Parent  519ca1b069e5305d4f901539ea0f4c5ad9d8f54f
+Bug 1669888 - Enable ffvpx on all Mac platforms. r=jya
+
+Differential Revision: https://phabricator.services.mozilla.com/D92875
+
+diff --git a/media/ffvpx/libavcodec/avcodec.symbols b/media/ffvpx/libavcodec/avcodec.symbols
+--- a/media/ffvpx/libavcodec/avcodec.symbols
++++ b/media/ffvpx/libavcodec/avcodec.symbols
+@@ -46,19 +46,21 @@ av_packet_side_data_name
+ av_packet_split_side_data
+ av_packet_unpack_dictionary
+ av_packet_unref
+ av_parser_change
+ av_parser_close
+ av_parser_init
+ av_parser_next
+ av_parser_parse2
++#ifdef MOZ_LIBAV_FFT
+ av_rdft_calc
+ av_rdft_end
+ av_rdft_init
++#endif
+ av_register_codec_parser
+ av_register_hwaccel
+ av_shrink_packet
+ av_vorbis_parse_frame
+ av_vorbis_parse_frame_flags
+ av_vorbis_parse_free
+ av_vorbis_parse_init
+ av_vorbis_parse_reset
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1605,17 +1605,17 @@ with only_when(compile_environment | art
+     set_config('YASM_HAS_AVX2', yasm_has_avx2)
+ 
+ 
+     @depends(yasm_has_avx2, libav_fft, vpx_as_flags, target)
+     def ffvpx(yasm_has_avx2, libav_fft, vpx_as_flags, target):
+         enable = flac_only = use_yasm = False
+         flags = []
+         if target.cpu in ('x86', 'x86_64') or \
+-                target.cpu == 'aarch64' and target.kernel == 'WINNT':
++                target.cpu == 'aarch64' and target.kernel in ('WINNT', 'Darwin'):
+             enable = True
+             if libav_fft and libav_fft.flags:
+                 use_yasm = True
+                 flags.extend(libav_fft.flags)
+                 if target.kernel == 'WINNT':
+                     if target.cpu == 'x86':
+                         # 32-bit windows need to prefix symbols with an underscore.
+                         flags.extend(('-DPREFIX', '-Pconfig_win32.asm'))
+

+ 44 - 0
mozilla-release/patches/1692940-01-88a1.patch

@@ -0,0 +1,44 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043603 0
+#      Tue Feb 23 01:26:43 2021 +0000
+# Node ID 8b391b7adca21255b4367d2435ba109ae481301b
+# Parent  46eab535e154186b019026fb7c56fe776cb5ab97
+Bug 1692940 - Revert bug 1508419. r=firefox-build-system-reviewers,andi,dmajor
+
+Back when bug 1508419 landed, we weren't using a bootstrapped nasm. It
+is less useful now that we are.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105424
+
+diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
+--- a/build/moz.configure/toolchain.configure
++++ b/build/moz.configure/toolchain.configure
+@@ -2156,22 +2156,21 @@ add_old_configure_assignment('ENABLE_MOZ
+ # nasm detection
+ # ==============================================================
+ nasm = check_prog('NASM', ['nasm'], allow_missing=True, paths=toolchain_search_path)
+ 
+ 
+ @depends_if(nasm)
+ @checking('nasm version')
+ def nasm_version(nasm):
+-    (retcode, stdout, _) = get_cmd_output(nasm, '-v')
+-    if retcode:
+-        # mac stub binary
+-        return None
+-
+-    version = stdout.splitlines()[0].split()[2]
++    version = (
++        check_cmd_output(nasm, "-v", onerror=lambda: die("Failed to get nasm version."))
++        .splitlines()[0]
++        .split()[2]
++    )
+     return Version(version)
+ 
+ 
+ @depends(nasm, target)
+ def nasm_asflags(nasm, target):
+     if nasm:
+         asflags = {
+             ('OSX', 'x86'): ['-f', 'macho32'],

+ 208 - 0
mozilla-release/patches/1692940-02-88a1.patch

@@ -0,0 +1,208 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043603 0
+#      Tue Feb 23 01:26:43 2021 +0000
+# Node ID a22f5d28effbce01de85f81f00339389727e29ff
+# Parent  9d7ea1896cd11743c773a1d72b1e00161c0632d8
+Bug 1692940 - Change the logic to check for nasm. r=firefox-build-system-reviewers,dmajor
+
+Instead of preemptively check for it, and then check if it's good enough to
+build AV1, only check for (and bootstrap) nasm when building AV1 requires
+it.
+
+At the same time, we future-proof the code to be able to handle multiple
+things requiring nasm, which we're going to add shortly.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105425
+
+diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
+--- a/build/moz.configure/toolchain.configure
++++ b/build/moz.configure/toolchain.configure
+@@ -2148,53 +2148,16 @@ add_old_configure_assignment('ENABLE_CLA
+                              depends_if('--enable-clang-plugin')(lambda _: True))
+ 
+ js_option('--enable-mozsearch-plugin', env='ENABLE_MOZSEARCH_PLUGIN',
+           help="Enable building with the mozsearch indexer plugin")
+ 
+ add_old_configure_assignment('ENABLE_MOZSEARCH_PLUGIN',
+                              depends_if('--enable-mozsearch-plugin')(lambda _: True))
+ 
+-# nasm detection
+-# ==============================================================
+-nasm = check_prog('NASM', ['nasm'], allow_missing=True, paths=toolchain_search_path)
+-
+-
+-@depends_if(nasm)
+-@checking('nasm version')
+-def nasm_version(nasm):
+-    version = (
+-        check_cmd_output(nasm, "-v", onerror=lambda: die("Failed to get nasm version."))
+-        .splitlines()[0]
+-        .split()[2]
+-    )
+-    return Version(version)
+-
+-
+-@depends(nasm, target)
+-def nasm_asflags(nasm, target):
+-    if nasm:
+-        asflags = {
+-            ('OSX', 'x86'): ['-f', 'macho32'],
+-            ('OSX', 'x86_64'): ['-f', 'macho64'],
+-            ('WINNT', 'x86'): ['-f', 'win32'],
+-            ('WINNT', 'x86_64'): ['-f', 'win64'],
+-        }.get((target.os, target.cpu), None)
+-        if asflags is None:
+-            # We're assuming every x86 platform we support that's
+-            # not Windows or Mac is ELF.
+-            if target.cpu == 'x86':
+-                asflags = ['-f', 'elf32']
+-            elif target.cpu == 'x86_64':
+-                asflags = ['-f', 'elf64']
+-        return asflags
+-
+-
+-set_config('NASM_ASFLAGS', nasm_asflags)
+-
+ 
+ # clang-cl integrated assembler support
+ # ==============================================================
+ @depends(target)
+ def clangcl_asflags(target):
+     asflags = None
+     if target.os == 'WINNT' and target.cpu == 'aarch64':
+         asflags = ['--target=aarch64-windows-msvc']
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -403,26 +403,27 @@ imply_option('--enable-fmp4', ffmpeg, '-
+ option('--disable-av1',
+         help='Disable av1 video support')
+ 
+ @depends('--enable-av1')
+ def av1(value):
+     if value:
+         return True
+ 
+-@depends(target, nasm_version, when=av1 & compile_environment)
+-def dav1d_asm(target, nasm_version):
+-    if target.os != 'Android':
+-        if target.cpu == 'aarch64':
+-            return True
+-        elif target.cpu in ('x86', 'x86_64'):
+-            if nasm_version < '2.14':
+-                die('nasm 2.14 or greater is required for AV1 support. '
+-                    'Either install nasm or add --disable-av1 to your configure options.')
+-            return True
++
++@depends(target, when=av1 & compile_environment)
++def dav1d_asm(target):
++    if target.cpu in ("aarch64", "x86", "x86_64"):
++        return True
++
++
++@depends(target, when=av1 & compile_environment)
++def dav1d_nasm(target):
++    if target.cpu in ("x86", "x86_64"):
++        return namespace(version="2.14", what="AV1")
+ 
+ 
+ set_config('MOZ_DAV1D_ASM', dav1d_asm)
+ set_define('MOZ_DAV1D_ASM', dav1d_asm)
+ set_config('MOZ_AV1', av1)
+ set_define('MOZ_AV1', av1)
+ 
+ # Built-in fragmented MP4 support.
+@@ -1533,16 +1534,92 @@ def valid_yasm_version(yasm_version, for
+     by_version = sorted(versioned.items(), key=lambda x: x[1])
+     if by_version:
+         what, version = by_version[-1]
+         if yasm_version < version:
+             die('Yasm version %s or greater is required to build with %s.'
+                 % (version, what))
+ 
+ 
++# nasm detection
++# ==============================================================
++@depends(dav1d_nasm)
++def need_nasm(*requirements):
++    requires = {
++        x.what: x.version if hasattr(x, "version") else True for x in requirements if x
++    }
++    if requires:
++        items = sorted(requires.keys())
++        if len(items) > 1:
++            what = " and ".join((", ".join(items[:-1]), items[-1]))
++        else:
++            what = items[0]
++        versioned = {k: v for (k, v) in requires.items() if v is not True}
++        return namespace(what=what, versioned=versioned)
++
++
++nasm = check_prog(
++    "NASM",
++    ["nasm"],
++    allow_missing=True,
++    paths=bootstrap_search_path("nasm", when=need_nasm),
++    when=need_nasm,
++)
++
++
++@depends(nasm, need_nasm.what)
++def check_nasm(nasm, what):
++    if not nasm and what:
++        die("Nasm is required to build with %s, but it was not found." % what)
++    return nasm
++
++
++@depends_if(check_nasm)
++@checking("nasm version")
++def nasm_version(nasm):
++    version = (
++        check_cmd_output(nasm, "-v", onerror=lambda: die("Failed to get nasm version."))
++        .splitlines()[0]
++        .split()[2]
++    )
++    return Version(version)
++
++
++@depends(nasm_version, need_nasm.versioned, when=need_nasm.versioned)
++def check_nasm_version(nasm_version, versioned):
++    by_version = sorted(versioned.items(), key=lambda x: x[1])
++    what, version = by_version[-1]
++    if nasm_version < version:
++        die(
++            "Nasm version %s or greater is required to build with %s." % (version, what)
++        )
++    return nasm_version
++
++
++@depends(target, when=check_nasm_version)
++def nasm_asflags(target):
++    asflags = {
++        ("OSX", "x86"): ["-f", "macho32"],
++        ("OSX", "x86_64"): ["-f", "macho64"],
++        ("WINNT", "x86"): ["-f", "win32"],
++        ("WINNT", "x86_64"): ["-f", "win64"],
++    }.get((target.os, target.cpu), None)
++    if asflags is None:
++        # We're assuming every x86 platform we support that's
++        # not Windows or Mac is ELF.
++        if target.cpu == "x86":
++            asflags = ["-f", "elf32"]
++        elif target.cpu == "x86_64":
++            asflags = ["-f", "elf64"]
++    return asflags
++
++
++set_config("NASM_ASFLAGS", nasm_asflags)
++
++
+ # ANGLE OpenGL->D3D translator for WebGL
+ # ==============================================================
+ 
+ with only_when(compile_environment & target_is_windows):
+     def d3d_compiler_dll_result(value):
+         if not value.path:
+             return 'provided by the OS'
+         return value.path

+ 120 - 0
mozilla-release/patches/1692940-03-88a1.patch

@@ -0,0 +1,120 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043604 0
+#      Tue Feb 23 01:26:44 2021 +0000
+# Node ID 157125c6c140d4438c2745355307e4dfb58da631
+# Parent  ec793c77c014306408e31e9fa13a2a21d3f16641
+Bug 1692940 - Remove test case for yasm version validation. r=firefox-build-system-reviewers,dmajor
+
+We're going to remove arguments to the function, and eventually remove
+it.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105426
+
+diff --git a/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py b/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py
+--- a/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py
++++ b/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py
+@@ -6,17 +6,16 @@ from __future__ import absolute_import, 
+ 
+ import os
+ 
+ from buildconfig import topsrcdir
+ from common import BaseConfigureTest
+ from six import StringIO
+ from mozunit import main
+ from mozbuild.configure.options import InvalidOptionError
+-from mozbuild.configure.util import Version
+ from mozpack import path as mozpath
+ 
+ 
+ class TestToolkitMozConfigure(BaseConfigureTest):
+     def test_moz_configure_options(self):
+         def get_value_for(args=[], environ={}, mozconfig=''):
+             sandbox = self.get_sandbox({}, {}, args, environ, mozconfig)
+ 
+@@ -82,85 +81,11 @@ class TestToolkitMozConfigure(BaseConfig
+         self.assertEqual(get_value(['--enable-release'],
+                                    environ={'MOZILLA_OFFICIAL': 1}), None)
+ 
+         with self.assertRaises(InvalidOptionError):
+             get_value(['--disable-release'], environ={'MOZILLA_OFFICIAL': 1})
+ 
+         self.assertEqual(get_value(environ={'MOZ_AUTOMATION': 1}), None)
+ 
+-    def test_valid_yasm_version(self):
+-        out = StringIO()
+-        sandbox = self.get_sandbox({}, {}, out=out)
+-        func = sandbox._depends[sandbox['valid_yasm_version']]._func
+-
+-        # Missing yasm is not an error when nothing requires it.
+-        func(None, False, False)
+-
+-        # Any version of yasm works when nothing requires it.
+-        func(Version('1.0'), False, False)
+-
+-        # Any version of yasm works when something requires any version.
+-        func(Version('1.0'), True, False)
+-        func(Version('1.0'), True, True)
+-        func(Version('1.0'), False, True)
+-
+-        # A version of yasm greater than any requirement works.
+-        func(Version("1.5"), Version("1.0"), True)
+-        func(Version("1.5"), True, Version("1.0"))
+-        func(Version("1.5"), Version("1.1"), Version("1.0"))
+-
+-        out.truncate(0)
+-        out.seek(0)
+-        with self.assertRaises(SystemExit):
+-            func(None, Version('1.0'), False)
+-
+-        self.assertEqual(
+-            out.getvalue(),
+-            ('ERROR: Yasm is required to build with vpx, but you do not appear '
+-             'to have Yasm installed.\n'),
+-        )
+-
+-        out.truncate(0)
+-        out.seek(0)
+-        with self.assertRaises(SystemExit):
+-            func(None, Version('1.0'), Version('1.0'))
+-
+-        self.assertEqual(
+-            out.getvalue(),
+-            ('ERROR: Yasm is required to build with jpeg and vpx, but you do not appear '
+-             'to have Yasm installed.\n'),
+-        )
+-
+-        out.truncate(0)
+-        out.seek(0)
+-        with self.assertRaises(SystemExit):
+-            func(None, Version('1.0'), Version('1.0'))
+-
+-        self.assertEqual(
+-            out.getvalue(),
+-            ('ERROR: Yasm is required to build with jpeg, libav and vpx, but you do not appear '
+-             'to have Yasm installed.\n'),
+-        )
+-
+-        out.truncate(0)
+-        out.seek(0)
+-        with self.assertRaises(SystemExit):
+-            func(Version('1.0'), Version('1.1'), Version('1.0'))
+-
+-        self.assertEqual(
+-            out.getvalue(),
+-            'ERROR: Yasm version 1.1 or greater is required to build with vpx.\n'
+-        )
+-
+-        out.truncate(0)
+-        out.seek(0)
+-        with self.assertRaises(SystemExit):
+-            func(Version('1.0'), True, Version('1.0.1'))
+-
+-        self.assertEqual(
+-            out.getvalue(),
+-            'ERROR: Yasm version 1.0.1 or greater is required to build with jpeg.\n'
+-        )
+-
+ 
+ if __name__ == '__main__':
+     main()

+ 166 - 0
mozilla-release/patches/1692940-04-88a1.patch

@@ -0,0 +1,166 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043604 0
+#      Tue Feb 23 01:26:44 2021 +0000
+# Node ID 2d3e201b3724cc09a368f6b2ff47ff2364d6694b
+# Parent  13824fb0add8cd564564c907fec87cba0785966a
+Bug 1692940 - Switch vpx build to nasm instead of yasm. r=firefox-build-system-reviewers,dmajor
+
+We also remove the dependency on the check for GNU as, because all the
+build environments we support for arm use GNU as, and the dependency
+causes complications.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105427
+
+diff --git a/media/libvpx/moz.build b/media/libvpx/moz.build
+--- a/media/libvpx/moz.build
++++ b/media/libvpx/moz.build
+@@ -4,18 +4,18 @@
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+ with Files('*'):
+     BUG_COMPONENT = ('Core', 'Audio/Video')
+ 
+ include('sources.mozbuild')
+ 
+-if CONFIG['VPX_USE_YASM']:
+-    USE_YASM = True
++if CONFIG['VPX_USE_NASM']:
++    USE_NASM = True
+ 
+ # Linux, Mac and Win share file lists for x86* but not configurations.
+ if CONFIG['CPU_ARCH'] == 'x86_64':
+     EXPORTS.vpx += files['X64_EXPORTS']
+     SOURCES += files['X64_SOURCES']
+     if CONFIG['OS_TARGET'] == 'WINNT':
+         ASFLAGS += [ '-I%s/media/libvpx/config/win/x64/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libvpx/config/win/x64/' ]
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1232,32 +1232,31 @@ with only_when(compile_environment):
+         check_symbol('vpx_codec_dec_init_ver', flags=vpx.libs, onerror=lambda: die(
+             "--with-system-libvpx requested but symbol vpx_codec_dec_init_ver "
+             "not found"
+         ))
+ 
+         set_config('MOZ_SYSTEM_LIBVPX', True)
+ 
+ 
+-    @depends('--with-system-libvpx', target, gnu_as)
+-    def in_tree_vpx(system_libvpx, target, gnu_as):
++    @depends('--with-system-libvpx', target)
++    def in_tree_vpx(system_libvpx, target):
+         if system_libvpx:
+             return
+ 
+-        use_yasm = (target.cpu in ('x86', 'x86_64')) or None
+-        need_yasm = False
+-        arm_asm = (target.cpu == 'arm' and gnu_as) or None
++        arm_asm = (target.cpu == "arm") or None
++        return namespace(arm_asm=arm_asm)
+ 
+-        if use_yasm:
+-            need_yasm = True
+-            if target.kernel == 'WINNT':
+-                need_yasm = Version('1.1')
+-
+-        return namespace(arm_asm=arm_asm, use_yasm=use_yasm, need_yasm=need_yasm)
+-
++    @depends(target, when=in_tree_vpx)
++    def vpx_nasm(target):
++        if target.cpu in ("x86", "x86_64"):
++            if target.kernel == "WINNT":
++                # Version 2.03 is needed for automatic safeseh support.
++                return namespace(version="2.03", what="VPX")
++            return namespace(what="VPX")
+ 
+     # Building with -mfpu=neon requires either the "softfp" or the
+     # "hardfp" ABI. Depending on the compiler's default target, and the
+     # CFLAGS, the default ABI might be neither, in which case it is the
+     # "softfloat" ABI.
+     # The "softfloat" ABI is binary-compatible with the "softfp" ABI, so
+     # we can safely mix code built with both ABIs. So, if we detect
+     # that compiling uses the "softfloat" ABI, force the use of the
+@@ -1266,31 +1265,31 @@ with only_when(compile_environment):
+     # "softfloat" ABI, not the "softfp" ABI.
+     # Note: VPX_ASFLAGS is also used in CFLAGS.
+     softfp = cxx_compiler.try_compile(body='''
+         #ifndef __SOFTFP__
+         #error "compiler target supports -mfpu=neon, so we don't have to add extra flags"
+         #endif''', when=in_tree_vpx.arm_asm)
+ 
+ 
+-    @depends(in_tree_vpx, softfp, target)
+-    def vpx_as_flags(vpx, softfp, target):
++    @depends(in_tree_vpx, vpx_nasm, softfp, target)
++    def vpx_as_flags(vpx, vpx_nasm, softfp, target):
+         flags = []
+         if vpx and vpx.arm_asm:
+             # These flags are a lie; they're just used to enable the requisite
+             # opcodes; actual arch detection is done at runtime.
+             flags = ['-march=armv7-a', '-mfpu=neon']
+             if softfp:
+                 flags.append('-mfloat-abi=softfp')
+-        elif vpx and vpx.use_yasm and target.os != 'WINNT' and target.cpu != 'x86_64':
++        elif vpx and vpx_nasm and target.os != "WINNT" and target.cpu != "x86_64":
+             flags = ['-DPIC']
+         return flags
+ 
+ 
+-    set_config('VPX_USE_YASM', in_tree_vpx.use_yasm)
++    set_config("VPX_USE_NASM", True, when=vpx_nasm)
+     set_config('VPX_ASFLAGS', vpx_as_flags)
+ 
+ 
+ # JPEG
+ # ====
+ 
+ with only_when(compile_environment):
+     option('--with-system-jpeg', nargs='?',
+@@ -1505,23 +1504,22 @@ with only_when(compile_environment):
+     set_config('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_define('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_config('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
+     set_config("FFVPX_USE_YASM", True, when=ffvpx.need_yasm)
+ 
+ 
+-@depends(yasm_version, in_tree_vpx.need_yasm, in_tree_jpeg.use_yasm,
++@depends(yasm_version, in_tree_jpeg.use_yasm,
+          ffvpx.need_yasm)
+ @imports(_from='__builtin__', _import='sorted')
+-def valid_yasm_version(yasm_version, for_vpx, for_jpeg, for_ffvpx=False):
++def valid_yasm_version(yasm_version, for_jpeg, for_ffvpx=False):
+     # Note: the default for for_ffvpx above only matters for unit tests.
+     requires = {
+-        'vpx': for_vpx,
+         'jpeg': for_jpeg,
+         'ffvpx': for_ffvpx,
+     }
+     requires = {k: v for (k, v) in requires.items() if v}
+     if requires and not yasm_version:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = ' and '.join((', '.join(items[:-1]), items[-1]))
+@@ -1536,17 +1534,17 @@ def valid_yasm_version(yasm_version, for
+         what, version = by_version[-1]
+         if yasm_version < version:
+             die('Yasm version %s or greater is required to build with %s.'
+                 % (version, what))
+ 
+ 
+ # nasm detection
+ # ==============================================================
+-@depends(dav1d_nasm)
++@depends(dav1d_nasm, vpx_nasm)
+ def need_nasm(*requirements):
+     requires = {
+         x.what: x.version if hasattr(x, "version") else True for x in requirements if x
+     }
+     if requires:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = " and ".join((", ".join(items[:-1]), items[-1]))

+ 73 - 38
mozilla-release/patches/1692940-5-88a1.patch → mozilla-release/patches/1692940-05-88a1.patch

@@ -3,7 +3,7 @@
 # Date 1614043605 0
 #      Tue Feb 23 01:26:45 2021 +0000
 # Node ID 2d78e4bc3367320976d35629044085e8ee26a1fe
-# Parent  9ca7d0aae25674fb8e42387fead0b7ca3571a3be
+# Parent  e42b1e9178d2725962db0e7bb52a3880f3242f23
 Bug 1692940 - Switch jpeg build to nasm instead of yasm. r=firefox-build-system-reviewers,dmajor
 
 Differential Revision: https://phabricator.services.mozilla.com/D105428
@@ -35,7 +35,7 @@ diff --git a/media/libjpeg/moz.build b/media/libjpeg/moz.build
 diff --git a/toolkit/moz.configure b/toolkit/moz.configure
 --- a/toolkit/moz.configure
 +++ b/toolkit/moz.configure
-@@ -1354,51 +1354,45 @@ with only_when(compile_environment):
+@@ -1348,51 +1348,45 @@ with only_when(compile_environment):
          set_config('MOZ_JPEG_CFLAGS', jpeg_flags.cflags)
          set_config('MOZ_JPEG_LIBS', jpeg_flags.ldflags)
  
@@ -47,56 +47,44 @@ diff --git a/toolkit/moz.configure b/toolkit/moz.configure
 -        flags = ()
 -        use_yasm = None
 -        need_yasm = False
--        if target.kernel == 'Darwin':
--            if target.cpu == 'x86':
+         if target.kernel == 'Darwin':
+             if target.cpu == 'x86':
 -                flags = ('-DPIC', '-DMACHO')
--            elif target.cpu == 'x86_64':
--                flags = ('-D__x86_64__', '-DPIC', '-DMACHO')
--        elif target.kernel == 'WINNT':
--            if target.cpu == 'x86':
--                flags = ('-DPIC', '-DWIN32')
--            elif target.cpu == 'x86_64':
--                flags = ('-D__x86_64__', '-DPIC', '-DWIN64', '-DMSVC')
--        elif target.cpu == 'arm':
--            flags = ('-march=armv7-a', '-mfpu=neon')
--        elif target.cpu == 'aarch64':
--            flags = ('-march=armv8-a',)
--        elif target.cpu == 'mips32':
--            flags = ('-mdspr2',)
--        elif target.cpu == 'x86':
--            flags = ('-DPIC', '-DELF')
--        elif target.cpu == 'x86_64':
--            flags = ('-D__x86_64__', '-DPIC', '-DELF')
-+        if target.kernel == "Darwin":
-+            if target.cpu == "x86":
 +                return ("-DPIC", "-DMACHO")
-+            elif target.cpu == "x86_64":
+             elif target.cpu == 'x86_64':
+-                flags = ('-D__x86_64__', '-DPIC', '-DMACHO')
 +                return ("-D__x86_64__", "-DPIC", "-DMACHO")
-+        elif target.kernel == "WINNT":
-+            if target.cpu == "x86":
+         elif target.kernel == 'WINNT':
+             if target.cpu == 'x86':
+-                flags = ('-DPIC', '-DWIN32')
 +                return ("-DPIC", "-DWIN32")
-+            elif target.cpu == "x86_64":
+             elif target.cpu == 'x86_64':
+-                flags = ('-D__x86_64__', '-DPIC', '-DWIN64', '-DMSVC')
 +                return ("-D__x86_64__", "-DPIC", "-DWIN64", "-DMSVC")
-+        elif target.cpu == "arm":
+         elif target.cpu == 'arm':
+-            flags = ('-march=armv7-a', '-mfpu=neon')
 +            return ("-march=armv7-a", "-mfpu=neon")
-+        elif target.cpu == "aarch64":
+         elif target.cpu == 'aarch64':
+-            flags = ('-march=armv8-a',)
 +            return ("-march=armv8-a",)
-+        elif target.cpu == "mips32":
+         elif target.cpu == 'mips32':
+-            flags = ('-mdspr2',)
 +            return ("-mdspr2",)
-+        elif target.cpu == "x86":
+         elif target.cpu == 'x86':
+-            flags = ('-DPIC', '-DELF')
 +            return ("-DPIC", "-DELF")
-+        elif target.cpu == "x86_64":
+         elif target.cpu == 'x86_64':
+-            flags = ('-D__x86_64__', '-DPIC', '-DELF')
 +            return ("-D__x86_64__", "-DPIC", "-DELF")
  
--        if target.cpu in ('x86', 'x86_64'):
++    @depends(target, when=in_tree_jpeg)
++    def jpeg_nasm(target):
+         if target.cpu in ('x86', 'x86_64'):
 -            use_yasm = True
 -            if target.kernel == 'Linux' and target.os == 'GNU':
 -                need_yasm = Version('1.0.1')
 -            else:
 -                need_yasm = Version('1.1')
-+    @depends(target, when=in_tree_jpeg)
-+    def jpeg_nasm(target):
-+        if target.cpu in ("x86", "x86_64"):
 +            # libjpeg-turbo 2.0.6 requires nasm 2.10.
 +            return namespace(version="2.10", what="JPEG")
  
@@ -108,9 +96,56 @@ diff --git a/toolkit/moz.configure b/toolkit/moz.configure
 +    set_config("LIBJPEG_TURBO_ASFLAGS", in_tree_jpeg)
  
  
- # Libav-fft Support
+ # FFmpeg's ffvpx configuration
  # ==============================================================
  with only_when(compile_environment):
      @depends(target)
      def libav_fft(target):
-         flags = None
+         return target.kernel == "WINNT" or target.cpu == "x86_64"
+@@ -1465,23 +1459,23 @@ with only_when(compile_environment):
+     set_config('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_define('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_config('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
+     set_config("FFVPX_USE_YASM", True, when=ffvpx.need_yasm)
+ 
+ 
+-@depends(yasm_version, in_tree_jpeg.use_yasm,
+-         ffvpx.need_yasm)
++@depends(yasm_version,
++         ffvpx.use_yasm,
++)
+ @imports(_from='__builtin__', _import='sorted')
+-def valid_yasm_version(yasm_version, for_jpeg, for_ffvpx=False):
++def valid_yasm_version(yasm_version, for_ffvpx=False):
+     # Note: the default for for_ffvpx above only matters for unit tests.
+     requires = {
+-        'jpeg': for_jpeg,
+         'ffvpx': for_ffvpx,
+     }
+     requires = {k: v for (k, v) in requires.items() if v}
+     if requires and not yasm_version:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = ' and '.join((', '.join(items[:-1]), items[-1]))
+         else:
+@@ -1495,17 +1489,17 @@ def valid_yasm_version(yasm_version, for
+         what, version = by_version[-1]
+         if yasm_version < version:
+             die('Yasm version %s or greater is required to build with %s.'
+                 % (version, what))
+ 
+ 
+ # nasm detection
+ # ==============================================================
+-@depends(dav1d_nasm, vpx_nasm)
++@depends(dav1d_nasm, vpx_nasm, jpeg_nasm)
+ def need_nasm(*requirements):
+     requires = {
+         x.what: x.version if hasattr(x, "version") else True for x in requirements if x
+     }
+     if requires:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = " and ".join((", ".join(items[:-1]), items[-1]))
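
Editorial note: the hunk above drops the per-library yasm bookkeeping for jpeg and routes it through the single need_nasm() aggregator, which collects one namespace per consumer (dav1d, vpx, jpeg) with a label and an optional minimum version. As a rough illustration of that pattern — plain Python, not the actual moz.configure code; the Requirement tuple, the vkey() helper and the dav1d version number are invented for this sketch — the aggregation boils down to:

    # Each consumer either contributes nothing (None) or a label plus an
    # optional minimum nasm version; the strictest version requirement wins.
    from collections import namedtuple

    Requirement = namedtuple("Requirement", "what version")

    def vkey(v):
        # crude stand-in for mozbuild's Version class
        return tuple(int(p) for p in v.split("."))

    def summarize(*requirements):
        requires = {r.what: r.version for r in requirements if r}
        if not requires:
            return "nasm is not required"
        items = sorted(requires)
        if len(items) > 1:
            what = " and ".join((", ".join(items[:-1]), items[-1]))
        else:
            what = items[0]
        versioned = [v for v in requires.values() if v]
        minimum = max(versioned, key=vkey) if versioned else "any"
        return "nasm %s or greater is required to build with %s" % (minimum, what)

    print(summarize(Requirement("JPEG", "2.10"), None, Requirement("dav1d", "2.14")))
    # -> nasm 2.14 or greater is required to build with JPEG and dav1d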

+ 218 - 0
mozilla-release/patches/1692940-06-88a1.patch

@@ -0,0 +1,218 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043605 0
+#      Tue Feb 23 01:26:45 2021 +0000
+# Node ID da0ee340f69903904c61da6b2d1cfac2d3aca4f2
+# Parent  d17a4e2acf84047fcb1a47598747760378dbc8a4
+Bug 1692940 - Switch ffvpx build to nasm instead of yasm. r=firefox-build-system-reviewers,dmajor
+
+nasm doesn't like compiling simple_idct10.asm on x86
+(https://bugzilla.nasm.us/show_bug.cgi?id=3392738), which is empty once
+preprocessed for x86, so exclude it there.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105429
+
+diff --git a/media/ffvpx/ffvpxcommon.mozbuild b/media/ffvpx/ffvpxcommon.mozbuild
+--- a/media/ffvpx/ffvpxcommon.mozbuild
++++ b/media/ffvpx/ffvpxcommon.mozbuild
+@@ -7,18 +7,18 @@
+ # Add assembler flags and includes
+ if CONFIG['CPU_ARCH'] != 'aarch64':
+     ASFLAGS += CONFIG['FFVPX_ASFLAGS']
+     ASFLAGS += ['-I%s/media/ffvpx/' % TOPSRCDIR]
+     ASFLAGS += ['-I%s/media/ffvpx/libavcodec/x86/' % TOPSRCDIR]
+     ASFLAGS += ['-I%s/media/ffvpx/libavutil/x86/' % TOPSRCDIR]
+ 
+ if CONFIG['FFVPX_ASFLAGS']:
+-    if CONFIG['FFVPX_USE_YASM']:
+-        USE_YASM = True
++    if CONFIG['FFVPX_USE_NASM']:
++        USE_NASM = True
+ 
+     if CONFIG['OS_ARCH'] == 'WINNT':
+        # Fix inline symbols and math defines for windows.
+         DEFINES['_USE_MATH_DEFINES'] = True
+         DEFINES['inline'] = "__inline"
+ 
+ LOCAL_INCLUDES += ['/media/ffvpx']
+ 
+diff --git a/media/ffvpx/libavcodec/x86/moz.build b/media/ffvpx/libavcodec/x86/moz.build
+--- a/media/ffvpx/libavcodec/x86/moz.build
++++ b/media/ffvpx/libavcodec/x86/moz.build
+@@ -6,16 +6,18 @@
+ 
+ SOURCES += [
+     'constants.c',
+     'flacdsp.asm',
+     'flacdsp_init.c',
+     'h264_intrapred.asm',
+     'h264_intrapred_10bit.asm',
+     'h264_intrapred_init.c',
++# Bug 1582271
++#    -    'simple_idct10.asm',
+     'videodsp.asm',
+     'videodsp_init.c',
+     'vp8dsp.asm',
+     'vp8dsp_init.c',
+     'vp8dsp_loopfilter.asm',
+     'vp9dsp_init.c',
+     'vp9dsp_init_10bpp.c',
+     'vp9dsp_init_12bpp.c',
+@@ -25,16 +27,22 @@ SOURCES += [
+     'vp9itxfm.asm',
+     'vp9itxfm_16bpp.asm',
+     'vp9lpf.asm',
+     'vp9lpf_16bpp.asm',
+     'vp9mc.asm',
+     'vp9mc_16bpp.asm',
+ ]
+ 
++# Bug 1582271
++# if CONFIG['CPU_ARCH'] == "x86_64":
++#     SOURCES += [
++#         'simple_idct10.asm',
++#     ]
++
+ if CONFIG['MOZ_LIBAV_FFT']:
+     SOURCES += [
+         'fft.asm',
+         'fft_init.c',
+     ]
+ 
+ FINAL_LIBRARY = 'mozavcodec'
+ 
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1392,19 +1392,19 @@ with only_when(compile_environment):
+         return target.kernel == "WINNT" or target.cpu == "x86_64"
+ 
+     set_config('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+     set_define('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+ 
+ 
+ with only_when(compile_environment):
+ 
+-    @depends(vpx_as_flags, target)
+-    def ffvpx(vpx_as_flags, target):
+-        enable = use_yasm = True
++    @depends(target)
++    def ffvpx(target):
++        enable = use_nasm = True
+         flac_only = False
+         flags = []
+ 
+         if target.kernel == "WINNT":
+             if target.cpu == "x86":
+                 # 32-bit windows need to prefix symbols with an underscore.
+                 flags = ["-DPIC", "-DWIN32", "-DPREFIX", "-Pconfig_win32.asm"]
+             elif target.cpu == "x86_64":
+@@ -1412,17 +1412,17 @@ with only_when(compile_environment):
+                     "-D__x86_64__",
+                     "-DPIC",
+                     "-DWIN64",
+                     "-DMSVC",
+                     "-Pconfig_win64.asm",
+                 ]
+             elif target.cpu == "aarch64":
+                 flags = ["-DPIC", "-DWIN64"]
+-                use_yasm = False
++                use_nasm = False
+         elif target.kernel == "Darwin":
+             if target.cpu == "x86_64":
+                 # 32/64-bit macosx asemblers need to prefix symbols with an
+                 # underscore.
+                 flags = [
+                     "-D__x86_64__",
+                     "-DPIC",
+                     "-DMACHO",
+@@ -1430,53 +1430,59 @@ with only_when(compile_environment):
+                     "-Pconfig_darwin64.asm",
+                 ]
+             else:
+                 flac_only = True
+         elif target.cpu == "x86_64":
+             flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"]
+         elif target.cpu == "x86":
+             flac_only = True
+-        elif target.cpu in ("arm", "aarch64"):
+-            flac_only = True
+-            flags.extend(vpx_as_flags)
+         else:
+             enable = False
+ 
+         if flac_only or not enable:
+-            use_yasm = False
++            use_nasm = False
+ 
+-        if use_yasm:
++        if use_nasm:
+             # default disabled components
+             flags.append('-Pdefaults_disabled.asm')
+ 
+         return namespace(
+             enable=enable,
+-            need_yasm="1.2" if use_yasm else False,
++            use_nasm=use_nasm,
+             flac_only=flac_only,
+             flags=flags,
+         )
+ 
++    @depends(when=ffvpx.use_nasm)
++    def ffvpx_nasm():
++        # nasm 2.10 for AVX-2 support.
++        return namespace(version="2.10", what="FFVPX")
++
++    # ffvpx_nasm can't indirectly depend on vpx_as_flags, because it depends
++    # on a compiler test, so we have to do a little bit of dance here.
++    @depends(ffvpx, vpx_as_flags, target)
++    def ffvpx(ffvpx, vpx_as_flags, target):
++        if ffvpx and target.cpu in ("arm", "aarch64"):
++            ffvpx.flags.extend(vpx_as_flags)
++        return ffvpx
+ 
+     set_config('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_define('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_config('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
+-    set_config("FFVPX_USE_YASM", True, when=ffvpx.need_yasm)
++    set_config("FFVPX_USE_NASM", True, when=ffvpx.use_nasm)
+ 
+ 
+ @depends(yasm_version,
+-         ffvpx.use_yasm,
+ )
+ @imports(_from='__builtin__', _import='sorted')
+-def valid_yasm_version(yasm_version, for_ffvpx=False):
+-    # Note: the default for for_ffvpx above only matters for unit tests.
++def valid_yasm_version(yasm_version):
+     requires = {
+-        'ffvpx': for_ffvpx,
+     }
+     requires = {k: v for (k, v) in requires.items() if v}
+     if requires and not yasm_version:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = ' and '.join((', '.join(items[:-1]), items[-1]))
+         else:
+             what = items[0]
+@@ -1489,17 +1495,17 @@ def valid_yasm_version(yasm_version, for
+         what, version = by_version[-1]
+         if yasm_version < version:
+             die('Yasm version %s or greater is required to build with %s.'
+                 % (version, what))
+ 
+ 
+ # nasm detection
+ # ==============================================================
+-@depends(dav1d_nasm, vpx_nasm, jpeg_nasm)
++@depends(dav1d_nasm, vpx_nasm, jpeg_nasm, ffvpx_nasm)
+ def need_nasm(*requirements):
+     requires = {
+         x.what: x.version if hasattr(x, "version") else True for x in requirements if x
+     }
+     if requires:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = " and ".join((", ".join(items[:-1]), items[-1]))
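
Editorial note on the FFVPX_ASFLAGS handled earlier in this patch: nasm's -P option pre-includes the named file ahead of the translation unit, so config_win64.asm (or its unix/darwin counterparts) and defaults_disabled.asm reach every assembly file without each one having to %include them. A sketch of the resulting command line follows — assembled here by hand for illustration only; the build system generates it, and the object file name is hypothetical:

    # Not a literal build-log line, just the shape of the invocation on win64.
    asflags = ["-D__x86_64__", "-DPIC", "-DWIN64", "-DMSVC",
               "-Pconfig_win64.asm", "-Pdefaults_disabled.asm"]
    cmd = ["nasm", "-f", "win64"] + asflags + ["-o", "vp8dsp.obj", "vp8dsp.asm"]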

+ 80 - 0
mozilla-release/patches/1692940-07-88a1.patch

@@ -0,0 +1,80 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043606 0
+#      Tue Feb 23 01:26:46 2021 +0000
+# Node ID 3b1e09a1e421f6767391ff03b91550a2ffb7f66f
+# Parent  bfa6009b2b705ad4064c1b74cfa7d65bd854fcc6
+Bug 1692940 - Switch aom build to nasm instead of yasm. r=firefox-build-system-reviewers,jbauman,dmajor,andi
+
+nasm doesn't like compiling x86_abi_support.asm
+(https://bugzilla.nasm.us/show_bug.cgi?id=3392738), which is actually an
+include file, rather than a source file, so it shouldn't have been in
+the list of sources in the first place (libvpx has a similar file that
+is excluded already, for instance).
+
+I was considering updating the vendoring script, but it turns out it
+doesn't produce the current contents in-tree (which even breaks the
+build), and aom is set to be removed (bug 1635296)...
+
+Differential Revision: https://phabricator.services.mozilla.com/D105430
+
+diff --git a/media/libaom/moz.build b/media/libaom/moz.build
+--- a/media/libaom/moz.build
++++ b/media/libaom/moz.build
+@@ -8,33 +8,33 @@ with Files('*'):
+     BUG_COMPONENT = ('Core', 'Audio/Video')
+ 
+ include('sources.mozbuild')
+ 
+ # Linux, Mac and Win share file lists for x86* but not configurations.
+ if CONFIG['CPU_ARCH'] == 'x86_64':
+     EXPORTS.aom += files['X64_EXPORTS']
+     SOURCES += files['X64_SOURCES']
+-    USE_YASM = True
++    USE_NASM = True
+     if CONFIG['OS_TARGET'] == 'WINNT':
+         ASFLAGS += [ '-I%s/media/libaom/config/win/x64/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/win/x64/' ]
+         EXPORTS.aom += [ 'config/win/x64/config/aom_config.h' ]
+     elif CONFIG['OS_TARGET'] == 'Darwin':
+         ASFLAGS += [ '-I%s/media/libaom/config/mac/x64/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/mac/x64/' ]
+         EXPORTS.aom += [ 'config/mac/x64/config/aom_config.h' ]
+     else: # Android, Linux, BSDs, etc.
+         ASFLAGS += [ '-I%s/media/libaom/config/linux/x64/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/linux/x64/' ]
+         EXPORTS.aom += [ 'config/linux/x64/config/aom_config.h' ]
+ elif CONFIG['CPU_ARCH'] == 'x86':
+     EXPORTS.aom += files['IA32_EXPORTS']
+     SOURCES += files['IA32_SOURCES']
+-    USE_YASM = True
++    USE_NASM = True
+     if CONFIG['OS_TARGET'] == 'WINNT':
+         ASFLAGS += [ '-I%s/media/libaom/config/win/ia32/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/win/ia32/' ]
+         EXPORTS.aom += [ 'config/win/ia32/config/aom_config.h' ]
+     else: # Android, Linux, BSDs, etc.
+         ASFLAGS += [ '-I%s/media/libaom/config/linux/ia32/' % TOPSRCDIR ]
+         LOCAL_INCLUDES += [ '/media/libaom/config/linux/ia32/' ]
+         EXPORTS.aom += [ 'config/linux/ia32/config/aom_config.h' ]
+diff --git a/media/libaom/sources.mozbuild b/media/libaom/sources.mozbuild
+--- a/media/libaom/sources.mozbuild
++++ b/media/libaom/sources.mozbuild
+@@ -271,17 +271,16 @@ files = {
+     '../../third_party/aom/aom_dsp/x86/intrapred_avx2.c',
+     '../../third_party/aom/aom_dsp/x86/intrapred_sse2.c',
+     '../../third_party/aom/aom_dsp/x86/intrapred_sse2_asm.asm',
+     '../../third_party/aom/aom_dsp/x86/intrapred_ssse3.c',
+     '../../third_party/aom/aom_dsp/x86/inv_wht_sse2.asm',
+     '../../third_party/aom/aom_dsp/x86/loopfilter_sse2.c',
+     '../../third_party/aom/aom_mem/aom_mem.c',
+     '../../third_party/aom/aom_ports/emms.asm',
+-    '../../third_party/aom/aom_ports/x86_abi_support.asm',
+     '../../third_party/aom/aom_scale/aom_scale_rtcd.c',
+     '../../third_party/aom/aom_scale/generic/aom_scale.c',
+     '../../third_party/aom/aom_scale/generic/gen_scalers.c',
+     '../../third_party/aom/aom_scale/generic/yv12config.c',
+     '../../third_party/aom/aom_scale/generic/yv12extend.c',
+     '../../third_party/aom/aom_util/aom_thread.c',
+     '../../third_party/aom/aom_util/debug_util.c',
+     '../../third_party/aom/av1/av1_dx_iface.c',
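
Editorial note: the removal above matches the commit message. x86_abi_support.asm is only ever consumed by the other assembly files (typically via a nasm/yasm directive along the lines of %include "aom_ports/x86_abi_support.asm"), so listing it in SOURCES made the build try to assemble it on its own, which is what nasm rejected. Dropping it from the source list — as libvpx already does for its equivalent file — is the actual fix rather than a nasm workaround.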

+ 89 - 0
mozilla-release/patches/1692940-08-88a1.patch

@@ -0,0 +1,89 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043606 0
+#      Tue Feb 23 01:26:46 2021 +0000
+# Node ID d74f2f0996f984e0cfdf6a07421a6fd50dc25db2
+# Parent  9fc987186e4c2d1319a3a36b1625eb2484c2cc81
+Bug 1692940 - Turn the USE_YASM unit test into a USE_NASM one. r=firefox-build-system-reviewers,dmajor

+
+Differential Revision: https://phabricator.services.mozilla.com/D105431
+
+diff --git a/python/mozbuild/mozbuild/test/frontend/data/use-yasm/moz.build b/python/mozbuild/mozbuild/test/frontend/data/use-nasm/moz.build
+rename from python/mozbuild/mozbuild/test/frontend/data/use-yasm/moz.build
+rename to python/mozbuild/mozbuild/test/frontend/data/use-nasm/moz.build
+--- a/python/mozbuild/mozbuild/test/frontend/data/use-yasm/moz.build
++++ b/python/mozbuild/mozbuild/test/frontend/data/use-nasm/moz.build
+@@ -3,11 +3,11 @@
+ # http://creativecommons.org/publicdomain/zero/1.0/
+ 
+ @template
+ def Library(name):
+     LIBRARY_NAME = name
+ 
+ Library('dummy')
+ 
+-USE_YASM = True
++USE_NASM = True
+ 
+ SOURCES += ['test1.S']
+diff --git a/python/mozbuild/mozbuild/test/frontend/data/use-nasm/test1.S b/python/mozbuild/mozbuild/test/frontend/data/use-nasm/test1.S
+new file mode 100644
+diff --git a/python/mozbuild/mozbuild/test/frontend/data/use-yasm/test1.S b/python/mozbuild/mozbuild/test/frontend/data/use-yasm/test1.S
+deleted file mode 100644
+diff --git a/python/mozbuild/mozbuild/test/frontend/test_emitter.py b/python/mozbuild/mozbuild/test/frontend/test_emitter.py
+--- a/python/mozbuild/mozbuild/test/frontend/test_emitter.py
++++ b/python/mozbuild/mozbuild/test/frontend/test_emitter.py
+@@ -425,43 +425,43 @@ class TestEmitterBasic(unittest.TestCase
+ 
+     def test_disable_compiler_warnings(self):
+         reader = self.reader('disable-compiler-warnings', extra_substs={
+             'WARNINGS_CFLAGS': '-Wall',
+         })
+         sources, ldflags, lib, flags = self.read_topsrcdir(reader)
+         self.assertEqual(flags.flags['WARNINGS_CFLAGS'], [])
+ 
+-    def test_use_yasm(self):
+-        # When yasm is not available, this should raise.
+-        reader = self.reader('use-yasm')
++    def test_use_nasm(self):
++        # When nasm is not available, this should raise.
++        reader = self.reader("use-nasm")
+         with six.assertRaisesRegex(self, SandboxValidationError,
+-                                   'yasm is not available'):
++                                   'nasm is not available'):
+             self.read_topsrcdir(reader)
+ 
+-        # When yasm is available, this should work.
+-        reader = self.reader('use-yasm',
++        # When nasm is available, this should work.
++        reader = self.reader('use-nasm',
+                              extra_substs=dict(
+-                                 YASM='yasm',
+-                                 YASM_ASFLAGS='-foo',
++                                 NASM='nasm',
++                                 NASM_ASFLAGS='-foo',
+                              ))
+ 
+         sources, passthru, ldflags, lib, flags, asflags = self.read_topsrcdir(reader)
+ 
+         self.assertIsInstance(passthru, VariablePassthru)
+         self.assertIsInstance(ldflags, ComputedFlags)
+         self.assertIsInstance(flags, ComputedFlags)
+         self.assertIsInstance(asflags, ComputedFlags)
+ 
+-        self.assertEqual(asflags.flags['OS'], reader.config.substs['YASM_ASFLAGS'])
++        self.assertEqual(asflags.flags['OS'], reader.config.substs['NASM_ASFLAGS'])
+ 
+         maxDiff = self.maxDiff
+         self.maxDiff = None
+         self.assertEqual(passthru.variables,
+-                         {'AS': 'yasm',
++                         {'AS': 'nasm',
+                           'AS_DASH_C_FLAG': '',
+                           'ASOUTOPTION': '-o '})
+         self.maxDiff = maxDiff
+ 
+     def test_generated_files(self):
+         reader = self.reader('generated-files')
+         objs = self.read_topsrcdir(reader)
+ 

+ 197 - 0
mozilla-release/patches/1692940-09-88a1.patch

@@ -0,0 +1,197 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043606 0
+#      Tue Feb 23 01:26:46 2021 +0000
+# Node ID fdba4b86b00aa04bbc8b1c42862b2f4e37b2b101
+# Parent  f2b3e8c02580d35fbabd8580c4573dc7bd49ffc0
+Bug 1692940 - Remove build system support for yasm. r=firefox-build-system-reviewers,dmajor
+
+Differential Revision: https://phabricator.services.mozilla.com/D105432
+
+diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
+--- a/build/moz.configure/toolchain.configure
++++ b/build/moz.configure/toolchain.configure
+@@ -54,54 +54,16 @@ def moz_optimize(option, _):
+         flags=flags,
+     )
+ 
+ 
+ set_config('MOZ_OPTIMIZE', moz_optimize.optimize)
+ add_old_configure_assignment('MOZ_OPTIMIZE', moz_optimize.optimize)
+ add_old_configure_assignment('MOZ_CONFIGURE_OPTIMIZE_FLAGS', moz_optimize.flags)
+ 
+-# yasm detection
+-# ==============================================================
+-yasm = check_prog('YASM', ['yasm'], allow_missing=True)
+-
+-
+-@depends_if(yasm)
+-@checking('yasm version')
+-def yasm_version(yasm):
+-    version = check_cmd_output(
+-        yasm, '--version',
+-        onerror=lambda: die('Failed to get yasm version.')
+-    ).splitlines()[0].split()[1]
+-    return Version(version)
+-
+-
+-@depends(yasm, target)
+-def yasm_asflags(yasm, target):
+-    if yasm:
+-        asflags = {
+-            ('OSX', 'x86'): ['-f', 'macho32'],
+-            ('OSX', 'x86_64'): ['-f', 'macho64'],
+-            ('WINNT', 'x86'): ['-f', 'win32'],
+-            ('WINNT', 'x86_64'): ['-f', 'x64'],
+-        }.get((target.os, target.cpu), None)
+-        if asflags is None:
+-            # We're assuming every x86 platform we support that's
+-            # not Windows or Mac is ELF.
+-            if target.cpu == 'x86':
+-                asflags = ['-f', 'elf32']
+-            elif target.cpu == 'x86_64':
+-                asflags = ['-f', 'elf64']
+-        if asflags:
+-            asflags += ['-rnasm', '-pnasm']
+-        return asflags
+-
+-
+-set_config('YASM_ASFLAGS', yasm_asflags)
+-
+ 
+ # Android NDK
+ # ==============================================================
+ 
+ 
+ @depends('--disable-compile-environment', build_project)
+ def compiling_android(compile_env, build_project):
+     return compile_env and build_project in ('mobile/android', 'js')
+diff --git a/python/mozbuild/mozbuild/frontend/context.py b/python/mozbuild/mozbuild/frontend/context.py
+--- a/python/mozbuild/mozbuild/frontend/context.py
++++ b/python/mozbuild/mozbuild/frontend/context.py
+@@ -398,22 +398,16 @@ class AsmFlags(BaseCompileFlags):
+         if (self._context.config.substs.get('MOZ_DEBUG') or
+             self._context.config.substs.get('MOZ_DEBUG_SYMBOLS')):
+             if self._context.get('USE_NASM'):
+                 if (self._context.config.substs.get('OS_ARCH') == 'WINNT' and
+                     not self._context.config.substs.get('GNU_CC')):
+                     debug_flags += ['-F', 'cv8']
+                 elif self._context.config.substs.get('OS_ARCH') != 'Darwin':
+                     debug_flags += ['-F', 'dwarf']
+-            elif self._context.get('USE_YASM'):
+-                if (self._context.config.substs.get('OS_ARCH') == 'WINNT' and
+-                    not self._context.config.substs.get('GNU_CC')):
+-                    debug_flags += ['-g', 'cv8']
+-                elif self._context.config.substs.get('OS_ARCH') != 'Darwin':
+-                    debug_flags += ['-g', 'dwarf2']
+             elif (self._context.config.substs.get('OS_ARCH') == 'WINNT' and
+                   self._context.config.substs.get('CPU_ARCH') == 'aarch64'):
+                 # armasm64 accepts a paucity of options compared to ml/ml64.
+                 pass
+             else:
+                 debug_flags += self._context.config.substs.get('MOZ_DEBUG_FLAGS', '').split()
+         return debug_flags
+ 
+@@ -2264,27 +2258,16 @@ VARIABLES = {
+         By default, the build will use the toolchain assembler, $(AS), to
+         assemble source files in assembly language (.s or .asm files). Setting
+         this value to ``True`` will cause it to use nasm instead.
+ 
+         If nasm is not available on this system, or does not support the
+         current target architecture, an error will be raised.
+         """),
+ 
+-    'USE_YASM': (bool, bool,
+-                 """Use the yasm assembler to assemble assembly files from SOURCES.
+-
+-        By default, the build will use the toolchain assembler, $(AS), to
+-        assemble source files in assembly language (.s or .asm files). Setting
+-        this value to ``True`` will cause it to use yasm instead.
+-
+-        If yasm is not available on this system, or does not support the
+-        current target architecture, an error will be raised.
+-        """),
+-
+     'USE_INTEGRATED_CLANGCL_AS': (bool, bool,
+         """Use the integrated clang-cl assembler to assemble assembly files from SOURCES.
+ 
+         This allows using clang-cl to assemble assembly files which is useful
+         on platforms like aarch64 where the alternative is to have to run a
+         pre-processor to generate files with suitable syntax.
+         """),
+ }
+diff --git a/python/mozbuild/mozbuild/frontend/emitter.py b/python/mozbuild/mozbuild/frontend/emitter.py
+--- a/python/mozbuild/mozbuild/frontend/emitter.py
++++ b/python/mozbuild/mozbuild/frontend/emitter.py
+@@ -1307,26 +1307,16 @@ class TreeMetadataEmitter(LoggingMixin):
+             yield obj
+ 
+         for obj in self._process_jar_manifests(context):
+             yield obj
+ 
+         computed_as_flags.resolve_flags('MOZBUILD',
+                                         context.get('ASFLAGS'))
+ 
+-        if context.get('USE_YASM') is True:
+-            yasm = context.config.substs.get('YASM')
+-            if not yasm:
+-                raise SandboxValidationError('yasm is not available', context)
+-            passthru.variables['AS'] = yasm
+-            passthru.variables['AS_DASH_C_FLAG'] = ''
+-            passthru.variables['ASOUTOPTION'] = '-o '
+-            computed_as_flags.resolve_flags('OS',
+-                                            context.config.substs.get('YASM_ASFLAGS', []))
+-
+         if context.get('USE_NASM') is True:
+             nasm = context.config.substs.get('NASM')
+             if not nasm:
+                 raise SandboxValidationError('nasm is not available', context)
+             passthru.variables['AS'] = nasm
+             passthru.variables['AS_DASH_C_FLAG'] = ''
+             passthru.variables['ASOUTOPTION'] = '-o '
+             computed_as_flags.resolve_flags('OS',
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1468,41 +1468,16 @@ with only_when(compile_environment):
+     set_config('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_define('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_config('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
+     set_config("FFVPX_USE_NASM", True, when=ffvpx.use_nasm)
+ 
+ 
+-@depends(yasm_version,
+-)
+-@imports(_from='__builtin__', _import='sorted')
+-def valid_yasm_version(yasm_version):
+-    requires = {
+-    }
+-    requires = {k: v for (k, v) in requires.items() if v}
+-    if requires and not yasm_version:
+-        items = sorted(requires.keys())
+-        if len(items) > 1:
+-            what = ' and '.join((', '.join(items[:-1]), items[-1]))
+-        else:
+-            what = items[0]
+-        die('Yasm is required to build with %s, but you do not appear to have '
+-            'Yasm installed.' % what)
+-
+-    versioned = {k: v for (k, v) in requires.items() if v is not True}
+-    by_version = sorted(versioned.items(), key=lambda x: x[1])
+-    if by_version:
+-        what, version = by_version[-1]
+-        if yasm_version < version:
+-            die('Yasm version %s or greater is required to build with %s.'
+-                % (version, what))
+-
+-
+ # nasm detection
+ # ==============================================================
+ @depends(dav1d_nasm, vpx_nasm, jpeg_nasm, ffvpx_nasm)
+ def need_nasm(*requirements):
+     requires = {
+         x.what: x.version if hasattr(x, "version") else True for x in requirements if x
+     }
+     if requires:
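
Editorial note on the yasm_asflags block deleted in this hunk: the removed flags ran yasm in its NASM-compatible mode — -pnasm selects yasm's NASM-syntax parser and -rnasm its NASM-style preprocessor — which is why the consumers migrated earlier in this series could move to real nasm without touching the assembly sources. A sketch of the old invocation, with a hypothetical file name:

    # Shape of the command the removed yasm_asflags used to produce on linux64.
    cmd = ["yasm", "-f", "elf64", "-rnasm", "-pnasm", "-o", "foo.o", "foo.asm"]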

+ 397 - 0
mozilla-release/patches/1692940-10no11-88a1.patch

@@ -0,0 +1,397 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614043607 0
+#      Tue Feb 23 01:26:47 2021 +0000
+# Node ID 2310bd4635a257928870e576995dd76151f2983e
+# Parent  aa2eda8a84e69b50cd81a13ea5e28fd445c9f834
+Bug 1692940 - Don't bootstrap yasm. r=firefox-build-system-reviewers,dmajor
+
+Differential Revision: https://phabricator.services.mozilla.com/D105599
+
+diff --git a/python/mozboot/mozboot/archlinux.py b/python/mozboot/mozboot/archlinux.py
+--- a/python/mozboot/mozboot/archlinux.py
++++ b/python/mozboot/mozboot/archlinux.py
+@@ -44,17 +44,16 @@ class ArchlinuxBootstrapper(
+         'libvpx',
+         'libxt',
+         'mime-types',
+         'nasm',
+         'startup-notification',
+         'gst-plugins-base-libs',
+         'libpulse',
+         'xorg-server-xvfb',
+-        'yasm',
+         'gst-libav',
+         'gst-plugins-good',
+     ]
+ 
+     BROWSER_AUR_PACKAGES = [
+         'https://aur.archlinux.org/cgit/aur.git/snapshot/uuid.tar.gz',
+     ]
+ 
+diff --git a/python/mozboot/mozboot/centosfedora.py b/python/mozboot/mozboot/centosfedora.py
+--- a/python/mozboot/mozboot/centosfedora.py
++++ b/python/mozboot/mozboot/centosfedora.py
+@@ -1,16 +1,14 @@
+ # This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ # You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+ from __future__ import absolute_import, print_function, unicode_literals
+ 
+-import platform
+-
+ from mozboot.base import BaseBootstrapper
+ from mozboot.linux_common import LinuxBootstrapper
+ 
+ 
+ class CentOSFedoraBootstrapper(
+         LinuxBootstrapper,
+         BaseBootstrapper):
+     def __init__(self, distro, version, dist_id, **kwargs):
+@@ -36,17 +34,16 @@ class CentOSFedoraBootstrapper(
+             'alsa-lib-devel',
+             'dbus-glib-devel',
+             'glibc-static',
+             'libstdc++-static',
+             'libXt-devel',
+             'nasm',
+             'pulseaudio-libs-devel',
+             'wireless-tools-devel',
+-            'yasm',
+         ]
+ 
+         self.mobile_android_packages = [
+             'java-1.8.0-openjdk-devel',
+             # For downloading the Android SDK and NDK.
+             'wget',
+         ]
+ 
+@@ -114,25 +111,16 @@ class CentOSFedoraBootstrapper(
+     def install_mobile_android_artifact_mode_packages(self):
+         self.ensure_mobile_android_packages(artifact_mode=True)
+ 
+     def ensure_browser_packages(self, artifact_mode=False):
+         # TODO: Figure out what not to install for artifact mode
+         self.dnf_groupinstall(*self.browser_group_packages)
+         self.dnf_install(*self.browser_packages)
+ 
+-        if self.distro in ('centos') and self.version == 6:
+-            yasm = ('http://dl.fedoraproject.org/pub/epel/6/i386/'
+-                    'Packages/y/yasm-1.2.0-1.el6.i686.rpm')
+-            if platform.architecture()[0] == '64bit':
+-                yasm = ('http://dl.fedoraproject.org/pub/epel/6/x86_64/'
+-                        'Packages/y/yasm-1.2.0-1.el6.x86_64.rpm')
+-
+-            self.run_as_root(['rpm', '-ivh', yasm])
+-
+     def ensure_mobile_android_packages(self, artifact_mode=False):
+         # Install Android specific packages.
+         self.dnf_install(*self.mobile_android_packages)
+ 
+         self.ensure_java()
+         from mozboot import android
+         android.ensure_android('linux', artifact_mode=artifact_mode,
+                                no_interactive=self.no_interactive)
+diff --git a/python/mozboot/mozboot/debian.py b/python/mozboot/mozboot/debian.py
+--- a/python/mozboot/mozboot/debian.py
++++ b/python/mozboot/mozboot/debian.py
+@@ -60,17 +60,16 @@ class DebianBootstrapper(
+         'libdbus-1-dev',
+         'libdbus-glib-1-dev',
+         'libgtk-3-dev',
+         'libpulse-dev',
+         'libx11-xcb-dev',
+         'libxt-dev',
+         'python-dbus',
+         'xvfb',
+-        'yasm',
+     ]
+ 
+     # Subclasses can add packages to this variable to have them installed.
+     BROWSER_DISTRO_PACKAGES = []
+ 
+     # These are common packages for building Firefox for Android
+     # (mobile/android) for all Debian-derived distros (such as Ubuntu).
+     MOBILE_ANDROID_COMMON_PACKAGES = [
+diff --git a/python/mozboot/mozboot/freebsd.py b/python/mozboot/mozboot/freebsd.py
+--- a/python/mozboot/mozboot/freebsd.py
++++ b/python/mozboot/mozboot/freebsd.py
+@@ -27,17 +27,16 @@ class FreeBSDBootstrapper(BaseBootstrapp
+         self.browser_packages = [
+             'dbus-glib',
+             'gtk3',
+             'libXt',
+             'mesa-dri',  # depends on llvm*
+             'nasm',
+             'pulseaudio',
+             'v4l_compat',
+-            'yasm',
+         ]
+ 
+         if not self.which('as'):
+             self.packages.append('binutils')
+ 
+         if not self.which('unzip'):
+             self.packages.append('unzip')
+ 
+diff --git a/python/mozboot/mozboot/gentoo.py b/python/mozboot/mozboot/gentoo.py
+--- a/python/mozboot/mozboot/gentoo.py
++++ b/python/mozboot/mozboot/gentoo.py
+@@ -38,17 +38,16 @@ class GentooBootstrapper(
+                           'app-arch/zip',
+                           'sys-devel/autoconf:2.1'
+                           ])
+ 
+     def ensure_browser_packages(self, artifact_mode=False):
+         # TODO: Figure out what not to install for artifact mode
+         self.run_as_root(['emerge',
+                           '--oneshot', '--noreplace', '--quiet', '--newuse',
+-                          'dev-lang/yasm',
+                           'dev-libs/dbus-glib',
+                           'media-sound/pulseaudio',
+                           'x11-libs/gtk+:3',
+                           'x11-libs/libXt'
+                           ])
+ 
+     def ensure_mobile_android_packages(self, artifact_mode=False):
+         self.run_as_root(['emerge', '--noreplace', '--quiet',
+diff --git a/python/mozboot/mozboot/gentoo.py.1672894.later b/python/mozboot/mozboot/gentoo.py.1672894.later
+--- a/python/mozboot/mozboot/gentoo.py.1672894.later
++++ b/python/mozboot/mozboot/gentoo.py.1672894.later
+@@ -11,10 +11,9 @@
+                            'app-arch/zip',
+ -                          'sys-devel/autoconf:2.1'
+                            ])
+  
+      def ensure_browser_packages(self, artifact_mode=False):
+          # TODO: Figure out what not to install for artifact mode
+          self.run_as_root(['emerge',
+                            '--oneshot', '--noreplace', '--quiet', '--newuse',
+-                           'dev-lang/yasm',
+                            'dev-libs/dbus-glib',
+diff --git a/python/mozboot/mozboot/openbsd.py b/python/mozboot/mozboot/openbsd.py
+--- a/python/mozboot/mozboot/openbsd.py
++++ b/python/mozboot/mozboot/openbsd.py
+@@ -18,17 +18,16 @@ class OpenBSDBootstrapper(BaseBootstrapp
+             'wget',
+             'unzip',
+             'zip',
+         ]
+ 
+         self.browser_packages = [
+             'llvm',
+             'nasm',
+-            'yasm',
+             'gtk+3',
+             'dbus-glib',
+             'pulseaudio',
+         ]
+ 
+     def install_system_packages(self):
+         # we use -z because there's no other way to say "any autoconf-2.13"
+         self.run_as_root(['pkg_add', '-z'] + self.packages)
+diff --git a/python/mozboot/mozboot/opensuse.py b/python/mozboot/mozboot/opensuse.py
+--- a/python/mozboot/mozboot/opensuse.py
++++ b/python/mozboot/mozboot/opensuse.py
+@@ -30,17 +30,16 @@ class OpenSUSEBootstrapper(
+         'gtk3-devel',
+         'dbus-1-glib-devel',
+         'gconf2-devel',
+         'glibc-devel-static',
+         'libstdc++-devel',
+         'libXt-devel',
+         'libproxy-devel',
+         'libuuid-devel',
+-        'yasm',
+         'clang-devel',
+         'patterns-gnome-devel_gnome',
+     ]
+ 
+     BROWSER_GROUP_PACKAGES = [
+         'devel_C_C++',
+         'devel_gnome',
+     ]
+diff --git a/python/mozboot/mozboot/osx.py b/python/mozboot/mozboot/osx.py
+--- a/python/mozboot/mozboot/osx.py
++++ b/python/mozboot/mozboot/osx.py
+@@ -344,17 +344,16 @@ class OSXBootstrapper(BaseBootstrapper):
+             'watchman',
+         ]
+         self._ensure_homebrew_packages(packages)
+ 
+     def ensure_homebrew_browser_packages(self, artifact_mode=False):
+         # TODO: Figure out what not to install for artifact mode
+         packages = [
+             'nasm',
+-            'yasm',
+         ]
+         self._ensure_homebrew_packages(packages)
+ 
+     def ensure_homebrew_mobile_android_packages(self, artifact_mode=False):
+         # Multi-part process:
+         # 1. System packages.
+         # 2. Android SDK. Android NDK only if we are not in artifact mode. Android packages.
+ 
+@@ -423,17 +422,16 @@ class OSXBootstrapper(BaseBootstrapper):
+             self.run_as_root([self.port, 'select', '--set', 'python', 'python27'])
+         else:
+             print('The right python version is already active.')
+ 
+     def ensure_macports_browser_packages(self, artifact_mode=False):
+         # TODO: Figure out what not to install for artifact mode
+         packages = [
+             'nasm',
+-            'yasm',
+             'llvm-7.0',
+             'clang-7.0',
+         ]
+ 
+         self._ensure_macports_packages(packages)
+ 
+     def ensure_macports_mobile_android_packages(self, artifact_mode=False):
+         # Multi-part process:
+diff --git a/python/mozboot/mozboot/osx.py.1692940-10.later b/python/mozboot/mozboot/osx.py.1692940-10.later
+new file mode 100644
+--- /dev/null
++++ b/python/mozboot/mozboot/osx.py.1692940-10.later
+@@ -0,0 +1,76 @@
++--- osx.py
+++++ osx.py
++@@ -207,22 +207,20 @@ class OSXBootstrapper(BaseBootstrapper):
++         if not hg_modern:
++             print(
++                 "Mercurial wasn't found or is not sufficiently modern. "
++                 "It will be installed with %s" % self.package_manager
++             )
++         getattr(self, "ensure_%s_system_packages" % self.package_manager)(not hg_modern)
++ 
++     def install_browser_packages(self, mozconfig_builder):
++-        getattr(self, "ensure_%s_browser_packages" % self.package_manager)()
+++        pass
++ 
++     def install_browser_artifact_mode_packages(self, mozconfig_builder):
++-        getattr(self, "ensure_%s_browser_packages" % self.package_manager)(
++-            artifact_mode=True
++-        )
+++        pass
++ 
++     def install_mobile_android_packages(self, mozconfig_builder):
++         getattr(self, "ensure_%s_mobile_android_packages" % self.package_manager)(
++             mozconfig_builder
++         )
++ 
++     def install_mobile_android_artifact_mode_packages(self, mozconfig_builder):
++         getattr(self, "ensure_%s_mobile_android_packages" % self.package_manager)(
++@@ -384,23 +382,16 @@ class OSXBootstrapper(BaseBootstrapper):
++             "gnu-tar",
++             "terminal-notifier",
++             "watchman",
++         ]
++         if install_mercurial:
++             packages.append("mercurial")
++         self._ensure_homebrew_packages(packages)
++ 
++-    def ensure_homebrew_browser_packages(self, artifact_mode=False):
++-        # TODO: Figure out what not to install for artifact mode
++-        packages = [
++-            "yasm",
++-        ]
++-        self._ensure_homebrew_packages(packages)
++-
++     def ensure_homebrew_mobile_android_packages(
++         self, mozconfig_builder, artifact_mode=False
++     ):
++         # Multi-part process:
++         # 1. System packages.
++         # 2. Android SDK. Android NDK only if we are not in artifact mode. Android packages.
++ 
++         # 1. System packages.
++@@ -462,24 +453,16 @@ class OSXBootstrapper(BaseBootstrapper):
++         for python in pythons:
++             if "active" in python:
++                 active = python
++         if "python27" not in active:
++             self.run_as_root([self.port, "select", "--set", "python", "python27"])
++         else:
++             print("The right python version is already active.")
++ 
++-    def ensure_macports_browser_packages(self, artifact_mode=False):
++-        # TODO: Figure out what not to install for artifact mode
++-        packages = [
++-            "yasm",
++-        ]
++-
++-        self._ensure_macports_packages(packages)
++-
++     def ensure_macports_mobile_android_packages(
++         self, mozconfig_builder, artifact_mode=False
++     ):
++         # Multi-part process:
++         # 1. System packages.
++         # 2. Android SDK. Android NDK only if we are not in artifact mode. Android packages.
++ 
++         # 1. System packages.
+diff --git a/python/mozboot/mozboot/solus.py b/python/mozboot/mozboot/solus.py
+--- a/python/mozboot/mozboot/solus.py
++++ b/python/mozboot/mozboot/solus.py
+@@ -41,17 +41,16 @@ class SolusBootstrapper(
+         'libvpx',
+         'libxt',
+         'nasm',
+         'libstartup-notification',
+         'gst-plugins-base',
+         'gst-plugins-good',
+         'pulseaudio',
+         'xorg-server-xvfb',
+-        'yasm',
+     ]
+ 
+     MOBILE_ANDROID_COMMON_PACKAGES = [
+         'openjdk-8',
+         # For downloading the Android SDK and NDK.
+         'wget',
+         # See comment about 32 bit binaries and multilib below.
+         'ncurses-32bit',
+diff --git a/python/mozboot/mozboot/void.py b/python/mozboot/mozboot/void.py
+--- a/python/mozboot/mozboot/void.py
++++ b/python/mozboot/mozboot/void.py
+@@ -30,17 +30,16 @@ class VoidBootstrapper(
+         'dbus-devel',
+         'dbus-glib-devel',
+         'gtk+3-devel',
+         'pulseaudio',
+         'pulseaudio-devel',
+         'libcurl-devel',
+         'libxcb-devel',
+         'libXt-devel',
+-        'yasm',
+     ]
+ 
+     MOBILE_ANDROID_PACKAGES = [
+         'openjdk8',  # Android's `sdkmanager` requires Java 1.8 exactly.
+         'wget',  # For downloading the Android SDK and NDK.
+     ]
+ 
+     def __init__(self, version, dist_id, **kwargs):
+diff --git a/python/mozboot/mozboot/windows.py b/python/mozboot/mozboot/windows.py
+--- a/python/mozboot/mozboot/windows.py
++++ b/python/mozboot/mozboot/windows.py
+@@ -52,17 +52,16 @@ class WindowsBootstrapper(BaseBootstrapp
+         'zip',
+         'unzip',
+         'mingw-w64-x86_64-toolchain',  # TODO: Remove when Mercurial is installable from a wheel.
+         'mingw-w64-i686-toolchain'
+     ]
+ 
+     BROWSER_PACKAGES = [
+         'mingw-w64-x86_64-nasm',
+-        'mingw-w64-x86_64-yasm',
+         'mingw-w64-i686-nsis'
+     ]
+ 
+     MOBILE_ANDROID_COMMON_PACKAGES = [
+         'wget'
+     ]
+ 
+     def __init__(self, **kwargs):

+ 32 - 0
mozilla-release/patches/1692940-12-88a1.patch

@@ -0,0 +1,32 @@
+# HG changeset patch
+# User Mihai Alexandru Michis <malexandru@mozilla.com>
+# Date 1614049543 -7200
+#      Tue Feb 23 05:05:43 2021 +0200
+# Node ID 07420cd0943250559d722febc1bee339929ccd24
+# Parent  263520df233676201b8e5ba481a2e9a24e26f083
+Bug 1692940 - Fix bustages on Win 2012 AArch64. a=bustage-fix
+
+CLOSED TREE
+
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1456,17 +1456,17 @@ with only_when(compile_environment):
+     def ffvpx_nasm():
+         # nasm 2.10 for AVX-2 support.
+         return namespace(version="2.10", what="FFVPX")
+ 
+     # ffvpx_nasm can't indirectly depend on vpx_as_flags, because it depends
+     # on a compiler test, so we have to do a little bit of dance here.
+     @depends(ffvpx, vpx_as_flags, target)
+     def ffvpx(ffvpx, vpx_as_flags, target):
+-        if ffvpx and target.cpu in ("arm", "aarch64"):
++        if ffvpx and vpx_as_flags and target.cpu in ("arm", "aarch64"):
+             ffvpx.flags.extend(vpx_as_flags)
+         return ffvpx
+ 
+     set_config('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_define('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_config('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
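
Editorial note: the one-line change above is easy to miss. The fix implies that on the Windows 2012 AArch64 builders the vpx_as_flags dependency resolves to something falsy (most likely None, since no assembler flags apply there), and extending a list with None raises, which is what broke the tree. A minimal illustration with made-up values:

    flags = ["-DPIC", "-DWIN64"]
    vpx_as_flags = None           # what the dependency yields when no flags apply

    if vpx_as_flags:              # the added guard
        flags.extend(vpx_as_flags)
    # without the guard: flags.extend(None) raises
    # TypeError: 'NoneType' object is not iterable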

+ 33 - 0
mozilla-release/patches/1692945-1-87a1.patch

@@ -0,0 +1,33 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1613510225 0
+# Node ID 36fd576c711b0d343d59acdfbcb9adf9b1714f91
+# Parent  bb9883884262859c95df32b8d1e0ed62ca38bf11
+Bug 1692945 - Remove check for yasm/gas in js. r=firefox-build-system-reviewers,andi,dmajor
+
+Building the ICU data file hasn't required yasm since bug 1650299, and
+all the compilers we support now have a GAS-like assembler that we use
+unconditionally.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105268
+
+diff --git a/js/moz.configure b/js/moz.configure
+--- a/js/moz.configure
++++ b/js/moz.configure
+@@ -523,16 +523,8 @@ def icu_version(build_env):
+ set_config('MOZ_ICU_VERSION', icu_version)
+ 
+ # Source files that use ICU should have control over which parts of the ICU
+ # namespace they want to use.
+ set_define('U_USING_ICU_NAMESPACE', '0', when='--with-intl-api')
+ 
+ # We build ICU as a static library.
+ set_define('U_STATIC_IMPLEMENTATION', True, when=depends(system_icu)(lambda x: not x))
+-
+-@depends(yasm, gnu_as, target, compile_environment)
+-def can_build_data_file(yasm, gnu_as, target, compile_environment):
+-    if not compile_environment or (target.kernel == 'WINNT' and target.cpu == 'aarch64'):
+-        return
+-    if not yasm and not gnu_as:
+-        die('Building ICU requires either yasm or a GNU assembler. If you do not have '
+-            'either of those available for this platform you must use --without-intl-api')

+ 80 - 0
mozilla-release/patches/1692945-2-87a1.patch

@@ -0,0 +1,80 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1613510226 0
+# Node ID 44e6140070a0b4088e1ca8017611af569e57c6d7
+# Parent  4e0e7d9feca0f1152637d1add13c3d145d5eca46
+Bug 1692945 - Remove unused [YN]ASM variables. r=firefox-build-system-reviewers,andi,dmajor
+
+None of HAVE_NASM, HAVE_YASM, NASM_MAJOR_VERSION and NASM_MINOR_VERSION are
+used. Also, the YASM variable is not necessary for old-configure anymore.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105269
+
+diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
+--- a/build/moz.configure/toolchain.configure
++++ b/build/moz.configure/toolchain.configure
+@@ -2165,30 +2165,16 @@ def nasm_version(nasm):
+     if retcode:
+         # mac stub binary
+         return None
+ 
+     version = stdout.splitlines()[0].split()[2]
+     return Version(version)
+ 
+ 
+-@depends_if(nasm_version)
+-def nasm_major_version(nasm_version):
+-    return str(nasm_version.major)
+-
+-
+-@depends_if(nasm_version)
+-def nasm_minor_version(nasm_version):
+-    return str(nasm_version.minor)
+-
+-
+-set_config('NASM_MAJOR_VERSION', nasm_major_version)
+-set_config('NASM_MINOR_VERSION', nasm_minor_version)
+-
+-
+ @depends(nasm, target)
+ def nasm_asflags(nasm, target):
+     if nasm:
+         asflags = {
+             ('OSX', 'x86'): ['-f', 'macho32'],
+             ('OSX', 'x86_64'): ['-f', 'macho64'],
+             ('WINNT', 'x86'): ['-f', 'win32'],
+             ('WINNT', 'x86_64'): ['-f', 'win64'],
+@@ -2200,33 +2186,16 @@ def nasm_asflags(nasm, target):
+                 asflags = ['-f', 'elf32']
+             elif target.cpu == 'x86_64':
+                 asflags = ['-f', 'elf64']
+         return asflags
+ 
+ 
+ set_config('NASM_ASFLAGS', nasm_asflags)
+ 
+-@depends(nasm_asflags)
+-def have_nasm(value):
+-    if value:
+-        return True
+-
+-
+-@depends(yasm_asflags)
+-def have_yasm(yasm_asflags):
+-    if yasm_asflags:
+-        return True
+-
+-set_config('HAVE_NASM', have_nasm)
+-
+-set_config('HAVE_YASM', have_yasm)
+-# Until the YASM variable is not necessary in old-configure.
+-add_old_configure_assignment('YASM', have_yasm)
+-
+ 
+ # clang-cl integrated assembler support
+ # ==============================================================
+ @depends(target)
+ def clangcl_asflags(target):
+     asflags = None
+     if target.os == 'WINNT' and target.cpu == 'aarch64':
+         asflags = ['--target=aarch64-windows-msvc']

+ 122 - 0
mozilla-release/patches/1693215-1-88a1.patch

@@ -0,0 +1,122 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614031885 0
+# Node ID cda1a9bb647e59bfb0c10821daa02c9f1728ed5a
+# Parent  495ef3b30ea9fbd85f2c69fe0970c6cc7a817935
+Bug 1693215 - Always depend on yasm >= 1.2 for ffvpx. r=firefox-build-system-reviewers,dmajor
+
+Yasm 1.2 was released in October 2011. Let's just assume everyone can
+use that now.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105398
+
+diff --git a/media/ffvpx/ffvpxcommon.mozbuild b/media/ffvpx/ffvpxcommon.mozbuild
+--- a/media/ffvpx/ffvpxcommon.mozbuild
++++ b/media/ffvpx/ffvpxcommon.mozbuild
+@@ -15,20 +15,16 @@ if CONFIG['FFVPX_ASFLAGS']:
+     if CONFIG['FFVPX_USE_YASM']:
+         USE_YASM = True
+ 
+     if CONFIG['OS_ARCH'] == 'WINNT':
+        # Fix inline symbols and math defines for windows.
+         DEFINES['_USE_MATH_DEFINES'] = True
+         DEFINES['inline'] = "__inline"
+ 
+-    if USE_YASM and not CONFIG['YASM_HAS_AVX2']:
+-        DEFINES['YASM_MISSING_AVX2'] = True
+-
+-
+ LOCAL_INCLUDES += ['/media/ffvpx']
+ 
+ # We allow warnings for third-party code that can be updated from upstream.
+ AllowCompilerWarnings()
+ 
+ # Suppress warnings in third-party code.
+ if CONFIG['CC_TYPE'] in ('clang', 'clang-cl', 'gcc'):
+     CFLAGS += [
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1448,35 +1448,27 @@ with only_when(compile_environment):
+                 flags = ['-D__x86_64__', '-DPIC', '-DELF']
+         if flags:
+             if target.kernel == 'Linux' and target.os == 'GNU':
+                 need_yasm = Version('1.0.1')
+             else:
+                 need_yasm = Version('1.1')
+             return namespace(flags=flags, need_yasm=need_yasm)
+ 
+-
+     set_config('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+     set_define('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+     set_config('LIBAV_FFT_ASFLAGS', libav_fft.flags)
+ 
+ 
+ # FFmpeg's ffvpx configuration
+ # ==============================================================
+ with only_when(compile_environment):
+-    @depends_if(yasm_version)
+-    def yasm_has_avx2(yasm_version):
+-        return yasm_version >= '1.2'
+ 
+-
+-    set_config('YASM_HAS_AVX2', yasm_has_avx2)
+-
+-
+-    @depends(yasm_has_avx2, libav_fft, vpx_as_flags, target)
+-    def ffvpx(yasm_has_avx2, libav_fft, vpx_as_flags, target):
++    @depends(libav_fft, vpx_as_flags, target)
++    def ffvpx(libav_fft, vpx_as_flags, target):
+         enable = flac_only = use_yasm = False
+         flags = []
+         if target.cpu in ('x86', 'x86_64') or \
+                 target.cpu == 'aarch64' and target.kernel in ('WINNT', 'Darwin'):
+             enable = True
+             if libav_fft and libav_fft.flags:
+                 use_yasm = True
+                 flags.extend(libav_fft.flags)
+@@ -1500,41 +1492,35 @@ with only_when(compile_environment):
+         elif target.cpu in ('arm', 'aarch64') and \
+                 target.kernel not in ('WINNT', 'Darwin'):
+             enable = flac_only = True
+             flags.extend(vpx_as_flags)
+ 
+         if use_yasm:
+             # default disabled components
+             flags.append('-Pdefaults_disabled.asm')
+-            if not yasm_has_avx2:
+-                flags.extend((
+-                    '-DHAVE_AVX2=0',
+-                    '-DHAVE_AVX2_INTERNAL=0',
+-                    '-DHAVE_AVX2_EXTERNAL=0',
+-                ))
+ 
+         return namespace(
+             enable=enable,
+-            use_yasm=use_yasm,
++            need_yasm="1.2" if use_yasm else False,
+             flac_only=flac_only,
+             flags=flags,
+         )
+ 
+ 
+     set_config('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_define('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_config('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
+-    set_config('FFVPX_USE_YASM', True, when=ffvpx.use_yasm)
++    set_config("FFVPX_USE_YASM", True, when=ffvpx.need_yasm)
+ 
+ 
+ @depends(yasm_version, in_tree_vpx.need_yasm, in_tree_jpeg.use_yasm,
+-         libav_fft.need_yasm, ffvpx.use_yasm)
++         libav_fft.need_yasm, ffvpx.need_yasm)
+ @imports(_from='__builtin__', _import='sorted')
+ def valid_yasm_version(yasm_version, for_vpx, for_jpeg, for_libav,
+                        for_ffvpx=False):
+     # Note: the default for for_ffvpx above only matters for unit tests.
+     requires = {
+         'vpx': for_vpx,
+         'jpeg': for_jpeg,
+         'libav': for_libav,

+ 168 - 0
mozilla-release/patches/1693215-2-88a1.patch

@@ -0,0 +1,168 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614031886 0
+#      Mon Feb 22 22:11:26 2021 +0000
+# Node ID 8680ed398ecf9bf61073ecd1cbf9756ddbdc00e4
+# Parent  0227a9e4d949c52b44cc2f5fdf3bc93d575d08f2
+Bug 1693215 - Don't check for yasm for libav. r=firefox-build-system-reviewers,dmajor
+
+Bug 1476231 actually removed libav, so we don't build it, and we need
+neither the yasm check nor the LIBAV_FFT_ASFLAGS variable.
+However, we still have checks, both in moz.build and code, for
+MOZ_LIBAV_FFT, so we need to keep that.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105399
+
+diff --git a/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py b/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py
+--- a/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py
++++ b/python/mozbuild/mozbuild/test/configure/test_toolkit_moz_configure.py
+@@ -88,78 +88,78 @@ class TestToolkitMozConfigure(BaseConfig
+         self.assertEqual(get_value(environ={'MOZ_AUTOMATION': 1}), None)
+ 
+     def test_valid_yasm_version(self):
+         out = StringIO()
+         sandbox = self.get_sandbox({}, {}, out=out)
+         func = sandbox._depends[sandbox['valid_yasm_version']]._func
+ 
+         # Missing yasm is not an error when nothing requires it.
+-        func(None, False, False, False)
++        func(None, False, False)
+ 
+         # Any version of yasm works when nothing requires it.
+-        func(Version('1.0'), False, False, False)
++        func(Version('1.0'), False, False)
+ 
+         # Any version of yasm works when something requires any version.
+-        func(Version('1.0'), True, False, False)
+-        func(Version('1.0'), True, True, False)
+-        func(Version('1.0'), False, True, False)
++        func(Version('1.0'), True, False)
++        func(Version('1.0'), True, True)
++        func(Version('1.0'), False, True)
+ 
+         # A version of yasm greater than any requirement works.
+-        func(Version('1.5'), Version('1.0'), True, False)
+-        func(Version('1.5'), True, Version('1.0'), False)
+-        func(Version('1.5'), Version('1.1'), Version('1.0'), False)
++        func(Version("1.5"), Version("1.0"), True)
++        func(Version("1.5"), True, Version("1.0"))
++        func(Version("1.5"), Version("1.1"), Version("1.0"))
+ 
+         out.truncate(0)
+         out.seek(0)
+         with self.assertRaises(SystemExit):
+-            func(None, Version('1.0'), False, False)
++            func(None, Version('1.0'), False)
+ 
+         self.assertEqual(
+             out.getvalue(),
+             ('ERROR: Yasm is required to build with vpx, but you do not appear '
+              'to have Yasm installed.\n'),
+         )
+ 
+         out.truncate(0)
+         out.seek(0)
+         with self.assertRaises(SystemExit):
+-            func(None, Version('1.0'), Version('1.0'), False)
++            func(None, Version('1.0'), Version('1.0'))
+ 
+         self.assertEqual(
+             out.getvalue(),
+             ('ERROR: Yasm is required to build with jpeg and vpx, but you do not appear '
+              'to have Yasm installed.\n'),
+         )
+ 
+         out.truncate(0)
+         out.seek(0)
+         with self.assertRaises(SystemExit):
+-            func(None, Version('1.0'), Version('1.0'), Version('1.0'))
++            func(None, Version('1.0'), Version('1.0'))
+ 
+         self.assertEqual(
+             out.getvalue(),
+             ('ERROR: Yasm is required to build with jpeg, libav and vpx, but you do not appear '
+              'to have Yasm installed.\n'),
+         )
+ 
+         out.truncate(0)
+         out.seek(0)
+         with self.assertRaises(SystemExit):
+-            func(Version('1.0'), Version('1.1'), Version('1.0'), False)
++            func(Version('1.0'), Version('1.1'), Version('1.0'))
+ 
+         self.assertEqual(
+             out.getvalue(),
+             'ERROR: Yasm version 1.1 or greater is required to build with vpx.\n'
+         )
+ 
+         out.truncate(0)
+         out.seek(0)
+         with self.assertRaises(SystemExit):
+-            func(Version('1.0'), True, Version('1.0.1'), False)
++            func(Version('1.0'), True, Version('1.0.1'))
+ 
+         self.assertEqual(
+             out.getvalue(),
+             'ERROR: Yasm version 1.0.1 or greater is required to build with jpeg.\n'
+         )
+ 
+ 
+ if __name__ == '__main__':
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1442,25 +1442,20 @@ with only_when(compile_environment):
+         elif target.cpu == 'x86_64':
+             if target.kernel == 'Darwin':
+                 flags = ['-D__x86_64__', '-DPIC', '-DMACHO']
+             elif target.kernel == 'WINNT':
+                 flags = ['-D__x86_64__', '-DPIC', '-DWIN64', '-DMSVC']
+             else:
+                 flags = ['-D__x86_64__', '-DPIC', '-DELF']
+         if flags:
+-            if target.kernel == 'Linux' and target.os == 'GNU':
+-                need_yasm = Version('1.0.1')
+-            else:
+-                need_yasm = Version('1.1')
+-            return namespace(flags=flags, need_yasm=need_yasm)
++            return namespace(flags=flags)
+ 
+     set_config('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+     set_define('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+-    set_config('LIBAV_FFT_ASFLAGS', libav_fft.flags)
+ 
+ 
+ # FFmpeg's ffvpx configuration
+ # ==============================================================
+ with only_when(compile_environment):
+ 
+     @depends(libav_fft, vpx_as_flags, target)
+     def ffvpx(libav_fft, vpx_as_flags, target):
+@@ -1510,25 +1505,23 @@ with only_when(compile_environment):
+     set_define('MOZ_FFVPX', True, when=ffvpx.enable)
+     set_config('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
+     set_config("FFVPX_USE_YASM", True, when=ffvpx.need_yasm)
+ 
+ 
+ @depends(yasm_version, in_tree_vpx.need_yasm, in_tree_jpeg.use_yasm,
+-         libav_fft.need_yasm, ffvpx.need_yasm)
++         ffvpx.need_yasm)
+ @imports(_from='__builtin__', _import='sorted')
+-def valid_yasm_version(yasm_version, for_vpx, for_jpeg, for_libav,
+-                       for_ffvpx=False):
++def valid_yasm_version(yasm_version, for_vpx, for_jpeg, for_ffvpx=False):
+     # Note: the default for for_ffvpx above only matters for unit tests.
+     requires = {
+         'vpx': for_vpx,
+         'jpeg': for_jpeg,
+-        'libav': for_libav,
+         'ffvpx': for_ffvpx,
+     }
+     requires = {k: v for (k, v) in requires.items() if v}
+     if requires and not yasm_version:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = ' and '.join((', '.join(items[:-1]), items[-1]))
+         else:
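
For reference only, a minimal standalone sketch of the requirement-joining logic that valid_yasm_version keeps once the libav argument is dropped, matching the messages the tests above assert on. The name describe_missing and the plain-dict input are illustrative stand-ins; the real logic lives inside valid_yasm_version in toolkit/moz.configure.

    # Sketch: build the "jpeg and vpx" style component list used in the
    # "Yasm is required to build with ..." error message.
    def describe_missing(requires):
        # Keep only the components that actually require yasm.
        requires = {k: v for (k, v) in requires.items() if v}
        if not requires:
            return None
        items = sorted(requires.keys())
        if len(items) > 1:
            what = ' and '.join((', '.join(items[:-1]), items[-1]))
        else:
            what = items[0]
        return ('Yasm is required to build with %s, but you do not appear '
                'to have Yasm installed.' % what)

    # describe_missing({'jpeg': '1.0.1', 'vpx': '1.1', 'ffvpx': False})
    # -> 'Yasm is required to build with jpeg and vpx, but you do not appear ...'
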

+ 152 - 0
mozilla-release/patches/1693215-3-88a1.patch

@@ -0,0 +1,152 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1614031886 0
+# Node ID 5d84014a7bff591657ed6cc47c1d8b108121d4a5
+# Parent  09c864df41d9750f21bcc6a541303ee03a99902d
+Bug 1693215 - Simplify the ffvpx configuration. r=firefox-build-system-reviewers,dmajor
+
+Since MOZ_LIBAV_FFT doesn't need the flags, we simplify the libav_fft
+function to return true in the conditions it used to return flags for,
+which is clearer.
+We then move all these flags to the ffvpx function, and rearrange the
+tests to be less convoluted, and with hope, more readable.
+
+This has been verified to not change the outcomes on the following
+targets:
+- i686-pc-linux-gnu
+- x86_64-pc-linux-gnu
+- aarch64-unknown-linux-gnu
+- arm-unknown-linux-gnueabi
+- s390x-unknown-linux-gnu
+- i686-apple-darwin
+- x86_64-apple-darwin
+- aarch64-apple-darwin
+- i686-pc-mingw32
+- x86_64-pc-mingw32
+- aarch64-pc-mingw32
+
+Differential Revision: https://phabricator.services.mozilla.com/D105400
+
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1385,75 +1385,75 @@ with only_when(compile_environment):
+                 need_yasm = Version('1.1')
+ 
+         return namespace(flags=flags, use_yasm=use_yasm, need_yasm=need_yasm)
+ 
+     set_config('LIBJPEG_TURBO_USE_YASM', in_tree_jpeg.use_yasm)
+     set_config('LIBJPEG_TURBO_ASFLAGS', in_tree_jpeg.flags)
+ 
+ 
+-# Libav-fft Support
++# FFmpeg's ffvpx configuration
+ # ==============================================================
+ with only_when(compile_environment):
+     @depends(target)
+     def libav_fft(target):
+-        flags = None
+-        if target.kernel == 'WINNT' and target.cpu == 'x86':
+-            flags = ['-DPIC', '-DWIN32']
+-        elif target.kernel == 'WINNT' and target.cpu == 'aarch64':
+-            flags = ['-DPIC', '-DWIN64']
+-        elif target.cpu == 'x86_64':
+-            if target.kernel == 'Darwin':
+-                flags = ['-D__x86_64__', '-DPIC', '-DMACHO']
+-            elif target.kernel == 'WINNT':
+-                flags = ['-D__x86_64__', '-DPIC', '-DWIN64', '-DMSVC']
+-            else:
+-                flags = ['-D__x86_64__', '-DPIC', '-DELF']
+-        if flags:
+-            return namespace(flags=flags)
++        return target.kernel == "WINNT" or target.cpu == "x86_64"
+ 
+     set_config('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+     set_define('MOZ_LIBAV_FFT', depends(when=libav_fft)(lambda: True))
+ 
+ 
+-# FFmpeg's ffvpx configuration
+-# ==============================================================
+ with only_when(compile_environment):
+ 
+-    @depends(libav_fft, vpx_as_flags, target)
+-    def ffvpx(libav_fft, vpx_as_flags, target):
+-        enable = flac_only = use_yasm = False
++    @depends(vpx_as_flags, target)
++    def ffvpx(vpx_as_flags, target):
++        enable = use_yasm = True
++        flac_only = False
+         flags = []
+-        if target.cpu in ('x86', 'x86_64') or \
+-                target.cpu == 'aarch64' and target.kernel in ('WINNT', 'Darwin'):
+-            enable = True
+-            if libav_fft and libav_fft.flags:
+-                use_yasm = True
+-                flags.extend(libav_fft.flags)
+-                if target.kernel == 'WINNT':
+-                    if target.cpu == 'x86':
+-                        # 32-bit windows need to prefix symbols with an underscore.
+-                        flags.extend(('-DPREFIX', '-Pconfig_win32.asm'))
+-                    elif target.cpu == 'aarch64':
+-                        use_yasm = False
+-                    else:
+-                        flags.append('-Pconfig_win64.asm')
+-                elif target.kernel == 'Darwin':
+-                    # 32/64-bit macosx assemblers need to prefix symbols with an
+-                    # underscore.
+-                    flags.extend(('-DPREFIX', '-Pconfig_darwin64.asm'))
+-                else:
+-                    # Default to unix.
+-                    flags.append('-Pconfig_unix64.asm')
++
++        if target.kernel == "WINNT":
++            if target.cpu == "x86":
++                # 32-bit windows need to prefix symbols with an underscore.
++                flags = ["-DPIC", "-DWIN32", "-DPREFIX", "-Pconfig_win32.asm"]
++            elif target.cpu == "x86_64":
++                flags = [
++                    "-D__x86_64__",
++                    "-DPIC",
++                    "-DWIN64",
++                    "-DMSVC",
++                    "-Pconfig_win64.asm",
++                ]
++            elif target.cpu == "aarch64":
++                flags = ["-DPIC", "-DWIN64"]
++                use_yasm = False
++        elif target.kernel == "Darwin":
++            if target.cpu == "x86_64":
++                # 32/64-bit macosx asemblers need to prefix symbols with an
++                # underscore.
++                flags = [
++                    "-D__x86_64__",
++                    "-DPIC",
++                    "-DMACHO",
++                    "-DPREFIX",
++                    "-Pconfig_darwin64.asm",
++                ]
+             else:
+                 flac_only = True
+-        elif target.cpu in ('arm', 'aarch64') and \
+-                target.kernel not in ('WINNT', 'Darwin'):
+-            enable = flac_only = True
++        elif target.cpu == "x86_64":
++            flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"]
++        elif target.cpu == "x86":
++            flac_only = True
++        elif target.cpu in ("arm", "aarch64"):
++            flac_only = True
+             flags.extend(vpx_as_flags)
++        else:
++            enable = False
++
++        if flac_only or not enable:
++            use_yasm = False
+ 
+         if use_yasm:
+             # default disabled components
+             flags.append('-Pdefaults_disabled.asm')
+ 
+         return namespace(
+             enable=enable,
+             need_yasm="1.2" if use_yasm else False,
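
As a rough cross-check of the rearranged dispatch above, here is a standalone sketch of the assembler flags the new ffvpx function selects for the x86_64 targets only; Target is a stand-in namedtuple for the real configure target object, ffvpx_asflags is a made-up name, and the x86/arm/aarch64 and flac-only branches are collapsed into an empty list.

    from collections import namedtuple

    Target = namedtuple('Target', 'kernel cpu')

    def ffvpx_asflags(target):
        # Mirrors the x86_64 branches of the hunk above; the real function
        # also appends -Pdefaults_disabled.asm whenever yasm is used.
        if target.cpu == 'x86_64':
            if target.kernel == 'WINNT':
                return ['-D__x86_64__', '-DPIC', '-DWIN64', '-DMSVC',
                        '-Pconfig_win64.asm']
            if target.kernel == 'Darwin':
                return ['-D__x86_64__', '-DPIC', '-DMACHO', '-DPREFIX',
                        '-Pconfig_darwin64.asm']
            return ['-D__x86_64__', '-DPIC', '-DELF', '-Pconfig_unix64.asm']
        return []  # win32 x86, arm/aarch64 and flac-only targets differ; see above

    assert '-Pconfig_unix64.asm' in ffvpx_asflags(Target('Linux', 'x86_64'))
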

+ 89 - 0
mozilla-release/patches/1693498-1-88a1.patch

@@ -0,0 +1,89 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1613684055 0
+# Node ID 339e0d88e58b5c20ebe3218d9284d1c3cc5131fa
+# Parent  f055ab9e4d98a103f41ec832fd7e42ec548a8804
+Bug 1693498 - Remove avoid_avx2 option when building openh264. r=mjf
+
+Back when it was added, the nasm used for Linux builds was old enough
+that it didn't support it. We've been using a version that supports avx2
+for a while now.
+
+Differential Revision: https://phabricator.services.mozilla.com/D105618
+
+diff --git a/testing/mozharness/configs/openh264/linux32.py b/testing/mozharness/configs/openh264/linux32.py
+--- a/testing/mozharness/configs/openh264/linux32.py
++++ b/testing/mozharness/configs/openh264/linux32.py
+@@ -13,11 +13,10 @@ config = {
+     'exes': {
+         'gittool.py': [os.path.join(external_tools_path, 'gittool.py')],
+         'tooltool.py': "/builds/tooltool.py",
+         'python2.7': "/tools/python27/bin/python2.7",
+     },
+     'dump_syms_binary': 'dump_syms',
+     'arch': 'x86',
+     'use_mock': True,
+-    'avoid_avx2': True,
+     'operating_system': 'linux',
+ }
+diff --git a/testing/mozharness/configs/openh264/linux64.py b/testing/mozharness/configs/openh264/linux64.py
+--- a/testing/mozharness/configs/openh264/linux64.py
++++ b/testing/mozharness/configs/openh264/linux64.py
+@@ -13,11 +13,10 @@ config = {
+     'exes': {
+         'gittool.py': [os.path.join(external_tools_path, 'gittool.py')],
+         'tooltool.py': "/builds/tooltool.py",
+         'python2.7': "/tools/python27/bin/python2.7",
+     },
+     'dump_syms_binary': 'dump_syms',
+     'arch': 'x64',
+     'use_mock': True,
+-    'avoid_avx2': True,
+     'operating_system': 'linux',
+ }
+diff --git a/testing/mozharness/scripts/openh264_build.py b/testing/mozharness/scripts/openh264_build.py
+--- a/testing/mozharness/scripts/openh264_build.py
++++ b/testing/mozharness/scripts/openh264_build.py
+@@ -77,22 +77,16 @@ class OpenH264Build(MockMixin, TransferM
+             "action": "store_true",
+             "default": False,
+         }],
+         [["--use-yasm"], {
+             "dest": "use_yasm",
+             "help": "use yasm instead of nasm",
+             "action": "store_true",
+             "default": False,
+-        }],
+-        [["--avoid-avx2"], {
+-            "dest": "avoid_avx2",
+-            "help": "Pass HAVE_AVX2='false' through to Make to support older nasm",
+-            "action": "store_true",
+-            "default": False,
+         }]
+     ]
+ 
+     def __init__(self, require_config_file=False, config={},
+                  all_actions=all_actions,
+                  default_actions=default_actions):
+ 
+         # Default configuration
+@@ -160,19 +154,16 @@ class OpenH264Build(MockMixin, TransferM
+         self.fatal("can't determine platform")
+ 
+     def query_make_params(self):
+         dirs = self.query_abs_dirs()
+         retval = []
+         if self.config['debug_build']:
+             retval.append('BUILDTYPE=Debug')
+ 
+-        if self.config['avoid_avx2']:
+-            retval.append('HAVE_AVX2=false')
+-
+         if self.config['arch'] in ('x64', 'aarch64'):
+             retval.append('ENABLE64BIT=Yes')
+         else:
+             retval.append('ENABLE64BIT=No')
+ 
+         if "operating_system" in self.config:
+             retval.append("OS=%s" % self.config['operating_system'])
+             if self.config["operating_system"] == "android":

+ 258 - 0
mozilla-release/patches/1693498-2-88a1.patch

@@ -0,0 +1,258 @@
+# HG changeset patch
+# User Mike Hommey <mh+mozilla@glandium.org>
+# Date 1613684056 0
+# Node ID 1c5498cdf4c308a63cd6353d4b2498d7ed222024
+# Parent  e72c04c64d9fcb9ea87945171d27defd1ede17ec
+Bug 1693498 - Build openh264 with nasm on all platforms that need nasm or yasm. r=mjf
+
+Differential Revision: https://phabricator.services.mozilla.com/D105619
+
+diff --git a/testing/mozharness/configs/openh264/macosx64-aarch64.py.1693498-2.later b/testing/mozharness/configs/openh264/macosx64-aarch64.py.1693498-2.later
+new file mode 100644
+--- /dev/null
++++ b/testing/mozharness/configs/openh264/macosx64-aarch64.py.1693498-2.later
+@@ -0,0 +1,20 @@
++--- macosx64-aarch64.py
+++++ macosx64-aarch64.py
++@@ -14,17 +14,16 @@ external_tools_path = os.path.join(
++ 
++ config = {
++     "exes": {
++         "gittool.py": [os.path.join(external_tools_path, "gittool.py")],
++         "python2.7": "python2.7",
++     },
++     "dump_syms_binary": "{}/dump_syms/dump_syms".format(os.environ["MOZ_FETCHES_DIR"]),
++     "arch": "aarch64",
++-    "use_yasm": True,
++     "operating_system": "darwin",
++     "partial_env": {
++         "CFLAGS": (
++             "-target aarch64-apple-darwin -mcpu=apple-a12 "
++             "-isysroot {MOZ_FETCHES_DIR}/MacOSX11.0.sdk".format(
++                 MOZ_FETCHES_DIR=os.environ["MOZ_FETCHES_DIR"]
++             )
++         ),
+diff --git a/testing/mozharness/configs/openh264/macosx64.py b/testing/mozharness/configs/openh264/macosx64.py
+--- a/testing/mozharness/configs/openh264/macosx64.py
++++ b/testing/mozharness/configs/openh264/macosx64.py
+@@ -12,10 +12,9 @@ config = {
+     'tooltool_cache': "/builds/tooltool_cache",
+     'exes': {
+         'gittool.py': [os.path.join(external_tools_path, 'gittool.py')],
+         'tooltool.py': "/builds/tooltool.py",
+         'python2.7': "/tools/python27/bin/python2.7",
+     },
+     'dump_syms_binary': 'dump_syms',
+     'arch': 'x64',
+-    'use_yasm': True,
+ }
+diff --git a/testing/mozharness/configs/openh264/macosx64.py.1693498-2.later b/testing/mozharness/configs/openh264/macosx64.py.1693498-2.later
+new file mode 100644
+--- /dev/null
++++ b/testing/mozharness/configs/openh264/macosx64.py.1693498-2.later
+@@ -0,0 +1,19 @@
++--- macosx64.py
++@@ -32,14 +31,16 @@ config = {
++         "LDFLAGS": (
++             "-target x86_64-apple-darwin "
++             "-isysroot {MOZ_FETCHES_DIR}/MacOSX10.11.sdk "
++             "-mmacosx-version-min=10.11".format(
++                 MOZ_FETCHES_DIR=os.environ["MOZ_FETCHES_DIR"]
++             )
++         ),
++         "PATH": (
++-            "{MOZ_FETCHES_DIR}/clang/bin/:{MOZ_FETCHES_DIR}/cctools/bin/:%(PATH)s".format(
+++            "{MOZ_FETCHES_DIR}/clang/bin:"
+++            "{MOZ_FETCHES_DIR}/cctools/bin:"
+++            "{MOZ_FETCHES_DIR}/nasm:%(PATH)s".format(
++                 MOZ_FETCHES_DIR=os.environ["MOZ_FETCHES_DIR"]
++             )
++         ),
++     },
++ }
+diff --git a/testing/mozharness/configs/openh264/win32.py b/testing/mozharness/configs/openh264/win32.py
+--- a/testing/mozharness/configs/openh264/win32.py
++++ b/testing/mozharness/configs/openh264/win32.py
+@@ -13,17 +13,16 @@ config = {
+    'tooltool_manifest_file': "win.manifest",
+    'exes': {
+        'gittool.py': [sys.executable, os.path.join(external_tools_path, 'gittool.py')],
+        'python2.7': 'c:\\mozilla-build\\python27\\python2.7.exe',
+        'tooltool.py': [sys.executable, "c:\\mozilla-build\\tooltool.py"],
+    },
+    'dump_syms_binary': 'dump_syms.exe',
+    'arch': 'x86',
+-   'use_yasm': True,
+    'operating_system': 'msvc',
+    'partial_env': {
+        'PATH': '%s;%s;%s' % (
+            '{_VSPATH}/VC/redist/x86/Microsoft.VC140.CRT;{_VSPATH}/VC/redist/x64/Microsoft.VC140.CRT;{_VSPATH}/SDK/Redist/ucrt/DLLs/x86;{_VSPATH}/SDK/Redist/ucrt/DLLs/x64;{_VSPATH}/VC/bin/amd64_x86;{_VSPATH}/VC/bin/amd64;{_VSPATH}/VC/bin;{_VSPATH}/SDK/bin/x86;{_VSPATH}/SDK/bin/x64;{_VSPATH}/DIA SDK/bin'.format(_VSPATH=VSPATH),
+            os.environ['PATH'],
+            'C:\\mozilla-build\\Git\\bin',
+        ),
+        'WIN32_REDIST_DIR': '{_VSPATH}/VC/redist/x86/Microsoft.VC140.CRT'.format(_VSPATH=VSPATH),
+diff --git a/testing/mozharness/configs/openh264/win32.py.1693498-2.later b/testing/mozharness/configs/openh264/win32.py.1693498-2.later
+new file mode 100644
+--- /dev/null
++++ b/testing/mozharness/configs/openh264/win32.py.1693498-2.later
+@@ -0,0 +1,24 @@
++--- win32.py
+++++ win32.py
++@@ -19,20 +19,20 @@ config = {
++     "exes": {
++         "gittool.py": [sys.executable, os.path.join(external_tools_path, "gittool.py")],
++         "python2.7": "c:\\mozilla-build\\python\\python.exe",
++     },
++     "dump_syms_binary": "{}/dump_syms/dump_syms.exe".format(
++         os.environ["MOZ_FETCHES_DIR"]
++     ),
++     "arch": "x86",
++-    "use_yasm": True,
++     "partial_env": {
++         "PATH": (
++             "{MOZ_FETCHES_DIR}\\clang\\bin\\;"
+++            "{MOZ_FETCHES_DIR}\\nasm;"
++             "{_VSPATH}\\VC\\bin\\Hostx64\\x64;%(PATH)s"
++             # 32-bit redist here for our dump_syms.exe
++             "{_VSPATH}/VC/redist/x86/Microsoft.VC141.CRT;"
++             "{_VSPATH}/SDK/Redist/ucrt/DLLs/x86;"
++             "{_VSPATH}/DIA SDK/bin"
++         ).format(_VSPATH=VSPATH, MOZ_FETCHES_DIR=os.environ["MOZ_FETCHES_DIR"]),
++         "INCLUDES": (
++             "-I{_VSPATH}\\VC\\include "
+diff --git a/testing/mozharness/configs/openh264/win64-aarch64.py.1693498-2.later b/testing/mozharness/configs/openh264/win64-aarch64.py.1693498-2.later
+new file mode 100644
+--- /dev/null
++++ b/testing/mozharness/configs/openh264/win64-aarch64.py.1693498-2.later
+@@ -0,0 +1,20 @@
++--- win64-aarch64.py
+++++ win64-aarch64.py
++@@ -19,17 +19,16 @@ config = {
++     "exes": {
++         "gittool.py": [sys.executable, os.path.join(external_tools_path, "gittool.py")],
++         "python2.7": "c:\\mozilla-build\\python\\python.exe",
++     },
++     "dump_syms_binary": "{}/dump_syms/dump_syms.exe".format(
++         os.environ["MOZ_FETCHES_DIR"]
++     ),
++     "arch": "aarch64",
++-    "use_yasm": False,
++     "partial_env": {
++         "PATH": (
++             "%(abs_work_dir)s\\openh264;"
++             "{MOZ_FETCHES_DIR}\\clang\\bin\\;"
++             "{_VSPATH}\\VC\\bin\\Hostx64\\arm64;"
++             "{_VSPATH}\\VC\\bin\\Hostx64\\x64;"
++             # 32-bit redist here for our dump_syms.exe
++             "{_VSPATH}/VC/redist/x86/Microsoft.VC141.CRT;"
+diff --git a/testing/mozharness/configs/openh264/win64.py b/testing/mozharness/configs/openh264/win64.py
+--- a/testing/mozharness/configs/openh264/win64.py
++++ b/testing/mozharness/configs/openh264/win64.py
+@@ -13,17 +13,16 @@ config = {
+    'tooltool_manifest_file': "win.manifest",
+    'exes': {
+         'gittool.py': [sys.executable, os.path.join(external_tools_path, 'gittool.py')],
+         'python2.7': 'c:\\mozilla-build\\python27\\python2.7.exe',
+         'tooltool.py': [sys.executable, "c:\\mozilla-build\\tooltool.py"],
+    },
+    'dump_syms_binary': 'dump_syms.exe',
+    'arch': 'x64',
+-   'use_yasm': True,
+    'operating_system': 'msvc',
+    'partial_env': {
+        'PATH': '%s;%s;%s' % (
+            '{_VSPATH}/VC/bin/amd64;{_VSPATH}/VC/bin;{_VSPATH}/SDK/bin/x64;{_VSPATH}/VC/redist/x64/Microsoft.VC140.CRT;{_VSPATH}/SDK/Redist/ucrt/DLLs/x64;{_VSPATH}/VC/redist/x86/Microsoft.VC140.CRT;{_VSPATH}/SDK/Redist/ucrt/DLLs/x86;{_VSPATH}/DIA SDK/bin'.format(_VSPATH=VSPATH),
+            os.environ['PATH'],
+            'C:\\mozilla-build\\Git\\bin',
+        ),
+        'WIN32_REDIST_DIR': '{_VSPATH}/VC/redist/x64/Microsoft.VC140.CRT'.format(_VSPATH=VSPATH),
+diff --git a/testing/mozharness/configs/openh264/win64.py.1693498-2.later b/testing/mozharness/configs/openh264/win64.py.1693498-2.later
+new file mode 100644
+--- /dev/null
++++ b/testing/mozharness/configs/openh264/win64.py.1693498-2.later
+@@ -0,0 +1,24 @@
++--- win64.py
+++++ win64.py
++@@ -19,20 +19,20 @@ config = {
++     "exes": {
++         "gittool.py": [sys.executable, os.path.join(external_tools_path, "gittool.py")],
++         "python2.7": "c:\\mozilla-build\\python\\python.exe",
++     },
++     "dump_syms_binary": "{}/dump_syms/dump_syms.exe".format(
++         os.environ["MOZ_FETCHES_DIR"]
++     ),
++     "arch": "x64",
++-    "use_yasm": True,
++     "partial_env": {
++         "PATH": (
++             "{MOZ_FETCHES_DIR}\\clang\\bin\\;"
+++            "{MOZ_FETCHES_DIR}\\nasm;"
++             "{_VSPATH}\\VC\\bin\\Hostx64\\x64;%(PATH)s;"
++             # 32-bit redist here for our dump_syms.exe
++             "{_VSPATH}/VC/redist/x86/Microsoft.VC141.CRT;"
++             "{_VSPATH}/SDK/Redist/ucrt/DLLs/x86;"
++             "{_VSPATH}/DIA SDK/bin"
++         ).format(_VSPATH=VSPATH, MOZ_FETCHES_DIR=os.environ["MOZ_FETCHES_DIR"]),
++         "INCLUDES": (
++             "-I{_VSPATH}\\VC\\include "
+diff --git a/testing/mozharness/scripts/openh264_build.py b/testing/mozharness/scripts/openh264_build.py
+--- a/testing/mozharness/scripts/openh264_build.py
++++ b/testing/mozharness/scripts/openh264_build.py
+@@ -71,37 +71,30 @@ class OpenH264Build(MockMixin, TransferM
+             "dest": "operating_system",
+             "help": "Specify the operating system to build for",
+         }],
+         [["--use-mock"], {
+             "dest": "use_mock",
+             "help": "use mock to set up build environment",
+             "action": "store_true",
+             "default": False,
+-        }],
+-        [["--use-yasm"], {
+-            "dest": "use_yasm",
+-            "help": "use yasm instead of nasm",
+-            "action": "store_true",
+-            "default": False,
+         }]
+     ]
+ 
+     def __init__(self, require_config_file=False, config={},
+                  all_actions=all_actions,
+                  default_actions=default_actions):
+ 
+         # Default configuration
+         default_config = {
+             'debug_build': False,
+             'upload_ssh_key': "~/.ssh/ffxbld_rsa",
+             'upload_ssh_user': 'ffxbld',
+             'upload_ssh_host': 'upload.ffxbld.productdelivery.prod.mozaws.net',
+             'upload_path_base': '/tmp/openh264',
+-            'use_yasm': False,
+         }
+         default_config.update(config)
+ 
+         VCSScript.__init__(
+             self,
+             config_options=self.config_options,
+             require_config_file=require_config_file,
+             config=default_config,
+@@ -172,19 +165,16 @@ class OpenH264Build(MockMixin, TransferM
+                 elif self.config['arch'] == 'aarch64':
+                     retval.append("ARCH=arm64")
+                 else:
+                     retval.append("ARCH=arm")
+                 retval.append('TARGET=invalid')
+                 retval.append('NDKLEVEL=%s' % self.config['min_sdk'])
+                 retval.append('NDKROOT=%s/android-ndk-r11c' % dirs['abs_work_dir'])
+ 
+-        if self.config['use_yasm']:
+-            retval.append('ASM=yasm')
+-
+         return retval
+ 
+     def query_upload_ssh_key(self):
+         return self.config['upload_ssh_key']
+ 
+     def query_upload_ssh_host(self):
+         return self.config['upload_ssh_host']
+ 

+ 16 - 16
mozilla-release/patches/1709303-1-94a1.patch

@@ -3,7 +3,7 @@
 # Date 1631570112 0
 #      Mon Sep 13 21:55:12 2021 +0000
 # Node ID da5b090edd2b4a9d46125c5af15773fff2d9c025
-# Parent  1e913f3aee6dd92c10c3847a78555bfea9d3f003
+# Parent  8412cbbecb76b52d971cc80034969758a9c1180c
 Bug 1709303 - Part 1. Prepare scripts and patches for libjpeg-turbo update. r=jrmuizel,tnikkel
 
 media/libjpeg/1050342.diff is no longer necessary and a correction
@@ -4652,7 +4652,7 @@ diff --git a/media/update-libjpeg.sh b/media/update-libjpeg.sh
 diff --git a/toolkit/moz.configure b/toolkit/moz.configure
 --- a/toolkit/moz.configure
 +++ b/toolkit/moz.configure
-@@ -1349,50 +1349,95 @@ with only_when(compile_environment):
+@@ -1343,50 +1343,95 @@ with only_when(compile_environment):
              check_msg='for sufficient libjpeg-turbo JCS_EXTENSIONS',
              onerror=lambda: die('libjpeg-turbo JCS_EXTENSIONS required for '
                                   '--with-system-jpeg'),
@@ -4677,37 +4677,37 @@ diff --git a/toolkit/moz.configure b/toolkit/moz.configure
          if system_jpeg:
              return
  
--        if target.kernel == "Darwin":
+-        if target.kernel == 'Darwin':
 +        if target.cpu == "arm":
 +            return ("-march=armv7-a", "-mfpu=neon")
 +        elif target.cpu == "aarch64":
 +            return ("-march=armv8-a",)
 +        elif target.kernel == "Darwin":
-             if target.cpu == "x86":
+             if target.cpu == 'x86':
                  return ("-DPIC", "-DMACHO")
-             elif target.cpu == "x86_64":
+             elif target.cpu == 'x86_64':
                  return ("-D__x86_64__", "-DPIC", "-DMACHO")
-         elif target.kernel == "WINNT":
-             if target.cpu == "x86":
+         elif target.kernel == 'WINNT':
+             if target.cpu == 'x86':
                  return ("-DPIC", "-DWIN32")
-             elif target.cpu == "x86_64":
+             elif target.cpu == 'x86_64':
                  return ("-D__x86_64__", "-DPIC", "-DWIN64", "-DMSVC")
--        elif target.cpu == "arm":
+-        elif target.cpu == 'arm':
 -            return ("-march=armv7-a", "-mfpu=neon")
--        elif target.cpu == "aarch64":
+-        elif target.cpu == 'aarch64':
 -            return ("-march=armv8-a",)
-         elif target.cpu == "mips32":
+         elif target.cpu == 'mips32':
              return ("-mdspr2",)
 +        elif target.cpu == "mips64":
 +            return ("-Wa,-mloongson-mmi", "-mloongson-ext")
-         elif target.cpu == "x86":
+         elif target.cpu == 'x86':
              return ("-DPIC", "-DELF")
-         elif target.cpu == "x86_64":
+         elif target.cpu == 'x86_64':
              return ("-D__x86_64__", "-DPIC", "-DELF")
  
      @depends(target, when=in_tree_jpeg)
      def jpeg_nasm(target):
-         if target.cpu in ("x86", "x86_64"):
+         if target.cpu in ('x86', 'x86_64'):
              # libjpeg-turbo 2.0.6 requires nasm 2.10.
              return namespace(version="2.10", what="JPEG")
  
@@ -4749,9 +4749,9 @@ diff --git a/toolkit/moz.configure b/toolkit/moz.configure
 +    )
  
  
- # Libav-fft Support
+ # FFmpeg's ffvpx configuration
  # ==============================================================
  with only_when(compile_environment):
      @depends(target)
      def libav_fft(target):
-         flags = None
+         return target.kernel == "WINNT" or target.cpu == "x86_64"

+ 0 - 29
mozilla-release/patches/NOBUG-nasm-icu-25320.patch

@@ -1,29 +0,0 @@
-# HG changeset patch
-# User Frank-Rainer Grahl <frgrahl@gmx.net>
-# Date 1726194014 -7200
-# Parent  ba57bae2b93ce0551eca096a3eee2e240090603d
-No Bug - Use nasm for icu data file. r=me a=me
-
-Replacement for Bug 1650299 while we keep msvc support.
-
-diff --git a/config/external/icu/data/moz.build b/config/external/icu/data/moz.build
---- a/config/external/icu/data/moz.build
-+++ b/config/external/icu/data/moz.build
-@@ -22,15 +22,15 @@ asflags = [
- ]
- LOCAL_INCLUDES += ['.']
- 
- if CONFIG['OS_TARGET'] == 'WINNT' and CONFIG['CPU_ARCH'] == 'aarch64':
-     icudata = 'icudata.asm'
-     GeneratedFile(icudata, script='genicudata.py',
-                   inputs=[CONFIG['ICU_DATA_FILE']], flags=[data_symbol])
-     SOURCES += ['!%s' % icudata]
--elif CONFIG['HAVE_YASM']:
--    USE_YASM = True
-+elif CONFIG['HAVE_NASM']:
-+    USE_NASM = True
-     SOURCES += ['icudata.s']
-     ASFLAGS += asflags
- elif CONFIG['GNU_AS']:
-     SOURCES += ['icudata_gas.S']
-     ASFLAGS += asflags

+ 0 - 28
mozilla-release/patches/TOP-1445683-14-PLASTER-aom-fix-win32-bustage-2535.patch

@@ -1,28 +0,0 @@
-# HG changeset patch
-# User Bill Gianopoulos <wgianopoulos@gmail.com>
-# Date 1598375293 14400
-#      Tue Aug 25 13:08:13 2020 -0400
-# Parent  d922838bb5be1339946ccb2e0f01644a0ba82572
-Bug 1445683 - Don't include x86_abi_support.asm in libaom build to avoid clang ICE/error. r=frg a=frg
-
-diff --git a/media/libaom/sources.mozbuild b/media/libaom/sources.mozbuild
---- a/media/libaom/sources.mozbuild
-+++ b/media/libaom/sources.mozbuild
-@@ -271,17 +271,16 @@ files = {
-     '../../third_party/aom/aom_dsp/x86/intrapred_avx2.c',
-     '../../third_party/aom/aom_dsp/x86/intrapred_sse2.c',
-     '../../third_party/aom/aom_dsp/x86/intrapred_sse2_asm.asm',
-     '../../third_party/aom/aom_dsp/x86/intrapred_ssse3.c',
-     '../../third_party/aom/aom_dsp/x86/inv_wht_sse2.asm',
-     '../../third_party/aom/aom_dsp/x86/loopfilter_sse2.c',
-     '../../third_party/aom/aom_mem/aom_mem.c',
-     '../../third_party/aom/aom_ports/emms.asm',
--    '../../third_party/aom/aom_ports/x86_abi_support.asm',
-     '../../third_party/aom/aom_scale/aom_scale_rtcd.c',
-     '../../third_party/aom/aom_scale/generic/aom_scale.c',
-     '../../third_party/aom/aom_scale/generic/gen_scalers.c',
-     '../../third_party/aom/aom_scale/generic/yv12config.c',
-     '../../third_party/aom/aom_scale/generic/yv12extend.c',
-     '../../third_party/aom/aom_util/aom_thread.c',
-     '../../third_party/aom/aom_util/debug_util.c',
-     '../../third_party/aom/av1/av1_dx_iface.c',

+ 66 - 0
mozilla-release/patches/TOP-NOBUG-fixnasmcheck-25320.patch

@@ -0,0 +1,66 @@
+# HG changeset patch
+# User Frank-Rainer Grahl <frgrahl@gmx.net>
+# Date 1726338514 -7200
+# Parent  d71e0e729ecc13156dc0c7c28310aa42c115703d
+No Bug - Fix nasm check. r=me a=me
+
+We don't have a bootstrap path and need sorted defined.
+
+diff --git a/toolkit/moz.configure b/toolkit/moz.configure
+--- a/toolkit/moz.configure
++++ b/toolkit/moz.configure
+@@ -1516,16 +1516,17 @@ with only_when(compile_environment):
+     set_define('MOZ_FFVPX_FLACONLY', True, when=ffvpx.flac_only)
+     set_config('FFVPX_ASFLAGS', ffvpx.flags)
+     set_config("FFVPX_USE_NASM", True, when=ffvpx.use_nasm)
+ 
+ 
+ # nasm detection
+ # ==============================================================
+ @depends(dav1d_nasm, vpx_nasm, jpeg_nasm, ffvpx_nasm)
++@imports(_from='__builtin__', _import='sorted')
+ def need_nasm(*requirements):
+     requires = {
+         x.what: x.version if hasattr(x, "version") else True for x in requirements if x
+     }
+     if requires:
+         items = sorted(requires.keys())
+         if len(items) > 1:
+             what = " and ".join((", ".join(items[:-1]), items[-1]))
+@@ -1534,17 +1535,17 @@ def need_nasm(*requirements):
+         versioned = {k: v for (k, v) in requires.items() if v is not True}
+         return namespace(what=what, versioned=versioned)
+ 
+ 
+ nasm = check_prog(
+     "NASM",
+     ["nasm"],
+     allow_missing=True,
+-    paths=bootstrap_search_path("nasm", when=need_nasm),
++    paths=toolchain_search_path,
+     when=need_nasm,
+ )
+ 
+ 
+ @depends(nasm, need_nasm.what)
+ def check_nasm(nasm, what):
+     if not nasm and what:
+         die("Nasm is required to build with %s, but it was not found." % what)
+@@ -1558,16 +1559,17 @@ def nasm_version(nasm):
+         check_cmd_output(nasm, "-v", onerror=lambda: die("Failed to get nasm version."))
+         .splitlines()[0]
+         .split()[2]
+     )
+     return Version(version)
+ 
+ 
+ @depends(nasm_version, need_nasm.versioned, when=need_nasm.versioned)
++@imports(_from='__builtin__', _import='sorted')
+ def check_nasm_version(nasm_version, versioned):
+     by_version = sorted(versioned.items(), key=lambda x: x[1])
+     what, version = by_version[-1]
+     if nasm_version < version:
+         die(
+             "Nasm version %s or greater is required to build with %s." % (version, what)
+         )
+     return nasm_version
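
For illustration, a tiny standalone sketch of why check_nasm_version also needs sorted: the versioned requirements collected by need_nasm are sorted so that the strictest minimum version wins. min_nasm_for is a made-up name, plain version strings stand in for the configure Version objects, and the example mapping is only an example.

    def min_nasm_for(versioned):
        # versioned maps component name -> minimum nasm version, as collected
        # by need_nasm above, e.g. {'JPEG': '2.10', 'FFVPX': '2.00'}.
        by_version = sorted(versioned.items(), key=lambda x: x[1])
        return by_version[-1]  # the (what, version) pair with the highest minimum

    # min_nasm_for({'JPEG': '2.10', 'FFVPX': '2.00'}) -> ('JPEG', '2.10')
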

+ 35 - 8
mozilla-release/patches/series

@@ -5918,7 +5918,6 @@ NOBUG-removenonascii67a1-25314.patch
 1578348-71a1.patch
 1576859-71a1.patch
 1587187-71a1.patch
-1585358-71a1.patch
 1587206-1-71a1.patch
 1583582-71a1.patch
 1579758-71a1.patch
@@ -6708,7 +6707,6 @@ NOBUG-removenonascii67a1-25314.patch
 1696581-2-88a1.patch
 1696935-88a1.patch
 1690604-88a1.patch
-1692940-5-88a1.patch
 1513184-88a1.patch
 1698827-88a1.patch
 1698592-89a1.patch
@@ -6817,7 +6815,6 @@ NOBUG-removenonascii67a1-25314.patch
 1730397-4only-94a1.patch
 1730712-1-94a1.patch
 1723031-1-94a1.patch
-1709303-1-94a1.patch
 1709303-2-94a1.patch
 1738598-PARTIAL-95a1.patch
 1730048-913.patch
@@ -6906,7 +6903,6 @@ NOBUG-removenonascii67a1-25314.patch
 1750760-1-99a1.patch
 1750760-3-99a1.patch
 1750760-4-99a1.patch
-1757308-99a1.patch
 1758062-9162.patch
 NOBUG-removemobilepromo-25312.patch
 NOBUG-disableupdates-25312.patch
@@ -7026,7 +7022,6 @@ NOBUG-ppcheck-25315.patch
 1801893-webp-109a1.patch
 1801583-109a1.patch
 1803469-webp-109a1.patch
-1787515-109a1.patch
 1466443-110a1.patch
 1782344-1-110a1.patch
 1782344-2-110a1.patch
@@ -7038,7 +7033,6 @@ NOBUG-ppcheck-25315.patch
 1760633-2-110a1.patch
 1760633-3-110a1.patch
 1810078-webp-111a1.patch
-1815737-111a1.patch
 1816737-112a1.patch
 1817900-13-112a1.patch
 1819374-4-112a1.patch
@@ -7165,7 +7159,6 @@ PPPPPPP-NSSgetentropy.patch
 WIP-1729459-comment25.patch
 TOP-1294490-7-PLASTER-webp-2535.patch
 TOP-1493400-6-PLASTER-dav1d-avoid-mColorDepth-2535.patch
-TOP-1445683-14-PLASTER-aom-fix-win32-bustage-2535.patch
 TOP-1683545-PLASTER-webrender-2536.patch
 TOP-1667581-3-PLASTER-2537.patch
 TOP-1469021-PLASTER-2538.patch
@@ -7364,7 +7357,41 @@ TOP-NOBUG-blockquad0-25319.patch
 1903254-129a1.patch
 1903021-129a1.patch
 1519636-80-elfhack-130a1.patch
-NOBUG-nasm-icu-25320.patch
 1902935-seamonkey-credits-25320.patch
 1862395-incorrect-version-resistfingerprinting-v2-25320.patch
 1737436-use-mozilla-compat-version-define-25320.patch
+1540760-1-68a1.patch
+1540760-2-68a1.patch
+1540760-3-68a1.patch
+1540760-4-68a1.patch
+1540760-5-68a1.patch
+1540760-6-68a1.patch
+1585358-71a1.patch
+1585359-71a1.patch
+1525393-1-75a1.patch
+1650299-80a1.patch
+1656063-81a1.patch
+1669888-83a1.patch
+1692945-1-87a1.patch
+1692945-2-87a1.patch
+1693215-1-88a1.patch
+1693215-2-88a1.patch
+1693215-3-88a1.patch
+1693498-1-88a1.patch
+1693498-2-88a1.patch
+1692940-01-88a1.patch
+1692940-02-88a1.patch
+1692940-03-88a1.patch
+1692940-04-88a1.patch
+1692940-05-88a1.patch
+1692940-06-88a1.patch
+1692940-07-88a1.patch
+1692940-08-88a1.patch
+1692940-09-88a1.patch
+1692940-10no11-88a1.patch
+1692940-12-88a1.patch
+1709303-1-94a1.patch
+1757308-99a1.patch
+1787515-109a1.patch
+1815737-111a1.patch
+TOP-NOBUG-fixnasmcheck-25320.patch