diff -Nru x264-0.152.2854+gite9a5903/autocomplete.c x264-0.158.2988+git-20191101.7817004/autocomplete.c
--- x264-0.152.2854+gite9a5903/autocomplete.c	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/autocomplete.c	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,405 @@
+/*****************************************************************************
+ * autocomplete: x264cli shell autocomplete
+ *****************************************************************************
+ * Copyright (C) 2018-2019 x264 project
+ *
+ * Authors: Henrik Gramner <henrik@gramner.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "x264cli.h"
+#include "input/input.h"
+
+#if HAVE_LAVF
+#undef DECLARE_ALIGNED
+#include <libavformat/avformat.h>
+#include <libavutil/pixdesc.h>
+#endif
+
+static const char * const level_names[] = /* values offered for --level; NULL-terminated so suggest_list() knows where to stop */
+{
+    "1", "1.1", "1.2", "1.3", "1b",
+    "2", "2.1", "2.2",
+    "3", "3.1", "3.2",
+    "4", "4.1", "4.2",
+    "5", "5.1", "5.2",
+    "6", "6.1", "6.2",
+    NULL
+};
+
+/* Options requiring a value for which we provide suggestions. */
+static const char * const opts_suggest[] =
+{
+    "--alternative-transfer",
+    "--aq-mode",
+    "--asm",
+    "--avcintra-class",
+    "--avcintra-flavor",
+    "--b-adapt",
+    "--b-pyramid",
+    "--colormatrix",
+    "--colorprim",
+    "--cqm",
+    "--demuxer",
+    "--direct",
+    "--frame-packing",
+    "--input-csp",
+    "--input-fmt",
+    "--input-range",
+    "--level",
+    "--log-level",
+    "--me",
+    "--muxer",
+    "--nal-hrd",
+    "--output-csp",
+    "--overscan",
+    "--pass", "-p",
+    "--preset",
+    "--profile",
+    "--pulldown",
+    "--range",
+    "--subme", "-m",
+    "--transfer",
+    "--trellis", "-t",
+    "--tune",
+    "--videoformat",
+    "--weightp",
+    NULL
+};
+
+/* Options requiring a value that are not in opts_suggest (a few, e.g. --output-depth and --partitions, still get suggestions). */
+static const char * const opts_nosuggest[] =
+{
+    "--b-bias",
+    "--bframes", "-b",
+    "--deblock", "-f",
+    "--bitrate", "-B",
+    "--chroma-qp-offset",
+    "--chromaloc",
+    "--cplxblur",
+    "--cqm4",
+    "--cqm4i",
+    "--cqm4ic",
+    "--cqm4iy",
+    "--cqm4p",
+    "--cqm4pc",
+    "--cqm4py",
+    "--cqm8",
+    "--cqm8i",
+    "--cqm8p",
+    "--crf",
+    "--crf-max",
+    "--crop-rect",
+    "--deadzone-inter",
+    "--deadzone-intra",
+    "--fps",
+    "--frames",
+    "--input-depth",
+    "--input-res",
+    "--ipratio",
+    "--keyint", "-I",
+    "--lookahead-threads",
+    "--merange",
+    "--min-keyint", "-i",
+    "--mvrange",
+    "--mvrange-thread",
+    "--nr",
+    "--opencl-device",
+    "--output-depth",
+    "--partitions", "-A",
+    "--pbratio",
+    "--psy-rd",
+    "--qblur",
+    "--qcomp",
+    "--qp", "-q",
+    "--qpmax",
+    "--qpmin",
+    "--qpstep",
+    "--ratetol",
+    "--ref", "-r",
+    "--rc-lookahead",
+    "--sar",
+    "--scenecut",
+    "--seek",
+    "--slices",
+    "--slices-max",
+    "--slice-max-size",
+    "--slice-max-mbs",
+    "--slice-min-mbs",
+    "--sps-id",
+    "--sync-lookahead",
+    "--threads",
+    "--timebase",
+    "--vbv-bufsize",
+    "--vbv-init",
+    "--vbv-maxrate",
+    "--video-filter", "--vf",
+    "--zones",
+    NULL
+};
+
+/* Options requiring a filename. */
+static const char * const opts_filename[] =
+{
+    "--cqmfile",
+    "--dump-yuv",
+    "--index",
+    "--opencl-clbin",
+    "--output", "-o",
+    "--qpfile",
+    "--stats",
+    "--tcfile-in",
+    "--tcfile-out",
+    NULL
+};
+
+/* Options without an associated value. */
+static const char * const opts_standalone[] =
+{
+    "--8x8dct",
+    "--aud",
+    "--bff",
+    "--bluray-compat",
+    "--cabac",
+    "--constrained-intra",
+    "--cpu-independent",
+    "--dts-compress",
+    "--fake-interlaced",
+    "--fast-pskip",
+    "--filler",
+    "--force-cfr",
+    "--mbtree",
+    "--mixed-refs",
+    "--no-8x8dct",
+    "--no-asm",
+    "--no-cabac",
+    "--no-chroma-me",
+    "--no-dct-decimate",
+    "--no-deblock",
+    "--no-fast-pskip",
+    "--no-mbtree",
+    "--no-mixed-refs",
+    "--no-progress",
+    "--no-psy",
+    "--no-scenecut",
+    "--no-weightb",
+    "--non-deterministic",
+    "--open-gop",
+    "--opencl",
+    "--pic-struct",
+    "--psnr",
+    "--quiet",
+    "--sliced-threads",
+    "--slow-firstpass",
+    "--ssim",
+    "--stitchable",
+    "--tff",
+    "--thread-input",
+    "--verbose", "-v",
+    "--weightb",
+    NULL
+};
+
+/* Options which shouldn't be suggested in combination with other options. */
+static const char * const opts_special[] =
+{
+    "--fullhelp",
+    "--help", "-h",
+    "--longhelp",
+    "--version",
+    NULL
+};
+
+static int list_contains( const char * const *list, const char *s ) /* returns 1 if s equals an entry of the NULL-terminated list, else 0 */
+{
+    if( *s ) /* an empty string never matches */
+        for( ; *list; list++ )
+            if( !strcmp( *list, s ) )
+                return 1;
+    return 0;
+}
+
+static void suggest( const char *s, const char *cur, int cur_len ) /* print s on its own line if its first cur_len chars equal those of cur */
+{
+    if( s && *s && !strncmp( s, cur, cur_len ) ) /* NULL and empty candidates are silently skipped */
+        printf( "%s\n", s );
+}
+
+static void suggest_lower( const char *s, const char *cur, int cur_len ) /* as suggest(), but the prefix match ignores case and s is printed lowercased */
+{
+    if( s && *s && !strncasecmp( s, cur, cur_len ) )
+    {
+        for( ; *s; s++ )
+            putchar( *s < 'A' || *s > 'Z' ? *s : *s | 0x20 ); /* only 'A'..'Z' get bit 0x20 set (ASCII lowercase); everything else is printed unchanged */
+        putchar( '\n' );
+    }
+}
+
+static void suggest_num_range( int start, int end, const char *cur, int cur_len ) /* suggest every integer in [start, end] (inclusive) as decimal text */
+{
+    char buf[16];
+    for( int i = start; i <= end; i++ )
+    {
+        snprintf( buf, sizeof( buf ), "%d", i );
+        suggest( buf, cur, cur_len ); /* prefix filtering against cur happens in suggest() */
+    }
+}
+
+#if HAVE_LAVF
+/* Suggest each token in a string separated by delimiters; only tokens whose first cur_len chars match cur are printed. */
+static void suggest_token( const char *s, int delim, const char *cur, int cur_len )
+{
+    if( s && *s )
+    {
+        for( const char *tok_end; (tok_end = strchr( s, delim )); s = tok_end + 1 )
+        {
+            int tok_len = tok_end - s;
+            if( tok_len && tok_len >= cur_len && !strncmp( s, cur, cur_len ) ) /* skip empty tokens; tok_len >= cur_len keeps the prefix compare inside this token */
+                printf( "%.*s\n", tok_len, s ); /* print just this token, not the rest of the string */
+        }
+        suggest( s, cur, cur_len ); /* what remains after the last delimiter (the whole string if there was none) */
+    }
+}
+#endif
+
+#define OPT( opt ) else if( !strcmp( prev, opt ) ) /* one link of an if/else-if chain: taken when prev is exactly opt */
+#define OPT2( opt1, opt2 ) else if( !strcmp( prev, opt1 ) || !strcmp( prev, opt2 ) ) /* same, for an option with two spellings */
+#define OPT_TYPE( type ) list_contains( opts_##type, prev ) /* nonzero if prev is listed in the opts_<type> table */
+
+#define suggest( s ) suggest( s, cur, cur_len ) /* from here on the helpers take cur/cur_len implicitly from the calling scope */
+#define suggest_lower( s ) suggest_lower( s, cur, cur_len )
+#define suggest_list( list ) for( const char * const *s = list; *s; s++ ) suggest( *s ) /* walks a NULL-terminated string table */
+#define suggest_num_range( start, end ) suggest_num_range( start, end, cur, cur_len )
+#define suggest_token( s, delim ) suggest_token( s, delim, cur, cur_len )
+
+int x264_cli_autocomplete( const char *prev, const char *cur ) /* prints candidates for cur (one per line) chosen by prev; returns 1 to request shell filename completion, else 0 */
+{
+    int cur_len = strlen( cur );
+    if( 0 ); /* empty chain head so that every OPT()/OPT2() below can expand to "else if" */
+    OPT( "--alternative-transfer" )
+        suggest_list( x264_transfer_names );
+    OPT( "--aq-mode" )
+        suggest_num_range( 0, 3 );
+    OPT( "--asm" )
+        for( const x264_cpu_name_t *cpu = x264_cpu_names; cpu->flags; cpu++ ) /* table ends at the first entry with flags == 0 */
+            suggest_lower( cpu->name );
+    OPT( "--avcintra-class" )
+        suggest_list( x264_avcintra_class_names );
+    OPT( "--avcintra-flavor" )
+        suggest_list( x264_avcintra_flavor_names );
+    OPT( "--b-adapt" )
+        suggest_num_range( 0, 2 );
+    OPT( "--b-pyramid" )
+        suggest_list( x264_b_pyramid_names );
+    OPT( "--colormatrix" )
+        suggest_list( x264_colmatrix_names );
+    OPT( "--colorprim" )
+        suggest_list( x264_colorprim_names );
+    OPT( "--cqm" )
+        suggest_list( x264_cqm_names );
+    OPT( "--demuxer" )
+        suggest_list( x264_demuxer_names );
+    OPT( "--direct" )
+        suggest_list( x264_direct_pred_names );
+    OPT( "--frame-packing" )
+        suggest_num_range( 0, 7 );
+    OPT( "--input-csp" )
+    {
+        for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
+            suggest( x264_cli_csps[i].name );
+#if HAVE_LAVF
+        for( const AVPixFmtDescriptor *d = NULL; (d = av_pix_fmt_desc_next( d )); ) /* additionally offer the pixel format names known to libavutil */
+            suggest( d->name );
+#endif
+    }
+    OPT( "--input-fmt" )
+    {
+#if HAVE_LAVF
+        av_register_all();
+        for( const AVInputFormat *f = NULL; (f = av_iformat_next( f )); )
+            suggest_token( f->name, ',' ); /* f->name is split on ',' so each part is offered separately */
+#endif
+    }
+    OPT( "--input-range" )
+        suggest_list( x264_range_names );
+    OPT( "--level" )
+        suggest_list( level_names );
+    OPT( "--log-level" )
+        suggest_list( x264_log_level_names );
+    OPT( "--me" )
+        suggest_list( x264_motion_est_names );
+    OPT( "--muxer" )
+        suggest_list( x264_muxer_names );
+    OPT( "--nal-hrd" )
+        suggest_list( x264_nal_hrd_names );
+    OPT( "--output-csp" )
+        suggest_list( x264_output_csp_names );
+    OPT( "--output-depth" )
+    {
+#if HAVE_BITDEPTH8
+        suggest( "8" );
+#endif
+#if HAVE_BITDEPTH10
+        suggest( "10" );
+#endif
+    }
+    OPT( "--overscan" )
+        suggest_list( x264_overscan_names );
+    OPT2( "--partitions", "-A" )
+        suggest_list( x264_partition_names );
+    OPT2( "--pass", "-p" )
+        suggest_num_range( 1, 3 );
+    OPT( "--preset" )
+        suggest_list( x264_preset_names );
+    OPT( "--profile" )
+        suggest_list( x264_valid_profile_names );
+    OPT( "--pulldown" )
+        suggest_list( x264_pulldown_names );
+    OPT( "--range" )
+        suggest_list( x264_range_names );
+    OPT2( "--subme", "-m" )
+        suggest_num_range( 0, 11 );
+    OPT( "--transfer" )
+        suggest_list( x264_transfer_names );
+    OPT2( "--trellis", "-t" )
+        suggest_num_range( 0, 2 );
+    OPT( "--tune" )
+        suggest_list( x264_tune_names );
+    OPT( "--videoformat" )
+        suggest_list( x264_vidformat_names );
+    OPT( "--weightp" )
+        suggest_num_range( 0, 2 );
+    else if( !OPT_TYPE( nosuggest ) && !OPT_TYPE( special ) ) /* if prev is in opts_nosuggest or opts_special, nothing is printed and 0 is returned */
+    {
+        if( OPT_TYPE( filename ) || strncmp( cur, "--", 2 ) ) /* prev wants a filename, or cur does not begin with "--" */
+            return 1; /* Fall back to default shell filename autocomplete. */
+
+        /* Suggest options. */
+        suggest_list( opts_suggest );
+        suggest_list( opts_nosuggest );
+        suggest_list( opts_filename );
+        suggest_list( opts_standalone );
+
+        /* Only suggest special options if no other options have been specified. */
+        if( !*prev )
+            suggest_list( opts_special );
+    }
+
+    return 0;
+}
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.c x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.c
--- x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.c	2017-12-31 12:50:50.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * asm-offsets.c: check asm offsets for aarch64
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Janne Grunau <janne-x264@jannau.net>
  *
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.h x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.h
--- x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.h	2017-12-31 12:50:50.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * asm-offsets.h: asm offsets for aarch64
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Janne Grunau <janne-x264@jannau.net>
  *
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/asm.S x264-0.158.2988+git-20191101.7817004/common/aarch64/asm.S
--- x264-0.152.2854+gite9a5903/common/aarch64/asm.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/asm.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * asm.S: AArch64 utility macros
  *****************************************************************************
- * Copyright (C) 2008-2017 x264 project
+ * Copyright (C) 2008-2019 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *          David Conrad <lessen42@gmail.com>
@@ -27,12 +27,27 @@
 
 #include "config.h"
 
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+
 #ifdef PREFIX
-#   define EXTERN_ASM _
+#   define BASE _x264_
+#   define SYM_PREFIX _
+#else
+#   define BASE x264_
+#   define SYM_PREFIX
+#endif
+
+#ifdef BIT_DEPTH
+#   define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _)
 #else
-#   define EXTERN_ASM
+#   define EXTERN_ASM BASE
 #endif
 
+#define X(s) JOIN(EXTERN_ASM, s)
+#define X264(s) JOIN(BASE, s)
+#define EXT(s) JOIN(SYM_PREFIX, s)
+
 #ifdef __ELF__
 #   define ELF
 #else
@@ -53,7 +68,11 @@
 
 .macro  function name, export=0, align=2
     .macro endfunc
+.if \export
+ELF     .size   EXTERN_ASM\name, . - EXTERN_ASM\name
+.else
 ELF     .size   \name, . - \name
+.endif
 FUNC    .endfunc
         .purgem endfunc
     .endm
@@ -94,10 +113,6 @@
 #endif
 .endm
 
-#define GLUE(a, b) a ## b
-#define JOIN(a, b) GLUE(a, b)
-#define X(s) JOIN(EXTERN_ASM, s)
-
 #define FDEC_STRIDE 32
 #define FENC_STRIDE 16
 
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/bitstream-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/bitstream-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream-a.S: aarch64 bitstream functions
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Janne Grunau <janne-x264@jannau.net>
  *
@@ -25,7 +25,7 @@
 
 #include "asm.S"
 
-function x264_nal_escape_neon, export=1
+function nal_escape_neon, export=1
     movi        v0.16b,  #0xff
     movi        v4.16b,  #4
     mov         w3,  #3
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/bitstream.h x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream.h
--- x264-0.152.2854+gite9a5903/common/aarch64/bitstream.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,32 @@
+/*****************************************************************************
+ * bitstream.h: aarch64 bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_AARCH64_BITSTREAM_H
+#define X264_AARCH64_BITSTREAM_H
+
+#define x264_nal_escape_neon x264_template(nal_escape_neon) /* NOTE(review): x264_template is defined elsewhere; presumably maps to the per-bit-depth symbol name -- confirm */
+uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end ); /* NOTE(review): presumably the nal_escape_neon routine in bitstream-a.S -- confirm */
+
+#endif /* X264_AARCH64_BITSTREAM_H */
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/cabac-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/cabac-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/cabac-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/cabac-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac-a.S: aarch64 cabac
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Janne Grunau <janne-x264@jannau.net>
  *
@@ -29,9 +29,9 @@
 // w11 holds x264_cabac_t.i_low
 // w12 holds x264_cabac_t.i_range
 
-function x264_cabac_encode_decision_asm, export=1
-    movrel      x8,  X(x264_cabac_range_lps)
-    movrel      x9,  X(x264_cabac_transition)
+function cabac_encode_decision_asm, export=1
+    movrel      x8,  X264(cabac_range_lps)
+    movrel      x9,  X264(cabac_transition)
     add         w10, w1, #CABAC_STATE
     ldrb        w3,  [x0,  x10]         // i_state
     ldr         w12, [x0,  #CABAC_I_RANGE]
@@ -82,10 +82,10 @@
 1:
     ldr         x7,  [x0, #CABAC_P]
     asr         w5,  w4,  #8            // carry
-    ldrb        w8,  [x7, #-1]
+    ldurb       w8,  [x7, #-1]
     add         w8,  w8,  w5
     sub         w5,  w5,  #1
-    strb        w8,  [x7, #-1]
+    sturb       w8,  [x7, #-1]
     cbz         w6,  3f
 2:
     subs        w6,  w6,  #1
@@ -101,7 +101,7 @@
     ret
 endfunc
 
-function x264_cabac_encode_bypass_asm, export=1
+function cabac_encode_bypass_asm, export=1
     ldr         w12, [x0, #CABAC_I_RANGE]
     ldr         w11, [x0, #CABAC_I_LOW]
     ldr         w2,  [x0, #CABAC_I_QUEUE]
@@ -114,7 +114,7 @@
     ret
 endfunc
 
-function x264_cabac_encode_terminal_asm, export=1
+function cabac_encode_terminal_asm, export=1
     ldr         w12, [x0, #CABAC_I_RANGE]
     ldr         w11, [x0, #CABAC_I_LOW]
     sub         w12, w12, #2
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/dct-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/dct-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/dct-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/dct-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /****************************************************************************
  * dct-a.S: aarch64 transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -79,7 +79,7 @@
 .endm
 
 
-function x264_dct4x4dc_neon, export=1
+function dct4x4dc_neon, export=1
     ld1        {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
     movi        v31.4h, #1
     SUMSUB_AB   v4.4h,  v5.4h,  v0.4h,  v1.4h
@@ -102,7 +102,7 @@
     ret
 endfunc
 
-function x264_idct4x4dc_neon, export=1
+function idct4x4dc_neon, export=1
     ld1        {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
     SUMSUB_AB   v4.4h,  v5.4h,  v0.4h,  v1.4h
     SUMSUB_AB   v6.4h,  v7.4h,  v2.4h,  v3.4h
@@ -131,7 +131,7 @@
     sub         \v3, \v7, \v5
 .endm
 
-function x264_sub4x4_dct_neon, export=1
+function sub4x4_dct_neon, export=1
     mov         x3, #FENC_STRIDE
     mov         x4, #FDEC_STRIDE
     ld1        {v0.s}[0], [x1], x3
@@ -154,7 +154,7 @@
     ret
 endfunc
 
-function x264_sub8x4_dct_neon
+function sub8x4_dct_neon
     ld1        {v0.8b}, [x1], x3
     ld1        {v1.8b}, [x2], x4
     usubl       v16.8h, v0.8b,  v1.8b
@@ -193,34 +193,34 @@
     ret
 endfunc
 
-function x264_sub8x8_dct_neon, export=1
+function sub8x8_dct_neon, export=1
     mov         x5,  x30
     mov         x3, #FENC_STRIDE
     mov         x4, #FDEC_STRIDE
-    bl          x264_sub8x4_dct_neon
+    bl          sub8x4_dct_neon
     mov         x30, x5
-    b           x264_sub8x4_dct_neon
+    b           sub8x4_dct_neon
 endfunc
 
-function x264_sub16x16_dct_neon, export=1
+function sub16x16_dct_neon, export=1
     mov         x5,  x30
     mov         x3, #FENC_STRIDE
     mov         x4, #FDEC_STRIDE
-    bl          x264_sub8x4_dct_neon
-    bl          x264_sub8x4_dct_neon
+    bl          sub8x4_dct_neon
+    bl          sub8x4_dct_neon
     sub         x1, x1, #8*FENC_STRIDE-8
     sub         x2, x2, #8*FDEC_STRIDE-8
-    bl          x264_sub8x4_dct_neon
-    bl          x264_sub8x4_dct_neon
+    bl          sub8x4_dct_neon
+    bl          sub8x4_dct_neon
     sub         x1, x1, #8
     sub         x2, x2, #8
-    bl          x264_sub8x4_dct_neon
-    bl          x264_sub8x4_dct_neon
+    bl          sub8x4_dct_neon
+    bl          sub8x4_dct_neon
     sub         x1, x1, #8*FENC_STRIDE-8
     sub         x2, x2, #8*FDEC_STRIDE-8
-    bl          x264_sub8x4_dct_neon
+    bl          sub8x4_dct_neon
     mov         x30, x5
-    b           x264_sub8x4_dct_neon
+    b           sub8x4_dct_neon
 endfunc
 
 
@@ -255,7 +255,7 @@
     SUMSUB_SHR2 2, v3.8h,  v5.8h,  v30.8h, v29.8h, v20.8h, v21.8h
 .endm
 
-function x264_sub8x8_dct8_neon, export=1
+function sub8x8_dct8_neon, export=1
     mov         x3, #FENC_STRIDE
     mov         x4, #FDEC_STRIDE
     ld1        {v16.8b}, [x1], x3
@@ -292,19 +292,19 @@
     ret
 endfunc
 
-function x264_sub16x16_dct8_neon, export=1
+function sub16x16_dct8_neon, export=1
     mov         x7,  x30
-    bl          X(x264_sub8x8_dct8_neon)
+    bl          X(sub8x8_dct8_neon)
     sub         x1,  x1,  #FENC_STRIDE*8 - 8
     sub         x2,  x2,  #FDEC_STRIDE*8 - 8
-    bl          X(x264_sub8x8_dct8_neon)
+    bl          X(sub8x8_dct8_neon)
     sub         x1,  x1,  #8
     sub         x2,  x2,  #8
-    bl          X(x264_sub8x8_dct8_neon)
+    bl          X(sub8x8_dct8_neon)
     mov         x30, x7
     sub         x1,  x1,  #FENC_STRIDE*8 - 8
     sub         x2,  x2,  #FDEC_STRIDE*8 - 8
-    b           X(x264_sub8x8_dct8_neon)
+    b           X(sub8x8_dct8_neon)
 endfunc
 
 
@@ -317,7 +317,7 @@
     add         \d6, \d6, \d1
 .endm
 
-function x264_add4x4_idct_neon, export=1
+function add4x4_idct_neon, export=1
     mov         x2, #FDEC_STRIDE
     ld1        {v0.4h,v1.4h,v2.4h,v3.4h}, [x1]
 
@@ -357,7 +357,7 @@
     ret
 endfunc
 
-function x264_add8x4_idct_neon, export=1
+function add8x4_idct_neon, export=1
     ld1        {v0.8h,v1.8h}, [x1], #32
     ld1        {v2.8h,v3.8h}, [x1], #32
     transpose   v20.2d, v21.2d, v0.2d, v2.2d
@@ -398,29 +398,29 @@
     ret
 endfunc
 
-function x264_add8x8_idct_neon, export=1
+function add8x8_idct_neon, export=1
     mov             x2, #FDEC_STRIDE
     mov             x5,  x30
-    bl              X(x264_add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
     mov             x30, x5
-    b               X(x264_add8x4_idct_neon)
+    b               X(add8x4_idct_neon)
 endfunc
 
-function x264_add16x16_idct_neon, export=1
+function add16x16_idct_neon, export=1
     mov             x2, #FDEC_STRIDE
     mov             x5,  x30
-    bl              X(x264_add8x4_idct_neon)
-    bl              X(x264_add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
     sub             x0, x0, #8*FDEC_STRIDE-8
-    bl              X(x264_add8x4_idct_neon)
-    bl              X(x264_add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
     sub             x0, x0, #8
-    bl              X(x264_add8x4_idct_neon)
-    bl              X(x264_add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
     sub             x0, x0, #8*FDEC_STRIDE-8
-    bl              X(x264_add8x4_idct_neon)
+    bl              X(add8x4_idct_neon)
     mov             x30, x5
-    b               X(x264_add8x4_idct_neon)
+    b               X(add8x4_idct_neon)
 endfunc
 
 .macro IDCT8_1D type
@@ -446,7 +446,7 @@
     SUMSUB_AB   v19.8h, v20.8h, v2.8h,  v20.8h
 .endm
 
-function x264_add8x8_idct8_neon, export=1
+function add8x8_idct8_neon, export=1
     mov         x2,  #FDEC_STRIDE
     ld1        {v16.8h,v17.8h}, [x1], #32
     ld1        {v18.8h,v19.8h}, [x1], #32
@@ -503,19 +503,19 @@
     ret
 endfunc
 
-function x264_add16x16_idct8_neon, export=1
+function add16x16_idct8_neon, export=1
     mov             x7,  x30
-    bl              X(x264_add8x8_idct8_neon)
+    bl              X(add8x8_idct8_neon)
     sub             x0,  x0,  #8*FDEC_STRIDE-8
-    bl              X(x264_add8x8_idct8_neon)
+    bl              X(add8x8_idct8_neon)
     sub             x0,  x0,  #8
-    bl              X(x264_add8x8_idct8_neon)
+    bl              X(add8x8_idct8_neon)
     sub             x0,  x0,  #8*FDEC_STRIDE-8
     mov             x30, x7
-    b               X(x264_add8x8_idct8_neon)
+    b               X(add8x8_idct8_neon)
 endfunc
 
-function x264_add8x8_idct_dc_neon, export=1
+function add8x8_idct_dc_neon, export=1
     mov         x2,  #FDEC_STRIDE
     ld1        {v16.4h}, [x1]
     ld1        {v0.8b}, [x0], x2
@@ -605,7 +605,7 @@
     st1         {v7.16b}, [x2], x3
 .endm
 
-function x264_add16x16_idct_dc_neon, export=1
+function add16x16_idct_dc_neon, export=1
     mov         x2,  x0
     mov         x3,  #FDEC_STRIDE
 
@@ -640,7 +640,7 @@
     add         \dst\().8h, \dst\().8h, \t3\().8h
 .endm
 
-function x264_sub8x8_dct_dc_neon, export=1
+function sub8x8_dct_dc_neon, export=1
     mov             x3,  #FENC_STRIDE
     mov             x4,  #FDEC_STRIDE
 
@@ -660,7 +660,7 @@
     ret
 endfunc
 
-function x264_sub8x16_dct_dc_neon, export=1
+function sub8x16_dct_dc_neon, export=1
     mov             x3,  #FENC_STRIDE
     mov             x4,  #FDEC_STRIDE
     sub4x4x2_dct_dc  v0, v16, v17, v18, v19, v20, v21, v22, v23
@@ -689,7 +689,7 @@
     ret
 endfunc
 
-function x264_zigzag_interleave_8x8_cavlc_neon, export=1
+function zigzag_interleave_8x8_cavlc_neon, export=1
     mov        x3,  #7
     movi       v31.4s, #1
     ld4        {v0.8h,v1.8h,v2.8h,v3.8h}, [x1],  #64
@@ -718,7 +718,7 @@
     ret
 endfunc
 
-function x264_zigzag_scan_4x4_frame_neon, export=1
+function zigzag_scan_4x4_frame_neon, export=1
     movrel      x2, scan4x4_frame
     ld1        {v0.16b,v1.16b}, [x1]
     ld1        {v16.16b,v17.16b}, [x2]
@@ -729,7 +729,7 @@
 endfunc
 
 .macro zigzag_sub_4x4 f ac
-function x264_zigzag_sub_4x4\ac\()_\f\()_neon, export=1
+function zigzag_sub_4x4\ac\()_\f\()_neon, export=1
     mov         x9,  #FENC_STRIDE
     mov         x4,  #FDEC_STRIDE
     movrel      x5,  sub4x4_\f
@@ -772,7 +772,7 @@
 zigzag_sub_4x4 frame
 zigzag_sub_4x4 frame, ac
 
-function x264_zigzag_scan_4x4_field_neon, export=1
+function zigzag_scan_4x4_field_neon, export=1
     movrel      x2, scan4x4_field
     ld1        {v0.8h,v1.8h},   [x1]
     ld1        {v16.16b},       [x2]
@@ -781,7 +781,7 @@
     ret
 endfunc
 
-function x264_zigzag_scan_8x8_frame_neon, export=1
+function zigzag_scan_8x8_frame_neon, export=1
     movrel      x2,  scan8x8_frame
     ld1        {v0.8h,v1.8h},   [x1], #32
     ld1        {v2.8h,v3.8h},   [x1], #32
@@ -841,7 +841,7 @@
     .byte T(7,5), T(7,6), T(6,7), T(7,7)
 endconst
 
-function x264_zigzag_scan_8x8_field_neon, export=1
+function zigzag_scan_8x8_field_neon, export=1
     movrel      x2,  scan8x8_field
     ld1        {v0.8h,v1.8h},   [x1], #32
     ld1        {v2.8h,v3.8h},   [x1], #32
@@ -868,7 +868,7 @@
 endfunc
 
 .macro zigzag_sub8x8 f
-function x264_zigzag_sub_8x8_\f\()_neon, export=1
+function zigzag_sub_8x8_\f\()_neon, export=1
     movrel      x4,  sub8x8_\f
     mov         x5,  #FENC_STRIDE
     mov         x6,  #FDEC_STRIDE
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/dct.h x264-0.158.2988+git-20191101.7817004/common/aarch64/dct.h
--- x264-0.152.2854+gite9a5903/common/aarch64/dct.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/dct.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: aarch64 transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -27,41 +27,68 @@
 #ifndef X264_AARCH64_DCT_H
 #define X264_AARCH64_DCT_H
 
+#define x264_dct4x4dc_neon x264_template(dct4x4dc_neon)
 void x264_dct4x4dc_neon( int16_t d[16] );
+#define x264_idct4x4dc_neon x264_template(idct4x4dc_neon)
 void x264_idct4x4dc_neon( int16_t d[16] );
 
+#define x264_sub4x4_dct_neon x264_template(sub4x4_dct_neon)
 void x264_sub4x4_dct_neon( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_neon x264_template(sub8x8_dct_neon)
 void x264_sub8x8_dct_neon( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_neon x264_template(sub16x16_dct_neon)
 void x264_sub16x16_dct_neon( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add4x4_idct_neon x264_template(add4x4_idct_neon)
 void x264_add4x4_idct_neon( uint8_t *p_dst, int16_t dct[16] );
+#define x264_add8x8_idct_neon x264_template(add8x8_idct_neon)
 void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] );
+#define x264_add16x16_idct_neon x264_template(add16x16_idct_neon)
 void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] );
 
+#define x264_add8x8_idct_dc_neon x264_template(add8x8_idct_dc_neon)
 void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] );
+#define x264_add16x16_idct_dc_neon x264_template(add16x16_idct_dc_neon)
 void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] );
+#define x264_sub8x8_dct_dc_neon x264_template(sub8x8_dct_dc_neon)
 void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x16_dct_dc_neon x264_template(sub8x16_dct_dc_neon)
 void x264_sub8x16_dct_dc_neon( int16_t dct[8], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_sub8x8_dct8_neon x264_template(sub8x8_dct8_neon)
 void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct8_neon x264_template(sub16x16_dct8_neon)
 void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add8x8_idct8_neon x264_template(add8x8_idct8_neon)
 void x264_add8x8_idct8_neon( uint8_t *p_dst, int16_t dct[64] );
+#define x264_add16x16_idct8_neon x264_template(add16x16_idct8_neon)
 void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] );
 
+#define x264_zigzag_scan_4x4_frame_neon x264_template(zigzag_scan_4x4_frame_neon)
 void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] );
+#define x264_zigzag_scan_4x4_field_neon x264_template(zigzag_scan_4x4_field_neon)
 void x264_zigzag_scan_4x4_field_neon( int16_t level[16], int16_t dct[16] );
+#define x264_zigzag_scan_8x8_frame_neon x264_template(zigzag_scan_8x8_frame_neon)
 void x264_zigzag_scan_8x8_frame_neon( int16_t level[64], int16_t dct[64] );
+#define x264_zigzag_scan_8x8_field_neon x264_template(zigzag_scan_8x8_field_neon)
 void x264_zigzag_scan_8x8_field_neon( int16_t level[64], int16_t dct[64] );
 
+#define x264_zigzag_sub_4x4_field_neon x264_template(zigzag_sub_4x4_field_neon)
 int x264_zigzag_sub_4x4_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst );
+#define x264_zigzag_sub_4x4ac_field_neon x264_template(zigzag_sub_4x4ac_field_neon)
 int x264_zigzag_sub_4x4ac_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc );
+#define x264_zigzag_sub_4x4_frame_neon x264_template(zigzag_sub_4x4_frame_neon)
 int x264_zigzag_sub_4x4_frame_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst );
+#define x264_zigzag_sub_4x4ac_frame_neon x264_template(zigzag_sub_4x4ac_frame_neon)
 int x264_zigzag_sub_4x4ac_frame_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc );
 
+#define x264_zigzag_sub_8x8_field_neon x264_template(zigzag_sub_8x8_field_neon)
 int x264_zigzag_sub_8x8_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst );
+#define x264_zigzag_sub_8x8_frame_neon x264_template(zigzag_sub_8x8_frame_neon)
 int x264_zigzag_sub_8x8_frame_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst );
 
+#define x264_zigzag_interleave_8x8_cavlc_neon x264_template(zigzag_interleave_8x8_cavlc_neon)
 void x264_zigzag_interleave_8x8_cavlc_neon( dctcoef *dst, dctcoef *src, uint8_t *nnz );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/deblock-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/deblock-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.S: aarch64 deblocking
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -108,7 +108,7 @@
     sqxtun2         v0.16b,  v24.8h
 .endm
 
-function x264_deblock_v_luma_neon, export=1
+function deblock_v_luma_neon, export=1
     h264_loop_filter_start
 
     ld1             {v0.16b},  [x0], x1
@@ -131,7 +131,7 @@
     ret
 endfunc
 
-function x264_deblock_h_luma_neon, export=1
+function deblock_h_luma_neon, export=1
     h264_loop_filter_start
 
     sub             x0,  x0,  #4
@@ -302,7 +302,7 @@
     bit             v2.16b, v26.16b,  v18.16b  // q2'_2
 .endm
 
-function x264_deblock_v_luma_intra_neon, export=1
+function deblock_v_luma_intra_neon, export=1
     h264_loop_filter_start_intra
 
     ld1             {v0.16b},  [x0], x1 // q0
@@ -328,7 +328,7 @@
     ret
 endfunc
 
-function x264_deblock_h_luma_intra_neon, export=1
+function deblock_h_luma_intra_neon, export=1
     h264_loop_filter_start_intra
 
     sub             x0,  x0,  #4
@@ -421,7 +421,7 @@
     sqxtun2         v0.16b,  v23.8h
 .endm
 
-function x264_deblock_v_chroma_neon, export=1
+function deblock_v_chroma_neon, export=1
     h264_loop_filter_start
 
     sub             x0,  x0,  x1, lsl #1
@@ -439,7 +439,7 @@
     ret
 endfunc
 
-function x264_deblock_h_chroma_neon, export=1
+function deblock_h_chroma_neon, export=1
     h264_loop_filter_start
 
     sub             x0,  x0,  #4
@@ -472,7 +472,7 @@
     ret
 endfunc
 
-function x264_deblock_h_chroma_422_neon, export=1
+function deblock_h_chroma_422_neon, export=1
     add             x5,  x0,  x1
     sub             x0,  x0,  #4
     add             x1,  x1,  x1
@@ -516,7 +516,7 @@
     sqxtun          v17.8b,  v22.8h
 .endm
 
-function x264_deblock_h_chroma_mbaff_neon, export=1
+function deblock_h_chroma_mbaff_neon, export=1
     h264_loop_filter_start
 
     sub             x4,  x0,  #4
@@ -575,7 +575,7 @@
     bit             v17.16b, v25.16b, v26.16b
 .endm
 
-function x264_deblock_v_chroma_intra_neon, export=1
+function deblock_v_chroma_intra_neon, export=1
     h264_loop_filter_start_intra
 
     sub             x0,  x0,  x1, lsl #1
@@ -593,7 +593,7 @@
     ret
 endfunc
 
-function x264_deblock_h_chroma_intra_mbaff_neon, export=1
+function deblock_h_chroma_intra_mbaff_neon, export=1
     h264_loop_filter_start_intra
 
     sub             x4,  x0,  #4
@@ -615,7 +615,7 @@
     ret
 endfunc
 
-function x264_deblock_h_chroma_intra_neon, export=1
+function deblock_h_chroma_intra_neon, export=1
     h264_loop_filter_start_intra
 
     sub             x4,  x0,  #4
@@ -645,7 +645,7 @@
     ret
 endfunc
 
-function x264_deblock_h_chroma_422_intra_neon, export=1
+function deblock_h_chroma_422_intra_neon, export=1
     h264_loop_filter_start_intra
 
     sub             x4,  x0,  #4
@@ -697,12 +697,12 @@
     ret
 endfunc
 
-//static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE],
-//                                int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-//                                int16_t mv[2][X264_SCAN8_LUMA_SIZE][2],
-//                                uint8_t bs[2][8][4], int mvy_limit,
-//                                int bframe )
-function x264_deblock_strength_neon, export=1
+// void deblock_strength( uint8_t nnz[X264_SCAN8_SIZE],
+//                        int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+//                        int16_t mv[2][X264_SCAN8_LUMA_SIZE][2],
+//                        uint8_t bs[2][8][4], int mvy_limit,
+//                        int bframe )
+function deblock_strength_neon, export=1
     movi        v4.16b, #0
     lsl         w4,  w4,  #8
     add         x3,  x3,  #32
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/deblock.h x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock.h
--- x264-0.152.2854+gite9a5903/common/aarch64/deblock.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,58 @@
+/*****************************************************************************
+ * deblock.h: aarch64 deblocking
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_AARCH64_DEBLOCK_H
+#define X264_AARCH64_DEBLOCK_H
+
+#define x264_deblock_v_luma_neon x264_template(deblock_v_luma_neon)
+void x264_deblock_v_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_neon x264_template(deblock_h_luma_neon)
+void x264_deblock_h_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_chroma_neon x264_template(deblock_v_chroma_neon)
+void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_neon x264_template(deblock_h_chroma_neon)
+void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_strength_neon x264_template(deblock_strength_neon)
+void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                 int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                 int mvy_limit, int bframe );
+#define x264_deblock_h_chroma_422_neon x264_template(deblock_h_chroma_422_neon)
+void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_mbaff_neon x264_template(deblock_h_chroma_mbaff_neon)
+void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_intra_mbaff_neon x264_template(deblock_h_chroma_intra_mbaff_neon)
+void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_neon x264_template(deblock_h_chroma_intra_neon)
+void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_422_intra_neon x264_template(deblock_h_chroma_422_intra_neon)
+void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_chroma_intra_neon x264_template(deblock_v_chroma_intra_neon)
+void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_luma_intra_neon x264_template(deblock_h_luma_intra_neon)
+void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_luma_intra_neon x264_template(deblock_v_luma_intra_neon)
+void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/mc-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/mc-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.S: aarch64 motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -31,7 +31,7 @@
 // note: prefetch stuff assumes 64-byte cacheline
 
 // void prefetch_ref( uint8_t *pix, intptr_t stride, int parity )
-function x264_prefetch_ref_aarch64, export=1
+function prefetch_ref_aarch64, export=1
     cmp         w2,  #1
     csel        x2,  xzr, x1, eq
     add         x0,  x0,  #64
@@ -54,8 +54,8 @@
 
 // void prefetch_fenc( uint8_t *pix_y,  intptr_t stride_y,
 //                     uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
-.macro x264_prefetch_fenc sub
-function x264_prefetch_fenc_\sub\()_aarch64, export=1
+.macro prefetch_fenc sub
+function prefetch_fenc_\sub\()_aarch64, export=1
     and         w6,  w5,  #3
     and         w7,  w5,  #3
     mul         x6,  x6,  x1
@@ -82,14 +82,14 @@
 endfunc
 .endm
 
-x264_prefetch_fenc 420
-x264_prefetch_fenc 422
+prefetch_fenc 420
+prefetch_fenc 422
 
 // void pixel_avg( uint8_t *dst,  intptr_t dst_stride,
 //                 uint8_t *src1, intptr_t src1_stride,
 //                 uint8_t *src2, intptr_t src2_stride, int weight );
 .macro AVGH w h
-function x264_pixel_avg_\w\()x\h\()_neon, export=1
+function pixel_avg_\w\()x\h\()_neon, export=1
     mov         w10, #64
     cmp         w6,  #32
     mov         w9, #\h
@@ -292,7 +292,7 @@
     ret
 endfunc
 
-function x264_pixel_avg2_w4_neon, export=1
+function pixel_avg2_w4_neon, export=1
 1:
     subs        w5,  w5,  #2
     ld1        {v0.s}[0],  [x2], x3
@@ -307,7 +307,7 @@
     ret
 endfunc
 
-function x264_pixel_avg2_w8_neon, export=1
+function pixel_avg2_w8_neon, export=1
 1:
     subs        w5,  w5,  #2
     ld1        {v0.8b}, [x2], x3
@@ -322,7 +322,7 @@
     ret
 endfunc
 
-function x264_pixel_avg2_w16_neon, export=1
+function pixel_avg2_w16_neon, export=1
 1:
     subs        w5,  w5,  #2
     ld1        {v0.16b}, [x2], x3
@@ -337,7 +337,7 @@
     ret
 endfunc
 
-function x264_pixel_avg2_w20_neon, export=1
+function pixel_avg2_w20_neon, export=1
     sub         x1,  x1,  #16
 1:
     subs        w5,  w5,  #2
@@ -373,7 +373,7 @@
 
 // void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst,
 //                 intptr_t dst_stride, const x264_weight_t *weight, int h )
-function x264_mc_weight_w20_neon, export=1
+function mc_weight_w20_neon, export=1
     weight_prologue full
     sub         x1,  x1,  #16
 1:
@@ -409,7 +409,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w16_neon, export=1
+function mc_weight_w16_neon, export=1
     weight_prologue full
 weight16_loop:
 1:
@@ -438,7 +438,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w8_neon, export=1
+function mc_weight_w8_neon, export=1
     weight_prologue full
 1:
     subs        w9,  w9,  #2
@@ -458,7 +458,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w4_neon, export=1
+function mc_weight_w4_neon, export=1
     weight_prologue full
 1:
     subs        w9,  w9,  #2
@@ -474,7 +474,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w20_nodenom_neon, export=1
+function mc_weight_w20_nodenom_neon, export=1
     weight_prologue nodenom
     sub         x1,  x1,  #16
 1:
@@ -505,7 +505,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w16_nodenom_neon, export=1
+function mc_weight_w16_nodenom_neon, export=1
     weight_prologue nodenom
 1:
     subs        w9,  w9,  #2
@@ -529,7 +529,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w8_nodenom_neon, export=1
+function mc_weight_w8_nodenom_neon, export=1
     weight_prologue nodenom
 1:
     subs        w9,  w9,  #2
@@ -547,7 +547,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w4_nodenom_neon, export=1
+function mc_weight_w4_nodenom_neon, export=1
     weight_prologue nodenom
 1:
     subs        w9,  w9,  #2
@@ -568,7 +568,7 @@
 .endm
 
 .macro weight_simple name op
-function x264_mc_weight_w20_\name\()_neon, export=1
+function mc_weight_w20_\name\()_neon, export=1
     weight_simple_prologue
 1:
     subs        w5,  w5,  #2
@@ -588,7 +588,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w16_\name\()_neon, export=1
+function mc_weight_w16_\name\()_neon, export=1
     weight_simple_prologue
 1:
     subs        w5,  w5,  #2
@@ -602,7 +602,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w8_\name\()_neon, export=1
+function mc_weight_w8_\name\()_neon, export=1
     weight_simple_prologue
 1:
     subs        w5,  w5,  #2
@@ -616,7 +616,7 @@
     ret
 endfunc
 
-function x264_mc_weight_w4_\name\()_neon, export=1
+function mc_weight_w4_\name\()_neon, export=1
     weight_simple_prologue
 1:
     subs        w5,  w5,  #2
@@ -635,7 +635,7 @@
 
 
 // void mc_copy( uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, int height )
-function x264_mc_copy_w4_neon, export=1
+function mc_copy_w4_neon, export=1
 1:
     subs        w4,  w4,  #4
     ld1        {v0.s}[0],  [x2],  x3
@@ -650,7 +650,7 @@
     ret
 endfunc
 
-function x264_mc_copy_w8_neon, export=1
+function mc_copy_w8_neon, export=1
 1:  subs        w4,  w4,  #4
     ld1        {v0.8b},  [x2],  x3
     ld1        {v1.8b},  [x2],  x3
@@ -664,7 +664,7 @@
     ret
 endfunc
 
-function x264_mc_copy_w16_neon, export=1
+function mc_copy_w16_neon, export=1
 1:  subs        w4,  w4,  #4
     ld1        {v0.16b}, [x2],  x3
     ld1        {v1.16b}, [x2],  x3
@@ -678,11 +678,11 @@
     ret
 endfunc
 
-// void x264_mc_chroma_neon( uint8_t *dst_u, uint8_t *dst_v,
-//                           intptr_t i_dst_stride,
-//                           uint8_t *src, intptr_t i_src_stride,
-//                           int dx, int dy, int i_width, int i_height );
-function x264_mc_chroma_neon, export=1
+// void mc_chroma( uint8_t *dst_u, uint8_t *dst_v,
+//                 intptr_t i_dst_stride,
+//                 uint8_t *src, intptr_t i_src_stride,
+//                 int dx, int dy, int i_width, int i_height );
+function mc_chroma_neon, export=1
     ldr         w15, [sp]               // height
     sbfx        x12, x6,  #3,  #29      // asr(3) and sign extend
     sbfx        x11, x5,  #3,  #29      // asr(3) and sign extend
@@ -718,9 +718,9 @@
 function mc_chroma_w\width\()_neon
 // since the element size varies, there's a different index for the 2nd store
 .if \width == 4
-    .set st2, 1
+    .set idx2, 1
 .else
-    .set st2, 2
+    .set idx2, 2
 .endif
     CHROMA_MC_START
     b.eq        2f
@@ -785,10 +785,10 @@
     //pld         [x3]
     //pld         [x3, x4]
 
-    st1        {v16.\vsize}[0],   [x0], x2
-    st1        {v16.\vsize}[st2], [x1], x2
-    st1        {v17.\vsize}[0],   [x0], x2
-    st1        {v17.\vsize}[st2], [x1], x2
+    st1        {v16.\vsize}[0],    [x0], x2
+    st1        {v16.\vsize}[idx2], [x1], x2
+    st1        {v17.\vsize}[0],    [x0], x2
+    st1        {v17.\vsize}[idx2], [x1], x2
     b.gt        1b
 
     ret
@@ -820,10 +820,10 @@
     //pld         [x3]
     //pld         [x3, x4]
 
-    st1        {v16.\vsize}[0],   [x0], x2
-    st1        {v16.\vsize}[st2], [x0], x2
-    st1        {v17.\vsize}[0],   [x1], x2
-    st1        {v17.\vsize}[st2], [x1], x2
+    st1        {v16.\vsize}[0],    [x0], x2
+    st1        {v16.\vsize}[idx2], [x0], x2
+    st1        {v17.\vsize}[0],    [x1], x2
+    st1        {v17.\vsize}[idx2], [x1], x2
     b.gt        3b
 
     ret
@@ -853,10 +853,10 @@
     //pld         [x3]
     //pld         [x3, x4]
 
-    st1        {v16.\vsize}[0],   [x0], x2
-    st1        {v16.\vsize}[st2], [x0], x2
-    st1        {v17.\vsize}[0],   [x1], x2
-    st1        {v17.\vsize}[st2], [x1], x2
+    st1        {v16.\vsize}[0],    [x0], x2
+    st1        {v16.\vsize}[idx2], [x0], x2
+    st1        {v17.\vsize}[0],    [x1], x2
+    st1        {v17.\vsize}[idx2], [x1], x2
     b.gt        5b
 
     ret
@@ -1016,9 +1016,9 @@
     ret
 endfunc
 
-//void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
-//                  intptr_t stride, int width, int height, int16_t *buf )
-function x264_hpel_filter_neon, export=1
+// void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
+//                   intptr_t stride, int width, int height, int16_t *buf )
+function hpel_filter_neon, export=1
     ubfm        x9,  x3,  #0,  #3
     add         w15, w5,  w9
     sub         x13, x3,  x9            // align src
@@ -1158,7 +1158,7 @@
 // frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth,
 //                         uint8_t *dstv, uint8_t *dstc, intptr_t src_stride,
 //                         intptr_t dst_stride, int width, int height )
-function x264_frame_init_lowres_core_neon, export=1
+function frame_init_lowres_core_neon, export=1
     ldr         w8,  [sp]
     sub         x10, x6,  w7, uxtw      // dst_stride - width
     and         x10, x10, #~15
@@ -1233,12 +1233,12 @@
     ret
 endfunc
 
-function x264_load_deinterleave_chroma_fenc_neon, export=1
+function load_deinterleave_chroma_fenc_neon, export=1
     mov         x4,  #FENC_STRIDE/2
     b           load_deinterleave_chroma
 endfunc
 
-function x264_load_deinterleave_chroma_fdec_neon, export=1
+function load_deinterleave_chroma_fdec_neon, export=1
     mov         x4,  #FDEC_STRIDE/2
 load_deinterleave_chroma:
     ld2        {v0.8b,v1.8b}, [x1], x2
@@ -1253,7 +1253,7 @@
     ret
 endfunc
 
-function x264_plane_copy_core_neon, export=1
+function plane_copy_core_neon, export=1
     add         w8,  w4,  #15 // 32-bit write clears the upper 32-bit the register
     and         w4,  w8,  #~15
     // safe use of the full reg since negative width makes no sense
@@ -1282,7 +1282,7 @@
     ret
 endfunc
 
-function x264_plane_copy_swap_core_neon, export=1
+function plane_copy_swap_core_neon, export=1
     lsl         w4,  w4,  #1
     sub         x1,  x1,  x4
     sub         x3,  x3,  x4
@@ -1310,7 +1310,7 @@
     ret
 endfunc
 
-function x264_plane_copy_deinterleave_neon, export=1
+function plane_copy_deinterleave_neon, export=1
     add         w9,  w6,  #15
     and         w9,  w9,  #0xfffffff0
     sub         x1,  x1,  x9
@@ -1349,7 +1349,7 @@
     b.gt            1b
 .endm
 
-function x264_plane_copy_deinterleave_rgb_neon, export=1
+function plane_copy_deinterleave_rgb_neon, export=1
 #if SYS_MACOSX
     ldr             w8,  [sp]
     ldp             w9,  w10, [sp, #4]
@@ -1381,7 +1381,7 @@
     ret
 endfunc
 
-function x264_plane_copy_interleave_core_neon, export=1
+function plane_copy_interleave_core_neon, export=1
     add         w9,  w6,  #15
     and         w9,  w9,  #0xfffffff0
     sub         x1,  x1,  x9,  lsl #1
@@ -1404,7 +1404,7 @@
     ret
 endfunc
 
-function x264_store_interleave_chroma_neon, export=1
+function store_interleave_chroma_neon, export=1
     mov             x5,  #FDEC_STRIDE
 1:
     ld1        {v0.8b}, [x2], x5
@@ -1431,7 +1431,7 @@
     add         v0.8h,  v0.8h,  v5.8h
 .endm
 
-function x264_integral_init4h_neon, export=1
+function integral_init4h_neon, export=1
     sub         x3,  x0,  x2, lsl #1
     ld1        {v6.8b,v7.8b}, [x1], #16
 1:
@@ -1466,7 +1466,7 @@
     add         v0.8h,  v0.8h,  \s\().8h
 .endm
 
-function x264_integral_init8h_neon, export=1
+function integral_init8h_neon, export=1
     sub         x3,  x0,  x2, lsl #1
     ld1        {v16.8b,v17.8b}, [x1], #16
 1:
@@ -1483,7 +1483,7 @@
     ret
 endfunc
 
-function x264_integral_init4v_neon, export=1
+function integral_init4v_neon, export=1
     mov         x3,  x0
     add         x4,  x0,  x2,  lsl #3
     add         x8,  x0,  x2,  lsl #4
@@ -1518,7 +1518,7 @@
     ret
 endfunc
 
-function x264_integral_init8v_neon, export=1
+function integral_init8v_neon, export=1
     add         x2,  x0,  x1,  lsl #4
     sub         x1,  x1,  #8
     ands        x3,  x1,  #16 - 1
@@ -1542,7 +1542,7 @@
     ret
 endfunc
 
-function x264_mbtree_propagate_cost_neon, export=1
+function mbtree_propagate_cost_neon, export=1
     ld1r        {v5.4s},  [x5]
 8:
     subs        w6,  w6,  #8
@@ -1593,7 +1593,7 @@
     .short 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 endconst
 
-function x264_mbtree_propagate_list_internal_neon, export=1
+function mbtree_propagate_list_internal_neon, export=1
     movrel      x11,  pw_0to15
     dup         v31.8h,  w4             // bipred_weight
     movi        v30.8h,  #0xc0, lsl #8
@@ -1659,7 +1659,7 @@
     ret
 endfunc
 
-function x264_memcpy_aligned_neon, export=1
+function memcpy_aligned_neon, export=1
     tst         x2,  #16
     b.eq        32f
     sub         x2,  x2,  #16
@@ -1684,7 +1684,7 @@
     ret
 endfunc
 
-function x264_memzero_aligned_neon, export=1
+function memzero_aligned_neon, export=1
     movi        v0.16b,  #0
     movi        v1.16b,  #0
 1:
@@ -1698,7 +1698,7 @@
 endfunc
 
 // void mbtree_fix8_pack( int16_t *dst, float *src, int count )
-function x264_mbtree_fix8_pack_neon, export=1
+function mbtree_fix8_pack_neon, export=1
     subs        w3,  w2,  #8
     b.lt        2f
 1:
@@ -1726,7 +1726,7 @@
 endfunc
 
 // void mbtree_fix8_unpack( float *dst, int16_t *src, int count )
-function x264_mbtree_fix8_unpack_neon, export=1
+function mbtree_fix8_unpack_neon, export=1
     subs        w3,  w2,  #8
     b.lt        2f
 1:
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/mc-c.c x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-c.c
--- x264-0.152.2854+gite9a5903/common/aarch64/mc-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: aarch64 motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -27,54 +27,96 @@
 #include "common/common.h"
 #include "mc.h"
 
+#define x264_prefetch_ref_aarch64 x264_template(prefetch_ref_aarch64)
 void x264_prefetch_ref_aarch64( uint8_t *, intptr_t, int );
+#define x264_prefetch_fenc_420_aarch64 x264_template(prefetch_fenc_420_aarch64)
 void x264_prefetch_fenc_420_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_prefetch_fenc_422_aarch64 x264_template(prefetch_fenc_422_aarch64)
 void x264_prefetch_fenc_422_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_memcpy_aligned_neon x264_template(memcpy_aligned_neon)
 void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n );
+#define x264_memzero_aligned_neon x264_template(memzero_aligned_neon)
 void x264_memzero_aligned_neon( void *dst, size_t n );
 
+#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon)
 void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon)
 void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon)
 void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon)
 void x264_pixel_avg_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon)
 void x264_pixel_avg_8x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon)
 void x264_pixel_avg_4x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon)
 void x264_pixel_avg_4x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon)
 void x264_pixel_avg_4x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon)
 void x264_pixel_avg_4x2_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon)
 void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon)
 void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon)
 void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon)
 void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 
+#define x264_plane_copy_core_neon x264_template(plane_copy_core_neon)
 void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
                                 pixel *src, intptr_t i_src, int w, int h );
+#define x264_plane_copy_swap_core_neon x264_template(plane_copy_swap_core_neon)
 void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
                                      pixel *src, intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_neon x264_template(plane_copy_deinterleave_neon)
 void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
                                          pixel *dstv, intptr_t i_dstv,
                                          pixel *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_rgb_neon x264_template(plane_copy_deinterleave_rgb_neon)
 void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
                                             pixel *dstb, intptr_t i_dstb,
                                             pixel *dstc, intptr_t i_dstc,
                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+#define x264_plane_copy_interleave_core_neon x264_template(plane_copy_interleave_core_neon)
 void x264_plane_copy_interleave_core_neon( pixel *dst,  intptr_t i_dst,
                                            pixel *srcu, intptr_t i_srcu,
                                            pixel *srcv, intptr_t i_srcv, int w, int h );
 
+#define x264_store_interleave_chroma_neon x264_template(store_interleave_chroma_neon)
 void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_load_deinterleave_chroma_fdec_neon x264_template(load_deinterleave_chroma_fdec_neon)
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_neon x264_template(load_deinterleave_chroma_fenc_neon)
 void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 
+#define x264_mc_weight_w16_neon x264_template(mc_weight_w16_neon)
+#define x264_mc_weight_w16_nodenom_neon x264_template(mc_weight_w16_nodenom_neon)
+#define x264_mc_weight_w16_offsetadd_neon x264_template(mc_weight_w16_offsetadd_neon)
+#define x264_mc_weight_w16_offsetsub_neon x264_template(mc_weight_w16_offsetsub_neon)
+#define x264_mc_weight_w20_neon x264_template(mc_weight_w20_neon)
+#define x264_mc_weight_w20_nodenom_neon x264_template(mc_weight_w20_nodenom_neon)
+#define x264_mc_weight_w20_offsetadd_neon x264_template(mc_weight_w20_offsetadd_neon)
+#define x264_mc_weight_w20_offsetsub_neon x264_template(mc_weight_w20_offsetsub_neon)
+#define x264_mc_weight_w4_neon x264_template(mc_weight_w4_neon)
+#define x264_mc_weight_w4_nodenom_neon x264_template(mc_weight_w4_nodenom_neon)
+#define x264_mc_weight_w4_offsetadd_neon x264_template(mc_weight_w4_offsetadd_neon)
+#define x264_mc_weight_w4_offsetsub_neon x264_template(mc_weight_w4_offsetsub_neon)
+#define x264_mc_weight_w8_neon x264_template(mc_weight_w8_neon)
+#define x264_mc_weight_w8_nodenom_neon x264_template(mc_weight_w8_nodenom_neon)
+#define x264_mc_weight_w8_offsetadd_neon x264_template(mc_weight_w8_offsetadd_neon)
+#define x264_mc_weight_w8_offsetsub_neon x264_template(mc_weight_w8_offsetsub_neon)
 #define MC_WEIGHT(func)\
 void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w8##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w4##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 \
-static void (* x264_mc##func##_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int ) =\
+static void (* mc##func##_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int ) =\
 {\
     x264_mc_weight_w4##func##_neon,\
     x264_mc_weight_w4##func##_neon,\
@@ -84,50 +126,64 @@
     x264_mc_weight_w20##func##_neon,\
 };
 
+#if !HIGH_BIT_DEPTH
 MC_WEIGHT()
 MC_WEIGHT(_nodenom)
 MC_WEIGHT(_offsetadd)
 MC_WEIGHT(_offsetsub)
+#endif
 
+#define x264_mc_copy_w4_neon x264_template(mc_copy_w4_neon)
 void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w8_neon x264_template(mc_copy_w8_neon)
 void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w16_neon x264_template(mc_copy_w16_neon)
 void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_mc_chroma_neon x264_template(mc_chroma_neon)
 void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int );
+#define x264_integral_init4h_neon x264_template(integral_init4h_neon)
 void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t );
+#define x264_integral_init4v_neon x264_template(integral_init4v_neon)
 void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t );
+#define x264_integral_init8h_neon x264_template(integral_init8h_neon)
 void x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t );
+#define x264_integral_init8v_neon x264_template(integral_init8v_neon)
 void x264_integral_init8v_neon( uint16_t *, intptr_t );
+#define x264_frame_init_lowres_core_neon x264_template(frame_init_lowres_core_neon)
 void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int );
 
+#define x264_mbtree_propagate_cost_neon x264_template(mbtree_propagate_cost_neon)
 void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int );
 
+#define x264_mbtree_fix8_pack_neon x264_template(mbtree_fix8_pack_neon)
 void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count );
+#define x264_mbtree_fix8_unpack_neon x264_template(mbtree_fix8_unpack_neon)
 void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count );
 
 #if !HIGH_BIT_DEPTH
-static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
+static void weight_cache_neon( x264_t *h, x264_weight_t *w )
 {
     if( w->i_scale == 1<<w->i_denom )
     {
         if( w->i_offset < 0 )
         {
-            w->weightfn = x264_mc_offsetsub_wtab_neon;
+            w->weightfn = mc_offsetsub_wtab_neon;
             w->cachea[0] = -w->i_offset;
         }
         else
         {
-            w->weightfn = x264_mc_offsetadd_wtab_neon;
+            w->weightfn = mc_offsetadd_wtab_neon;
             w->cachea[0] = w->i_offset;
         }
     }
     else if( !w->i_denom )
-        w->weightfn = x264_mc_nodenom_wtab_neon;
+        w->weightfn = mc_nodenom_wtab_neon;
     else
-        w->weightfn = x264_mc_wtab_neon;
+        w->weightfn = mc_wtab_neon;
 }
 
-static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
+static void (* const pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
 {
     NULL,
     x264_pixel_avg2_w4_neon,
@@ -137,7 +193,7 @@
     x264_pixel_avg2_w20_neon,
 };
 
-static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
+static void (* const mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
 {
     NULL,
     x264_mc_copy_w4_neon,
@@ -160,7 +216,7 @@
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
         uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
+        pixel_avg_wtab_neon[i_width>>2](
                 dst, i_dst_stride, src1, i_src_stride,
                 src2, i_height );
         if( weight->weightfn )
@@ -169,7 +225,7 @@
     else if( weight->weightfn )
         weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
     else
-        x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
+        mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
 }
 
 static uint8_t *get_ref_neon( uint8_t *dst,   intptr_t *i_dst_stride,
@@ -186,7 +242,7 @@
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
         uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
+        pixel_avg_wtab_neon[i_width>>2](
                 dst, *i_dst_stride, src1, i_src_stride,
                 src2, i_height );
         if( weight->weightfn )
@@ -205,6 +261,7 @@
     }
 }
 
+#define x264_hpel_filter_neon x264_template(hpel_filter_neon)
 void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
                             uint8_t *src, intptr_t stride, int width,
                             int height, int16_t *buf );
@@ -212,9 +269,8 @@
 PLANE_COPY(16, neon)
 PLANE_COPY_SWAP(16, neon)
 PLANE_INTERLEAVE(neon)
-#endif // !HIGH_BIT_DEPTH
-
 PROPAGATE_LIST(neon)
+#endif // !HIGH_BIT_DEPTH
 
 void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf )
 {
@@ -234,11 +290,11 @@
     pf->copy[PIXEL_8x8]      = x264_mc_copy_w8_neon;
     pf->copy[PIXEL_4x4]      = x264_mc_copy_w4_neon;
 
-    pf->plane_copy                  = x264_plane_copy_neon;
-    pf->plane_copy_swap             = x264_plane_copy_swap_neon;
+    pf->plane_copy                  = plane_copy_neon;
+    pf->plane_copy_swap             = plane_copy_swap_neon;
     pf->plane_copy_deinterleave     = x264_plane_copy_deinterleave_neon;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
-    pf->plane_copy_interleave       = x264_plane_copy_interleave_neon;
+    pf->plane_copy_interleave       = plane_copy_interleave_neon;
 
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
@@ -254,10 +310,10 @@
     pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_neon;
     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_neon;
 
-    pf->weight       = x264_mc_wtab_neon;
-    pf->offsetadd    = x264_mc_offsetadd_wtab_neon;
-    pf->offsetsub    = x264_mc_offsetsub_wtab_neon;
-    pf->weight_cache = x264_weight_cache_neon;
+    pf->weight       = mc_wtab_neon;
+    pf->offsetadd    = mc_offsetadd_wtab_neon;
+    pf->offsetsub    = mc_offsetsub_wtab_neon;
+    pf->weight_cache = weight_cache_neon;
 
     pf->mc_chroma = x264_mc_chroma_neon;
     pf->mc_luma = mc_luma_neon;
@@ -271,7 +327,7 @@
     pf->integral_init8v = x264_integral_init8v_neon;
 
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_neon;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_neon;
+    pf->mbtree_propagate_list = mbtree_propagate_list_neon;
     pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_neon;
     pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_neon;
 
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/mc.h x264-0.158.2988+git-20191101.7817004/common/aarch64/mc.h
--- x264-0.152.2854+gite9a5903/common/aarch64/mc.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/mc.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: aarch64 motion compensation
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Janne Grunau <janne-x264@jannau.net>
  *
@@ -26,6 +26,7 @@
 #ifndef X264_AARCH64_MC_H
 #define X264_AARCH64_MC_H
 
+#define x264_mc_init_aarch64 x264_template(mc_init_aarch64)
 void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/pixel-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/pixel-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.S: aarch64 pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -97,7 +97,7 @@
 .endm
 
 .macro SAD_FUNC w, h, name
-function x264_pixel_sad\name\()_\w\()x\h\()_neon, export=1
+function pixel_sad\name\()_\w\()x\h\()_neon, export=1
     SAD_START_\w
 
 .rept \h / 2 - 1
@@ -195,7 +195,7 @@
 .endm
 
 .macro SAD_X_FUNC x, w, h
-function x264_pixel_sad_x\x\()_\w\()x\h\()_neon, export=1
+function pixel_sad_x\x\()_\w\()x\h\()_neon, export=1
 .if \x == 3
     mov         x6,  x5
     mov         x5,  x4
@@ -249,7 +249,7 @@
 SAD_X_FUNC  4, 16, 16
 
 
-function x264_pixel_vsad_neon, export=1
+function pixel_vsad_neon, export=1
     subs        w2,  w2,  #2
     ld1        {v0.16b},  [x0],  x1
     ld1        {v1.16b},  [x0],  x1
@@ -273,7 +273,7 @@
     ret
 endfunc
 
-function x264_pixel_asd8_neon, export=1
+function pixel_asd8_neon, export=1
     sub         w4,  w4,  #2
     ld1        {v0.8b}, [x0], x1
     ld1        {v1.8b}, [x2], x3
@@ -379,7 +379,7 @@
 .endm
 
 .macro SSD_FUNC w h
-function x264_pixel_ssd_\w\()x\h\()_neon, export=1
+function pixel_ssd_\w\()x\h\()_neon, export=1
     SSD_START_\w
 .rept \h-2
     SSD_\w
@@ -402,7 +402,7 @@
 SSD_FUNC  16, 16
 
 
-function x264_pixel_ssd_nv12_core_neon, export=1
+function pixel_ssd_nv12_core_neon, export=1
     sxtw        x8,  w4
     add         x8,  x8,  #8
     and         x8,  x8,  #~15
@@ -473,7 +473,7 @@
 endfunc
 
 .macro pixel_var_8 h
-function x264_pixel_var_8x\h\()_neon, export=1
+function pixel_var_8x\h\()_neon, export=1
     ld1            {v16.8b}, [x0], x1
     ld1            {v17.8b}, [x0], x1
     mov             x2,  \h - 4
@@ -512,14 +512,14 @@
     uadalp          v1.4s,  v28.8h
     uadalp          v2.4s,  v29.8h
 
-    b               x264_var_end
+    b               var_end
 endfunc
 .endm
 
 pixel_var_8  8
 pixel_var_8 16
 
-function x264_pixel_var_16x16_neon, export=1
+function pixel_var_16x16_neon, export=1
     ld1            {v16.16b}, [x0],  x1
     ld1            {v17.16b}, [x0],  x1
     mov             x2,  #14
@@ -556,7 +556,7 @@
     uadalp          v2.4s,  v4.8h
 endfunc
 
-function x264_var_end
+function var_end
     add             v1.4s,  v1.4s,  v2.4s
     uaddlv          s0,  v0.8h
     uaddlv          d1,  v1.4s
@@ -568,7 +568,7 @@
 
 
 .macro pixel_var2_8 h
-function x264_pixel_var2_8x\h\()_neon, export=1
+function pixel_var2_8x\h\()_neon, export=1
     mov             x3,  #16
     ld1            {v16.8b}, [x0], #8
     ld1            {v18.8b}, [x1], x3
@@ -637,7 +637,7 @@
 pixel_var2_8 16
 
 
-function x264_pixel_satd_4x4_neon, export=1
+function pixel_satd_4x4_neon, export=1
     ld1        {v1.s}[0],  [x2], x3
     ld1        {v0.s}[0],  [x0], x1
     ld1        {v3.s}[0],  [x2], x3
@@ -670,7 +670,7 @@
     ret
 endfunc
 
-function x264_pixel_satd_4x8_neon, export=1
+function pixel_satd_4x8_neon, export=1
     ld1        {v1.s}[0],  [x2], x3
     ld1        {v0.s}[0],  [x0], x1
     ld1        {v3.s}[0],  [x2], x3
@@ -687,10 +687,10 @@
     ld1        {v4.s}[1],  [x0], x1
     ld1        {v7.s}[1],  [x2], x3
     ld1        {v6.s}[1],  [x0], x1
-    b           x264_satd_4x8_8x4_end_neon
+    b           satd_4x8_8x4_end_neon
 endfunc
 
-function x264_pixel_satd_8x4_neon, export=1
+function pixel_satd_8x4_neon, export=1
     ld1        {v1.8b},  [x2], x3
     ld1        {v0.8b},  [x0], x1
     ld1        {v3.8b},  [x2], x3
@@ -701,7 +701,7 @@
     ld1        {v6.8b},  [x0], x1
 endfunc
 
-function x264_satd_4x8_8x4_end_neon
+function satd_4x8_8x4_end_neon
     usubl       v0.8h,  v0.8b,  v1.8b
     usubl       v1.8h,  v2.8b,  v3.8b
     usubl       v2.8h,  v4.8b,  v5.8b
@@ -737,10 +737,10 @@
     ret
 endfunc
 
-function x264_pixel_satd_8x8_neon, export=1
+function pixel_satd_8x8_neon, export=1
     mov         x4,  x30
 
-    bl x264_satd_8x8_neon
+    bl satd_8x8_neon
     add         v0.8h,  v0.8h,  v1.8h
     add         v1.8h,  v2.8h,  v3.8h
     add         v0.8h,  v0.8h,  v1.8h
@@ -749,15 +749,15 @@
     ret         x4
 endfunc
 
-function x264_pixel_satd_8x16_neon, export=1
+function pixel_satd_8x16_neon, export=1
     mov         x4,  x30
 
-    bl x264_satd_8x8_neon
+    bl satd_8x8_neon
     add         v0.8h,  v0.8h,  v1.8h
     add         v1.8h,  v2.8h,  v3.8h
     add         v30.8h, v0.8h,  v1.8h
 
-    bl x264_satd_8x8_neon
+    bl satd_8x8_neon
     add         v0.8h,  v0.8h,  v1.8h
     add         v1.8h,  v2.8h,  v3.8h
     add         v31.8h, v0.8h,  v1.8h
@@ -813,12 +813,12 @@
     SUMSUB_ABCD \r1, \r3, \r2, \r4, \t1, \t3, \t2, \t4
 .endm
 
-function x264_satd_8x8_neon
+function satd_8x8_neon
     load_diff_fly_8x8
 endfunc
 
 // one vertical hadamard pass and two horizontal
-function x264_satd_8x4v_8x8h_neon
+function satd_8x4v_8x8h_neon
     SUMSUB_AB   v16.8h, v18.8h, v0.8h,  v2.8h
     SUMSUB_AB   v17.8h, v19.8h, v1.8h,  v3.8h
 
@@ -856,14 +856,14 @@
     ret
 endfunc
 
-function x264_pixel_satd_16x8_neon, export=1
+function pixel_satd_16x8_neon, export=1
     mov         x4,  x30
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     add         v30.8h, v0.8h,  v1.8h
     add         v31.8h, v2.8h,  v3.8h
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     add         v0.8h,  v0.8h,  v1.8h
     add         v1.8h,  v2.8h,  v3.8h
     add         v30.8h, v30.8h, v0.8h
@@ -875,26 +875,26 @@
     ret         x4
 endfunc
 
-function x264_pixel_satd_16x16_neon, export=1
+function pixel_satd_16x16_neon, export=1
     mov         x4,  x30
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     add         v30.8h, v0.8h,  v1.8h
     add         v31.8h, v2.8h,  v3.8h
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     add         v0.8h,  v0.8h,  v1.8h
     add         v1.8h,  v2.8h,  v3.8h
     add         v30.8h, v30.8h, v0.8h
     add         v31.8h, v31.8h, v1.8h
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     add         v0.8h,  v0.8h,  v1.8h
     add         v1.8h,  v2.8h,  v3.8h
     add         v30.8h, v30.8h, v0.8h
     add         v31.8h, v31.8h, v1.8h
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     add         v0.8h,  v0.8h,  v1.8h
     add         v1.8h,  v2.8h,  v3.8h
     add         v30.8h, v30.8h, v0.8h
@@ -906,7 +906,7 @@
     ret         x4
 endfunc
 
-function x264_satd_16x4_neon
+function satd_16x4_neon
     ld1        {v1.16b},  [x2], x3
     ld1        {v0.16b},  [x0], x1
     ld1        {v3.16b},  [x2], x3
@@ -928,10 +928,10 @@
     SUMSUB_AB   v0.8h,  v1.8h,  v16.8h, v17.8h
     SUMSUB_AB   v2.8h,  v3.8h,  v18.8h, v19.8h
 
-    b           x264_satd_8x4v_8x8h_neon
+    b           satd_8x4v_8x8h_neon
 endfunc
 
-function x264_pixel_satd_4x16_neon, export=1
+function pixel_satd_4x16_neon, export=1
     mov         x4,  x30
     ld1        {v1.s}[0],  [x2], x3
     ld1        {v0.s}[0],  [x0], x1
@@ -977,7 +977,7 @@
     SUMSUB_AB   v0.8h,  v1.8h,  v16.8h, v17.8h
     SUMSUB_AB   v2.8h,  v3.8h,  v18.8h, v19.8h
 
-    bl          x264_satd_8x4v_8x8h_neon
+    bl          satd_8x4v_8x8h_neon
 
     add         v30.8h, v0.8h,  v1.8h
     add         v31.8h, v2.8h,  v3.8h
@@ -987,7 +987,7 @@
     ret         x4
 endfunc
 
-function x264_pixel_sa8d_8x8_neon, export=1
+function pixel_sa8d_8x8_neon, export=1
     mov         x4,  x30
     bl          pixel_sa8d_8x8_neon
     add         v0.8h,  v0.8h,  v1.8h
@@ -998,7 +998,7 @@
     ret         x4
 endfunc
 
-function x264_pixel_sa8d_16x16_neon, export=1
+function pixel_sa8d_16x16_neon, export=1
     mov         x4,  x30
     bl          pixel_sa8d_8x8_neon
     uaddlp      v30.4s, v0.8h
@@ -1120,7 +1120,7 @@
 sa8d_satd_8x8
 sa8d_satd_8x8 satd_
 
-function x264_pixel_sa8d_satd_16x16_neon, export=1
+function pixel_sa8d_satd_16x16_neon, export=1
     mov         x4,  x30
     bl          pixel_sa8d_satd_8x8_neon
     uaddlp      v30.4s, v0.8h
@@ -1158,25 +1158,25 @@
 endfunc
 
 .macro HADAMARD_AC w h
-function x264_pixel_hadamard_ac_\w\()x\h\()_neon, export=1
+function pixel_hadamard_ac_\w\()x\h\()_neon, export=1
     movrel      x5, mask_ac_4_8
     mov         x4,  x30
     ld1         {v30.8h,v31.8h}, [x5]
     movi        v28.16b, #0
     movi        v29.16b, #0
 
-    bl          x264_hadamard_ac_8x8_neon
+    bl          hadamard_ac_8x8_neon
 .if \h > 8
-    bl          x264_hadamard_ac_8x8_neon
+    bl          hadamard_ac_8x8_neon
 .endif
 .if \w > 8
     sub         x0,  x0,  x1,  lsl #3
     add         x0,  x0,  #8
-    bl          x264_hadamard_ac_8x8_neon
+    bl          hadamard_ac_8x8_neon
 .endif
 .if \w * \h == 256
     sub         x0,  x0,  x1,  lsl #4
-    bl          x264_hadamard_ac_8x8_neon
+    bl          hadamard_ac_8x8_neon
 .endif
 
     addv        s1,  v29.4s
@@ -1196,7 +1196,7 @@
 HADAMARD_AC 16, 16
 
 // v28: satd  v29: sa8d  v30: mask_ac4  v31: mask_ac8
-function x264_hadamard_ac_8x8_neon
+function hadamard_ac_8x8_neon
     ld1         {v16.8b}, [x0], x1
     ld1         {v17.8b}, [x0], x1
     ld1         {v18.8b}, [x0], x1
@@ -1288,7 +1288,7 @@
 endfunc
 
 
-function x264_pixel_ssim_4x4x2_core_neon, export=1
+function pixel_ssim_4x4x2_core_neon, export=1
     ld1        {v0.8b},  [x0], x1
     ld1        {v2.8b},  [x2], x3
     umull       v16.8h, v0.8b,  v0.8b
@@ -1347,7 +1347,7 @@
     ret
 endfunc
 
-function x264_pixel_ssim_end4_neon, export=1
+function pixel_ssim_end4_neon, export=1
     mov         x5,  #4
     ld1        {v16.4s,v17.4s}, [x0], #32
     ld1        {v18.4s,v19.4s}, [x1], #32
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/pixel.h x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel.h
--- x264-0.152.2854+gite9a5903/common/aarch64/pixel.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: aarch64 pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -27,6 +27,44 @@
 #ifndef X264_AARCH64_PIXEL_H
 #define X264_AARCH64_PIXEL_H
 
+#define x264_pixel_sad_16x16_neon x264_template(pixel_sad_16x16_neon)
+#define x264_pixel_sad_16x8_neon x264_template(pixel_sad_16x8_neon)
+#define x264_pixel_sad_4x16_neon x264_template(pixel_sad_4x16_neon)
+#define x264_pixel_sad_4x4_neon x264_template(pixel_sad_4x4_neon)
+#define x264_pixel_sad_4x8_neon x264_template(pixel_sad_4x8_neon)
+#define x264_pixel_sad_8x16_neon x264_template(pixel_sad_8x16_neon)
+#define x264_pixel_sad_8x4_neon x264_template(pixel_sad_8x4_neon)
+#define x264_pixel_sad_8x8_neon x264_template(pixel_sad_8x8_neon)
+#define x264_pixel_sad_x3_16x16_neon x264_template(pixel_sad_x3_16x16_neon)
+#define x264_pixel_sad_x3_16x8_neon x264_template(pixel_sad_x3_16x8_neon)
+#define x264_pixel_sad_x3_4x4_neon x264_template(pixel_sad_x3_4x4_neon)
+#define x264_pixel_sad_x3_4x8_neon x264_template(pixel_sad_x3_4x8_neon)
+#define x264_pixel_sad_x3_8x16_neon x264_template(pixel_sad_x3_8x16_neon)
+#define x264_pixel_sad_x3_8x4_neon x264_template(pixel_sad_x3_8x4_neon)
+#define x264_pixel_sad_x3_8x8_neon x264_template(pixel_sad_x3_8x8_neon)
+#define x264_pixel_sad_x4_16x16_neon x264_template(pixel_sad_x4_16x16_neon)
+#define x264_pixel_sad_x4_16x8_neon x264_template(pixel_sad_x4_16x8_neon)
+#define x264_pixel_sad_x4_4x4_neon x264_template(pixel_sad_x4_4x4_neon)
+#define x264_pixel_sad_x4_4x8_neon x264_template(pixel_sad_x4_4x8_neon)
+#define x264_pixel_sad_x4_8x16_neon x264_template(pixel_sad_x4_8x16_neon)
+#define x264_pixel_sad_x4_8x4_neon x264_template(pixel_sad_x4_8x4_neon)
+#define x264_pixel_sad_x4_8x8_neon x264_template(pixel_sad_x4_8x8_neon)
+#define x264_pixel_satd_16x16_neon x264_template(pixel_satd_16x16_neon)
+#define x264_pixel_satd_16x8_neon x264_template(pixel_satd_16x8_neon)
+#define x264_pixel_satd_4x16_neon x264_template(pixel_satd_4x16_neon)
+#define x264_pixel_satd_4x4_neon x264_template(pixel_satd_4x4_neon)
+#define x264_pixel_satd_4x8_neon x264_template(pixel_satd_4x8_neon)
+#define x264_pixel_satd_8x16_neon x264_template(pixel_satd_8x16_neon)
+#define x264_pixel_satd_8x4_neon x264_template(pixel_satd_8x4_neon)
+#define x264_pixel_satd_8x8_neon x264_template(pixel_satd_8x8_neon)
+#define x264_pixel_ssd_16x16_neon x264_template(pixel_ssd_16x16_neon)
+#define x264_pixel_ssd_16x8_neon x264_template(pixel_ssd_16x8_neon)
+#define x264_pixel_ssd_4x16_neon x264_template(pixel_ssd_4x16_neon)
+#define x264_pixel_ssd_4x4_neon x264_template(pixel_ssd_4x4_neon)
+#define x264_pixel_ssd_4x8_neon x264_template(pixel_ssd_4x8_neon)
+#define x264_pixel_ssd_8x16_neon x264_template(pixel_ssd_8x16_neon)
+#define x264_pixel_ssd_8x4_neon x264_template(pixel_ssd_8x4_neon)
+#define x264_pixel_ssd_8x8_neon x264_template(pixel_ssd_8x8_neon)
 #define DECL_PIXELS( ret, name, suffix, args ) \
     ret x264_pixel_##name##_16x16_##suffix args;\
     ret x264_pixel_##name##_16x8_##suffix args;\
@@ -50,30 +88,47 @@
 DECL_X1( ssd, neon )
 
 
+#define x264_pixel_ssd_nv12_core_neon x264_template(pixel_ssd_nv12_core_neon)
 void x264_pixel_ssd_nv12_core_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, uint64_t *, uint64_t * );
 
+#define x264_pixel_vsad_neon x264_template(pixel_vsad_neon)
 int x264_pixel_vsad_neon( uint8_t *, intptr_t, int );
 
+#define x264_pixel_sa8d_8x8_neon x264_template(pixel_sa8d_8x8_neon)
 int x264_pixel_sa8d_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t );
+#define x264_pixel_sa8d_16x16_neon x264_template(pixel_sa8d_16x16_neon)
 int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
+#define x264_pixel_sa8d_satd_16x16_neon x264_template(pixel_sa8d_satd_16x16_neon)
 uint64_t x264_pixel_sa8d_satd_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
 
+#define x264_pixel_var_8x8_neon x264_template(pixel_var_8x8_neon)
 uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+#define x264_pixel_var_8x16_neon x264_template(pixel_var_8x16_neon)
 uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
+#define x264_pixel_var_16x16_neon x264_template(pixel_var_16x16_neon)
 uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
+#define x264_pixel_var2_8x8_neon x264_template(pixel_var2_8x8_neon)
 int x264_pixel_var2_8x8_neon ( uint8_t *, uint8_t *, int * );
+#define x264_pixel_var2_8x16_neon x264_template(pixel_var2_8x16_neon)
 int x264_pixel_var2_8x16_neon( uint8_t *, uint8_t *, int * );
 
+#define x264_pixel_hadamard_ac_8x8_neon x264_template(pixel_hadamard_ac_8x8_neon)
 uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
+#define x264_pixel_hadamard_ac_8x16_neon x264_template(pixel_hadamard_ac_8x16_neon)
 uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
+#define x264_pixel_hadamard_ac_16x8_neon x264_template(pixel_hadamard_ac_16x8_neon)
 uint64_t x264_pixel_hadamard_ac_16x8_neon ( uint8_t *, intptr_t );
+#define x264_pixel_hadamard_ac_16x16_neon x264_template(pixel_hadamard_ac_16x16_neon)
 uint64_t x264_pixel_hadamard_ac_16x16_neon( uint8_t *, intptr_t );
 
+#define x264_pixel_ssim_4x4x2_core_neon x264_template(pixel_ssim_4x4x2_core_neon)
 void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t,
                                       const uint8_t *, intptr_t,
                                       int sums[2][4] );
+#define x264_pixel_ssim_end4_neon x264_template(pixel_ssim_end4_neon)
 float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
 
+#define x264_pixel_asd8_neon x264_template(pixel_asd8_neon)
 int x264_pixel_asd8_neon( uint8_t *, intptr_t,  uint8_t *, intptr_t, int );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/predict-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/predict-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.S: aarch64 intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -62,8 +62,8 @@
 .endm
 
 
-function x264_predict_4x4_h_aarch64, export=1
-    ldrb    w1,  [x0, #0*FDEC_STRIDE-1]
+function predict_4x4_h_aarch64, export=1
+    ldurb   w1,  [x0, #0*FDEC_STRIDE-1]
     mov     w5,  #0x01010101
     ldrb    w2,  [x0, #1*FDEC_STRIDE-1]
     ldrb    w3,  [x0, #2*FDEC_STRIDE-1]
@@ -79,8 +79,8 @@
     ret
 endfunc
 
-function x264_predict_4x4_v_aarch64, export=1
-    ldr     w1,  [x0, #0 - 1 * FDEC_STRIDE]
+function predict_4x4_v_aarch64, export=1
+    ldur    w1,  [x0, #0 - 1 * FDEC_STRIDE]
     str     w1,  [x0, #0 + 0 * FDEC_STRIDE]
     str     w1,  [x0, #0 + 1 * FDEC_STRIDE]
     str     w1,  [x0, #0 + 2 * FDEC_STRIDE]
@@ -88,9 +88,9 @@
     ret
 endfunc
 
-function x264_predict_4x4_dc_neon, export=1
+function predict_4x4_dc_neon, export=1
     sub         x1,  x0,  #FDEC_STRIDE
-    ldrb        w4,  [x0, #-1 + 0 * FDEC_STRIDE]
+    ldurb       w4,  [x0, #-1 + 0 * FDEC_STRIDE]
     ldrb        w5,  [x0, #-1 + 1 * FDEC_STRIDE]
     ldrb        w6,  [x0, #-1 + 2 * FDEC_STRIDE]
     ldrb        w7,  [x0, #-1 + 3 * FDEC_STRIDE]
@@ -110,7 +110,7 @@
     ret
 endfunc
 
-function x264_predict_4x4_dc_top_neon, export=1
+function predict_4x4_dc_top_neon, export=1
     sub         x1,  x0,  #FDEC_STRIDE
     ldr         s0, [x1]
     uaddlv      h0,  v0.8b
@@ -124,7 +124,7 @@
     ret
 endfunc
 
-function x264_predict_4x4_ddr_neon, export=1
+function predict_4x4_ddr_neon, export=1
     sub         x1,  x0,  #FDEC_STRIDE+1
     mov         x7,  #FDEC_STRIDE
     ld1        {v0.8b}, [x1], x7            // # -FDEC_STRIDE-1
@@ -152,7 +152,7 @@
     ret
 endfunc
 
-function x264_predict_4x4_ddl_neon, export=1
+function predict_4x4_ddl_neon, export=1
     sub         x0,  x0,  #FDEC_STRIDE
     mov         x7,  #FDEC_STRIDE
     ld1        {v0.8b}, [x0],  x7
@@ -171,7 +171,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_dc_neon, export=1
+function predict_8x8_dc_neon, export=1
     mov         x7,  #FDEC_STRIDE
     ld1        {v0.16b}, [x1], #16
     ld1        {v1.8b},  [x1]
@@ -187,7 +187,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_h_neon, export=1
+function predict_8x8_h_neon, export=1
     mov         x7,  #FDEC_STRIDE
     ld1        {v16.16b}, [x1]
     dup         v0.8b, v16.b[14]
@@ -209,7 +209,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_v_neon, export=1
+function predict_8x8_v_neon, export=1
     add         x1,  x1,  #16
     mov         x7,  #FDEC_STRIDE
     ld1        {v0.8b}, [x1]
@@ -219,7 +219,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_ddl_neon, export=1
+function predict_8x8_ddl_neon, export=1
     add         x1,  x1,  #16
     mov         x7,  #FDEC_STRIDE
     ld1        {v0.16b}, [x1]
@@ -248,7 +248,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_ddr_neon, export=1
+function predict_8x8_ddr_neon, export=1
     ld1        {v0.16b,v1.16b}, [x1]
     ext         v2.16b, v0.16b, v1.16b, #7
     ext         v4.16b, v0.16b, v1.16b, #9
@@ -278,7 +278,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_vl_neon, export=1
+function predict_8x8_vl_neon, export=1
     add         x1,  x1,  #16
     mov         x7, #FDEC_STRIDE
 
@@ -309,7 +309,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_vr_neon, export=1
+function predict_8x8_vr_neon, export=1
     add         x1,  x1,  #8
     mov         x7,  #FDEC_STRIDE
     ld1        {v2.16b}, [x1]
@@ -343,7 +343,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_hd_neon, export=1
+function predict_8x8_hd_neon, export=1
     add         x1,  x1,  #7
     mov         x7, #FDEC_STRIDE
 
@@ -378,7 +378,7 @@
     ret
 endfunc
 
-function x264_predict_8x8_hu_neon, export=1
+function predict_8x8_hu_neon, export=1
     add         x1,  x1,  #7
     mov         x7,  #FDEC_STRIDE
     ld1        {v7.8b}, [x1]
@@ -416,7 +416,7 @@
 endfunc
 
 
-function x264_predict_8x8c_dc_top_neon, export=1
+function predict_8x8c_dc_top_neon, export=1
     sub         x2,  x0,  #FDEC_STRIDE
     mov         x1,  #FDEC_STRIDE
     ld1        {v0.8b},  [x2]
@@ -429,8 +429,8 @@
     b           pred8x8c_dc_end
 endfunc
 
-function x264_predict_8x8c_dc_left_neon, export=1
-    ldrb        w2,  [x0, #0 * FDEC_STRIDE - 1]
+function predict_8x8c_dc_left_neon, export=1
+    ldurb       w2,  [x0, #0 * FDEC_STRIDE - 1]
     ldrb        w3,  [x0, #1 * FDEC_STRIDE - 1]
     ldrb        w4,  [x0, #2 * FDEC_STRIDE - 1]
     ldrb        w5,  [x0, #3 * FDEC_STRIDE - 1]
@@ -452,10 +452,10 @@
     b           pred8x8c_dc_end
 endfunc
 
-function x264_predict_8x8c_dc_neon, export=1
+function predict_8x8c_dc_neon, export=1
     mov         x1,  #FDEC_STRIDE
     sub         x2,  x0,  #FDEC_STRIDE
-    ldrb        w10, [x0, #0 * FDEC_STRIDE - 1]
+    ldurb       w10, [x0, #0 * FDEC_STRIDE - 1]
     ldrb        w11, [x0, #1 * FDEC_STRIDE - 1]
     ldrb        w12, [x0, #2 * FDEC_STRIDE - 1]
     ldrb        w13, [x0, #3 * FDEC_STRIDE - 1]
@@ -498,7 +498,7 @@
     ret
 endfunc
 
-function x264_predict_8x8c_h_neon, export=1
+function predict_8x8c_h_neon, export=1
     sub         x1,  x0,  #1
     mov         x7,  #FDEC_STRIDE
 .rept 4
@@ -510,15 +510,15 @@
     ret
 endfunc
 
-function x264_predict_8x8c_v_aarch64, export=1
-    ldr         x1,  [x0, #-FDEC_STRIDE]
+function predict_8x8c_v_aarch64, export=1
+    ldur        x1,  [x0, #-FDEC_STRIDE]
 .irp c, 0,1,2,3,4,5,6,7
     str         x1,  [x0, #\c * FDEC_STRIDE]
 .endr
     ret
 endfunc
 
-function x264_predict_8x8c_p_neon, export=1
+function predict_8x8c_p_neon, export=1
     sub         x3,  x0,  #FDEC_STRIDE
     mov         x1,  #FDEC_STRIDE
     add         x2,  x3,  #4
@@ -568,7 +568,11 @@
 
 
 .macro loadsum4 wd, t1, t2, t3, x, idx
+  .if \idx == 0
+    ldurb       \wd,  [\x, #(\idx + 0) * FDEC_STRIDE - 1]
+  .else
     ldrb        \wd,  [\x, #(\idx + 0) * FDEC_STRIDE - 1]
+  .endif
     ldrb        \t1,  [\x, #(\idx + 1) * FDEC_STRIDE - 1]
     ldrb        \t2,  [\x, #(\idx + 2) * FDEC_STRIDE - 1]
     ldrb        \t3,  [\x, #(\idx + 3) * FDEC_STRIDE - 1]
@@ -577,7 +581,7 @@
     add         \wd,  \wd,  \t1
 .endm
 
-function x264_predict_8x16c_h_neon, export=1
+function predict_8x16c_h_neon, export=1
     sub         x2,  x0,  #1
     add         x3,  x0,  #FDEC_STRIDE - 1
     mov         x7,  #2 * FDEC_STRIDE
@@ -595,7 +599,7 @@
     ret
 endfunc
 
-function x264_predict_8x16c_v_neon, export=1
+function predict_8x16c_v_neon, export=1
     sub         x1,  x0,  #FDEC_STRIDE
     mov         x2,  #2 * FDEC_STRIDE
     ld1        {v0.8b}, [x1], x2
@@ -606,7 +610,7 @@
     ret
 endfunc
 
-function x264_predict_8x16c_p_neon, export=1
+function predict_8x16c_p_neon, export=1
     movrel      x4,  p16weight
     ld1        {v17.8h}, [x4]
     sub         x3,  x0,  #FDEC_STRIDE
@@ -673,7 +677,7 @@
     ret
 endfunc
 
-function x264_predict_8x16c_dc_neon, export=1
+function predict_8x16c_dc_neon, export=1
     mov         x1,  #FDEC_STRIDE
     sub         x10, x0,  #FDEC_STRIDE
     loadsum4    w2, w3, w4, w5, x0, 0
@@ -718,9 +722,9 @@
     ret
 endfunc
 
-function x264_predict_8x16c_dc_left_neon, export=1
+function predict_8x16c_dc_left_neon, export=1
     mov         x1,  #FDEC_STRIDE
-    ldrb        w2,  [x0, # 0 * FDEC_STRIDE - 1]
+    ldurb       w2,  [x0, # 0 * FDEC_STRIDE - 1]
     ldrb        w3,  [x0, # 1 * FDEC_STRIDE - 1]
     ldrb        w4,  [x0, # 2 * FDEC_STRIDE - 1]
     ldrb        w5,  [x0, # 3 * FDEC_STRIDE - 1]
@@ -772,7 +776,7 @@
     ret
 endfunc
 
-function x264_predict_8x16c_dc_top_neon, export=1
+function predict_8x16c_dc_top_neon, export=1
     sub         x2,  x0,  #FDEC_STRIDE
     mov         x1,  #FDEC_STRIDE
     ld1        {v0.8b}, [x2]
@@ -789,7 +793,7 @@
 endfunc
 
 
-function x264_predict_16x16_dc_top_neon, export=1
+function predict_16x16_dc_top_neon, export=1
     sub         x2,  x0,  #FDEC_STRIDE
     mov         x1,  #FDEC_STRIDE
     ld1        {v0.16b}, [x2]
@@ -799,7 +803,7 @@
     b           pred16x16_dc_end
 endfunc
 
-function x264_predict_16x16_dc_left_neon, export=1
+function predict_16x16_dc_left_neon, export=1
     sub         x2,  x0,  #1
     mov         x1,  #FDEC_STRIDE
     ldcol.16    v0,  x2,  x1
@@ -809,7 +813,7 @@
     b           pred16x16_dc_end
 endfunc
 
-function x264_predict_16x16_dc_neon, export=1
+function predict_16x16_dc_neon, export=1
     sub         x3,  x0,  #FDEC_STRIDE
     sub         x2,  x0,  #1
     mov         x1,  #FDEC_STRIDE
@@ -827,7 +831,7 @@
     ret
 endfunc
 
-function x264_predict_16x16_h_neon, export=1
+function predict_16x16_h_neon, export=1
     sub         x1,  x0,  #1
     mov         x7, #FDEC_STRIDE
 .rept 8
@@ -839,7 +843,7 @@
     ret
 endfunc
 
-function x264_predict_16x16_v_neon, export=1
+function predict_16x16_v_neon, export=1
     sub         x0,  x0,  #FDEC_STRIDE
     mov         x7,  #FDEC_STRIDE
     ld1        {v0.16b}, [x0], x7
@@ -849,7 +853,7 @@
     ret
 endfunc
 
-function x264_predict_16x16_p_neon, export=1
+function predict_16x16_p_neon, export=1
     sub         x3,  x0,  #FDEC_STRIDE
     mov         x1,  #FDEC_STRIDE
     add         x2,  x3,  #8
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/predict-c.c x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-c.c
--- x264-0.152.2854+gite9a5903/common/aarch64/predict-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: aarch64 intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -28,29 +28,6 @@
 #include "predict.h"
 #include "pixel.h"
 
-void x264_predict_4x4_dc_top_neon( uint8_t *src );
-void x264_predict_4x4_ddr_neon( uint8_t *src );
-void x264_predict_4x4_ddl_neon( uint8_t *src );
-
-void x264_predict_8x8c_dc_top_neon( uint8_t *src );
-void x264_predict_8x8c_dc_left_neon( uint8_t *src );
-void x264_predict_8x8c_p_neon( uint8_t *src );
-
-void x264_predict_8x16c_dc_left_neon( uint8_t *src );
-void x264_predict_8x16c_dc_top_neon( uint8_t *src );
-void x264_predict_8x16c_p_neon( uint8_t *src );
-
-void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
-
-void x264_predict_16x16_dc_top_neon( uint8_t *src );
-void x264_predict_16x16_dc_left_neon( uint8_t *src );
-void x264_predict_16x16_p_neon( uint8_t *src );
-
 void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] )
 {
 #if !HIGH_BIT_DEPTH
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/predict.h x264-0.158.2988+git-20191101.7817004/common/aarch64/predict.h
--- x264-0.152.2854+gite9a5903/common/aarch64/predict.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/predict.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: aarch64 intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -27,8 +27,11 @@
 #ifndef X264_AARCH64_PREDICT_H
 #define X264_AARCH64_PREDICT_H
 
+#define x264_predict_4x4_h_aarch64 x264_template(predict_4x4_h_aarch64)
 void x264_predict_4x4_h_aarch64( uint8_t *src );
+#define x264_predict_4x4_v_aarch64 x264_template(predict_4x4_v_aarch64)
 void x264_predict_4x4_v_aarch64( uint8_t *src );
+#define x264_predict_8x8c_v_aarch64 x264_template(predict_8x8c_v_aarch64)
 void x264_predict_8x8c_v_aarch64( uint8_t *src );
 
 // for the merged 4x4 intra sad/satd which expects unified suffix
@@ -36,23 +39,81 @@
 #define x264_predict_4x4_v_neon x264_predict_4x4_v_aarch64
 #define x264_predict_8x8c_v_neon x264_predict_8x8c_v_aarch64
 
+#define x264_predict_4x4_dc_top_neon x264_template(predict_4x4_dc_top_neon)
+void x264_predict_4x4_dc_top_neon( uint8_t *src );
+#define x264_predict_4x4_ddr_neon x264_template(predict_4x4_ddr_neon)
+void x264_predict_4x4_ddr_neon( uint8_t *src );
+#define x264_predict_4x4_ddl_neon x264_template(predict_4x4_ddl_neon)
+void x264_predict_4x4_ddl_neon( uint8_t *src );
+
+#define x264_predict_8x8c_dc_top_neon x264_template(predict_8x8c_dc_top_neon)
+void x264_predict_8x8c_dc_top_neon( uint8_t *src );
+#define x264_predict_8x8c_dc_left_neon x264_template(predict_8x8c_dc_left_neon)
+void x264_predict_8x8c_dc_left_neon( uint8_t *src );
+#define x264_predict_8x8c_p_neon x264_template(predict_8x8c_p_neon)
+void x264_predict_8x8c_p_neon( uint8_t *src );
+
+#define x264_predict_8x16c_dc_left_neon x264_template(predict_8x16c_dc_left_neon)
+void x264_predict_8x16c_dc_left_neon( uint8_t *src );
+#define x264_predict_8x16c_dc_top_neon x264_template(predict_8x16c_dc_top_neon)
+void x264_predict_8x16c_dc_top_neon( uint8_t *src );
+#define x264_predict_8x16c_p_neon x264_template(predict_8x16c_p_neon)
+void x264_predict_8x16c_p_neon( uint8_t *src );
+
+#define x264_predict_8x8_ddl_neon x264_template(predict_8x8_ddl_neon)
+void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_ddr_neon x264_template(predict_8x8_ddr_neon)
+void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_vl_neon x264_template(predict_8x8_vl_neon)
+void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_vr_neon x264_template(predict_8x8_vr_neon)
+void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_hd_neon x264_template(predict_8x8_hd_neon)
+void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_hu_neon x264_template(predict_8x8_hu_neon)
+void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
+
+#define x264_predict_16x16_dc_top_neon x264_template(predict_16x16_dc_top_neon)
+void x264_predict_16x16_dc_top_neon( uint8_t *src );
+#define x264_predict_16x16_dc_left_neon x264_template(predict_16x16_dc_left_neon)
+void x264_predict_16x16_dc_left_neon( uint8_t *src );
+#define x264_predict_16x16_p_neon x264_template(predict_16x16_p_neon)
+void x264_predict_16x16_p_neon( uint8_t *src );
+
+#define x264_predict_4x4_dc_neon x264_template(predict_4x4_dc_neon)
 void x264_predict_4x4_dc_neon( uint8_t *src );
+#define x264_predict_8x8_v_neon x264_template(predict_8x8_v_neon)
 void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_h_neon x264_template(predict_8x8_h_neon)
 void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_dc_neon x264_template(predict_8x8_dc_neon)
 void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8c_dc_neon x264_template(predict_8x8c_dc_neon)
 void x264_predict_8x8c_dc_neon( uint8_t *src );
+#define x264_predict_8x8c_h_neon x264_template(predict_8x8c_h_neon)
 void x264_predict_8x8c_h_neon( uint8_t *src );
+#define x264_predict_8x16c_v_neon x264_template(predict_8x16c_v_neon)
 void x264_predict_8x16c_v_neon( uint8_t *src );
+#define x264_predict_8x16c_h_neon x264_template(predict_8x16c_h_neon)
 void x264_predict_8x16c_h_neon( uint8_t *src );
+#define x264_predict_8x16c_dc_neon x264_template(predict_8x16c_dc_neon)
 void x264_predict_8x16c_dc_neon( uint8_t *src );
+#define x264_predict_16x16_v_neon x264_template(predict_16x16_v_neon)
 void x264_predict_16x16_v_neon( uint8_t *src );
+#define x264_predict_16x16_h_neon x264_template(predict_16x16_h_neon)
 void x264_predict_16x16_h_neon( uint8_t *src );
+#define x264_predict_16x16_dc_neon x264_template(predict_16x16_dc_neon)
 void x264_predict_16x16_dc_neon( uint8_t *src );
 
+#define x264_predict_4x4_init_aarch64 x264_template(predict_4x4_init_aarch64)
 void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] );
+#define x264_predict_8x8_init_aarch64 x264_template(predict_8x8_init_aarch64)
 void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
+#define x264_predict_8x8c_init_aarch64 x264_template(predict_8x8c_init_aarch64)
 void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] );
+#define x264_predict_8x16c_init_aarch64 x264_template(predict_8x16c_init_aarch64)
 void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] );
+#define x264_predict_16x16_init_aarch64 x264_template(predict_16x16_init_aarch64)
 void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] );
 
 #endif /* X264_AARCH64_PREDICT_H */
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/quant-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/quant-a.S
--- x264-0.152.2854+gite9a5903/common/aarch64/quant-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/quant-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /****************************************************************************
  * quant.S: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -57,7 +57,7 @@
 .endm
 
 // quant_2x2_dc( int16_t dct[4], int mf, int bias )
-function x264_quant_2x2_dc_neon, export=1
+function quant_2x2_dc_neon, export=1
     ld1        {v0.4h}, [x0]
     dup         v2.4h,  w2
     dup         v1.4h,  w1
@@ -73,7 +73,7 @@
 endfunc
 
 // quant_4x4_dc( int16_t dct[16], int mf, int bias )
-function x264_quant_4x4_dc_neon, export=1
+function quant_4x4_dc_neon, export=1
     ld1        {v16.8h,v17.8h}, [x0]
     abs         v18.8h,  v16.8h
     abs         v19.8h,  v17.8h
@@ -85,7 +85,7 @@
 endfunc
 
 // quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4_neon, export=1
+function quant_4x4_neon, export=1
     ld1        {v16.8h,v17.8h}, [x0]
     abs         v18.8h,  v16.8h
     abs         v19.8h,  v17.8h
@@ -97,7 +97,7 @@
 endfunc
 
 // quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4x4_neon, export=1
+function quant_4x4x4_neon, export=1
     ld1        {v16.8h,v17.8h}, [x0]
     abs         v18.8h, v16.8h
     abs         v19.8h, v17.8h
@@ -140,7 +140,7 @@
 endfunc
 
 // quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
-function x264_quant_8x8_neon, export=1
+function quant_8x8_neon, export=1
     ld1        {v16.8h,v17.8h}, [x0]
     abs         v18.8h, v16.8h
     abs         v19.8h, v17.8h
@@ -177,7 +177,7 @@
 
 // dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
 .macro DEQUANT size bits
-function x264_dequant_\size\()_neon, export=1
+function dequant_\size\()_neon, export=1
     DEQUANT_START \bits+2, \bits
 .ifc \size, 8x8
     mov         w2,  #4
@@ -258,7 +258,7 @@
 DEQUANT 8x8, 6
 
 // dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
-function x264_dequant_4x4_dc_neon, export=1
+function dequant_4x4_dc_neon, export=1
     DEQUANT_START 6, 6, yes
     b.lt        dequant_4x4_dc_rshift
 
@@ -303,9 +303,9 @@
 endfunc
 
 .macro decimate_score_1x size
-function x264_decimate_score\size\()_neon, export=1
+function decimate_score\size\()_neon, export=1
     ld1        {v0.8h,v1.8h}, [x0]
-    movrel      x5,  X(x264_decimate_table4)
+    movrel      x5,  X264(decimate_table4)
     movi        v3.16b, #0x01
     sqxtn       v0.8b,  v0.8h
     sqxtn2      v0.16b, v1.8h
@@ -348,7 +348,7 @@
     .byte  0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01
 endconst
 
-function x264_decimate_score64_neon, export=1
+function decimate_score64_neon, export=1
     ld1        {v0.8h,v1.8h}, [x0], #32
     ld1        {v2.8h,v3.8h}, [x0], #32
     ld1        {v4.8h,v5.8h}, [x0], #32
@@ -391,7 +391,7 @@
     mvn         x1,  x1
     mov         w0,  #0
     cbz         x1,  0f
-    movrel      x5,  X(x264_decimate_table8)
+    movrel      x5,  X264(decimate_table8)
 1:
     clz         x3,  x1
     lsl         x1,  x1,  x3
@@ -407,7 +407,7 @@
 endfunc
 
 // int coeff_last( int16_t *l )
-function x264_coeff_last4_aarch64, export=1
+function coeff_last4_aarch64, export=1
     ldr         x2,  [x0]
     mov         w4,  #3
     clz         x0,  x2
@@ -415,7 +415,7 @@
     ret
 endfunc
 
-function x264_coeff_last8_aarch64, export=1
+function coeff_last8_aarch64, export=1
     ldr         x3,  [x0, #8]
     mov         w4,  #7
     clz         x2,  x3
@@ -430,7 +430,7 @@
 endfunc
 
 .macro COEFF_LAST_1x size
-function x264_coeff_last\size\()_neon, export=1
+function coeff_last\size\()_neon, export=1
 .if \size == 15
     sub         x0,  x0,  #2
 .endif
@@ -450,7 +450,7 @@
 COEFF_LAST_1x 15
 COEFF_LAST_1x 16
 
-function x264_coeff_last64_neon, export=1
+function coeff_last64_neon, export=1
     ld1        {v0.8h,v1.8h,v2.8h,v3.8h}, [x0], 64
     movi        v31.8h,  #8
     movi        v30.8h,  #1
@@ -523,7 +523,7 @@
     mov         w0,  w7
 .endm
 
-function x264_coeff_level_run4_aarch64, export=1
+function coeff_level_run4_aarch64, export=1
     ldr         x2,  [x0]
 
     coeff_level_run_start 4
@@ -534,7 +534,7 @@
 endfunc
 
 .macro X264_COEFF_LEVEL_RUN size
-function x264_coeff_level_run\size\()_neon, export=1
+function coeff_level_run\size\()_neon, export=1
 .if \size == 15
     sub         x0,  x0,  #2
 .endif
@@ -566,7 +566,7 @@
 X264_COEFF_LEVEL_RUN 15
 X264_COEFF_LEVEL_RUN 16
 
-function x264_denoise_dct_neon, export=1
+function denoise_dct_neon, export=1
 1:  subs        w3,  w3,  #16
     ld1         {v0.8h,v1.8h}, [x0]
     ld1         {v4.4s,v5.4s,v6.4s,v7.4s}, [x1]
diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/quant.h x264-0.158.2988+git-20191101.7817004/common/aarch64/quant.h
--- x264-0.152.2854+gite9a5903/common/aarch64/quant.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/aarch64/quant.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -27,33 +27,55 @@
 #ifndef X264_AARCH64_QUANT_H
 #define X264_AARCH64_QUANT_H
 
+#define x264_quant_2x2_dc_aarch64 x264_template(quant_2x2_dc_aarch64)
 int x264_quant_2x2_dc_aarch64( int16_t dct[4], int mf, int bias );
 
+#define x264_quant_2x2_dc_neon x264_template(quant_2x2_dc_neon)
 int x264_quant_2x2_dc_neon( int16_t dct[4], int mf, int bias );
+#define x264_quant_4x4_dc_neon x264_template(quant_4x4_dc_neon)
 int x264_quant_4x4_dc_neon( int16_t dct[16], int mf, int bias );
+#define x264_quant_4x4_neon x264_template(quant_4x4_neon)
 int x264_quant_4x4_neon( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
+#define x264_quant_4x4x4_neon x264_template(quant_4x4x4_neon)
 int x264_quant_4x4x4_neon( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] );
+#define x264_quant_8x8_neon x264_template(quant_8x8_neon)
 int x264_quant_8x8_neon( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
 
+#define x264_dequant_4x4_dc_neon x264_template(dequant_4x4_dc_neon)
 void x264_dequant_4x4_dc_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_4x4_neon x264_template(dequant_4x4_neon)
 void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_neon x264_template(dequant_8x8_neon)
 void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
 
+#define x264_decimate_score15_neon x264_template(decimate_score15_neon)
 int x264_decimate_score15_neon( int16_t * );
+#define x264_decimate_score16_neon x264_template(decimate_score16_neon)
 int x264_decimate_score16_neon( int16_t * );
+#define x264_decimate_score64_neon x264_template(decimate_score64_neon)
 int x264_decimate_score64_neon( int16_t * );
 
+#define x264_coeff_last4_aarch64 x264_template(coeff_last4_aarch64)
 int x264_coeff_last4_aarch64( int16_t * );
+#define x264_coeff_last8_aarch64 x264_template(coeff_last8_aarch64)
 int x264_coeff_last8_aarch64( int16_t * );
+#define x264_coeff_last15_neon x264_template(coeff_last15_neon)
 int x264_coeff_last15_neon( int16_t * );
+#define x264_coeff_last16_neon x264_template(coeff_last16_neon)
 int x264_coeff_last16_neon( int16_t * );
+#define x264_coeff_last64_neon x264_template(coeff_last64_neon)
 int x264_coeff_last64_neon( int16_t * );
 
+#define x264_coeff_level_run4_aarch64 x264_template(coeff_level_run4_aarch64)
 int x264_coeff_level_run4_aarch64( int16_t *, x264_run_level_t * );
+#define x264_coeff_level_run8_neon x264_template(coeff_level_run8_neon)
 int x264_coeff_level_run8_neon( int16_t *, x264_run_level_t * );
+#define x264_coeff_level_run15_neon x264_template(coeff_level_run15_neon)
 int x264_coeff_level_run15_neon( int16_t *, x264_run_level_t * );
+#define x264_coeff_level_run16_neon x264_template(coeff_level_run16_neon)
 int x264_coeff_level_run16_neon( int16_t *, x264_run_level_t * );
 
+#define x264_denoise_dct_neon x264_template(denoise_dct_neon)
 void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/arm/asm.S x264-0.158.2988+git-20191101.7817004/common/arm/asm.S
--- x264-0.152.2854+gite9a5903/common/arm/asm.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/asm.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * asm.S: arm utility macros
  *****************************************************************************
- * Copyright (C) 2008-2017 x264 project
+ * Copyright (C) 2008-2019 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *          David Conrad <lessen42@gmail.com>
@@ -28,17 +28,32 @@
 
 .syntax unified
 
-#ifndef __APPLE__
+#ifdef __ELF__
 .arch armv7-a
 .fpu neon
 #endif
 
+#define GLUE(a, b) a ## b
+#define JOIN(a, b) GLUE(a, b)
+
 #ifdef PREFIX
-#   define EXTERN_ASM _
+#   define BASE _x264_
+#   define SYM_PREFIX _
 #else
-#   define EXTERN_ASM
+#   define BASE x264_
+#   define SYM_PREFIX
 #endif
 
+#ifdef BIT_DEPTH
+#   define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _)
+#else
+#   define EXTERN_ASM BASE
+#endif
+
+#define X(s) JOIN(EXTERN_ASM, s)
+#define X264(s) JOIN(BASE, s)
+#define EXT(s) JOIN(SYM_PREFIX, s)
+
 #ifdef __ELF__
 #   define ELF
 #else
@@ -75,7 +90,11 @@
 
 .macro function name, export=1
     .macro endfunc
+.if \export
+ELF     .size   EXTERN_ASM\name, . - EXTERN_ASM\name
+.else
 ELF     .size   \name, . - \name
+.endif
 FUNC    .endfunc
         .purgem endfunc
     .endm
@@ -169,10 +188,6 @@
 #endif
 .endm
 
-#define GLUE(a, b) a ## b
-#define JOIN(a, b) GLUE(a, b)
-#define X(s) JOIN(EXTERN_ASM, s)
-
 #define FENC_STRIDE 16
 #define FDEC_STRIDE 32
 
diff -Nru x264-0.152.2854+gite9a5903/common/arm/bitstream-a.S x264-0.158.2988+git-20191101.7817004/common/arm/bitstream-a.S
--- x264-0.152.2854+gite9a5903/common/arm/bitstream-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/bitstream-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream-a.S: arm bitstream functions
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Janne Grunau <janne-x264@jannau.net>
  *
@@ -25,7 +25,7 @@
 
 #include "asm.S"
 
-function x264_nal_escape_neon
+function nal_escape_neon
     push        {r4-r5,lr}
     vmov.u8     q0,  #0xff
     vmov.u8     q8,  #4
diff -Nru x264-0.152.2854+gite9a5903/common/arm/bitstream.h x264-0.158.2988+git-20191101.7817004/common/arm/bitstream.h
--- x264-0.152.2854+gite9a5903/common/arm/bitstream.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/bitstream.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,32 @@
+/*****************************************************************************
+ * bitstream.h: arm bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_ARM_BITSTREAM_H
+#define X264_ARM_BITSTREAM_H
+
+#define x264_nal_escape_neon x264_template(nal_escape_neon)
+uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/arm/cpu-a.S x264-0.158.2988+git-20191101.7817004/common/arm/cpu-a.S
--- x264-0.152.2854+gite9a5903/common/arm/cpu-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/cpu-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu-a.S: arm cpu detection
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -29,7 +29,7 @@
 
 // done in gas because .fpu neon overrides the refusal to assemble
 // instructions the selected -march/-mcpu doesn't support
-function x264_cpu_neon_test
+function cpu_neon_test
     vadd.i16    q0, q0, q0
     bx          lr
 endfunc
@@ -37,7 +37,7 @@
 // return: 0 on success
 //         1 if counters were already enabled
 //         9 if lo-res counters were already enabled
-function x264_cpu_enable_armv7_counter, export=0
+function cpu_enable_armv7_counter, export=0
     mrc         p15, 0, r2, c9, c12, 0      // read PMNC
     ands        r0, r2, #1
     andne       r0, r2, #9
@@ -50,7 +50,7 @@
     bx          lr
 endfunc
 
-function x264_cpu_disable_armv7_counter, export=0
+function cpu_disable_armv7_counter, export=0
     mrc         p15, 0, r0, c9, c12, 0      // read PMNC
     bic         r0, r0, #1                  // disable counters
     mcr         p15, 0, r0, c9, c12, 0      // write PMNC
@@ -64,14 +64,14 @@
 
 // return: 0 if transfers neon -> arm transfers take more than 10 cycles
 //         nonzero otherwise
-function x264_cpu_fast_neon_mrc_test
+function cpu_fast_neon_mrc_test
     // check for user access to performance counters
     mrc         p15, 0, r0, c9, c14, 0
     cmp         r0, #0
     bxeq        lr
 
     push        {r4-r6,lr}
-    bl          x264_cpu_enable_armv7_counter
+    bl          cpu_enable_armv7_counter
     ands        r1, r0, #8
     mov         r3, #0
     mov         ip, #4
@@ -99,7 +99,7 @@
 
     // disable counters if we enabled them
     ands        r0, r0, #1
-    bleq        x264_cpu_disable_armv7_counter
+    bleq        cpu_disable_armv7_counter
 
     lsr         r0, r3, #5
     cmp         r0, #10
diff -Nru x264-0.152.2854+gite9a5903/common/arm/dct-a.S x264-0.158.2988+git-20191101.7817004/common/arm/dct-a.S
--- x264-0.152.2854+gite9a5903/common/arm/dct-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/dct-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /****************************************************************************
  * dct-a.S: arm transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Martin Storsjo <martin@martin.st>
@@ -62,7 +62,7 @@
 .endm
 
 
-function x264_dct4x4dc_neon
+function dct4x4dc_neon
     vld1.64         {d0-d3}, [r0,:128]
     SUMSUB_ABCD     d4, d5, d6, d7, d0, d1, d2, d3
     SUMSUB_ABCD     d0, d2, d3, d1, d4, d6, d5, d7
@@ -81,7 +81,7 @@
     bx              lr
 endfunc
 
-function x264_idct4x4dc_neon
+function idct4x4dc_neon
     vld1.64         {d0-d3}, [r0,:128]
     SUMSUB_ABCD     d4, d5, d6, d7, d0, d1, d2, d3
     SUMSUB_ABCD     d0, d2, d3, d1, d4, d6, d5, d7
@@ -105,7 +105,7 @@
     vsub.s16        \d3, \d7, \d5
 .endm
 
-function x264_sub4x4_dct_neon
+function sub4x4_dct_neon
     mov             r3, #FENC_STRIDE
     mov             ip, #FDEC_STRIDE
     vld1.32         {d0[]}, [r1,:32], r3
@@ -128,7 +128,7 @@
     bx              lr
 endfunc
 
-function x264_sub8x4_dct_neon, export=0
+function sub8x4_dct_neon, export=0
     vld1.64         {d0}, [r1,:64], r3
     vld1.64         {d1}, [r2,:64], ip
     vsubl.u8        q8,  d0,  d1
@@ -164,34 +164,34 @@
     bx              lr
 endfunc
 
-function x264_sub8x8_dct_neon
+function sub8x8_dct_neon
     push            {lr}
     mov             r3, #FENC_STRIDE
     mov             ip, #FDEC_STRIDE
-    bl              x264_sub8x4_dct_neon
+    bl              sub8x4_dct_neon
     pop             {lr}
-    b               x264_sub8x4_dct_neon
+    b               sub8x4_dct_neon
 endfunc
 
-function x264_sub16x16_dct_neon
+function sub16x16_dct_neon
     push            {lr}
     mov             r3, #FENC_STRIDE
     mov             ip, #FDEC_STRIDE
-    bl              x264_sub8x4_dct_neon
-    bl              x264_sub8x4_dct_neon
+    bl              sub8x4_dct_neon
+    bl              sub8x4_dct_neon
     sub             r1, r1, #8*FENC_STRIDE-8
     sub             r2, r2, #8*FDEC_STRIDE-8
-    bl              x264_sub8x4_dct_neon
-    bl              x264_sub8x4_dct_neon
+    bl              sub8x4_dct_neon
+    bl              sub8x4_dct_neon
     sub             r1, r1, #8
     sub             r2, r2, #8
-    bl              x264_sub8x4_dct_neon
-    bl              x264_sub8x4_dct_neon
+    bl              sub8x4_dct_neon
+    bl              sub8x4_dct_neon
     sub             r1, r1, #8*FENC_STRIDE-8
     sub             r2, r2, #8*FDEC_STRIDE-8
-    bl              x264_sub8x4_dct_neon
+    bl              sub8x4_dct_neon
     pop             {lr}
-    b               x264_sub8x4_dct_neon
+    b               sub8x4_dct_neon
 endfunc
 
 
@@ -226,7 +226,7 @@
     SUMSUB_SHR2  2, q11, q13, q3,  q13,  q0, q1
 .endm
 
-function x264_sub8x8_dct8_neon
+function sub8x8_dct8_neon
     mov             r3, #FENC_STRIDE
     mov             ip, #FDEC_STRIDE
     vld1.64         {d16}, [r1,:64], r3
@@ -278,19 +278,19 @@
     bx              lr
 endfunc
 
-function x264_sub16x16_dct8_neon
+function sub16x16_dct8_neon
     push            {lr}
-    bl              X(x264_sub8x8_dct8_neon)
+    bl              X(sub8x8_dct8_neon)
     sub             r1,  r1,  #FENC_STRIDE*8 - 8
     sub             r2,  r2,  #FDEC_STRIDE*8 - 8
-    bl              X(x264_sub8x8_dct8_neon)
+    bl              X(sub8x8_dct8_neon)
     sub             r1,  r1,  #8
     sub             r2,  r2,  #8
-    bl              X(x264_sub8x8_dct8_neon)
+    bl              X(sub8x8_dct8_neon)
     pop             {lr}
     sub             r1,  r1,  #FENC_STRIDE*8 - 8
     sub             r2,  r2,  #FDEC_STRIDE*8 - 8
-    b               X(x264_sub8x8_dct8_neon)
+    b               X(sub8x8_dct8_neon)
 endfunc
 
 
@@ -303,7 +303,7 @@
     vadd.s16        \d6, \d6, \d1
 .endm
 
-function x264_add4x4_idct_neon
+function add4x4_idct_neon
     mov             r2, #FDEC_STRIDE
     vld1.64         {d0-d3}, [r1,:128]
 
@@ -335,7 +335,7 @@
     bx              lr
 endfunc
 
-function x264_add8x4_idct_neon, export=0
+function add8x4_idct_neon, export=0
     vld1.64         {d0-d3}, [r1,:128]!
     IDCT_1D         d16, d18, d20, d22, d0, d1, d2, d3
     vld1.64         {d4-d7}, [r1,:128]!
@@ -375,29 +375,29 @@
     bx              lr
 endfunc
 
-function x264_add8x8_idct_neon
+function add8x8_idct_neon
     mov             r2, #FDEC_STRIDE
     mov             ip, lr
-    bl              x264_add8x4_idct_neon
+    bl              add8x4_idct_neon
     mov             lr, ip
-    b               x264_add8x4_idct_neon
+    b               add8x4_idct_neon
 endfunc
 
-function x264_add16x16_idct_neon
+function add16x16_idct_neon
     mov             r2, #FDEC_STRIDE
     mov             ip, lr
-    bl              x264_add8x4_idct_neon
-    bl              x264_add8x4_idct_neon
+    bl              add8x4_idct_neon
+    bl              add8x4_idct_neon
     sub             r0, r0, #8*FDEC_STRIDE-8
-    bl              x264_add8x4_idct_neon
-    bl              x264_add8x4_idct_neon
+    bl              add8x4_idct_neon
+    bl              add8x4_idct_neon
     sub             r0, r0, #8
-    bl              x264_add8x4_idct_neon
-    bl              x264_add8x4_idct_neon
+    bl              add8x4_idct_neon
+    bl              add8x4_idct_neon
     sub             r0, r0, #8*FDEC_STRIDE-8
-    bl              x264_add8x4_idct_neon
+    bl              add8x4_idct_neon
     mov             lr, ip
-    b               x264_add8x4_idct_neon
+    b               add8x4_idct_neon
 endfunc
 
 
@@ -435,7 +435,7 @@
     SUMSUB_AB       q11, q12, q2,  q12
 .endm
 
-function x264_add8x8_idct8_neon
+function add8x8_idct8_neon
     mov             r2,  #FDEC_STRIDE
     vld1.64         {d16-d19}, [r1,:128]!
     vld1.64         {d20-d23}, [r1,:128]!
@@ -497,20 +497,20 @@
     bx              lr
 endfunc
 
-function x264_add16x16_idct8_neon
+function add16x16_idct8_neon
     mov             ip,  lr
-    bl              X(x264_add8x8_idct8_neon)
+    bl              X(add8x8_idct8_neon)
     sub             r0,  r0,  #8*FDEC_STRIDE-8
-    bl              X(x264_add8x8_idct8_neon)
+    bl              X(add8x8_idct8_neon)
     sub             r0,  r0,  #8
-    bl              X(x264_add8x8_idct8_neon)
+    bl              X(add8x8_idct8_neon)
     sub             r0,  r0,  #8*FDEC_STRIDE-8
     mov             lr,  ip
-    b               X(x264_add8x8_idct8_neon)
+    b               X(add8x8_idct8_neon)
 endfunc
 
 
-function x264_add8x8_idct_dc_neon
+function add8x8_idct_dc_neon
     mov             r2,  #FDEC_STRIDE
     vld1.64         {d16}, [r1,:64]
     vrshr.s16       d16, d16, #6
@@ -593,7 +593,7 @@
     vst1.64         {d22-d23}, [r2,:128], r3
 .endm
 
-function x264_add16x16_idct_dc_neon
+function add16x16_idct_dc_neon
     mov             r2,  r0
     mov             r3,  #FDEC_STRIDE
     vmov.i16        q15, #0
@@ -609,7 +609,7 @@
     bx              lr
 endfunc
 
-function x264_sub8x8_dct_dc_neon
+function sub8x8_dct_dc_neon
     mov             r3,  #FENC_STRIDE
     mov             ip,  #FDEC_STRIDE
     vld1.64         {d16}, [r1,:64], r3
@@ -657,7 +657,7 @@
     bx              lr
 endfunc
 
-function x264_sub8x16_dct_dc_neon
+function sub8x16_dct_dc_neon
     mov             r3,  #FENC_STRIDE
     mov             ip,  #FDEC_STRIDE
     vld1.64         {d16}, [r1,:64], r3
@@ -751,7 +751,7 @@
 endfunc
 
 
-function x264_zigzag_scan_4x4_frame_neon
+function zigzag_scan_4x4_frame_neon
     movrel      r2, scan4x4_frame
     vld1.64     {d0-d3},   [r1,:128]
     vld1.64     {d16-d19}, [r2,:128]
diff -Nru x264-0.152.2854+gite9a5903/common/arm/dct.h x264-0.158.2988+git-20191101.7817004/common/arm/dct.h
--- x264-0.152.2854+gite9a5903/common/arm/dct.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/dct.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: arm transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,28 +26,45 @@
 #ifndef X264_ARM_DCT_H
 #define X264_ARM_DCT_H
 
+#define x264_dct4x4dc_neon x264_template(dct4x4dc_neon)
 void x264_dct4x4dc_neon( int16_t d[16] );
+#define x264_idct4x4dc_neon x264_template(idct4x4dc_neon)
 void x264_idct4x4dc_neon( int16_t d[16] );
 
+#define x264_sub4x4_dct_neon x264_template(sub4x4_dct_neon)
 void x264_sub4x4_dct_neon( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_neon x264_template(sub8x8_dct_neon)
 void x264_sub8x8_dct_neon( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_neon x264_template(sub16x16_dct_neon)
 void x264_sub16x16_dct_neon( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add4x4_idct_neon x264_template(add4x4_idct_neon)
 void x264_add4x4_idct_neon( uint8_t *p_dst, int16_t dct[16] );
+#define x264_add8x8_idct_neon x264_template(add8x8_idct_neon)
 void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] );
+#define x264_add16x16_idct_neon x264_template(add16x16_idct_neon)
 void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] );
 
+#define x264_add8x8_idct_dc_neon x264_template(add8x8_idct_dc_neon)
 void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] );
+#define x264_add16x16_idct_dc_neon x264_template(add16x16_idct_dc_neon)
 void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] );
+#define x264_sub8x8_dct_dc_neon x264_template(sub8x8_dct_dc_neon)
 void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x16_dct_dc_neon x264_template(sub8x16_dct_dc_neon)
 void x264_sub8x16_dct_dc_neon( int16_t dct[8], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_sub8x8_dct8_neon x264_template(sub8x8_dct8_neon)
 void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct8_neon x264_template(sub16x16_dct8_neon)
 void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add8x8_idct8_neon x264_template(add8x8_idct8_neon)
 void x264_add8x8_idct8_neon( uint8_t *p_dst, int16_t dct[64] );
+#define x264_add16x16_idct8_neon x264_template(add16x16_idct8_neon)
 void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] );
 
+#define x264_zigzag_scan_4x4_frame_neon x264_template(zigzag_scan_4x4_frame_neon)
 void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/arm/deblock-a.S x264-0.158.2988+git-20191101.7817004/common/arm/deblock-a.S
--- x264-0.152.2854+gite9a5903/common/arm/deblock-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/deblock-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.S: arm deblocking
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *          Martin Storsjo <martin@martin.st>
@@ -117,7 +117,7 @@
     vqmovun.s16     d1,  q12
 .endm
 
-function x264_deblock_v_luma_neon
+function deblock_v_luma_neon
     h264_loop_filter_start
 
     vld1.64         {d0, d1},  [r0,:128], r1
@@ -143,7 +143,7 @@
     bx              lr
 endfunc
 
-function x264_deblock_h_luma_neon
+function deblock_h_luma_neon
     h264_loop_filter_start
 
     sub             r0,  r0,  #4
@@ -324,7 +324,7 @@
 
 .endm
 
-function x264_deblock_v_luma_intra_neon
+function deblock_v_luma_intra_neon
     push            {lr}
     vld1.64         {d0, d1},  [r0,:128], r1
     vld1.64         {d2, d3},  [r0,:128], r1
@@ -352,7 +352,7 @@
     pop             {pc}
 endfunc
 
-function x264_deblock_h_luma_intra_neon
+function deblock_h_luma_intra_neon
     push            {lr}
     sub             r0,  r0,  #4
     vld1.64         {d22}, [r0], r1
@@ -447,7 +447,7 @@
     vqmovun.s16     d1,  q12
 .endm
 
-function x264_deblock_v_chroma_neon
+function deblock_v_chroma_neon
     h264_loop_filter_start
 
     sub             r0,  r0,  r1, lsl #1
@@ -465,7 +465,7 @@
     bx              lr
 endfunc
 
-function x264_deblock_h_chroma_neon
+function deblock_h_chroma_neon
     h264_loop_filter_start
 
     sub             r0,  r0,  #4
@@ -499,7 +499,7 @@
     bx              lr
 endfunc
 
-function x264_deblock_h_chroma_422_neon
+function deblock_h_chroma_422_neon
     h264_loop_filter_start
     push            {lr}
     sub             r0,  r0,  #4
@@ -547,7 +547,7 @@
     vqmovun.s16     d0,  q11
 .endm
 
-function x264_deblock_h_chroma_mbaff_neon
+function deblock_h_chroma_mbaff_neon
     h264_loop_filter_start
 
     sub             r0,  r0,  #4
@@ -610,7 +610,7 @@
     vbit            q0,  q2,  q13
 .endm
 
-function x264_deblock_v_chroma_intra_neon
+function deblock_v_chroma_intra_neon
     sub             r0,  r0,  r1, lsl #1
     vld2.8          {d18,d19}, [r0,:128], r1
     vld2.8          {d16,d17}, [r0,:128], r1
@@ -626,7 +626,7 @@
     bx              lr
 endfunc
 
-function x264_deblock_h_chroma_intra_neon
+function deblock_h_chroma_intra_neon
     sub             r0,  r0,  #4
     vld1.8          {d18}, [r0], r1
     vld1.8          {d16}, [r0], r1
@@ -657,15 +657,15 @@
     bx              lr
 endfunc
 
-function x264_deblock_h_chroma_422_intra_neon
+function deblock_h_chroma_422_intra_neon
     push            {lr}
-    bl              X(x264_deblock_h_chroma_intra_neon)
+    bl              X(deblock_h_chroma_intra_neon)
     add             r0, r0,  #2
     pop             {lr}
-    b               X(x264_deblock_h_chroma_intra_neon)
+    b               X(deblock_h_chroma_intra_neon)
 endfunc
 
-function x264_deblock_h_chroma_intra_mbaff_neon
+function deblock_h_chroma_intra_mbaff_neon
     sub             r0,  r0,  #4
     vld1.8          {d18}, [r0], r1
     vld1.8          {d16}, [r0], r1
@@ -688,7 +688,7 @@
     bx              lr
 endfunc
 
-function x264_deblock_strength_neon
+function deblock_strength_neon
     ldr             ip,  [sp]
     vmov.i8         q8,  #0
     lsl             ip,  ip,  #8
diff -Nru x264-0.152.2854+gite9a5903/common/arm/deblock.h x264-0.158.2988+git-20191101.7817004/common/arm/deblock.h
--- x264-0.152.2854+gite9a5903/common/arm/deblock.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/deblock.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,58 @@
+/*****************************************************************************
+ * deblock.h: arm deblocking
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_ARM_DEBLOCK_H
+#define X264_ARM_DEBLOCK_H
+
+#define x264_deblock_v_luma_neon x264_template(deblock_v_luma_neon)
+void x264_deblock_v_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_neon x264_template(deblock_h_luma_neon)
+void x264_deblock_h_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_chroma_neon x264_template(deblock_v_chroma_neon)
+void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_neon x264_template(deblock_h_chroma_neon)
+void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_strength_neon x264_template(deblock_strength_neon)
+void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                 int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                 int mvy_limit, int bframe );
+#define x264_deblock_h_chroma_422_neon x264_template(deblock_h_chroma_422_neon)
+void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_mbaff_neon x264_template(deblock_h_chroma_mbaff_neon)
+void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_intra_mbaff_neon x264_template(deblock_h_chroma_intra_mbaff_neon)
+void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_neon x264_template(deblock_h_chroma_intra_neon)
+void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_422_intra_neon x264_template(deblock_h_chroma_422_intra_neon)
+void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_chroma_intra_neon x264_template(deblock_v_chroma_intra_neon)
+void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_luma_intra_neon x264_template(deblock_h_luma_intra_neon)
+void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_luma_intra_neon x264_template(deblock_v_luma_intra_neon)
+void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/arm/mc-a.S x264-0.158.2988+git-20191101.7817004/common/arm/mc-a.S
--- x264-0.152.2854+gite9a5903/common/arm/mc-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/mc-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.S: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -38,7 +38,7 @@
 // They also use nothing above armv5te, but we don't care about pre-armv6
 
 // void prefetch_ref( uint8_t *pix, intptr_t stride, int parity )
-function x264_prefetch_ref_arm
+function prefetch_ref_arm
     sub         r2, r2, #1
     add         r0, r0, #64
     and         r2, r2, r1
@@ -58,7 +58,7 @@
 
 // void prefetch_fenc( uint8_t *pix_y,  intptr_t stride_y,
 //                     uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
-function x264_prefetch_fenc_arm
+function prefetch_fenc_arm
     ldr         ip, [sp]
     push        {lr}
     and         lr, ip, #3
@@ -83,8 +83,8 @@
 endfunc
 
 
-// void *x264_memcpy_aligned( void *dst, const void *src, size_t n )
-function x264_memcpy_aligned_neon
+// void *memcpy_aligned( void *dst, const void *src, size_t n )
+function memcpy_aligned_neon
     orr         r3,  r0,  r1,  lsr #1
     movrel      ip,  memcpy_table
     and         r3,  r3,  #0xc
@@ -150,8 +150,8 @@
 
 .ltorg
 
-// void x264_memzero_aligned( void *dst, size_t n )
-function x264_memzero_aligned_neon
+// void memzero_aligned( void *dst, size_t n )
+function memzero_aligned_neon
     vmov.i8     q0, #0
     vmov.i8     q1, #0
 memzero_loop:
@@ -168,18 +168,18 @@
 //                 uint8_t *src1, intptr_t src1_stride,
 //                 uint8_t *src2, intptr_t src2_stride, int weight );
 .macro AVGH w h
-function x264_pixel_avg_\w\()x\h\()_neon
+function pixel_avg_\w\()x\h\()_neon
     ldr         ip, [sp, #8]
     push        {r4-r6,lr}
     cmp         ip, #32
     ldrd        r4, r5, [sp, #16]
     mov         lr, #\h
-    beq         x264_pixel_avg_w\w\()_neon
+    beq         pixel_avg_w\w\()_neon
     rsbs        r6,  ip,  #64
-    blt         x264_pixel_avg_weight_w\w\()_add_sub_neon     // weight > 64
+    blt         pixel_avg_weight_w\w\()_add_sub_neon     // weight > 64
     cmp         ip,  #0
-    bge         x264_pixel_avg_weight_w\w\()_add_add_neon
-    b           x264_pixel_avg_weight_w\w\()_sub_add_neon     // weight < 0
+    bge         pixel_avg_weight_w\w\()_add_add_neon
+    b           pixel_avg_weight_w\w\()_sub_add_neon     // weight < 0
 endfunc
 .endm
 
@@ -244,7 +244,7 @@
 .endm
 
 .macro AVG_WEIGHT ext
-function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
+function pixel_avg_weight_w4_\ext\()_neon, export=0
     load_weights_\ext
 1:  // height loop
     subs            lr,  lr,  #2
@@ -260,7 +260,7 @@
     pop             {r4-r6,pc}
 endfunc
 
-function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
+function pixel_avg_weight_w8_\ext\()_neon, export=0
     load_weights_\ext
 1:  // height loop
     subs            lr,  lr,  #4
@@ -284,7 +284,7 @@
     pop             {r4-r6,pc}
 endfunc
 
-function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
+function pixel_avg_weight_w16_\ext\()_neon, export=0
     load_weights_\ext
 1:  // height loop
     subs            lr,  lr,  #2
@@ -309,7 +309,7 @@
 AVG_WEIGHT add_sub
 AVG_WEIGHT sub_add
 
-function x264_pixel_avg_w4_neon, export=0
+function pixel_avg_w4_neon, export=0
     subs        lr,  lr,  #2
     vld1.32     {d0[]}, [r2], r3
     vld1.32     {d2[]}, [r4], r5
@@ -319,11 +319,11 @@
     vrhadd.u8   d1,  d1,  d3
     vst1.32     {d0[0]}, [r0,:32], r1
     vst1.32     {d1[0]}, [r0,:32], r1
-    bgt         x264_pixel_avg_w4_neon
+    bgt         pixel_avg_w4_neon
     pop         {r4-r6,pc}
 endfunc
 
-function x264_pixel_avg_w8_neon, export=0
+function pixel_avg_w8_neon, export=0
     subs        lr,  lr,  #4
     vld1.64     {d0}, [r2], r3
     vld1.64     {d2}, [r4], r5
@@ -341,11 +341,11 @@
     vrhadd.u8   d3,  d3,  d5
     vst1.64     {d2}, [r0,:64], r1
     vst1.64     {d3}, [r0,:64], r1
-    bgt         x264_pixel_avg_w8_neon
+    bgt         pixel_avg_w8_neon
     pop         {r4-r6,pc}
 endfunc
 
-function x264_pixel_avg_w16_neon, export=0
+function pixel_avg_w16_neon, export=0
     subs        lr,  lr,  #4
     vld1.64     {d0-d1}, [r2], r3
     vld1.64     {d2-d3}, [r4], r5
@@ -363,12 +363,12 @@
     vrhadd.u8   q3,  q3,  q0
     vst1.64     {d4-d5}, [r0,:128], r1
     vst1.64     {d6-d7}, [r0,:128], r1
-    bgt         x264_pixel_avg_w16_neon
+    bgt         pixel_avg_w16_neon
     pop         {r4-r6,pc}
 endfunc
 
 
-function x264_pixel_avg2_w4_neon
+function pixel_avg2_w4_neon
     ldr         ip,  [sp, #4]
     push        {lr}
     ldr         lr,  [sp, #4]
@@ -386,7 +386,7 @@
     pop         {pc}
 endfunc
 
-function x264_pixel_avg2_w8_neon
+function pixel_avg2_w8_neon
     ldr         ip,  [sp, #4]
     push        {lr}
     ldr         lr,  [sp, #4]
@@ -404,7 +404,7 @@
     pop         {pc}
 endfunc
 
-function x264_pixel_avg2_w16_neon
+function pixel_avg2_w16_neon
     ldr         ip,  [sp, #4]
     push        {lr}
     ldr         lr,  [sp, #4]
@@ -422,7 +422,7 @@
     pop         {pc}
 endfunc
 
-function x264_pixel_avg2_w20_neon
+function pixel_avg2_w20_neon
     ldr         ip,  [sp, #4]
     push        {lr}
     sub         r1,  r1,  #16
@@ -464,7 +464,7 @@
 
 // void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst, intptr_t dst_stride,
 //                 const x264_weight_t *weight, int height )
-function x264_mc_weight_w20_neon
+function mc_weight_w20_neon
     weight_prologue full
     sub         r1, #16
 weight20_loop:
@@ -500,7 +500,7 @@
     pop         {r4-r5,pc}
 endfunc
 
-function x264_mc_weight_w16_neon
+function mc_weight_w16_neon
     weight_prologue full
 weight16_loop:
     subs        ip,  #2
@@ -528,7 +528,7 @@
     pop         {r4-r5,pc}
 endfunc
 
-function x264_mc_weight_w8_neon
+function mc_weight_w8_neon
     weight_prologue full
 weight8_loop:
     subs        ip,  #2
@@ -548,7 +548,7 @@
     pop         {r4-r5,pc}
 endfunc
 
-function x264_mc_weight_w4_neon
+function mc_weight_w4_neon
     weight_prologue full
 weight4_loop:
     subs        ip,  #2
@@ -564,7 +564,7 @@
     pop         {r4-r5,pc}
 endfunc
 
-function x264_mc_weight_w20_nodenom_neon
+function mc_weight_w20_nodenom_neon
     weight_prologue nodenom
     sub         r1, #16
 weight20_nodenom_loop:
@@ -595,7 +595,7 @@
     pop         {r4-r5,pc}
 endfunc
 
-function x264_mc_weight_w16_nodenom_neon
+function mc_weight_w16_nodenom_neon
     weight_prologue nodenom
 weight16_nodenom_loop:
     subs        ip,  #2
@@ -619,7 +619,7 @@
     pop         {r4-r5,pc}
 endfunc
 
-function x264_mc_weight_w8_nodenom_neon
+function mc_weight_w8_nodenom_neon
     weight_prologue nodenom
 weight8_nodenom_loop:
     subs        ip,  #2
@@ -637,7 +637,7 @@
     pop         {r4-r5,pc}
 endfunc
 
-function x264_mc_weight_w4_nodenom_neon
+function mc_weight_w4_nodenom_neon
     weight_prologue nodenom
 weight4_nodenom_loop:
     subs        ip,  #2
@@ -661,7 +661,7 @@
 .endm
 
 .macro weight_simple name op
-function x264_mc_weight_w20_\name\()_neon
+function mc_weight_w20_\name\()_neon
     weight_simple_prologue
 weight20_\name\()_loop:
     subs        ip,  #2
@@ -676,7 +676,7 @@
     pop         {pc}
 endfunc
 
-function x264_mc_weight_w16_\name\()_neon
+function mc_weight_w16_\name\()_neon
     weight_simple_prologue
 weight16_\name\()_loop:
     subs        ip,  #2
@@ -690,7 +690,7 @@
     pop         {pc}
 endfunc
 
-function x264_mc_weight_w8_\name\()_neon
+function mc_weight_w8_\name\()_neon
     weight_simple_prologue
 weight8_\name\()_loop:
     subs        ip,  #2
@@ -703,7 +703,7 @@
     pop         {pc}
 endfunc
 
-function x264_mc_weight_w4_\name\()_neon
+function mc_weight_w4_\name\()_neon
     weight_simple_prologue
 weight4_\name\()_loop:
     subs        ip,  #2
@@ -722,7 +722,7 @@
 
 
 // void mc_copy( uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, int height )
-function x264_mc_copy_w4_neon
+function mc_copy_w4_neon
     ldr         ip,  [sp]
 copy_w4_loop:
     subs        ip,  ip,  #4
@@ -738,7 +738,7 @@
     bx          lr
 endfunc
 
-function x264_mc_copy_w8_neon
+function mc_copy_w8_neon
     ldr         ip,  [sp]
 copy_w8_loop:
     subs        ip,  ip,  #4
@@ -754,7 +754,7 @@
     bx          lr
 endfunc
 
-function x264_mc_copy_w16_neon
+function mc_copy_w16_neon
     ldr         ip,  [sp]
 copy_w16_loop:
     subs        ip,  ip,  #4
@@ -770,7 +770,7 @@
     bx          lr
 endfunc
 
-function x264_mc_copy_w16_aligned_neon
+function mc_copy_w16_aligned_neon
     ldr         ip,  [sp]
 copy_w16_aligned_loop:
     subs        ip,  ip,  #4
@@ -787,11 +787,10 @@
 endfunc
 
 
-// void x264_mc_chroma_neon( uint8_t *dst, intptr_t i_dst_stride,
-//                           uint8_t *src, intptr_t i_src_stride,
-//                           int dx, int dy, int i_width, int i_height );
-
-function x264_mc_chroma_neon
+// void mc_chroma( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride,
+//                 uint8_t *src, intptr_t i_src_stride,
+//                 int dx, int dy, int i_width, int i_height );
+function mc_chroma_neon
     push            {r4-r8, lr}
     vpush           {d8-d11}
     ldrd            r4, r5, [sp, #56]
@@ -1138,7 +1137,7 @@
 
 
 // hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, int width )
-function x264_hpel_filter_v_neon
+function hpel_filter_v_neon
     ldr             ip,  [sp]
     sub             r1,  r1,  r3,  lsl #1
     push            {lr}
@@ -1178,7 +1177,7 @@
 endfunc
 
 // hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
-function x264_hpel_filter_c_neon
+function hpel_filter_c_neon
     sub             r1,  #16
     vld1.64         {d0-d3}, [r1,:128]!
 
@@ -1263,7 +1262,7 @@
 endfunc
 
 // hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
-function x264_hpel_filter_h_neon
+function hpel_filter_h_neon
     sub             r1,  #16
     vmov.u8         d30, #5
     vld1.64         {d0-d3}, [r1,:128]!
@@ -1353,7 +1352,7 @@
 // frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv,
 //                         uint8_t *dstc, intptr_t src_stride, intptr_t dst_stride, int width,
 //                         int height )
-function x264_frame_init_lowres_core_neon
+function frame_init_lowres_core_neon
     push            {r4-r10,lr}
     vpush           {d8-d15}
     ldrd            r4,  r5,  [sp, #96]
@@ -1441,7 +1440,7 @@
     pop             {r4-r10,pc}
 endfunc
 
-function x264_load_deinterleave_chroma_fdec_neon
+function load_deinterleave_chroma_fdec_neon
     mov             ip,  #FDEC_STRIDE/2
 1:
     vld2.8          {d0-d1}, [r1,:128], r2
@@ -1454,7 +1453,7 @@
     bx              lr
 endfunc
 
-function x264_load_deinterleave_chroma_fenc_neon
+function load_deinterleave_chroma_fenc_neon
     mov             ip,  #FENC_STRIDE/2
 1:
     vld2.8          {d0-d1}, [r1,:128], r2
@@ -1467,7 +1466,7 @@
     bx              lr
 endfunc
 
-function x264_plane_copy_core_neon
+function plane_copy_core_neon
     push            {r4,lr}
     ldr             r4,  [sp, #8]
     ldr             lr,  [sp, #12]
@@ -1498,7 +1497,7 @@
     pop             {r4,pc}
 endfunc
 
-function x264_plane_copy_deinterleave_neon
+function plane_copy_deinterleave_neon
     push            {r4-r7, lr}
     ldrd            r6, r7, [sp, #28]
     ldrd            r4, r5, [sp, #20]
@@ -1524,7 +1523,7 @@
     pop             {r4-r7, pc}
 endfunc
 
-function x264_plane_copy_deinterleave_rgb_neon
+function plane_copy_deinterleave_rgb_neon
     push            {r4-r8, r10, r11, lr}
     ldrd            r4,  r5,  [sp, #32]
     ldrd            r6,  r7,  [sp, #40]
@@ -1576,7 +1575,7 @@
     pop             {r4-r8, r10, r11, pc}
 endfunc
 
-function x264_plane_copy_interleave_core_neon
+function plane_copy_interleave_core_neon
     push            {r4-r7, lr}
     ldrd            r6, r7, [sp, #28]
     ldrd            r4, r5, [sp, #20]
@@ -1603,7 +1602,7 @@
     pop             {r4-r7, pc}
 endfunc
 
-function x264_plane_copy_swap_core_neon
+function plane_copy_swap_core_neon
     push            {r4-r5, lr}
     ldrd            r4, r5, [sp, #12]
     add             lr,  r4,  #15
@@ -1627,7 +1626,7 @@
     pop             {r4-r5, pc}
 endfunc
 
-function x264_store_interleave_chroma_neon
+function store_interleave_chroma_neon
     push            {lr}
     ldr             lr,  [sp, #4]
     mov             ip,  #FDEC_STRIDE
@@ -1651,7 +1650,7 @@
     vadd.u16        q0,  q0,  q2
 .endm
 
-function x264_integral_init4h_neon
+function integral_init4h_neon
     sub             r3,  r0,  r2, lsl #1
     vld1.8          {d6, d7}, [r1, :128]!
 1:
@@ -1686,7 +1685,7 @@
     vadd.u16        q0,  q0,   \s
 .endm
 
-function x264_integral_init8h_neon
+function integral_init8h_neon
     sub             r3,  r0,  r2, lsl #1
     vld1.8          {d16, d17}, [r1, :128]!
 1:
@@ -1703,7 +1702,7 @@
     bx              lr
 endfunc
 
-function x264_integral_init4v_neon
+function integral_init4v_neon
     push            {r4-r5}
     mov             r3,   r0
     add             r4,   r0,   r2,  lsl #3
@@ -1742,7 +1741,7 @@
     bx              lr
 endfunc
 
-function x264_integral_init8v_neon
+function integral_init8v_neon
     add             r2,  r0,  r1,  lsl #4
     sub             r1,  r1,  #8
     ands            r3,  r1,  #16 - 1
@@ -1766,7 +1765,7 @@
     bx              lr
 endfunc
 
-function x264_mbtree_propagate_cost_neon
+function mbtree_propagate_cost_neon
     push            {r4-r5,lr}
     ldrd            r4, r5, [sp, #12]
     ldr             lr, [sp, #20]
@@ -1816,7 +1815,7 @@
     pop             {r4-r5,pc}
 endfunc
 
-function x264_mbtree_propagate_list_internal_neon
+function mbtree_propagate_list_internal_neon
     vld1.16         {d4[]}, [sp]            @ bipred_weight
     movrel          r12, pw_0to15
     vmov.u16        q10, #0xc000
@@ -1882,7 +1881,7 @@
 endfunc
 
 @ void mbtree_fix8_pack( int16_t *dst, float *src, int count )
-function x264_mbtree_fix8_pack_neon, export=1
+function mbtree_fix8_pack_neon, export=1
     subs            r3,  r2,  #8
     blt             2f
 1:
@@ -1910,7 +1909,7 @@
 endfunc
 
 @ void mbtree_fix8_unpack( float *dst, int16_t *src, int count )
-function x264_mbtree_fix8_unpack_neon, export=1
+function mbtree_fix8_unpack_neon, export=1
     subs            r3,  r2,  #8
     blt             2f
 1:
diff -Nru x264-0.152.2854+gite9a5903/common/arm/mc-c.c x264-0.158.2988+git-20191101.7817004/common/arm/mc-c.c
--- x264-0.152.2854+gite9a5903/common/arm/mc-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/mc-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -27,46 +27,87 @@
 #include "common/common.h"
 #include "mc.h"
 
+#define x264_prefetch_ref_arm x264_template(prefetch_ref_arm)
 void x264_prefetch_ref_arm( uint8_t *, intptr_t, int );
+#define x264_prefetch_fenc_arm x264_template(prefetch_fenc_arm)
 void x264_prefetch_fenc_arm( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_memcpy_aligned_neon x264_template(memcpy_aligned_neon)
 void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n );
+#define x264_memzero_aligned_neon x264_template(memzero_aligned_neon)
 void x264_memzero_aligned_neon( void *dst, size_t n );
 
+#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon)
 void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon)
 void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon)
 void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon)
 void x264_pixel_avg_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon)
 void x264_pixel_avg_8x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon)
 void x264_pixel_avg_4x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon)
 void x264_pixel_avg_4x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon)
 void x264_pixel_avg_4x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon)
 void x264_pixel_avg_4x2_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon)
 void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon)
 void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon)
 void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon)
 void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 
+#define x264_plane_copy_core_neon x264_template(plane_copy_core_neon)
 void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
                                 pixel *src, intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_neon x264_template(plane_copy_deinterleave_neon)
 void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,
                                          pixel *dstv, intptr_t i_dstv,
                                          pixel *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_rgb_neon x264_template(plane_copy_deinterleave_rgb_neon)
 void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
                                             pixel *dstb, intptr_t i_dstb,
                                             pixel *dstc, intptr_t i_dstc,
                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+#define x264_plane_copy_interleave_core_neon x264_template(plane_copy_interleave_core_neon)
 void x264_plane_copy_interleave_core_neon( pixel *dst,  intptr_t i_dst,
                                            pixel *srcu, intptr_t i_srcu,
                                            pixel *srcv, intptr_t i_srcv, int w, int h );
+#define x264_plane_copy_swap_core_neon x264_template(plane_copy_swap_core_neon)
 void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
                                      pixel *src, intptr_t i_src, int w, int h );
 
+#define x264_store_interleave_chroma_neon x264_template(store_interleave_chroma_neon)
 void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_load_deinterleave_chroma_fdec_neon x264_template(load_deinterleave_chroma_fdec_neon)
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_neon x264_template(load_deinterleave_chroma_fenc_neon)
 void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 
+#define x264_mc_weight_w16_neon x264_template(mc_weight_w16_neon)
+#define x264_mc_weight_w16_nodenom_neon x264_template(mc_weight_w16_nodenom_neon)
+#define x264_mc_weight_w16_offsetadd_neon x264_template(mc_weight_w16_offsetadd_neon)
+#define x264_mc_weight_w16_offsetsub_neon x264_template(mc_weight_w16_offsetsub_neon)
+#define x264_mc_weight_w20_neon x264_template(mc_weight_w20_neon)
+#define x264_mc_weight_w20_nodenom_neon x264_template(mc_weight_w20_nodenom_neon)
+#define x264_mc_weight_w20_offsetadd_neon x264_template(mc_weight_w20_offsetadd_neon)
+#define x264_mc_weight_w20_offsetsub_neon x264_template(mc_weight_w20_offsetsub_neon)
+#define x264_mc_weight_w4_neon x264_template(mc_weight_w4_neon)
+#define x264_mc_weight_w4_nodenom_neon x264_template(mc_weight_w4_nodenom_neon)
+#define x264_mc_weight_w4_offsetadd_neon x264_template(mc_weight_w4_offsetadd_neon)
+#define x264_mc_weight_w4_offsetsub_neon x264_template(mc_weight_w4_offsetsub_neon)
+#define x264_mc_weight_w8_neon x264_template(mc_weight_w8_neon)
+#define x264_mc_weight_w8_nodenom_neon x264_template(mc_weight_w8_nodenom_neon)
+#define x264_mc_weight_w8_offsetadd_neon x264_template(mc_weight_w8_offsetadd_neon)
+#define x264_mc_weight_w8_offsetsub_neon x264_template(mc_weight_w8_offsetsub_neon)
 #if !HIGH_BIT_DEPTH
 #define MC_WEIGHT(func)\
 void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -74,7 +115,7 @@
 void x264_mc_weight_w8##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w4##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 \
-static weight_fn_t x264_mc##func##_wtab_neon[6] =\
+static weight_fn_t mc##func##_wtab_neon[6] =\
 {\
     x264_mc_weight_w4##func##_neon,\
     x264_mc_weight_w4##func##_neon,\
@@ -90,51 +131,67 @@
 MC_WEIGHT(_offsetsub)
 #endif
 
+#define x264_mc_copy_w4_neon x264_template(mc_copy_w4_neon)
 void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w8_neon x264_template(mc_copy_w8_neon)
 void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w16_neon x264_template(mc_copy_w16_neon)
 void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+#define x264_mc_copy_w16_aligned_neon x264_template(mc_copy_w16_aligned_neon)
 void x264_mc_copy_w16_aligned_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
 
+#define x264_mc_chroma_neon x264_template(mc_chroma_neon)
 void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int );
+#define x264_frame_init_lowres_core_neon x264_template(frame_init_lowres_core_neon)
 void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int );
 
+#define x264_hpel_filter_v_neon x264_template(hpel_filter_v_neon)
 void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, intptr_t, int );
+#define x264_hpel_filter_c_neon x264_template(hpel_filter_c_neon)
 void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
+#define x264_hpel_filter_h_neon x264_template(hpel_filter_h_neon)
 void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
 
+#define x264_integral_init4h_neon x264_template(integral_init4h_neon)
 void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t );
+#define x264_integral_init4v_neon x264_template(integral_init4v_neon)
 void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t );
+#define x264_integral_init8h_neon x264_template(integral_init8h_neon)
 void x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t );
+#define x264_integral_init8v_neon x264_template(integral_init8v_neon)
 void x264_integral_init8v_neon( uint16_t *, intptr_t );
 
+#define x264_mbtree_propagate_cost_neon x264_template(mbtree_propagate_cost_neon)
 void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int );
 
+#define x264_mbtree_fix8_pack_neon x264_template(mbtree_fix8_pack_neon)
 void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count );
+#define x264_mbtree_fix8_unpack_neon x264_template(mbtree_fix8_unpack_neon)
 void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count );
 
 #if !HIGH_BIT_DEPTH
-static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
+static void weight_cache_neon( x264_t *h, x264_weight_t *w )
 {
     if( w->i_scale == 1<<w->i_denom )
     {
         if( w->i_offset < 0 )
         {
-            w->weightfn = x264_mc_offsetsub_wtab_neon;
+            w->weightfn = mc_offsetsub_wtab_neon;
             w->cachea[0] = -w->i_offset;
         }
         else
         {
-            w->weightfn = x264_mc_offsetadd_wtab_neon;
+            w->weightfn = mc_offsetadd_wtab_neon;
             w->cachea[0] = w->i_offset;
         }
     }
     else if( !w->i_denom )
-        w->weightfn = x264_mc_nodenom_wtab_neon;
+        w->weightfn = mc_nodenom_wtab_neon;
     else
-        w->weightfn = x264_mc_wtab_neon;
+        w->weightfn = mc_wtab_neon;
 }
 
-static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
+static void (* const pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
 {
     NULL,
     x264_pixel_avg2_w4_neon,
@@ -144,7 +201,7 @@
     x264_pixel_avg2_w20_neon,
 };
 
-static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
+static void (* const mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
 {
     NULL,
     x264_mc_copy_w4_neon,
@@ -167,7 +224,7 @@
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
         uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
+        pixel_avg_wtab_neon[i_width>>2](
                 dst, i_dst_stride, src1, i_src_stride,
                 src2, i_height );
         if( weight->weightfn )
@@ -176,7 +233,7 @@
     else if( weight->weightfn )
         weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
     else
-        x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
+        mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
 }
 
 static uint8_t *get_ref_neon( uint8_t *dst,   intptr_t *i_dst_stride,
@@ -193,7 +250,7 @@
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
         uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
-        x264_pixel_avg_wtab_neon[i_width>>2](
+        pixel_avg_wtab_neon[i_width>>2](
                 dst, *i_dst_stride, src1, i_src_stride,
                 src2, i_height );
         if( weight->weightfn )
@@ -236,9 +293,8 @@
 PLANE_COPY(16, neon)
 PLANE_COPY_SWAP(16, neon)
 PLANE_INTERLEAVE(neon)
-#endif // !HIGH_BIT_DEPTH
-
 PROPAGATE_LIST(neon)
+#endif // !HIGH_BIT_DEPTH
 
 void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
 {
@@ -260,11 +316,11 @@
     pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
     pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_neon;
 
-    pf->plane_copy              = x264_plane_copy_neon;
+    pf->plane_copy              = plane_copy_neon;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
-    pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
-    pf->plane_copy_swap = x264_plane_copy_swap_neon;
+    pf->plane_copy_interleave = plane_copy_interleave_neon;
+    pf->plane_copy_swap = plane_copy_swap_neon;
 
     pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
@@ -280,10 +336,10 @@
     pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_neon;
     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_neon;
 
-    pf->weight    = x264_mc_wtab_neon;
-    pf->offsetadd = x264_mc_offsetadd_wtab_neon;
-    pf->offsetsub = x264_mc_offsetsub_wtab_neon;
-    pf->weight_cache = x264_weight_cache_neon;
+    pf->weight    = mc_wtab_neon;
+    pf->offsetadd = mc_offsetadd_wtab_neon;
+    pf->offsetsub = mc_offsetsub_wtab_neon;
+    pf->weight_cache = weight_cache_neon;
 
     pf->mc_chroma = x264_mc_chroma_neon;
     pf->mc_luma = mc_luma_neon;
@@ -297,7 +353,7 @@
     pf->integral_init8v = x264_integral_init8v_neon;
 
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_neon;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_neon;
+    pf->mbtree_propagate_list = mbtree_propagate_list_neon;
     pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_neon;
     pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_neon;
 #endif // !HIGH_BIT_DEPTH
diff -Nru x264-0.152.2854+gite9a5903/common/arm/mc.h x264-0.158.2988+git-20191101.7817004/common/arm/mc.h
--- x264-0.152.2854+gite9a5903/common/arm/mc.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/mc.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,6 +26,7 @@
 #ifndef X264_ARM_MC_H
 #define X264_ARM_MC_H
 
+#define x264_mc_init_arm x264_template(mc_init_arm)
 void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/arm/pixel-a.S x264-0.158.2988+git-20191101.7817004/common/arm/pixel-a.S
--- x264-0.152.2854+gite9a5903/common/arm/pixel-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/pixel-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.S: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -46,7 +46,7 @@
 .text
 
 .macro SAD4_ARMV6 h
-function x264_pixel_sad_4x\h\()_armv6
+function pixel_sad_4x\h\()_armv6
     push        {r4-r6,lr}
     ldr         r4, [r2], r3
     ldr         r5, [r0], r1
@@ -115,7 +115,7 @@
 .endm
 
 .macro SAD_FUNC w, h, name, align:vararg
-function x264_pixel_sad\name\()_\w\()x\h\()_neon
+function pixel_sad\name\()_\w\()x\h\()_neon
     SAD_START_\w \align
 
 .if \w == 16
@@ -206,7 +206,7 @@
 .endm
 
 .macro SAD_FUNC_DUAL w, h
-function x264_pixel_sad_aligned_\w\()x\h\()_neon_dual
+function pixel_sad_aligned_\w\()x\h\()_neon_dual
     SAD_DUAL_START_\w
 .rept \h / 2 - \w / 8
     SAD_DUAL_\w
@@ -328,7 +328,7 @@
 .endm
 
 .macro SAD_X_FUNC x, w, h
-function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
+function pixel_sad_x\x\()_\w\()x\h\()_neon
     push        {r6-r7,lr}
 .if \x == 3
     ldrd        r6,  r7,  [sp, #12]
@@ -390,7 +390,7 @@
 SAD_X_FUNC  4, 16, 8
 SAD_X_FUNC  4, 16, 16
 
-function x264_pixel_vsad_neon
+function pixel_vsad_neon
     subs        r2,  r2,    #2
     vld1.8     {q0}, [r0],  r1
     vld1.8     {q1}, [r0],  r1
@@ -414,7 +414,7 @@
     bx          lr
 endfunc
 
-function x264_pixel_asd8_neon
+function pixel_asd8_neon
     ldr         r12, [sp,  #0]
     sub         r12,  r12, #2
     vld1.8     {d0}, [r0], r1
@@ -523,7 +523,7 @@
 .endm
 
 .macro SSD_FUNC w h
-function x264_pixel_ssd_\w\()x\h\()_neon
+function pixel_ssd_\w\()x\h\()_neon
     SSD_START_\w
 .rept \h-2
     SSD_\w
@@ -544,7 +544,7 @@
 SSD_FUNC  16, 8
 SSD_FUNC  16, 16
 
-function x264_pixel_ssd_nv12_core_neon
+function pixel_ssd_nv12_core_neon
     push       {r4-r5}
     ldrd        r4,  r5,  [sp, #8]
     add         r12, r4,  #8
@@ -624,7 +624,7 @@
     \vpadal         \qsqr_sum, \qsqr_last
 .endm
 
-function x264_pixel_var_8x8_neon
+function pixel_var_8x8_neon
     vld1.64         {d16}, [r0,:64], r1
     vmull.u8        q1,  d16, d16
     vmovl.u8        q0,  d16
@@ -645,10 +645,10 @@
     VAR_SQR_SUM     q1,  q9,   q14, d24
     vld1.64         {d26}, [r0,:64], r1
     VAR_SQR_SUM     q2,  q10,  q15, d26
-    b               x264_var_end
+    b               var_end
 endfunc
 
-function x264_pixel_var_8x16_neon
+function pixel_var_8x16_neon
     vld1.64         {d16}, [r0,:64], r1
     vld1.64         {d18}, [r0,:64], r1
     vmull.u8        q1,  d16, d16
@@ -677,10 +677,10 @@
     b               1b
 2:
     VAR_SQR_SUM     q2,  q13,  q15, d22
-    b               x264_var_end
+    b               var_end
 endfunc
 
-function x264_pixel_var_16x16_neon
+function pixel_var_16x16_neon
     vld1.64         {d16-d17}, [r0,:128], r1
     vmull.u8        q12, d16, d16
     vmovl.u8        q0,  d16
@@ -704,7 +704,7 @@
     bgt             var16_loop
 endfunc
 
-function x264_var_end, export=0
+function var_end, export=0
     vpaddl.u16      q8,  q14
     vpaddl.u16      q9,  q15
     vadd.u32        q1,  q1,  q8
@@ -744,7 +744,7 @@
     vmlal.s16       \acc, \d1, \d1
 .endm
 
-function x264_pixel_var2_8x8_neon
+function pixel_var2_8x8_neon
     mov             r3,  #16
     DIFF_SUM        q0,  q10, d0,  d1,  d20, d21
     DIFF_SUM        q8,  q11, d16, d17, d22, d23
@@ -783,7 +783,7 @@
     bx              lr
 endfunc
 
-function x264_pixel_var2_8x16_neon
+function pixel_var2_8x16_neon
     mov             r3,  #16
     vld1.64         {d16}, [r0,:64]!
     vld1.64         {d17}, [r1,:64], r3
@@ -846,7 +846,7 @@
     vsubl.u8    \q3, d6,  d7
 .endm
 
-function x264_pixel_satd_4x4_neon
+function pixel_satd_4x4_neon
     vld1.32     {d1[]},  [r2], r3
     vld1.32     {d0[]},  [r0,:32], r1
     vld1.32     {d3[]},  [r2], r3
@@ -868,7 +868,7 @@
     bx          lr
 endfunc
 
-function x264_pixel_satd_4x8_neon
+function pixel_satd_4x8_neon
     vld1.32     {d1[]},  [r2], r3
     vld1.32     {d0[]},  [r0,:32], r1
     vld1.32     {d3[]},  [r2], r3
@@ -892,10 +892,10 @@
     vld1.32     {d6[1]}, [r0,:32], r1
     vsubl.u8    q3,  d6,  d7
     SUMSUB_AB   q10, q11, q2,  q3
-    b           x264_satd_4x8_8x4_end_neon
+    b           satd_4x8_8x4_end_neon
 endfunc
 
-function x264_pixel_satd_8x4_neon
+function pixel_satd_8x4_neon
     vld1.64     {d1}, [r2], r3
     vld1.64     {d0}, [r0,:64], r1
     vsubl.u8    q0,  d0,  d1
@@ -912,7 +912,7 @@
     SUMSUB_AB   q10, q11, q2,  q3
 endfunc
 
-function x264_satd_4x8_8x4_end_neon, export=0
+function satd_4x8_8x4_end_neon, export=0
     vadd.s16    q0,  q8,  q10
     vadd.s16    q1,  q9,  q11
     vsub.s16    q2,  q8,  q10
@@ -939,10 +939,10 @@
     bx          lr
 endfunc
 
-function x264_pixel_satd_8x8_neon
+function pixel_satd_8x8_neon
     mov         ip,  lr
 
-    bl x264_satd_8x8_neon
+    bl satd_8x8_neon
     vadd.u16    q0,  q12, q13
     vadd.u16    q1,  q14, q15
 
@@ -953,15 +953,15 @@
     bx          lr
 endfunc
 
-function x264_pixel_satd_8x16_neon
+function pixel_satd_8x16_neon
     vpush       {d8-d11}
     mov         ip,  lr
 
-    bl x264_satd_8x8_neon
+    bl satd_8x8_neon
     vadd.u16    q4,  q12, q13
     vadd.u16    q5,  q14, q15
 
-    bl x264_satd_8x8_neon
+    bl satd_8x8_neon
     vadd.u16    q4,  q4,  q12
     vadd.u16    q5,  q5,  q13
     vadd.u16    q4,  q4,  q14
@@ -975,7 +975,7 @@
     bx          lr
 endfunc
 
-function x264_satd_8x8_neon, export=0
+function satd_8x8_neon, export=0
     LOAD_DIFF_8x4 q8,  q9,  q10, q11
     vld1.64     {d7}, [r2], r3
     SUMSUB_AB   q0,  q1,  q8,  q9
@@ -996,7 +996,7 @@
 endfunc
 
 // one vertical hadamard pass and two horizontal
-function x264_satd_8x4v_8x8h_neon, export=0
+function satd_8x4v_8x8h_neon, export=0
     SUMSUB_ABCD q0, q1, q2, q3, q12, q13, q14, q15
     vtrn.16     q8,  q9
     SUMSUB_AB   q12, q14, q0,  q2
@@ -1024,15 +1024,15 @@
     bx          lr
 endfunc
 
-function x264_pixel_satd_16x8_neon
+function pixel_satd_16x8_neon
     vpush       {d8-d11}
     mov         ip, lr
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     vadd.u16    q4,  q12, q13
     vadd.u16    q5,  q14, q15
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     vadd.u16    q4,  q4,  q12
     vadd.u16    q5,  q5,  q13
     vadd.u16    q4,  q4,  q14
@@ -1046,27 +1046,27 @@
     bx          lr
 endfunc
 
-function x264_pixel_satd_16x16_neon
+function pixel_satd_16x16_neon
     vpush       {d8-d11}
     mov         ip, lr
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     vadd.u16    q4,  q12, q13
     vadd.u16    q5,  q14, q15
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     vadd.u16    q4,  q4,  q12
     vadd.u16    q5,  q5,  q13
     vadd.u16    q4,  q4,  q14
     vadd.u16    q5,  q5,  q15
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     vadd.u16    q4,  q4,  q12
     vadd.u16    q5,  q5,  q13
     vadd.u16    q4,  q4,  q14
     vadd.u16    q5,  q5,  q15
 
-    bl          x264_satd_16x4_neon
+    bl          satd_16x4_neon
     vadd.u16    q4,  q4,  q12
     vadd.u16    q5,  q5,  q13
     vadd.u16    q4,  q4,  q14
@@ -1080,7 +1080,7 @@
     bx          lr
 endfunc
 
-function x264_satd_16x4_neon, export=0
+function satd_16x4_neon, export=0
     vld1.64     {d2-d3}, [r2], r3
     vld1.64     {d0-d1}, [r0,:128], r1
     vsubl.u8    q8,  d0,  d2
@@ -1101,13 +1101,13 @@
     vsubl.u8    q15, d5,  d7
     SUMSUB_AB   q2,  q3,  q10, q11
     SUMSUB_ABCD q8,  q10, q9,  q11, q0,  q2,  q1,  q3
-    b           x264_satd_8x4v_8x8h_neon
+    b           satd_8x4v_8x8h_neon
 endfunc
 
 
-function x264_pixel_sa8d_8x8_neon
+function pixel_sa8d_8x8_neon
     mov             ip,  lr
-    bl              x264_sa8d_8x8_neon
+    bl              sa8d_8x8_neon
     vadd.u16        q0,  q8,  q9
     HORIZ_ADD       d0,  d0,  d1
     mov             lr,  ip
@@ -1117,23 +1117,23 @@
     bx              lr
 endfunc
 
-function x264_pixel_sa8d_16x16_neon
+function pixel_sa8d_16x16_neon
     vpush           {d8-d11}
     mov             ip,  lr
-    bl              x264_sa8d_8x8_neon
+    bl              sa8d_8x8_neon
     vpaddl.u16      q4,  q8
     vpaddl.u16      q5,  q9
-    bl              x264_sa8d_8x8_neon
+    bl              sa8d_8x8_neon
     vpadal.u16      q4,  q8
     vpadal.u16      q5,  q9
     sub             r0,  r0,  r1,  lsl #4
     sub             r2,  r2,  r3,  lsl #4
     add             r0,  r0,  #8
     add             r2,  r2,  #8
-    bl              x264_sa8d_8x8_neon
+    bl              sa8d_8x8_neon
     vpadal.u16      q4,  q8
     vpadal.u16      q5,  q9
-    bl              x264_sa8d_8x8_neon
+    bl              sa8d_8x8_neon
     vpaddl.u16      q8,  q8
     vpaddl.u16      q9,  q9
     vadd.u32        q0,  q4,  q8
@@ -1182,7 +1182,7 @@
 .endm
 
 .macro sa8d_satd_8x8 satd=
-function x264_sa8d_\satd\()8x8_neon, export=0
+function sa8d_\satd\()8x8_neon, export=0
     LOAD_DIFF_8x4   q8,  q9,  q10, q11
     vld1.64         {d7}, [r2], r3
     SUMSUB_AB       q0,  q1,  q8,  q9
@@ -1254,19 +1254,19 @@
 sa8d_satd_8x8
 sa8d_satd_8x8 satd_
 
-function x264_pixel_sa8d_satd_16x16_neon
+function pixel_sa8d_satd_16x16_neon
     push            {lr}
     vpush           {q4-q7}
     vmov.u32        q4,  #0
     vmov.u32        q5,  #0
-    bl              x264_sa8d_satd_8x8_neon
-    bl              x264_sa8d_satd_8x8_neon
+    bl              sa8d_satd_8x8_neon
+    bl              sa8d_satd_8x8_neon
     sub             r0,  r0,  r1,  lsl #4
     sub             r2,  r2,  r3,  lsl #4
     add             r0,  r0,  #8
     add             r2,  r2,  #8
-    bl              x264_sa8d_satd_8x8_neon
-    bl              x264_sa8d_satd_8x8_neon
+    bl              sa8d_satd_8x8_neon
+    bl              sa8d_satd_8x8_neon
     vadd.u32        d1,  d10, d11
     vadd.u32        d0,  d8,  d9
     vpadd.u32       d1,  d1,  d1
@@ -1280,7 +1280,7 @@
 
 
 .macro HADAMARD_AC w h
-function x264_pixel_hadamard_ac_\w\()x\h\()_neon
+function pixel_hadamard_ac_\w\()x\h\()_neon
     vpush           {d8-d15}
     movrel          ip, mask_ac4
     vmov.i8         q4, #0
@@ -1289,18 +1289,18 @@
     vmov.i8         q5, #0
 
     mov             ip,  lr
-    bl              x264_hadamard_ac_8x8_neon
+    bl              hadamard_ac_8x8_neon
 .if \h > 8
-    bl              x264_hadamard_ac_8x8_neon
+    bl              hadamard_ac_8x8_neon
 .endif
 .if \w > 8
     sub             r0,  r0,  r1,  lsl #3
     add             r0,  r0,  #8
-    bl              x264_hadamard_ac_8x8_neon
+    bl              hadamard_ac_8x8_neon
 .endif
 .if \w * \h == 256
     sub             r0,  r0,  r1,  lsl #4
-    bl              x264_hadamard_ac_8x8_neon
+    bl              hadamard_ac_8x8_neon
 .endif
 
     vadd.s32        d8,  d8,  d9
@@ -1321,7 +1321,7 @@
 HADAMARD_AC 16, 16
 
 // q4: satd  q5: sa8d  q6: mask_ac4  q7: mask_ac8
-function x264_hadamard_ac_8x8_neon, export=0
+function hadamard_ac_8x8_neon, export=0
     vld1.64         {d2},  [r0,:64], r1
     vld1.64         {d3},  [r0,:64], r1
     vaddl.u8        q0,  d2,  d3
@@ -1435,7 +1435,7 @@
     vmull.u8    \ssb, \db, \db
 .endm
 
-function x264_pixel_ssim_4x4x2_core_neon
+function pixel_ssim_4x4x2_core_neon
     ldr         ip, [sp]
     vld1.64     {d0}, [r0], r1
     vld1.64     {d2}, [r2], r3
@@ -1464,7 +1464,7 @@
 endfunc
 
 // FIXME: see about doing 16x16 -> 32 bit multiplies for s1/s2
-function x264_pixel_ssim_end4_neon
+function pixel_ssim_end4_neon
     vld1.32     {d16-d19}, [r0,:128]!
     vld1.32     {d20-d23}, [r1,:128]!
     vadd.s32    q0,  q8,  q10
diff -Nru x264-0.152.2854+gite9a5903/common/arm/pixel.h x264-0.158.2988+git-20191101.7817004/common/arm/pixel.h
--- x264-0.152.2854+gite9a5903/common/arm/pixel.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/pixel.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,6 +26,68 @@
 #ifndef X264_ARM_PIXEL_H
 #define X264_ARM_PIXEL_H
 
+#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon)
+#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon)
+#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon)
+#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon)
+#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon)
+#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon)
+#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon)
+#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon)
+#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon)
+#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon)
+#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon)
+#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon)
+#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon)
+#define x264_pixel_sad_16x16_neon x264_template(pixel_sad_16x16_neon)
+#define x264_pixel_sad_16x8_neon x264_template(pixel_sad_16x8_neon)
+#define x264_pixel_sad_4x4_armv6 x264_template(pixel_sad_4x4_armv6)
+#define x264_pixel_sad_4x4_neon x264_template(pixel_sad_4x4_neon)
+#define x264_pixel_sad_4x8_armv6 x264_template(pixel_sad_4x8_armv6)
+#define x264_pixel_sad_4x8_neon x264_template(pixel_sad_4x8_neon)
+#define x264_pixel_sad_8x16_neon x264_template(pixel_sad_8x16_neon)
+#define x264_pixel_sad_8x4_neon x264_template(pixel_sad_8x4_neon)
+#define x264_pixel_sad_8x8_neon x264_template(pixel_sad_8x8_neon)
+#define x264_pixel_sad_aligned_16x16_neon x264_template(pixel_sad_aligned_16x16_neon)
+#define x264_pixel_sad_aligned_16x16_neon_dual x264_template(pixel_sad_aligned_16x16_neon_dual)
+#define x264_pixel_sad_aligned_16x8_neon x264_template(pixel_sad_aligned_16x8_neon)
+#define x264_pixel_sad_aligned_16x8_neon_dual x264_template(pixel_sad_aligned_16x8_neon_dual)
+#define x264_pixel_sad_aligned_4x4_neon x264_template(pixel_sad_aligned_4x4_neon)
+#define x264_pixel_sad_aligned_4x8_neon x264_template(pixel_sad_aligned_4x8_neon)
+#define x264_pixel_sad_aligned_8x16_neon x264_template(pixel_sad_aligned_8x16_neon)
+#define x264_pixel_sad_aligned_8x16_neon_dual x264_template(pixel_sad_aligned_8x16_neon_dual)
+#define x264_pixel_sad_aligned_8x4_neon x264_template(pixel_sad_aligned_8x4_neon)
+#define x264_pixel_sad_aligned_8x4_neon_dual x264_template(pixel_sad_aligned_8x4_neon_dual)
+#define x264_pixel_sad_aligned_8x8_neon x264_template(pixel_sad_aligned_8x8_neon)
+#define x264_pixel_sad_aligned_8x8_neon_dual x264_template(pixel_sad_aligned_8x8_neon_dual)
+#define x264_pixel_sad_x3_16x16_neon x264_template(pixel_sad_x3_16x16_neon)
+#define x264_pixel_sad_x3_16x8_neon x264_template(pixel_sad_x3_16x8_neon)
+#define x264_pixel_sad_x3_4x4_neon x264_template(pixel_sad_x3_4x4_neon)
+#define x264_pixel_sad_x3_4x8_neon x264_template(pixel_sad_x3_4x8_neon)
+#define x264_pixel_sad_x3_8x16_neon x264_template(pixel_sad_x3_8x16_neon)
+#define x264_pixel_sad_x3_8x4_neon x264_template(pixel_sad_x3_8x4_neon)
+#define x264_pixel_sad_x3_8x8_neon x264_template(pixel_sad_x3_8x8_neon)
+#define x264_pixel_sad_x4_16x16_neon x264_template(pixel_sad_x4_16x16_neon)
+#define x264_pixel_sad_x4_16x8_neon x264_template(pixel_sad_x4_16x8_neon)
+#define x264_pixel_sad_x4_4x4_neon x264_template(pixel_sad_x4_4x4_neon)
+#define x264_pixel_sad_x4_4x8_neon x264_template(pixel_sad_x4_4x8_neon)
+#define x264_pixel_sad_x4_8x16_neon x264_template(pixel_sad_x4_8x16_neon)
+#define x264_pixel_sad_x4_8x4_neon x264_template(pixel_sad_x4_8x4_neon)
+#define x264_pixel_sad_x4_8x8_neon x264_template(pixel_sad_x4_8x8_neon)
+#define x264_pixel_satd_16x16_neon x264_template(pixel_satd_16x16_neon)
+#define x264_pixel_satd_16x8_neon x264_template(pixel_satd_16x8_neon)
+#define x264_pixel_satd_4x4_neon x264_template(pixel_satd_4x4_neon)
+#define x264_pixel_satd_4x8_neon x264_template(pixel_satd_4x8_neon)
+#define x264_pixel_satd_8x16_neon x264_template(pixel_satd_8x16_neon)
+#define x264_pixel_satd_8x4_neon x264_template(pixel_satd_8x4_neon)
+#define x264_pixel_satd_8x8_neon x264_template(pixel_satd_8x8_neon)
+#define x264_pixel_ssd_16x16_neon x264_template(pixel_ssd_16x16_neon)
+#define x264_pixel_ssd_16x8_neon x264_template(pixel_ssd_16x8_neon)
+#define x264_pixel_ssd_4x4_neon x264_template(pixel_ssd_4x4_neon)
+#define x264_pixel_ssd_4x8_neon x264_template(pixel_ssd_4x8_neon)
+#define x264_pixel_ssd_8x16_neon x264_template(pixel_ssd_8x16_neon)
+#define x264_pixel_ssd_8x4_neon x264_template(pixel_ssd_8x4_neon)
+#define x264_pixel_ssd_8x8_neon x264_template(pixel_ssd_8x8_neon)
 #define DECL_PIXELS( ret, name, suffix, args ) \
     ret x264_pixel_##name##_16x16_##suffix args;\
     ret x264_pixel_##name##_16x8_##suffix args;\
@@ -52,30 +114,47 @@
 DECL_X1( satd, neon )
 DECL_X1( ssd, neon )
 
+#define x264_pixel_ssd_nv12_core_neon x264_template(pixel_ssd_nv12_core_neon)
 void x264_pixel_ssd_nv12_core_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, uint64_t *, uint64_t * );
 
+#define x264_pixel_vsad_neon x264_template(pixel_vsad_neon)
 int x264_pixel_vsad_neon( uint8_t *, intptr_t, int );
 
+#define x264_pixel_sa8d_8x8_neon x264_template(pixel_sa8d_8x8_neon)
 int x264_pixel_sa8d_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t );
+#define x264_pixel_sa8d_16x16_neon x264_template(pixel_sa8d_16x16_neon)
 int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
+#define x264_pixel_sa8d_satd_16x16_neon x264_template(pixel_sa8d_satd_16x16_neon)
 uint64_t x264_pixel_sa8d_satd_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
 
+#define x264_pixel_var_8x8_neon x264_template(pixel_var_8x8_neon)
 uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+#define x264_pixel_var_8x16_neon x264_template(pixel_var_8x16_neon)
 uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
+#define x264_pixel_var_16x16_neon x264_template(pixel_var_16x16_neon)
 uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
+#define x264_pixel_var2_8x8_neon x264_template(pixel_var2_8x8_neon)
 int x264_pixel_var2_8x8_neon ( uint8_t *, uint8_t *, int * );
+#define x264_pixel_var2_8x16_neon x264_template(pixel_var2_8x16_neon)
 int x264_pixel_var2_8x16_neon( uint8_t *, uint8_t *, int * );
 
+#define x264_pixel_hadamard_ac_8x8_neon x264_template(pixel_hadamard_ac_8x8_neon)
 uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
+#define x264_pixel_hadamard_ac_8x16_neon x264_template(pixel_hadamard_ac_8x16_neon)
 uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
+#define x264_pixel_hadamard_ac_16x8_neon x264_template(pixel_hadamard_ac_16x8_neon)
 uint64_t x264_pixel_hadamard_ac_16x8_neon ( uint8_t *, intptr_t );
+#define x264_pixel_hadamard_ac_16x16_neon x264_template(pixel_hadamard_ac_16x16_neon)
 uint64_t x264_pixel_hadamard_ac_16x16_neon( uint8_t *, intptr_t );
 
+#define x264_pixel_ssim_4x4x2_core_neon x264_template(pixel_ssim_4x4x2_core_neon)
 void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t,
                                       const uint8_t *, intptr_t,
                                       int sums[2][4] );
+#define x264_pixel_ssim_end4_neon x264_template(pixel_ssim_end4_neon)
 float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
 
+#define x264_pixel_asd8_neon x264_template(pixel_asd8_neon)
 int x264_pixel_asd8_neon( uint8_t *, intptr_t,  uint8_t *, intptr_t, int );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/arm/predict-a.S x264-0.158.2988+git-20191101.7817004/common/arm/predict-a.S
--- x264-0.152.2854+gite9a5903/common/arm/predict-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/predict-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.S: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -77,7 +77,7 @@
 
 
 // because gcc doesn't believe in using the free shift in add
-function x264_predict_4x4_h_armv6
+function predict_4x4_h_armv6
     ldrb    r1, [r0, #0*FDEC_STRIDE-1]
     ldrb    r2, [r0, #1*FDEC_STRIDE-1]
     ldrb    r3, [r0, #2*FDEC_STRIDE-1]
@@ -97,7 +97,7 @@
     bx      lr
 endfunc
 
-function x264_predict_4x4_v_armv6
+function predict_4x4_v_armv6
     ldr     r1,  [r0, #0 - 1 * FDEC_STRIDE]
     str     r1,  [r0, #0 + 0 * FDEC_STRIDE]
     str     r1,  [r0, #0 + 1 * FDEC_STRIDE]
@@ -106,7 +106,7 @@
     bx      lr
 endfunc
 
-function x264_predict_4x4_dc_armv6
+function predict_4x4_dc_armv6
     mov     ip, #0
     ldr     r1, [r0, #-FDEC_STRIDE]
     ldrb    r2, [r0, #0*FDEC_STRIDE-1]
@@ -129,7 +129,7 @@
     bx      lr
 endfunc
 
-function x264_predict_4x4_dc_top_neon
+function predict_4x4_dc_top_neon
     mov         r12, #FDEC_STRIDE
     sub         r1, r0, #FDEC_STRIDE
     vld1.32     d1[], [r1,:32]
@@ -158,7 +158,7 @@
     uadd8   \a2, \a2, \c2
 .endm
 
-function x264_predict_4x4_ddr_armv6
+function predict_4x4_ddr_armv6
     ldr     r1, [r0, # -FDEC_STRIDE]
     ldrb    r2, [r0, # -FDEC_STRIDE-1]
     ldrb    r3, [r0, #0*FDEC_STRIDE-1]
@@ -187,7 +187,7 @@
     pop     {r4-r6,pc}
 endfunc
 
-function x264_predict_4x4_ddl_neon
+function predict_4x4_ddl_neon
     sub         r0, #FDEC_STRIDE
     mov         ip, #FDEC_STRIDE
     vld1.64     {d0}, [r0], ip
@@ -206,7 +206,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_dc_neon
+function predict_8x8_dc_neon
     mov     ip, #0
     ldrd    r2, r3, [r1, #8]
     push    {r4-r5,lr}
@@ -230,7 +230,7 @@
     pop    {r4-r5,pc}
 endfunc
 
-function x264_predict_8x8_h_neon
+function predict_8x8_h_neon
     add         r1, r1, #7
     mov         ip, #FDEC_STRIDE
     vld1.64     {d16}, [r1]
@@ -253,7 +253,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_v_neon
+function predict_8x8_v_neon
     add         r1, r1, #16
     mov         r12, #FDEC_STRIDE
     vld1.8      {d0}, [r1,:64]
@@ -263,7 +263,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_ddl_neon
+function predict_8x8_ddl_neon
     add         r1, #16
     vld1.8      {d0, d1}, [r1,:128]
     vmov.i8     q3, #0
@@ -291,7 +291,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_ddr_neon
+function predict_8x8_ddr_neon
     vld1.8      {d0-d3}, [r1,:128]
     vext.8      q2, q0, q1, #7
     vext.8      q3, q0, q1, #9
@@ -321,7 +321,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_vl_neon
+function predict_8x8_vl_neon
     add         r1, #16
     mov         r12, #FDEC_STRIDE
 
@@ -352,7 +352,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_vr_neon
+function predict_8x8_vr_neon
     add         r1, #8
     mov         r12, #FDEC_STRIDE
     vld1.8      {d4,d5}, [r1,:64]
@@ -384,7 +384,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_hd_neon
+function predict_8x8_hd_neon
     mov         r12, #FDEC_STRIDE
     add         r1, #7
 
@@ -417,7 +417,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8_hu_neon
+function predict_8x8_hu_neon
     mov         r12, #FDEC_STRIDE
     add         r1, #7
     vld1.8      {d7}, [r1]
@@ -450,7 +450,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8c_dc_top_neon
+function predict_8x8c_dc_top_neon
     sub         r2,  r0,  #FDEC_STRIDE
     mov         r1,  #FDEC_STRIDE
     vld1.8      {d0}, [r2,:64]
@@ -463,7 +463,7 @@
     b           pred8x8_dc_end
 endfunc
 
-function x264_predict_8x8c_dc_left_neon
+function predict_8x8c_dc_left_neon
     mov         r1,  #FDEC_STRIDE
     sub         r2,  r0,  #1
     ldcol.8     d0,  r2,  r1
@@ -475,7 +475,7 @@
     b           pred8x8_dc_end
 endfunc
 
-function x264_predict_8x8c_dc_neon
+function predict_8x8c_dc_neon
     sub         r2,  r0,  #FDEC_STRIDE
     mov         r1,  #FDEC_STRIDE
     vld1.8      {d0}, [r2,:64]
@@ -501,7 +501,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8c_h_neon
+function predict_8x8c_h_neon
     sub         r1, r0, #1
     mov         ip, #FDEC_STRIDE
 .rept 4
@@ -513,7 +513,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8c_v_neon
+function predict_8x8c_v_neon
     sub         r0, r0, #FDEC_STRIDE
     mov         ip, #FDEC_STRIDE
     vld1.64     {d0}, [r0,:64], ip
@@ -523,7 +523,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x8c_p_neon
+function predict_8x8c_p_neon
     sub         r3,  r0,  #FDEC_STRIDE
     mov         r1,  #FDEC_STRIDE
     add         r2,  r3,  #4
@@ -572,7 +572,7 @@
 endfunc
 
 
-function x264_predict_8x16c_dc_top_neon
+function predict_8x16c_dc_top_neon
     sub         r2,  r0,  #FDEC_STRIDE
     mov         r1,  #FDEC_STRIDE
     vld1.8      {d0}, [r2,:64]
@@ -597,7 +597,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x16c_h_neon
+function predict_8x16c_h_neon
     sub         r1, r0, #1
     mov         ip, #FDEC_STRIDE
 .rept 8
@@ -609,7 +609,7 @@
     bx          lr
 endfunc
 
-function x264_predict_8x16c_p_neon
+function predict_8x16c_p_neon
     sub         r3,  r0,  #FDEC_STRIDE
     mov         r1,  #FDEC_STRIDE
     add         r2,  r3,  #4
@@ -667,7 +667,7 @@
 endfunc
 
 
-function x264_predict_16x16_dc_top_neon
+function predict_16x16_dc_top_neon
     sub         r2,  r0,  #FDEC_STRIDE
     mov         r1,  #FDEC_STRIDE
     vld1.8      {q0}, [r2,:128]
@@ -677,7 +677,7 @@
     b           pred16x16_dc_end
 endfunc
 
-function x264_predict_16x16_dc_left_neon
+function predict_16x16_dc_left_neon
     mov         r1,  #FDEC_STRIDE
     sub         r2,  r0,  #1
     ldcol.8     d0,  r2,  r1
@@ -688,7 +688,7 @@
     b           pred16x16_dc_end
 endfunc
 
-function x264_predict_16x16_dc_neon
+function predict_16x16_dc_neon
     sub         r3, r0, #FDEC_STRIDE
     sub         r0, r0, #1
     vld1.64     {d0-d1}, [r3,:128]
@@ -726,7 +726,7 @@
     bx          lr
 endfunc
 
-function x264_predict_16x16_h_neon
+function predict_16x16_h_neon
     sub         r1, r0, #1
     mov         ip, #FDEC_STRIDE
 .rept 8
@@ -740,7 +740,7 @@
     bx          lr
 endfunc
 
-function x264_predict_16x16_v_neon
+function predict_16x16_v_neon
     sub         r0, r0, #FDEC_STRIDE
     mov         ip, #FDEC_STRIDE
     vld1.64     {d0-d1}, [r0,:128], ip
@@ -750,7 +750,7 @@
     bx          lr
 endfunc
 
-function x264_predict_16x16_p_neon
+function predict_16x16_p_neon
     sub         r3,  r0,  #FDEC_STRIDE
     mov         r1,  #FDEC_STRIDE
     add         r2,  r3,  #8
diff -Nru x264-0.152.2854+gite9a5903/common/arm/predict-c.c x264-0.158.2988+git-20191101.7817004/common/arm/predict-c.c
--- x264-0.152.2854+gite9a5903/common/arm/predict-c.c	2017-12-31 12:50:50.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/predict-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/common/arm/predict.h x264-0.158.2988+git-20191101.7817004/common/arm/predict.h
--- x264-0.152.2854+gite9a5903/common/arm/predict.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/predict.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,45 +26,80 @@
 #ifndef X264_ARM_PREDICT_H
 #define X264_ARM_PREDICT_H
 
+#define x264_predict_4x4_dc_armv6 x264_template(predict_4x4_dc_armv6)
 void x264_predict_4x4_dc_armv6( uint8_t *src );
+#define x264_predict_4x4_dc_top_neon x264_template(predict_4x4_dc_top_neon)
 void x264_predict_4x4_dc_top_neon( uint8_t *src );
+#define x264_predict_4x4_v_armv6 x264_template(predict_4x4_v_armv6)
 void x264_predict_4x4_v_armv6( uint8_t *src );
+#define x264_predict_4x4_h_armv6 x264_template(predict_4x4_h_armv6)
 void x264_predict_4x4_h_armv6( uint8_t *src );
+#define x264_predict_4x4_ddr_armv6 x264_template(predict_4x4_ddr_armv6)
 void x264_predict_4x4_ddr_armv6( uint8_t *src );
+#define x264_predict_4x4_ddl_neon x264_template(predict_4x4_ddl_neon)
 void x264_predict_4x4_ddl_neon( uint8_t *src );
 
+#define x264_predict_8x8c_dc_neon x264_template(predict_8x8c_dc_neon)
 void x264_predict_8x8c_dc_neon( uint8_t *src );
+#define x264_predict_8x8c_dc_top_neon x264_template(predict_8x8c_dc_top_neon)
 void x264_predict_8x8c_dc_top_neon( uint8_t *src );
+#define x264_predict_8x8c_dc_left_neon x264_template(predict_8x8c_dc_left_neon)
 void x264_predict_8x8c_dc_left_neon( uint8_t *src );
+#define x264_predict_8x8c_h_neon x264_template(predict_8x8c_h_neon)
 void x264_predict_8x8c_h_neon( uint8_t *src );
+#define x264_predict_8x8c_v_neon x264_template(predict_8x8c_v_neon)
 void x264_predict_8x8c_v_neon( uint8_t *src );
+#define x264_predict_8x8c_p_neon x264_template(predict_8x8c_p_neon)
 void x264_predict_8x8c_p_neon( uint8_t *src );
 
+#define x264_predict_8x16c_h_neon x264_template(predict_8x16c_h_neon)
 void x264_predict_8x16c_h_neon( uint8_t *src );
+#define x264_predict_8x16c_dc_top_neon x264_template(predict_8x16c_dc_top_neon)
 void x264_predict_8x16c_dc_top_neon( uint8_t *src );
+#define x264_predict_8x16c_p_neon x264_template(predict_8x16c_p_neon)
 void x264_predict_8x16c_p_neon( uint8_t *src );
 
+#define x264_predict_8x8_dc_neon x264_template(predict_8x8_dc_neon)
 void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_ddl_neon x264_template(predict_8x8_ddl_neon)
 void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_ddr_neon x264_template(predict_8x8_ddr_neon)
 void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_vl_neon x264_template(predict_8x8_vl_neon)
 void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_vr_neon x264_template(predict_8x8_vr_neon)
 void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_v_neon x264_template(predict_8x8_v_neon)
 void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_h_neon x264_template(predict_8x8_h_neon)
 void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_hd_neon x264_template(predict_8x8_hd_neon)
 void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_hu_neon x264_template(predict_8x8_hu_neon)
 void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
 
+#define x264_predict_16x16_dc_neon x264_template(predict_16x16_dc_neon)
 void x264_predict_16x16_dc_neon( uint8_t *src );
+#define x264_predict_16x16_dc_top_neon x264_template(predict_16x16_dc_top_neon)
 void x264_predict_16x16_dc_top_neon( uint8_t *src );
+#define x264_predict_16x16_dc_left_neon x264_template(predict_16x16_dc_left_neon)
 void x264_predict_16x16_dc_left_neon( uint8_t *src );
+#define x264_predict_16x16_h_neon x264_template(predict_16x16_h_neon)
 void x264_predict_16x16_h_neon( uint8_t *src );
+#define x264_predict_16x16_v_neon x264_template(predict_16x16_v_neon)
 void x264_predict_16x16_v_neon( uint8_t *src );
+#define x264_predict_16x16_p_neon x264_template(predict_16x16_p_neon)
 void x264_predict_16x16_p_neon( uint8_t *src );
 
+#define x264_predict_4x4_init_arm x264_template(predict_4x4_init_arm)
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
+#define x264_predict_8x8_init_arm x264_template(predict_8x8_init_arm)
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
+#define x264_predict_8x8c_init_arm x264_template(predict_8x8c_init_arm)
 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );
+#define x264_predict_8x16c_init_arm x264_template(predict_8x16c_init_arm)
 void x264_predict_8x16c_init_arm( int cpu, x264_predict_t pf[7] );
+#define x264_predict_16x16_init_arm x264_template(predict_16x16_init_arm)
 void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/arm/quant-a.S x264-0.158.2988+git-20191101.7817004/common/arm/quant-a.S
--- x264-0.152.2854+gite9a5903/common/arm/quant-a.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/quant-a.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /****************************************************************************
  * quant.S: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Janne Grunau <janne-x264@jannau.net>
@@ -75,7 +75,7 @@
 .endm
 
 // quant_2x2_dc( int16_t dct[4], int mf, int bias )
-function x264_quant_2x2_dc_neon
+function quant_2x2_dc_neon
     vld1.64     {d0}, [r0,:64]
     vabs.s16    d3,  d0
     vdup.16     d2,  r2
@@ -91,7 +91,7 @@
 endfunc
 
 // quant_4x4_dc( int16_t dct[16], int mf, int bias )
-function x264_quant_4x4_dc_neon
+function quant_4x4_dc_neon
     vld1.64     {d28-d31}, [r0,:128]
     vabs.s16    q8,  q14
     vabs.s16    q9,  q15
@@ -103,7 +103,7 @@
 endfunc
 
 // quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4_neon
+function quant_4x4_neon
     vld1.64     {d28-d31}, [r0,:128]
     vabs.s16    q8,  q14
     vabs.s16    q9,  q15
@@ -115,7 +115,7 @@
 endfunc
 
 // quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
-function x264_quant_4x4x4_neon
+function quant_4x4x4_neon
     vpush       {d8-d15}
     vld1.64     {d28-d31}, [r0,:128]
     vabs.s16    q8,  q14
@@ -156,7 +156,7 @@
 endfunc
 
 // quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
-function x264_quant_8x8_neon
+function quant_8x8_neon
     vld1.64     {d28-d31}, [r0,:128]
     vabs.s16    q8,  q14
     vabs.s16    q9,  q15
@@ -191,7 +191,7 @@
 
 // dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp )
 .macro DEQUANT size bits
-function x264_dequant_\size\()_neon
+function dequant_\size\()_neon
     DEQUANT_START \bits+2, \bits
 .ifc \size, 8x8
     mov         r2,  #4
@@ -272,7 +272,7 @@
 DEQUANT 8x8, 6
 
 // dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp )
-function x264_dequant_4x4_dc_neon
+function dequant_4x4_dc_neon
     DEQUANT_START 6, 6, yes
     blt         dequant_4x4_dc_rshift
 
@@ -318,7 +318,7 @@
 endfunc
 
 .macro decimate_score_1x size
-function x264_decimate_score\size\()_neon
+function decimate_score\size\()_neon
     vld1.16     {q0, q1}, [r0, :128]
     movrel      r3, mask_2bit
     vmov.s8     q3,  #0x01
@@ -347,7 +347,7 @@
     lsr         r1,  r1,  #2
 .endif
     rbit        r1,  r1
-    movrelx     r3,  X(x264_decimate_table4), r2
+    movrelx     r3,  X264(decimate_table4), r2
 1:
     clz         r2,  r1
     lsl         r1,  r1,  r2
@@ -363,7 +363,7 @@
 decimate_score_1x 15
 decimate_score_1x 16
 
-function x264_decimate_score64_neon
+function decimate_score64_neon
     push        {lr}
     vld1.16     {q8,  q9},  [r0, :128]!
     vld1.16     {q10, q11}, [r0, :128]!
@@ -416,7 +416,7 @@
     mvn         r12, r12
     mov         r0,  #0
     mov         lr,  #32
-    movrelx     r3,  X(x264_decimate_table8), r2
+    movrelx     r3,  X264(decimate_table8), r2
     beq         2f
 1:
     clz         r2,  r1
@@ -449,7 +449,7 @@
 endfunc
 
 // int coeff_last( int16_t *l )
-function x264_coeff_last4_arm
+function coeff_last4_arm
     ldrd        r2,  r3,  [r0]
     subs        r0,  r3,  #0
     movne       r0,  #2
@@ -459,7 +459,7 @@
     bx          lr
 endfunc
 
-function x264_coeff_last8_arm
+function coeff_last8_arm
     ldrd        r2,  r3,  [r0, #8]
     orrs        ip,  r2,  r3
     movne       r0,  #4
@@ -474,7 +474,7 @@
 endfunc
 
 .macro COEFF_LAST_1x size
-function x264_coeff_last\size\()_neon
+function coeff_last\size\()_neon
 .if \size == 15
     sub         r0,  r0,  #2
 .endif
@@ -500,7 +500,7 @@
 COEFF_LAST_1x 15
 COEFF_LAST_1x 16
 
-function x264_coeff_last64_neon
+function coeff_last64_neon
     vld1.64     {d16-d19}, [r0,:128]!
     vqmovn.u16  d16, q8
     vqmovn.u16  d17, q9
@@ -545,7 +545,7 @@
     bx          lr
 endfunc
 
-function x264_denoise_dct_neon
+function denoise_dct_neon
 1:  subs        r3,  r3,  #16
     vld1.16     {q0,  q1},  [r0]
     vld1.32     {q12, q13}, [r1]!
diff -Nru x264-0.152.2854+gite9a5903/common/arm/quant.h x264-0.158.2988+git-20191101.7817004/common/arm/quant.h
--- x264-0.152.2854+gite9a5903/common/arm/quant.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/arm/quant.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,28 +26,46 @@
 #ifndef X264_ARM_QUANT_H
 #define X264_ARM_QUANT_H
 
+#define x264_quant_2x2_dc_armv6 x264_template(quant_2x2_dc_armv6)
 int x264_quant_2x2_dc_armv6( int16_t dct[4], int mf, int bias );
 
+#define x264_quant_2x2_dc_neon x264_template(quant_2x2_dc_neon)
 int x264_quant_2x2_dc_neon( int16_t dct[4], int mf, int bias );
+#define x264_quant_4x4_dc_neon x264_template(quant_4x4_dc_neon)
 int x264_quant_4x4_dc_neon( int16_t dct[16], int mf, int bias );
+#define x264_quant_4x4_neon x264_template(quant_4x4_neon)
 int x264_quant_4x4_neon( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
+#define x264_quant_4x4x4_neon x264_template(quant_4x4x4_neon)
 int x264_quant_4x4x4_neon( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] );
+#define x264_quant_8x8_neon x264_template(quant_8x8_neon)
 int x264_quant_8x8_neon( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
 
+#define x264_dequant_4x4_dc_neon x264_template(dequant_4x4_dc_neon)
 void x264_dequant_4x4_dc_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_4x4_neon x264_template(dequant_4x4_neon)
 void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_neon x264_template(dequant_8x8_neon)
 void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
 
+#define x264_decimate_score15_neon x264_template(decimate_score15_neon)
 int x264_decimate_score15_neon( int16_t * );
+#define x264_decimate_score16_neon x264_template(decimate_score16_neon)
 int x264_decimate_score16_neon( int16_t * );
+#define x264_decimate_score64_neon x264_template(decimate_score64_neon)
 int x264_decimate_score64_neon( int16_t * );
 
+#define x264_coeff_last4_arm x264_template(coeff_last4_arm)
 int x264_coeff_last4_arm( int16_t * );
+#define x264_coeff_last8_arm x264_template(coeff_last8_arm)
 int x264_coeff_last8_arm( int16_t * );
+#define x264_coeff_last15_neon x264_template(coeff_last15_neon)
 int x264_coeff_last15_neon( int16_t * );
+#define x264_coeff_last16_neon x264_template(coeff_last16_neon)
 int x264_coeff_last16_neon( int16_t * );
+#define x264_coeff_last64_neon x264_template(coeff_last64_neon)
 int x264_coeff_last64_neon( int16_t * );
 
+#define x264_denoise_dct_neon x264_template(denoise_dct_neon)
 void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/base.c x264-0.158.2988+git-20191101.7817004/common/base.c
--- x264-0.152.2854+gite9a5903/common/base.c	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/base.c	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,1443 @@
+/*****************************************************************************
+ * base.c: misc common functions (bit depth independent)
+ *****************************************************************************
+ * Copyright (C) 2003-2019 x264 project
+ *
+ * Authors: Loren Merritt <lorenm@u.washington.edu>
+ *          Laurent Aimar <fenrir@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "base.h"
+
+#include <ctype.h>
+
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#if HAVE_THP
+#include <sys/mman.h>
+#endif
+
+/****************************************************************************
+ * x264_reduce_fraction: reduce *n / *d to lowest terms in place (Euclidean GCD); no-op if either is zero
+ ****************************************************************************/
+#define REDUCE_FRACTION( name, type )\
+void name( type *n, type *d )\
+{                   \
+    type a = *n;    \
+    type b = *d;    \
+    type c;         \
+    if( !a || !b )  \
+        return;     \
+    c = a % b;      \
+    while( c )      \
+    {               \
+        a = b;      \
+        b = c;      \
+        c = a % b;  \
+    }               \
+    *n /= b;        \
+    *d /= b;        \
+}
+
+REDUCE_FRACTION( x264_reduce_fraction  , uint32_t ) /* 32-bit instantiation */
+REDUCE_FRACTION( x264_reduce_fraction64, uint64_t ) /* 64-bit instantiation */
+
+/****************************************************************************
+ * x264_log: default stderr log sink, plus a variadic wrapper around it
+ ****************************************************************************/
+void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg )
+{
+    const char *psz_prefix; /* only ever points at string literals, so keep it const */
+    switch( i_level )
+    {
+        case X264_LOG_ERROR:
+            psz_prefix = "error";
+            break;
+        case X264_LOG_WARNING:
+            psz_prefix = "warning";
+            break;
+        case X264_LOG_INFO:
+            psz_prefix = "info";
+            break;
+        case X264_LOG_DEBUG:
+            psz_prefix = "debug";
+            break;
+        default: /* any level other than the four handled above */
+            psz_prefix = "unknown";
+            break;
+    }
+    fprintf( stderr, "x264 [%s]: ", psz_prefix ); /* tag first, then the caller's formatted message */
+    x264_vfprintf( stderr, psz_fmt, arg );
+}
+
+void x264_log_internal( int i_level, const char *psz_fmt, ... )
+{
+    va_list arg;
+    va_start( arg, psz_fmt );
+    x264_log_default( NULL, i_level, psz_fmt, arg ); /* printf-style front end: always goes to the default stderr sink */
+    va_end( arg );
+}
+
+/****************************************************************************
+ * x264_malloc: return i_size bytes aligned to NATIVE_ALIGN (huge-page aligned when THP applies), or NULL
+ ****************************************************************************/
+void *x264_malloc( int i_size )
+{
+    uint8_t *align_buf = NULL;
+#if HAVE_MALLOC_H
+#if HAVE_THP
+#define HUGE_PAGE_SIZE (2*1024*1024)
+#define HUGE_PAGE_THRESHOLD (HUGE_PAGE_SIZE*7/8) /* FIXME: Is this optimal? */
+    /* Attempt to allocate huge pages to reduce TLB misses. */
+    if( i_size >= HUGE_PAGE_THRESHOLD )
+    {
+        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
+        if( align_buf )
+        {
+            /* Round up to the next huge page boundary if we are close enough (size_t math: int could overflow). */
+            size_t madv_size = ((size_t)i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(size_t)(HUGE_PAGE_SIZE-1);
+            madvise( align_buf, madv_size, MADV_HUGEPAGE );
+        }
+    }
+    else
+#undef HUGE_PAGE_SIZE
+#undef HUGE_PAGE_THRESHOLD
+#endif
+        align_buf = memalign( NATIVE_ALIGN, i_size );
+#else
+    uint8_t *buf = i_size < 0 ? NULL : malloc( (size_t)i_size + (NATIVE_ALIGN-1) + sizeof(void **) ); /* reject negative sizes; add in size_t */
+    if( buf )
+    {
+        align_buf = buf + (NATIVE_ALIGN-1) + sizeof(void **);
+        align_buf -= (intptr_t) align_buf & (NATIVE_ALIGN-1);
+        *( (void **) ( align_buf - sizeof(void **) ) ) = buf; /* stash the raw pointer just below the aligned block for x264_free() */
+    }
+#endif
+    if( !align_buf )
+        x264_log_internal( X264_LOG_ERROR, "malloc of size %d failed\n", i_size );
+    return align_buf;
+}
+
+/****************************************************************************
+ * x264_free: release a buffer obtained from x264_malloc (NULL is ignored)
+ ****************************************************************************/
+void x264_free( void *p )
+{
+    if( p )
+    {
+#if HAVE_MALLOC_H
+        free( p );
+#else
+        free( *( ( ( void **) p ) - 1 ) ); /* x264_malloc's fallback path stored the raw malloc() pointer just below p */
+#endif
+    }
+}
+
+/****************************************************************************
+ * x264_slurp_file: read a whole file into an x264_malloc'd buffer ending in "\n\0"; NULL on error or empty file
+ ****************************************************************************/
+char *x264_slurp_file( const char *filename )
+{
+    int b_error = 0;
+    int64_t i_size;
+    char *buf;
+    FILE *fh = x264_fopen( filename, "rb" );
+    if( !fh )
+        return NULL;
+
+    b_error |= fseek( fh, 0, SEEK_END ) < 0;
+    b_error |= ( i_size = ftell( fh ) ) <= 0;
+    /* x264_malloc() takes an int: on every word size, reject files whose size + 2 would not fit. */
+    b_error |= i_size > INT_MAX - 2;
+    b_error |= fseek( fh, 0, SEEK_SET ) < 0;
+    if( b_error )
+        goto error;
+
+    buf = x264_malloc( i_size+2 ); /* +2: room for an appended '\n' and the terminating '\0' */
+    if( !buf )
+        goto error;
+
+    b_error |= fread( buf, 1, i_size, fh ) != i_size;
+    fclose( fh );
+    if( b_error )
+    {
+        x264_free( buf );
+        return NULL;
+    }
+
+    if( buf[i_size-1] != '\n' )
+        buf[i_size++] = '\n';
+    buf[i_size] = '\0';
+
+    return buf;
+error:
+    fclose( fh );
+    return NULL;
+}
+
+/****************************************************************************
+ * x264_picture_init: zero *pic, then set type, QP and pic_struct to their AUTO values
+ ****************************************************************************/
+REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
+{
+    memset( pic, 0, sizeof( x264_picture_t ) );
+    pic->i_type = X264_TYPE_AUTO;
+    pic->i_qpplus1 = X264_QP_AUTO;
+    pic->i_pic_struct = PIC_STRUCT_AUTO;
+}
+
+/****************************************************************************
+ * x264_picture_alloc: init *pic and allocate one buffer for all its planes; 0 on success, -1 on bad csp/size or OOM
+ ****************************************************************************/
+REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
+{
+    typedef struct
+    {
+        int planes;
+        int width_fix8[3];
+        int height_fix8[3];
+    } x264_csp_tab_t;
+    static const x264_csp_tab_t csp_tab[] = /* per-plane width/height scale factors in 8.8 fixed point (256 == 1.0) */
+    {
+        [X264_CSP_I400] = { 1, { 256*1 },               { 256*1 }               },
+        [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
+        [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
+        [X264_CSP_NV12] = { 2, { 256*1, 256*1 },        { 256*1, 256/2 },       },
+        [X264_CSP_NV21] = { 2, { 256*1, 256*1 },        { 256*1, 256/2 },       },
+        [X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
+        [X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
+        [X264_CSP_NV16] = { 2, { 256*1, 256*1 },        { 256*1, 256*1 },       },
+        [X264_CSP_YUYV] = { 1, { 256*2 },               { 256*1 },              },
+        [X264_CSP_UYVY] = { 1, { 256*2 },               { 256*1 },              },
+        [X264_CSP_I444] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
+        [X264_CSP_YV24] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
+        [X264_CSP_BGR]  = { 1, { 256*3 },               { 256*1 },              },
+        [X264_CSP_BGRA] = { 1, { 256*4 },               { 256*1 },              },
+        [X264_CSP_RGB]  = { 1, { 256*3 },               { 256*1 },              },
+    };
+
+    int csp = i_csp & X264_CSP_MASK;
+    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
+        return -1;
+    x264_picture_init( pic );
+    pic->img.i_csp = i_csp;
+    pic->img.i_plane = csp_tab[csp].planes;
+    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
+    int plane_offset[3] = {0}, frame_size = 0; /* frame_size is kept <= INT_MAX by the check in the loop */
+    for( int i = 0; i < pic->img.i_plane; i++ )
+    {
+        int64_t stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor;
+        int64_t rows = ((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8;
+        if( stride < 0 || stride > INT_MAX || rows < 0 || (rows && stride > (INT_MAX - frame_size) / rows) )
+            return -1; /* negative size, or a stride / frame total that would not fit in the int x264_malloc() takes */
+        pic->img.i_stride[i] = (int)stride;
+        plane_offset[i] = frame_size;
+        frame_size += (int)(rows * stride);
+    }
+    pic->img.plane[0] = x264_malloc( frame_size );
+    if( !pic->img.plane[0] )
+        return -1;
+    for( int i = 1; i < pic->img.i_plane; i++ )
+        pic->img.plane[i] = pic->img.plane[0] + plane_offset[i];
+    return 0;
+}
+
+/****************************************************************************
+ * x264_picture_clean: free the buffer at pic->img.plane[0] and zero *pic
+ ****************************************************************************/
+REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
+{
+    x264_free( pic->img.plane[0] ); /* x264_picture_alloc() places every plane inside this one allocation */
+
+    /* just to be safe */
+    memset( pic, 0, sizeof( x264_picture_t ) );
+}
+
+/****************************************************************************
+ * x264_param_default: overwrite *param with the library's built-in default settings
+ ****************************************************************************/
+REALIGN_STACK void x264_param_default( x264_param_t *param )
+{
+    /* Zero everything first so fields not assigned below default to 0 / NULL. */
+    memset( param, 0, sizeof( x264_param_t ) );
+
+    /* CPU autodetect */
+    param->cpu = x264_cpu_detect();
+    param->i_threads = X264_THREADS_AUTO;
+    param->i_lookahead_threads = X264_THREADS_AUTO;
+    param->b_deterministic = 1;
+    param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
+
+    /* Video properties */
+    param->i_csp           = X264_CHROMA_FORMAT ? X264_CHROMA_FORMAT : X264_CSP_I420;
+    param->i_width         = 0;
+    param->i_height        = 0;
+    param->vui.i_sar_width = 0;
+    param->vui.i_sar_height= 0;
+    param->vui.i_overscan  = 0;  /* undef */
+    param->vui.i_vidformat = 5;  /* undef */
+    param->vui.b_fullrange = -1; /* default depends on input */
+    param->vui.i_colorprim = 2;  /* undef */
+    param->vui.i_transfer  = 2;  /* undef */
+    param->vui.i_colmatrix = -1; /* default depends on input */
+    param->vui.i_chroma_loc= 0;  /* left center */
+    param->i_fps_num       = 25;
+    param->i_fps_den       = 1;
+    param->i_level_idc     = -1;
+    param->i_slice_max_size = 0;
+    param->i_slice_max_mbs = 0;
+    param->i_slice_count = 0;
+#if HAVE_BITDEPTH8
+    param->i_bitdepth = 8;
+#elif HAVE_BITDEPTH10
+    param->i_bitdepth = 10;
+#else
+    param->i_bitdepth = 8;
+#endif
+
+    /* Encoder parameters */
+    param->i_frame_reference = 3;
+    param->i_keyint_max = 250;
+    param->i_keyint_min = X264_KEYINT_MIN_AUTO;
+    param->i_bframe = 3;
+    param->i_scenecut_threshold = 40;
+    param->i_bframe_adaptive = X264_B_ADAPT_FAST;
+    param->i_bframe_bias = 0;
+    param->i_bframe_pyramid = X264_B_PYRAMID_NORMAL;
+    param->b_interlaced = 0;
+    param->b_constrained_intra = 0;
+
+    param->b_deblocking_filter = 1;
+    param->i_deblocking_filter_alphac0 = 0;
+    param->i_deblocking_filter_beta = 0;
+
+    param->b_cabac = 1;
+    param->i_cabac_init_idc = 0;
+
+    param->rc.i_rc_method = X264_RC_CRF;
+    param->rc.i_bitrate = 0;
+    param->rc.f_rate_tolerance = 1.0;
+    param->rc.i_vbv_max_bitrate = 0;
+    param->rc.i_vbv_buffer_size = 0;
+    param->rc.f_vbv_buffer_init = 0.9;
+    param->rc.i_qp_constant = -1;
+    param->rc.f_rf_constant = 23;
+    param->rc.i_qp_min = 0;
+    param->rc.i_qp_max = INT_MAX;
+    param->rc.i_qp_step = 4;
+    param->rc.f_ip_factor = 1.4;
+    param->rc.f_pb_factor = 1.3;
+    param->rc.i_aq_mode = X264_AQ_VARIANCE;
+    param->rc.f_aq_strength = 1.0;
+    param->rc.i_lookahead = 40;
+
+    param->rc.b_stat_write = 0;
+    param->rc.psz_stat_out = "x264_2pass.log";
+    param->rc.b_stat_read = 0;
+    param->rc.psz_stat_in = "x264_2pass.log";
+    param->rc.f_qcompress = 0.6;
+    param->rc.f_qblur = 0.5;
+    param->rc.f_complexity_blur = 20;
+    param->rc.i_zones = 0;
+    param->rc.b_mb_tree = 1;
+
+    /* Log */
+    param->pf_log = x264_log_default;
+    param->p_log_private = NULL;
+    param->i_log_level = X264_LOG_INFO;
+
+    /* Analysis settings (param->analyse.*) */
+    param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8;
+    param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8
+                         | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
+    param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
+    param->analyse.i_me_method = X264_ME_HEX;
+    param->analyse.f_psy_rd = 1.0;
+    param->analyse.b_psy = 1;
+    param->analyse.f_psy_trellis = 0;
+    param->analyse.i_me_range = 16;
+    param->analyse.i_subpel_refine = 7;
+    param->analyse.b_mixed_references = 1;
+    param->analyse.b_chroma_me = 1;
+    param->analyse.i_mv_range_thread = -1;
+    param->analyse.i_mv_range = -1; // set from level_idc
+    param->analyse.i_chroma_qp_offset = 0;
+    param->analyse.b_fast_pskip = 1;
+    param->analyse.b_weighted_bipred = 1;
+    param->analyse.i_weighted_pred = X264_WEIGHTP_SMART;
+    param->analyse.b_dct_decimate = 1;
+    param->analyse.b_transform_8x8 = 1;
+    param->analyse.i_trellis = 1;
+    param->analyse.i_luma_deadzone[0] = 21;
+    param->analyse.i_luma_deadzone[1] = 11;
+    param->analyse.b_psnr = 0;
+    param->analyse.b_ssim = 0;
+
+    param->i_cqm_preset = X264_CQM_FLAT;
+    memset( param->cqm_4iy, 16, sizeof( param->cqm_4iy ) );
+    memset( param->cqm_4py, 16, sizeof( param->cqm_4py ) );
+    memset( param->cqm_4ic, 16, sizeof( param->cqm_4ic ) );
+    memset( param->cqm_4pc, 16, sizeof( param->cqm_4pc ) );
+    memset( param->cqm_8iy, 16, sizeof( param->cqm_8iy ) );
+    memset( param->cqm_8py, 16, sizeof( param->cqm_8py ) );
+    memset( param->cqm_8ic, 16, sizeof( param->cqm_8ic ) );
+    memset( param->cqm_8pc, 16, sizeof( param->cqm_8pc ) );
+
+    param->b_repeat_headers = 1;
+    param->b_annexb = 1;
+    param->b_aud = 0;
+    param->b_vfr_input = 1;
+    param->i_nal_hrd = X264_NAL_HRD_NONE;
+    param->b_tff = 1;
+    param->b_pic_struct = 0;
+    param->b_fake_interlaced = 0;
+    param->i_frame_packing = -1;
+    param->i_alternative_transfer = 2; /* undef */
+    param->b_opencl = 0;
+    param->i_opencl_device = 0;
+    param->opencl_device_id = NULL;
+    param->psz_clbin_file = NULL;
+    param->i_avcintra_class = 0;
+    param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC;
+}
+
+/* Apply the preset named by `preset` (or selected by its decimal index into x264_preset_names) to *param.
+ * Returns 0 on success, or -1 after logging an error if the preset is not recognised. */
+static int param_apply_preset( x264_param_t *param, const char *preset )
+{
+    char *end; /* end == preset means no digits were read, so "" must not be mistaken for index 0 */
+    long i = strtol( preset, &end, 10 ); /* kept as long so a huge number cannot wrap into a valid index */
+    if( end != preset && *end == 0 && i >= 0 && (size_t)i < sizeof(x264_preset_names)/sizeof(*x264_preset_names)-1 )
+        preset = x264_preset_names[i];
+
+    if( !strcasecmp( preset, "ultrafast" ) )
+    {
+        param->i_frame_reference = 1;
+        param->i_scenecut_threshold = 0;
+        param->b_deblocking_filter = 0;
+        param->b_cabac = 0;
+        param->i_bframe = 0;
+        param->analyse.intra = 0;
+        param->analyse.inter = 0;
+        param->analyse.b_transform_8x8 = 0;
+        param->analyse.i_me_method = X264_ME_DIA;
+        param->analyse.i_subpel_refine = 0;
+        param->rc.i_aq_mode = 0;
+        param->analyse.b_mixed_references = 0;
+        param->analyse.i_trellis = 0;
+        param->i_bframe_adaptive = X264_B_ADAPT_NONE;
+        param->rc.b_mb_tree = 0;
+        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
+        param->analyse.b_weighted_bipred = 0;
+        param->rc.i_lookahead = 0;
+    }
+    else if( !strcasecmp( preset, "superfast" ) )
+    {
+        param->analyse.inter = X264_ANALYSE_I8x8|X264_ANALYSE_I4x4;
+        param->analyse.i_me_method = X264_ME_DIA;
+        param->analyse.i_subpel_refine = 1;
+        param->i_frame_reference = 1;
+        param->analyse.b_mixed_references = 0;
+        param->analyse.i_trellis = 0;
+        param->rc.b_mb_tree = 0;
+        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
+        param->rc.i_lookahead = 0;
+    }
+    else if( !strcasecmp( preset, "veryfast" ) )
+    {
+        param->analyse.i_subpel_refine = 2;
+        param->i_frame_reference = 1;
+        param->analyse.b_mixed_references = 0;
+        param->analyse.i_trellis = 0;
+        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
+        param->rc.i_lookahead = 10;
+    }
+    else if( !strcasecmp( preset, "faster" ) )
+    {
+        param->analyse.b_mixed_references = 0;
+        param->i_frame_reference = 2;
+        param->analyse.i_subpel_refine = 4;
+        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
+        param->rc.i_lookahead = 20;
+    }
+    else if( !strcasecmp( preset, "fast" ) )
+    {
+        param->i_frame_reference = 2;
+        param->analyse.i_subpel_refine = 6;
+        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
+        param->rc.i_lookahead = 30;
+    }
+    else if( !strcasecmp( preset, "medium" ) )
+    { /* Default is medium: nothing to override. */ }
+    else if( !strcasecmp( preset, "slow" ) )
+    {
+        param->analyse.i_subpel_refine = 8;
+        param->i_frame_reference = 5;
+        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
+        param->analyse.i_trellis = 2;
+        param->rc.i_lookahead = 50;
+    }
+    else if( !strcasecmp( preset, "slower" ) )
+    {
+        param->analyse.i_me_method = X264_ME_UMH;
+        param->analyse.i_subpel_refine = 9;
+        param->i_frame_reference = 8;
+        param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
+        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
+        param->analyse.inter |= X264_ANALYSE_PSUB8x8;
+        param->analyse.i_trellis = 2;
+        param->rc.i_lookahead = 60;
+    }
+    else if( !strcasecmp( preset, "veryslow" ) )
+    {
+        param->analyse.i_me_method = X264_ME_UMH;
+        param->analyse.i_subpel_refine = 10;
+        param->analyse.i_me_range = 24;
+        param->i_frame_reference = 16;
+        param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
+        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
+        param->analyse.inter |= X264_ANALYSE_PSUB8x8;
+        param->analyse.i_trellis = 2;
+        param->i_bframe = 8;
+        param->rc.i_lookahead = 60;
+    }
+    else if( !strcasecmp( preset, "placebo" ) )
+    {
+        param->analyse.i_me_method = X264_ME_TESA;
+        param->analyse.i_subpel_refine = 11;
+        param->analyse.i_me_range = 24;
+        param->i_frame_reference = 16;
+        param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
+        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
+        param->analyse.inter |= X264_ANALYSE_PSUB8x8;
+        param->analyse.b_fast_pskip = 0;
+        param->analyse.i_trellis = 2;
+        param->i_bframe = 16;
+        param->rc.i_lookahead = 60;
+    }
+    else
+    {
+        x264_log_internal( X264_LOG_ERROR, "invalid preset '%s'\n", preset );
+        return -1;
+    }
+    return 0;
+}
+
+/* Apply every tune named in `tune` (names separated by any of ",./-+") to *param.
+ * Only the first psy tuning takes effect; any further one is skipped with a warning.
+ * Returns 0 on success, or -1 after logging an error for an unrecognised name. */
+static int param_apply_tune( x264_param_t *param, const char *tune )
+{
+    /* Entries below TUNE_PSY_COUNT are the psy tunings, of which only one may be applied. */
+    enum { TUNE_FILM, TUNE_ANIMATION, TUNE_GRAIN, TUNE_STILLIMAGE, TUNE_PSNR, TUNE_SSIM, TUNE_TOUHOU, TUNE_PSY_COUNT, TUNE_FASTDECODE = TUNE_PSY_COUNT, TUNE_ZEROLATENCY, TUNE_COUNT };
+    static const char * const tune_names[TUNE_COUNT] =
+    {
+        "film", "animation", "grain", "stillimage", "psnr", "ssim", "touhou", "fastdecode", "zerolatency"
+    };
+    int psy_seen = 0, len;
+    while( tune += strspn( tune, ",./-+" ), (len = strcspn( tune, ",./-+" )) )
+    {
+        int id = 0;
+        while( id < TUNE_COUNT && ((int)strlen( tune_names[id] ) != len || strncasecmp( tune, tune_names[id], len )) )
+            id++;
+        if( id == TUNE_COUNT )
+        {
+            x264_log_internal( X264_LOG_ERROR, "invalid tune '%.*s'\n", len, tune );
+            return -1;
+        }
+        if( id < TUNE_PSY_COUNT && psy_seen++ )
+            x264_log_internal( X264_LOG_WARNING, "only 1 psy tuning can be used: ignoring tune %.*s\n", len, tune );
+        else switch( id )
+        {
+            case TUNE_FILM:
+                param->analyse.f_psy_trellis = 0.15;
+                param->i_deblocking_filter_alphac0 = -1;
+                param->i_deblocking_filter_beta = -1;
+                break;
+            case TUNE_ANIMATION:
+                param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
+                param->i_bframe += 2;
+                param->i_deblocking_filter_alphac0 = 1;
+                param->i_deblocking_filter_beta = 1;
+                param->analyse.f_psy_rd = 0.4;
+                param->rc.f_aq_strength = 0.6;
+                break;
+            case TUNE_GRAIN:
+                param->i_deblocking_filter_alphac0 = -2;
+                param->i_deblocking_filter_beta = -2;
+                param->analyse.f_psy_trellis = 0.25;
+                param->analyse.b_dct_decimate = 0;
+                param->analyse.i_luma_deadzone[0] = 6;
+                param->analyse.i_luma_deadzone[1] = 6;
+                param->rc.f_pb_factor = 1.1;
+                param->rc.f_ip_factor = 1.1;
+                param->rc.f_aq_strength = 0.5;
+                param->rc.f_qcompress = 0.8;
+                break;
+            case TUNE_STILLIMAGE:
+                param->i_deblocking_filter_alphac0 = -3;
+                param->i_deblocking_filter_beta = -3;
+                param->analyse.f_psy_rd = 2.0;
+                param->analyse.f_psy_trellis = 0.7;
+                param->rc.f_aq_strength = 1.2;
+                break;
+            case TUNE_PSNR:
+                param->analyse.b_psy = 0;
+                param->rc.i_aq_mode = X264_AQ_NONE;
+                break;
+            case TUNE_SSIM:
+                param->analyse.b_psy = 0;
+                param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
+                break;
+            case TUNE_TOUHOU:
+                param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
+                param->i_deblocking_filter_alphac0 = -1;
+                param->i_deblocking_filter_beta = -1;
+                param->analyse.f_psy_trellis = 0.2;
+                param->rc.f_aq_strength = 1.3;
+                if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
+                    param->analyse.inter |= X264_ANALYSE_PSUB8x8;
+                break;
+            case TUNE_FASTDECODE:
+                param->b_cabac = 0;
+                param->b_deblocking_filter = 0;
+                param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
+                param->analyse.b_weighted_bipred = 0;
+                break;
+            case TUNE_ZEROLATENCY:
+                param->i_bframe = 0;
+                param->i_sync_lookahead = 0;
+                param->b_sliced_threads = 1;
+                param->b_vfr_input = 0;
+                param->rc.i_lookahead = 0;
+                param->rc.b_mb_tree = 0;
+                break;
+        }
+        tune += len;
+    }
+    return 0;
+}
+
+/* Reset *param with x264_param_default(), then apply the optional preset followed by the optional tune(s).
+ * Returns 0 on success, or -1 as soon as either step fails (the tune is not applied after a preset failure). */
+REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
+{
+    x264_param_default( param );
+    int failed = preset && param_apply_preset( param, preset ) < 0;
+    if( !failed )
+        failed = tune && param_apply_tune( param, tune ) < 0;
+    return failed ? -1 : 0;
+}
+
+/* Switch to faster analysis settings for a turbo first pass, i.e. when stats are written but not read. */
+REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param )
+{
+    if( !param->rc.b_stat_write || param->rc.b_stat_read )
+        return; /* Not a first pass: leave every setting alone. */
+    param->analyse.b_fast_pskip = 1;
+    param->analyse.i_trellis = 0;
+    if( param->analyse.i_subpel_refine > 2 )
+        param->analyse.i_subpel_refine = 2;
+    param->analyse.i_me_method = X264_ME_DIA;
+    param->analyse.inter = 0;
+    param->analyse.b_transform_8x8 = 0;
+    param->i_frame_reference = 1;
+}
+
+/* Map a profile name (matched case-insensitively) to its PROFILE_* value; returns -1 if the name is unknown. */
+static int profile_string_to_int( const char *str )
+{
+    const struct { const char *name; int id; } profiles[] = {
+        { "baseline", PROFILE_BASELINE },
+        { "main",     PROFILE_MAIN },
+        { "high",     PROFILE_HIGH },
+        { "high10",   PROFILE_HIGH10 },
+        { "high422",  PROFILE_HIGH422 },
+        { "high444",  PROFILE_HIGH444_PREDICTIVE },
+    };
+    for( int i = 0; i < (int)(sizeof(profiles)/sizeof(*profiles)); i++ )
+        if( !strcasecmp( str, profiles[i].name ) )
+            return profiles[i].id;
+    return -1;
+}
+
+/* Constrain *param to the named profile (baseline, main, high, high10, high422 or high444); NULL is a no-op.
+ * Returns 0 on success, or -1 after logging an error for an unknown profile or a setting it cannot support. */
+REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile )
+{
+    if( !profile )
+        return 0;
+
+    int p = profile_string_to_int( profile );
+    if( p < 0 )
+    {
+        x264_log_internal( X264_LOG_ERROR, "invalid profile: %s\n", profile );
+        return -1;
+    }
+    const int csp = param->i_csp & X264_CSP_MASK;
+    if( p < PROFILE_HIGH444_PREDICTIVE &&
+        ((param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant <= 0) ||
+         (param->rc.i_rc_method == X264_RC_CRF && (int)(param->rc.f_rf_constant + 6 * (param->i_bitdepth-8)) <= 0)) )
+    {
+        x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support lossless\n", profile );
+        return -1;
+    }
+    if( p < PROFILE_HIGH444_PREDICTIVE && csp >= X264_CSP_I444 )
+    {
+        x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support 4:4:4\n", profile );
+        return -1;
+    }
+    if( p < PROFILE_HIGH422 && csp >= X264_CSP_I422 )
+    {
+        x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support 4:2:2\n", profile );
+        return -1;
+    }
+    if( p < PROFILE_HIGH10 && param->i_bitdepth > 8 )
+    {
+        x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d\n", profile, param->i_bitdepth );
+        return -1;
+    }
+    if( p < PROFILE_HIGH && csp == X264_CSP_I400 )
+    {
+        x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support 4:0:0\n", profile );
+        return -1;
+    }
+
+    if( p == PROFILE_BASELINE || p == PROFILE_MAIN ) /* settings shared by both profiles */
+    {
+        param->analyse.b_transform_8x8 = 0;
+        param->i_cqm_preset = X264_CQM_FLAT;
+        param->psz_cqm_file = NULL;
+    }
+    if( p == PROFILE_BASELINE )
+    {
+        param->b_cabac = 0;
+        param->i_bframe = 0;
+        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
+        if( param->b_interlaced )
+        {
+            x264_log_internal( X264_LOG_ERROR, "baseline profile doesn't support interlacing\n" );
+            return -1;
+        }
+        if( param->b_fake_interlaced )
+        {
+            x264_log_internal( X264_LOG_ERROR, "baseline profile doesn't support fake interlacing\n" );
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/* Look arg up (case-insensitively) in the NULL-terminated names table: on a match store its index in *dst and return 0, otherwise return -1 and leave *dst alone. */
+static int parse_enum( const char *arg, const char * const *names, int *dst )
+{
+    int idx = 0;
+    while( names[idx] && strcasecmp( arg, names[idx] ) )
+        idx++;
+    if( names[idx] )
+        *dst = idx;
+    return names[idx] ? 0 : -1;
+}
+
+/* Parse `length` comma-separated coefficients, each in [1,255], from str into cqm; returns 0 on success, -1 otherwise. */
+static int parse_cqm( const char *str, uint8_t *cqm, int length )
+{
+    int i = 0, coef = 0;
+    do {
+        if( sscanf( str, "%d", &coef ) != 1 || coef < 1 || coef > 255 )
+            return -1; /* "!= 1" matters: sscanf() returns EOF, not 0, on an empty field */
+        cqm[i++] = coef;
+    } while( i < length && (str = strchr( str, ',' )) && str++ );
+    return (i == length) ? 0 : -1;
+}
+
+/* Parse a boolean string.
+ * "1", "true" and "yes" give 1; "0", "false" and "no" give 0 (the words match case-insensitively).
+ * Any other string sets *b_error to 1 and yields 0; *b_error is never cleared here. */
+static int atobool_internal( const char *str, int *b_error )
+{
+    const int is_true  = !strcmp( str, "1" ) || !strcasecmp( str, "true" )  || !strcasecmp( str, "yes" );
+    const int is_false = !strcmp( str, "0" ) || !strcasecmp( str, "false" ) || !strcasecmp( str, "no" );
+    if( is_true )
+        return 1;
+    if( !is_false )
+        *b_error = 1;
+    return 0;
+}
+
+/* strtol() wrapper (base auto-detected): sets *b_error on empty input, trailing characters or a value outside int range. */
+static int atoi_internal( const char *str, int *b_error )
+{
+    char *end;
+    long v = strtol( str, &end, 0 ); /* kept as long so that a value too large for int is detected below */
+    if( end == str || *end != '\0' || v < INT_MIN || v > INT_MAX ) *b_error = 1;
+    return (int)v;
+}
+
+/* strtod() wrapper: returns the parsed value and sets *b_error when str is empty or has trailing characters. */
+static double atof_internal( const char *str, int *b_error )
+{
+    char *tail = NULL;
+    const double parsed = strtod( str, &tail );
+    if( tail == str || tail[0] ) *b_error = 1;
+    return parsed;
+}
+
+#define atobool(str) ( name_was_bool = 1, atobool_internal( str, &b_error ) ) /* also records that the option was parsed as a boolean */
+#undef atoi /* drop any existing macro definitions before the names are redefined below */
+#undef atof
+#define atoi(str) atoi_internal( str, &b_error ) /* both expect an int named b_error in the expanding scope */
+#define atof(str) atof_internal( str, &b_error )
+
+/* Set the option `name` of *p from the string `value` (NULL means "true"); '_' in names is treated as '-' and a
+ * "no"/"no-" prefix inverts a boolean value.  Returns 0, X264_PARAM_BAD_NAME or X264_PARAM_BAD_VALUE. */
+REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value )
+{
+    char *name_buf = NULL;
+    int b_error = 0;
+    int errortype = X264_PARAM_BAD_VALUE;
+    int name_was_bool;
+    int value_was_null = !value;
+
+    if( !name )
+        return X264_PARAM_BAD_NAME;
+    if( !value )
+        value = "true";
+
+    if( value[0] == '=' )
+        value++;
+
+    if( strchr( name, '_' ) ) // s/_/-/g
+    {
+        name_buf = strdup(name);
+        if( !name_buf )
+            return X264_PARAM_BAD_NAME;
+        for( char *c = name_buf; (c = strchr( c, '_' )); )
+            *c = '-';
+        name = name_buf;
+    }
+
+    if( !strncmp( name, "no", 2 ) )
+    {
+        name += 2;
+        if( name[0] == '-' )
+            name++;
+        value = atobool(value) ? "false" : "true";
+    }
+    name_was_bool = 0; /* reset: the atobool() used for the "no" prefix above must not count */
+
+#define OPT(STR) else if( !strcmp( name, STR ) )
+#define OPT2(STR0, STR1) else if( !strcmp( name, STR0 ) || !strcmp( name, STR1 ) )
+    if( 0 );
+    OPT("asm")
+    {
+        p->cpu = isdigit((unsigned char)value[0]) ? atoi(value) : /* cast: isdigit() is undefined for negative char values */
+                 !strcasecmp(value, "auto") || atobool(value) ? x264_cpu_detect() : 0;
+        if( b_error )
+        {
+            char *buf = strdup( value );
+            if( buf )
+            {
+                char *tok, UNUSED *saveptr=NULL, *init;
+                b_error = 0;
+                p->cpu = 0;
+                for( init=buf; (tok=strtok_r(init, ",", &saveptr)); init=NULL )
+                {
+                    int i = 0;
+                    while( x264_cpu_names[i].flags && strcasecmp(tok, x264_cpu_names[i].name) )
+                        i++;
+                    p->cpu |= x264_cpu_names[i].flags;
+                    if( !x264_cpu_names[i].flags )
+                        b_error = 1;
+                }
+                free( buf );
+                if( (p->cpu&X264_CPU_SSSE3) && !(p->cpu&X264_CPU_SSE2_IS_SLOW) )
+                    p->cpu |= X264_CPU_SSE2_IS_FAST;
+            }
+        }
+    }
+    OPT("threads")
+    {
+        if( !strcasecmp(value, "auto") )
+            p->i_threads = X264_THREADS_AUTO;
+        else
+            p->i_threads = atoi(value);
+    }
+    OPT("lookahead-threads")
+    {
+        if( !strcasecmp(value, "auto") )
+            p->i_lookahead_threads = X264_THREADS_AUTO;
+        else
+            p->i_lookahead_threads = atoi(value);
+    }
+    OPT("sliced-threads")
+        p->b_sliced_threads = atobool(value);
+    OPT("sync-lookahead")
+    {
+        if( !strcasecmp(value, "auto") )
+            p->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
+        else
+            p->i_sync_lookahead = atoi(value);
+    }
+    OPT2("deterministic", "n-deterministic")
+        p->b_deterministic = atobool(value);
+    OPT("cpu-independent")
+        p->b_cpu_independent = atobool(value);
+    OPT2("level", "level-idc")
+    {
+        if( !strcmp(value, "1b") )
+            p->i_level_idc = 9;
+        else if( atof(value) < 7 )
+            p->i_level_idc = (int)(10*atof(value)+.5);
+        else
+            p->i_level_idc = atoi(value);
+    }
+    OPT("bluray-compat")
+        p->b_bluray_compat = atobool(value);
+    OPT("avcintra-class")
+        p->i_avcintra_class = atoi(value);
+    OPT("avcintra-flavor")
+        b_error |= parse_enum( value, x264_avcintra_flavor_names, &p->i_avcintra_flavor );
+    OPT("sar")
+    {
+        b_error |= ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
+                     2 != sscanf( value, "%d/%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) );
+    }
+    OPT("overscan")
+        b_error |= parse_enum( value, x264_overscan_names, &p->vui.i_overscan );
+    OPT("videoformat")
+        b_error |= parse_enum( value, x264_vidformat_names, &p->vui.i_vidformat );
+    OPT("fullrange")
+        b_error |= parse_enum( value, x264_fullrange_names, &p->vui.b_fullrange );
+    OPT("colorprim")
+        b_error |= parse_enum( value, x264_colorprim_names, &p->vui.i_colorprim );
+    OPT("transfer")
+        b_error |= parse_enum( value, x264_transfer_names, &p->vui.i_transfer );
+    OPT("colormatrix")
+        b_error |= parse_enum( value, x264_colmatrix_names, &p->vui.i_colmatrix );
+    OPT("chromaloc")
+    {
+        p->vui.i_chroma_loc = atoi(value);
+        b_error |= ( p->vui.i_chroma_loc < 0 || p->vui.i_chroma_loc > 5 ); /* |= keeps a parse error reported by atoi() */
+    }
+    OPT("alternative-transfer")
+        b_error |= parse_enum( value, x264_transfer_names, &p->i_alternative_transfer );
+    OPT("fps")
+    {
+        if( sscanf( value, "%u/%u", &p->i_fps_num, &p->i_fps_den ) != 2 )
+        {
+            double fps = atof(value);
+            if( fps > 0.0 && fps <= INT_MAX/1000.0 )
+            {
+                p->i_fps_num = (int)(fps * 1000.0 + .5);
+                p->i_fps_den = 1000;
+            }
+            else
+            {
+                p->i_fps_num = atoi(value);
+                p->i_fps_den = 1;
+            }
+        }
+    }
+    OPT2("ref", "frameref")
+        p->i_frame_reference = atoi(value);
+    OPT("dpb-size")
+        p->i_dpb_size = atoi(value);
+    OPT("keyint")
+    {
+        if( strstr( value, "infinite" ) )
+            p->i_keyint_max = X264_KEYINT_MAX_INFINITE;
+        else
+            p->i_keyint_max = atoi(value);
+    }
+    OPT2("min-keyint", "keyint-min")
+    {
+        p->i_keyint_min = atoi(value);
+        if( p->i_keyint_max < p->i_keyint_min )
+            p->i_keyint_max = p->i_keyint_min;
+    }
+    OPT("scenecut")
+    {
+        p->i_scenecut_threshold = atobool(value);
+        if( b_error || p->i_scenecut_threshold )
+        {
+            b_error = 0;
+            p->i_scenecut_threshold = atoi(value);
+        }
+    }
+    OPT("intra-refresh")
+        p->b_intra_refresh = atobool(value);
+    OPT("bframes")
+        p->i_bframe = atoi(value);
+    OPT("b-adapt")
+    {
+        p->i_bframe_adaptive = atobool(value);
+        if( b_error )
+        {
+            b_error = 0;
+            p->i_bframe_adaptive = atoi(value);
+        }
+    }
+    OPT("b-bias")
+        p->i_bframe_bias = atoi(value);
+    OPT("b-pyramid")
+    {
+        b_error |= parse_enum( value, x264_b_pyramid_names, &p->i_bframe_pyramid );
+        if( b_error )
+        {
+            b_error = 0;
+            p->i_bframe_pyramid = atoi(value);
+        }
+    }
+    OPT("open-gop")
+        p->b_open_gop = atobool(value);
+    OPT("nf")
+        p->b_deblocking_filter = !atobool(value);
+    OPT2("filter", "deblock")
+    {
+        if( 2 == sscanf( value, "%d:%d", &p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) ||
+            2 == sscanf( value, "%d,%d", &p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) )
+        {
+            p->b_deblocking_filter = 1;
+        }
+        else if( 1 == sscanf( value, "%d", &p->i_deblocking_filter_alphac0 ) ) /* EOF (empty value) is non-zero too */
+        {
+            p->b_deblocking_filter = 1;
+            p->i_deblocking_filter_beta = p->i_deblocking_filter_alphac0;
+        }
+        else
+            p->b_deblocking_filter = atobool(value);
+    }
+    OPT("slice-max-size")
+        p->i_slice_max_size = atoi(value);
+    OPT("slice-max-mbs")
+        p->i_slice_max_mbs = atoi(value);
+    OPT("slice-min-mbs")
+        p->i_slice_min_mbs = atoi(value);
+    OPT("slices")
+        p->i_slice_count = atoi(value);
+    OPT("slices-max")
+        p->i_slice_count_max = atoi(value);
+    OPT("cabac")
+        p->b_cabac = atobool(value);
+    OPT("cabac-idc")
+        p->i_cabac_init_idc = atoi(value);
+    OPT("interlaced")
+        p->b_interlaced = atobool(value);
+    OPT("tff")
+        p->b_interlaced = p->b_tff = atobool(value);
+    OPT("bff")
+    {
+        p->b_interlaced = atobool(value);
+        p->b_tff = !p->b_interlaced;
+    }
+    OPT("constrained-intra")
+        p->b_constrained_intra = atobool(value);
+    OPT("cqm")
+    {
+        if( strstr( value, "flat" ) )
+            p->i_cqm_preset = X264_CQM_FLAT;
+        else if( strstr( value, "jvt" ) )
+            p->i_cqm_preset = X264_CQM_JVT;
+        else
+            p->psz_cqm_file = strdup(value);
+    }
+    OPT("cqmfile")
+        p->psz_cqm_file = strdup(value);
+    OPT("cqm4")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_4iy, 16 );
+        b_error |= parse_cqm( value, p->cqm_4py, 16 );
+        b_error |= parse_cqm( value, p->cqm_4ic, 16 );
+        b_error |= parse_cqm( value, p->cqm_4pc, 16 );
+    }
+    OPT("cqm8")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_8iy, 64 );
+        b_error |= parse_cqm( value, p->cqm_8py, 64 );
+        b_error |= parse_cqm( value, p->cqm_8ic, 64 );
+        b_error |= parse_cqm( value, p->cqm_8pc, 64 );
+    }
+    OPT("cqm4i")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_4iy, 16 );
+        b_error |= parse_cqm( value, p->cqm_4ic, 16 );
+    }
+    OPT("cqm4p")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_4py, 16 );
+        b_error |= parse_cqm( value, p->cqm_4pc, 16 );
+    }
+    OPT("cqm4iy")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_4iy, 16 );
+    }
+    OPT("cqm4ic")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_4ic, 16 );
+    }
+    OPT("cqm4py")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_4py, 16 );
+    }
+    OPT("cqm4pc")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_4pc, 16 );
+    }
+    OPT("cqm8i")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_8iy, 64 );
+        b_error |= parse_cqm( value, p->cqm_8ic, 64 );
+    }
+    OPT("cqm8p")
+    {
+        p->i_cqm_preset = X264_CQM_CUSTOM;
+        b_error |= parse_cqm( value, p->cqm_8py, 64 );
+        b_error |= parse_cqm( value, p->cqm_8pc, 64 );
+    }
+    OPT("log")
+        p->i_log_level = atoi(value);
+    OPT("dump-yuv")
+        p->psz_dump_yuv = strdup(value);
+    OPT2("analyse", "partitions")
+    {
+        p->analyse.inter = 0;
+        if( strstr( value, "none" ) )  p->analyse.inter =  0;
+        if( strstr( value, "all" ) )   p->analyse.inter = ~0;
+
+        if( strstr( value, "i4x4" ) )  p->analyse.inter |= X264_ANALYSE_I4x4;
+        if( strstr( value, "i8x8" ) )  p->analyse.inter |= X264_ANALYSE_I8x8;
+        if( strstr( value, "p8x8" ) )  p->analyse.inter |= X264_ANALYSE_PSUB16x16;
+        if( strstr( value, "p4x4" ) )  p->analyse.inter |= X264_ANALYSE_PSUB8x8;
+        if( strstr( value, "b8x8" ) )  p->analyse.inter |= X264_ANALYSE_BSUB16x16;
+    }
+    OPT("8x8dct")
+        p->analyse.b_transform_8x8 = atobool(value);
+    OPT2("weightb", "weight-b")
+        p->analyse.b_weighted_bipred = atobool(value);
+    OPT("weightp")
+        p->analyse.i_weighted_pred = atoi(value);
+    OPT2("direct", "direct-pred")
+        b_error |= parse_enum( value, x264_direct_pred_names, &p->analyse.i_direct_mv_pred );
+    OPT("chroma-qp-offset")
+        p->analyse.i_chroma_qp_offset = atoi(value);
+    OPT("me")
+        b_error |= parse_enum( value, x264_motion_est_names, &p->analyse.i_me_method );
+    OPT2("merange", "me-range")
+        p->analyse.i_me_range = atoi(value);
+    OPT2("mvrange", "mv-range")
+        p->analyse.i_mv_range = atoi(value);
+    OPT2("mvrange-thread", "mv-range-thread")
+        p->analyse.i_mv_range_thread = atoi(value);
+    OPT2("subme", "subq")
+        p->analyse.i_subpel_refine = atoi(value);
+    OPT("psy-rd")
+    {
+        if( 2 == sscanf( value, "%f:%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) ||
+            2 == sscanf( value, "%f,%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) ||
+            2 == sscanf( value, "%f|%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ))
+        { }
+        else if( 1 == sscanf( value, "%f", &p->analyse.f_psy_rd ) ) /* EOF (empty value) is non-zero too */
+        {
+            p->analyse.f_psy_trellis = 0;
+        }
+        else
+        {
+            p->analyse.f_psy_rd = 0;
+            p->analyse.f_psy_trellis = 0;
+        }
+    }
+    OPT("psy")
+        p->analyse.b_psy = atobool(value);
+    OPT("chroma-me")
+        p->analyse.b_chroma_me = atobool(value);
+    OPT("mixed-refs")
+        p->analyse.b_mixed_references = atobool(value);
+    OPT("trellis")
+        p->analyse.i_trellis = atoi(value);
+    OPT("fast-pskip")
+        p->analyse.b_fast_pskip = atobool(value);
+    OPT("dct-decimate")
+        p->analyse.b_dct_decimate = atobool(value);
+    OPT("deadzone-inter")
+        p->analyse.i_luma_deadzone[0] = atoi(value);
+    OPT("deadzone-intra")
+        p->analyse.i_luma_deadzone[1] = atoi(value);
+    OPT("nr")
+        p->analyse.i_noise_reduction = atoi(value);
+    OPT("bitrate")
+    {
+        p->rc.i_bitrate = atoi(value);
+        p->rc.i_rc_method = X264_RC_ABR;
+    }
+    OPT2("qp", "qp_constant")
+    {
+        p->rc.i_qp_constant = atoi(value);
+        p->rc.i_rc_method = X264_RC_CQP;
+    }
+    OPT("crf")
+    {
+        p->rc.f_rf_constant = atof(value);
+        p->rc.i_rc_method = X264_RC_CRF;
+    }
+    OPT("crf-max")
+        p->rc.f_rf_constant_max = atof(value);
+    OPT("rc-lookahead")
+        p->rc.i_lookahead = atoi(value);
+    OPT2("qpmin", "qp-min")
+        p->rc.i_qp_min = atoi(value);
+    OPT2("qpmax", "qp-max")
+        p->rc.i_qp_max = atoi(value);
+    OPT2("qpstep", "qp-step")
+        p->rc.i_qp_step = atoi(value);
+    OPT("ratetol")
+        p->rc.f_rate_tolerance = !strncmp("inf", value, 3) ? 1e9 : atof(value);
+    OPT("vbv-maxrate")
+        p->rc.i_vbv_max_bitrate = atoi(value);
+    OPT("vbv-bufsize")
+        p->rc.i_vbv_buffer_size = atoi(value);
+    OPT("vbv-init")
+        p->rc.f_vbv_buffer_init = atof(value);
+    OPT2("ipratio", "ip-factor")
+        p->rc.f_ip_factor = atof(value);
+    OPT2("pbratio", "pb-factor")
+        p->rc.f_pb_factor = atof(value);
+    OPT("aq-mode")
+        p->rc.i_aq_mode = atoi(value);
+    OPT("aq-strength")
+        p->rc.f_aq_strength = atof(value);
+    OPT("pass")
+    {
+        int pass = x264_clip3( atoi(value), 0, 3 );
+        p->rc.b_stat_write = pass & 1;
+        p->rc.b_stat_read = pass & 2;
+    }
+    OPT("stats")
+    {
+        p->rc.psz_stat_in = strdup(value);
+        p->rc.psz_stat_out = strdup(value);
+    }
+    OPT("qcomp")
+        p->rc.f_qcompress = atof(value);
+    OPT("mbtree")
+        p->rc.b_mb_tree = atobool(value);
+    OPT("qblur")
+        p->rc.f_qblur = atof(value);
+    OPT2("cplxblur", "cplx-blur")
+        p->rc.f_complexity_blur = atof(value);
+    OPT("zones")
+        p->rc.psz_zones = strdup(value);
+    OPT("crop-rect")
+        b_error |= sscanf( value, "%u,%u,%u,%u", &p->crop_rect.i_left, &p->crop_rect.i_top,
+                                                 &p->crop_rect.i_right, &p->crop_rect.i_bottom ) != 4;
+    OPT("psnr")
+        p->analyse.b_psnr = atobool(value);
+    OPT("ssim")
+        p->analyse.b_ssim = atobool(value);
+    OPT("aud")
+        p->b_aud = atobool(value);
+    OPT("sps-id")
+        p->i_sps_id = atoi(value);
+    OPT("global-header")
+        p->b_repeat_headers = !atobool(value);
+    OPT("repeat-headers")
+        p->b_repeat_headers = atobool(value);
+    OPT("annexb")
+        p->b_annexb = atobool(value);
+    OPT("force-cfr")
+        p->b_vfr_input = !atobool(value);
+    OPT("nal-hrd")
+        b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
+    OPT("filler")
+        p->rc.b_filler = atobool(value);
+    OPT("pic-struct")
+        p->b_pic_struct = atobool(value);
+    OPT("fake-interlaced")
+        p->b_fake_interlaced = atobool(value);
+    OPT("frame-packing")
+        p->i_frame_packing = atoi(value);
+    OPT("stitchable")
+        p->b_stitchable = atobool(value);
+    OPT("opencl")
+        p->b_opencl = atobool( value );
+    OPT("opencl-clbin")
+        p->psz_clbin_file = strdup( value );
+    OPT("opencl-device")
+        p->i_opencl_device = atoi( value );
+    else
+    {
+        b_error = 1;
+        errortype = X264_PARAM_BAD_NAME;
+    }
+#undef OPT
+#undef OPT2
+#undef atobool
+#undef atoi
+#undef atof
+
+    free( name_buf ); /* free(NULL) is a no-op, so no guard is needed */
+
+    b_error |= value_was_null && !name_was_bool; /* a missing value is only accepted by options parsed as booleans */
+    return b_error ? errortype : 0;
+}
+
+/****************************************************************************
+ * x264_param2string: serialize the encoder settings into a newly allocated string
+ ****************************************************************************/
+char *x264_param2string( x264_param_t *p, int b_res )
+{
+    int len = 1000; /* fixed room for every field except the zones string, which is added below */
+    char *buf, *s; /* buf keeps the start of the string, s is the write cursor */
+    if( p->rc.psz_zones )
+        len += strlen(p->rc.psz_zones);
+    buf = s = x264_malloc( len ); /* NOTE(review): no sprintf below checks s against len -- confirm 1000 bytes always suffice */
+    if( !buf )
+        return NULL;
+
+    if( b_res ) /* optional prefix: resolution, fps, timebase and bit depth */
+    {
+        s += sprintf( s, "%dx%d ", p->i_width, p->i_height ); /* sprintf returns the count written, so s stays on the terminator */
+        s += sprintf( s, "fps=%u/%u ", p->i_fps_num, p->i_fps_den );
+        s += sprintf( s, "timebase=%u/%u ", p->i_timebase_num, p->i_timebase_den );
+        s += sprintf( s, "bitdepth=%d ", p->i_bitdepth );
+    }
+
+    if( p->b_opencl )
+        s += sprintf( s, "opencl=%d ", p->b_opencl );
+    s += sprintf( s, "cabac=%d", p->b_cabac ); /* from here on each field carries its own leading space */
+    s += sprintf( s, " ref=%d", p->i_frame_reference );
+    s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
+                  p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta );
+    s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
+    s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
+    s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
+    s += sprintf( s, " psy=%d", p->analyse.b_psy );
+    if( p->analyse.b_psy )
+        s += sprintf( s, " psy_rd=%.2f:%.2f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
+    s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
+    s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
+    s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
+    s += sprintf( s, " trellis=%d", p->analyse.i_trellis );
+    s += sprintf( s, " 8x8dct=%d", p->analyse.b_transform_8x8 );
+    s += sprintf( s, " cqm=%d", p->i_cqm_preset );
+    s += sprintf( s, " deadzone=%d,%d", p->analyse.i_luma_deadzone[0], p->analyse.i_luma_deadzone[1] );
+    s += sprintf( s, " fast_pskip=%d", p->analyse.b_fast_pskip );
+    s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
+    s += sprintf( s, " threads=%d", p->i_threads );
+    s += sprintf( s, " lookahead_threads=%d", p->i_lookahead_threads );
+    s += sprintf( s, " sliced_threads=%d", p->b_sliced_threads );
+    if( p->i_slice_count ) /* the slice limits are only listed when non-zero */
+        s += sprintf( s, " slices=%d", p->i_slice_count );
+    if( p->i_slice_count_max )
+        s += sprintf( s, " slices_max=%d", p->i_slice_count_max );
+    if( p->i_slice_max_size )
+        s += sprintf( s, " slice_max_size=%d", p->i_slice_max_size );
+    if( p->i_slice_max_mbs )
+        s += sprintf( s, " slice_max_mbs=%d", p->i_slice_max_mbs );
+    if( p->i_slice_min_mbs )
+        s += sprintf( s, " slice_min_mbs=%d", p->i_slice_min_mbs );
+    s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
+    s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );
+    s += sprintf( s, " interlaced=%s", p->b_interlaced ? p->b_tff ? "tff" : "bff" : p->b_fake_interlaced ? "fake" : "0" );
+    s += sprintf( s, " bluray_compat=%d", p->b_bluray_compat );
+    if( p->b_stitchable )
+        s += sprintf( s, " stitchable=%d", p->b_stitchable );
+
+    s += sprintf( s, " constrained_intra=%d", p->b_constrained_intra );
+
+    s += sprintf( s, " bframes=%d", p->i_bframe );
+    if( p->i_bframe ) /* B-frame details are only listed when B-frames are enabled */
+    {
+        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
+                      p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
+                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop );
+    }
+    s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 ); /* non-positive values are reported as 0 */
+
+    if( p->i_keyint_max == X264_KEYINT_MAX_INFINITE )
+        s += sprintf( s, " keyint=infinite" );
+    else
+        s += sprintf( s, " keyint=%d", p->i_keyint_max );
+    s += sprintf( s, " keyint_min=%d scenecut=%d intra_refresh=%d",
+                  p->i_keyint_min, p->i_scenecut_threshold, p->b_intra_refresh );
+
+    if( p->rc.b_mb_tree || p->rc.i_vbv_buffer_size )
+        s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
+
+    s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
+                               ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_max_bitrate == p->rc.i_bitrate ? "cbr" : "abr" )
+                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
+    if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
+    {
+        if( p->rc.i_rc_method == X264_RC_CRF )
+            s += sprintf( s, " crf=%.1f", p->rc.f_rf_constant );
+        else
+            s += sprintf( s, " bitrate=%d ratetol=%.1f",
+                          p->rc.i_bitrate, p->rc.f_rate_tolerance );
+        s += sprintf( s, " qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d",
+                      p->rc.f_qcompress, p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step );
+        if( p->rc.b_stat_read )
+            s += sprintf( s, " cplxblur=%.1f qblur=%.1f",
+                          p->rc.f_complexity_blur, p->rc.f_qblur );
+        if( p->rc.i_vbv_buffer_size )
+        {
+            s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d",
+                          p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size );
+            if( p->rc.i_rc_method == X264_RC_CRF )
+                s += sprintf( s, " crf_max=%.1f", p->rc.f_rf_constant_max );
+        }
+    }
+    else if( p->rc.i_rc_method == X264_RC_CQP )
+        s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
+
+    if( p->rc.i_vbv_buffer_size )
+        s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
+    if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom ) /* any non-zero crop edge */
+        s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
+                                                   p->crop_rect.i_right, p->crop_rect.i_bottom );
+    if( p->i_frame_packing >= 0 )
+        s += sprintf( s, " frame-packing=%d", p->i_frame_packing );
+
+    if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) ) /* skipped only for CQP with a constant qp of 0 */
+    {
+        s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
+        if( p->i_bframe && !p->rc.b_mb_tree )
+            s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
+        s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
+        if( p->rc.i_aq_mode )
+            s += sprintf( s, ":%.2f", p->rc.f_aq_strength ); /* appended to the aq field as mode:strength */
+        if( p->rc.psz_zones )
+            s += sprintf( s, " zones=%s", p->rc.psz_zones );
+        else if( p->rc.i_zones ) /* zones present but no string form: emit the bare keyword */
+            s += sprintf( s, " zones" );
+    }
+
+    return buf; /* allocated with x264_malloc; may be NULL on allocation failure (see above) */
+}
diff -Nru x264-0.152.2854+gite9a5903/common/base.h x264-0.158.2988+git-20191101.7817004/common/base.h
--- x264-0.152.2854+gite9a5903/common/base.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/base.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,316 @@
+/*****************************************************************************
+ * base.h: misc common functions (bit depth independent)
+ *****************************************************************************
+ * Copyright (C) 2003-2019 x264 project
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_BASE_H
+#define X264_BASE_H
+
+/****************************************************************************
+ * Macros (can be used in osdep.h); each argument may be evaluated more than once
+ ****************************************************************************/
+#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) )
+#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) )
+#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c)))
+#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c)))
+#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d)))
+#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d)))
+
+/****************************************************************************
+ * System includes
+ ****************************************************************************/
+#include "osdep.h"
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+/****************************************************************************
+ * Macros
+ ****************************************************************************/
+#define XCHG(type,a,b) do { type t = a; a = b; b = t; } while( 0 ) /* swap two lvalues of the given type */
+#define FIX8(f) ((int)((f)*(1<<8)+.5)) /* f scaled by 256; the +.5 rounds to nearest for non-negative f */
+#define ARRAY_ELEMS(a) ((sizeof(a))/(sizeof((a)[0]))) /* element count; only meaningful for true arrays, not pointers */
+#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1)) /* round x up to a multiple of a; a must be a power of two */
+#define IS_DISPOSABLE(type) ( (type) == X264_TYPE_B ) /* argument parenthesized so compound expressions compare as a whole */
+
+/* Unions for type-punning.
+ * Mn: load or store n bits, aligned, native-endian
+ * CPn: copy n bits, aligned, native-endian
+ * We don't use memcpy for CPn because memcpy's args aren't assumed to be aligned. */
+typedef union { uint16_t i; uint8_t  c[2]; } MAY_ALIAS x264_union16_t;
+typedef union { uint32_t i; uint16_t b[2]; uint8_t  c[4]; } MAY_ALIAS x264_union32_t;
+typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_ALIAS x264_union64_t;
+typedef struct { uint64_t i[2]; } x264_uint128_t; /* 128-bit value carried as two 64-bit words */
+typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_t;
+#define M16(src) (((x264_union16_t*)(src))->i) /* yields an lvalue, so it serves both loads and stores */
+#define M32(src) (((x264_union32_t*)(src))->i)
+#define M64(src) (((x264_union64_t*)(src))->i)
+#define M128(src) (((x264_union128_t*)(src))->i)
+#define M128_ZERO ((x264_uint128_t){{0,0}}) /* all-zero 128-bit value */
+#define CP16(dst,src) M16(dst) = M16(src) /* expands to a bare, unparenthesized assignment */
+#define CP32(dst,src) M32(dst) = M32(src)
+#define CP64(dst,src) M64(dst) = M64(src)
+#define CP128(dst,src) M128(dst) = M128(src)
+
+/****************************************************************************
+ * Constants
+ ****************************************************************************/
+enum profile_e /* NOTE(review): values look like H.264 profile_idc codes -- confirm against the spec */
+{
+    PROFILE_BASELINE = 66,
+    PROFILE_MAIN     = 77,
+    PROFILE_HIGH    = 100,
+    PROFILE_HIGH10  = 110,
+    PROFILE_HIGH422 = 122,
+    PROFILE_HIGH444_PREDICTIVE = 244,
+};
+
+enum chroma_format_e
+{
+    CHROMA_400 = 0,
+    CHROMA_420 = 1,
+    CHROMA_422 = 2,
+    CHROMA_444 = 3,
+};
+
+enum slice_type_e
+{
+    SLICE_TYPE_P  = 0,
+    SLICE_TYPE_B  = 1,
+    SLICE_TYPE_I  = 2,
+};
+
+static const char slice_type_to_char[] = { 'P', 'B', 'I' }; /* entries follow the slice_type_e numbering above */
+
+enum sei_payload_type_e /* NOTE(review): presumably the SEI payloadType numbers from the H.264 spec -- confirm */
+{
+    SEI_BUFFERING_PERIOD       = 0,
+    SEI_PIC_TIMING             = 1,
+    SEI_PAN_SCAN_RECT          = 2,
+    SEI_FILLER                 = 3,
+    SEI_USER_DATA_REGISTERED   = 4,
+    SEI_USER_DATA_UNREGISTERED = 5,
+    SEI_RECOVERY_POINT         = 6,
+    SEI_DEC_REF_PIC_MARKING    = 7,
+    SEI_FRAME_PACKING          = 45,
+    SEI_ALTERNATIVE_TRANSFER   = 147,
+};
+
+#define X264_BFRAME_MAX 16
+#define X264_REF_MAX 16
+#define X264_THREAD_MAX 128
+#define X264_LOOKAHEAD_THREAD_MAX 16
+#define X264_LOOKAHEAD_MAX 250
+
+// number of pixels (per thread) in progress at any given time.
+// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
+#define X264_THREAD_HEIGHT 24 /* = 16 + 3 + 3 + 2, see the breakdown above */
+
+/* WEIGHTP_FAKE is set when mb_tree & psy are enabled, but normal weightp is disabled
+ * (such as in baseline). It checks for fades in lookahead and adjusts qp accordingly
+ * to increase quality. Defined as (-1) so that if(i_weighted_pred > 0) is true only when
+ * real weights are being used. */
+
+#define X264_WEIGHTP_FAKE (-1)
+
+#define X264_SCAN8_LUMA_SIZE (5*8) /* 5 rows of 8 entries for one plane (see the scan8 layout below) */
+#define X264_SCAN8_SIZE (X264_SCAN8_LUMA_SIZE*3) /* three planes stacked: Y, U, V */
+#define X264_SCAN8_0 (4+1*8) /* column 4, row 1: the value of x264_scan8[0] */
+
+/* Scan8 organization:
+ *    0 1 2 3 4 5 6 7
+ * 0  DY    y y y y y
+ * 1        y Y Y Y Y
+ * 2        y Y Y Y Y
+ * 3        y Y Y Y Y
+ * 4        y Y Y Y Y
+ * 5  DU    u u u u u
+ * 6        u U U U U
+ * 7        u U U U U
+ * 8        u U U U U
+ * 9        u U U U U
+ * 10 DV    v v v v v
+ * 11       v V V V V
+ * 12       v V V V V
+ * 13       v V V V V
+ * 14       v V V V V
+ * DY/DU/DV are for luma/chroma DC. Each table entry below is column + row*8 in this grid.
+ */
+
+#define LUMA_DC   48 /* index of the DY entry in x264_scan8 */
+#define CHROMA_DC 49 /* index of the DU entry in x264_scan8; DV follows at index 50 */
+
+static const uint8_t x264_scan8[16*3 + 3] = /* 16 entries per plane for 3 planes, then the 3 DC entries */
+{
+    4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, /* entries 0-15: the Y cells (rows 1-4) */
+    6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
+    4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
+    6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
+    4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, /* entries 16-31: the U cells (rows 6-9) */
+    6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
+    4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
+    6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
+    4+11*8, 5+11*8, 4+12*8, 5+12*8, /* entries 32-47: the V cells (rows 11-14) */
+    6+11*8, 7+11*8, 6+12*8, 7+12*8,
+    4+13*8, 5+13*8, 4+14*8, 5+14*8,
+    6+13*8, 7+13*8, 6+14*8, 7+14*8,
+    0+ 0*8, 0+ 5*8, 0+10*8 /* entries 48-50: DY, DU, DV */
+};
+
+/****************************************************************************
+ * Includes
+ ****************************************************************************/
+#include "cpu.h"
+#include "tables.h"
+
+/****************************************************************************
+ * Inline functions
+ ****************************************************************************/
+static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max ) /* clamp v to the range [i_min, i_max] */
+{
+    return ( (v < i_min) ? i_min : (v > i_max) ? i_max : v ); /* the lower bound is tested first */
+}
+
+static ALWAYS_INLINE double x264_clip3f( double v, double f_min, double f_max ) /* floating-point counterpart of x264_clip3 */
+{
+    return ( (v < f_min) ? f_min : (v > f_max) ? f_max : v ); /* the lower bound is tested first */
+}
+
+/* Not a general-purpose function; multiplies input by -1/6 to convert
+ * qp to qscale. Returns 0 when the scaled index is negative and 0xffff when it exceeds 1023. */
+static ALWAYS_INLINE int x264_exp2fix8( float x )
+{
+    int i = x*(-64.f/6.f) + 512.5f; /* 64 index steps per unit of -x/6, offset by 512; the .5 rounds before truncation */
+    if( i < 0 ) return 0;
+    if( i > 1023 ) return 0xffff;
+    return (x264_exp2_lut[i&63]+256) << (i>>6) >> 8; /* low 6 bits select the LUT entry, the upper bits give the shift */
+}
+
+static ALWAYS_INLINE float x264_log2( uint32_t x ) /* table-driven log2 approximation built from two LUTs */
+{
+    int lz = x264_clz( x ); /* NOTE(review): presumably the leading-zero count; behaviour for x == 0 depends on x264_clz -- confirm */
+    return x264_log2_lut[(x<<lz>>24)&0x7f] + x264_log2_lz_lut[lz]; /* 7-bit index taken from the top byte of x<<lz, plus a per-lz term */
+}
+
+static ALWAYS_INLINE int x264_median( int a, int b, int c ) /* branchless median of three ints */
+{
+    int t = (a-b)&((a-b)>>31); /* t = min(a-b, 0); assumes 32-bit int and a sign-extending >> */
+    a -= t; /* a = max(a, b) */
+    b += t; /* b = min(a, b) */
+    b -= (b-c)&((b-c)>>31); /* b = max(b, c) */
+    b += (a-b)&((a-b)>>31); /* b = min(a, b), which is now the median */
+    return b;
+}
+
+static ALWAYS_INLINE void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t *c ) /* component-wise median of three 2-element vectors */
+{
+    dst[0] = x264_median( a[0], b[0], c[0] ); /* first component */
+    dst[1] = x264_median( a[1], b[1], c[1] ); /* second component */
+}
+
+static ALWAYS_INLINE int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc ) /* total absolute change between consecutive mvc entries */
+{
+    int sum = 0; /* stays 0 when i_mvc is 1 or less */
+    for( int i = 0; i < i_mvc-1; i++ ) /* visits each adjacent pair (i, i+1) */
+    {
+        sum += abs( mvc[i][0] - mvc[i+1][0] )
+             + abs( mvc[i][1] - mvc[i+1][1] );
+    }
+    return sum;
+}
+
+static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvdtop ) /* packs two 0..2 bucket values into one 16-bit result */
+{
+    int amvd0 = mvdleft[0] + mvdtop[0]; /* sum of the first components */
+    int amvd1 = mvdleft[1] + mvdtop[1]; /* sum of the second components */
+    amvd0 = (amvd0 > 2) + (amvd0 > 32); /* bucket: 0 for <= 2, 1 for 3..32, 2 for > 32 */
+    amvd1 = (amvd1 > 2) + (amvd1 > 32);
+    return amvd0 + (amvd1<<8); /* first bucket in the low byte, second bucket in the high byte */
+}
+
+/****************************************************************************
+ * General functions
+ ****************************************************************************/
+X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d ); /* presumably reduces n/d in place -- defined elsewhere, confirm */
+X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d ); /* same signature shape with 64-bit operands */
+
+X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
+X264_API void x264_log_internal( int i_level, const char *psz_fmt, ... );
+
+/* x264_malloc: will do or emulate a memalign.
+ * Buffers allocated with x264_malloc must be released with x264_free. */
+X264_API void *x264_malloc( int );
+X264_API void  x264_free( void * );
+
+/* x264_slurp_file: malloc space for the whole file and read it */
+X264_API char *x264_slurp_file( const char *filename );
+
+/* x264_param2string: return a (malloced) string containing most of
+ * the encoding options; a non-zero b_res also prepends resolution, fps, timebase and bit depth */
+X264_API char *x264_param2string( x264_param_t *p, int b_res );
+
+/****************************************************************************
+ * Macros: allocation helpers that 'goto fail' on error, so the caller must define that label
+ ****************************************************************************/
+#define CHECKED_MALLOC( var, size )\
+do {\
+    var = x264_malloc( size );\
+    if( !var )\
+        goto fail;\
+} while( 0 )
+#define CHECKED_MALLOCZERO( var, size )\
+do {\
+    CHECKED_MALLOC( var, size );\
+    memset( var, 0, size );\
+} while( 0 )
+
+/* Macros for merging multiple allocations into a single large malloc, for improved use with huge pages.
+ * PREALLOC stores each buffer's offset in its pointer; PREALLOC_END allocates once and adds the base address to each. */
+
+/* Maximum number of PREALLOC calls per PREALLOC_INIT scope; nothing here checks prealloc_idx against it */
+#define PREALLOC_BUF_SIZE 1024
+
+#define PREALLOC_INIT\
+    int    prealloc_idx = 0;\
+    size_t prealloc_size = 0;\
+    uint8_t **preallocs[PREALLOC_BUF_SIZE];
+
+#define PREALLOC( var, size )\
+do {\
+    var = (void*)prealloc_size;\
+    preallocs[prealloc_idx++] = (uint8_t**)&var;\
+    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
+} while( 0 )
+
+#define PREALLOC_END( ptr )\
+do {\
+    CHECKED_MALLOC( ptr, prealloc_size );\
+    while( prealloc_idx-- )\
+        *preallocs[prealloc_idx] += (intptr_t)ptr;\
+} while( 0 )
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/bitstream.c x264-0.158.2988+git-20191101.7817004/common/bitstream.c
--- x264-0.152.2854+gite9a5903/common/bitstream.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/bitstream.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream.c: bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Fiona Glaser <fiona@x264.com>
@@ -26,7 +26,7 @@
 
 #include "common.h"
 
-static uint8_t *x264_nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
+static uint8_t *nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
 {
     if( src < end ) *dst++ = *src++;
     if( src < end ) *dst++ = *src++;
@@ -39,25 +39,15 @@
     return dst;
 }
 
-uint8_t *x264_nal_escape_mmx2( uint8_t *dst, uint8_t *src, uint8_t *end );
-uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
-uint8_t *x264_nal_escape_avx2( uint8_t *dst, uint8_t *src, uint8_t *end );
-void x264_cabac_block_residual_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_rd_internal_lzcnt      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_rd_internal_avx512     ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_8x8_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_8x8_rd_internal_lzcnt      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_8x8_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_8x8_rd_internal_avx512     ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_internal_sse2  ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_internal_avx2  ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-void x264_cabac_block_residual_internal_avx512( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
-
-uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
+#if HAVE_MMX
+#include "x86/bitstream.h"
+#endif
+#if HAVE_ARMV6
+#include "arm/bitstream.h"
+#endif
+#if HAVE_AARCH64
+#include "aarch64/bitstream.h"
+#endif
 
 /****************************************************************************
  * x264_nal_encode:
@@ -117,7 +107,7 @@
 {
     memset( pf, 0, sizeof(*pf) );
 
-    pf->nal_escape = x264_nal_escape_c;
+    pf->nal_escape = nal_escape_c;
 #if HAVE_MMX
 #if ARCH_X86_64 && !defined( __MACH__ )
     pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2;
@@ -169,7 +159,7 @@
     if( cpu&X264_CPU_NEON )
         pf->nal_escape = x264_nal_escape_neon;
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     if( cpu&X264_CPU_NEON )
         pf->nal_escape = x264_nal_escape_neon;
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/bitstream.h x264-0.158.2988+git-20191101.7817004/common/bitstream.h
--- x264-0.152.2854+gite9a5903/common/bitstream.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/bitstream.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream.h: bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Fiona Glaser <fiona@x264.com>
@@ -30,12 +30,6 @@
 
 typedef struct
 {
-    uint8_t i_bits;
-    uint8_t i_size;
-} vlc_t;
-
-typedef struct
-{
     uint16_t i_bits;
     uint8_t  i_size;
     /* Next level table to use */
@@ -60,12 +54,6 @@
     ALIGNED_16( dctcoef level[18] );
 } x264_run_level_t;
 
-extern const vlc_t x264_coeff0_token[6];
-extern const vlc_t x264_coeff_token[6][16][4];
-extern const vlc_t x264_total_zeros[15][16];
-extern const vlc_t x264_total_zeros_2x2_dc[3][4];
-extern const vlc_t x264_total_zeros_2x4_dc[7][8];
-
 typedef struct
 {
     uint8_t *(*nal_escape)( uint8_t *dst, uint8_t *src, uint8_t *end );
@@ -77,6 +65,7 @@
                                                   intptr_t ctx_block_cat, x264_cabac_t *cb );
 } x264_bitstream_function_t;
 
+#define x264_bitstream_init x264_template(bitstream_init)
 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf );
 
 /* A larger level table size theoretically could help a bit at extremely
@@ -85,11 +74,13 @@
  * This size appears to be optimal for QP18 encoding on a Nehalem CPU.
  * FIXME: Do further testing? */
 #define LEVEL_TABLE_SIZE 128
+#define x264_level_token x264_template(level_token)
 extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
 
 /* The longest possible set of zero run codes sums to 25 bits.  This leaves
  * plenty of room for both the code (25 bits) and size (5 bits) in a uint32_t. */
 
+#define x264_run_before x264_template(run_before)
 extern uint32_t x264_run_before[1<<16];
 
 static inline void bs_init( bs_t *s, void *p_data, int i_data )
@@ -98,8 +89,13 @@
     s->p       = s->p_start = (uint8_t*)p_data - offset;
     s->p_end   = (uint8_t*)p_data + i_data;
     s->i_left  = (WORD_SIZE - offset)*8;
-    s->cur_bits = endian_fix32( M32(s->p) );
-    s->cur_bits >>= (4-offset)*8;
+    if( offset )
+    {
+        s->cur_bits = endian_fix32( M32(s->p) );
+        s->cur_bits >>= (4-offset)*8;
+    }
+    else
+        s->cur_bits = 0;
 }
 static inline int bs_pos( bs_t *s )
 {
@@ -197,6 +193,7 @@
 {
     if( s->i_left&7 )
         bs_write( s, s->i_left&7, 1 << ( (s->i_left&7) - 1 ) );
+    bs_flush( s );
 }
 
 /* golomb functions */
diff -Nru x264-0.152.2854+gite9a5903/common/cabac.c x264-0.158.2988+git-20191101.7817004/common/cabac.c
--- x264-0.152.2854+gite9a5903/common/cabac.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/cabac.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.c: arithmetic coder
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,1305 +27,7 @@
 
 #include "common.h"
 
-
-static const int8_t x264_cabac_context_init_I[1024][2] =
-{
-    /* 0 - 10 */
-    { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
-    {  2,  54 }, {  3, 74 },  { -28,127 }, { -23, 104 },
-    { -6,  53 }, { -1, 54 },  {  7,  51 },
-
-    /* 11 - 23 unused for I */
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },
-
-    /* 24- 39 */
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-
-    /* 40 - 53 */
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },
-
-    /* 54 - 59 */
-    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
-    { 0, 0 },    { 0, 0 },
-
-    /* 60 - 69 */
-    { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
-    { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
-    { 13, 41 },  { 3, 62 },
-
-    /* 70 -> 87 */
-    { 0, 11 },   { 1, 55 },   { 0, 69 },     { -17, 127 },
-    { -13, 102 },{ 0, 82 },   { -7, 74 },    { -21, 107 },
-    { -27, 127 },{ -31, 127 },{ -24, 127 },  { -18, 95 },
-    { -27, 127 },{ -21, 114 },{ -30, 127 },  { -17, 123 },
-    { -12, 115 },{ -16, 122 },
-
-    /* 88 -> 104 */
-    { -11, 115 },{ -12, 63 }, { -2, 68 },    { -15, 84 },
-    { -13, 104 },{ -3, 70 },  { -8, 93 },    { -10, 90 },
-    { -30, 127 },{ -1, 74 },  { -6, 97 },    { -7, 91 },
-    { -20, 127 },{ -4, 56 },  { -5, 82 },    { -7, 76 },
-    { -22, 125 },
-
-    /* 105 -> 135 */
-    { -7, 93 },  { -11, 87 }, { -3, 77 },    { -5, 71 },
-    { -4, 63 },  { -4, 68 },  { -12, 84 },   { -7, 62 },
-    { -7, 65 },  { 8, 61 },   { 5, 56 },     { -2, 66 },
-    { 1, 64 },   { 0, 61 },   { -2, 78 },    { 1, 50 },
-    { 7, 52 },   { 10, 35 },  { 0, 44 },     { 11, 38 },
-    { 1, 45 },   { 0, 46 },   { 5, 44 },     { 31, 17 },
-    { 1, 51 },   { 7, 50 },   { 28, 19 },    { 16, 33 },
-    { 14, 62 },  { -13, 108 },{ -15, 100 },
-
-    /* 136 -> 165 */
-    { -13, 101 },{ -13, 91 }, { -12, 94 },   { -10, 88 },
-    { -16, 84 }, { -10, 86 }, { -7, 83 },    { -13, 87 },
-    { -19, 94 }, { 1, 70 },   { 0, 72 },     { -5, 74 },
-    { 18, 59 },  { -8, 102 }, { -15, 100 },  { 0, 95 },
-    { -4, 75 },  { 2, 72 },   { -11, 75 },   { -3, 71 },
-    { 15, 46 },  { -13, 69 }, { 0, 62 },     { 0, 65 },
-    { 21, 37 },  { -15, 72 }, { 9, 57 },     { 16, 54 },
-    { 0, 62 },   { 12, 72 },
-
-    /* 166 -> 196 */
-    { 24, 0 },   { 15, 9 },   { 8, 25 },     { 13, 18 },
-    { 15, 9 },   { 13, 19 },  { 10, 37 },    { 12, 18 },
-    { 6, 29 },   { 20, 33 },  { 15, 30 },    { 4, 45 },
-    { 1, 58 },   { 0, 62 },   { 7, 61 },     { 12, 38 },
-    { 11, 45 },  { 15, 39 },  { 11, 42 },    { 13, 44 },
-    { 16, 45 },  { 12, 41 },  { 10, 49 },    { 30, 34 },
-    { 18, 42 },  { 10, 55 },  { 17, 51 },    { 17, 46 },
-    { 0, 89 },   { 26, -19 }, { 22, -17 },
-
-    /* 197 -> 226 */
-    { 26, -17 }, { 30, -25 }, { 28, -20 },   { 33, -23 },
-    { 37, -27 }, { 33, -23 }, { 40, -28 },   { 38, -17 },
-    { 33, -11 }, { 40, -15 }, { 41, -6 },    { 38, 1 },
-    { 41, 17 },  { 30, -6 },  { 27, 3 },     { 26, 22 },
-    { 37, -16 }, { 35, -4 },  { 38, -8 },    { 38, -3 },
-    { 37, 3 },   { 38, 5 },   { 42, 0 },     { 35, 16 },
-    { 39, 22 },  { 14, 48 },  { 27, 37 },    { 21, 60 },
-    { 12, 68 },  { 2, 97 },
-
-    /* 227 -> 251 */
-    { -3, 71 },  { -6, 42 },  { -5, 50 },    { -3, 54 },
-    { -2, 62 },  { 0, 58 },   { 1, 63 },     { -2, 72 },
-    { -1, 74 },  { -9, 91 },  { -5, 67 },    { -5, 27 },
-    { -3, 39 },  { -2, 44 },  { 0, 46 },     { -16, 64 },
-    { -8, 68 },  { -10, 78 }, { -6, 77 },    { -10, 86 },
-    { -12, 92 }, { -15, 55 }, { -10, 60 },   { -6, 62 },
-    { -4, 65 },
-
-    /* 252 -> 275 */
-    { -12, 73 }, { -8, 76 },  { -7, 80 },    { -9, 88 },
-    { -17, 110 },{ -11, 97 }, { -20, 84 },   { -11, 79 },
-    { -6, 73 },  { -4, 74 },  { -13, 86 },   { -13, 96 },
-    { -11, 97 }, { -19, 117 },{ -8, 78 },    { -5, 33 },
-    { -4, 48 },  { -2, 53 },  { -3, 62 },    { -13, 71 },
-    { -10, 79 }, { -12, 86 }, { -13, 90 },   { -14, 97 },
-
-    /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
-    { 0, 0 },
-
-    /* 277 -> 307 */
-    { -6, 93 },  { -6, 84 },  { -8, 79 },    { 0, 66 },
-    { -1, 71 },  { 0, 62 },   { -2, 60 },    { -2, 59 },
-    { -5, 75 },  { -3, 62 },  { -4, 58 },    { -9, 66 },
-    { -1, 79 },  { 0, 71 },   { 3, 68 },     { 10, 44 },
-    { -7, 62 },  { 15, 36 },  { 14, 40 },    { 16, 27 },
-    { 12, 29 },  { 1, 44 },   { 20, 36 },    { 18, 32 },
-    { 5, 42 },   { 1, 48 },   { 10, 62 },    { 17, 46 },
-    { 9, 64 },   { -12, 104 },{ -11, 97 },
-
-    /* 308 -> 337 */
-    { -16, 96 }, { -7, 88 },  { -8, 85 },    { -7, 85 },
-    { -9, 85 },  { -13, 88 }, { 4, 66 },     { -3, 77 },
-    { -3, 76 },  { -6, 76 },  { 10, 58 },    { -1, 76 },
-    { -1, 83 },  { -7, 99 },  { -14, 95 },   { 2, 95 },
-    { 0, 76 },   { -5, 74 },  { 0, 70 },     { -11, 75 },
-    { 1, 68 },   { 0, 65 },   { -14, 73 },   { 3, 62 },
-    { 4, 62 },   { -1, 68 },  { -13, 75 },   { 11, 55 },
-    { 5, 64 },   { 12, 70 },
-
-    /* 338 -> 368 */
-    { 15, 6 },   { 6, 19 },   { 7, 16 },     { 12, 14 },
-    { 18, 13 },  { 13, 11 },  { 13, 15 },    { 15, 16 },
-    { 12, 23 },  { 13, 23 },  { 15, 20 },    { 14, 26 },
-    { 14, 44 },  { 17, 40 },  { 17, 47 },    { 24, 17 },
-    { 21, 21 },  { 25, 22 },  { 31, 27 },    { 22, 29 },
-    { 19, 35 },  { 14, 50 },  { 10, 57 },    { 7, 63 },
-    { -2, 77 },  { -4, 82 },  { -3, 94 },    { 9, 69 },
-    { -12, 109 },{ 36, -35 }, { 36, -34 },
-
-    /* 369 -> 398 */
-    { 32, -26 }, { 37, -30 }, { 44, -32 },   { 34, -18 },
-    { 34, -15 }, { 40, -15 }, { 33, -7 },    { 35, -5 },
-    { 33, 0 },   { 38, 2 },   { 33, 13 },    { 23, 35 },
-    { 13, 58 },  { 29, -3 },  { 26, 0 },     { 22, 30 },
-    { 31, -7 },  { 35, -15 }, { 34, -3 },    { 34, 3 },
-    { 36, -1 },  { 34, 5 },   { 32, 11 },    { 35, 5 },
-    { 34, 12 },  { 39, 11 },  { 30, 29 },    { 34, 26 },
-    { 29, 39 },  { 19, 66 },
-
-    /* 399 -> 435 */
-    {  31,  21 }, {  31,  31 }, {  25,  50 },
-    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
-    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
-    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
-    { -23,  68 }, { -24,  50 }, { -11,  74 }, {  23, -13 },
-    {  26, -13 }, {  40, -15 }, {  49, -14 }, {  44,   3 },
-    {  45,   6 }, {  44,  34 }, {  33,  54 }, {  19,  82 },
-    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
-    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
-    {   0,  68 }, {  -9,  92 },
-
-    /* 436 -> 459 */
-    { -14, 106 }, { -13,  97 }, { -15,  90 }, { -12,  90 },
-    { -18,  88 }, { -10,  73 }, {  -9,  79 }, { -14,  86 },
-    { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
-    {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
-    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
-    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
-
-    /* 460 -> 1024 */
-    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
-    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
-    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
-    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
-    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
-    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
-    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
-    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
-    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
-    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
-    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
-    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
-    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
-    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
-    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
-    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
-    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
-    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
-    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
-    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
-    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
-    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
-    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
-    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
-    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
-    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
-    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
-    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
-    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
-    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
-    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
-    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
-    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
-    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
-    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
-    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
-    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
-    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
-    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
-    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
-    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
-    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
-    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
-    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
-    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
-    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
-    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
-    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
-    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
-    { -23,  68 }, { -24,  50 }, { -11,  74 }, { -14, 106 },
-    { -13,  97 }, { -15,  90 }, { -12,  90 }, { -18,  88 },
-    { -10,  73 }, {  -9,  79 }, { -14,  86 }, { -10,  73 },
-    { -10,  70 }, { -10,  69 }, {  -5,  66 }, {  -9,  64 },
-    {  -5,  58 }, {   2,  59 }, {  23, -13 }, {  26, -13 },
-    {  40, -15 }, {  49, -14 }, {  44,   3 }, {  45,   6 },
-    {  44,  34 }, {  33,  54 }, {  19,  82 }, {  21, -10 },
-    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
-    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
-    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
-    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
-    {   0,  68 }, {  -9,  92 }, { -17, 120 }, { -20, 112 },
-    { -18, 114 }, { -11,  85 }, { -15,  92 }, { -14,  89 },
-    { -26,  71 }, { -15,  81 }, { -14,  80 }, {   0,  68 },
-    { -14,  70 }, { -24,  56 }, { -23,  68 }, { -24,  50 },
-    { -11,  74 }, { -14, 106 }, { -13,  97 }, { -15,  90 },
-    { -12,  90 }, { -18,  88 }, { -10,  73 }, {  -9,  79 },
-    { -14,  86 }, { -10,  73 }, { -10,  70 }, { -10,  69 },
-    {  -5,  66 }, {  -9,  64 }, {  -5,  58 }, {   2,  59 },
-    {  23, -13 }, {  26, -13 }, {  40, -15 }, {  49, -14 },
-    {  44,   3 }, {  45,   6 }, {  44,  34 }, {  33,  54 },
-    {  19,  82 }, {  21, -10 }, {  24, -11 }, {  28,  -8 },
-    {  28,  -1 }, {  29,   3 }, {  29,   9 }, {  35,  20 },
-    {  29,  36 }, {  14,  67 }, {  -3,  75 }, {  -1,  23 },
-    {   1,  34 }, {   1,  43 }, {   0,  54 }, {  -2,  55 },
-    {   0,  61 }, {   1,  64 }, {   0,  68 }, {  -9,  92 },
-    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
-    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
-    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
-    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
-    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
-    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
-    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
-    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
-    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
-    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
-    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
-    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
-    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
-    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
-    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
-    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
-    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
-    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
-    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
-    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
-    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
-    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
-    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
-    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
-    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
-    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
-    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
-    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
-    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
-    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
-    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
-    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
-    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
-    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
-    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
-    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
-    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
-    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
-    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
-    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
-    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
-    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
-    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
-    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
-    {  -3,  71 }, {  -6,  42 }, {  -5,  50 }, {  -3,  54 },
-    {  -2,  62 }, {   0,  58 }, {   1,  63 }, {  -2,  72 },
-    {  -1,  74 }, {  -9,  91 }, {  -5,  67 }, {  -5,  27 },
-    {  -3,  39 }, {  -2,  44 }, {   0,  46 }, { -16,  64 },
-    {  -8,  68 }, { -10,  78 }, {  -6,  77 }, { -10,  86 },
-    { -12,  92 }, { -15,  55 }, { -10,  60 }, {  -6,  62 },
-    {  -4,  65 }, { -12,  73 }, {  -8,  76 }, {  -7,  80 },
-    {  -9,  88 }, { -17, 110 }, {  -3,  71 }, {  -6,  42 },
-    {  -5,  50 }, {  -3,  54 }, {  -2,  62 }, {   0,  58 },
-    {   1,  63 }, {  -2,  72 }, {  -1,  74 }, {  -9,  91 },
-    {  -5,  67 }, {  -5,  27 }, {  -3,  39 }, {  -2,  44 },
-    {   0,  46 }, { -16,  64 }, {  -8,  68 }, { -10,  78 },
-    {  -6,  77 }, { -10,  86 }, { -12,  92 }, { -15,  55 },
-    { -10,  60 }, {  -6,  62 }, {  -4,  65 }, { -12,  73 },
-    {  -8,  76 }, {  -7,  80 }, {  -9,  88 }, { -17, 110 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
-    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 }
-};
-
-static const int8_t x264_cabac_context_init_PB[3][1024][2] =
-{
-    /* i_cabac_init_idc == 0 */
-    {
-        /* 0 - 10 */
-        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
-        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
-        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
-
-        /* 11 - 23 */
-        {  23,  33 }, {  23,   2 }, {  21,   0 }, {   1,   9 },
-        {   0,  49 }, { -37, 118 }, {   5,  57 }, { -13,  78 },
-        { -11,  65 }, {   1,  62 }, {  12,  49 }, {  -4,  73 },
-        {  17,  50 },
-
-        /* 24 - 39 */
-        {  18,  64 }, {   9,  43 }, {  29,   0 }, {  26,  67 },
-        {  16,  90 }, {   9, 104 }, { -46, 127 }, { -20, 104 },
-        {   1,  67 }, { -13,  78 }, { -11,  65 }, {   1,  62 },
-        {  -6,  86 }, { -17,  95 }, {  -6,  61 }, {   9,  45 },
-
-        /* 40 - 53 */
-        {  -3,  69 }, {  -6,  81 }, { -11,  96 }, {   6,  55 },
-        {   7,  67 }, {  -5,  86 }, {   2,  88 }, {   0,  58 },
-        {  -3,  76 }, { -10,  94 }, {   5,  54 }, {   4,  69 },
-        {  -3,  81 }, {   0,  88 },
-
-        /* 54 - 59 */
-        {  -7,  67 }, {  -5,  74 }, {  -4,  74 }, {  -5,  80 },
-        {  -7,  72 }, {   1,  58 },
-
-        /* 60 - 69 */
-        {   0,  41 }, {   0,  63 }, {   0,  63 }, { 0, 63 },
-        {  -9,  83 }, {   4,  86 }, {   0,  97 }, { -7, 72 },
-        {  13,  41 }, {   3,  62 },
-
-        /* 70 - 87 */
-        {   0,  45 }, {  -4,  78 }, {  -3,  96 }, { -27,  126 },
-        { -28,  98 }, { -25, 101 }, { -23,  67 }, { -28,  82 },
-        { -20,  94 }, { -16,  83 }, { -22, 110 }, { -21,  91 },
-        { -18, 102 }, { -13,  93 }, { -29, 127 }, {  -7,  92 },
-        {  -5,  89 }, {  -7,  96 }, { -13, 108 }, {  -3,  46 },
-        {  -1,  65 }, {  -1,  57 }, {  -9,  93 }, {  -3,  74 },
-        {  -9,  92 }, {  -8,  87 }, { -23, 126 }, {   5,  54 },
-        {   6,  60 }, {   6,  59 }, {   6,  69 }, {  -1,  48 },
-        {   0,  68 }, {  -4,  69 }, {  -8,  88 },
-
-        /* 105 -> 165 */
-        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
-        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
-        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
-        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
-        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
-        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
-        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
-        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
-        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
-        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
-        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
-        {   3,  64 }, {   1,  61 }, {   9,  63 }, {   7,  50 },
-        {  16,  39 }, {   5,  44 }, {   4,  52 }, {  11,  48 },
-        {  -5,  60 }, {  -1,  59 }, {   0,  59 }, {  22,  33 },
-        {   5,  44 }, {  14,  43 }, {  -1,  78 }, {   0,  60 },
-        {   9,  69 },
-
-        /* 166 - 226 */
-        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
-        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
-        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
-        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
-        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
-        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
-        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
-        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
-        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
-        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
-        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
-        {   1,  67 }, {   5,  59 }, {   9,  67 }, {  16,  30 },
-        {  18,  32 }, {  18,  35 }, {  22,  29 }, {  24,  31 },
-        {  23,  38 }, {  18,  43 }, {  20,  41 }, {  11,  63 },
-        {   9,  59 }, {   9,  64 }, {  -1,  94 }, {  -2,  89 },
-        {  -9, 108 },
-
-        /* 227 - 275 */
-        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
-        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
-        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
-        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
-        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
-        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
-        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
-        {  -3,  74 }, { -10,  90 }, {   0,  70 }, {  -4,  29 },
-        {   5,  31 }, {   7,  42 }, {   1,  59 }, {  -2,  58 },
-        {  -3,  72 }, {  -3,  81 }, { -11,  97 }, {   0,  58 },
-        {   8,   5 }, {  10,  14 }, {  14,  18 }, {  13,  27 },
-        {   2,  40 }, {   0,  58 }, {  -3,  70 }, {  -6,  79 },
-        {  -8,  85 },
-
-        /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
-        { 0, 0 },
-
-        /* 277 - 337 */
-        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
-        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
-        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
-        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
-        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
-        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
-        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
-        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
-        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
-        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
-        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
-        {  -2,  69 }, {  -2,  59 }, {   6,  70 }, {  10,  44 },
-        {   9,  31 }, {  12,  43 }, {   3,  53 }, {  14,  34 },
-        {  10,  38 }, {  -3,  52 }, {  13,  40 }, {  17,  32 },
-        {   7,  44 }, {   7,  38 }, {  13,  50 }, {  10,  57 },
-        {  26,  43 },
-
-        /* 338 - 398 */
-        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
-        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
-        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
-        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
-        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
-        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
-        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
-        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
-        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
-        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
-        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
-        {   8,  60 }, {   6,  63 }, {  17,  65 }, {  21,  24 },
-        {  23,  20 }, {  26,  23 }, {  27,  32 }, {  28,  23 },
-        {  28,  24 }, {  23,  40 }, {  24,  32 }, {  28,  29 },
-        {  23,  42 }, {  19,  57 }, {  22,  53 }, {  22,  61 },
-        {  11,  86 },
-
-        /* 399 -> 435 */
-        {  12,  40 }, {  11,  51 }, {  14,  59 },
-        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
-        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
-        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
-        { -16,  66 }, { -22,  65 }, { -20,  63 }, {   9,  -2 },
-        {  26,  -9 }, {  33,  -9 }, {  39,  -7 }, {  41,  -2 },
-        {  45,   3 }, {  49,   9 }, {  45,  27 }, {  36,  59 },
-        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
-        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
-        {  -8,  66 }, {  -8,  76 },
-
-        /* 436 -> 459 */
-        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
-        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
-        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
-        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
-        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
-        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
-
-        /* 460 - 1024 */
-        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
-        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
-        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
-        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
-        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
-        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
-        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
-        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
-        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
-        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
-        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
-        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
-        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
-        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
-        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
-        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
-        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
-        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
-        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
-        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
-        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
-        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
-        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
-        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
-        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
-        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
-        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
-        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
-        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
-        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
-        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
-        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
-        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
-        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
-        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
-        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
-        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
-        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
-        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
-        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
-        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
-        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
-        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
-        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
-        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
-        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
-        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
-        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
-        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
-        { -16,  66 }, { -22,  65 }, { -20,  63 }, {  -5,  85 },
-        {  -6,  81 }, { -10,  77 }, {  -7,  81 }, { -17,  80 },
-        { -18,  73 }, {  -4,  74 }, { -10,  83 }, {  -9,  71 },
-        {  -9,  67 }, {  -1,  61 }, {  -8,  66 }, { -14,  66 },
-        {   0,  59 }, {   2,  59 }, {   9,  -2 }, {  26,  -9 },
-        {  33,  -9 }, {  39,  -7 }, {  41,  -2 }, {  45,   3 },
-        {  49,   9 }, {  45,  27 }, {  36,  59 }, {  21, -13 },
-        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
-        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
-        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
-        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
-        {  -8,  66 }, {  -8,  76 }, {  -4,  79 }, {  -7,  71 },
-        {  -5,  69 }, {  -9,  70 }, {  -8,  66 }, { -10,  68 },
-        { -19,  73 }, { -12,  69 }, { -16,  70 }, { -15,  67 },
-        { -20,  62 }, { -19,  70 }, { -16,  66 }, { -22,  65 },
-        { -20,  63 }, {  -5,  85 }, {  -6,  81 }, { -10,  77 },
-        {  -7,  81 }, { -17,  80 }, { -18,  73 }, {  -4,  74 },
-        { -10,  83 }, {  -9,  71 }, {  -9,  67 }, {  -1,  61 },
-        {  -8,  66 }, { -14,  66 }, {   0,  59 }, {   2,  59 },
-        {   9,  -2 }, {  26,  -9 }, {  33,  -9 }, {  39,  -7 },
-        {  41,  -2 }, {  45,   3 }, {  49,   9 }, {  45,  27 },
-        {  36,  59 }, {  21, -13 }, {  33, -14 }, {  39,  -7 },
-        {  46,  -2 }, {  51,   2 }, {  60,   6 }, {  61,  17 },
-        {  55,  34 }, {  42,  62 }, {  -6,  66 }, {  -7,  35 },
-        {  -7,  42 }, {  -8,  45 }, {  -5,  48 }, { -12,  56 },
-        {  -6,  60 }, {  -5,  62 }, {  -8,  66 }, {  -8,  76 },
-        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
-        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
-        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
-        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
-        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
-        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
-        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
-        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
-        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
-        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
-        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
-        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
-        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
-        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
-        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
-        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
-        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
-        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
-        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
-        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
-        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
-        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
-        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
-        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
-        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
-        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
-        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
-        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
-        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
-        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
-        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
-        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
-        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
-        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
-        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
-        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
-        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
-        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
-        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
-        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
-        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
-        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
-        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
-        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
-        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
-        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
-        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
-        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
-        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
-        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
-        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
-        {  -3,  74 }, { -10,  90 }, {  -6,  76 }, {  -2,  44 },
-        {   0,  45 }, {   0,  52 }, {  -3,  64 }, {  -2,  59 },
-        {  -4,  70 }, {  -4,  75 }, {  -8,  82 }, { -17, 102 },
-        {  -9,  77 }, {   3,  24 }, {   0,  42 }, {   0,  48 },
-        {   0,  55 }, {  -6,  59 }, {  -7,  71 }, { -12,  83 },
-        { -11,  87 }, { -30, 119 }, {   1,  58 }, {  -3,  29 },
-        {  -1,  36 }, {   1,  38 }, {   2,  43 }, {  -6,  55 },
-        {   0,  58 }, {   0,  64 }, {  -3,  74 }, { -10,  90 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
-        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 }
-    },
-
-    /* i_cabac_init_idc == 1 */
-    {
-        /* 0 - 10 */
-        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
-        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
-        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
-
-        /* 11 - 23 */
-        {  22,  25 }, {  34,   0 }, {  16,   0 }, {  -2,   9 },
-        {   4,  41 }, { -29, 118 }, {   2,  65 }, {  -6,  71 },
-        { -13,  79 }, {   5,  52 }, {   9,  50 }, {  -3,  70 },
-        {  10,  54 },
-
-        /* 24 - 39 */
-        {  26,  34 }, {  19,  22 }, {  40,   0 }, {  57,   2 },
-        {  41,  36 }, {  26,  69 }, { -45, 127 }, { -15, 101 },
-        {  -4,  76 }, {  -6,  71 }, { -13,  79 }, {   5,  52 },
-        {   6,  69 }, { -13,  90 }, {   0,  52 }, {   8,  43 },
-
-        /* 40 - 53 */
-        {  -2,  69 },{  -5,  82 },{ -10,  96 },{   2,  59 },
-        {   2,  75 },{  -3,  87 },{  -3,  100 },{   1,  56 },
-        {  -3,  74 },{  -6,  85 },{   0,  59 },{  -3,  81 },
-        {  -7,  86 },{  -5,  95 },
-
-        /* 54 - 59 */
-        {  -1,  66 },{  -1,  77 },{   1,  70 },{  -2,  86 },
-        {  -5,  72 },{   0,  61 },
-
-        /* 60 - 69 */
-        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
-        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
-        { 13, 41 },  { 3, 62 },
-
-        /* 70 - 104 */
-        {  13,  15 }, {   7,  51 }, {   2,  80 }, { -39, 127 },
-        { -18,  91 }, { -17,  96 }, { -26,  81 }, { -35,  98 },
-        { -24, 102 }, { -23,  97 }, { -27, 119 }, { -24,  99 },
-        { -21, 110 }, { -18, 102 }, { -36, 127 }, {   0,  80 },
-        {  -5,  89 }, {  -7,  94 }, {  -4,  92 }, {   0,  39 },
-        {   0,  65 }, { -15,  84 }, { -35, 127 }, {  -2,  73 },
-        { -12, 104 }, {  -9,  91 }, { -31, 127 }, {   3,  55 },
-        {   7,  56 }, {   7,  55 }, {   8,  61 }, {  -3,  53 },
-        {   0,  68 }, {  -7,  74 }, {  -9,  88 },
-
-        /* 105 -> 165 */
-        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
-        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
-        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
-        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
-        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
-        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
-        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
-        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
-        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
-        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
-        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
-        {  -4,  71 }, {   0,  58 }, {   7,  61 }, {   9,  41 },
-        {  18,  25 }, {   9,  32 }, {   5,  43 }, {   9,  47 },
-        {   0,  44 }, {   0,  51 }, {   2,  46 }, {  19,  38 },
-        {  -4,  66 }, {  15,  38 }, {  12,  42 }, {   9,  34 },
-        {   0,  89 },
-
-        /* 166 - 226 */
-        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
-        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
-        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
-        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
-        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
-        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
-        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
-        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
-        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
-        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
-        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
-        {   0,  75 }, {   2,  72 }, {   8,  77 }, {  14,  35 },
-        {  18,  31 }, {  17,  35 }, {  21,  30 }, {  17,  45 },
-        {  20,  42 }, {  18,  45 }, {  27,  26 }, {  16,  54 },
-        {   7,  66 }, {  16,  56 }, {  11,  73 }, {  10,  67 },
-        { -10, 116 },
-
-        /* 227 - 275 */
-        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
-        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
-        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
-        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
-        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
-        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
-        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
-        {  -5,  74 }, {  -9,  86 }, {   2,  66 }, {  -9,  34 },
-        {   1,  32 }, {  11,  31 }, {   5,  52 }, {  -2,  55 },
-        {  -2,  67 }, {   0,  73 }, {  -8,  89 }, {   3,  52 },
-        {   7,   4 }, {  10,   8 }, {  17,   8 }, {  16,  19 },
-        {   3,  37 }, {  -1,  61 }, {  -5,  73 }, {  -1,  70 },
-        {  -4,  78 },
-
-        /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
-        { 0, 0 },
-
-        /* 277 - 337 */
-        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
-        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
-        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
-        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
-        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
-        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
-        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
-        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
-        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
-        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
-        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
-        {  -1,  70 }, {  -9,  72 }, {  14,  60 }, {  16,  37 },
-        {   0,  47 }, {  18,  35 }, {  11,  37 }, {  12,  41 },
-        {  10,  41 }, {   2,  48 }, {  12,  41 }, {  13,  41 },
-        {   0,  59 }, {   3,  50 }, {  19,  40 }, {   3,  66 },
-        {  18,  50 },
-
-        /* 338 - 398 */
-        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
-        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
-        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
-        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
-        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
-        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
-        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
-        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
-        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
-        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
-        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
-        {  12,  48 }, {  11,  49 }, {  26,  45 }, {  22,  22 },
-        {  23,  22 }, {  27,  21 }, {  33,  20 }, {  26,  28 },
-        {  30,  24 }, {  27,  34 }, {  18,  42 }, {  25,  39 },
-        {  18,  50 }, {  12,  70 }, {  21,  54 }, {  14,  71 },
-        {  11,  83 },
-
-        /* 399 -> 435 */
-        {  25,  32 }, {  21,  49 }, {  21,  54 },
-        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
-        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
-        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
-        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  17, -10 },
-        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
-        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
-        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
-        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
-        {  -4,  67 }, {  -7,  82 },
-
-        /* 436 -> 459 */
-        {  -3,  81 }, {  -3,  76 }, {  -7,  72 }, {  -6,  78 },
-        { -12,  72 }, { -14,  68 }, {  -3,  70 }, {  -6,  76 },
-        {  -5,  66 }, {  -5,  62 }, {   0,  57 }, {  -4,  61 },
-        {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
-        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
-        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
-
-        /* 460 - 1024 */
-        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
-        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
-        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
-        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
-        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
-        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
-        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
-        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
-        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
-        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
-        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
-        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
-        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
-        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
-        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
-        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
-        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
-        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
-        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
-        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
-        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
-        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
-        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
-        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
-        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
-        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
-        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
-        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
-        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
-        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
-        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
-        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
-        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
-        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
-        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
-        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
-        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
-        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
-        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
-        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
-        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
-        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
-        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
-        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
-        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
-        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
-        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
-        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
-        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
-        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  -3,  81 },
-        {  -3,  76 }, {  -7,  72 }, {  -6,  78 }, { -12,  72 },
-        { -14,  68 }, {  -3,  70 }, {  -6,  76 }, {  -5,  66 },
-        {  -5,  62 }, {   0,  57 }, {  -4,  61 }, {  -9,  60 },
-        {   1,  54 }, {   2,  58 }, {  17, -10 }, {  32, -13 },
-        {  42,  -9 }, {  49,  -5 }, {  53,   0 }, {  64,   3 },
-        {  68,  10 }, {  66,  27 }, {  47,  57 }, {  17, -10 },
-        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
-        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
-        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
-        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
-        {  -4,  67 }, {  -7,  82 }, {  -5,  85 }, {  -6,  81 },
-        { -10,  77 }, {  -7,  81 }, { -17,  80 }, { -18,  73 },
-        {  -4,  74 }, { -10,  83 }, {  -9,  71 }, {  -9,  67 },
-        {  -1,  61 }, {  -8,  66 }, { -14,  66 }, {   0,  59 },
-        {   2,  59 }, {  -3,  81 }, {  -3,  76 }, {  -7,  72 },
-        {  -6,  78 }, { -12,  72 }, { -14,  68 }, {  -3,  70 },
-        {  -6,  76 }, {  -5,  66 }, {  -5,  62 }, {   0,  57 },
-        {  -4,  61 }, {  -9,  60 }, {   1,  54 }, {   2,  58 },
-        {  17, -10 }, {  32, -13 }, {  42,  -9 }, {  49,  -5 },
-        {  53,   0 }, {  64,   3 }, {  68,  10 }, {  66,  27 },
-        {  47,  57 }, {  17, -10 }, {  32, -13 }, {  42,  -9 },
-        {  49,  -5 }, {  53,   0 }, {  64,   3 }, {  68,  10 },
-        {  66,  27 }, {  47,  57 }, {  -5,  71 }, {   0,  24 },
-        {  -1,  36 }, {  -2,  42 }, {  -2,  52 }, {  -9,  57 },
-        {  -6,  63 }, {  -4,  65 }, {  -4,  67 }, {  -7,  82 },
-        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
-        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
-        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
-        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
-        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
-        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
-        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
-        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
-        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
-        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
-        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
-        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
-        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
-        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
-        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
-        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
-        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
-        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
-        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
-        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
-        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
-        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
-        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
-        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
-        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
-        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
-        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
-        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
-        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
-        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
-        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
-        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
-        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
-        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
-        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
-        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
-        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
-        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
-        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
-        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
-        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
-        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
-        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
-        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
-        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
-        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
-        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
-        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
-        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
-        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
-        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
-        {  -5,  74 }, {  -9,  86 }, { -23, 112 }, { -15,  71 },
-        {  -7,  61 }, {   0,  53 }, {  -5,  66 }, { -11,  77 },
-        {  -9,  80 }, {  -9,  84 }, { -10,  87 }, { -34, 127 },
-        { -21, 101 }, {  -3,  39 }, {  -5,  53 }, {  -7,  61 },
-        { -11,  75 }, { -15,  77 }, { -17,  91 }, { -25, 107 },
-        { -25, 111 }, { -28, 122 }, { -11,  76 }, { -10,  44 },
-        { -10,  52 }, { -10,  57 }, {  -9,  58 }, { -16,  72 },
-        {  -7,  69 }, {  -4,  69 }, {  -5,  74 }, {  -9,  86 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
-        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 }
-    },
-
-    /* i_cabac_init_idc == 2 */
-    {
-        /* 0 - 10 */
-        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
-        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
-        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
-
-        /* 11 - 23 */
-        {  29,  16 }, {  25,   0 }, {  14,   0 }, { -10,  51 },
-        {  -3,  62 }, { -27,  99 }, {  26,  16 }, {  -4,  85 },
-        { -24, 102 }, {   5,  57 }, {   6,  57 }, { -17,  73 },
-        {  14,  57 },
-
-        /* 24 - 39 */
-        {  20,  40 }, {  20,  10 }, {  29,   0 }, {  54,   0 },
-        {  37,  42 }, {  12,  97 }, { -32, 127 }, { -22, 117 },
-        {  -2,  74 }, {  -4,  85 }, { -24, 102 }, {   5,  57 },
-        {  -6,  93 }, { -14,  88 }, {  -6,  44 }, {   4,  55 },
-
-        /* 40 - 53 */
-        { -11,  89 },{ -15,  103 },{ -21,  116 },{  19,  57 },
-        {  20,  58 },{   4,  84 },{   6,  96 },{   1,  63 },
-        {  -5,  85 },{ -13,  106 },{   5,  63 },{   6,  75 },
-        {  -3,  90 },{  -1,  101 },
-
-        /* 54 - 59 */
-        {   3,  55 },{  -4,  79 },{  -2,  75 },{ -12,  97 },
-        {  -7,  50 },{   1,  60 },
-
-        /* 60 - 69 */
-        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
-        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
-        { 13, 41 },  { 3, 62 },
-
-        /* 70 - 104 */
-        {   7,  34 }, {  -9,  88 }, { -20, 127 }, { -36, 127 },
-        { -17,  91 }, { -14,  95 }, { -25,  84 }, { -25,  86 },
-        { -12,  89 }, { -17,  91 }, { -31, 127 }, { -14,  76 },
-        { -18, 103 }, { -13,  90 }, { -37, 127 }, {  11,  80 },
-        {   5,  76 }, {   2,  84 }, {   5,  78 }, {  -6,  55 },
-        {   4,  61 }, { -14,  83 }, { -37, 127 }, {  -5,  79 },
-        { -11, 104 }, { -11,  91 }, { -30, 127 }, {   0,  65 },
-        {  -2,  79 }, {   0,  72 }, {  -4,  92 }, {  -6,  56 },
-        {   3,  68 }, {  -8,  71 }, { -13,  98 },
-
-        /* 105 -> 165 */
-        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
-        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
-        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
-        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
-        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
-        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
-        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
-        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
-        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
-        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
-        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
-        {   3,  65 }, {  -7,  69 }, {   8,  77 }, { -10,  66 },
-        {   3,  62 }, {  -3,  68 }, { -20,  81 }, {   0,  30 },
-        {   1,   7 }, {  -3,  23 }, { -21,  74 }, {  16,  66 },
-        { -23, 124 }, {  17,  37 }, {  44, -18 }, {  50, -34 },
-        { -22, 127 },
-
-        /* 166 - 226 */
-        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
-        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
-        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
-        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
-        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
-        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
-        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
-        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
-        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
-        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
-        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
-        {  20,  34 }, {  19,  31 }, {  27,  44 }, {  19,  16 },
-        {  15,  36 }, {  15,  36 }, {  21,  28 }, {  25,  21 },
-        {  30,  20 }, {  31,  12 }, {  27,  16 }, {  24,  42 },
-        {   0,  93 }, {  14,  56 }, {  15,  57 }, {  26,  38 },
-        { -24, 127 },
-
-        /* 227 - 275 */
-        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
-        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
-        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
-        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
-        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
-        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
-        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
-        { -12,  92 }, { -18, 108 }, {  -4,  79 }, { -22,  69 },
-        { -16,  75 }, {  -2,  58 }, {   1,  58 }, { -13,  78 },
-        {  -9,  83 }, {  -4,  81 }, { -13,  99 }, { -13,  81 },
-        {  -6,  38 }, { -13,  62 }, {  -6,  58 }, {  -2,  59 },
-        { -16,  73 }, { -10,  76 }, { -13,  86 }, {  -9,  83 },
-        { -10,  87 },
-
-        /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
-        { 0, 0 },
-
-        /* 277 - 337 */
-        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
-        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
-        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
-        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
-        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
-        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
-        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
-        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
-        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
-        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
-        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
-        {  -2,  76 }, { -18,  86 }, {  12,  70 }, {   5,  64 },
-        { -12,  70 }, {  11,  55 }, {   5,  56 }, {   0,  69 },
-        {   2,  65 }, {  -6,  74 }, {   5,  54 }, {   7,  54 },
-        {  -6,  76 }, { -11,  82 }, {  -2,  77 }, {  -2,  77 },
-        {  25,  42 },
-
-        /* 338 - 398 */
-        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
-        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
-        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
-        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
-        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
-        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
-        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
-        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
-        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
-        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
-        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
-        {  18,  31 }, {  19,  26 }, {  36,  24 }, {  24,  23 },
-        {  27,  16 }, {  24,  30 }, {  31,  29 }, {  22,  41 },
-        {  22,  42 }, {  16,  60 }, {  15,  52 }, {  14,  60 },
-        {   3,  78 }, { -16, 123 }, {  21,  53 }, {  22,  56 },
-        {  25,  61 },
-
-        /* 399 -> 435 */
-        {  21,  33 }, {  19,  50 }, {  17,  61 },
-        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
-        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
-        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
-        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
-        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
-        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
-        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
-        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
-        {  -6,  68 }, { -10,  79 },
-
-        /* 436 -> 459 */
-        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
-        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
-        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
-        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
-        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
-        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
-
-        /* 460 - 1024 */
-        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
-        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
-        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
-        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
-        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
-        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
-        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
-        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
-        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
-        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
-        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
-        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
-        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
-        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
-        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
-        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
-        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
-        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
-        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
-        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
-        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
-        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
-        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
-        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
-        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
-        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
-        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
-        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
-        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
-        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
-        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
-        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
-        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
-        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
-        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
-        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
-        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
-        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
-        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
-        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
-        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
-        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
-        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
-        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
-        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
-        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
-        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
-        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
-        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
-        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {  -3,  78 },
-        {  -8,  74 }, {  -9,  72 }, { -10,  72 }, { -18,  75 },
-        { -12,  71 }, { -11,  63 }, {  -5,  70 }, { -17,  75 },
-        { -14,  72 }, { -16,  67 }, {  -8,  53 }, { -14,  59 },
-        {  -9,  52 }, { -11,  68 }, {   9,  -2 }, {  30, -10 },
-        {  31,  -4 }, {  33,  -1 }, {  33,   7 }, {  31,  12 },
-        {  37,  23 }, {  31,  38 }, {  20,  64 }, {   9,  -2 },
-        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
-        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
-        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
-        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
-        {  -6,  68 }, { -10,  79 }, {  -3,  78 }, {  -8,  74 },
-        {  -9,  72 }, { -10,  72 }, { -18,  75 }, { -12,  71 },
-        { -11,  63 }, {  -5,  70 }, { -17,  75 }, { -14,  72 },
-        { -16,  67 }, {  -8,  53 }, { -14,  59 }, {  -9,  52 },
-        { -11,  68 }, {  -3,  78 }, {  -8,  74 }, {  -9,  72 },
-        { -10,  72 }, { -18,  75 }, { -12,  71 }, { -11,  63 },
-        {  -5,  70 }, { -17,  75 }, { -14,  72 }, { -16,  67 },
-        {  -8,  53 }, { -14,  59 }, {  -9,  52 }, { -11,  68 },
-        {   9,  -2 }, {  30, -10 }, {  31,  -4 }, {  33,  -1 },
-        {  33,   7 }, {  31,  12 }, {  37,  23 }, {  31,  38 },
-        {  20,  64 }, {   9,  -2 }, {  30, -10 }, {  31,  -4 },
-        {  33,  -1 }, {  33,   7 }, {  31,  12 }, {  37,  23 },
-        {  31,  38 }, {  20,  64 }, {  -9,  71 }, {  -7,  37 },
-        {  -8,  44 }, { -11,  49 }, { -10,  56 }, { -12,  59 },
-        {  -8,  63 }, {  -9,  67 }, {  -6,  68 }, { -10,  79 },
-        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
-        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
-        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
-        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
-        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
-        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
-        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
-        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
-        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
-        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
-        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
-        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
-        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
-        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
-        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
-        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
-        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
-        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
-        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
-        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
-        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
-        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
-        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
-        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
-        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
-        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
-        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
-        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
-        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
-        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
-        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
-        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
-        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
-        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
-        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
-        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
-        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
-        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
-        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
-        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
-        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
-        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
-        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
-        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
-        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
-        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
-        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
-        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
-        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
-        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
-        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
-        { -12,  92 }, { -18, 108 }, { -24, 115 }, { -22,  82 },
-        {  -9,  62 }, {   0,  53 }, {   0,  59 }, { -14,  85 },
-        { -13,  89 }, { -13,  94 }, { -11,  92 }, { -29, 127 },
-        { -21, 100 }, { -14,  57 }, { -12,  67 }, { -11,  71 },
-        { -10,  77 }, { -21,  85 }, { -16,  88 }, { -23, 104 },
-        { -15,  98 }, { -37, 127 }, { -10,  82 }, {  -8,  48 },
-        {  -8,  61 }, {  -8,  66 }, {  -7,  70 }, { -14,  75 },
-        { -10,  79 }, {  -9,  83 }, { -12,  92 }, { -18, 108 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
-        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 }
-    }
-};
-
-const uint8_t x264_cabac_range_lps[64][4] =
-{
-    {  2,   2,   2,   2}, {  6,   7,   8,   9}, {  6,   7,   9,  10}, {  6,   8,   9,  11},
-    {  7,   8,  10,  11}, {  7,   9,  10,  12}, {  7,   9,  11,  12}, {  8,   9,  11,  13},
-    {  8,  10,  12,  14}, {  9,  11,  12,  14}, {  9,  11,  13,  15}, { 10,  12,  14,  16},
-    { 10,  12,  15,  17}, { 11,  13,  15,  18}, { 11,  14,  16,  19}, { 12,  14,  17,  20},
-    { 12,  15,  18,  21}, { 13,  16,  19,  22}, { 14,  17,  20,  23}, { 14,  18,  21,  24},
-    { 15,  19,  22,  25}, { 16,  20,  23,  27}, { 17,  21,  25,  28}, { 18,  22,  26,  30},
-    { 19,  23,  27,  31}, { 20,  24,  29,  33}, { 21,  26,  30,  35}, { 22,  27,  32,  37},
-    { 23,  28,  33,  39}, { 24,  30,  35,  41}, { 26,  31,  37,  43}, { 27,  33,  39,  45},
-    { 29,  35,  41,  48}, { 30,  37,  43,  50}, { 32,  39,  46,  53}, { 33,  41,  48,  56},
-    { 35,  43,  51,  59}, { 37,  45,  54,  62}, { 39,  48,  56,  65}, { 41,  50,  59,  69},
-    { 43,  53,  63,  72}, { 46,  56,  66,  76}, { 48,  59,  69,  80}, { 51,  62,  73,  85},
-    { 53,  65,  77,  89}, { 56,  69,  81,  94}, { 59,  72,  86,  99}, { 62,  76,  90, 104},
-    { 66,  80,  95, 110}, { 69,  85, 100, 116}, { 73,  89, 105, 122}, { 77,  94, 111, 128},
-    { 81,  99, 117, 135}, { 85, 104, 123, 142}, { 90, 110, 130, 150}, { 95, 116, 137, 158},
-    {100, 122, 144, 166}, {105, 128, 152, 175}, {111, 135, 160, 185}, {116, 142, 169, 195},
-    {123, 150, 178, 205}, {128, 158, 187, 216}, {128, 167, 197, 227}, {128, 176, 208, 240}
-};
-
-const uint8_t x264_cabac_transition[128][2] =
-{
-    {  0,   0}, {  1,   1}, {  2,  50}, { 51,   3}, {  2,  50}, { 51,   3}, {  4,  52}, { 53,   5},
-    {  6,  52}, { 53,   7}, {  8,  52}, { 53,   9}, { 10,  54}, { 55,  11}, { 12,  54}, { 55,  13},
-    { 14,  54}, { 55,  15}, { 16,  56}, { 57,  17}, { 18,  56}, { 57,  19}, { 20,  56}, { 57,  21},
-    { 22,  58}, { 59,  23}, { 24,  58}, { 59,  25}, { 26,  60}, { 61,  27}, { 28,  60}, { 61,  29},
-    { 30,  60}, { 61,  31}, { 32,  62}, { 63,  33}, { 34,  62}, { 63,  35}, { 36,  64}, { 65,  37},
-    { 38,  66}, { 67,  39}, { 40,  66}, { 67,  41}, { 42,  66}, { 67,  43}, { 44,  68}, { 69,  45},
-    { 46,  68}, { 69,  47}, { 48,  70}, { 71,  49}, { 50,  72}, { 73,  51}, { 52,  72}, { 73,  53},
-    { 54,  74}, { 75,  55}, { 56,  74}, { 75,  57}, { 58,  76}, { 77,  59}, { 60,  78}, { 79,  61},
-    { 62,  78}, { 79,  63}, { 64,  80}, { 81,  65}, { 66,  82}, { 83,  67}, { 68,  82}, { 83,  69},
-    { 70,  84}, { 85,  71}, { 72,  84}, { 85,  73}, { 74,  88}, { 89,  75}, { 76,  88}, { 89,  77},
-    { 78,  90}, { 91,  79}, { 80,  90}, { 91,  81}, { 82,  94}, { 95,  83}, { 84,  94}, { 95,  85},
-    { 86,  96}, { 97,  87}, { 88,  96}, { 97,  89}, { 90, 100}, {101,  91}, { 92, 100}, {101,  93},
-    { 94, 102}, {103,  95}, { 96, 104}, {105,  97}, { 98, 104}, {105,  99}, {100, 108}, {109, 101},
-    {102, 108}, {109, 103}, {104, 110}, {111, 105}, {106, 112}, {113, 107}, {108, 114}, {115, 109},
-    {110, 116}, {117, 111}, {112, 118}, {119, 113}, {114, 118}, {119, 115}, {116, 122}, {123, 117},
-    {118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125}
-};
-
-const uint8_t x264_cabac_renorm_shift[64] =
-{
-    6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-};
-
-/* -ln2(probability) */
-const uint16_t x264_cabac_entropy[128] =
-{
-    FIX8(0.0273), FIX8(5.7370), FIX8(0.0288), FIX8(5.6618),
-    FIX8(0.0303), FIX8(5.5866), FIX8(0.0320), FIX8(5.5114),
-    FIX8(0.0337), FIX8(5.4362), FIX8(0.0355), FIX8(5.3610),
-    FIX8(0.0375), FIX8(5.2859), FIX8(0.0395), FIX8(5.2106),
-    FIX8(0.0416), FIX8(5.1354), FIX8(0.0439), FIX8(5.0602),
-    FIX8(0.0463), FIX8(4.9851), FIX8(0.0488), FIX8(4.9099),
-    FIX8(0.0515), FIX8(4.8347), FIX8(0.0543), FIX8(4.7595),
-    FIX8(0.0572), FIX8(4.6843), FIX8(0.0604), FIX8(4.6091),
-    FIX8(0.0637), FIX8(4.5339), FIX8(0.0671), FIX8(4.4588),
-    FIX8(0.0708), FIX8(4.3836), FIX8(0.0747), FIX8(4.3083),
-    FIX8(0.0788), FIX8(4.2332), FIX8(0.0832), FIX8(4.1580),
-    FIX8(0.0878), FIX8(4.0828), FIX8(0.0926), FIX8(4.0076),
-    FIX8(0.0977), FIX8(3.9324), FIX8(0.1032), FIX8(3.8572),
-    FIX8(0.1089), FIX8(3.7820), FIX8(0.1149), FIX8(3.7068),
-    FIX8(0.1214), FIX8(3.6316), FIX8(0.1282), FIX8(3.5565),
-    FIX8(0.1353), FIX8(3.4813), FIX8(0.1429), FIX8(3.4061),
-    FIX8(0.1510), FIX8(3.3309), FIX8(0.1596), FIX8(3.2557),
-    FIX8(0.1686), FIX8(3.1805), FIX8(0.1782), FIX8(3.1053),
-    FIX8(0.1884), FIX8(3.0301), FIX8(0.1992), FIX8(2.9549),
-    FIX8(0.2107), FIX8(2.8797), FIX8(0.2229), FIX8(2.8046),
-    FIX8(0.2358), FIX8(2.7294), FIX8(0.2496), FIX8(2.6542),
-    FIX8(0.2642), FIX8(2.5790), FIX8(0.2798), FIX8(2.5038),
-    FIX8(0.2964), FIX8(2.4286), FIX8(0.3142), FIX8(2.3534),
-    FIX8(0.3331), FIX8(2.2782), FIX8(0.3532), FIX8(2.2030),
-    FIX8(0.3748), FIX8(2.1278), FIX8(0.3979), FIX8(2.0527),
-    FIX8(0.4226), FIX8(1.9775), FIX8(0.4491), FIX8(1.9023),
-    FIX8(0.4776), FIX8(1.8271), FIX8(0.5082), FIX8(1.7519),
-    FIX8(0.5412), FIX8(1.6767), FIX8(0.5768), FIX8(1.6015),
-    FIX8(0.6152), FIX8(1.5263), FIX8(0.6568), FIX8(1.4511),
-    FIX8(0.7020), FIX8(1.3759), FIX8(0.7513), FIX8(1.3008),
-    FIX8(0.8050), FIX8(1.2256), FIX8(0.8638), FIX8(1.1504),
-    FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000)
-};
-
-uint8_t x264_cabac_contexts[4][QP_MAX_SPEC+1][1024];
+static uint8_t cabac_contexts[4][QP_MAX_SPEC+1][1024];
 
 void x264_cabac_init( x264_t *h )
 {
@@ -1338,17 +40,14 @@
             for( int j = 0; j < ctx_count; j++ )
             {
                 int state = x264_clip3( (((*cabac_context_init)[j][0] * qp) >> 4) + (*cabac_context_init)[j][1], 1, 126 );
-                x264_cabac_contexts[i][qp][j] = (X264_MIN( state, 127-state ) << 1) | (state >> 6);
+                cabac_contexts[i][qp][j] = (X264_MIN( state, 127-state ) << 1) | (state >> 6);
             }
     }
 }
 
-/*****************************************************************************
- *
- *****************************************************************************/
 void x264_cabac_context_init( x264_t *h, x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model )
 {
-    memcpy( cb->state, x264_cabac_contexts[i_slice_type == SLICE_TYPE_I ? 0 : i_model + 1][i_qp], CHROMA444 ? 1024 : 460 );
+    memcpy( cb->state, cabac_contexts[i_slice_type == SLICE_TYPE_I ? 0 : i_model + 1][i_qp], CHROMA444 ? 1024 : 460 );
 }
 
 void x264_cabac_encode_init_core( x264_cabac_t *cb )
@@ -1367,7 +66,7 @@
     cb->p_end   = p_end;
 }
 
-static inline void x264_cabac_putbyte( x264_cabac_t *cb )
+static inline void cabac_putbyte( x264_cabac_t *cb )
 {
     if( cb->i_queue >= 0 )
     {
@@ -1399,13 +98,13 @@
     }
 }
 
-static inline void x264_cabac_encode_renorm( x264_cabac_t *cb )
+static inline void cabac_encode_renorm( x264_cabac_t *cb )
 {
     int shift = x264_cabac_renorm_shift[cb->i_range>>3];
     cb->i_range <<= shift;
     cb->i_low   <<= shift;
     cb->i_queue  += shift;
-    x264_cabac_putbyte( cb );
+    cabac_putbyte( cb );
 }
 
 /* Making custom versions of this function, even in asm, for the cases where
@@ -1422,7 +121,7 @@
         cb->i_range = i_range_lps;
     }
     cb->state[i_ctx] = x264_cabac_transition[i_state][b];
-    x264_cabac_encode_renorm( cb );
+    cabac_encode_renorm( cb );
 }
 
 /* Note: b is negated for this function */
@@ -1431,7 +130,7 @@
     cb->i_low <<= 1;
     cb->i_low += b & cb->i_range;
     cb->i_queue += 1;
-    x264_cabac_putbyte( cb );
+    cabac_putbyte( cb );
 }
 
 static const int bypass_lut[16] =
@@ -1452,7 +151,7 @@
         cb->i_low <<= i;
         cb->i_low += ((x>>k)&0xff) * cb->i_range;
         cb->i_queue += i;
-        x264_cabac_putbyte( cb );
+        cabac_putbyte( cb );
         i = 8;
     } while( k > 0 );
 }
@@ -1460,7 +159,7 @@
 void x264_cabac_encode_terminal_c( x264_cabac_t *cb )
 {
     cb->i_range -= 2;
-    x264_cabac_encode_renorm( cb );
+    cabac_encode_renorm( cb );
 }
 
 void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb )
@@ -1469,12 +168,12 @@
     cb->i_low |= 1;
     cb->i_low <<= 9;
     cb->i_queue += 9;
-    x264_cabac_putbyte( cb );
-    x264_cabac_putbyte( cb );
+    cabac_putbyte( cb );
+    cabac_putbyte( cb );
     cb->i_low <<= -cb->i_queue;
     cb->i_low |= (0x35a4e4f5 >> (h->i_frame & 31) & 1) << 10;
     cb->i_queue = 0;
-    x264_cabac_putbyte( cb );
+    cabac_putbyte( cb );
 
     while( cb->i_bytes_outstanding > 0 )
     {
diff -Nru x264-0.152.2854+gite9a5903/common/cabac.h x264-0.158.2988+git-20191101.7817004/common/cabac.h
--- x264-0.152.2854+gite9a5903/common/cabac.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/cabac.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.h: arithmetic coder
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -51,28 +51,36 @@
     uint8_t padding[12];
 } x264_cabac_t;
 
-extern const uint8_t x264_cabac_transition[128][2];
-extern const uint16_t x264_cabac_entropy[128];
-
 /* init the contexts given i_slice_type, the quantif and the model */
+#define x264_cabac_context_init x264_template(cabac_context_init)
 void x264_cabac_context_init( x264_t *h, x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
 
+#define x264_cabac_encode_init_core x264_template(cabac_encode_init_core)
 void x264_cabac_encode_init_core( x264_cabac_t *cb );
+#define x264_cabac_encode_init x264_template(cabac_encode_init)
 void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
+#define x264_cabac_encode_decision_c x264_template(cabac_encode_decision_c)
 void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b );
+#define x264_cabac_encode_decision_asm x264_template(cabac_encode_decision_asm)
 void x264_cabac_encode_decision_asm( x264_cabac_t *cb, int i_ctx, int b );
+#define x264_cabac_encode_bypass_c x264_template(cabac_encode_bypass_c)
 void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b );
+#define x264_cabac_encode_bypass_asm x264_template(cabac_encode_bypass_asm)
 void x264_cabac_encode_bypass_asm( x264_cabac_t *cb, int b );
+#define x264_cabac_encode_terminal_c x264_template(cabac_encode_terminal_c)
 void x264_cabac_encode_terminal_c( x264_cabac_t *cb );
+#define x264_cabac_encode_terminal_asm x264_template(cabac_encode_terminal_asm)
 void x264_cabac_encode_terminal_asm( x264_cabac_t *cb );
+#define x264_cabac_encode_ue_bypass x264_template(cabac_encode_ue_bypass)
 void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val );
+#define x264_cabac_encode_flush x264_template(cabac_encode_flush)
 void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
 
 #if HAVE_MMX
 #define x264_cabac_encode_decision x264_cabac_encode_decision_asm
 #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
 #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
-#elif defined(ARCH_AARCH64)
+#elif HAVE_AARCH64
 #define x264_cabac_encode_decision x264_cabac_encode_decision_asm
 #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
 #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
diff -Nru x264-0.152.2854+gite9a5903/common/common.c x264-0.158.2988+git-20191101.7817004/common/common.c
--- x264-0.152.2854+gite9a5903/common/common.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/common.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.c: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -26,1049 +26,6 @@
 
 #include "common.h"
 
-#include <ctype.h>
-
-#if HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-#if HAVE_THP
-#include <sys/mman.h>
-#endif
-
-const int x264_bit_depth = BIT_DEPTH;
-
-const int x264_chroma_format = X264_CHROMA_FORMAT;
-
-static void x264_log_default( void *, int, const char *, va_list );
-
-/****************************************************************************
- * x264_param_default:
- ****************************************************************************/
-void x264_param_default( x264_param_t *param )
-{
-    /* */
-    memset( param, 0, sizeof( x264_param_t ) );
-
-    /* CPU autodetect */
-    param->cpu = x264_cpu_detect();
-    param->i_threads = X264_THREADS_AUTO;
-    param->i_lookahead_threads = X264_THREADS_AUTO;
-    param->b_deterministic = 1;
-    param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
-
-    /* Video properties */
-    param->i_csp           = X264_CHROMA_FORMAT ? X264_CHROMA_FORMAT : X264_CSP_I420;
-    param->i_width         = 0;
-    param->i_height        = 0;
-    param->vui.i_sar_width = 0;
-    param->vui.i_sar_height= 0;
-    param->vui.i_overscan  = 0;  /* undef */
-    param->vui.i_vidformat = 5;  /* undef */
-    param->vui.b_fullrange = -1; /* default depends on input */
-    param->vui.i_colorprim = 2;  /* undef */
-    param->vui.i_transfer  = 2;  /* undef */
-    param->vui.i_colmatrix = -1; /* default depends on input */
-    param->vui.i_chroma_loc= 0;  /* left center */
-    param->i_fps_num       = 25;
-    param->i_fps_den       = 1;
-    param->i_level_idc     = -1;
-    param->i_slice_max_size = 0;
-    param->i_slice_max_mbs = 0;
-    param->i_slice_count = 0;
-
-    /* Encoder parameters */
-    param->i_frame_reference = 3;
-    param->i_keyint_max = 250;
-    param->i_keyint_min = X264_KEYINT_MIN_AUTO;
-    param->i_bframe = 3;
-    param->i_scenecut_threshold = 40;
-    param->i_bframe_adaptive = X264_B_ADAPT_FAST;
-    param->i_bframe_bias = 0;
-    param->i_bframe_pyramid = X264_B_PYRAMID_NORMAL;
-    param->b_interlaced = 0;
-    param->b_constrained_intra = 0;
-
-    param->b_deblocking_filter = 1;
-    param->i_deblocking_filter_alphac0 = 0;
-    param->i_deblocking_filter_beta = 0;
-
-    param->b_cabac = 1;
-    param->i_cabac_init_idc = 0;
-
-    param->rc.i_rc_method = X264_RC_CRF;
-    param->rc.i_bitrate = 0;
-    param->rc.f_rate_tolerance = 1.0;
-    param->rc.i_vbv_max_bitrate = 0;
-    param->rc.i_vbv_buffer_size = 0;
-    param->rc.f_vbv_buffer_init = 0.9;
-    param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
-    param->rc.f_rf_constant = 23;
-    param->rc.i_qp_min = 0;
-    param->rc.i_qp_max = QP_MAX;
-    param->rc.i_qp_step = 4;
-    param->rc.f_ip_factor = 1.4;
-    param->rc.f_pb_factor = 1.3;
-    param->rc.i_aq_mode = X264_AQ_VARIANCE;
-    param->rc.f_aq_strength = 1.0;
-    param->rc.i_lookahead = 40;
-
-    param->rc.b_stat_write = 0;
-    param->rc.psz_stat_out = "x264_2pass.log";
-    param->rc.b_stat_read = 0;
-    param->rc.psz_stat_in = "x264_2pass.log";
-    param->rc.f_qcompress = 0.6;
-    param->rc.f_qblur = 0.5;
-    param->rc.f_complexity_blur = 20;
-    param->rc.i_zones = 0;
-    param->rc.b_mb_tree = 1;
-
-    /* Log */
-    param->pf_log = x264_log_default;
-    param->p_log_private = NULL;
-    param->i_log_level = X264_LOG_INFO;
-
-    /* */
-    param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8;
-    param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8
-                         | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
-    param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
-    param->analyse.i_me_method = X264_ME_HEX;
-    param->analyse.f_psy_rd = 1.0;
-    param->analyse.b_psy = 1;
-    param->analyse.f_psy_trellis = 0;
-    param->analyse.i_me_range = 16;
-    param->analyse.i_subpel_refine = 7;
-    param->analyse.b_mixed_references = 1;
-    param->analyse.b_chroma_me = 1;
-    param->analyse.i_mv_range_thread = -1;
-    param->analyse.i_mv_range = -1; // set from level_idc
-    param->analyse.i_chroma_qp_offset = 0;
-    param->analyse.b_fast_pskip = 1;
-    param->analyse.b_weighted_bipred = 1;
-    param->analyse.i_weighted_pred = X264_WEIGHTP_SMART;
-    param->analyse.b_dct_decimate = 1;
-    param->analyse.b_transform_8x8 = 1;
-    param->analyse.i_trellis = 1;
-    param->analyse.i_luma_deadzone[0] = 21;
-    param->analyse.i_luma_deadzone[1] = 11;
-    param->analyse.b_psnr = 0;
-    param->analyse.b_ssim = 0;
-
-    param->i_cqm_preset = X264_CQM_FLAT;
-    memset( param->cqm_4iy, 16, sizeof( param->cqm_4iy ) );
-    memset( param->cqm_4py, 16, sizeof( param->cqm_4py ) );
-    memset( param->cqm_4ic, 16, sizeof( param->cqm_4ic ) );
-    memset( param->cqm_4pc, 16, sizeof( param->cqm_4pc ) );
-    memset( param->cqm_8iy, 16, sizeof( param->cqm_8iy ) );
-    memset( param->cqm_8py, 16, sizeof( param->cqm_8py ) );
-    memset( param->cqm_8ic, 16, sizeof( param->cqm_8ic ) );
-    memset( param->cqm_8pc, 16, sizeof( param->cqm_8pc ) );
-
-    param->b_repeat_headers = 1;
-    param->b_annexb = 1;
-    param->b_aud = 0;
-    param->b_vfr_input = 1;
-    param->i_nal_hrd = X264_NAL_HRD_NONE;
-    param->b_tff = 1;
-    param->b_pic_struct = 0;
-    param->b_fake_interlaced = 0;
-    param->i_frame_packing = -1;
-    param->b_opencl = 0;
-    param->i_opencl_device = 0;
-    param->opencl_device_id = NULL;
-    param->psz_clbin_file = NULL;
-}
-
-static int x264_param_apply_preset( x264_param_t *param, const char *preset )
-{
-    char *end;
-    int i = strtol( preset, &end, 10 );
-    if( *end == 0 && i >= 0 && i < sizeof(x264_preset_names)/sizeof(*x264_preset_names)-1 )
-        preset = x264_preset_names[i];
-
-    if( !strcasecmp( preset, "ultrafast" ) )
-    {
-        param->i_frame_reference = 1;
-        param->i_scenecut_threshold = 0;
-        param->b_deblocking_filter = 0;
-        param->b_cabac = 0;
-        param->i_bframe = 0;
-        param->analyse.intra = 0;
-        param->analyse.inter = 0;
-        param->analyse.b_transform_8x8 = 0;
-        param->analyse.i_me_method = X264_ME_DIA;
-        param->analyse.i_subpel_refine = 0;
-        param->rc.i_aq_mode = 0;
-        param->analyse.b_mixed_references = 0;
-        param->analyse.i_trellis = 0;
-        param->i_bframe_adaptive = X264_B_ADAPT_NONE;
-        param->rc.b_mb_tree = 0;
-        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
-        param->analyse.b_weighted_bipred = 0;
-        param->rc.i_lookahead = 0;
-    }
-    else if( !strcasecmp( preset, "superfast" ) )
-    {
-        param->analyse.inter = X264_ANALYSE_I8x8|X264_ANALYSE_I4x4;
-        param->analyse.i_me_method = X264_ME_DIA;
-        param->analyse.i_subpel_refine = 1;
-        param->i_frame_reference = 1;
-        param->analyse.b_mixed_references = 0;
-        param->analyse.i_trellis = 0;
-        param->rc.b_mb_tree = 0;
-        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
-        param->rc.i_lookahead = 0;
-    }
-    else if( !strcasecmp( preset, "veryfast" ) )
-    {
-        param->analyse.i_subpel_refine = 2;
-        param->i_frame_reference = 1;
-        param->analyse.b_mixed_references = 0;
-        param->analyse.i_trellis = 0;
-        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
-        param->rc.i_lookahead = 10;
-    }
-    else if( !strcasecmp( preset, "faster" ) )
-    {
-        param->analyse.b_mixed_references = 0;
-        param->i_frame_reference = 2;
-        param->analyse.i_subpel_refine = 4;
-        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
-        param->rc.i_lookahead = 20;
-    }
-    else if( !strcasecmp( preset, "fast" ) )
-    {
-        param->i_frame_reference = 2;
-        param->analyse.i_subpel_refine = 6;
-        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
-        param->rc.i_lookahead = 30;
-    }
-    else if( !strcasecmp( preset, "medium" ) )
-    {
-        /* Default is medium */
-    }
-    else if( !strcasecmp( preset, "slow" ) )
-    {
-        param->analyse.i_subpel_refine = 8;
-        param->i_frame_reference = 5;
-        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
-        param->analyse.i_trellis = 2;
-        param->rc.i_lookahead = 50;
-    }
-    else if( !strcasecmp( preset, "slower" ) )
-    {
-        param->analyse.i_me_method = X264_ME_UMH;
-        param->analyse.i_subpel_refine = 9;
-        param->i_frame_reference = 8;
-        param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
-        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
-        param->analyse.inter |= X264_ANALYSE_PSUB8x8;
-        param->analyse.i_trellis = 2;
-        param->rc.i_lookahead = 60;
-    }
-    else if( !strcasecmp( preset, "veryslow" ) )
-    {
-        param->analyse.i_me_method = X264_ME_UMH;
-        param->analyse.i_subpel_refine = 10;
-        param->analyse.i_me_range = 24;
-        param->i_frame_reference = 16;
-        param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
-        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
-        param->analyse.inter |= X264_ANALYSE_PSUB8x8;
-        param->analyse.i_trellis = 2;
-        param->i_bframe = 8;
-        param->rc.i_lookahead = 60;
-    }
-    else if( !strcasecmp( preset, "placebo" ) )
-    {
-        param->analyse.i_me_method = X264_ME_TESA;
-        param->analyse.i_subpel_refine = 11;
-        param->analyse.i_me_range = 24;
-        param->i_frame_reference = 16;
-        param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
-        param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
-        param->analyse.inter |= X264_ANALYSE_PSUB8x8;
-        param->analyse.b_fast_pskip = 0;
-        param->analyse.i_trellis = 2;
-        param->i_bframe = 16;
-        param->rc.i_lookahead = 60;
-    }
-    else
-    {
-        x264_log( NULL, X264_LOG_ERROR, "invalid preset '%s'\n", preset );
-        return -1;
-    }
-    return 0;
-}
-
-static int x264_param_apply_tune( x264_param_t *param, const char *tune )
-{
-    char *tmp = x264_malloc( strlen( tune ) + 1 );
-    if( !tmp )
-        return -1;
-    tmp = strcpy( tmp, tune );
-    char *s = strtok( tmp, ",./-+" );
-    int psy_tuning_used = 0;
-    while( s )
-    {
-        if( !strncasecmp( s, "film", 4 ) )
-        {
-            if( psy_tuning_used++ ) goto psy_failure;
-            param->i_deblocking_filter_alphac0 = -1;
-            param->i_deblocking_filter_beta = -1;
-            param->analyse.f_psy_trellis = 0.15;
-        }
-        else if( !strncasecmp( s, "animation", 9 ) )
-        {
-            if( psy_tuning_used++ ) goto psy_failure;
-            param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
-            param->i_deblocking_filter_alphac0 = 1;
-            param->i_deblocking_filter_beta = 1;
-            param->analyse.f_psy_rd = 0.4;
-            param->rc.f_aq_strength = 0.6;
-            param->i_bframe += 2;
-        }
-        else if( !strncasecmp( s, "grain", 5 ) )
-        {
-            if( psy_tuning_used++ ) goto psy_failure;
-            param->i_deblocking_filter_alphac0 = -2;
-            param->i_deblocking_filter_beta = -2;
-            param->analyse.f_psy_trellis = 0.25;
-            param->analyse.b_dct_decimate = 0;
-            param->rc.f_pb_factor = 1.1;
-            param->rc.f_ip_factor = 1.1;
-            param->rc.f_aq_strength = 0.5;
-            param->analyse.i_luma_deadzone[0] = 6;
-            param->analyse.i_luma_deadzone[1] = 6;
-            param->rc.f_qcompress = 0.8;
-        }
-        else if( !strncasecmp( s, "stillimage", 10 ) )
-        {
-            if( psy_tuning_used++ ) goto psy_failure;
-            param->i_deblocking_filter_alphac0 = -3;
-            param->i_deblocking_filter_beta = -3;
-            param->analyse.f_psy_rd = 2.0;
-            param->analyse.f_psy_trellis = 0.7;
-            param->rc.f_aq_strength = 1.2;
-        }
-        else if( !strncasecmp( s, "psnr", 4 ) )
-        {
-            if( psy_tuning_used++ ) goto psy_failure;
-            param->rc.i_aq_mode = X264_AQ_NONE;
-            param->analyse.b_psy = 0;
-        }
-        else if( !strncasecmp( s, "ssim", 4 ) )
-        {
-            if( psy_tuning_used++ ) goto psy_failure;
-            param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
-            param->analyse.b_psy = 0;
-        }
-        else if( !strncasecmp( s, "fastdecode", 10 ) )
-        {
-            param->b_deblocking_filter = 0;
-            param->b_cabac = 0;
-            param->analyse.b_weighted_bipred = 0;
-            param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
-        }
-        else if( !strncasecmp( s, "zerolatency", 11 ) )
-        {
-            param->rc.i_lookahead = 0;
-            param->i_sync_lookahead = 0;
-            param->i_bframe = 0;
-            param->b_sliced_threads = 1;
-            param->b_vfr_input = 0;
-            param->rc.b_mb_tree = 0;
-        }
-        else if( !strncasecmp( s, "touhou", 6 ) )
-        {
-            if( psy_tuning_used++ ) goto psy_failure;
-            param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
-            param->i_deblocking_filter_alphac0 = -1;
-            param->i_deblocking_filter_beta = -1;
-            param->analyse.f_psy_trellis = 0.2;
-            param->rc.f_aq_strength = 1.3;
-            if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
-                param->analyse.inter |= X264_ANALYSE_PSUB8x8;
-        }
-        else
-        {
-            x264_log( NULL, X264_LOG_ERROR, "invalid tune '%s'\n", s );
-            x264_free( tmp );
-            return -1;
-        }
-        if( 0 )
-        {
-    psy_failure:
-            x264_log( NULL, X264_LOG_WARNING, "only 1 psy tuning can be used: ignoring tune %s\n", s );
-        }
-        s = strtok( NULL, ",./-+" );
-    }
-    x264_free( tmp );
-    return 0;
-}
-
-int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
-{
-    x264_param_default( param );
-
-    if( preset && x264_param_apply_preset( param, preset ) < 0 )
-        return -1;
-    if( tune && x264_param_apply_tune( param, tune ) < 0 )
-        return -1;
-    return 0;
-}
-
-void x264_param_apply_fastfirstpass( x264_param_t *param )
-{
-    /* Set faster options in case of turbo firstpass. */
-    if( param->rc.b_stat_write && !param->rc.b_stat_read )
-    {
-        param->i_frame_reference = 1;
-        param->analyse.b_transform_8x8 = 0;
-        param->analyse.inter = 0;
-        param->analyse.i_me_method = X264_ME_DIA;
-        param->analyse.i_subpel_refine = X264_MIN( 2, param->analyse.i_subpel_refine );
-        param->analyse.i_trellis = 0;
-        param->analyse.b_fast_pskip = 1;
-    }
-}
-
-static int profile_string_to_int( const char *str )
-{
-    if( !strcasecmp( str, "baseline" ) )
-        return PROFILE_BASELINE;
-    if( !strcasecmp( str, "main" ) )
-        return PROFILE_MAIN;
-    if( !strcasecmp( str, "high" ) )
-        return PROFILE_HIGH;
-    if( !strcasecmp( str, "high10" ) )
-        return PROFILE_HIGH10;
-    if( !strcasecmp( str, "high422" ) )
-        return PROFILE_HIGH422;
-    if( !strcasecmp( str, "high444" ) )
-        return PROFILE_HIGH444_PREDICTIVE;
-    return -1;
-}
-
-int x264_param_apply_profile( x264_param_t *param, const char *profile )
-{
-    if( !profile )
-        return 0;
-
-    int p = profile_string_to_int( profile );
-    if( p < 0 )
-    {
-        x264_log( NULL, X264_LOG_ERROR, "invalid profile: %s\n", profile );
-        return -1;
-    }
-    if( p < PROFILE_HIGH444_PREDICTIVE && ((param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant <= 0) ||
-        (param->rc.i_rc_method == X264_RC_CRF && (int)(param->rc.f_rf_constant + QP_BD_OFFSET) <= 0)) )
-    {
-        x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support lossless\n", profile );
-        return -1;
-    }
-    if( p < PROFILE_HIGH444_PREDICTIVE && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I444 )
-    {
-        x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:4:4\n", profile );
-        return -1;
-    }
-    if( p < PROFILE_HIGH422 && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I422 )
-    {
-        x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:2:2\n", profile );
-        return -1;
-    }
-    if( p < PROFILE_HIGH10 && BIT_DEPTH > 8 )
-    {
-        x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d\n", profile, BIT_DEPTH );
-        return -1;
-    }
-
-    if( p == PROFILE_BASELINE )
-    {
-        param->analyse.b_transform_8x8 = 0;
-        param->b_cabac = 0;
-        param->i_cqm_preset = X264_CQM_FLAT;
-        param->psz_cqm_file = NULL;
-        param->i_bframe = 0;
-        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
-        if( param->b_interlaced )
-        {
-            x264_log( NULL, X264_LOG_ERROR, "baseline profile doesn't support interlacing\n" );
-            return -1;
-        }
-        if( param->b_fake_interlaced )
-        {
-            x264_log( NULL, X264_LOG_ERROR, "baseline profile doesn't support fake interlacing\n" );
-            return -1;
-        }
-    }
-    else if( p == PROFILE_MAIN )
-    {
-        param->analyse.b_transform_8x8 = 0;
-        param->i_cqm_preset = X264_CQM_FLAT;
-        param->psz_cqm_file = NULL;
-    }
-    return 0;
-}
-
-static int parse_enum( const char *arg, const char * const *names, int *dst )
-{
-    for( int i = 0; names[i]; i++ )
-        if( !strcasecmp( arg, names[i] ) )
-        {
-            *dst = i;
-            return 0;
-        }
-    return -1;
-}
-
-static int parse_cqm( const char *str, uint8_t *cqm, int length )
-{
-    int i = 0;
-    do {
-        int coef;
-        if( !sscanf( str, "%d", &coef ) || coef < 1 || coef > 255 )
-            return -1;
-        cqm[i++] = coef;
-    } while( i < length && (str = strchr( str, ',' )) && str++ );
-    return (i == length) ? 0 : -1;
-}
-
-static int x264_atobool( const char *str, int *b_error )
-{
-    if( !strcmp(str, "1") ||
-        !strcasecmp(str, "true") ||
-        !strcasecmp(str, "yes") )
-        return 1;
-    if( !strcmp(str, "0") ||
-        !strcasecmp(str, "false") ||
-        !strcasecmp(str, "no") )
-        return 0;
-    *b_error = 1;
-    return 0;
-}
-
-static int x264_atoi( const char *str, int *b_error )
-{
-    char *end;
-    int v = strtol( str, &end, 0 );
-    if( end == str || *end != '\0' )
-        *b_error = 1;
-    return v;
-}
-
-static double x264_atof( const char *str, int *b_error )
-{
-    char *end;
-    double v = strtod( str, &end );
-    if( end == str || *end != '\0' )
-        *b_error = 1;
-    return v;
-}
-
-#define atobool(str) ( name_was_bool = 1, x264_atobool( str, &b_error ) )
-#undef atoi
-#undef atof
-#define atoi(str) x264_atoi( str, &b_error )
-#define atof(str) x264_atof( str, &b_error )
-
-int x264_param_parse( x264_param_t *p, const char *name, const char *value )
-{
-    char *name_buf = NULL;
-    int b_error = 0;
-    int errortype = X264_PARAM_BAD_VALUE;
-    int name_was_bool;
-    int value_was_null = !value;
-
-    if( !name )
-        return X264_PARAM_BAD_NAME;
-    if( !value )
-        value = "true";
-
-    if( value[0] == '=' )
-        value++;
-
-    if( strchr( name, '_' ) ) // s/_/-/g
-    {
-        char *c;
-        name_buf = strdup(name);
-        if( !name_buf )
-            return X264_PARAM_BAD_NAME;
-        while( (c = strchr( name_buf, '_' )) )
-            *c = '-';
-        name = name_buf;
-    }
-
-    if( !strncmp( name, "no", 2 ) )
-    {
-        name += 2;
-        if( name[0] == '-' )
-            name++;
-        value = atobool(value) ? "false" : "true";
-    }
-    name_was_bool = 0;
-
-#define OPT(STR) else if( !strcmp( name, STR ) )
-#define OPT2(STR0, STR1) else if( !strcmp( name, STR0 ) || !strcmp( name, STR1 ) )
-    if( 0 );
-    OPT("asm")
-    {
-        p->cpu = isdigit(value[0]) ? atoi(value) :
-                 !strcasecmp(value, "auto") || atobool(value) ? x264_cpu_detect() : 0;
-        if( b_error )
-        {
-            char *buf = strdup( value );
-            if( buf )
-            {
-                char *tok, UNUSED *saveptr=NULL, *init;
-                b_error = 0;
-                p->cpu = 0;
-                for( init=buf; (tok=strtok_r(init, ",", &saveptr)); init=NULL )
-                {
-                    int i = 0;
-                    while( x264_cpu_names[i].flags && strcasecmp(tok, x264_cpu_names[i].name) )
-                        i++;
-                    p->cpu |= x264_cpu_names[i].flags;
-                    if( !x264_cpu_names[i].flags )
-                        b_error = 1;
-                }
-                free( buf );
-                if( (p->cpu&X264_CPU_SSSE3) && !(p->cpu&X264_CPU_SSE2_IS_SLOW) )
-                    p->cpu |= X264_CPU_SSE2_IS_FAST;
-            }
-        }
-    }
-    OPT("threads")
-    {
-        if( !strcasecmp(value, "auto") )
-            p->i_threads = X264_THREADS_AUTO;
-        else
-            p->i_threads = atoi(value);
-    }
-    OPT("lookahead-threads")
-    {
-        if( !strcasecmp(value, "auto") )
-            p->i_lookahead_threads = X264_THREADS_AUTO;
-        else
-            p->i_lookahead_threads = atoi(value);
-    }
-    OPT("sliced-threads")
-        p->b_sliced_threads = atobool(value);
-    OPT("sync-lookahead")
-    {
-        if( !strcasecmp(value, "auto") )
-            p->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
-        else
-            p->i_sync_lookahead = atoi(value);
-    }
-    OPT2("deterministic", "n-deterministic")
-        p->b_deterministic = atobool(value);
-    OPT("cpu-independent")
-        p->b_cpu_independent = atobool(value);
-    OPT2("level", "level-idc")
-    {
-        if( !strcmp(value, "1b") )
-            p->i_level_idc = 9;
-        else if( atof(value) < 7 )
-            p->i_level_idc = (int)(10*atof(value)+.5);
-        else
-            p->i_level_idc = atoi(value);
-    }
-    OPT("bluray-compat")
-        p->b_bluray_compat = atobool(value);
-    OPT("avcintra-class")
-        p->i_avcintra_class = atoi(value);
-    OPT("sar")
-    {
-        b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
-                    2 != sscanf( value, "%d/%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) );
-    }
-    OPT("overscan")
-        b_error |= parse_enum( value, x264_overscan_names, &p->vui.i_overscan );
-    OPT("videoformat")
-        b_error |= parse_enum( value, x264_vidformat_names, &p->vui.i_vidformat );
-    OPT("fullrange")
-        b_error |= parse_enum( value, x264_fullrange_names, &p->vui.b_fullrange );
-    OPT("colorprim")
-        b_error |= parse_enum( value, x264_colorprim_names, &p->vui.i_colorprim );
-    OPT("transfer")
-        b_error |= parse_enum( value, x264_transfer_names, &p->vui.i_transfer );
-    OPT("colormatrix")
-        b_error |= parse_enum( value, x264_colmatrix_names, &p->vui.i_colmatrix );
-    OPT("chromaloc")
-    {
-        p->vui.i_chroma_loc = atoi(value);
-        b_error = ( p->vui.i_chroma_loc < 0 || p->vui.i_chroma_loc > 5 );
-    }
-    OPT("fps")
-    {
-        if( sscanf( value, "%u/%u", &p->i_fps_num, &p->i_fps_den ) != 2 )
-        {
-            double fps = atof(value);
-            if( fps > 0.0 && fps <= INT_MAX/1000.0 )
-            {
-                p->i_fps_num = (int)(fps * 1000.0 + .5);
-                p->i_fps_den = 1000;
-            }
-            else
-            {
-                p->i_fps_num = atoi(value);
-                p->i_fps_den = 1;
-            }
-        }
-    }
-    OPT2("ref", "frameref")
-        p->i_frame_reference = atoi(value);
-    OPT("dpb-size")
-        p->i_dpb_size = atoi(value);
-    OPT("keyint")
-    {
-        if( strstr( value, "infinite" ) )
-            p->i_keyint_max = X264_KEYINT_MAX_INFINITE;
-        else
-            p->i_keyint_max = atoi(value);
-    }
-    OPT2("min-keyint", "keyint-min")
-    {
-        p->i_keyint_min = atoi(value);
-        if( p->i_keyint_max < p->i_keyint_min )
-            p->i_keyint_max = p->i_keyint_min;
-    }
-    OPT("scenecut")
-    {
-        p->i_scenecut_threshold = atobool(value);
-        if( b_error || p->i_scenecut_threshold )
-        {
-            b_error = 0;
-            p->i_scenecut_threshold = atoi(value);
-        }
-    }
-    OPT("intra-refresh")
-        p->b_intra_refresh = atobool(value);
-    OPT("bframes")
-        p->i_bframe = atoi(value);
-    OPT("b-adapt")
-    {
-        p->i_bframe_adaptive = atobool(value);
-        if( b_error )
-        {
-            b_error = 0;
-            p->i_bframe_adaptive = atoi(value);
-        }
-    }
-    OPT("b-bias")
-        p->i_bframe_bias = atoi(value);
-    OPT("b-pyramid")
-    {
-        b_error |= parse_enum( value, x264_b_pyramid_names, &p->i_bframe_pyramid );
-        if( b_error )
-        {
-            b_error = 0;
-            p->i_bframe_pyramid = atoi(value);
-        }
-    }
-    OPT("open-gop")
-        p->b_open_gop = atobool(value);
-    OPT("nf")
-        p->b_deblocking_filter = !atobool(value);
-    OPT2("filter", "deblock")
-    {
-        if( 2 == sscanf( value, "%d:%d", &p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) ||
-            2 == sscanf( value, "%d,%d", &p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) )
-        {
-            p->b_deblocking_filter = 1;
-        }
-        else if( sscanf( value, "%d", &p->i_deblocking_filter_alphac0 ) )
-        {
-            p->b_deblocking_filter = 1;
-            p->i_deblocking_filter_beta = p->i_deblocking_filter_alphac0;
-        }
-        else
-            p->b_deblocking_filter = atobool(value);
-    }
-    OPT("slice-max-size")
-        p->i_slice_max_size = atoi(value);
-    OPT("slice-max-mbs")
-        p->i_slice_max_mbs = atoi(value);
-    OPT("slice-min-mbs")
-        p->i_slice_min_mbs = atoi(value);
-    OPT("slices")
-        p->i_slice_count = atoi(value);
-    OPT("slices-max")
-        p->i_slice_count_max = atoi(value);
-    OPT("cabac")
-        p->b_cabac = atobool(value);
-    OPT("cabac-idc")
-        p->i_cabac_init_idc = atoi(value);
-    OPT("interlaced")
-        p->b_interlaced = atobool(value);
-    OPT("tff")
-        p->b_interlaced = p->b_tff = atobool(value);
-    OPT("bff")
-    {
-        p->b_interlaced = atobool(value);
-        p->b_tff = !p->b_interlaced;
-    }
-    OPT("constrained-intra")
-        p->b_constrained_intra = atobool(value);
-    OPT("cqm")
-    {
-        if( strstr( value, "flat" ) )
-            p->i_cqm_preset = X264_CQM_FLAT;
-        else if( strstr( value, "jvt" ) )
-            p->i_cqm_preset = X264_CQM_JVT;
-        else
-            p->psz_cqm_file = strdup(value);
-    }
-    OPT("cqmfile")
-        p->psz_cqm_file = strdup(value);
-    OPT("cqm4")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_4iy, 16 );
-        b_error |= parse_cqm( value, p->cqm_4py, 16 );
-        b_error |= parse_cqm( value, p->cqm_4ic, 16 );
-        b_error |= parse_cqm( value, p->cqm_4pc, 16 );
-    }
-    OPT("cqm8")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_8iy, 64 );
-        b_error |= parse_cqm( value, p->cqm_8py, 64 );
-        b_error |= parse_cqm( value, p->cqm_8ic, 64 );
-        b_error |= parse_cqm( value, p->cqm_8pc, 64 );
-    }
-    OPT("cqm4i")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_4iy, 16 );
-        b_error |= parse_cqm( value, p->cqm_4ic, 16 );
-    }
-    OPT("cqm4p")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_4py, 16 );
-        b_error |= parse_cqm( value, p->cqm_4pc, 16 );
-    }
-    OPT("cqm4iy")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_4iy, 16 );
-    }
-    OPT("cqm4ic")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_4ic, 16 );
-    }
-    OPT("cqm4py")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_4py, 16 );
-    }
-    OPT("cqm4pc")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_4pc, 16 );
-    }
-    OPT("cqm8i")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_8iy, 64 );
-        b_error |= parse_cqm( value, p->cqm_8ic, 64 );
-    }
-    OPT("cqm8p")
-    {
-        p->i_cqm_preset = X264_CQM_CUSTOM;
-        b_error |= parse_cqm( value, p->cqm_8py, 64 );
-        b_error |= parse_cqm( value, p->cqm_8pc, 64 );
-    }
-    OPT("log")
-        p->i_log_level = atoi(value);
-    OPT("dump-yuv")
-        p->psz_dump_yuv = strdup(value);
-    OPT2("analyse", "partitions")
-    {
-        p->analyse.inter = 0;
-        if( strstr( value, "none" ) )  p->analyse.inter =  0;
-        if( strstr( value, "all" ) )   p->analyse.inter = ~0;
-
-        if( strstr( value, "i4x4" ) )  p->analyse.inter |= X264_ANALYSE_I4x4;
-        if( strstr( value, "i8x8" ) )  p->analyse.inter |= X264_ANALYSE_I8x8;
-        if( strstr( value, "p8x8" ) )  p->analyse.inter |= X264_ANALYSE_PSUB16x16;
-        if( strstr( value, "p4x4" ) )  p->analyse.inter |= X264_ANALYSE_PSUB8x8;
-        if( strstr( value, "b8x8" ) )  p->analyse.inter |= X264_ANALYSE_BSUB16x16;
-    }
-    OPT("8x8dct")
-        p->analyse.b_transform_8x8 = atobool(value);
-    OPT2("weightb", "weight-b")
-        p->analyse.b_weighted_bipred = atobool(value);
-    OPT("weightp")
-        p->analyse.i_weighted_pred = atoi(value);
-    OPT2("direct", "direct-pred")
-        b_error |= parse_enum( value, x264_direct_pred_names, &p->analyse.i_direct_mv_pred );
-    OPT("chroma-qp-offset")
-        p->analyse.i_chroma_qp_offset = atoi(value);
-    OPT("me")
-        b_error |= parse_enum( value, x264_motion_est_names, &p->analyse.i_me_method );
-    OPT2("merange", "me-range")
-        p->analyse.i_me_range = atoi(value);
-    OPT2("mvrange", "mv-range")
-        p->analyse.i_mv_range = atoi(value);
-    OPT2("mvrange-thread", "mv-range-thread")
-        p->analyse.i_mv_range_thread = atoi(value);
-    OPT2("subme", "subq")
-        p->analyse.i_subpel_refine = atoi(value);
-    OPT("psy-rd")
-    {
-        if( 2 == sscanf( value, "%f:%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) ||
-            2 == sscanf( value, "%f,%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) ||
-            2 == sscanf( value, "%f|%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ))
-        { }
-        else if( sscanf( value, "%f", &p->analyse.f_psy_rd ) )
-        {
-            p->analyse.f_psy_trellis = 0;
-        }
-        else
-        {
-            p->analyse.f_psy_rd = 0;
-            p->analyse.f_psy_trellis = 0;
-        }
-    }
-    OPT("psy")
-        p->analyse.b_psy = atobool(value);
-    OPT("chroma-me")
-        p->analyse.b_chroma_me = atobool(value);
-    OPT("mixed-refs")
-        p->analyse.b_mixed_references = atobool(value);
-    OPT("trellis")
-        p->analyse.i_trellis = atoi(value);
-    OPT("fast-pskip")
-        p->analyse.b_fast_pskip = atobool(value);
-    OPT("dct-decimate")
-        p->analyse.b_dct_decimate = atobool(value);
-    OPT("deadzone-inter")
-        p->analyse.i_luma_deadzone[0] = atoi(value);
-    OPT("deadzone-intra")
-        p->analyse.i_luma_deadzone[1] = atoi(value);
-    OPT("nr")
-        p->analyse.i_noise_reduction = atoi(value);
-    OPT("bitrate")
-    {
-        p->rc.i_bitrate = atoi(value);
-        p->rc.i_rc_method = X264_RC_ABR;
-    }
-    OPT2("qp", "qp_constant")
-    {
-        p->rc.i_qp_constant = atoi(value);
-        p->rc.i_rc_method = X264_RC_CQP;
-    }
-    OPT("crf")
-    {
-        p->rc.f_rf_constant = atof(value);
-        p->rc.i_rc_method = X264_RC_CRF;
-    }
-    OPT("crf-max")
-        p->rc.f_rf_constant_max = atof(value);
-    OPT("rc-lookahead")
-        p->rc.i_lookahead = atoi(value);
-    OPT2("qpmin", "qp-min")
-        p->rc.i_qp_min = atoi(value);
-    OPT2("qpmax", "qp-max")
-        p->rc.i_qp_max = atoi(value);
-    OPT2("qpstep", "qp-step")
-        p->rc.i_qp_step = atoi(value);
-    OPT("ratetol")
-        p->rc.f_rate_tolerance = !strncmp("inf", value, 3) ? 1e9 : atof(value);
-    OPT("vbv-maxrate")
-        p->rc.i_vbv_max_bitrate = atoi(value);
-    OPT("vbv-bufsize")
-        p->rc.i_vbv_buffer_size = atoi(value);
-    OPT("vbv-init")
-        p->rc.f_vbv_buffer_init = atof(value);
-    OPT2("ipratio", "ip-factor")
-        p->rc.f_ip_factor = atof(value);
-    OPT2("pbratio", "pb-factor")
-        p->rc.f_pb_factor = atof(value);
-    OPT("aq-mode")
-        p->rc.i_aq_mode = atoi(value);
-    OPT("aq-strength")
-        p->rc.f_aq_strength = atof(value);
-    OPT("pass")
-    {
-        int pass = x264_clip3( atoi(value), 0, 3 );
-        p->rc.b_stat_write = pass & 1;
-        p->rc.b_stat_read = pass & 2;
-    }
-    OPT("stats")
-    {
-        p->rc.psz_stat_in = strdup(value);
-        p->rc.psz_stat_out = strdup(value);
-    }
-    OPT("qcomp")
-        p->rc.f_qcompress = atof(value);
-    OPT("mbtree")
-        p->rc.b_mb_tree = atobool(value);
-    OPT("qblur")
-        p->rc.f_qblur = atof(value);
-    OPT2("cplxblur", "cplx-blur")
-        p->rc.f_complexity_blur = atof(value);
-    OPT("zones")
-        p->rc.psz_zones = strdup(value);
-    OPT("crop-rect")
-        b_error |= sscanf( value, "%u,%u,%u,%u", &p->crop_rect.i_left, &p->crop_rect.i_top,
-                                                 &p->crop_rect.i_right, &p->crop_rect.i_bottom ) != 4;
-    OPT("psnr")
-        p->analyse.b_psnr = atobool(value);
-    OPT("ssim")
-        p->analyse.b_ssim = atobool(value);
-    OPT("aud")
-        p->b_aud = atobool(value);
-    OPT("sps-id")
-        p->i_sps_id = atoi(value);
-    OPT("global-header")
-        p->b_repeat_headers = !atobool(value);
-    OPT("repeat-headers")
-        p->b_repeat_headers = atobool(value);
-    OPT("annexb")
-        p->b_annexb = atobool(value);
-    OPT("force-cfr")
-        p->b_vfr_input = !atobool(value);
-    OPT("nal-hrd")
-        b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
-    OPT("filler")
-        p->rc.b_filler = atobool(value);
-    OPT("pic-struct")
-        p->b_pic_struct = atobool(value);
-    OPT("fake-interlaced")
-        p->b_fake_interlaced = atobool(value);
-    OPT("frame-packing")
-        p->i_frame_packing = atoi(value);
-    OPT("stitchable")
-        p->b_stitchable = atobool(value);
-    OPT("opencl")
-        p->b_opencl = atobool( value );
-    OPT("opencl-clbin")
-        p->psz_clbin_file = strdup( value );
-    OPT("opencl-device")
-        p->i_opencl_device = atoi( value );
-    else
-    {
-        b_error = 1;
-        errortype = X264_PARAM_BAD_NAME;
-    }
-#undef OPT
-#undef OPT2
-#undef atobool
-#undef atoi
-#undef atof
-
-    if( name_buf )
-        free( name_buf );
-
-    b_error |= value_was_null && !name_was_bool;
-    return b_error ? errortype : 0;
-}
-
 /****************************************************************************
  * x264_log:
  ****************************************************************************/
@@ -1085,362 +42,3 @@
         va_end( arg );
     }
 }
-
-static void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg )
-{
-    char *psz_prefix;
-    switch( i_level )
-    {
-        case X264_LOG_ERROR:
-            psz_prefix = "error";
-            break;
-        case X264_LOG_WARNING:
-            psz_prefix = "warning";
-            break;
-        case X264_LOG_INFO:
-            psz_prefix = "info";
-            break;
-        case X264_LOG_DEBUG:
-            psz_prefix = "debug";
-            break;
-        default:
-            psz_prefix = "unknown";
-            break;
-    }
-    fprintf( stderr, "x264 [%s]: ", psz_prefix );
-    x264_vfprintf( stderr, psz_fmt, arg );
-}
-
-/****************************************************************************
- * x264_picture_init:
- ****************************************************************************/
-void x264_picture_init( x264_picture_t *pic )
-{
-    memset( pic, 0, sizeof( x264_picture_t ) );
-    pic->i_type = X264_TYPE_AUTO;
-    pic->i_qpplus1 = X264_QP_AUTO;
-    pic->i_pic_struct = PIC_STRUCT_AUTO;
-}
-
-/****************************************************************************
- * x264_picture_alloc:
- ****************************************************************************/
-int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
-{
-    typedef struct
-    {
-        int planes;
-        int width_fix8[3];
-        int height_fix8[3];
-    } x264_csp_tab_t;
-
-    static const x264_csp_tab_t x264_csp_tab[] =
-    {
-        [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
-        [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
-        [X264_CSP_NV12] = { 2, { 256*1, 256*1 },        { 256*1, 256/2 },       },
-        [X264_CSP_NV21] = { 2, { 256*1, 256*1 },        { 256*1, 256/2 },       },
-        [X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
-        [X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
-        [X264_CSP_NV16] = { 2, { 256*1, 256*1 },        { 256*1, 256*1 },       },
-        [X264_CSP_YUYV] = { 1, { 256*2 },               { 256*1 },              },
-        [X264_CSP_UYVY] = { 1, { 256*2 },               { 256*1 },              },
-        [X264_CSP_I444] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
-        [X264_CSP_YV24] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } },
-        [X264_CSP_BGR]  = { 1, { 256*3 },               { 256*1 },              },
-        [X264_CSP_BGRA] = { 1, { 256*4 },               { 256*1 },              },
-        [X264_CSP_RGB]  = { 1, { 256*3 },               { 256*1 },              },
-    };
-
-    int csp = i_csp & X264_CSP_MASK;
-    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
-        return -1;
-    x264_picture_init( pic );
-    pic->img.i_csp = i_csp;
-    pic->img.i_plane = x264_csp_tab[csp].planes;
-    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
-    int plane_offset[3] = {0};
-    int frame_size = 0;
-    for( int i = 0; i < pic->img.i_plane; i++ )
-    {
-        int stride = (((int64_t)i_width * x264_csp_tab[csp].width_fix8[i]) >> 8) * depth_factor;
-        int plane_size = (((int64_t)i_height * x264_csp_tab[csp].height_fix8[i]) >> 8) * stride;
-        pic->img.i_stride[i] = stride;
-        plane_offset[i] = frame_size;
-        frame_size += plane_size;
-    }
-    pic->img.plane[0] = x264_malloc( frame_size );
-    if( !pic->img.plane[0] )
-        return -1;
-    for( int i = 1; i < pic->img.i_plane; i++ )
-        pic->img.plane[i] = pic->img.plane[0] + plane_offset[i];
-    return 0;
-}
-
-/****************************************************************************
- * x264_picture_clean:
- ****************************************************************************/
-void x264_picture_clean( x264_picture_t *pic )
-{
-    x264_free( pic->img.plane[0] );
-
-    /* just to be safe */
-    memset( pic, 0, sizeof( x264_picture_t ) );
-}
-
-/****************************************************************************
- * x264_malloc:
- ****************************************************************************/
-void *x264_malloc( int i_size )
-{
-    uint8_t *align_buf = NULL;
-#if HAVE_MALLOC_H
-#if HAVE_THP
-#define HUGE_PAGE_SIZE 2*1024*1024
-#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
-    /* Attempt to allocate huge pages to reduce TLB misses. */
-    if( i_size >= HUGE_PAGE_THRESHOLD )
-    {
-        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
-        if( align_buf )
-        {
-            /* Round up to the next huge page boundary if we are close enough. */
-            size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
-            madvise( align_buf, madv_size, MADV_HUGEPAGE );
-        }
-    }
-    else
-#undef HUGE_PAGE_SIZE
-#undef HUGE_PAGE_THRESHOLD
-#endif
-        align_buf = memalign( NATIVE_ALIGN, i_size );
-#else
-    uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
-    if( buf )
-    {
-        align_buf = buf + (NATIVE_ALIGN-1) + sizeof(void **);
-        align_buf -= (intptr_t) align_buf & (NATIVE_ALIGN-1);
-        *( (void **) ( align_buf - sizeof(void **) ) ) = buf;
-    }
-#endif
-    if( !align_buf )
-        x264_log( NULL, X264_LOG_ERROR, "malloc of size %d failed\n", i_size );
-    return align_buf;
-}
-
-/****************************************************************************
- * x264_free:
- ****************************************************************************/
-void x264_free( void *p )
-{
-    if( p )
-    {
-#if HAVE_MALLOC_H
-        free( p );
-#else
-        free( *( ( ( void **) p ) - 1 ) );
-#endif
-    }
-}
-
-/****************************************************************************
- * x264_reduce_fraction:
- ****************************************************************************/
-#define REDUCE_FRACTION( name, type )\
-void name( type *n, type *d )\
-{                   \
-    type a = *n;    \
-    type b = *d;    \
-    type c;         \
-    if( !a || !b )  \
-        return;     \
-    c = a % b;      \
-    while( c )      \
-    {               \
-        a = b;      \
-        b = c;      \
-        c = a % b;  \
-    }               \
-    *n /= b;        \
-    *d /= b;        \
-}
-
-REDUCE_FRACTION( x264_reduce_fraction  , uint32_t )
-REDUCE_FRACTION( x264_reduce_fraction64, uint64_t )
-
-/****************************************************************************
- * x264_slurp_file:
- ****************************************************************************/
-char *x264_slurp_file( const char *filename )
-{
-    int b_error = 0;
-    int64_t i_size;
-    char *buf;
-    FILE *fh = x264_fopen( filename, "rb" );
-    if( !fh )
-        return NULL;
-
-    b_error |= fseek( fh, 0, SEEK_END ) < 0;
-    b_error |= ( i_size = ftell( fh ) ) <= 0;
-    if( WORD_SIZE == 4 )
-        b_error |= i_size > INT32_MAX;
-    b_error |= fseek( fh, 0, SEEK_SET ) < 0;
-    if( b_error )
-        goto error;
-
-    buf = x264_malloc( i_size+2 );
-    if( !buf )
-        goto error;
-
-    b_error |= fread( buf, 1, i_size, fh ) != i_size;
-    fclose( fh );
-    if( b_error )
-    {
-        x264_free( buf );
-        return NULL;
-    }
-
-    if( buf[i_size-1] != '\n' )
-        buf[i_size++] = '\n';
-    buf[i_size] = '\0';
-
-    return buf;
-error:
-    fclose( fh );
-    return NULL;
-}
-
-/****************************************************************************
- * x264_param2string:
- ****************************************************************************/
-char *x264_param2string( x264_param_t *p, int b_res )
-{
-    int len = 1000;
-    char *buf, *s;
-    if( p->rc.psz_zones )
-        len += strlen(p->rc.psz_zones);
-    buf = s = x264_malloc( len );
-    if( !buf )
-        return NULL;
-
-    if( b_res )
-    {
-        s += sprintf( s, "%dx%d ", p->i_width, p->i_height );
-        s += sprintf( s, "fps=%u/%u ", p->i_fps_num, p->i_fps_den );
-        s += sprintf( s, "timebase=%u/%u ", p->i_timebase_num, p->i_timebase_den );
-        s += sprintf( s, "bitdepth=%d ", BIT_DEPTH );
-    }
-
-    if( p->b_opencl )
-        s += sprintf( s, "opencl=%d ", p->b_opencl );
-    s += sprintf( s, "cabac=%d", p->b_cabac );
-    s += sprintf( s, " ref=%d", p->i_frame_reference );
-    s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
-                  p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta );
-    s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
-    s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
-    s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
-    s += sprintf( s, " psy=%d", p->analyse.b_psy );
-    if( p->analyse.b_psy )
-        s += sprintf( s, " psy_rd=%.2f:%.2f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
-    s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
-    s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
-    s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
-    s += sprintf( s, " trellis=%d", p->analyse.i_trellis );
-    s += sprintf( s, " 8x8dct=%d", p->analyse.b_transform_8x8 );
-    s += sprintf( s, " cqm=%d", p->i_cqm_preset );
-    s += sprintf( s, " deadzone=%d,%d", p->analyse.i_luma_deadzone[0], p->analyse.i_luma_deadzone[1] );
-    s += sprintf( s, " fast_pskip=%d", p->analyse.b_fast_pskip );
-    s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
-    s += sprintf( s, " threads=%d", p->i_threads );
-    s += sprintf( s, " lookahead_threads=%d", p->i_lookahead_threads );
-    s += sprintf( s, " sliced_threads=%d", p->b_sliced_threads );
-    if( p->i_slice_count )
-        s += sprintf( s, " slices=%d", p->i_slice_count );
-    if( p->i_slice_count_max )
-        s += sprintf( s, " slices_max=%d", p->i_slice_count_max );
-    if( p->i_slice_max_size )
-        s += sprintf( s, " slice_max_size=%d", p->i_slice_max_size );
-    if( p->i_slice_max_mbs )
-        s += sprintf( s, " slice_max_mbs=%d", p->i_slice_max_mbs );
-    if( p->i_slice_min_mbs )
-        s += sprintf( s, " slice_min_mbs=%d", p->i_slice_min_mbs );
-    s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
-    s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );
-    s += sprintf( s, " interlaced=%s", p->b_interlaced ? p->b_tff ? "tff" : "bff" : p->b_fake_interlaced ? "fake" : "0" );
-    s += sprintf( s, " bluray_compat=%d", p->b_bluray_compat );
-    if( p->b_stitchable )
-        s += sprintf( s, " stitchable=%d", p->b_stitchable );
-
-    s += sprintf( s, " constrained_intra=%d", p->b_constrained_intra );
-
-    s += sprintf( s, " bframes=%d", p->i_bframe );
-    if( p->i_bframe )
-    {
-        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
-                      p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
-                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop );
-    }
-    s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
-
-    if( p->i_keyint_max == X264_KEYINT_MAX_INFINITE )
-        s += sprintf( s, " keyint=infinite" );
-    else
-        s += sprintf( s, " keyint=%d", p->i_keyint_max );
-    s += sprintf( s, " keyint_min=%d scenecut=%d intra_refresh=%d",
-                  p->i_keyint_min, p->i_scenecut_threshold, p->b_intra_refresh );
-
-    if( p->rc.b_mb_tree || p->rc.i_vbv_buffer_size )
-        s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
-
-    s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
-                               ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_max_bitrate == p->rc.i_bitrate ? "cbr" : "abr" )
-                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
-    if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
-    {
-        if( p->rc.i_rc_method == X264_RC_CRF )
-            s += sprintf( s, " crf=%.1f", p->rc.f_rf_constant );
-        else
-            s += sprintf( s, " bitrate=%d ratetol=%.1f",
-                          p->rc.i_bitrate, p->rc.f_rate_tolerance );
-        s += sprintf( s, " qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d",
-                      p->rc.f_qcompress, p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step );
-        if( p->rc.b_stat_read )
-            s += sprintf( s, " cplxblur=%.1f qblur=%.1f",
-                          p->rc.f_complexity_blur, p->rc.f_qblur );
-        if( p->rc.i_vbv_buffer_size )
-        {
-            s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d",
-                          p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size );
-            if( p->rc.i_rc_method == X264_RC_CRF )
-                s += sprintf( s, " crf_max=%.1f", p->rc.f_rf_constant_max );
-        }
-    }
-    else if( p->rc.i_rc_method == X264_RC_CQP )
-        s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
-
-    if( p->rc.i_vbv_buffer_size )
-        s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
-    if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom )
-        s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
-                                                   p->crop_rect.i_right, p->crop_rect.i_bottom );
-    if( p->i_frame_packing >= 0 )
-        s += sprintf( s, " frame-packing=%d", p->i_frame_packing );
-
-    if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
-    {
-        s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
-        if( p->i_bframe && !p->rc.b_mb_tree )
-            s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
-        s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
-        if( p->rc.i_aq_mode )
-            s += sprintf( s, ":%.2f", p->rc.f_aq_strength );
-        if( p->rc.psz_zones )
-            s += sprintf( s, " zones=%s", p->rc.psz_zones );
-        else if( p->rc.i_zones )
-            s += sprintf( s, " zones" );
-    }
-
-    return buf;
-}
-
diff -Nru x264-0.152.2854+gite9a5903/common/common.h x264-0.158.2988+git-20191101.7817004/common/common.h
--- x264-0.152.2854+gite9a5903/common/common.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/common.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.h: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,101 +27,46 @@
 #ifndef X264_COMMON_H
 #define X264_COMMON_H
 
+#include "base.h"
+
+/* Macros for templating function calls according to bit depth */
+#define x264_template(w) x264_glue3(x264, BIT_DEPTH, w)
+
+/****************************************************************************
+ * API Templates
+ ****************************************************************************/
+#define x264_nal_encode x264_template(nal_encode)
+#define x264_encoder_reconfig x264_template(encoder_reconfig)
+#define x264_encoder_parameters x264_template(encoder_parameters)
+#define x264_encoder_headers x264_template(encoder_headers)
+#define x264_encoder_encode x264_template(encoder_encode)
+#define x264_encoder_close x264_template(encoder_close)
+#define x264_encoder_delayed_frames x264_template(encoder_delayed_frames)
+#define x264_encoder_maximum_delayed_frames x264_template(encoder_maximum_delayed_frames)
+#define x264_encoder_intra_refresh x264_template(encoder_intra_refresh)
+#define x264_encoder_invalidate_reference x264_template(encoder_invalidate_reference)
+
+/* This undef allows to rename the external symbol and force link failure in case
+ * of incompatible libraries. Then the define enables templating as above. */
+#undef x264_encoder_open
+#define x264_encoder_open x264_template(encoder_open)
+
 /****************************************************************************
  * Macros
  ****************************************************************************/
-#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) )
-#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) )
-#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c)))
-#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c)))
-#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d)))
-#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d)))
-#define XCHG(type,a,b) do { type t = a; a = b; b = t; } while( 0 )
-#define IS_DISPOSABLE(type) ( type == X264_TYPE_B )
-#define FIX8(f) ((int)(f*(1<<8)+.5))
-#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
-#define ARRAY_ELEMS(a) ((sizeof(a))/(sizeof(a[0])))
-
-#define CHECKED_MALLOC( var, size )\
-do {\
-    var = x264_malloc( size );\
-    if( !var )\
-        goto fail;\
-} while( 0 )
-#define CHECKED_MALLOCZERO( var, size )\
-do {\
-    CHECKED_MALLOC( var, size );\
-    memset( var, 0, size );\
-} while( 0 )
-
-/* Macros for merging multiple allocations into a single large malloc, for improved
- * use with huge pages. */
-
-/* Needs to be enough to contain any set of buffers that use combined allocations */
-#define PREALLOC_BUF_SIZE 1024
-
-#define PREALLOC_INIT\
-    int    prealloc_idx = 0;\
-    size_t prealloc_size = 0;\
-    uint8_t **preallocs[PREALLOC_BUF_SIZE];
-
-#define PREALLOC( var, size )\
-do {\
-    var = (void*)prealloc_size;\
-    preallocs[prealloc_idx++] = (uint8_t**)&var;\
-    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
-} while( 0 )
-
-#define PREALLOC_END( ptr )\
-do {\
-    CHECKED_MALLOC( ptr, prealloc_size );\
-    while( prealloc_idx-- )\
-        *preallocs[prealloc_idx] += (intptr_t)ptr;\
-} while( 0 )
-
-#define ARRAY_SIZE(array)  (sizeof(array)/sizeof(array[0]))
-
-#define X264_BFRAME_MAX 16
-#define X264_REF_MAX 16
-#define X264_THREAD_MAX 128
-#define X264_LOOKAHEAD_THREAD_MAX 16
 #define X264_PCM_COST (FRAME_SIZE(256*BIT_DEPTH)+16)
-#define X264_LOOKAHEAD_MAX 250
 #define QP_BD_OFFSET (6*(BIT_DEPTH-8))
 #define QP_MAX_SPEC (51+QP_BD_OFFSET)
 #define QP_MAX (QP_MAX_SPEC+18)
-#define QP_MAX_MAX (51+2*6+18)
 #define PIXEL_MAX ((1 << BIT_DEPTH)-1)
 // arbitrary, but low because SATD scores are 1/4 normal
 #define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
 #define SPEC_QP(x) X264_MIN((x), QP_MAX_SPEC)
 
-// number of pixels (per thread) in progress at any given time.
-// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
-#define X264_THREAD_HEIGHT 24
-
-/* WEIGHTP_FAKE is set when mb_tree & psy are enabled, but normal weightp is disabled
- * (such as in baseline). It checks for fades in lookahead and adjusts qp accordingly
- * to increase quality. Defined as (-1) so that if(i_weighted_pred > 0) is true only when
- * real weights are being used. */
-
-#define X264_WEIGHTP_FAKE (-1)
-
 #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
 #define FILLER_OVERHEAD (NALU_OVERHEAD+1)
 #define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
 
-/****************************************************************************
- * Includes
- ****************************************************************************/
-#include "osdep.h"
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <limits.h>
-
 #if HAVE_INTERLACED
 #   define MB_INTERLACED h->mb.b_interlaced
 #   define SLICE_MBAFF h->sh.b_mbaff
@@ -141,29 +86,10 @@
 #    define CHROMA_V_SHIFT h->mb.chroma_v_shift
 #endif
 
-#define CHROMA_SIZE(s) ((s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT))
+#define CHROMA_SIZE(s) (CHROMA_FORMAT ? (s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT) : 0)
 #define FRAME_SIZE(s) ((s)+2*CHROMA_SIZE(s))
 #define CHROMA444 (CHROMA_FORMAT == CHROMA_444)
 
-/* Unions for type-punning.
- * Mn: load or store n bits, aligned, native-endian
- * CPn: copy n bits, aligned, native-endian
- * we don't use memcpy for CPn because memcpy's args aren't assumed to be aligned */
-typedef union { uint16_t i; uint8_t  c[2]; } MAY_ALIAS x264_union16_t;
-typedef union { uint32_t i; uint16_t b[2]; uint8_t  c[4]; } MAY_ALIAS x264_union32_t;
-typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_ALIAS x264_union64_t;
-typedef struct { uint64_t i[2]; } x264_uint128_t;
-typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_t;
-#define M16(src) (((x264_union16_t*)(src))->i)
-#define M32(src) (((x264_union32_t*)(src))->i)
-#define M64(src) (((x264_union64_t*)(src))->i)
-#define M128(src) (((x264_union128_t*)(src))->i)
-#define M128_ZERO ((x264_uint128_t){{0,0}})
-#define CP16(dst,src) M16(dst) = M16(src)
-#define CP32(dst,src) M32(dst) = M32(src)
-#define CP64(dst,src) M64(dst) = M64(src)
-#define CP128(dst,src) M128(dst) = M128(src)
-
 #if HIGH_BIT_DEPTH
     typedef uint16_t pixel;
     typedef uint64_t pixel4;
@@ -182,55 +108,11 @@
 #   define MPIXEL_X4(src) M32(src)
 #endif
 
-#define BIT_DEPTH X264_BIT_DEPTH
-
 #define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
 
-#define X264_SCAN8_LUMA_SIZE (5*8)
-#define X264_SCAN8_SIZE (X264_SCAN8_LUMA_SIZE*3)
-#define X264_SCAN8_0 (4+1*8)
-
-/* Scan8 organization:
- *    0 1 2 3 4 5 6 7
- * 0  DY    y y y y y
- * 1        y Y Y Y Y
- * 2        y Y Y Y Y
- * 3        y Y Y Y Y
- * 4        y Y Y Y Y
- * 5  DU    u u u u u
- * 6        u U U U U
- * 7        u U U U U
- * 8        u U U U U
- * 9        u U U U U
- * 10 DV    v v v v v
- * 11       v V V V V
- * 12       v V V V V
- * 13       v V V V V
- * 14       v V V V V
- * DY/DU/DV are for luma/chroma DC.
- */
-
-#define LUMA_DC   48
-#define CHROMA_DC 49
-
-static const uint8_t x264_scan8[16*3 + 3] =
-{
-    4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
-    6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
-    4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
-    6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
-    4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
-    6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
-    4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
-    6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
-    4+11*8, 5+11*8, 4+12*8, 5+12*8,
-    6+11*8, 7+11*8, 6+12*8, 7+12*8,
-    4+13*8, 5+13*8, 4+14*8, 5+14*8,
-    6+13*8, 7+13*8, 6+14*8, 7+14*8,
-    0+ 0*8, 0+ 5*8, 0+10*8
-};
-
-#include "x264.h"
+/****************************************************************************
+ * Includes
+ ****************************************************************************/
 #if HAVE_OPENCL
 #include "opencl.h"
 #endif
@@ -243,30 +125,19 @@
 #include "frame.h"
 #include "dct.h"
 #include "quant.h"
-#include "cpu.h"
 #include "threadpool.h"
 
 /****************************************************************************
  * General functions
  ****************************************************************************/
-/* x264_malloc : will do or emulate a memalign
- * you have to use x264_free for buffers allocated with x264_malloc */
-void *x264_malloc( int );
-void  x264_free( void * );
-
-/* x264_slurp_file: malloc space for the whole file and read it */
-char *x264_slurp_file( const char *filename );
-
-/* x264_param2string: return a (malloced) string containing most of
- * the encoding options */
-char *x264_param2string( x264_param_t *p, int b_res );
 
 /* log */
+#define x264_log x264_template(log)
 void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
 
-void x264_reduce_fraction( uint32_t *n, uint32_t *d );
-void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
+#define x264_cavlc_init x264_template(cavlc_init)
 void x264_cavlc_init( x264_t *h );
+#define x264_cabac_init x264_template(cabac_init)
 void x264_cabac_init( x264_t *h );
 
 static ALWAYS_INLINE pixel x264_clip_pixel( int x )
@@ -274,97 +145,9 @@
     return ( (x & ~PIXEL_MAX) ? (-x)>>31 & PIXEL_MAX : x );
 }
 
-static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
-{
-    return ( (v < i_min) ? i_min : (v > i_max) ? i_max : v );
-}
-
-static ALWAYS_INLINE double x264_clip3f( double v, double f_min, double f_max )
-{
-    return ( (v < f_min) ? f_min : (v > f_max) ? f_max : v );
-}
-
-static ALWAYS_INLINE int x264_median( int a, int b, int c )
-{
-    int t = (a-b)&((a-b)>>31);
-    a -= t;
-    b += t;
-    b -= (b-c)&((b-c)>>31);
-    b += (a-b)&((a-b)>>31);
-    return b;
-}
-
-static ALWAYS_INLINE void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t *c )
-{
-    dst[0] = x264_median( a[0], b[0], c[0] );
-    dst[1] = x264_median( a[1], b[1], c[1] );
-}
-
-static ALWAYS_INLINE int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc )
-{
-    int sum = 0;
-    for( int i = 0; i < i_mvc-1; i++ )
-    {
-        sum += abs( mvc[i][0] - mvc[i+1][0] )
-             + abs( mvc[i][1] - mvc[i+1][1] );
-    }
-    return sum;
-}
-
-static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvdtop )
-{
-    int amvd0 = mvdleft[0] + mvdtop[0];
-    int amvd1 = mvdleft[1] + mvdtop[1];
-    amvd0 = (amvd0 > 2) + (amvd0 > 32);
-    amvd1 = (amvd1 > 2) + (amvd1 > 32);
-    return amvd0 + (amvd1<<8);
-}
-
-extern const uint8_t x264_exp2_lut[64];
-extern const float x264_log2_lut[128];
-extern const float x264_log2_lz_lut[32];
-
-/* Not a general-purpose function; multiplies input by -1/6 to convert
- * qp to qscale. */
-static ALWAYS_INLINE int x264_exp2fix8( float x )
-{
-    int i = x*(-64.f/6.f) + 512.5f;
-    if( i < 0 ) return 0;
-    if( i > 1023 ) return 0xffff;
-    return (x264_exp2_lut[i&63]+256) << (i>>6) >> 8;
-}
-
-static ALWAYS_INLINE float x264_log2( uint32_t x )
-{
-    int lz = x264_clz( x );
-    return x264_log2_lut[(x<<lz>>24)&0x7f] + x264_log2_lz_lut[lz];
-}
-
 /****************************************************************************
  *
  ****************************************************************************/
-enum slice_type_e
-{
-    SLICE_TYPE_P  = 0,
-    SLICE_TYPE_B  = 1,
-    SLICE_TYPE_I  = 2,
-};
-
-static const char slice_type_to_char[] = { 'P', 'B', 'I' };
-
-enum sei_payload_type_e
-{
-    SEI_BUFFERING_PERIOD       = 0,
-    SEI_PIC_TIMING             = 1,
-    SEI_PAN_SCAN_RECT          = 2,
-    SEI_FILLER                 = 3,
-    SEI_USER_DATA_REGISTERED   = 4,
-    SEI_USER_DATA_UNREGISTERED = 5,
-    SEI_RECOVERY_POINT         = 6,
-    SEI_DEC_REF_PIC_MARKING    = 7,
-    SEI_FRAME_PACKING          = 45,
-};
-
 typedef struct
 {
     x264_sps_t *sps;
@@ -560,9 +343,14 @@
     udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
     udctcoef        (*nr_offset_emergency)[4][64];
 
-    /* mv/ref cost arrays. */
+    /* mv/ref/mode cost arrays. */
     uint16_t *cost_mv[QP_MAX+1];
     uint16_t *cost_mv_fpel[QP_MAX+1][4];
+    struct
+    {
+        uint16_t ref[QP_MAX+1][3][33];
+        uint16_t i4x4_mode[QP_MAX+1][17];
+    } *cost_table;
 
     const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
 
@@ -781,16 +569,16 @@
             ALIGNED_64( pixel fdec_buf[54*FDEC_STRIDE] );
 
             /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
-            ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
-            ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
+            ALIGNED_32( pixel i4x4_fdec_buf[16*16] );
+            ALIGNED_32( pixel i8x8_fdec_buf[16*16] );
             ALIGNED_64( dctcoef i8x8_dct_buf[3][64] );
             ALIGNED_64( dctcoef i4x4_dct_buf[15][16] );
             uint32_t i4x4_nnz_buf[4];
             uint32_t i8x8_nnz_buf[4];
 
             /* Psy trellis DCT data */
-            ALIGNED_16( dctcoef fenc_dct8[4][64] );
-            ALIGNED_16( dctcoef fenc_dct4[16][16] );
+            ALIGNED_64( dctcoef fenc_dct8[4][64] );
+            ALIGNED_64( dctcoef fenc_dct4[16][16] );
 
             /* Psy RD SATD/SA8D scores cache */
             ALIGNED_64( uint32_t fenc_satd_cache[32] );
@@ -979,7 +767,7 @@
 // included at the end because it needs x264_t
 #include "macroblock.h"
 
-static int ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
 {
     int cnt = 0;
     for( int i = 0; i < i_mvc; i++ )
@@ -995,7 +783,7 @@
     return cnt;
 }
 
-static int ALWAYS_INLINE x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
 {
     int cnt = 0;
     int qpel_limit[4] = {mv_limit[0][0] << 2, mv_limit[0][1] << 2, mv_limit[1][0] << 2, mv_limit[1][1] << 2};
@@ -1019,4 +807,3 @@
 #include "rectangle.h"
 
 #endif
-
diff -Nru x264-0.152.2854+gite9a5903/common/cpu.c x264-0.158.2988+git-20191101.7817004/common/cpu.c
--- x264-0.152.2854+gite9a5903/common/cpu.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/cpu.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.c: cpu detection
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -25,7 +25,7 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#include "common.h"
+#include "base.h"
 
 #if HAVE_POSIXTHREAD && SYS_LINUX
 #include <sched.h>
@@ -45,7 +45,7 @@
 
 const x264_cpu_name_t x264_cpu_names[] =
 {
-#if HAVE_MMX
+#if ARCH_X86 || ARCH_X86_64
 //  {"MMX",         X264_CPU_MMX},  // we don't support asm on mmx1 cpus anymore
 #define MMX2 X264_CPU_MMX|X264_CPU_MMX2
     {"MMX2",        MMX2},
@@ -97,7 +97,7 @@
     {"", 0},
 };
 
-#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && !HAVE_NEON)
+#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON)
 #include <signal.h>
 #include <setjmp.h>
 static sigjmp_buf jmpbuf;
@@ -235,16 +235,8 @@
         int model  = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
         if( family == 6 )
         {
-            /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
-             * theoretically support sse2, but it's significantly slower than mmx for
-             * almost all of x264's functions, so let's just pretend they don't. */
-            if( model == 9 || model == 13 || model == 14 )
-            {
-                cpu &= ~(X264_CPU_SSE2|X264_CPU_SSE3);
-                assert(!(cpu&(X264_CPU_SSSE3|X264_CPU_SSE4)));
-            }
             /* Detect Atom CPU */
-            else if( model == 28 )
+            if( model == 28 )
             {
                 cpu |= X264_CPU_SLOW_ATOM;
                 cpu |= X264_CPU_SLOW_PSHUFB;
@@ -296,7 +288,7 @@
         else if( cache == 64 )
             cpu |= X264_CPU_CACHELINE_64;
         else
-            x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
+            x264_log_internal( X264_LOG_WARNING, "unable to determine cacheline size\n" );
     }
 
 #if STACK_ALIGNMENT < 16
@@ -306,7 +298,7 @@
     return cpu;
 }
 
-#elif ARCH_PPC && HAVE_ALTIVEC
+#elif HAVE_ALTIVEC
 
 #if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD
 #include <sys/sysctl.h>
@@ -363,7 +355,7 @@
 }
 #endif
 
-#elif ARCH_ARM
+#elif HAVE_ARMV6
 
 void x264_cpu_neon_test( void );
 int x264_cpu_fast_neon_mrc_test( void );
@@ -371,7 +363,6 @@
 uint32_t x264_cpu_detect( void )
 {
     int flags = 0;
-#if HAVE_ARMV6
     flags |= X264_CPU_ARMV6;
 
     // don't do this hack if compiled with -mfpu=neon
@@ -404,26 +395,25 @@
     flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0;
 #endif
     // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
-#endif
     return flags;
 }
 
-#elif ARCH_AARCH64
+#elif HAVE_AARCH64
 
 uint32_t x264_cpu_detect( void )
 {
+#if HAVE_NEON
     return X264_CPU_ARMV8 | X264_CPU_NEON;
+#else
+    return X264_CPU_ARMV8;
+#endif
 }
 
-#elif ARCH_MIPS
+#elif HAVE_MSA
 
 uint32_t x264_cpu_detect( void )
 {
-    uint32_t flags = 0;
-#if HAVE_MSA
-    flags |= X264_CPU_MSA;
-#endif
-    return flags;
+    return X264_CPU_MSA;
 }
 
 #else
diff -Nru x264-0.152.2854+gite9a5903/common/cpu.h x264-0.158.2988+git-20191101.7817004/common/cpu.h
--- x264-0.152.2854+gite9a5903/common/cpu.h	2017-12-31 12:50:50.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/cpu.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.h: cpu detection
  *****************************************************************************
- * Copyright (C) 2004-2017 x264 project
+ * Copyright (C) 2004-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -26,8 +26,8 @@
 #ifndef X264_CPU_H
 #define X264_CPU_H
 
-uint32_t x264_cpu_detect( void );
-int      x264_cpu_num_processors( void );
+X264_API uint32_t x264_cpu_detect( void );
+X264_API int      x264_cpu_num_processors( void );
 void     x264_cpu_emms( void );
 void     x264_cpu_sfence( void );
 #if HAVE_MMX
@@ -46,28 +46,11 @@
 #endif
 #define x264_sfence x264_cpu_sfence
 
-/* kludge:
- * gcc can't give variables any greater alignment than the stack frame has.
- * We need 32 byte alignment for AVX2, so here we make sure that the stack is
- * aligned to 32 bytes.
- * gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
- * problem, but I don't want to require such a new version.
- * aligning to 32 bytes only works if the compiler supports keeping that
- * alignment between functions (osdep.h handles manual alignment of arrays
- * if it doesn't).
- */
-#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4))
-intptr_t x264_stack_align( void (*func)(), ... );
-#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
-#else
-#define x264_stack_align(func,...) func(__VA_ARGS__)
-#endif
-
 typedef struct
 {
     const char *name;
     uint32_t flags;
 } x264_cpu_name_t;
-extern const x264_cpu_name_t x264_cpu_names[];
+X264_API extern const x264_cpu_name_t x264_cpu_names[];
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/dct.c x264-0.158.2988+git-20191101.7817004/common/dct.c
--- x264-0.152.2854+gite9a5903/common/dct.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/dct.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.c: transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -29,83 +29,19 @@
 #if HAVE_MMX
 #   include "x86/dct.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/dct.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/dct.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/dct.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/dct.h"
 #endif
 
-/* the inverse of the scaling factors introduced by 8x8 fdct */
-/* uint32 is for the asm implementation of trellis. the actual values fit in uint16. */
-#define W(i) (i==0 ? FIX8(1.0000) :\
-              i==1 ? FIX8(0.8859) :\
-              i==2 ? FIX8(1.6000) :\
-              i==3 ? FIX8(0.9415) :\
-              i==4 ? FIX8(1.2651) :\
-              i==5 ? FIX8(1.1910) :0)
-const uint32_t x264_dct8_weight_tab[64] = {
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
-};
-#undef W
-
-#define W(i) (i==0 ? FIX8(1.76777) :\
-              i==1 ? FIX8(1.11803) :\
-              i==2 ? FIX8(0.70711) :0)
-const uint32_t x264_dct4_weight_tab[16] = {
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2),
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2)
-};
-#undef W
-
-/* inverse squared */
-#define W(i) (i==0 ? FIX8(3.125) :\
-              i==1 ? FIX8(1.25) :\
-              i==2 ? FIX8(0.5) :0)
-const uint32_t x264_dct4_weight2_tab[16] = {
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2),
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2)
-};
-#undef W
-
-#define W(i) (i==0 ? FIX8(1.00000) :\
-              i==1 ? FIX8(0.78487) :\
-              i==2 ? FIX8(2.56132) :\
-              i==3 ? FIX8(0.88637) :\
-              i==4 ? FIX8(1.60040) :\
-              i==5 ? FIX8(1.41850) :0)
-const uint32_t x264_dct8_weight2_tab[64] = {
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
-};
-#undef W
-
-
 static void dct4x4dc( dctcoef d[16] )
 {
     dctcoef tmp[16];
@@ -501,7 +437,7 @@
     add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] );
 }
 
-static void inline add4x4_idct_dc( pixel *p_dst, dctcoef dc )
+static inline void add4x4_idct_dc( pixel *p_dst, dctcoef dc )
 {
     dc = (dc + 32) >> 6;
     for( int i = 0; i < 4; i++, p_dst += FDEC_STRIDE )
@@ -731,6 +667,7 @@
         dctf->sub16x16_dct  = x264_sub16x16_dct_altivec;
 
         dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec;
+        dctf->add16x16_idct_dc = x264_add16x16_idct_dc_altivec;
 
         dctf->add4x4_idct   = x264_add4x4_idct_altivec;
         dctf->add8x8_idct   = x264_add8x8_idct_altivec;
@@ -745,7 +682,7 @@
     }
 #endif
 
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
     if( cpu&X264_CPU_NEON )
     {
         dctf->sub4x4_dct    = x264_sub4x4_dct_neon;
@@ -1059,11 +996,11 @@
         pf_progressive->scan_8x8  = x264_zigzag_scan_8x8_frame_altivec;
     }
 #endif
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
     if( cpu&X264_CPU_NEON )
     {
         pf_progressive->scan_4x4  = x264_zigzag_scan_4x4_frame_neon;
-#if ARCH_AARCH64
+#if HAVE_AARCH64
         pf_interlaced->scan_4x4   = x264_zigzag_scan_4x4_field_neon;
         pf_interlaced->scan_8x8   = x264_zigzag_scan_8x8_field_neon;
         pf_interlaced->sub_4x4    = x264_zigzag_sub_4x4_field_neon;
@@ -1073,9 +1010,9 @@
         pf_progressive->sub_4x4   = x264_zigzag_sub_4x4_frame_neon;
         pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon;
         pf_progressive->sub_8x8   = x264_zigzag_sub_8x8_frame_neon;
-#endif // ARCH_AARCH64
+#endif // HAVE_AARCH64
     }
-#endif // HAVE_ARMV6 || ARCH_AARCH64
+#endif // HAVE_ARMV6 || HAVE_AARCH64
 #endif // HIGH_BIT_DEPTH
 
     pf_interlaced->interleave_8x8_cavlc =
@@ -1128,13 +1065,13 @@
 #endif // HIGH_BIT_DEPTH
 #endif
 #if !HIGH_BIT_DEPTH
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     if( cpu&X264_CPU_NEON )
     {
         pf_interlaced->interleave_8x8_cavlc =
         pf_progressive->interleave_8x8_cavlc =  x264_zigzag_interleave_8x8_cavlc_neon;
     }
-#endif // ARCH_AARCH64
+#endif // HAVE_AARCH64
 
 #if HAVE_ALTIVEC
     if( cpu&X264_CPU_ALTIVEC )
diff -Nru x264-0.152.2854+gite9a5903/common/dct.h x264-0.158.2988+git-20191101.7817004/common/dct.h
--- x264-0.152.2854+gite9a5903/common/dct.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/dct.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: transform and zigzag
  *****************************************************************************
- * Copyright (C) 2004-2017 x264 project
+ * Copyright (C) 2004-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -26,11 +26,6 @@
 #ifndef X264_DCT_H
 #define X264_DCT_H
 
-extern const uint32_t x264_dct4_weight_tab[16];
-extern const uint32_t x264_dct8_weight_tab[64];
-extern const uint32_t x264_dct4_weight2_tab[16];
-extern const uint32_t x264_dct8_weight2_tab[64];
-
 typedef struct
 {
     // pix1  stride = FENC_STRIDE
@@ -74,7 +69,9 @@
 
 } x264_zigzag_function_t;
 
+#define x264_dct_init x264_template(dct_init)
 void x264_dct_init( int cpu, x264_dct_function_t *dctf );
+#define x264_zigzag_init x264_template(zigzag_init)
 void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/deblock.c x264-0.158.2988+git-20191101.7817004/common/deblock.c
--- x264-0.152.2854+gite9a5903/common/deblock.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/deblock.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.c: deblocking
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -337,7 +337,7 @@
     pf_intra( pix, i_stride, alpha, beta );
 }
 
-static ALWAYS_INLINE void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y )
+static ALWAYS_INLINE void macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y )
 {
     int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2;
 
@@ -383,6 +383,7 @@
     int qp_thresh = 15 - X264_MIN( a, b ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset );
     int stridey   = h->fdec->i_stride[0];
     int strideuv  = h->fdec->i_stride[1];
+    int chroma_format = CHROMA_FORMAT;
     int chroma444 = CHROMA444;
     int chroma_height = 16 >> CHROMA_V_SHIFT;
     intptr_t uvdiff = chroma444 ? h->fdec->plane[2] - h->fdec->plane[1] : 1;
@@ -390,7 +391,7 @@
     for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
     {
         x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
-        x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
+        macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
 
         int mb_xy = h->mb.i_mb_xy;
         int transform_8x8 = h->mb.mb_transform_size[mb_xy];
@@ -420,7 +421,7 @@
                 deblock_edge##intra( h, pixy + 4*edge*(dir?stride2y:1),\
                                      stride2y, bs[dir][edge], qp, a, b, 0,\
                                      h->loopf.deblock_luma##intra[dir] );\
-                if( CHROMA_FORMAT == CHROMA_444 )\
+                if( chroma_format == CHROMA_444 )\
                 {\
                     deblock_edge##intra( h, pixuv          + 4*edge*(dir?stride2uv:1),\
                                          stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
@@ -429,14 +430,14 @@
                                          stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\
                                          h->loopf.deblock_luma##intra[dir] );\
                 }\
-                else if( CHROMA_FORMAT == CHROMA_420 && !(edge & 1) )\
+                else if( chroma_format == CHROMA_420 && !(edge & 1) )\
                 {\
                     deblock_edge##intra( h, pixuv + edge*(dir?2*stride2uv:4),\
                                          stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
                                          h->loopf.deblock_chroma##intra[dir] );\
                 }\
             }\
-            if( CHROMA_FORMAT == CHROMA_422 && (dir || !(edge & 1)) )\
+            if( chroma_format == CHROMA_422 && (dir || !(edge & 1)) )\
             {\
                 deblock_edge##intra( h, pixuv + edge*(dir?4*stride2uv:4),\
                                      stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\
@@ -463,16 +464,22 @@
                 if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] ) )
                 {
                     deblock_edge_intra( h, pixy,           2*stridey,  bs[0][0], luma_qp[0],   a, b, 0, luma_intra_deblock );
-                    deblock_edge_intra( h, pixuv,          2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
-                    if( chroma444 )
-                        deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
+                    if( chroma_format )
+                    {
+                        deblock_edge_intra( h, pixuv,          2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
+                        if( chroma444 )
+                            deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock );
+                    }
                 }
                 else
                 {
                     deblock_edge( h, pixy,           2*stridey,  bs[0][0], luma_qp[0],   a, b, 0, luma_deblock );
-                    deblock_edge( h, pixuv,          2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
-                    if( chroma444 )
-                        deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
+                    if( chroma_format )
+                    {
+                        deblock_edge( h, pixuv,          2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
+                        if( chroma444 )
+                            deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock );
+                    }
                 }
 
                 int offy = MB_INTERLACED ? 4 : 0;
@@ -483,16 +490,22 @@
                 if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[1]] ) )
                 {
                     deblock_edge_intra( h, pixy           + (stridey<<offy),   2*stridey,  bs[0][4], luma_qp[1],   a, b, 0, luma_intra_deblock );
-                    deblock_edge_intra( h, pixuv          + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
-                    if( chroma444 )
-                        deblock_edge_intra( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
+                    if( chroma_format )
+                    {
+                        deblock_edge_intra( h, pixuv          + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
+                        if( chroma444 )
+                            deblock_edge_intra( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_intra_deblock );
+                    }
                 }
                 else
                 {
                     deblock_edge( h, pixy           + (stridey<<offy),   2*stridey,  bs[0][4], luma_qp[1],   a, b, 0, luma_deblock );
-                    deblock_edge( h, pixuv          + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
-                    if( chroma444 )
-                        deblock_edge( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
+                    if( chroma_format )
+                    {
+                        deblock_edge( h, pixuv          + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
+                        if( chroma444 )
+                            deblock_edge( h, pixuv + uvdiff + (strideuv<<offuv), 2*strideuv, bs[0][4], chroma_qp[1], a, b, c, chroma_deblock );
+                    }
                 }
             }
             else
@@ -548,7 +561,7 @@
                         deblock_edge( h, pixuv          + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 0, h->loopf.deblock_luma[1] );
                         deblock_edge( h, pixuv + uvdiff + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 0, h->loopf.deblock_luma[1] );
                     }
-                    else
+                    else if( chroma_format )
                         deblock_edge( h, pixuv          + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 1, h->loopf.deblock_chroma[1] );
                 }
             }
@@ -652,117 +665,19 @@
 }
 
 #if HAVE_MMX
-void x264_deblock_v_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_422_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_422_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_422_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_422_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_strength_sse2  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe );
-void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe );
-void x264_deblock_strength_avx   ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe );
-void x264_deblock_strength_avx2  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe );
-void x264_deblock_strength_avx512( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                   int mvy_limit, int bframe );
-
-void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
-#if ARCH_X86
-void x264_deblock_h_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v8_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v8_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
-
-#if HIGH_BIT_DEPTH
-void x264_deblock_v_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
-#else
-// FIXME this wrapper has a significant cpu cost
-static void x264_deblock_v_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
-{
-    x264_deblock_v8_luma_mmx2( pix,   stride, alpha, beta, tc0   );
-    x264_deblock_v8_luma_mmx2( pix+8, stride, alpha, beta, tc0+2 );
-}
-static void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta )
-{
-    x264_deblock_v8_luma_intra_mmx2( pix,   stride, alpha, beta );
-    x264_deblock_v8_luma_intra_mmx2( pix+8, stride, alpha, beta );
-}
-#endif // HIGH_BIT_DEPTH
+#include "x86/deblock.h"
 #endif
+#if HAVE_ALTIVEC
+#include "ppc/deblock.h"
 #endif
-
-#if ARCH_PPC
-void x264_deblock_v_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-#endif // ARCH_PPC
-
-#if HAVE_ARMV6 || ARCH_AARCH64
-void x264_deblock_v_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                 int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
-                                 int mvy_limit, int bframe );
-void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#if HAVE_ARMV6
+#include "arm/deblock.h"
 #endif
-
-#if !HIGH_BIT_DEPTH
-#if HAVE_MSA
-void x264_deblock_v_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
-void x264_deblock_strength_msa( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
-                                int bframe );
+#if HAVE_AARCH64
+#include "aarch64/deblock.h"
 #endif
+#if HAVE_MSA
+#include "mips/deblock.h"
 #endif
 
 void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
@@ -867,7 +782,7 @@
     }
 #endif // HAVE_ALTIVEC
 
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
     if( cpu&X264_CPU_NEON )
     {
         pf->deblock_luma[1] = x264_deblock_v_luma_neon;
diff -Nru x264-0.152.2854+gite9a5903/common/frame.c x264-0.158.2988+git-20191101.7817004/common/frame.c
--- x264-0.152.2854+gite9a5903/common/frame.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/frame.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.c: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -42,37 +42,24 @@
     return x;
 }
 
-static int x264_frame_internal_csp( int external_csp )
+static int frame_internal_csp( int external_csp )
 {
-    switch( external_csp & X264_CSP_MASK )
-    {
-        case X264_CSP_NV12:
-        case X264_CSP_NV21:
-        case X264_CSP_I420:
-        case X264_CSP_YV12:
-            return X264_CSP_NV12;
-        case X264_CSP_NV16:
-        case X264_CSP_I422:
-        case X264_CSP_YV16:
-        case X264_CSP_YUYV:
-        case X264_CSP_UYVY:
-        case X264_CSP_V210:
-            return X264_CSP_NV16;
-        case X264_CSP_I444:
-        case X264_CSP_YV24:
-        case X264_CSP_BGR:
-        case X264_CSP_BGRA:
-        case X264_CSP_RGB:
-            return X264_CSP_I444;
-        default:
-            return X264_CSP_NONE;
-    }
+    int csp = external_csp & X264_CSP_MASK;
+    if( csp == X264_CSP_I400 )
+        return X264_CSP_I400;
+    if( csp >= X264_CSP_I420 && csp < X264_CSP_I422 )
+        return X264_CSP_NV12;
+    if( csp >= X264_CSP_I422 && csp < X264_CSP_I444 )
+        return X264_CSP_NV16;
+    if( csp >= X264_CSP_I444 && csp <= X264_CSP_RGB )
+        return X264_CSP_I444;
+    return X264_CSP_NONE;
 }
 
-static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
+static x264_frame_t *frame_new( x264_t *h, int b_fdec )
 {
     x264_frame_t *frame;
-    int i_csp = x264_frame_internal_csp( h->param.i_csp );
+    int i_csp = frame_internal_csp( h->param.i_csp );
     int i_mb_count = h->mb.i_mb_count;
     int i_stride, i_width, i_lines, luma_plane_count;
     int i_padv = PADV << PARAM_INTERLACED;
@@ -89,6 +76,9 @@
     int disalign = 1<<10;
 #endif
 
+    /* ensure frame alignment after PADH is added; sizeof() is cast to int so the subtraction stays signed and the clamp to 0 works */
+    int padh_align = X264_MAX( align - PADH * (int)sizeof(pixel), 0 ) / (int)sizeof(pixel);
+
     CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
     PREALLOC_INIT
 
@@ -119,6 +109,14 @@
             frame->i_stride[i] = i_stride;
         }
     }
+    else if( i_csp == X264_CSP_I400 )
+    {
+        luma_plane_count = 1;
+        frame->i_plane = 1;
+        frame->i_width[0] = i_width;
+        frame->i_lines[0] = i_lines;
+        frame->i_stride[0] = i_stride;
+    }
     else
         goto fail;
 
@@ -154,9 +152,9 @@
     {
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
         int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
-        PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
+        PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * sizeof(pixel) );
         if( PARAM_INTERLACED )
-            PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * sizeof(pixel) );
     }
 
     /* all 4 luma planes allocated together, since the cacheline split code
@@ -166,18 +164,12 @@
     {
         int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
         if( h->param.analyse.i_subpel_refine && b_fdec )
-        {
-            /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
-            PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
-            if( PARAM_INTERLACED )
-                PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
-        }
-        else
-        {
-            PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
-            if( PARAM_INTERLACED )
-                PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
-        }
+            luma_plane_size *= 4;
+
+        /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
+        PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * sizeof(pixel) );
+        if( PARAM_INTERLACED )
+            PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * sizeof(pixel) );
     }
 
     frame->b_duplicate = 0;
@@ -215,7 +207,7 @@
         {
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
 
-            PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) );
 
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
@@ -245,9 +237,9 @@
     if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
     {
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
-        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
         if( PARAM_INTERLACED )
-            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
+            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align;
     }
 
     for( int p = 0; p < luma_plane_count; p++ )
@@ -257,16 +249,16 @@
         {
             for( int i = 0; i < 4; i++ )
             {
-                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
+                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align;
             }
             frame->plane[p] = frame->filtered[p][0];
             frame->plane_fld[p] = frame->filtered_fld[p][0];
         }
         else
         {
-            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
-            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
+            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH + padh_align;
         }
     }
 
@@ -284,7 +276,7 @@
         {
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
             for( int i = 0; i < 4; i++ )
-                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+                frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;
 
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
@@ -366,7 +358,7 @@
 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
 {
     int i_csp = src->img.i_csp & X264_CSP_MASK;
-    if( dst->i_csp != x264_frame_internal_csp( i_csp ) )
+    if( dst->i_csp != frame_internal_csp( i_csp ) )
     {
         x264_log( h, X264_LOG_ERROR, "Invalid input colorspace\n" );
         return -1;
@@ -470,7 +462,7 @@
                                          (pixel*)pix[2], stride[2]/sizeof(pixel),
                                          h->param.i_width>>1, h->param.i_height>>v_shift );
         }
-        else //if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 )
+        else if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 )
         {
             get_plane_ptr( h, src, &pix[1], &stride[1], i_csp==X264_CSP_I444 ? 1 : 2, 0, 0 );
             get_plane_ptr( h, src, &pix[2], &stride[2], i_csp==X264_CSP_I444 ? 2 : 1, 0, 0 );
@@ -483,7 +475,7 @@
     return 0;
 }
 
-static void ALWAYS_INLINE pixel_memset( pixel *dst, pixel *src, int len, int size )
+static ALWAYS_INLINE void pixel_memset( pixel *dst, pixel *src, int len, int size )
 {
     uint8_t *dstp = (uint8_t*)dst;
     uint32_t v1 = *src;
@@ -535,7 +527,7 @@
     }
 }
 
-static void ALWAYS_INLINE plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
+static ALWAYS_INLINE void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
 {
 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
     for( int y = 0; y < i_height; y++ )
@@ -784,7 +776,7 @@
     if( h->frames.unused[b_fdec][0] )
         frame = x264_frame_pop( h->frames.unused[b_fdec] );
     else
-        frame = x264_frame_new( h, b_fdec );
+        frame = frame_new( h, b_fdec );
     if( !frame )
         return NULL;
     frame->b_last_minigop_bframe = 0;
diff -Nru x264-0.152.2854+gite9a5903/common/frame.h x264-0.158.2988+git-20191101.7817004/common/frame.h
--- x264-0.152.2854+gite9a5903/common/frame.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/frame.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.h: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -86,7 +86,7 @@
      * allocated data are stored in buffer */
     pixel *buffer[4];
     pixel *buffer_fld[4];
-    pixel *buffer_lowres[4];
+    pixel *buffer_lowres;
 
     x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */
     pixel *weighted[X264_REF_MAX]; /* plane[0] weighted of the reference frames */
@@ -216,47 +216,80 @@
                               int bframe );
 } x264_deblock_function_t;
 
+#define x264_frame_delete x264_template(frame_delete)
 void          x264_frame_delete( x264_frame_t *frame );
 
+#define x264_frame_copy_picture x264_template(frame_copy_picture)
 int           x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src );
 
+#define x264_frame_expand_border x264_template(frame_expand_border)
 void          x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y );
+#define x264_frame_expand_border_filtered x264_template(frame_expand_border_filtered)
 void          x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end );
+#define x264_frame_expand_border_lowres x264_template(frame_expand_border_lowres)
 void          x264_frame_expand_border_lowres( x264_frame_t *frame );
+#define x264_frame_expand_border_chroma x264_template(frame_expand_border_chroma)
 void          x264_frame_expand_border_chroma( x264_t *h, x264_frame_t *frame, int plane );
+#define x264_frame_expand_border_mod16 x264_template(frame_expand_border_mod16)
 void          x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame );
+#define x264_expand_border_mbpair x264_template(expand_border_mbpair)
 void          x264_expand_border_mbpair( x264_t *h, int mb_x, int mb_y );
 
+#define x264_frame_deblock_row x264_template(frame_deblock_row)
 void          x264_frame_deblock_row( x264_t *h, int mb_y );
+#define x264_macroblock_deblock x264_template(macroblock_deblock)
 void          x264_macroblock_deblock( x264_t *h );
 
+#define x264_frame_filter x264_template(frame_filter)
 void          x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end );
+#define x264_frame_init_lowres x264_template(frame_init_lowres)
 void          x264_frame_init_lowres( x264_t *h, x264_frame_t *frame );
 
+#define x264_deblock_init x264_template(deblock_init)
 void          x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff );
 
+#define x264_frame_cond_broadcast x264_template(frame_cond_broadcast)
 void          x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed );
+#define x264_frame_cond_wait x264_template(frame_cond_wait)
 void          x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed );
+#define x264_frame_new_slice x264_template(frame_new_slice)
 int           x264_frame_new_slice( x264_t *h, x264_frame_t *frame );
 
+#define x264_threadslice_cond_broadcast x264_template(threadslice_cond_broadcast)
 void          x264_threadslice_cond_broadcast( x264_t *h, int pass );
+#define x264_threadslice_cond_wait x264_template(threadslice_cond_wait)
 void          x264_threadslice_cond_wait( x264_t *h, int pass );
 
-void          x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
-x264_frame_t *x264_frame_pop( x264_frame_t **list );
-void          x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
-x264_frame_t *x264_frame_shift( x264_frame_t **list );
+#define x264_frame_push x264_template(frame_push)
+X264_API void          x264_frame_push( x264_frame_t **list, x264_frame_t *frame );
+#define x264_frame_pop x264_template(frame_pop)
+X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list );
+#define x264_frame_unshift x264_template(frame_unshift)
+X264_API void          x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame );
+#define x264_frame_shift x264_template(frame_shift)
+X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list );
+
+#define x264_frame_push_unused x264_template(frame_push_unused)
 void          x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
+#define x264_frame_push_blank_unused x264_template(frame_push_blank_unused)
 void          x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame );
+#define x264_frame_pop_blank_unused x264_template(frame_pop_blank_unused)
 x264_frame_t *x264_frame_pop_blank_unused( x264_t *h );
+#define x264_weight_scale_plane x264_template(weight_scale_plane)
 void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                               int i_width, int i_height, x264_weight_t *w );
+#define x264_frame_pop_unused x264_template(frame_pop_unused)
 x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
+#define x264_frame_delete_list x264_template(frame_delete_list)
 void          x264_frame_delete_list( x264_frame_t **list );
 
+#define x264_sync_frame_list_init x264_template(sync_frame_list_init)
 int           x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
+#define x264_sync_frame_list_delete x264_template(sync_frame_list_delete)
 void          x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
+#define x264_sync_frame_list_push x264_template(sync_frame_list_push)
 void          x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
+#define x264_sync_frame_list_pop x264_template(sync_frame_list_pop)
 x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/macroblock.c x264-0.158.2988+git-20191101.7817004/common/macroblock.c
--- x264-0.152.2854+gite9a5903/common/macroblock.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/macroblock.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.c: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Fiona Glaser <fiona@x264.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -34,7 +34,7 @@
                    mvx, mvy, 4*width, 4*height, \
                    list ? x264_weight_none : &h->sh.weight[i_ref][p] );
 
-static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
+static NOINLINE void mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
 {
     int i8    = x264_scan8[0]+x+8*y;
     int i_ref = h->mb.cache.ref[0][i8];
@@ -48,7 +48,7 @@
         MC_LUMA( 0, 1 );
         MC_LUMA( 0, 2 );
     }
-    else
+    else if( CHROMA_FORMAT )
     {
         int v_shift = CHROMA_V_SHIFT;
         // Chroma in 4:2:0 is offset if MCing from a field of opposite parity
@@ -73,7 +73,7 @@
                                                        &h->sh.weight[i_ref][2], height );
     }
 }
-static NOINLINE void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
+static NOINLINE void mb_mc_1xywh( x264_t *h, int x, int y, int width, int height )
 {
     int i8    = x264_scan8[0]+x+8*y;
     int i_ref = h->mb.cache.ref[1][i8];
@@ -87,7 +87,7 @@
         MC_LUMA( 1, 1 );
         MC_LUMA( 1, 2 );
     }
-    else
+    else if( CHROMA_FORMAT )
     {
         int v_shift = CHROMA_V_SHIFT;
         if( v_shift & MB_INTERLACED & i_ref )
@@ -109,7 +109,7 @@
     h->mc.avg[i_mode]( &h->mb.pic.p_fdec[p][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, \
                        src0, i_stride0, src1, i_stride1, weight );
 
-static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int height )
+static NOINLINE void mb_mc_01xywh( x264_t *h, int x, int y, int width, int height )
 {
     int i8 = x264_scan8[0]+x+8*y;
     int i_ref0 = h->mb.cache.ref[0][i8];
@@ -132,7 +132,7 @@
         MC_LUMA_BI( 1 );
         MC_LUMA_BI( 2 );
     }
-    else
+    else if( CHROMA_FORMAT )
     {
         int v_shift = CHROMA_V_SHIFT;
         if( v_shift & MB_INTERLACED & i_ref0 )
@@ -165,21 +165,21 @@
         switch( h->mb.i_sub_partition[i8] )
         {
             case D_L0_8x8:
-                x264_mb_mc_0xywh( h, x, y, 2, 2 );
+                mb_mc_0xywh( h, x, y, 2, 2 );
                 break;
             case D_L0_8x4:
-                x264_mb_mc_0xywh( h, x, y+0, 2, 1 );
-                x264_mb_mc_0xywh( h, x, y+1, 2, 1 );
+                mb_mc_0xywh( h, x, y+0, 2, 1 );
+                mb_mc_0xywh( h, x, y+1, 2, 1 );
                 break;
             case D_L0_4x8:
-                x264_mb_mc_0xywh( h, x+0, y, 1, 2 );
-                x264_mb_mc_0xywh( h, x+1, y, 1, 2 );
+                mb_mc_0xywh( h, x+0, y, 1, 2 );
+                mb_mc_0xywh( h, x+1, y, 1, 2 );
                 break;
             case D_L0_4x4:
-                x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 );
-                x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 );
-                x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
-                x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
+                mb_mc_0xywh( h, x+0, y+0, 1, 1 );
+                mb_mc_0xywh( h, x+1, y+0, 1, 1 );
+                mb_mc_0xywh( h, x+0, y+1, 1, 1 );
+                mb_mc_0xywh( h, x+1, y+1, 1, 1 );
                 break;
         }
     }
@@ -189,11 +189,11 @@
 
         if( h->mb.cache.ref[0][scan8] >= 0 )
             if( h->mb.cache.ref[1][scan8] >= 0 )
-                x264_mb_mc_01xywh( h, x, y, 2, 2 );
+                mb_mc_01xywh( h, x, y, 2, 2 );
             else
-                x264_mb_mc_0xywh( h, x, y, 2, 2 );
+                mb_mc_0xywh( h, x, y, 2, 2 );
         else
-            x264_mb_mc_1xywh( h, x, y, 2, 2 );
+            mb_mc_1xywh( h, x, y, 2, 2 );
     }
 }
 
@@ -214,33 +214,33 @@
         if( h->mb.i_partition == D_16x16 )
         {
             if( ref0a >= 0 )
-                if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 4 );
-                else             x264_mb_mc_0xywh ( h, 0, 0, 4, 4 );
-            else                 x264_mb_mc_1xywh ( h, 0, 0, 4, 4 );
+                if( ref1a >= 0 ) mb_mc_01xywh( h, 0, 0, 4, 4 );
+                else             mb_mc_0xywh ( h, 0, 0, 4, 4 );
+            else                 mb_mc_1xywh ( h, 0, 0, 4, 4 );
         }
         else if( h->mb.i_partition == D_16x8 )
         {
             if( ref0a >= 0 )
-                if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 2 );
-                else             x264_mb_mc_0xywh ( h, 0, 0, 4, 2 );
-            else                 x264_mb_mc_1xywh ( h, 0, 0, 4, 2 );
+                if( ref1a >= 0 ) mb_mc_01xywh( h, 0, 0, 4, 2 );
+                else             mb_mc_0xywh ( h, 0, 0, 4, 2 );
+            else                 mb_mc_1xywh ( h, 0, 0, 4, 2 );
 
             if( ref0b >= 0 )
-                if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 0, 2, 4, 2 );
-                else             x264_mb_mc_0xywh ( h, 0, 2, 4, 2 );
-            else                 x264_mb_mc_1xywh ( h, 0, 2, 4, 2 );
+                if( ref1b >= 0 ) mb_mc_01xywh( h, 0, 2, 4, 2 );
+                else             mb_mc_0xywh ( h, 0, 2, 4, 2 );
+            else                 mb_mc_1xywh ( h, 0, 2, 4, 2 );
         }
         else if( h->mb.i_partition == D_8x16 )
         {
             if( ref0a >= 0 )
-                if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 2, 4 );
-                else             x264_mb_mc_0xywh ( h, 0, 0, 2, 4 );
-            else                 x264_mb_mc_1xywh ( h, 0, 0, 2, 4 );
+                if( ref1a >= 0 ) mb_mc_01xywh( h, 0, 0, 2, 4 );
+                else             mb_mc_0xywh ( h, 0, 0, 2, 4 );
+            else                 mb_mc_1xywh ( h, 0, 0, 2, 4 );
 
             if( ref0b >= 0 )
-                if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 2, 0, 2, 4 );
-                else             x264_mb_mc_0xywh ( h, 2, 0, 2, 4 );
-            else                 x264_mb_mc_1xywh ( h, 2, 0, 2, 4 );
+                if( ref1b >= 0 ) mb_mc_01xywh( h, 2, 0, 2, 4 );
+                else             mb_mc_0xywh ( h, 2, 0, 2, 4 );
+            else                 mb_mc_1xywh ( h, 2, 0, 2, 4 );
         }
     }
 }
@@ -531,17 +531,20 @@
      */
     h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
     h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE;
-    h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
-    h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE;
-    if( CHROMA444 )
-    {
-        h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE;
-        h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 38*FDEC_STRIDE;
-    }
-    else
+    if( CHROMA_FORMAT )
     {
-        h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
-        h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE + 16;
+        h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE;
+        h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE;
+        if( CHROMA444 )
+        {
+            h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE;
+            h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 38*FDEC_STRIDE;
+        }
+        else
+        {
+            h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8;
+            h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE + 16;
+        }
     }
 }
 
@@ -562,7 +565,7 @@
         dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE];
 }
 
-static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
+static ALWAYS_INLINE void macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
 {
     int mb_interlaced = b_mbaff && MB_INTERLACED;
     int height = b_chroma ? 16 >> CHROMA_V_SHIFT : 16;
@@ -666,7 +669,7 @@
     {{ 4, 5, 6, 3}, { 3,  7, 11, 15}, {16+1, 16+5, 32+1, 32+5}, {0, 1, 2, 3}, {0, 0, 1, 1}}
 };
 
-static void ALWAYS_INLINE x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced )
+static ALWAYS_INLINE void macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced )
 {
     const int mb_interlaced = b_interlaced && MB_INTERLACED;
     int top_y = mb_y - (1 << mb_interlaced);
@@ -848,9 +851,9 @@
 #   define LBOT 0
 #endif
 
-static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff )
+static ALWAYS_INLINE void macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff )
 {
-    x264_macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff );
+    macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff );
 
     int *left = h->mb.i_mb_left_xy;
     int top  = h->mb.i_mb_top_xy;
@@ -996,17 +999,17 @@
     {
         x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
         x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
-        x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 0 );
+        macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 0 );
         if( CHROMA444 )
         {
             x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+ 4*FDEC_STRIDE );
             x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+12*FDEC_STRIDE );
             x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+ 4*FDEC_STRIDE );
             x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+12*FDEC_STRIDE );
-            x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 0 );
-            x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 0 );
+            macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 0 );
+            macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 0 );
         }
-        else
+        else if( CHROMA_FORMAT )
         {
             x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+ 4*FDEC_STRIDE );
             x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+ 4*FDEC_STRIDE );
@@ -1015,19 +1018,19 @@
                 x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+12*FDEC_STRIDE );
                 x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+12*FDEC_STRIDE );
             }
-            x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 0 );
+            macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 0 );
         }
     }
     else
     {
-        x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 1 );
+        macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 1 );
         if( CHROMA444 )
         {
-            x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 1 );
-            x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 1 );
+            macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 1 );
+            macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 1 );
         }
-        else
-            x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 1 );
+        else if( CHROMA_FORMAT )
+            macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 1 );
     }
 
     if( h->fdec->integral )
@@ -1348,15 +1351,15 @@
 
 void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y )
 {
-    x264_macroblock_cache_load( h, mb_x, mb_y, 0 );
+    macroblock_cache_load( h, mb_x, mb_y, 0 );
 }
 
 void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y )
 {
-    x264_macroblock_cache_load( h, mb_x, mb_y, 1 );
+    macroblock_cache_load( h, mb_x, mb_y, 1 );
 }
 
-static void x264_macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][4] )
+static void macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][4] )
 {
     if( (h->mb.i_neighbour & MB_LEFT) && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
     {
@@ -1613,10 +1616,10 @@
                                bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B );
 
     if( SLICE_MBAFF )
-        x264_macroblock_deblock_strength_mbaff( h, bs );
+        macroblock_deblock_strength_mbaff( h, bs );
 }
 
-static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
+static ALWAYS_INLINE void macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff )
 {
     int height = b_chroma ? 16>>CHROMA_V_SHIFT : 16;
     int i_stride = h->fdec->i_stride[i];
@@ -1630,7 +1633,7 @@
         h->mc.copy[PIXEL_16x16]( &h->fdec->plane[i][i_pix_offset], i_stride2, h->mb.pic.p_fdec[i], FDEC_STRIDE, 16 );
 }
 
-static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff )
+static ALWAYS_INLINE void macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff )
 {
     /* In MBAFF we store the last two rows in intra_border_backup[0] and [1].
      * For progressive mbs this is the bottom two rows, and for interlaced the
@@ -1643,7 +1646,7 @@
         memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16  ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*15, 16*sizeof(pixel) );
         memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16  ], h->mb.pic.p_fdec[2]+FDEC_STRIDE*15, 16*sizeof(pixel) );
     }
-    else
+    else if( CHROMA_FORMAT )
     {
         int backup_src = (15>>CHROMA_V_SHIFT) * FDEC_STRIDE;
         memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16  ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) );
@@ -1661,7 +1664,7 @@
                 memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16  ], h->mb.pic.p_fdec[1]+backup_src, 16*sizeof(pixel) );
                 memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16  ], h->mb.pic.p_fdec[2]+backup_src, 16*sizeof(pixel) );
             }
-            else
+            else if( CHROMA_FORMAT )
             {
                 if( CHROMA_FORMAT == CHROMA_420 )
                     backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE;
@@ -1688,27 +1691,27 @@
 
     if( SLICE_MBAFF )
     {
-        x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 1 );
-        x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 1 );
+        macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 1 );
+        macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 1 );
         if( CHROMA444 )
         {
-            x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 1 );
-            x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 1 );
+            macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 1 );
+            macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 1 );
         }
-        else
-            x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 1 );
+        else if( CHROMA_FORMAT )
+            macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 1 );
     }
     else
     {
-        x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 0 );
-        x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 0 );
+        macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 0 );
+        macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 0 );
         if( CHROMA444 )
         {
-            x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 0 );
-            x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 0 );
+            macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 0 );
+            macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 0 );
         }
-        else
-            x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 0 );
+        else if( CHROMA_FORMAT )
+            macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 0 );
     }
 
     x264_prefetch_fenc( h, h->fdec, h->mb.i_mb_x, h->mb.i_mb_y );
diff -Nru x264-0.152.2854+gite9a5903/common/macroblock.h x264-0.158.2988+git-20191101.7817004/common/macroblock.h
--- x264-0.152.2854+gite9a5903/common/macroblock.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/macroblock.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -299,38 +299,54 @@
 };
 
 /* Per-frame allocation: is allocated per-thread only in frame-threads mode. */
+#define x264_macroblock_cache_allocate x264_template(macroblock_cache_allocate)
 int  x264_macroblock_cache_allocate( x264_t *h );
+#define x264_macroblock_cache_free x264_template(macroblock_cache_free)
 void x264_macroblock_cache_free( x264_t *h );
 
 /* Per-thread allocation: is allocated per-thread even in sliced-threads mode. */
+#define x264_macroblock_thread_allocate x264_template(macroblock_thread_allocate)
 int  x264_macroblock_thread_allocate( x264_t *h, int b_lookahead );
+#define x264_macroblock_thread_free x264_template(macroblock_thread_free)
 void x264_macroblock_thread_free( x264_t *h, int b_lookahead );
 
+#define x264_macroblock_slice_init x264_template(macroblock_slice_init)
 void x264_macroblock_slice_init( x264_t *h );
+#define x264_macroblock_thread_init x264_template(macroblock_thread_init)
 void x264_macroblock_thread_init( x264_t *h );
+#define x264_macroblock_cache_load_progressive x264_template(macroblock_cache_load_progressive)
 void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y );
+#define x264_macroblock_cache_load_interlaced x264_template(macroblock_cache_load_interlaced)
 void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y );
+#define x264_macroblock_deblock_strength x264_template(macroblock_deblock_strength)
 void x264_macroblock_deblock_strength( x264_t *h );
+#define x264_macroblock_cache_save x264_template(macroblock_cache_save)
 void x264_macroblock_cache_save( x264_t *h );
 
+#define x264_macroblock_bipred_init x264_template(macroblock_bipred_init)
 void x264_macroblock_bipred_init( x264_t *h );
 
+#define x264_prefetch_fenc x264_template(prefetch_fenc)
 void x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y );
 
+#define x264_copy_column8 x264_template(copy_column8)
 void x264_copy_column8( pixel *dst, pixel *src );
 
 /* x264_mb_predict_mv_16x16:
  *      set mvp with predicted mv for D_16x16 block
  *      h->mb. need only valid values from other blocks */
+#define x264_mb_predict_mv_16x16 x264_template(mb_predict_mv_16x16)
 void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] );
 /* x264_mb_predict_mv_pskip:
  *      set mvp with predicted mv for P_SKIP
  *      h->mb. need only valid values from other blocks */
+#define x264_mb_predict_mv_pskip x264_template(mb_predict_mv_pskip)
 void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] );
 /* x264_mb_predict_mv:
  *      set mvp with predicted mv for all blocks except SKIP and DIRECT
  *      h->mb. need valid ref/partition/sub of current block to be valid
  *      and valid mv/ref from other blocks. */
+#define x264_mb_predict_mv x264_template(mb_predict_mv)
 void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] );
 /* x264_mb_predict_mv_direct16x16:
  *      set h->mb.cache.mv and h->mb.cache.ref for B_SKIP or B_DIRECT
@@ -338,14 +354,18 @@
  *      return 1 on success, 0 on failure.
  *      if b_changed != NULL, set it to whether refs or mvs differ from
  *      before this functioncall. */
+#define x264_mb_predict_mv_direct16x16 x264_template(mb_predict_mv_direct16x16)
 int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed );
 /* x264_mb_predict_mv_ref16x16:
  *      set mvc with D_16x16 prediction.
  *      uses all neighbors, even those that didn't end up using this ref.
  *      h->mb. need only valid values from other blocks */
+#define x264_mb_predict_mv_ref16x16 x264_template(mb_predict_mv_ref16x16)
 void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc );
 
+#define x264_mb_mc x264_template(mb_mc)
 void x264_mb_mc( x264_t *h );
+#define x264_mb_mc_8x8 x264_template(mb_mc_8x8)
 void x264_mb_mc_8x8( x264_t *h, int i8 );
 
 static ALWAYS_INLINE uint32_t pack16to32( uint32_t a, uint32_t b )
@@ -441,4 +461,3 @@
 }
 
 #endif
-
diff -Nru x264-0.152.2854+gite9a5903/common/mc.c x264-0.158.2988+git-20191101.7817004/common/mc.c
--- x264-0.152.2854+gite9a5903/common/mc.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mc.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -29,16 +29,16 @@
 #if HAVE_MMX
 #include "x86/mc.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #include "ppc/mc.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #include "arm/mc.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #include "aarch64/mc.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #include "mips/mc.h"
 #endif
 
@@ -107,7 +107,7 @@
 PIXEL_AVG_C( pixel_avg_2x4,   2, 4 )
 PIXEL_AVG_C( pixel_avg_2x2,   2, 2 )
 
-static void x264_weight_cache( x264_t *h, x264_weight_t *w )
+static void weight_cache( x264_t *h, x264_weight_t *w )
 {
     w->weightfn = h->mc.weight;
 }
@@ -134,7 +134,7 @@
 }
 
 #define MC_WEIGHT_C( name, width ) \
-    static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \
+static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \
 { \
     mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
 }
@@ -146,7 +146,7 @@
 MC_WEIGHT_C( mc_weight_w4,   4 )
 MC_WEIGHT_C( mc_weight_w2,   2 )
 
-static weight_fn_t x264_mc_weight_wtab[6] =
+static weight_fn_t mc_weight_wtab[6] =
 {
     mc_weight_w2,
     mc_weight_w4,
@@ -155,7 +155,7 @@
     mc_weight_w16,
     mc_weight_w20,
 };
-const x264_weight_t x264_weight_none[3] = { {{0}} };
+
 static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height )
 {
     for( int y = 0; y < i_height; y++ )
@@ -192,9 +192,6 @@
     }
 }
 
-const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2};
-
 static void mc_luma( pixel *dst,    intptr_t i_dst_stride,
                      pixel *src[4], intptr_t i_src_stride,
                      int mvx, int mvy,
@@ -336,10 +333,10 @@
         }
 }
 
-static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta,
-                                                pixel *dstb, intptr_t i_dstb,
-                                                pixel *dstc, intptr_t i_dstc,
-                                                pixel *src,  intptr_t i_src, int pw, int w, int h )
+static void plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta,
+                                           pixel *dstb, intptr_t i_dstb,
+                                           pixel *dstc, intptr_t i_dstc,
+                                           pixel *src,  intptr_t i_src, int pw, int w, int h )
 {
     for( int y=0; y<h; y++, dsta+=i_dsta, dstb+=i_dstb, dstc+=i_dstc, src+=i_src )
     {
@@ -361,9 +358,9 @@
 #define v210_endian_fix32(x) (x)
 #endif
 
-static void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
-                                                 pixel *dstc, intptr_t i_dstc,
-                                                 uint32_t *src, intptr_t i_src, int w, int h )
+static void plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
+                                            pixel *dstc, intptr_t i_dstc,
+                                            uint32_t *src, intptr_t i_src, int w, int h )
 {
     for( int l = 0; l < h; l++ )
     {
@@ -630,10 +627,10 @@
     pf->avg[PIXEL_2x4]  = pixel_avg_2x4;
     pf->avg[PIXEL_2x2]  = pixel_avg_2x2;
 
-    pf->weight    = x264_mc_weight_wtab;
-    pf->offsetadd = x264_mc_weight_wtab;
-    pf->offsetsub = x264_mc_weight_wtab;
-    pf->weight_cache = x264_weight_cache;
+    pf->weight    = mc_weight_wtab;
+    pf->offsetadd = mc_weight_wtab;
+    pf->offsetsub = mc_weight_wtab;
+    pf->weight_cache = weight_cache;
 
     pf->copy_16x16_unaligned = mc_copy_w16;
     pf->copy[PIXEL_16x16] = mc_copy_w16;
@@ -647,13 +644,15 @@
     pf->plane_copy = x264_plane_copy_c;
     pf->plane_copy_swap = x264_plane_copy_swap_c;
     pf->plane_copy_interleave = x264_plane_copy_interleave_c;
+
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
     pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_c;
-    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
-    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
+    pf->plane_copy_deinterleave_rgb = plane_copy_deinterleave_rgb_c;
+    pf->plane_copy_deinterleave_v210 = plane_copy_deinterleave_v210_c;
 
     pf->hpel_filter = hpel_filter;
 
+    pf->prefetch_fenc_400 = prefetch_fenc_null;
     pf->prefetch_fenc_420 = prefetch_fenc_null;
     pf->prefetch_fenc_422 = prefetch_fenc_null;
     pf->prefetch_ref  = prefetch_ref_null;
@@ -681,7 +680,7 @@
 #if HAVE_ARMV6
     x264_mc_init_arm( cpu, pf );
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     x264_mc_init_aarch64( cpu, pf );
 #endif
 #if HAVE_MSA
diff -Nru x264-0.152.2854+gite9a5903/common/mc.h x264-0.158.2988+git-20191101.7817004/common/mc.h
--- x264-0.152.2854+gite9a5903/common/mc.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mc.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: motion compensation
  *****************************************************************************
- * Copyright (C) 2004-2017 x264 project
+ * Copyright (C) 2004-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -34,14 +34,15 @@
     MC_CLIP_ADD((s)[1], (x)[1]);\
 } while( 0 )
 
+#define x264_mbtree_propagate_list_internal_neon x264_template(mbtree_propagate_list_internal_neon)
 #define PROPAGATE_LIST(cpu)\
 void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
                                                 uint16_t *lowres_costs, int16_t *output,\
                                                 int bipred_weight, int mb_y, int len );\
 \
-static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
-                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
-                                              int bipred_weight, int mb_y, int len, int list )\
+static void mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
+                                         int16_t *propagate_amount, uint16_t *lowres_costs,\
+                                         int bipred_weight, int mb_y, int len, int list )\
 {\
     int16_t *current = h->scratch_buffer2;\
 \
@@ -100,10 +101,11 @@
     }\
 }
 
+#define x264_plane_copy_c x264_template(plane_copy_c)
 void x264_plane_copy_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
 
 #define PLANE_COPY(align, cpu)\
-static void x264_plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
+static void plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
 {\
     int c_w = (align) / sizeof(pixel) - 1;\
     if( w < 256 ) /* tiny resolutions don't want non-temporal hints. dunno the exact threshold. */\
@@ -128,10 +130,11 @@
     }\
 }
 
+#define x264_plane_copy_swap_c x264_template(plane_copy_swap_c)
 void x264_plane_copy_swap_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
 
 #define PLANE_COPY_SWAP(align, cpu)\
-static void x264_plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
+static void plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
 {\
     int c_w = (align>>1) / sizeof(pixel) - 1;\
     if( !(w&c_w) )\
@@ -160,14 +163,15 @@
         x264_plane_copy_swap_c( dst, i_dst, src, i_src, w, h );\
 }
 
+#define x264_plane_copy_deinterleave_c x264_template(plane_copy_deinterleave_c)
 void x264_plane_copy_deinterleave_c( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
                                      pixel *src, intptr_t i_src, int w, int h );
 
 /* We can utilize existing plane_copy_deinterleave() functions for YUYV/UYUV
  * input with the additional constraint that we cannot overread src. */
 #define PLANE_COPY_YUYV(align, cpu)\
-static void x264_plane_copy_deinterleave_yuyv_##cpu( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,\
-                                                     pixel *src, intptr_t i_src, int w, int h )\
+static void plane_copy_deinterleave_yuyv_##cpu( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,\
+                                                pixel *src, intptr_t i_src, int w, int h )\
 {\
     int c_w = (align>>1) / sizeof(pixel) - 1;\
     if( !(w&c_w) )\
@@ -193,14 +197,15 @@
         x264_plane_copy_deinterleave_c( dsta, i_dsta, dstb, i_dstb, src, i_src, w, h );\
 }
 
+#define x264_plane_copy_interleave_c x264_template(plane_copy_interleave_c)
 void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
                                    pixel *srcu, intptr_t i_srcu,
                                    pixel *srcv, intptr_t i_srcv, int w, int h );
 
 #define PLANE_INTERLEAVE(cpu) \
-static void x264_plane_copy_interleave_##cpu( pixel *dst,  intptr_t i_dst,\
-                                              pixel *srcu, intptr_t i_srcu,\
-                                              pixel *srcv, intptr_t i_srcv, int w, int h )\
+static void plane_copy_interleave_##cpu( pixel *dst,  intptr_t i_dst,\
+                                         pixel *srcu, intptr_t i_srcu,\
+                                         pixel *srcv, intptr_t i_srcv, int w, int h )\
 {\
     int c_w = 16 / sizeof(pixel) - 1;\
     if( !(w&c_w) )\
@@ -239,9 +244,7 @@
     weight_fn_t *weightfn;
 } ALIGNED_16( x264_weight_t );
 
-extern const x264_weight_t x264_weight_none[3];
-extern const uint8_t x264_hpel_ref0[16];
-extern const uint8_t x264_hpel_ref1[16];
+#define x264_weight_none ((const x264_weight_t*)x264_zero)
 
 #define SET_WEIGHT( w, b, s, d, o )\
 {\
@@ -305,6 +308,7 @@
 
     /* prefetch the next few macroblocks of fenc or fdec */
     void (*prefetch_fenc)    ( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
+    void (*prefetch_fenc_400)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
     void (*prefetch_fenc_420)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
     void (*prefetch_fenc_422)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
     /* prefetch the next few macroblocks of a hpel reference frame */
@@ -335,6 +339,7 @@
     void (*mbtree_fix8_unpack)( float *dst, uint16_t *src, int count );
 } x264_mc_functions_t;
 
+#define x264_mc_init x264_template(mc_init)
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/mips/dct-c.c x264-0.158.2988+git-20191101.7817004/common/mips/dct-c.c
--- x264-0.152.2854+gite9a5903/common/mips/dct-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/dct-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct-c.c: msa transform and zigzag
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Rishikesh More <rishikesh.more@imgtec.com>
  *
@@ -25,6 +25,7 @@
 
 #include "common/common.h"
 #include "macros.h"
+#include "dct.h"
 
 #if !HIGH_BIT_DEPTH
 #define AVC_ITRANS_H( in0, in1, in2, in3, out0, out1, out2, out3 )          \
diff -Nru x264-0.152.2854+gite9a5903/common/mips/dct.h x264-0.158.2988+git-20191101.7817004/common/mips/dct.h
--- x264-0.152.2854+gite9a5903/common/mips/dct.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/dct.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: msa transform and zigzag
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Rishikesh More <rishikesh.more@imgtec.com>
  *
@@ -26,24 +26,39 @@
 #ifndef X264_MIPS_DCT_H
 #define X264_MIPS_DCT_H
 
+#define x264_dct4x4dc_msa x264_template(dct4x4dc_msa)
 void x264_dct4x4dc_msa( int16_t d[16] );
+#define x264_idct4x4dc_msa x264_template(idct4x4dc_msa)
 void x264_idct4x4dc_msa( int16_t d[16] );
+#define x264_add4x4_idct_msa x264_template(add4x4_idct_msa)
 void x264_add4x4_idct_msa( uint8_t *p_dst, int16_t pi_dct[16] );
+#define x264_add8x8_idct_msa x264_template(add8x8_idct_msa)
 void x264_add8x8_idct_msa( uint8_t *p_dst, int16_t pi_dct[4][16] );
+#define x264_add16x16_idct_msa x264_template(add16x16_idct_msa)
 void x264_add16x16_idct_msa( uint8_t *p_dst, int16_t pi_dct[16][16] );
+#define x264_add8x8_idct8_msa x264_template(add8x8_idct8_msa)
 void x264_add8x8_idct8_msa( uint8_t *p_dst, int16_t pi_dct[64] );
+#define x264_add16x16_idct8_msa x264_template(add16x16_idct8_msa)
 void x264_add16x16_idct8_msa( uint8_t *p_dst, int16_t pi_dct[4][64] );
+#define x264_add8x8_idct_dc_msa x264_template(add8x8_idct_dc_msa)
 void x264_add8x8_idct_dc_msa( uint8_t *p_dst, int16_t pi_dct[4] );
+#define x264_add16x16_idct_dc_msa x264_template(add16x16_idct_dc_msa)
 void x264_add16x16_idct_dc_msa( uint8_t *p_dst, int16_t pi_dct[16] );
+#define x264_sub4x4_dct_msa x264_template(sub4x4_dct_msa)
 void x264_sub4x4_dct_msa( int16_t p_dst[16], uint8_t *p_src, uint8_t *p_ref );
+#define x264_sub8x8_dct_msa x264_template(sub8x8_dct_msa)
 void x264_sub8x8_dct_msa( int16_t p_dst[4][16], uint8_t *p_src,
                           uint8_t *p_ref );
+#define x264_sub16x16_dct_msa x264_template(sub16x16_dct_msa)
 void x264_sub16x16_dct_msa( int16_t p_dst[16][16], uint8_t *p_src,
                             uint8_t *p_ref );
+#define x264_sub8x8_dct_dc_msa x264_template(sub8x8_dct_dc_msa)
 void x264_sub8x8_dct_dc_msa( int16_t pi_dct[4], uint8_t *p_pix1,
                              uint8_t *p_pix2 );
+#define x264_sub8x16_dct_dc_msa x264_template(sub8x16_dct_dc_msa)
 void x264_sub8x16_dct_dc_msa( int16_t pi_dct[8], uint8_t *p_pix1,
                               uint8_t *p_pix2 );
+#define x264_zigzag_scan_4x4_frame_msa x264_template(zigzag_scan_4x4_frame_msa)
 void x264_zigzag_scan_4x4_frame_msa( int16_t pi_level[16], int16_t pi_dct[16] );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/mips/deblock-c.c x264-0.158.2988+git-20191101.7817004/common/mips/deblock-c.c
--- x264-0.152.2854+gite9a5903/common/mips/deblock-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/deblock-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock-c.c: msa deblocking
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Neha Rana <neha.rana@imgtec.com>
  *
@@ -25,6 +25,7 @@
 
 #include "common/common.h"
 #include "macros.h"
+#include "deblock.h"
 
 #if !HIGH_BIT_DEPTH
 #define AVC_LPF_P0P1P2_OR_Q0Q1Q2( p3_or_q3_org_in, p0_or_q0_org_in,           \
diff -Nru x264-0.152.2854+gite9a5903/common/mips/deblock.h x264-0.158.2988+git-20191101.7817004/common/mips/deblock.h
--- x264-0.152.2854+gite9a5903/common/mips/deblock.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/deblock.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,52 @@
+/*****************************************************************************
+ * deblock.h: msa deblocking
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_MIPS_DEBLOCK_H
+#define X264_MIPS_DEBLOCK_H
+
+#if !HIGH_BIT_DEPTH
+#define x264_deblock_v_luma_msa x264_template(deblock_v_luma_msa)
+void x264_deblock_v_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_msa x264_template(deblock_h_luma_msa)
+void x264_deblock_h_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_chroma_msa x264_template(deblock_v_chroma_msa)
+void x264_deblock_v_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_msa x264_template(deblock_h_chroma_msa)
+void x264_deblock_h_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_luma_intra_msa x264_template(deblock_v_luma_intra_msa)
+void x264_deblock_v_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_luma_intra_msa x264_template(deblock_h_luma_intra_msa)
+void x264_deblock_h_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_chroma_intra_msa x264_template(deblock_v_chroma_intra_msa)
+void x264_deblock_v_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_msa x264_template(deblock_h_chroma_intra_msa)
+void x264_deblock_h_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_strength_msa x264_template(deblock_strength_msa)
+void x264_deblock_strength_msa( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
+                                int bframe );
+#endif
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/mips/macros.h x264-0.158.2988+git-20191101.7817004/common/mips/macros.h
--- x264-0.152.2854+gite9a5903/common/mips/macros.h	2017-12-31 12:50:50.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/macros.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macros.h: msa macros
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Rishikesh More <rishikesh.more@imgtec.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/common/mips/mc-c.c x264-0.158.2988+git-20191101.7817004/common/mips/mc-c.c
--- x264-0.152.2854+gite9a5903/common/mips/mc-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/mc-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: msa motion compensation
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Neha Rana <neha.rana@imgtec.com>
  *
@@ -51,129 +51,6 @@
     0, 1, 1, 2, 16, 17, 17, 18, 16, 17, 17, 18, 18, 19, 19, 20
 };
 
-void x264_mc_copy_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                           uint8_t *p_src, intptr_t i_src_stride,
-                           int32_t i_height );
-void x264_mc_copy_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                          uint8_t *p_src, intptr_t i_src_stride,
-                          int32_t i_height );
-void x264_mc_copy_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src,
-                          intptr_t i_src_stride, int32_t i_height );
-void x264_memzero_aligned_msa( void *p_dst, size_t n );
-
-void x264_pixel_avg_16x16_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                               uint8_t *p_pix2, intptr_t i_pix2_stride,
-                               uint8_t *p_pix3, intptr_t i_pix3_stride,
-                               int32_t i_weight );
-void x264_pixel_avg_16x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                              uint8_t *p_pix2, intptr_t i_pix2_stride,
-                              uint8_t *p_pix3, intptr_t i_pix3_stride,
-                              int32_t i_weight );
-void x264_pixel_avg_8x16_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                              uint8_t *p_pix2, intptr_t i_pix2_stride,
-                              uint8_t *p_pix3, intptr_t i_pix3_stride,
-                              int32_t i_weight );
-void x264_pixel_avg_8x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                             uint8_t *p_pix2, intptr_t i_pix2_stride,
-                             uint8_t *p_pix3, intptr_t i_pix3_stride,
-                             int32_t i_weight );
-void x264_pixel_avg_8x4_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                             uint8_t *p_pix2, intptr_t i_pix2_stride,
-                             uint8_t *p_pix3, intptr_t i_pix3_stride,
-                             int32_t i_weight );
-void x264_pixel_avg_4x16_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                              uint8_t *p_pix2, intptr_t pix2_stride,
-                              uint8_t *p_pix3, intptr_t pix3_stride,
-                              int32_t i_weight );
-void x264_pixel_avg_4x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                             uint8_t *p_pix2, intptr_t i_pix2_stride,
-                             uint8_t *p_pix3, intptr_t i_pix3_stride,
-                             int32_t i_weight );
-void x264_pixel_avg_4x4_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                             uint8_t *p_pix2, intptr_t i_pix2_stride,
-                             uint8_t *p_pix3, intptr_t i_pix3_stride,
-                             int32_t i_weight );
-void x264_pixel_avg_4x2_msa( uint8_t *p_pix1, intptr_t i_pix1_stride,
-                             uint8_t *p_pix2, intptr_t i_pix2_stride,
-                             uint8_t *p_pix3, intptr_t i_pix3_stride,
-                             int32_t i_weight );
-
-void x264_mc_weight_w20_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                             uint8_t *p_src, intptr_t i_src_stride,
-                             const x264_weight_t *pWeight, int32_t i_height );
-void x264_mc_weight_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                            uint8_t *p_src, intptr_t i_src_stride,
-                            const x264_weight_t *pWeight, int32_t i_height );
-void x264_mc_weight_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                            uint8_t *p_src, intptr_t i_src_stride,
-                            const x264_weight_t *pWeight, int32_t i_height );
-void x264_mc_weight_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                             uint8_t *p_src, intptr_t i_src_stride,
-                             const x264_weight_t *pWeight, int32_t i_height );
-
-weight_fn_t x264_mc_weight_wtab_msa[6] =
-{
-    x264_mc_weight_w4_msa,
-    x264_mc_weight_w4_msa,
-    x264_mc_weight_w8_msa,
-    x264_mc_weight_w16_msa,
-    x264_mc_weight_w16_msa,
-    x264_mc_weight_w20_msa,
-};
-
-void x264_mc_luma_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                       uint8_t *p_src[4], intptr_t i_src_stride,
-                       int32_t m_vx, int32_t m_vy,
-                       int32_t i_width, int32_t i_height,
-                       const x264_weight_t *pWeight );
-uint8_t *x264_get_ref_msa( uint8_t *p_dst,   intptr_t *p_dst_stride,
-                           uint8_t *p_src[4], intptr_t i_src_stride,
-                           int32_t m_vx, int32_t m_vy,
-                           int32_t i_width, int32_t i_height,
-                           const x264_weight_t *pWeight );
-void x264_mc_chroma_msa( uint8_t *p_dst_u, uint8_t *p_dst_v,
-                         intptr_t i_dst_stride,
-                         uint8_t *p_src, intptr_t i_src_stride,
-                         int32_t m_vx, int32_t m_vy,
-                         int32_t i_width, int32_t i_height );
-void x264_hpel_filter_msa( uint8_t *p_dsth, uint8_t *p_dst_v,
-                           uint8_t *p_dstc, uint8_t *p_src,
-                           intptr_t i_stride, int32_t i_width,
-                           int32_t i_height, int16_t *p_buf );
-
-void x264_plane_copy_interleave_msa( uint8_t *p_dst,  intptr_t i_dst_stride,
-                                     uint8_t *p_src0, intptr_t i_src_stride0,
-                                     uint8_t *p_src1, intptr_t i_src_stride1,
-                                     int32_t i_width, int32_t i_height );
-void x264_plane_copy_deinterleave_msa( uint8_t *p_dst0, intptr_t i_dst_stride0,
-                                       uint8_t *p_dst1, intptr_t i_dst_stride1,
-                                       uint8_t *p_src,  intptr_t i_src_stride,
-                                       int32_t i_width, int32_t i_height );
-void x264_plane_copy_deinterleave_rgb_msa( uint8_t *p_dst0,
-                                           intptr_t i_dst_stride0,
-                                           uint8_t *p_dst1,
-                                           intptr_t i_dst_stride1,
-                                           uint8_t *p_dst2,
-                                           intptr_t i_dst_stride2,
-                                           uint8_t *p_src,
-                                           intptr_t i_src_stride,
-                                           int32_t i_src_width, int32_t i_width,
-                                           int32_t i_height );
-void x264_store_interleave_chroma_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                                       uint8_t *p_src0, uint8_t *p_src1,
-                                       int32_t i_height );
-void x264_load_deinterleave_chroma_fenc_msa( uint8_t *p_dst, uint8_t *p_src,
-                                             intptr_t i_src_stride,
-                                             int32_t i_height );
-void x264_load_deinterleave_chroma_fdec_msa( uint8_t *p_dst, uint8_t *p_src,
-                                             intptr_t i_src_stride,
-                                             int32_t i_height );
-void x264_frame_init_lowres_core_msa( uint8_t *p_src, uint8_t *p_dst0,
-                                      uint8_t *p_dst1, uint8_t *p_dst2,
-                                      uint8_t *p_dst3, intptr_t i_src_stride,
-                                      intptr_t i_dst_stride, int32_t i_width,
-                                      int32_t i_height );
-
 static void avc_luma_hz_16w_msa( uint8_t *p_src, int32_t i_src_stride,
                                  uint8_t *p_dst, int32_t i_dst_stride,
                                  int32_t i_height )
@@ -1861,10 +1738,10 @@
     }
 }
 
-static void plane_copy_interleave_msa( uint8_t *p_src0, int32_t i_src0_stride,
-                                       uint8_t *p_src1, int32_t i_src1_stride,
-                                       uint8_t *p_dst, int32_t i_dst_stride,
-                                       int32_t i_width, int32_t i_height )
+static void core_plane_copy_interleave_msa( uint8_t *p_src0, int32_t i_src0_stride,
+                                            uint8_t *p_src1, int32_t i_src1_stride,
+                                            uint8_t *p_dst, int32_t i_dst_stride,
+                                            int32_t i_width, int32_t i_height )
 {
     int32_t i_loop_width, i_loop_height, i_w_mul8, i_h4w;
     v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
@@ -1966,10 +1843,10 @@
     }
 }
 
-static void plane_copy_deinterleave_msa( uint8_t *p_src, int32_t i_src_stride,
-                                         uint8_t *p_dst0, int32_t dst0_stride,
-                                         uint8_t *p_dst1, int32_t dst1_stride,
-                                         int32_t i_width, int32_t i_height )
+static void core_plane_copy_deinterleave_msa( uint8_t *p_src, int32_t i_src_stride,
+                                              uint8_t *p_dst0, int32_t dst0_stride,
+                                              uint8_t *p_dst1, int32_t dst1_stride,
+                                              int32_t i_width, int32_t i_height )
 {
     int32_t i_loop_width, i_loop_height, i_w_mul4, i_w_mul8, i_h4w;
     uint32_t u_res_w0, u_res_w1;
@@ -2098,16 +1975,16 @@
 }
 
 
-static void plane_copy_deinterleave_rgb_msa( uint8_t *p_src,
-                                             int32_t i_src_stride,
-                                             uint8_t *p_dst0,
-                                             int32_t i_dst0_stride,
-                                             uint8_t *p_dst1,
-                                             int32_t i_dst1_stride,
-                                             uint8_t *p_dst2,
-                                             int32_t i_dst2_stride,
-                                             int32_t i_width,
-                                             int32_t i_height )
+static void core_plane_copy_deinterleave_rgb_msa( uint8_t *p_src,
+                                                  int32_t i_src_stride,
+                                                  uint8_t *p_dst0,
+                                                  int32_t i_dst0_stride,
+                                                  uint8_t *p_dst1,
+                                                  int32_t i_dst1_stride,
+                                                  uint8_t *p_dst2,
+                                                  int32_t i_dst2_stride,
+                                                  int32_t i_width,
+                                                  int32_t i_height )
 {
     uint8_t *p_src_orig = p_src;
     uint8_t *p_dst0_orig = p_dst0;
@@ -2234,16 +2111,16 @@
     }
 }
 
-static void plane_copy_deinterleave_rgba_msa( uint8_t *p_src,
-                                              int32_t i_src_stride,
-                                              uint8_t *p_dst0,
-                                              int32_t i_dst0_stride,
-                                              uint8_t *p_dst1,
-                                              int32_t i_dst1_stride,
-                                              uint8_t *p_dst2,
-                                              int32_t i_dst2_stride,
-                                              int32_t i_width,
-                                              int32_t i_height )
+static void core_plane_copy_deinterleave_rgba_msa( uint8_t *p_src,
+                                                   int32_t i_src_stride,
+                                                   uint8_t *p_dst0,
+                                                   int32_t i_dst0_stride,
+                                                   uint8_t *p_dst1,
+                                                   int32_t i_dst1_stride,
+                                                   uint8_t *p_dst2,
+                                                   int32_t i_dst2_stride,
+                                                   int32_t i_width,
+                                                   int32_t i_height )
 {
     uint8_t *p_src_orig = p_src;
     uint8_t *p_dst0_orig = p_dst0;
@@ -2441,10 +2318,10 @@
     }
 }
 
-static void store_interleave_chroma_msa( uint8_t *p_src0, int32_t i_src0_stride,
-                                         uint8_t *p_src1, int32_t i_src1_stride,
-                                         uint8_t *p_dst, int32_t i_dst_stride,
-                                         int32_t i_height )
+static void core_store_interleave_chroma_msa( uint8_t *p_src0, int32_t i_src0_stride,
+                                              uint8_t *p_src1, int32_t i_src1_stride,
+                                              uint8_t *p_dst, int32_t i_dst_stride,
+                                              int32_t i_height )
 {
     int32_t i_loop_height, i_h4w;
     v16u8 in0, in1, in2, in3, in4, in5, in6, in7;
@@ -2476,12 +2353,12 @@
     }
 }
 
-static void frame_init_lowres_core_msa( uint8_t *p_src, int32_t i_src_stride,
-                                        uint8_t *p_dst0, int32_t dst0_stride,
-                                        uint8_t *p_dst1, int32_t dst1_stride,
-                                        uint8_t *p_dst2, int32_t dst2_stride,
-                                        uint8_t *p_dst3, int32_t dst3_stride,
-                                        int32_t i_width, int32_t i_height )
+static void core_frame_init_lowres_core_msa( uint8_t *p_src, int32_t i_src_stride,
+                                             uint8_t *p_dst0, int32_t dst0_stride,
+                                             uint8_t *p_dst1, int32_t dst1_stride,
+                                             uint8_t *p_dst2, int32_t dst2_stride,
+                                             uint8_t *p_dst3, int32_t dst3_stride,
+                                             int32_t i_width, int32_t i_height )
 {
     int32_t i_loop_width, i_loop_height, i_w16_mul;
     v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
@@ -2583,29 +2460,29 @@
     }
 }
 
-void x264_mc_copy_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                           uint8_t *p_src, intptr_t i_src_stride,
-                           int32_t i_height )
+static void mc_copy_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                             uint8_t *p_src, intptr_t i_src_stride,
+                             int32_t i_height )
 {
     copy_width16_msa( p_src, i_src_stride, p_dst, i_dst_stride, i_height );
 }
 
-void x264_mc_copy_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src,
-                          intptr_t i_src_stride, int32_t i_height )
+static void mc_copy_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src,
+                            intptr_t i_src_stride, int32_t i_height )
 {
     copy_width8_msa( p_src, i_src_stride, p_dst, i_dst_stride, i_height );
 }
 
-void x264_mc_copy_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src,
-                          intptr_t i_src_stride, int32_t i_height )
+static void mc_copy_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src,
+                            intptr_t i_src_stride, int32_t i_height )
 {
     copy_width4_msa( p_src, i_src_stride, p_dst, i_dst_stride, i_height );
 }
 
-void x264_pixel_avg_16x16_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                               uint8_t *p_pix2, intptr_t pix2_stride,
-                               uint8_t *p_pix3, intptr_t pix3_stride,
-                               int32_t i_weight )
+static void pixel_avg_16x16_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                                 uint8_t *p_pix2, intptr_t pix2_stride,
+                                 uint8_t *p_pix3, intptr_t pix3_stride,
+                                 int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2630,10 +2507,10 @@
     }
 }
 
-void x264_pixel_avg_16x8_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                              uint8_t *p_pix2, intptr_t pix2_stride,
-                              uint8_t *p_pix3, intptr_t pix3_stride,
-                              int32_t i_weight )
+static void pixel_avg_16x8_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                                uint8_t *p_pix2, intptr_t pix2_stride,
+                                uint8_t *p_pix3, intptr_t pix3_stride,
+                                int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2658,10 +2535,10 @@
     }
 }
 
-void x264_pixel_avg_8x16_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                              uint8_t *p_pix2, intptr_t pix2_stride,
-                              uint8_t *p_pix3, intptr_t pix3_stride,
-                              int32_t i_weight )
+static void pixel_avg_8x16_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                                uint8_t *p_pix2, intptr_t pix2_stride,
+                                uint8_t *p_pix3, intptr_t pix3_stride,
+                                int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2684,10 +2561,10 @@
     }
 }
 
-void x264_pixel_avg_8x8_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                             uint8_t *p_pix2, intptr_t pix2_stride,
-                             uint8_t *p_pix3, intptr_t pix3_stride,
-                             int32_t i_weight )
+static void pixel_avg_8x8_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                               uint8_t *p_pix2, intptr_t pix2_stride,
+                               uint8_t *p_pix3, intptr_t pix3_stride,
+                               int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2710,10 +2587,10 @@
     }
 }
 
-void x264_pixel_avg_8x4_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                             uint8_t *p_pix2, intptr_t pix2_stride,
-                             uint8_t *p_pix3, intptr_t pix3_stride,
-                             int32_t i_weight )
+static void pixel_avg_8x4_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                               uint8_t *p_pix2, intptr_t pix2_stride,
+                               uint8_t *p_pix3, intptr_t pix3_stride,
+                               int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2736,10 +2613,10 @@
     }
 }
 
-void x264_pixel_avg_4x16_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                              uint8_t *p_pix2, intptr_t pix2_stride,
-                              uint8_t *p_pix3, intptr_t pix3_stride,
-                              int32_t i_weight )
+static void pixel_avg_4x16_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                                uint8_t *p_pix2, intptr_t pix2_stride,
+                                uint8_t *p_pix3, intptr_t pix3_stride,
+                                int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2762,10 +2639,10 @@
     }
 }
 
-void x264_pixel_avg_4x8_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                             uint8_t *p_pix2, intptr_t pix2_stride,
-                             uint8_t *p_pix3, intptr_t pix3_stride,
-                             int32_t i_weight )
+static void pixel_avg_4x8_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                               uint8_t *p_pix2, intptr_t pix2_stride,
+                               uint8_t *p_pix3, intptr_t pix3_stride,
+                               int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2788,10 +2665,10 @@
     }
 }
 
-void x264_pixel_avg_4x4_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                             uint8_t *p_pix2, intptr_t pix2_stride,
-                             uint8_t *p_pix3, intptr_t pix3_stride,
-                             int32_t i_weight )
+static void pixel_avg_4x4_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                               uint8_t *p_pix2, intptr_t pix2_stride,
+                               uint8_t *p_pix3, intptr_t pix3_stride,
+                               int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2814,10 +2691,10 @@
     }
 }
 
-void x264_pixel_avg_4x2_msa( uint8_t *p_pix1, intptr_t pix1_stride,
-                             uint8_t *p_pix2, intptr_t pix2_stride,
-                             uint8_t *p_pix3, intptr_t pix3_stride,
-                             int32_t i_weight )
+static void pixel_avg_4x2_msa( uint8_t *p_pix1, intptr_t pix1_stride,
+                               uint8_t *p_pix2, intptr_t pix2_stride,
+                               uint8_t *p_pix3, intptr_t pix3_stride,
+                               int32_t i_weight )
 {
     if( 32 == i_weight )
     {
@@ -2841,7 +2718,7 @@
 }
 
 
-void x264_memzero_aligned_msa( void *p_dst, size_t n )
+static void memzero_aligned_msa( void *p_dst, size_t n )
 {
     uint32_t u_tot32_mul_lines = n >> 5;
     uint32_t u_remaining = n - ( u_tot32_mul_lines << 5 );
@@ -2854,9 +2731,9 @@
     }
 }
 
-void x264_mc_weight_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                            uint8_t *p_src, intptr_t i_src_stride,
-                            const x264_weight_t *pWeight, int32_t i_height )
+static void mc_weight_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                              uint8_t *p_src, intptr_t i_src_stride,
+                              const x264_weight_t *pWeight, int32_t i_height )
 {
     int32_t i_log2_denom = pWeight->i_denom;
     int32_t i_offset = pWeight->i_offset;
@@ -2866,9 +2743,9 @@
                                 i_height, i_log2_denom, i_weight, i_offset );
 }
 
-void x264_mc_weight_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                            uint8_t *p_src, intptr_t i_src_stride,
-                            const x264_weight_t *pWeight, int32_t i_height )
+static void mc_weight_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                              uint8_t *p_src, intptr_t i_src_stride,
+                              const x264_weight_t *pWeight, int32_t i_height )
 {
     int32_t i_log2_denom = pWeight->i_denom;
     int32_t i_offset = pWeight->i_offset;
@@ -2878,9 +2755,9 @@
                                 i_height, i_log2_denom, i_weight, i_offset );
 }
 
-void x264_mc_weight_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                             uint8_t *p_src, intptr_t i_src_stride,
-                             const x264_weight_t *pWeight, int32_t i_height )
+static void mc_weight_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                               uint8_t *p_src, intptr_t i_src_stride,
+                               const x264_weight_t *pWeight, int32_t i_height )
 {
     int32_t i_log2_denom = pWeight->i_denom;
     int32_t i_offset = pWeight->i_offset;
@@ -2890,21 +2767,21 @@
                                  i_height, i_log2_denom, i_weight, i_offset );
 }
 
-void x264_mc_weight_w20_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                             uint8_t *p_src, intptr_t i_src_stride,
-                             const x264_weight_t *pWeight, int32_t i_height )
-{
-    x264_mc_weight_w16_msa( p_dst, i_dst_stride, p_src, i_src_stride,
-                            pWeight, i_height );
-    x264_mc_weight_w4_msa( p_dst + 16, i_dst_stride, p_src + 16, i_src_stride,
-                           pWeight, i_height );
+static void mc_weight_w20_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                               uint8_t *p_src, intptr_t i_src_stride,
+                               const x264_weight_t *pWeight, int32_t i_height )
+{
+    mc_weight_w16_msa( p_dst, i_dst_stride, p_src, i_src_stride,
+                       pWeight, i_height );
+    mc_weight_w4_msa( p_dst + 16, i_dst_stride, p_src + 16, i_src_stride,
+                      pWeight, i_height );
 }
 
-void x264_mc_luma_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                       uint8_t *p_src[4], intptr_t i_src_stride,
-                       int32_t m_vx, int32_t m_vy,
-                       int32_t i_width, int32_t i_height,
-                       const x264_weight_t *pWeight )
+static void mc_luma_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                         uint8_t *p_src[4], intptr_t i_src_stride,
+                         int32_t m_vx, int32_t m_vy,
+                         int32_t i_width, int32_t i_height,
+                         const x264_weight_t *pWeight )
 {
     int32_t  i_qpel_idx;
     int32_t  i_offset;
@@ -2940,19 +2817,19 @@
         {
             if( 16 == i_width )
             {
-                x264_mc_weight_w16_msa( p_dst, i_dst_stride,
-                                        p_dst, i_dst_stride,
-                                        pWeight, i_height );
+                mc_weight_w16_msa( p_dst, i_dst_stride,
+                                   p_dst, i_dst_stride,
+                                   pWeight, i_height );
             }
             else if( 8 == i_width )
             {
-                x264_mc_weight_w8_msa( p_dst, i_dst_stride, p_dst, i_dst_stride,
-                                       pWeight, i_height );
+                mc_weight_w8_msa( p_dst, i_dst_stride, p_dst, i_dst_stride,
+                                  pWeight, i_height );
             }
             else if( 4 == i_width )
             {
-                x264_mc_weight_w4_msa( p_dst, i_dst_stride, p_dst, i_dst_stride,
-                                       pWeight, i_height );
+                mc_weight_w4_msa( p_dst, i_dst_stride, p_dst, i_dst_stride,
+                                  pWeight, i_height );
             }
         }
     }
@@ -2960,18 +2837,18 @@
     {
         if( 16 == i_width )
         {
-            x264_mc_weight_w16_msa( p_dst, i_dst_stride, p_src1, i_src_stride,
-                                    pWeight, i_height );
+            mc_weight_w16_msa( p_dst, i_dst_stride, p_src1, i_src_stride,
+                               pWeight, i_height );
         }
         else if( 8 == i_width )
         {
-            x264_mc_weight_w8_msa( p_dst, i_dst_stride, p_src1, i_src_stride,
-                                   pWeight, i_height );
+            mc_weight_w8_msa( p_dst, i_dst_stride, p_src1, i_src_stride,
+                              pWeight, i_height );
         }
         else if( 4 == i_width )
         {
-            x264_mc_weight_w4_msa( p_dst, i_dst_stride, p_src1, i_src_stride,
-                                   pWeight, i_height );
+            mc_weight_w4_msa( p_dst, i_dst_stride, p_src1, i_src_stride,
+                              pWeight, i_height );
         }
     }
     else
@@ -2994,11 +2871,11 @@
     }
 }
 
-void x264_mc_chroma_msa( uint8_t *p_dst_u, uint8_t *p_dst_v,
-                         intptr_t i_dst_stride,
-                         uint8_t *p_src, intptr_t i_src_stride,
-                         int32_t m_vx, int32_t m_vy,
-                         int32_t i_width, int32_t i_height )
+static void mc_chroma_msa( uint8_t *p_dst_u, uint8_t *p_dst_v,
+                           intptr_t i_dst_stride,
+                           uint8_t *p_src, intptr_t i_src_stride,
+                           int32_t m_vx, int32_t m_vy,
+                           int32_t i_width, int32_t i_height )
 {
     int32_t i_d8x = m_vx & 0x07;
     int32_t i_d8y = m_vy & 0x07;
@@ -3035,10 +2912,10 @@
     }
 }
 
-void x264_hpel_filter_msa( uint8_t *p_dsth, uint8_t *p_dst_v,
-                           uint8_t *p_dstc, uint8_t *p_src,
-                           intptr_t i_stride, int32_t i_width,
-                           int32_t i_height, int16_t *p_buf )
+static void hpel_filter_msa( uint8_t *p_dsth, uint8_t *p_dst_v,
+                             uint8_t *p_dstc, uint8_t *p_src,
+                             intptr_t i_stride, int32_t i_width,
+                             int32_t i_height, int16_t *p_buf )
 {
     for( int32_t i = 0; i < ( i_width / 16 ); i++ )
     {
@@ -3055,96 +2932,96 @@
     }
 }
 
-void x264_plane_copy_interleave_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                                     uint8_t *p_src0, intptr_t i_src_stride0,
-                                     uint8_t *p_src1, intptr_t i_src_stride1,
-                                     int32_t i_width, int32_t i_height )
-{
-    plane_copy_interleave_msa( p_src0, i_src_stride0, p_src1, i_src_stride1,
-                               p_dst, i_dst_stride, i_width, i_height );
+static void plane_copy_interleave_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                                       uint8_t *p_src0, intptr_t i_src_stride0,
+                                       uint8_t *p_src1, intptr_t i_src_stride1,
+                                       int32_t i_width, int32_t i_height )
+{
+    core_plane_copy_interleave_msa( p_src0, i_src_stride0, p_src1, i_src_stride1,
+                                    p_dst, i_dst_stride, i_width, i_height );
 }
 
-void x264_plane_copy_deinterleave_msa( uint8_t *p_dst0, intptr_t i_dst_stride0,
-                                       uint8_t *p_dst1, intptr_t i_dst_stride1,
-                                       uint8_t *p_src, intptr_t i_src_stride,
-                                       int32_t i_width, int32_t i_height )
+static void plane_copy_deinterleave_msa( uint8_t *p_dst0, intptr_t i_dst_stride0,
+                                         uint8_t *p_dst1, intptr_t i_dst_stride1,
+                                         uint8_t *p_src, intptr_t i_src_stride,
+                                         int32_t i_width, int32_t i_height )
 {
-    plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst0, i_dst_stride0,
-                                 p_dst1, i_dst_stride1, i_width, i_height );
+    core_plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst0, i_dst_stride0,
+                                      p_dst1, i_dst_stride1, i_width, i_height );
 }
 
-void x264_plane_copy_deinterleave_rgb_msa( uint8_t *p_dst0,
-                                           intptr_t i_dst_stride0,
-                                           uint8_t *p_dst1,
-                                           intptr_t i_dst_stride1,
-                                           uint8_t *p_dst2,
-                                           intptr_t i_dst_stride2,
-                                           uint8_t *p_src,
-                                           intptr_t i_src_stride,
-                                           int32_t i_src_width,
-                                           int32_t i_width,
-                                           int32_t i_height )
+static void plane_copy_deinterleave_rgb_msa( uint8_t *p_dst0,
+                                             intptr_t i_dst_stride0,
+                                             uint8_t *p_dst1,
+                                             intptr_t i_dst_stride1,
+                                             uint8_t *p_dst2,
+                                             intptr_t i_dst_stride2,
+                                             uint8_t *p_src,
+                                             intptr_t i_src_stride,
+                                             int32_t i_src_width,
+                                             int32_t i_width,
+                                             int32_t i_height )
 {
     if( 3 == i_src_width )
     {
-        plane_copy_deinterleave_rgb_msa( p_src, i_src_stride,
-                                         p_dst0, i_dst_stride0,
-                                         p_dst1, i_dst_stride1,
-                                         p_dst2, i_dst_stride2,
-                                         i_width, i_height );
+        core_plane_copy_deinterleave_rgb_msa( p_src, i_src_stride,
+                                              p_dst0, i_dst_stride0,
+                                              p_dst1, i_dst_stride1,
+                                              p_dst2, i_dst_stride2,
+                                              i_width, i_height );
     }
     else if( 4 == i_src_width )
     {
-        plane_copy_deinterleave_rgba_msa( p_src, i_src_stride,
-                                          p_dst0, i_dst_stride0,
-                                          p_dst1, i_dst_stride1,
-                                          p_dst2, i_dst_stride2,
-                                          i_width, i_height );
+        core_plane_copy_deinterleave_rgba_msa( p_src, i_src_stride,
+                                               p_dst0, i_dst_stride0,
+                                               p_dst1, i_dst_stride1,
+                                               p_dst2, i_dst_stride2,
+                                               i_width, i_height );
     }
 }
 
-void x264_store_interleave_chroma_msa( uint8_t *p_dst, intptr_t i_dst_stride,
-                                       uint8_t *p_src0, uint8_t *p_src1,
-                                       int32_t i_height )
+static void store_interleave_chroma_msa( uint8_t *p_dst, intptr_t i_dst_stride,
+                                         uint8_t *p_src0, uint8_t *p_src1,
+                                         int32_t i_height )
 {
-    store_interleave_chroma_msa( p_src0, FDEC_STRIDE, p_src1, FDEC_STRIDE,
-                                 p_dst, i_dst_stride, i_height );
+    core_store_interleave_chroma_msa( p_src0, FDEC_STRIDE, p_src1, FDEC_STRIDE,
+                                      p_dst, i_dst_stride, i_height );
 }
 
-void x264_load_deinterleave_chroma_fenc_msa( uint8_t *p_dst, uint8_t *p_src,
-                                             intptr_t i_src_stride,
-                                             int32_t i_height )
+static void load_deinterleave_chroma_fenc_msa( uint8_t *p_dst, uint8_t *p_src,
+                                               intptr_t i_src_stride,
+                                               int32_t i_height )
 {
-    plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FENC_STRIDE,
-                                 ( p_dst + ( FENC_STRIDE / 2 ) ), FENC_STRIDE,
-                                 8, i_height );
+    core_plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FENC_STRIDE,
+                                     ( p_dst + ( FENC_STRIDE / 2 ) ), FENC_STRIDE,
+                                     8, i_height );
 }
 
-void x264_load_deinterleave_chroma_fdec_msa( uint8_t *p_dst, uint8_t *p_src,
-                                             intptr_t i_src_stride,
-                                             int32_t i_height )
+static void load_deinterleave_chroma_fdec_msa( uint8_t *p_dst, uint8_t *p_src,
+                                               intptr_t i_src_stride,
+                                               int32_t i_height )
 {
-    plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FDEC_STRIDE,
-                                 ( p_dst + ( FDEC_STRIDE / 2 ) ), FDEC_STRIDE,
-                                 8, i_height );
+    core_plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FDEC_STRIDE,
+                                      ( p_dst + ( FDEC_STRIDE / 2 ) ), FDEC_STRIDE,
+                                      8, i_height );
 }
 
-void x264_frame_init_lowres_core_msa( uint8_t *p_src, uint8_t *p_dst0,
-                                      uint8_t *p_dst1, uint8_t *p_dst2,
-                                      uint8_t *p_dst3, intptr_t i_src_stride,
-                                      intptr_t i_dst_stride, int32_t i_width,
-                                      int32_t i_height )
+static void frame_init_lowres_core_msa( uint8_t *p_src, uint8_t *p_dst0,
+                                        uint8_t *p_dst1, uint8_t *p_dst2,
+                                        uint8_t *p_dst3, intptr_t i_src_stride,
+                                        intptr_t i_dst_stride, int32_t i_width,
+                                        int32_t i_height )
 {
-    frame_init_lowres_core_msa( p_src, i_src_stride, p_dst0, i_dst_stride,
-                                p_dst1, i_dst_stride, p_dst2, i_dst_stride,
-                                p_dst3, i_dst_stride, i_width, i_height );
+    core_frame_init_lowres_core_msa( p_src, i_src_stride, p_dst0, i_dst_stride,
+                                     p_dst1, i_dst_stride, p_dst2, i_dst_stride,
+                                     p_dst3, i_dst_stride, i_width, i_height );
 }
 
-uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride,
-                           uint8_t *p_src[4], intptr_t i_src_stride,
-                           int32_t m_vx, int32_t m_vy,
-                           int32_t i_width, int32_t i_height,
-                           const x264_weight_t *pWeight )
+static uint8_t *get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride,
+                             uint8_t *p_src[4], intptr_t i_src_stride,
+                             int32_t m_vx, int32_t m_vy,
+                             int32_t i_width, int32_t i_height,
+                             const x264_weight_t *pWeight )
 {
     int32_t i_qpel_idx, i_cnt, i_h4w;
     int32_t i_offset;
@@ -3289,9 +3166,9 @@
 
             if( 16 == i_width || 12 == i_width )
             {
-                x264_mc_weight_w16_msa( p_dst, *p_dst_stride,
-                                        p_dst, *p_dst_stride,
-                                        pWeight, i_h4w );
+                mc_weight_w16_msa( p_dst, *p_dst_stride,
+                                   p_dst, *p_dst_stride,
+                                   pWeight, i_h4w );
                 for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
                 {
                     v16i8 zero = {0};
@@ -3349,9 +3226,9 @@
             }
             else if( 20 == i_width )
             {
-                x264_mc_weight_w20_msa( p_dst, *p_dst_stride,
-                                        p_dst, *p_dst_stride,
-                                        pWeight, i_h4w );
+                mc_weight_w20_msa( p_dst, *p_dst_stride,
+                                   p_dst, *p_dst_stride,
+                                   pWeight, i_h4w );
                 for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
                 {
                     uint32_t temp0;
@@ -3427,9 +3304,9 @@
             }
             else if( 8 == i_width )
             {
-                x264_mc_weight_w8_msa( p_dst, *p_dst_stride,
-                                       p_dst, *p_dst_stride,
-                                       pWeight, i_h4w );
+                mc_weight_w8_msa( p_dst, *p_dst_stride,
+                                  p_dst, *p_dst_stride,
+                                  pWeight, i_h4w );
                 for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
                 {
                     uint64_t temp0;
@@ -3473,9 +3350,9 @@
             }
             else if( 4 == i_width )
             {
-                x264_mc_weight_w4_msa( p_dst, *p_dst_stride,
-                                       p_dst, *p_dst_stride,
-                                       pWeight, i_h4w );
+                mc_weight_w4_msa( p_dst, *p_dst_stride,
+                                  p_dst, *p_dst_stride,
+                                  pWeight, i_h4w );
                 for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
                 {
                     uint32_t temp0;
@@ -3537,8 +3414,8 @@
 
         if( 16 == i_width || 12 == i_width )
         {
-            x264_mc_weight_w16_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
-                                    pWeight, i_h4w );
+            mc_weight_w16_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
+                               pWeight, i_h4w );
             p_src1 = src1_org + i_h4w * i_src_stride;
 
             for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
@@ -3591,8 +3468,8 @@
         }
         else if( 20 == i_width )
         {
-            x264_mc_weight_w20_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
-                                    pWeight, i_h4w );
+            mc_weight_w20_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
+                               pWeight, i_h4w );
             p_src1 = src1_org + i_h4w * i_src_stride;
 
             for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
@@ -3662,8 +3539,8 @@
         }
         else if( 8 == i_width )
         {
-            x264_mc_weight_w8_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
-                                   pWeight, i_h4w );
+            mc_weight_w8_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
+                              pWeight, i_h4w );
             p_src1 = src1_org + i_h4w * i_src_stride;
 
             for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
@@ -3707,8 +3584,8 @@
         }
         else if( 4 == i_width )
         {
-            x264_mc_weight_w4_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
-                                   pWeight, i_h4w );
+            mc_weight_w4_msa( p_dst, *p_dst_stride, p_src1, i_src_stride,
+                              pWeight, i_h4w );
             p_src1 = src1_org + i_h4w * i_src_stride;
 
             for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ )
@@ -3761,6 +3638,16 @@
         return p_src1;
     }
 }
+
+static weight_fn_t mc_weight_wtab_msa[6] =
+{
+    mc_weight_w4_msa,
+    mc_weight_w4_msa,
+    mc_weight_w8_msa,
+    mc_weight_w16_msa,
+    mc_weight_w16_msa,
+    mc_weight_w20_msa,
+};
 #endif // !HIGH_BIT_DEPTH
 
 void x264_mc_init_mips( int32_t cpu, x264_mc_functions_t *pf  )
@@ -3768,42 +3655,42 @@
 #if !HIGH_BIT_DEPTH
     if( cpu & X264_CPU_MSA )
     {
-        pf->mc_luma = x264_mc_luma_msa;
-        pf->mc_chroma = x264_mc_chroma_msa;
-        pf->get_ref = x264_get_ref_msa;
-
-        pf->avg[PIXEL_16x16]= x264_pixel_avg_16x16_msa;
-        pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_msa;
-        pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_msa;
-        pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_msa;
-        pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_msa;
-        pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_msa;
-        pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_msa;
-        pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_msa;
-        pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_msa;
-
-        pf->weight = x264_mc_weight_wtab_msa;
-        pf->offsetadd = x264_mc_weight_wtab_msa;
-        pf->offsetsub = x264_mc_weight_wtab_msa;
-
-        pf->copy_16x16_unaligned = x264_mc_copy_w16_msa;
-        pf->copy[PIXEL_16x16] = x264_mc_copy_w16_msa;
-        pf->copy[PIXEL_8x8] = x264_mc_copy_w8_msa;
-        pf->copy[PIXEL_4x4] = x264_mc_copy_w4_msa;
-
-        pf->store_interleave_chroma = x264_store_interleave_chroma_msa;
-        pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_msa;
-        pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_msa;
-
-        pf->plane_copy_interleave = x264_plane_copy_interleave_msa;
-        pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_msa;
-        pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_msa;
+        pf->mc_luma = mc_luma_msa;
+        pf->mc_chroma = mc_chroma_msa;
+        pf->get_ref = get_ref_msa;
+
+        pf->avg[PIXEL_16x16]= pixel_avg_16x16_msa;
+        pf->avg[PIXEL_16x8] = pixel_avg_16x8_msa;
+        pf->avg[PIXEL_8x16] = pixel_avg_8x16_msa;
+        pf->avg[PIXEL_8x8] = pixel_avg_8x8_msa;
+        pf->avg[PIXEL_8x4] = pixel_avg_8x4_msa;
+        pf->avg[PIXEL_4x16] = pixel_avg_4x16_msa;
+        pf->avg[PIXEL_4x8] = pixel_avg_4x8_msa;
+        pf->avg[PIXEL_4x4] = pixel_avg_4x4_msa;
+        pf->avg[PIXEL_4x2] = pixel_avg_4x2_msa;
+
+        pf->weight = mc_weight_wtab_msa;
+        pf->offsetadd = mc_weight_wtab_msa;
+        pf->offsetsub = mc_weight_wtab_msa;
+
+        pf->copy_16x16_unaligned = mc_copy_w16_msa;
+        pf->copy[PIXEL_16x16] = mc_copy_w16_msa;
+        pf->copy[PIXEL_8x8] = mc_copy_w8_msa;
+        pf->copy[PIXEL_4x4] = mc_copy_w4_msa;
+
+        pf->store_interleave_chroma = store_interleave_chroma_msa;
+        pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc_msa;
+        pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec_msa;
+
+        pf->plane_copy_interleave = plane_copy_interleave_msa;
+        pf->plane_copy_deinterleave = plane_copy_deinterleave_msa;
+        pf->plane_copy_deinterleave_rgb = plane_copy_deinterleave_rgb_msa;
 
-        pf->hpel_filter = x264_hpel_filter_msa;
+        pf->hpel_filter = hpel_filter_msa;
 
         pf->memcpy_aligned = memcpy;
-        pf->memzero_aligned = x264_memzero_aligned_msa;
-        pf->frame_init_lowres_core = x264_frame_init_lowres_core_msa;
+        pf->memzero_aligned = memzero_aligned_msa;
+        pf->frame_init_lowres_core = frame_init_lowres_core_msa;
     }
 #endif // !HIGH_BIT_DEPTH
 }
diff -Nru x264-0.152.2854+gite9a5903/common/mips/mc.h x264-0.158.2988+git-20191101.7817004/common/mips/mc.h
--- x264-0.152.2854+gite9a5903/common/mips/mc.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/mc.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: msa motion compensation
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Neha Rana <neha.rana@imgtec.com>
  *
@@ -26,6 +26,7 @@
 #ifndef X264_MIPS_MC_H
 #define X264_MIPS_MC_H
 
+#define x264_mc_init_mips x264_template(mc_init_mips)
 void x264_mc_init_mips( int cpu, x264_mc_functions_t *pf );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/mips/pixel-c.c x264-0.158.2988+git-20191101.7817004/common/mips/pixel-c.c
--- x264-0.152.2854+gite9a5903/common/mips/pixel-c.c	2017-12-31 12:50:50.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/pixel-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel-c.c: msa pixel metrics
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Mandar Sahastrabuddhe <mandar.sahastrabuddhe@imgtec.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/common/mips/pixel.h x264-0.158.2988+git-20191101.7817004/common/mips/pixel.h
--- x264-0.152.2854+gite9a5903/common/mips/pixel.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/pixel.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: msa pixel metrics
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Mandar Sahastrabuddhe <mandar.sahastrabuddhe@imgtec.com>
  *
@@ -23,146 +23,204 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_MIPS_SAD_H
-#define X264_MIPS_SAD_H
+#ifndef X264_MIPS_PIXEL_H
+#define X264_MIPS_PIXEL_H
 
+#define x264_pixel_sad_16x16_msa x264_template(pixel_sad_16x16_msa)
 int32_t x264_pixel_sad_16x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                   uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_16x8_msa x264_template(pixel_sad_16x8_msa)
 int32_t x264_pixel_sad_16x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_8x16_msa x264_template(pixel_sad_8x16_msa)
 int32_t x264_pixel_sad_8x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_8x8_msa x264_template(pixel_sad_8x8_msa)
 int32_t x264_pixel_sad_8x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_8x4_msa x264_template(pixel_sad_8x4_msa)
 int32_t x264_pixel_sad_8x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_4x16_msa x264_template(pixel_sad_4x16_msa)
 int32_t x264_pixel_sad_4x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_4x8_msa x264_template(pixel_sad_4x8_msa)
 int32_t x264_pixel_sad_4x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_4x4_msa x264_template(pixel_sad_4x4_msa)
 int32_t x264_pixel_sad_4x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_sad_x4_16x16_msa x264_template(pixel_sad_x4_16x16_msa)
 void x264_pixel_sad_x4_16x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                   uint8_t *p_ref1, uint8_t *p_ref2,
                                   uint8_t *p_ref3, intptr_t i_ref_stride,
                                   int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_16x8_msa x264_template(pixel_sad_x4_16x8_msa)
 void x264_pixel_sad_x4_16x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                  uint8_t *p_ref1, uint8_t *p_ref2,
                                  uint8_t *p_ref3, intptr_t i_ref_stride,
                                  int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x16_msa x264_template(pixel_sad_x4_8x16_msa)
 void x264_pixel_sad_x4_8x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                  uint8_t *p_ref1, uint8_t *p_ref2,
                                  uint8_t *p_ref3, intptr_t i_ref_stride,
                                  int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x8_msa x264_template(pixel_sad_x4_8x8_msa)
 void x264_pixel_sad_x4_8x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 uint8_t *p_ref3, intptr_t i_ref_stride,
                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_8x4_msa x264_template(pixel_sad_x4_8x4_msa)
 void x264_pixel_sad_x4_8x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 uint8_t *p_ref3, intptr_t i_ref_stride,
                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_4x8_msa x264_template(pixel_sad_x4_4x8_msa)
 void x264_pixel_sad_x4_4x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 uint8_t *p_ref3, intptr_t i_ref_stride,
                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x4_4x4_msa x264_template(pixel_sad_x4_4x4_msa)
 void x264_pixel_sad_x4_4x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 uint8_t *p_ref3, intptr_t i_ref_stride,
                                 int32_t p_sad_array[4] );
+#define x264_pixel_sad_x3_16x16_msa x264_template(pixel_sad_x3_16x16_msa)
 void x264_pixel_sad_x3_16x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                   uint8_t *p_ref1, uint8_t *p_ref2,
                                   intptr_t i_ref_stride,
                                   int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_16x8_msa x264_template(pixel_sad_x3_16x8_msa)
 void x264_pixel_sad_x3_16x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                  uint8_t *p_ref1, uint8_t *p_ref2,
                                  intptr_t i_ref_stride,
                                  int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_8x16_msa x264_template(pixel_sad_x3_8x16_msa)
 void x264_pixel_sad_x3_8x16_msa( uint8_t *p_src, uint8_t *p_ref0,
                                  uint8_t *p_ref1, uint8_t *p_ref2,
                                  intptr_t i_ref_stride,
                                  int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_8x8_msa x264_template(pixel_sad_x3_8x8_msa)
 void x264_pixel_sad_x3_8x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 intptr_t i_ref_stride,
                                 int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_8x4_msa x264_template(pixel_sad_x3_8x4_msa)
 void x264_pixel_sad_x3_8x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 intptr_t i_ref_stride,
                                 int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_4x8_msa x264_template(pixel_sad_x3_4x8_msa)
 void x264_pixel_sad_x3_4x8_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 intptr_t i_ref_stride,
                                 int32_t p_sad_array[3] );
+#define x264_pixel_sad_x3_4x4_msa x264_template(pixel_sad_x3_4x4_msa)
 void x264_pixel_sad_x3_4x4_msa( uint8_t *p_src, uint8_t *p_ref0,
                                 uint8_t *p_ref1, uint8_t *p_ref2,
                                 intptr_t i_ref_stride,
                                 int32_t p_sad_array[3] );
+#define x264_pixel_ssd_16x16_msa x264_template(pixel_ssd_16x16_msa)
 int32_t x264_pixel_ssd_16x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                   uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_16x8_msa x264_template(pixel_ssd_16x8_msa)
 int32_t x264_pixel_ssd_16x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x16_msa x264_template(pixel_ssd_8x16_msa)
 int32_t x264_pixel_ssd_8x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x8_msa x264_template(pixel_ssd_8x8_msa)
 int32_t x264_pixel_ssd_8x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_8x4_msa x264_template(pixel_ssd_8x4_msa)
 int32_t x264_pixel_ssd_8x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_4x16_msa x264_template(pixel_ssd_4x16_msa)
 int32_t x264_pixel_ssd_4x16_msa( uint8_t *p_src, intptr_t i_src_stride,
                                  uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_4x8_msa x264_template(pixel_ssd_4x8_msa)
 int32_t x264_pixel_ssd_4x8_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_pixel_ssd_4x4_msa x264_template(pixel_ssd_4x4_msa)
 int32_t x264_pixel_ssd_4x4_msa( uint8_t *p_src, intptr_t i_src_stride,
                                 uint8_t *p_ref, intptr_t i_ref_stride );
+#define x264_intra_sad_x3_4x4_msa x264_template(intra_sad_x3_4x4_msa)
 void x264_intra_sad_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec,
                                 int32_t p_sad_array[3] );
+#define x264_intra_sad_x3_16x16_msa x264_template(intra_sad_x3_16x16_msa)
 void x264_intra_sad_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec,
                                   int32_t p_sad_array[3] );
+#define x264_intra_sad_x3_8x8_msa x264_template(intra_sad_x3_8x8_msa)
 void x264_intra_sad_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36],
                                 int32_t p_sad_array[3] );
+#define x264_intra_sad_x3_8x8c_msa x264_template(intra_sad_x3_8x8c_msa)
 void x264_intra_sad_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec,
                                  int32_t p_sad_array[3] );
+#define x264_ssim_4x4x2_core_msa x264_template(ssim_4x4x2_core_msa)
 void x264_ssim_4x4x2_core_msa( const uint8_t *p_pix1, intptr_t i_stride1,
                                const uint8_t *p_pix2, intptr_t i_stride2,
                                int32_t i_sums[2][4] );
+#define x264_pixel_hadamard_ac_8x8_msa x264_template(pixel_hadamard_ac_8x8_msa)
 uint64_t x264_pixel_hadamard_ac_8x8_msa( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_8x16_msa x264_template(pixel_hadamard_ac_8x16_msa)
 uint64_t x264_pixel_hadamard_ac_8x16_msa( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_16x8_msa x264_template(pixel_hadamard_ac_16x8_msa)
 uint64_t x264_pixel_hadamard_ac_16x8_msa( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_hadamard_ac_16x16_msa x264_template(pixel_hadamard_ac_16x16_msa)
 uint64_t x264_pixel_hadamard_ac_16x16_msa( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_satd_4x4_msa x264_template(pixel_satd_4x4_msa)
 int32_t x264_pixel_satd_4x4_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_4x8_msa x264_template(pixel_satd_4x8_msa)
 int32_t x264_pixel_satd_4x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_4x16_msa x264_template(pixel_satd_4x16_msa)
 int32_t x264_pixel_satd_4x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                   uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x4_msa x264_template(pixel_satd_8x4_msa)
 int32_t x264_pixel_satd_8x4_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x8_msa x264_template(pixel_satd_8x8_msa)
 int32_t x264_pixel_satd_8x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_8x16_msa x264_template(pixel_satd_8x16_msa)
 int32_t x264_pixel_satd_8x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                   uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_16x8_msa x264_template(pixel_satd_16x8_msa)
 int32_t x264_pixel_satd_16x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                   uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_satd_16x16_msa x264_template(pixel_satd_16x16_msa)
 int32_t x264_pixel_satd_16x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                    uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_sa8d_8x8_msa x264_template(pixel_sa8d_8x8_msa)
 int32_t x264_pixel_sa8d_8x8_msa( uint8_t *p_pix1, intptr_t i_stride,
                                  uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_pixel_sa8d_16x16_msa x264_template(pixel_sa8d_16x16_msa)
 int32_t x264_pixel_sa8d_16x16_msa( uint8_t *p_pix1, intptr_t i_stride,
                                    uint8_t *p_pix2, intptr_t i_stride2 );
+#define x264_intra_satd_x3_4x4_msa x264_template(intra_satd_x3_4x4_msa)
 void x264_intra_satd_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec,
                                  int32_t p_sad_array[3] );
+#define x264_intra_satd_x3_16x16_msa x264_template(intra_satd_x3_16x16_msa)
 void x264_intra_satd_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec,
                                    int32_t p_sad_array[3] );
+#define x264_intra_sa8d_x3_8x8_msa x264_template(intra_sa8d_x3_8x8_msa)
 void x264_intra_sa8d_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36],
                                  int32_t p_sad_array[3] );
+#define x264_intra_satd_x3_8x8c_msa x264_template(intra_satd_x3_8x8c_msa)
 void x264_intra_satd_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec,
                                   int32_t p_sad_array[3] );
+#define x264_pixel_var_16x16_msa x264_template(pixel_var_16x16_msa)
 uint64_t x264_pixel_var_16x16_msa( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_var_8x16_msa x264_template(pixel_var_8x16_msa)
 uint64_t x264_pixel_var_8x16_msa( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_var_8x8_msa x264_template(pixel_var_8x8_msa)
 uint64_t x264_pixel_var_8x8_msa( uint8_t *p_pix, intptr_t i_stride );
+#define x264_pixel_var2_8x16_msa x264_template(pixel_var2_8x16_msa)
 int32_t x264_pixel_var2_8x16_msa( uint8_t *p_pix1, intptr_t i_stride1,
                                   uint8_t *p_pix2, intptr_t i_stride2,
                                   int32_t *p_ssd );
+#define x264_pixel_var2_8x8_msa x264_template(pixel_var2_8x8_msa)
 int32_t x264_pixel_var2_8x8_msa( uint8_t *p_pix1, intptr_t i_stride1,
                                  uint8_t *p_pix2, intptr_t i_stride2,
                                  int32_t *p_ssd );
diff -Nru x264-0.152.2854+gite9a5903/common/mips/predict-c.c x264-0.158.2988+git-20191101.7817004/common/mips/predict-c.c
--- x264-0.152.2854+gite9a5903/common/mips/predict-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/predict-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict-c.c: msa intra prediction
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Mandar Sahastrabuddhe <mandar.sahastrabuddhe@imgtec.com>
  *
@@ -25,6 +25,7 @@
 
 #include "common/common.h"
 #include "macros.h"
+#include "predict.h"
 
 #if !HIGH_BIT_DEPTH
 static void intra_predict_vert_4x4_msa( uint8_t *p_src, uint8_t *p_dst,
diff -Nru x264-0.152.2854+gite9a5903/common/mips/predict.h x264-0.158.2988+git-20191101.7817004/common/mips/predict.h
--- x264-0.152.2854+gite9a5903/common/mips/predict.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/predict.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: msa intra prediction
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Rishikesh More <rishikesh.more@imgtec.com>
  *
@@ -26,23 +26,41 @@
 #ifndef X264_MIPS_PREDICT_H
 #define X264_MIPS_PREDICT_H
 
+#define x264_intra_predict_dc_16x16_msa x264_template(intra_predict_dc_16x16_msa)
 void x264_intra_predict_dc_16x16_msa( uint8_t *p_src );
+#define x264_intra_predict_dc_left_16x16_msa x264_template(intra_predict_dc_left_16x16_msa)
 void x264_intra_predict_dc_left_16x16_msa( uint8_t *p_src );
+#define x264_intra_predict_dc_top_16x16_msa x264_template(intra_predict_dc_top_16x16_msa)
 void x264_intra_predict_dc_top_16x16_msa( uint8_t *p_src );
+#define x264_intra_predict_dc_128_16x16_msa x264_template(intra_predict_dc_128_16x16_msa)
 void x264_intra_predict_dc_128_16x16_msa( uint8_t *p_src );
+#define x264_intra_predict_hor_16x16_msa x264_template(intra_predict_hor_16x16_msa)
 void x264_intra_predict_hor_16x16_msa( uint8_t *p_src );
+#define x264_intra_predict_vert_16x16_msa x264_template(intra_predict_vert_16x16_msa)
 void x264_intra_predict_vert_16x16_msa( uint8_t *p_src );
+#define x264_intra_predict_plane_16x16_msa x264_template(intra_predict_plane_16x16_msa)
 void x264_intra_predict_plane_16x16_msa( uint8_t *p_src );
+#define x264_intra_predict_dc_4blk_8x8_msa x264_template(intra_predict_dc_4blk_8x8_msa)
 void x264_intra_predict_dc_4blk_8x8_msa( uint8_t *p_src );
+#define x264_intra_predict_hor_8x8_msa x264_template(intra_predict_hor_8x8_msa)
 void x264_intra_predict_hor_8x8_msa( uint8_t *p_src );
+#define x264_intra_predict_vert_8x8_msa x264_template(intra_predict_vert_8x8_msa)
 void x264_intra_predict_vert_8x8_msa( uint8_t *p_src );
+#define x264_intra_predict_plane_8x8_msa x264_template(intra_predict_plane_8x8_msa)
 void x264_intra_predict_plane_8x8_msa( uint8_t *p_src );
+#define x264_intra_predict_ddl_8x8_msa x264_template(intra_predict_ddl_8x8_msa)
 void x264_intra_predict_ddl_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] );
+#define x264_intra_predict_dc_8x8_msa x264_template(intra_predict_dc_8x8_msa)
 void x264_intra_predict_dc_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] );
+#define x264_intra_predict_h_8x8_msa x264_template(intra_predict_h_8x8_msa)
 void x264_intra_predict_h_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] );
+#define x264_intra_predict_v_8x8_msa x264_template(intra_predict_v_8x8_msa)
 void x264_intra_predict_v_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] );
+#define x264_intra_predict_dc_4x4_msa x264_template(intra_predict_dc_4x4_msa)
 void x264_intra_predict_dc_4x4_msa( uint8_t *p_src );
+#define x264_intra_predict_hor_4x4_msa x264_template(intra_predict_hor_4x4_msa)
 void x264_intra_predict_hor_4x4_msa( uint8_t *p_src );
+#define x264_intra_predict_vert_4x4_msa x264_template(intra_predict_vert_4x4_msa)
 void x264_intra_predict_vert_4x4_msa( uint8_t *p_src );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/mips/quant-c.c x264-0.158.2988+git-20191101.7817004/common/mips/quant-c.c
--- x264-0.152.2854+gite9a5903/common/mips/quant-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/quant-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant-c.c: msa quantization and level-run
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Rishikesh More <rishikesh.more@imgtec.com>
  *
@@ -25,6 +25,7 @@
 
 #include "common/common.h"
 #include "macros.h"
+#include "quant.h"
 
 #if !HIGH_BIT_DEPTH
 static void avc_dequant_4x4_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16],
diff -Nru x264-0.152.2854+gite9a5903/common/mips/quant.h x264-0.158.2988+git-20191101.7817004/common/mips/quant.h
--- x264-0.152.2854+gite9a5903/common/mips/quant.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mips/quant.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: msa quantization and level-run
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Rishikesh More <rishikesh.more@imgtec.com>
  *
@@ -26,18 +26,27 @@
 #ifndef X264_MIPS_QUANT_H
 #define X264_MIPS_QUANT_H
 
+#define x264_dequant_4x4_msa x264_template(dequant_4x4_msa)
 void x264_dequant_4x4_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16],
                            int32_t i_qp );
+#define x264_dequant_8x8_msa x264_template(dequant_8x8_msa)
 void x264_dequant_8x8_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][64],
                            int32_t i_qp );
+#define x264_dequant_4x4_dc_msa x264_template(dequant_4x4_dc_msa)
 void x264_dequant_4x4_dc_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16],
                               int32_t i_qp );
+#define x264_quant_4x4_msa x264_template(quant_4x4_msa)
 int32_t x264_quant_4x4_msa( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias );
+#define x264_quant_4x4x4_msa x264_template(quant_4x4x4_msa)
 int32_t x264_quant_4x4x4_msa( int16_t p_dct[4][16],
                               uint16_t pu_mf[16], uint16_t pu_bias[16] );
+#define x264_quant_8x8_msa x264_template(quant_8x8_msa)
 int32_t x264_quant_8x8_msa( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias );
+#define x264_quant_4x4_dc_msa x264_template(quant_4x4_dc_msa)
 int32_t x264_quant_4x4_dc_msa( int16_t *p_dct, int32_t i_mf, int32_t i_bias );
+#define x264_coeff_last64_msa x264_template(coeff_last64_msa)
 int32_t x264_coeff_last64_msa( int16_t *p_src );
+#define x264_coeff_last16_msa x264_template(coeff_last16_msa)
 int32_t x264_coeff_last16_msa( int16_t *p_src );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/mvpred.c x264-0.158.2988+git-20191101.7817004/common/mvpred.c
--- x264-0.152.2854+gite9a5903/common/mvpred.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/mvpred.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mvpred.c: motion vector prediction
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Fiona Glaser <fiona@x264.com>
@@ -180,7 +180,7 @@
         x264_mb_predict_mv_16x16( h, 0, 0, mv );
 }
 
-static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
+static int mb_predict_mv_direct16x16_temporal( x264_t *h )
 {
     int mb_x = h->mb.i_mb_x;
     int mb_y = h->mb.i_mb_y;
@@ -286,7 +286,7 @@
     return 1;
 }
 
-static ALWAYS_INLINE int x264_mb_predict_mv_direct16x16_spatial( x264_t *h, int b_interlaced )
+static ALWAYS_INLINE int mb_predict_mv_direct16x16_spatial( x264_t *h, int b_interlaced )
 {
     int8_t ref[2];
     ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
@@ -441,14 +441,14 @@
 }
 
 
-static int x264_mb_predict_mv_direct16x16_spatial_interlaced( x264_t *h )
+static int mb_predict_mv_direct16x16_spatial_interlaced( x264_t *h )
 {
-    return x264_mb_predict_mv_direct16x16_spatial( h, 1 );
+    return mb_predict_mv_direct16x16_spatial( h, 1 );
 }
 
-static int x264_mb_predict_mv_direct16x16_spatial_progressive( x264_t *h )
+static int mb_predict_mv_direct16x16_spatial_progressive( x264_t *h )
 {
-    return x264_mb_predict_mv_direct16x16_spatial( h, 0 );
+    return mb_predict_mv_direct16x16_spatial( h, 0 );
 }
 
 int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
@@ -459,12 +459,12 @@
     else if( h->sh.b_direct_spatial_mv_pred )
     {
         if( SLICE_MBAFF )
-            b_available = x264_mb_predict_mv_direct16x16_spatial_interlaced( h );
+            b_available = mb_predict_mv_direct16x16_spatial_interlaced( h );
         else
-            b_available = x264_mb_predict_mv_direct16x16_spatial_progressive( h );
+            b_available = mb_predict_mv_direct16x16_spatial_progressive( h );
     }
     else
-        b_available = x264_mb_predict_mv_direct16x16_temporal( h );
+        b_available = mb_predict_mv_direct16x16_temporal( h );
 
     if( b_changed != NULL && b_available )
     {
diff -Nru x264-0.152.2854+gite9a5903/common/opencl.c x264-0.158.2988+git-20191101.7817004/common/opencl.c
--- x264-0.152.2854+gite9a5903/common/opencl.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/opencl.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.c: OpenCL initialization and kernel compilation
  *****************************************************************************
- * Copyright (C) 2012-2017 x264 project
+ * Copyright (C) 2012-2019 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
@@ -115,11 +115,11 @@
 /* Requires full include path in case of out-of-tree builds */
 #include "common/oclobj.h"
 
-static int x264_detect_switchable_graphics( void );
+static int detect_switchable_graphics( void );
 
 /* Try to load the cached compiled program binary, verify the device context is
  * still valid before reuse */
-static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
+static cl_program opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
     /* try to load cached program binary */
     FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
@@ -168,7 +168,7 @@
 
 /* Save the compiled program binary to a file for later reuse.  Device context
  * is also saved in the cache file so we do not reuse stale binaries */
-static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
+static void opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
     FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
     if( !fp )
@@ -216,7 +216,7 @@
  * the Makefile. It defines a x264_opencl_source byte array which we will pass
  * to clCreateProgramWithSource().  We also attempt to use a cache file for the
  * compiled binary, stored in the current working folder. */
-static cl_program x264_opencl_compile( x264_t *h )
+static cl_program opencl_compile( x264_t *h )
 {
     x264_opencl_function_t *ocl = h->opencl.ocl;
     cl_program program = NULL;
@@ -239,7 +239,7 @@
     if( vectorize )
     {
         /* Disable OpenCL on Intel/AMD switchable graphics devices */
-        if( x264_detect_switchable_graphics() )
+        if( detect_switchable_graphics() )
         {
             x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
             return NULL;
@@ -257,7 +257,7 @@
 
     x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", dev_vendor, dev_name, h->opencl.b_device_AMD_SI ? "(SI)" : "" );
 
-    program = x264_opencl_cache_load( h, dev_name, dev_vendor, driver_version );
+    program = opencl_cache_load( h, dev_name, dev_vendor, driver_version );
     if( !program )
     {
         /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */
@@ -277,7 +277,7 @@
     status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
     if( status == CL_SUCCESS )
     {
-        x264_opencl_cache_save( h, program, dev_name, dev_vendor, driver_version );
+        opencl_cache_save( h, program, dev_name, dev_vendor, driver_version );
         return program;
     }
 
@@ -322,7 +322,7 @@
     return NULL;
 }
 
-static int x264_opencl_lookahead_alloc( x264_t *h )
+static int opencl_lookahead_alloc( x264_t *h )
 {
     if( !h->param.rc.i_lookahead )
         return -1;
@@ -360,11 +360,11 @@
     x264_opencl_function_t *ocl = h->opencl.ocl;
     cl_int status;
 
-    h->opencl.lookahead_program = x264_opencl_compile( h );
+    h->opencl.lookahead_program = opencl_compile( h );
     if( !h->opencl.lookahead_program )
         goto fail;
 
-    for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ )
+    for( int i = 0; i < ARRAY_ELEMS(kernelnames); i++ )
     {
         *kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
         if( status != CL_SUCCESS )
@@ -394,7 +394,7 @@
     return -1;
 }
 
-static void CL_CALLBACK x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
+static void CL_CALLBACK opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
 {
     /* Any error notification can be assumed to be fatal to the OpenCL context.
      * We need to stop using it immediately to prevent further damage. */
@@ -470,7 +470,7 @@
 
             if( context )
                 ocl->clReleaseContext( context );
-            context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
+            context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)opencl_error_notify, (void*)h, &status );
             if( status != CL_SUCCESS || !context )
                 continue;
 
@@ -540,7 +540,7 @@
     if( ret )
         x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" );
     else
-        ret = x264_opencl_lookahead_alloc( h );
+        ret = opencl_lookahead_alloc( h );
 
 fail:
     if( context )
@@ -551,7 +551,7 @@
     return ret;
 }
 
-static void x264_opencl_lookahead_free( x264_t *h )
+static void opencl_lookahead_free( x264_t *h )
 {
     x264_opencl_function_t *ocl = h->opencl.ocl;
 
@@ -600,7 +600,7 @@
     if( h->opencl.queue )
         ocl->clFinish( h->opencl.queue );
 
-    x264_opencl_lookahead_free( h );
+    opencl_lookahead_free( h );
 
     if( h->opencl.queue )
     {
@@ -663,7 +663,7 @@
     return x264_malloc( iSize );
 }
 
-static int x264_detect_switchable_graphics( void )
+static int detect_switchable_graphics( void )
 {
     void *hDLL;
     ADL_MAIN_CONTROL_CREATE          ADL_Main_Control_Create;
diff -Nru x264-0.152.2854+gite9a5903/common/opencl.h x264-0.158.2988+git-20191101.7817004/common/opencl.h
--- x264-0.152.2854+gite9a5903/common/opencl.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/opencl.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.h: OpenCL structures and defines
  *****************************************************************************
- * Copyright (C) 2012-2017 x264 project
+ * Copyright (C) 2012-2019 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
@@ -793,12 +793,17 @@
 
 typedef struct x264_frame x264_frame;
 
+#define x264_opencl_load_library x264_template(opencl_load_library)
 x264_opencl_function_t *x264_opencl_load_library( void );
+#define x264_opencl_close_library x264_template(opencl_close_library)
 void x264_opencl_close_library( x264_opencl_function_t *ocl );
 
+#define x264_opencl_lookahead_init x264_template(opencl_lookahead_init)
 int x264_opencl_lookahead_init( x264_t *h );
+#define x264_opencl_lookahead_delete x264_template(opencl_lookahead_delete)
 void x264_opencl_lookahead_delete( x264_t *h );
 
+#define x264_opencl_frame_delete x264_template(opencl_frame_delete)
 void x264_opencl_frame_delete( x264_frame *frame );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/osdep.c x264-0.158.2988+git-20191101.7817004/common/osdep.c
--- x264-0.152.2854+gite9a5903/common/osdep.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/osdep.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * osdep.c: platform-specific code
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -25,12 +25,7 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#include "common.h"
-
-#ifdef _WIN32
-#include <windows.h>
-#include <io.h>
-#endif
+#include "osdep.h"
 
 #if SYS_WINDOWS
 #include <sys/types.h>
@@ -51,6 +46,10 @@
     struct timeb tb;
     ftime( &tb );
     return ((int64_t)tb.time * 1000 + (int64_t)tb.millitm) * 1000;
+#elif HAVE_CLOCK_GETTIME
+    struct timespec ts;
+    clock_gettime( CLOCK_MONOTONIC, &ts );
+    return (int64_t)ts.tv_sec * 1000000 + (int64_t)ts.tv_nsec / 1000;
 #else
     struct timeval tv_date;
     gettimeofday( &tv_date, NULL );
@@ -60,9 +59,9 @@
 
 #if HAVE_WIN32THREAD || PTW32_STATIC_LIB
 /* state of the threading library being initialized */
-static volatile LONG x264_threading_is_init = 0;
+static volatile LONG threading_is_init = 0;
 
-static void x264_threading_destroy( void )
+static void threading_destroy( void )
 {
 #if PTW32_STATIC_LIB
     pthread_win32_thread_detach_np();
@@ -72,11 +71,8 @@
 #endif
 }
 
-int x264_threading_init( void )
+static int threading_init( void )
 {
-    /* if already init, then do nothing */
-    if( InterlockedCompareExchange( &x264_threading_is_init, 1, 0 ) )
-        return 0;
 #if PTW32_STATIC_LIB
     /* if static pthread-win32 is already initialized, then do nothing */
     if( ptw32_processInitialized )
@@ -88,119 +84,25 @@
         return -1;
 #endif
     /* register cleanup to run at process termination */
-    atexit( x264_threading_destroy );
-
+    atexit( threading_destroy );
     return 0;
 }
-#endif
-
-#ifdef _WIN32
-/* Functions for dealing with Unicode on Windows. */
-FILE *x264_fopen( const char *filename, const char *mode )
-{
-    wchar_t filename_utf16[MAX_PATH];
-    wchar_t mode_utf16[16];
-    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
-        return _wfopen( filename_utf16, mode_utf16 );
-    return NULL;
-}
 
-int x264_rename( const char *oldname, const char *newname )
+int x264_threading_init( void ) /* one-time init; threading_is_init states: 0 = not initialized, -1 = init in progress, 1 = initialized */
 {
-    wchar_t oldname_utf16[MAX_PATH];
-    wchar_t newname_utf16[MAX_PATH];
-    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
+    LONG state; /* value of threading_is_init observed by the compare-exchange below */
+    while( (state = InterlockedCompareExchange( &threading_is_init, -1, 0 )) != 0 ) /* try to claim 0 -> -1; loops again while another caller holds -1 */
     {
-        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
-        _wunlink( newname_utf16 );
-        return _wrename( oldname_utf16, newname_utf16 );
+        /* if already init, then do nothing */
+        if( state > 0 )
+            return 0;
     }
-    return -1;
-}
-
-int x264_stat( const char *path, x264_struct_stat *buf )
-{
-    wchar_t path_utf16[MAX_PATH];
-    if( utf8_to_utf16( path, path_utf16 ) )
-        return _wstati64( path_utf16, buf );
-    return -1;
-}
-
-#if !HAVE_WINRT
-int x264_vfprintf( FILE *stream, const char *format, va_list arg )
-{
-    HANDLE console = NULL;
-    DWORD mode;
-
-    if( stream == stdout )
-        console = GetStdHandle( STD_OUTPUT_HANDLE );
-    else if( stream == stderr )
-        console = GetStdHandle( STD_ERROR_HANDLE );
-
-    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
-    if( GetConsoleMode( console, &mode ) )
+    if( threading_init() < 0 ) /* only the caller that won the 0 -> -1 claim reaches this point */
     {
-        char buf[4096];
-        wchar_t buf_utf16[4096];
-        va_list arg2;
-
-        va_copy( arg2, arg );
-        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
-        va_end( arg2 );
-
-        if( length > 0 && length < sizeof(buf) )
-        {
-            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
-            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
-            DWORD written;
-            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
-            return length;
-        }
+        InterlockedExchange( &threading_is_init, 0 ); /* init failed: drop the claim so a later call can try again */
+        return -1;
     }
-    return vfprintf( stream, format, arg );
-}
-
-int x264_is_pipe( const char *path )
-{
-    wchar_t path_utf16[MAX_PATH];
-    if( utf8_to_utf16( path, path_utf16 ) )
-        return WaitNamedPipeW( path_utf16, 0 );
+    InterlockedExchange( &threading_is_init, 1 ); /* publish success; callers looping above now observe state > 0 */
     return 0;
 }
 #endif
-
-#if defined(_MSC_VER) && _MSC_VER < 1900
-/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
-int x264_snprintf( char *s, size_t n, const char *fmt, ... )
-{
-    va_list arg;
-    va_start( arg, fmt );
-    int length = x264_vsnprintf( s, n, fmt, arg );
-    va_end( arg );
-    return length;
-}
-
-int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg )
-{
-    int length = -1;
-
-    if( n )
-    {
-        va_list arg2;
-        va_copy( arg2, arg );
-        length = _vsnprintf( s, n, fmt, arg2 );
-        va_end( arg2 );
-
-        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
-        if( length < 0 || length >= n )
-            s[n-1] = '\0';
-    }
-
-    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
-    if( length < 0 )
-        return _vscprintf( fmt, arg );
-
-    return length;
-}
-#endif
-#endif
diff -Nru x264-0.152.2854+gite9a5903/common/osdep.h x264-0.158.2988+git-20191101.7817004/common/osdep.h
--- x264-0.152.2854+gite9a5903/common/osdep.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/osdep.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * osdep.h: platform-specific code
  *****************************************************************************
- * Copyright (C) 2007-2017 x264 project
+ * Copyright (C) 2007-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -43,6 +43,13 @@
 #include <math.h>
 #endif
 
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+
+#include "x264.h"
+
 #if !HAVE_LOG2F
 #define log2f(x) (logf(x)/0.693147180559945f)
 #define log2(x) (log(x)/0.693147180559945)
@@ -54,12 +61,6 @@
 #define strncasecmp _strnicmp
 #define strtok_r strtok_s
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
-#if _MSC_VER < 1900
-int x264_snprintf( char *s, size_t n, const char *fmt, ... );
-int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg );
-#define snprintf  x264_snprintf
-#define vsnprintf x264_vsnprintf
-#endif
 #else
 #include <strings.h>
 #endif
@@ -72,18 +73,85 @@
 #define isfinite finite
 #endif
 
-#ifdef _WIN32
-#ifndef strtok_r
+#if !HAVE_STRTOK_R && !defined(strtok_r)
 #define strtok_r(str,delim,save) strtok(str,delim)
 #endif
 
+#if defined(_MSC_VER) && _MSC_VER < 1900
+/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */
+static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ) /* vsnprintf replacement built on _vsnprintf and _vscprintf */
+{
+    int length = -1; /* stays negative when n == 0, which routes to the _vscprintf call below */
+
+    if( n )
+    {
+        va_list arg2;
+        va_copy( arg2, arg ); /* format from a copy: arg itself is still needed by _vscprintf below */
+        length = _vsnprintf( s, n, fmt, arg2 );
+        va_end( arg2 );
+
+        /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */
+        if( length < 0 || length >= n )
+            s[n-1] = '\0'; /* no terminator was written: place one in the last byte of the buffer */
+    }
+
+    /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */
+    if( length < 0 )
+        return _vscprintf( fmt, arg ); /* return the length computed by _vscprintf instead of the negative value */
+
+    return length;
+}
+
+static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... ) /* variadic front end for x264_vsnprintf() */
+{
+    va_list arg;
+    va_start( arg, fmt );
+    int length = x264_vsnprintf( s, n, fmt, arg ); /* all formatting and termination handling happens there */
+    va_end( arg );
+    return length; /* the value reported by x264_vsnprintf(), unchanged */
+}
+
+#define snprintf  x264_snprintf
+#define vsnprintf x264_vsnprintf
+#endif
+
+#ifdef _WIN32
 #define utf8_to_utf16( utf8, utf16 )\
     MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
-FILE *x264_fopen( const char *filename, const char *mode );
-int x264_rename( const char *oldname, const char *newname );
+
+/* Functions for dealing with Unicode on Windows. */
+static inline FILE *x264_fopen( const char *filename, const char *mode ) /* fopen taking UTF-8 arguments, forwarded to _wfopen as UTF-16 */
+{
+    wchar_t filename_utf16[MAX_PATH]; /* utf8_to_utf16 sizes the conversion from this array, so MAX_PATH wchar_t is the limit */
+    wchar_t mode_utf16[16];
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) ) /* both conversions must yield a non-zero result */
+        return _wfopen( filename_utf16, mode_utf16 );
+    return NULL; /* a conversion failed; _wfopen was not called */
+}
+
+static inline int x264_rename( const char *oldname, const char *newname ) /* rename taking UTF-8 paths; removes newname first */
+{
+    wchar_t oldname_utf16[MAX_PATH];
+    wchar_t newname_utf16[MAX_PATH];
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) ) /* both conversions must yield a non-zero result */
+    {
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
+        _wunlink( newname_utf16 ); /* result is not checked; only the _wrename result is reported */
+        return _wrename( oldname_utf16, newname_utf16 );
+    }
+    return -1; /* a conversion failed; nothing was unlinked or renamed */
+}
+
 #define x264_struct_stat struct _stati64
 #define x264_fstat _fstati64
-int x264_stat( const char *path, x264_struct_stat *buf );
+
+static inline int x264_stat( const char *path, x264_struct_stat *buf ) /* stat taking a UTF-8 path, forwarded to _wstati64 */
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) ) /* a zero result means the conversion failed */
+        return _wstati64( path_utf16, buf ); /* buf is filled in by _wstati64; its return value is passed through */
+    return -1; /* conversion failed; buf is left untouched */
+}
 #else
 #define x264_fopen       fopen
 #define x264_rename      rename
@@ -93,16 +161,57 @@
 #endif
 
 /* mdate: return the current date in microsecond */
-int64_t x264_mdate( void );
+X264_API int64_t x264_mdate( void );
 
 #if defined(_WIN32) && !HAVE_WINRT
-int x264_vfprintf( FILE *stream, const char *format, va_list arg );
-int x264_is_pipe( const char *path );
+static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg ) /* vfprintf that writes UTF-8 text to a console as UTF-16 */
+{
+    HANDLE console = NULL; /* remains NULL for any stream other than stdout/stderr */
+    DWORD mode; /* only needed as the output argument of GetConsoleMode; its value is not read */
+
+    if( stream == stdout )
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
+    else if( stream == stderr )
+        console = GetStdHandle( STD_ERROR_HANDLE );
+
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
+    if( GetConsoleMode( console, &mode ) )
+    {
+        char buf[4096];
+        wchar_t buf_utf16[4096];
+        va_list arg2;
+
+        va_copy( arg2, arg ); /* format from a copy: arg is still needed by the vfprintf fallback */
+        int length = vsnprintf( buf, sizeof(buf), format, arg2 );
+        va_end( arg2 );
+
+        if( length > 0 && length < sizeof(buf) ) /* empty, failed or truncated output falls through to vfprintf */
+        {
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
+            DWORD written;
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ); /* neither the result nor 'written' is checked */
+            return length; /* length of the UTF-8 text in bytes, not the UTF-16 count */
+        }
+    }
+    return vfprintf( stream, format, arg ); /* fallback: the stream is not a console, or the text did not fit in buf */
+}
+
+static inline int x264_is_pipe( const char *path ) /* probes a UTF-8 path with WaitNamedPipeW */
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) ) /* a zero result means the conversion failed */
+        return WaitNamedPipeW( path_utf16, 0 ); /* called with a timeout argument of 0; its result is returned as-is */
+    return 0; /* conversion failed: reported the same as "not a pipe" */
+}
 #else
 #define x264_vfprintf vfprintf
 #define x264_is_pipe(x) 0
 #endif
 
+#define x264_glue3_expand(x,y,z) x##_##y##_##z
+#define x264_glue3(x,y,z) x264_glue3_expand(x,y,z)
+
 #ifdef _MSC_VER
 #define DECLARE_ALIGNED( var, n ) __declspec(align(n)) var
 #else
@@ -160,6 +269,12 @@
 #define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
 #endif
 
+#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)
+#define REALIGN_STACK __attribute__((force_align_arg_pointer)) /* NOTE(review): presumably meant for functions whose callers may not provide STACK_ALIGNMENT - confirm at the use sites */
+#else
+#define REALIGN_STACK /* expands to nothing on every other configuration */
+#endif
+
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
 #define UNUSED __attribute__((unused))
 #define ALWAYS_INLINE __attribute__((always_inline)) inline
@@ -244,7 +359,7 @@
 #endif
 
 #if HAVE_WIN32THREAD || PTW32_STATIC_LIB
-int x264_threading_init( void );
+X264_API int x264_threading_init( void );
 #else
 #define x264_threading_init() 0
 #endif
@@ -319,7 +434,7 @@
 #endif
 
 /* For values with 4 bits or less. */
-static int ALWAYS_INLINE x264_ctz_4bit( uint32_t x )
+static ALWAYS_INLINE int x264_ctz_4bit( uint32_t x )
 {
     static uint8_t lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
     return lut[x];
@@ -329,7 +444,7 @@
 #define x264_clz(x) __builtin_clz(x)
 #define x264_ctz(x) __builtin_ctz(x)
 #else
-static int ALWAYS_INLINE x264_clz( uint32_t x )
+static ALWAYS_INLINE int x264_clz( uint32_t x )
 {
     static uint8_t lut[16] = {4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0};
     int y, z = (((x >> 16) - 1) >> 27) & 16;
@@ -341,7 +456,7 @@
     return z + lut[x];
 }
 
-static int ALWAYS_INLINE x264_ctz( uint32_t x )
+static ALWAYS_INLINE int x264_ctz( uint32_t x )
 {
     static uint8_t lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
     int y, z = (((x & 0xffff) - 1) >> 27) & 16;
diff -Nru x264-0.152.2854+gite9a5903/common/pixel.c x264-0.158.2988+git-20191101.7817004/common/pixel.c
--- x264-0.152.2854+gite9a5903/common/pixel.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/pixel.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -31,18 +31,18 @@
 #   include "x86/pixel.h"
 #   include "x86/predict.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/pixel.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/pixel.h"
 #   include "arm/predict.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/pixel.h"
 #   include "aarch64/predict.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/pixel.h"
 #endif
 
@@ -194,9 +194,9 @@
     return sum + ((uint64_t)sqr << 32);       \
 }
 
-PIXEL_VAR_C( x264_pixel_var_16x16, 16, 16 )
-PIXEL_VAR_C( x264_pixel_var_8x16,   8, 16 )
-PIXEL_VAR_C( x264_pixel_var_8x8,    8,  8 )
+PIXEL_VAR_C( pixel_var_16x16, 16, 16 )
+PIXEL_VAR_C( pixel_var_8x16,   8, 16 )
+PIXEL_VAR_C( pixel_var_8x8,    8,  8 )
 
 /****************************************************************************
  * pixel_var2_wxh
@@ -225,8 +225,8 @@
            sqr_v - ((int64_t)sum_v * sum_v >> shift); \
 }
 
-PIXEL_VAR2_C( x264_pixel_var2_8x16, 16, 7 )
-PIXEL_VAR2_C( x264_pixel_var2_8x8,   8, 6 )
+PIXEL_VAR2_C( pixel_var2_8x16, 16, 7 )
+PIXEL_VAR2_C( pixel_var2_8x8,   8, 6 )
 
 #if BIT_DEPTH > 8
     typedef uint32_t sum_t;
@@ -503,17 +503,18 @@
 SATD_X_DECL7( _sse4 )
 SATD_X_DECL7( _avx )
 SATD_X_DECL7( _xop )
+SATD_X_DECL7( _avx512 )
 #endif // !HIGH_BIT_DEPTH
 #endif
 
 #if !HIGH_BIT_DEPTH
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
 SATD_X_DECL7( _neon )
 #endif
 #endif // !HIGH_BIT_DEPTH
 
 #define INTRA_MBCMP_8x8( mbcmp, cpu, cpu2 )\
-void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] )\
+static void intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] )\
 {\
     ALIGNED_ARRAY_16( pixel, pix, [8*FDEC_STRIDE] );\
     x264_predict_8x8_v##cpu2( pix, edge );\
@@ -531,13 +532,13 @@
 INTRA_MBCMP_8x8( sad, _mmx2,  _c )
 INTRA_MBCMP_8x8(sa8d, _sse2,  _sse2 )
 #endif
-#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64)
+#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64)
 INTRA_MBCMP_8x8( sad, _neon, _neon )
 INTRA_MBCMP_8x8(sa8d, _neon, _neon )
 #endif
 
 #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
-void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
+static void intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
 {\
     x264_predict_##size##chroma##_##pred1##cpu2( fdec );\
     res[0] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
@@ -601,7 +602,7 @@
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _neon, _neon )
 INTRA_MBCMP(satd, 16x16,  v, h, dc,  , _neon, _neon )
 #endif
-#if !HIGH_BIT_DEPTH && ARCH_AARCH64
+#if !HIGH_BIT_DEPTH && HAVE_AARCH64
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _neon, _neon )
 INTRA_MBCMP(satd,  4x4,   v, h, dc,  , _neon, _neon )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _neon, _neon )
@@ -613,7 +614,7 @@
 #endif
 
 // No C implementation of intra_satd_x9. See checkasm for its behavior,
-// or see x264_mb_analyse_intra for the entirely different algorithm we
+// or see mb_analyse_intra for the entirely different algorithm we
 // use when lacking an asm implementation of it.
 
 
@@ -851,11 +852,11 @@
 
     pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16;
     pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8;
-    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16;
-    pixf->var[PIXEL_8x16]  = x264_pixel_var_8x16;
-    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8;
-    pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16;
-    pixf->var2[PIXEL_8x8]   = x264_pixel_var2_8x8;
+    pixf->var[PIXEL_16x16] = pixel_var_16x16;
+    pixf->var[PIXEL_8x16]  = pixel_var_8x16;
+    pixf->var[PIXEL_8x8]   = pixel_var_8x8;
+    pixf->var2[PIXEL_8x16]  = pixel_var2_8x16;
+    pixf->var2[PIXEL_8x8]   = pixel_var2_8x8;
 
     pixf->ssd_nv12_core = pixel_ssd_nv12_core;
     pixf->ssim_4x4x2_core = ssim_4x4x2_core;
@@ -863,16 +864,16 @@
     pixf->vsad = pixel_vsad;
     pixf->asd8 = pixel_asd8;
 
-    pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4;
-    pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4;
-    pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8;
-    pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8;
-    pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c;
-    pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c;
-    pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c;
-    pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c;
-    pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16;
-    pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
+    pixf->intra_sad_x3_4x4    = intra_sad_x3_4x4;
+    pixf->intra_satd_x3_4x4   = intra_satd_x3_4x4;
+    pixf->intra_sad_x3_8x8    = intra_sad_x3_8x8;
+    pixf->intra_sa8d_x3_8x8   = intra_sa8d_x3_8x8;
+    pixf->intra_sad_x3_8x8c   = intra_sad_x3_8x8c;
+    pixf->intra_satd_x3_8x8c  = intra_satd_x3_8x8c;
+    pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c;
+    pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c;
+    pixf->intra_sad_x3_16x16  = intra_sad_x3_16x16;
+    pixf->intra_satd_x3_16x16 = intra_satd_x3_16x16;
 
 #if HIGH_BIT_DEPTH
 #if HAVE_MMX
@@ -889,14 +890,14 @@
         INIT8( ssd, _mmx2 );
         INIT_ADS( _mmx2 );
 
-        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_mmx2;
+        pixf->intra_sad_x3_4x4    = intra_sad_x3_4x4_mmx2;
         pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_mmx2;
-        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
-        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
+        pixf->intra_sad_x3_8x8    = intra_sad_x3_8x8_mmx2;
+        pixf->intra_sad_x3_8x8c   = intra_sad_x3_8x8c_mmx2;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
-        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
-        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
+        pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c_mmx2;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_mmx2;
+        pixf->intra_sad_x3_16x16  = intra_sad_x3_16x16_mmx2;
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
     }
     if( cpu&X264_CPU_SSE2 )
@@ -906,21 +907,22 @@
         INIT6( satd, _sse2 );
         pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_sse2;
 
-        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
-        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
-#if ARCH_X86_64
-        pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
-        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2;
-#endif
-        pixf->intra_sad_x3_4x4  = x264_intra_sad_x3_4x4_sse2;
-        pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_sse2;
+        pixf->ssd_nv12_core    = x264_pixel_ssd_nv12_core_sse2;
         pixf->ssim_4x4x2_core  = x264_pixel_ssim_4x4x2_core_sse2;
         pixf->ssim_end4        = x264_pixel_ssim_end4_sse2;
         pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2;
         pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_sse2;
         pixf->var2[PIXEL_8x8]  = x264_pixel_var2_8x8_sse2;
         pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_sse2;
-        pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_sse2;
+
+        pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
+        pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_sse2;
+#if ARCH_X86_64
+        pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2;
+#endif
+        pixf->intra_sad_x3_4x4  = x264_intra_sad_x3_4x4_sse2;
+        pixf->intra_sad_x3_8x8  = x264_intra_sad_x3_8x8_sse2;
+        pixf->intra_sa8d_x3_8x8 = intra_sa8d_x3_8x8_sse2;
     }
     if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) )
     {
@@ -936,10 +938,10 @@
         pixf->vsad = x264_pixel_vsad_sse2;
         pixf->asd8 = x264_pixel_asd8_sse2;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_sse2;
-        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_sse2;
-        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
-        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_sse2;
+        pixf->intra_sad_x3_8x8c   = intra_sad_x3_8x8c_sse2;
+        pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c_sse2;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse2;
+        pixf->intra_sad_x3_16x16  = intra_sad_x3_16x16_sse2;
     }
     if( cpu&X264_CPU_SSE2_IS_FAST )
     {
@@ -971,7 +973,6 @@
         }
         pixf->vsad = x264_pixel_vsad_ssse3;
         pixf->asd8 = x264_pixel_asd8_ssse3;
-        pixf->intra_sad_x3_4x4  = x264_intra_sad_x3_4x4_ssse3;
         pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
         pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_ssse3;
 #if ARCH_X86_64
@@ -979,10 +980,10 @@
 #endif
         pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
-        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
-        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_ssse3;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
-        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
+        pixf->intra_sad_x3_8x8c   = intra_sad_x3_8x8c_ssse3;
+        pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c_ssse3;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_ssse3;
+        pixf->intra_sad_x3_16x16  = intra_sad_x3_16x16_ssse3;
     }
     if( cpu&X264_CPU_SSE4 )
     {
@@ -997,7 +998,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
 #endif
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse4;
     }
     if( cpu&X264_CPU_AVX )
     {
@@ -1022,7 +1023,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
 #endif
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_avx;
     }
     if( cpu&X264_CPU_XOP )
     {
@@ -1108,8 +1109,8 @@
 #endif
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
-        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_mmx2;
+        pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c_mmx2;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
@@ -1151,8 +1152,8 @@
         pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2;
         pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_sse2;
         pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
-        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse2;
+        pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c_sse2;
         if( cpu&X264_CPU_CACHELINE_64 )
         {
             INIT2( ssd, _sse2); /* faster for width 16 on p4 */
@@ -1231,7 +1232,7 @@
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3;
         if( !(cpu&X264_CPU_SLOW_PSHUFB) )
             pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_ssse3;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_ssse3;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
         pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_ssse3;
@@ -1271,7 +1272,7 @@
         }
         pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
         pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse4;
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
 #endif
@@ -1301,7 +1302,7 @@
         INIT5( ssd, _avx );
         pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_avx;
         pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_avx;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_avx;
         pixf->ssd_nv12_core    = x264_pixel_ssd_nv12_core_avx;
         pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_avx;
         pixf->var[PIXEL_8x16]  = x264_pixel_var_8x16_avx;
@@ -1326,7 +1327,7 @@
         INIT5( ssd, _xop );
         pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_xop;
         pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_xop;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_xop;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_xop;
         pixf->ssd_nv12_core    = x264_pixel_ssd_nv12_core_xop;
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop;
@@ -1363,6 +1364,8 @@
         INIT7( sad_x3, _avx512 );
         INIT7( sad_x4, _avx512 );
         INIT8( satd, _avx512 );
+        INIT7( satd_x3, _avx512 );
+        INIT7( satd_x4, _avx512 );
         pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_avx512;
         pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_avx512;
         pixf->var[PIXEL_8x16]  = x264_pixel_var_8x16_avx512;
@@ -1402,16 +1405,16 @@
         pixf->vsad = x264_pixel_vsad_neon;
         pixf->asd8 = x264_pixel_asd8_neon;
 
-        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_neon;
-        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_neon;
-        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_neon;
-        pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8_neon;
-        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
-        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_neon;
-        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_neon;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
-        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
-        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
+        pixf->intra_sad_x3_4x4    = intra_sad_x3_4x4_neon;
+        pixf->intra_satd_x3_4x4   = intra_satd_x3_4x4_neon;
+        pixf->intra_sad_x3_8x8    = intra_sad_x3_8x8_neon;
+        pixf->intra_sa8d_x3_8x8   = intra_sa8d_x3_8x8_neon;
+        pixf->intra_sad_x3_8x8c   = intra_sad_x3_8x8c_neon;
+        pixf->intra_satd_x3_8x8c  = intra_satd_x3_8x8c_neon;
+        pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c_neon;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_neon;
+        pixf->intra_sad_x3_16x16  = intra_sad_x3_16x16_neon;
+        pixf->intra_satd_x3_16x16 = intra_satd_x3_16x16_neon;
 
         pixf->ssd_nv12_core     = x264_pixel_ssd_nv12_core_neon;
         pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
@@ -1431,7 +1434,7 @@
     }
 #endif
 
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     if( cpu&X264_CPU_NEON )
     {
         INIT8( sad, _neon );
@@ -1457,22 +1460,22 @@
         pixf->vsad = x264_pixel_vsad_neon;
         pixf->asd8 = x264_pixel_asd8_neon;
 
-        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_neon;
-        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_neon;
-        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_neon;
-        pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8_neon;
-        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
-        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_neon;
-        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_neon;
-        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
-        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
-        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
+        pixf->intra_sad_x3_4x4    = intra_sad_x3_4x4_neon;
+        pixf->intra_satd_x3_4x4   = intra_satd_x3_4x4_neon;
+        pixf->intra_sad_x3_8x8    = intra_sad_x3_8x8_neon;
+        pixf->intra_sa8d_x3_8x8   = intra_sa8d_x3_8x8_neon;
+        pixf->intra_sad_x3_8x8c   = intra_sad_x3_8x8c_neon;
+        pixf->intra_satd_x3_8x8c  = intra_satd_x3_8x8c_neon;
+        pixf->intra_sad_x3_8x16c  = intra_sad_x3_8x16c_neon;
+        pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_neon;
+        pixf->intra_sad_x3_16x16  = intra_sad_x3_16x16_neon;
+        pixf->intra_satd_x3_16x16 = intra_satd_x3_16x16_neon;
 
         pixf->ssd_nv12_core     = x264_pixel_ssd_nv12_core_neon;
         pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
         pixf->ssim_end4         = x264_pixel_ssim_end4_neon;
     }
-#endif // ARCH_AARCH64
+#endif // HAVE_AARCH64
 
 #if HAVE_MSA
     if( cpu&X264_CPU_MSA )
diff -Nru x264-0.152.2854+gite9a5903/common/pixel.h x264-0.158.2988+git-20191101.7817004/common/pixel.h
--- x264-0.152.2854+gite9a5903/common/pixel.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/pixel.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2004-2017 x264 project
+ * Copyright (C) 2004-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Fiona Glaser <fiona@x264.com>
@@ -143,13 +143,18 @@
     int (*intra_sad_x9_8x8)  ( pixel *fenc, pixel *fdec, pixel edge[36], uint16_t *bitcosts, uint16_t *satds );
 } x264_pixel_function_t;
 
+#define x264_pixel_init x264_template(pixel_init)
 void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
+#define x264_pixel_ssd_nv12 x264_template(pixel_ssd_nv12)
 void x264_pixel_ssd_nv12   ( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2,
                              int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v );
+#define x264_pixel_ssd_wxh x264_template(pixel_ssd_wxh)
 uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2,
                              int i_width, int i_height );
+#define x264_pixel_ssim_wxh x264_template(pixel_ssim_wxh)
 float x264_pixel_ssim_wxh  ( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2,
                              int i_width, int i_height, void *buf, int *cnt );
+#define x264_field_vsad x264_template(field_vsad)
 int x264_field_vsad( x264_t *h, int mb_x, int mb_y );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/dct.c x264-0.158.2988+git-20191101.7817004/common/ppc/dct.c
--- x264-0.152.2854+gite9a5903/common/ppc/dct.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/dct.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.c: ppc transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *          Eric Petit <eric.petit@lapsus.org>
@@ -26,6 +26,7 @@
 
 #include "common/common.h"
 #include "ppccommon.h"
+#include "dct.h"
 
 #if !HIGH_BIT_DEPTH
 #define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \
@@ -136,11 +137,11 @@
 
 void x264_sub8x8_dct_dc_altivec( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 )
 {
-    vec_s16_t diff[2];
+    vec_s16_t diff[2], tmp;
     vec_s32_t sum[2];
     vec_s32_t zero32 = vec_splat_s32(0);
-    vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };
+    vec_u8_t mask = { 0x00, 0x01, 0x00, 0x01, 0x04, 0x05, 0x04, 0x05,
+                      0x02, 0x03, 0x02, 0x03, 0x06, 0x07, 0x06, 0x07 };
 
     pix_diff( &pix1[0], &pix2[0], diff, 0 );
     pix_diff( &pix1[4*FENC_STRIDE], &pix2[4*FDEC_STRIDE], diff, 1 );
@@ -151,20 +152,19 @@
     sum[0] = vec_sum4s( diff[0], zero32 );
     diff[0] = vec_packs( sum[0], zero32 );
 
-    diff[1] = vec_vsx_ld( 0, dct );
-    diff[0] = vec_perm( diff[0], diff[1], mask );
-
-    vec_vsx_st( diff[0], 0, dct );
-
-    /* 2x2 DC transform */
-    int d0 = dct[0] + dct[1];
-    int d1 = dct[2] + dct[3];
-    int d2 = dct[0] - dct[1];
-    int d3 = dct[2] - dct[3];
-    dct[0] = d0 + d1;
-    dct[1] = d0 - d1;
-    dct[2] = d2 + d3;
-    dct[3] = d2 - d3;
+    diff[0] = vec_perm( diff[0], diff[0], mask ); // 0 0 2 2 1 1 3 3
+    tmp = xxpermdi( diff[0], diff[0], 2 );        // 1 1 3 3 0 0 2 2
+    diff[1] = vec_add( diff[0], tmp );            // 0+1 0+1 2+3 2+3
+    diff[0] = vec_sub( diff[0], tmp );            // 0-1 0-1 2-3 2-3
+    tmp = vec_mergeh( diff[1], diff[0] );         // 0+1 0-1 0+1 0-1 2+3 2-3 2+3 2-3
+    diff[0] = xxpermdi( tmp, tmp, 2 );            // 2+3 2-3 2+3 2-3
+    diff[1] = vec_add( tmp, diff[0] );            // 0+1+2+3 0-1+2-3
+    diff[0] = vec_sub( tmp, diff[0] );            // 0+1-2-3 0-1-2+3
+    diff[0] = vec_mergeh( diff[1], diff[0] );
+
+    diff[1] = vec_ld( 0, dct );
+    diff[0] = xxpermdi( diff[0], diff[1], 0 );
+    vec_st( diff[0], 0, dct );
 }
 
 /* DCT8_1D unrolled by 8 in Altivec */
@@ -293,29 +293,125 @@
     vec_vsx_st( dcvsum8, 0, dest );                                   \
 }
 
-static void idct8_dc_altivec( uint8_t *dst, vec_s16_t dcv )
+void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] )
 {
+    vec_s16_t dcv0, dcv1;
+    vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) );
+    vec_u16_t v6 = vec_splat_u16( 6 );
+    vec_s16_t dctv = vec_ld( 0, dct );
+    vec_u8_t dstv0, dstv1, dstv2, dstv3, dstv4, dstv5, dstv6, dstv7;
+    vec_s16_t dcvsum0, dcvsum1, dcvsum2, dcvsum3, dcvsum4, dcvsum5, dcvsum6, dcvsum7;
+    vec_u8_t dcvsum8_0, dcvsum8_1, dcvsum8_2, dcvsum8_3, dcvsum8_4, dcvsum8_5, dcvsum8_6, dcvsum8_7;
     LOAD_ZERO;
-    ALTIVEC_STORE8_DC_SUM_CLIP( &dst[0*FDEC_STRIDE], dcv );
-    ALTIVEC_STORE8_DC_SUM_CLIP( &dst[1*FDEC_STRIDE], dcv );
-    ALTIVEC_STORE8_DC_SUM_CLIP( &dst[2*FDEC_STRIDE], dcv );
-    ALTIVEC_STORE8_DC_SUM_CLIP( &dst[3*FDEC_STRIDE], dcv );
-}
 
-void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] )
+    dctv = vec_sra( vec_add( dctv, v32 ), v6 );
+    dcv1 = (vec_s16_t)vec_mergeh( dctv, dctv );
+    dcv0 = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv1, (vec_s32_t)dcv1 );
+    dcv1 = (vec_s16_t)vec_mergel( (vec_s32_t)dcv1, (vec_s32_t)dcv1 );
+
+    dstv0   = vec_vsx_ld( 0, p_dst );
+    dstv4   = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE );
+    dstv1   = vec_vsx_ld( 0, p_dst + 1*FDEC_STRIDE );
+    dstv5   = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE + 1*FDEC_STRIDE );
+    dstv2   = vec_vsx_ld( 0, p_dst + 2*FDEC_STRIDE);
+    dstv6   = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE + 2*FDEC_STRIDE );
+    dstv3   = vec_vsx_ld( 0, p_dst + 3*FDEC_STRIDE);
+    dstv7   = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE + 3*FDEC_STRIDE );
+
+    vec_s16_t s0 = vec_u8_to_s16_h( dstv0 );
+    vec_s16_t s1 = vec_u8_to_s16_h( dstv4 );
+    vec_s16_t s2 = vec_u8_to_s16_h( dstv1 );
+    vec_s16_t s3 = vec_u8_to_s16_h( dstv5 );
+    vec_s16_t s4 = vec_u8_to_s16_h( dstv2 );
+    vec_s16_t s5 = vec_u8_to_s16_h( dstv6 );
+    vec_s16_t s6 = vec_u8_to_s16_h( dstv3 );
+    vec_s16_t s7 = vec_u8_to_s16_h( dstv7 );
+    dcvsum0 = vec_adds( dcv0, s0 );
+    dcvsum4 = vec_adds( dcv1, s1 );
+    dcvsum1 = vec_adds( dcv0, s2 );
+    dcvsum5 = vec_adds( dcv1, s3 );
+    dcvsum2 = vec_adds( dcv0, s4 );
+    dcvsum6 = vec_adds( dcv1, s5 );
+    dcvsum3 = vec_adds( dcv0, s6 );
+    dcvsum7 = vec_adds( dcv1, s7 );
+    dcvsum8_0 = vec_packsu( dcvsum0, vec_u8_to_s16_l( dstv0 ) );
+    dcvsum8_1 = vec_packsu( dcvsum1, vec_u8_to_s16_l( dstv1 ) );
+    dcvsum8_2 = vec_packsu( dcvsum2, vec_u8_to_s16_l( dstv2 ) );
+    dcvsum8_3 = vec_packsu( dcvsum3, vec_u8_to_s16_l( dstv3 ) );
+    dcvsum8_4 = vec_packsu( dcvsum4, vec_u8_to_s16_l( dstv4 ) );
+    dcvsum8_5 = vec_packsu( dcvsum5, vec_u8_to_s16_l( dstv5 ) );
+    dcvsum8_6 = vec_packsu( dcvsum6, vec_u8_to_s16_l( dstv6 ) );
+    dcvsum8_7 = vec_packsu( dcvsum7, vec_u8_to_s16_l( dstv7 ) );
+
+    vec_vsx_st( dcvsum8_0, 0, p_dst );
+    vec_vsx_st( dcvsum8_4, 0, p_dst + 4*FDEC_STRIDE );
+    vec_vsx_st( dcvsum8_1, 0, p_dst + 1*FDEC_STRIDE );
+    vec_vsx_st( dcvsum8_5, 0, p_dst + 4*FDEC_STRIDE + 1*FDEC_STRIDE );
+    vec_vsx_st( dcvsum8_2, 0, p_dst + 2*FDEC_STRIDE );
+    vec_vsx_st( dcvsum8_6, 0, p_dst + 4*FDEC_STRIDE + 2*FDEC_STRIDE );
+    vec_vsx_st( dcvsum8_3, 0, p_dst + 3*FDEC_STRIDE );
+    vec_vsx_st( dcvsum8_7, 0, p_dst + 4*FDEC_STRIDE + 3*FDEC_STRIDE );
+}
+
+#define LOAD16                                  \
+    dstv0 = vec_ld( 0, p_dst );                 \
+    dstv1 = vec_ld( 0, p_dst + 1*FDEC_STRIDE ); \
+    dstv2 = vec_ld( 0, p_dst + 2*FDEC_STRIDE ); \
+    dstv3 = vec_ld( 0, p_dst + 3*FDEC_STRIDE );
+
+#define SUM16                                                 \
+        dcvsum0 = vec_adds( dcv0, vec_u8_to_s16_h( dstv0 ) ); \
+        dcvsum4 = vec_adds( dcv1, vec_u8_to_s16_l( dstv0 ) ); \
+        dcvsum1 = vec_adds( dcv0, vec_u8_to_s16_h( dstv1 ) ); \
+        dcvsum5 = vec_adds( dcv1, vec_u8_to_s16_l( dstv1 ) ); \
+        dcvsum2 = vec_adds( dcv0, vec_u8_to_s16_h( dstv2 ) ); \
+        dcvsum6 = vec_adds( dcv1, vec_u8_to_s16_l( dstv2 ) ); \
+        dcvsum3 = vec_adds( dcv0, vec_u8_to_s16_h( dstv3 ) ); \
+        dcvsum7 = vec_adds( dcv1, vec_u8_to_s16_l( dstv3 ) ); \
+        dcvsum8_0 = vec_packsu( dcvsum0, dcvsum4 );           \
+        dcvsum8_1 = vec_packsu( dcvsum1, dcvsum5 );           \
+        dcvsum8_2 = vec_packsu( dcvsum2, dcvsum6 );           \
+        dcvsum8_3 = vec_packsu( dcvsum3, dcvsum7 );
+
+#define STORE16                                    \
+    vec_st( dcvsum8_0, 0, p_dst );                 \
+    vec_st( dcvsum8_1, 0, p_dst + 1*FDEC_STRIDE ); \
+    vec_st( dcvsum8_2, 0, p_dst + 2*FDEC_STRIDE ); \
+    vec_st( dcvsum8_3, 0, p_dst + 3*FDEC_STRIDE );
+
+void x264_add16x16_idct_dc_altivec( uint8_t *p_dst, int16_t dct[16] )
 {
-    vec_s16_t dcv;
+    vec_s16_t dcv0, dcv1;
     vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) );
     vec_u16_t v6 = vec_splat_u16( 6 );
-    vec_s16_t dctv = vec_vsx_ld( 0, dct );
+    vec_u8_t dstv0, dstv1, dstv2, dstv3;
+    vec_s16_t dcvsum0, dcvsum1, dcvsum2, dcvsum3, dcvsum4, dcvsum5, dcvsum6, dcvsum7;
+    vec_u8_t dcvsum8_0, dcvsum8_1, dcvsum8_2, dcvsum8_3;
+    LOAD_ZERO;
 
-    dctv = vec_sra( vec_add( dctv, v32 ), v6 );
-    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 0 ), (vec_s32_t)vec_splat( dctv, 1 ) );
-    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );
-    idct8_dc_altivec( &p_dst[0], dcv );
-    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 2 ), (vec_s32_t)vec_splat( dctv, 3 ) );
-    dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv );
-    idct8_dc_altivec( &p_dst[4*FDEC_STRIDE+0], dcv );
+    for( int i = 0; i < 2; i++ )
+    {
+        vec_s16_t dctv = vec_ld( 0, dct );
+
+        dctv = vec_sra( vec_add( dctv, v32 ), v6 );
+        dcv1 = (vec_s16_t)vec_mergeh( dctv, dctv );
+        dcv0 = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv1, (vec_s32_t)dcv1 );
+        dcv1 = (vec_s16_t)vec_mergel( (vec_s32_t)dcv1, (vec_s32_t)dcv1 );
+        LOAD16;
+        SUM16;
+        STORE16;
+
+        p_dst += 4*FDEC_STRIDE;
+        dcv1 = (vec_s16_t)vec_mergel( dctv, dctv );
+        dcv0 = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv1, (vec_s32_t)dcv1 );
+        dcv1 = (vec_s16_t)vec_mergel( (vec_s32_t)dcv1, (vec_s32_t)dcv1 );
+        LOAD16;
+        SUM16;
+        STORE16;
+
+        dct += 8;
+        p_dst += 4*FDEC_STRIDE;
+    }
 }
 
 #define IDCT_1D_ALTIVEC(s0, s1, s2, s3,  d0, d1, d2, d3) \
@@ -640,7 +736,7 @@
 {
     vec_s16_t tmpv[8];
     vec_s16_t merge[2];
-    vec_s16_t permv[2];
+    vec_s16_t permv[3];
     vec_s16_t orv[4];
     vec_s16_t src0v = vec_ld( 0*16, src );
     vec_s16_t src1v = vec_ld( 1*16, src );
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/dct.h x264-0.158.2988+git-20191101.7817004/common/ppc/dct.h
--- x264-0.152.2854+gite9a5903/common/ppc/dct.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/dct.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: ppc transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
@@ -27,26 +27,44 @@
 #ifndef X264_PPC_DCT_H
 #define X264_PPC_DCT_H
 
+#define x264_sub4x4_dct_altivec x264_template(sub4x4_dct_altivec)
 void x264_sub4x4_dct_altivec( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_altivec x264_template(sub8x8_dct_altivec)
 void x264_sub8x8_dct_altivec( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_altivec x264_template(sub16x16_dct_altivec)
 void x264_sub16x16_dct_altivec( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add8x8_idct_dc_altivec x264_template(add8x8_idct_dc_altivec)
 void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] );
+#define x264_add16x16_idct_dc_altivec x264_template(add16x16_idct_dc_altivec)
+void x264_add16x16_idct_dc_altivec( uint8_t *p_dst, int16_t dct[16] );
 
+#define x264_add4x4_idct_altivec x264_template(add4x4_idct_altivec)
 void x264_add4x4_idct_altivec( uint8_t *p_dst, int16_t dct[16] );
+#define x264_add8x8_idct_altivec x264_template(add8x8_idct_altivec)
 void x264_add8x8_idct_altivec( uint8_t *p_dst, int16_t dct[4][16] );
+#define x264_add16x16_idct_altivec x264_template(add16x16_idct_altivec)
 void x264_add16x16_idct_altivec( uint8_t *p_dst, int16_t dct[16][16] );
 
+#define x264_sub8x8_dct_dc_altivec x264_template(sub8x8_dct_dc_altivec)
 void x264_sub8x8_dct_dc_altivec( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct8_altivec x264_template(sub8x8_dct8_altivec)
 void x264_sub8x8_dct8_altivec( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct8_altivec x264_template(sub16x16_dct8_altivec)
 void x264_sub16x16_dct8_altivec( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add8x8_idct8_altivec x264_template(add8x8_idct8_altivec)
 void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[64] );
+#define x264_add16x16_idct8_altivec x264_template(add16x16_idct8_altivec)
 void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][64] );
 
+#define x264_zigzag_scan_4x4_frame_altivec x264_template(zigzag_scan_4x4_frame_altivec)
 void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[16] );
+#define x264_zigzag_scan_4x4_field_altivec x264_template(zigzag_scan_4x4_field_altivec)
 void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[16] );
+#define x264_zigzag_scan_8x8_frame_altivec x264_template(zigzag_scan_8x8_frame_altivec)
 void x264_zigzag_scan_8x8_frame_altivec( int16_t level[64], int16_t dct[64] );
+#define x264_zigzag_interleave_8x8_cavlc_altivec x264_template(zigzag_interleave_8x8_cavlc_altivec)
 void x264_zigzag_interleave_8x8_cavlc_altivec( int16_t *dst, int16_t *src, uint8_t *nnz );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/deblock.c x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.c
--- x264-0.152.2854+gite9a5903/common/ppc/deblock.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.c: ppc deblocking
  *****************************************************************************
- * Copyright (C) 2007-2017 x264 project
+ * Copyright (C) 2007-2019 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
@@ -25,6 +25,7 @@
 
 #include "common/common.h"
 #include "ppccommon.h"
+#include "deblock.h"
 
 #if !HIGH_BIT_DEPTH
 #define transpose4x16(r0, r1, r2, r3)        \
@@ -140,11 +141,7 @@
 // out: o = |x-y| < a
 static inline vec_u8_t diff_lt_altivec( register vec_u8_t x, register vec_u8_t y, register vec_u8_t a )
 {
-    register vec_u8_t diff = vec_subs(x, y);
-    register vec_u8_t diffneg = vec_subs(y, x);
-    register vec_u8_t o = vec_or(diff, diffneg); /* |x-y| */
-    o = (vec_u8_t)vec_cmplt(o, a);
-    return o;
+    return (vec_u8_t)vec_cmplt(vec_absd(x, y), a);
 }
 
 static inline vec_u8_t h264_deblock_mask( register vec_u8_t p0, register vec_u8_t p1, register vec_u8_t q0,
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/deblock.h x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.h
--- x264-0.152.2854+gite9a5903/common/ppc/deblock.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,34 @@
+/*****************************************************************************
+ * deblock.h: ppc deblocking
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_PPC_DEBLOCK_H
+#define X264_PPC_DEBLOCK_H
+
+#define x264_deblock_v_luma_altivec x264_template(deblock_v_luma_altivec)
+void x264_deblock_v_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_altivec x264_template(deblock_h_luma_altivec)
+void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/mc.c x264-0.158.2988+git-20191101.7817004/common/ppc/mc.c
--- x264-0.152.2854+gite9a5903/common/ppc/mc.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/mc.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: ppc motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
@@ -25,33 +25,40 @@
  *****************************************************************************/
 
 #include "common/common.h"
-#include "mc.h"
 #include "ppccommon.h"
+#include "mc.h"
 
 #if !HIGH_BIT_DEPTH
 typedef void (*pf_mc_t)( uint8_t *src, intptr_t i_src,
                          uint8_t *dst, intptr_t i_dst, int i_height );
 
-static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst,  intptr_t i_dst,
-                                               uint8_t *src1, intptr_t i_src1,
-                                               uint8_t *src2, int i_height )
+static inline void pixel_avg2_w4_altivec( uint8_t *dst,  intptr_t i_dst,
+                                          uint8_t *src1, intptr_t i_src1,
+                                          uint8_t *src2, int i_height )
 {
     for( int y = 0; y < i_height; y++ )
     {
+#ifndef __POWER9_VECTOR__
         for( int x = 0; x < 4; x++ )
             dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
+#else
+        vec_u8_t s1 = vec_vsx_ld( 0, src1 );
+        vec_u8_t s2 = vec_vsx_ld( 0, src2 );
+        vec_u8_t avg = vec_avg( s1, s2 );
+
+        vec_xst_len( avg, dst, 4 );
+#endif
         dst  += i_dst;
         src1 += i_src1;
         src2 += i_src1;
     }
 }
 
-static inline void x264_pixel_avg2_w8_altivec( uint8_t *dst,  intptr_t i_dst,
-                                               uint8_t *src1, intptr_t i_src1,
-                                               uint8_t *src2, int i_height )
+static inline void pixel_avg2_w8_altivec( uint8_t *dst,  intptr_t i_dst,
+                                          uint8_t *src1, intptr_t i_src1,
+                                          uint8_t *src2, int i_height )
 {
     vec_u8_t src1v, src2v;
-    PREP_STORE8;
 
     for( int y = 0; y < i_height; y++ )
     {
@@ -67,9 +74,9 @@
     }
 }
 
-static inline void x264_pixel_avg2_w16_altivec( uint8_t *dst,  intptr_t i_dst,
-                                                uint8_t *src1, intptr_t i_src1,
-                                                uint8_t *src2, int i_height )
+static inline void pixel_avg2_w16_altivec( uint8_t *dst,  intptr_t i_dst,
+                                           uint8_t *src1, intptr_t i_src1,
+                                           uint8_t *src2, int i_height )
 {
     vec_u8_t src1v, src2v;
 
@@ -86,16 +93,22 @@
     }
 }
 
-static inline void x264_pixel_avg2_w20_altivec( uint8_t *dst,  intptr_t i_dst,
-                                                uint8_t *src1, intptr_t i_src1,
-                                                uint8_t *src2, int i_height )
+static inline void pixel_avg2_w20_altivec( uint8_t *dst,  intptr_t i_dst,
+                                           uint8_t *src1, intptr_t i_src1,
+                                           uint8_t *src2, int i_height )
 {
-    x264_pixel_avg2_w16_altivec(dst, i_dst, src1, i_src1, src2, i_height);
-    x264_pixel_avg2_w4_altivec(dst+16, i_dst, src1+16, i_src1, src2+16, i_height);
+    pixel_avg2_w16_altivec(dst, i_dst, src1, i_src1, src2, i_height);
+    pixel_avg2_w4_altivec(dst+16, i_dst, src1+16, i_src1, src2+16, i_height);
 }
 
 /* mc_copy: plain c */
 
+#ifndef __POWER9_VECTOR__
+#define tiny_copy( d, s, l ) memcpy( d, s, l )
+#else
+#define tiny_copy( d, s, l ) vec_xst_len( vec_vsx_ld( 0, s ), d, l )
+#endif
+
 #define MC_COPY( name, a )                                \
 static void name( uint8_t *dst, intptr_t i_dst,           \
                   uint8_t *src, intptr_t i_src, int i_height ) \
@@ -108,11 +121,11 @@
         dst += i_dst;                                     \
     }                                                     \
 }
-MC_COPY( x264_mc_copy_w4_altivec,  4  )
-MC_COPY( x264_mc_copy_w8_altivec,  8  )
+MC_COPY( mc_copy_w4_altivec,  4  )
+MC_COPY( mc_copy_w8_altivec,  8  )
 
-static void x264_mc_copy_w16_altivec( uint8_t *dst, intptr_t i_dst,
-                                      uint8_t *src, intptr_t i_src, int i_height )
+static void mc_copy_w16_altivec( uint8_t *dst, intptr_t i_dst,
+                                 uint8_t *src, intptr_t i_src, int i_height )
 {
     vec_u8_t cpyV;
 
@@ -127,8 +140,8 @@
 }
 
 
-static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, intptr_t i_dst,
-                                              uint8_t *src, intptr_t i_src, int i_height )
+static void mc_copy_w16_aligned_altivec( uint8_t *dst, intptr_t i_dst,
+                                         uint8_t *src, intptr_t i_src, int i_height )
 {
     for( int y = 0; y < i_height; ++y )
     {
@@ -140,6 +153,7 @@
     }
 }
 
+#define x264_plane_copy_swap_core_altivec x264_template(plane_copy_swap_core_altivec)
 void x264_plane_copy_swap_core_altivec( uint8_t *dst, intptr_t i_dst,
                                         uint8_t *src, intptr_t i_src, int w, int h )
 {
@@ -155,6 +169,7 @@
         }
 }
 
+#define x264_plane_copy_interleave_core_altivec x264_template(plane_copy_interleave_core_altivec)
 void x264_plane_copy_interleave_core_altivec( uint8_t *dst, intptr_t i_dst,
                                               uint8_t *srcu, intptr_t i_srcu,
                                               uint8_t *srcv, intptr_t i_srcv, int w, int h )
@@ -208,6 +223,20 @@
     }
 }
 
+static void load_deinterleave_chroma_fenc_altivec( uint8_t *dst, uint8_t *src, intptr_t i_src, int height )
+{
+    const vec_u8_t mask = { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F };
+
+    for( int y = 0; y < height; y += 2, dst += 2*FENC_STRIDE, src += 2*i_src )
+    {
+        vec_u8_t src0 = vec_ld( 0, src );
+        vec_u8_t src1 = vec_ld( i_src, src );
+
+        vec_st( vec_perm( src0, src0, mask ), 0*FENC_STRIDE, dst );
+        vec_st( vec_perm( src1, src1, mask ), 1*FENC_STRIDE, dst );
+    }
+}
+
 #if HAVE_VSX
 void x264_plane_copy_deinterleave_rgb_altivec( uint8_t *dsta, intptr_t i_dsta,
                                                uint8_t *dstb, intptr_t i_dstb,
@@ -287,14 +316,14 @@
         switch( i_width )
         {
             case 4:
-                x264_pixel_avg2_w4_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
+                pixel_avg2_w4_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
                 break;
             case 8:
-                x264_pixel_avg2_w8_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
+                pixel_avg2_w8_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
                 break;
             case 16:
             default:
-                x264_pixel_avg2_w16_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
+                pixel_avg2_w16_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
         }
         if( weight->weightfn )
             weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
@@ -306,13 +335,13 @@
         switch( i_width )
         {
             case 4:
-                x264_mc_copy_w4_altivec( dst, i_dst_stride, src1, i_src_stride, i_height );
+                mc_copy_w4_altivec( dst, i_dst_stride, src1, i_src_stride, i_height );
                 break;
             case 8:
-                x264_mc_copy_w8_altivec( dst, i_dst_stride, src1, i_src_stride, i_height );
+                mc_copy_w8_altivec( dst, i_dst_stride, src1, i_src_stride, i_height );
                 break;
             case 16:
-                x264_mc_copy_w16_altivec( dst, i_dst_stride, src1, i_src_stride, i_height );
+                mc_copy_w16_altivec( dst, i_dst_stride, src1, i_src_stride, i_height );
                 break;
         }
     }
@@ -334,18 +363,18 @@
         switch( i_width )
         {
             case 4:
-                x264_pixel_avg2_w4_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
+                pixel_avg2_w4_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
                 break;
             case 8:
-                x264_pixel_avg2_w8_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
+                pixel_avg2_w8_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
                 break;
             case 12:
             case 16:
             default:
-                x264_pixel_avg2_w16_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
+                pixel_avg2_w16_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
                 break;
             case 20:
-                x264_pixel_avg2_w20_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
+                pixel_avg2_w20_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
                 break;
         }
         if( weight->weightfn )
@@ -400,6 +429,14 @@
 #define VSLD(a,b,n) vec_sld(b,a,16-n)
 #endif
 
+#ifndef __POWER9_VECTOR__ /* no POWER9 vector ISA: use vec_ste element stores */
+#define STORE4_ALIGNED(d, s) vec_ste( (vec_u32_t)(s), 0, (uint32_t*)(d) )
+#define STORE2_UNALIGNED(d, s) vec_ste( vec_splat( (vec_u16_t)(s), 0 ), 0, (uint16_t*)(d) )
+#else /* POWER9: vec_xst_len with an explicit byte length of 4 / 2 */
+#define STORE4_ALIGNED(d, s) vec_xst_len( (vec_u8_t)(s), (d), 4 )
+#define STORE2_UNALIGNED(d, s) vec_xst_len( (vec_u8_t)(s), (d), 2 )
+#endif /* __POWER9_VECTOR__ */
+
 static void mc_chroma_4xh_altivec( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride,
                                    uint8_t *src, intptr_t i_src_stride,
                                    int mvx, int mvy, int i_height )
@@ -460,8 +497,8 @@
 
         dstuv = (vec_u8_t)vec_perm( dstv16, dstv16, perm0v );
         dstvv = (vec_u8_t)vec_perm( dstv16, dstv16, perm1v );
-        vec_ste( (vec_u32_t)dstuv, 0, (uint32_t*) dstu );
-        vec_ste( (vec_u32_t)dstvv, 0, (uint32_t*) dstv );
+        STORE4_ALIGNED( dstu, dstuv );
+        STORE4_ALIGNED( dstv, dstvv );
 
         srcp += i_src_stride;
         dstu += i_dst_stride;
@@ -482,8 +519,8 @@
 
         dstuv = (vec_u8_t)vec_perm( dstv16, dstv16, perm0v );
         dstvv = (vec_u8_t)vec_perm( dstv16, dstv16, perm1v );
-        vec_ste( (vec_u32_t)dstuv, 0, (uint32_t*) dstu );
-        vec_ste( (vec_u32_t)dstvv, 0, (uint32_t*) dstv );
+        STORE4_ALIGNED( dstu, dstuv );
+        STORE4_ALIGNED( dstv, dstvv );
 
         srcp += i_src_stride;
         dstu += i_dst_stride;
@@ -509,7 +546,6 @@
     srcp = &src[i_src_stride];
 
     LOAD_ZERO;
-    PREP_STORE8;
     vec_u16_t   coeff0v, coeff1v, coeff2v, coeff3v;
     vec_u8_t    src0v_8, src1v_8, src2v_8, src3v_8;
     vec_u8_t    dstuv, dstvv;
@@ -788,20 +824,13 @@
 
     vec_u16_t twov, fourv, fivev, sixv;
     vec_s16_t sixteenv, thirtytwov;
-    vec_u16_u temp_u;
 
-    temp_u.s[0]=2;
-    twov = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=4;
-    fourv = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=5;
-    fivev = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=6;
-    sixv = vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=16;
-    sixteenv = (vec_s16_t)vec_splat( temp_u.v, 0 );
-    temp_u.s[0]=32;
-    thirtytwov = (vec_s16_t)vec_splat( temp_u.v, 0 );
+    twov = vec_splats( (uint16_t)2 );
+    fourv = vec_splats( (uint16_t)4 );
+    fivev = vec_splats( (uint16_t)5 );
+    sixv = vec_splats( (uint16_t)6 );
+    sixteenv = vec_splats( (int16_t)16 );
+    thirtytwov = vec_splats( (int16_t)32 );
 
     for( int y = 0; y < i_height; y++ )
     {
@@ -952,18 +981,14 @@
             hv = vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle_1);
 #endif
 
-            vec_ste((vec_u32_t)lv,16*x,(uint32_t*)dst0);
-            vec_ste((vec_u32_t)lv,16*x+4,(uint32_t*)dst0);
-            vec_ste((vec_u32_t)hv,16*x,(uint32_t*)dsth);
-            vec_ste((vec_u32_t)hv,16*x+4,(uint32_t*)dsth);
+            VEC_STORE8( lv, dst0 + 16 * x );
+            VEC_STORE8( hv, dsth + 16 * x );
 
             lv = vec_sld(lv, lv, 8);
             hv = vec_sld(hv, hv, 8);
 
-            vec_ste((vec_u32_t)lv,16*x,(uint32_t*)dstv);
-            vec_ste((vec_u32_t)lv,16*x+4,(uint32_t*)dstv);
-            vec_ste((vec_u32_t)hv,16*x,(uint32_t*)dstc);
-            vec_ste((vec_u32_t)hv,16*x+4,(uint32_t*)dstc);
+            VEC_STORE8( lv, dstv + 16 * x );
+            VEC_STORE8( hv, dstc + 16 * x );
         }
 
         src0 += src_stride*2;
@@ -981,23 +1006,16 @@
     vec_u8_t srcv;
     vec_s16_t weightv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t)denom );
+        roundv = vec_splats( (int16_t)(1 << (denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1009,7 +1027,7 @@
             weightv = vec_add( weightv, offsetv );
 
             srcv = vec_packsu( weightv, zero_s16v );
-            vec_ste( vec_splat( (vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst );
+            STORE2_UNALIGNED( dst, srcv );
         }
     }
     else
@@ -1022,7 +1040,7 @@
             weightv = vec_mladd( weightv, scalev, offsetv );
 
             srcv = vec_packsu( weightv, zero_s16v );
-            vec_ste( vec_splat( (vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst );
+            STORE2_UNALIGNED( dst, srcv );
         }
     }
 }
@@ -1033,23 +1051,16 @@
     vec_u8_t srcv;
     vec_s16_t weightv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t)denom );
+        roundv = vec_splats( (int16_t)(1 << (denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1082,27 +1093,19 @@
                                   const x264_weight_t *weight, int i_height )
 {
     LOAD_ZERO;
-    PREP_STORE8;
     vec_u8_t srcv;
     vec_s16_t weightv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t)denom );
+        roundv = vec_splats( (int16_t)(1 << (denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1138,23 +1141,16 @@
     vec_u8_t srcv;
     vec_s16_t weight_lv, weight_hv;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = 1<<(denom - 1);
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t)denom );
+        roundv = vec_splats( (int16_t)(1 << (denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1196,15 +1192,11 @@
     vec_u8_t srcv, srcv2;
     vec_s16_t weight_lv, weight_hv, weight_3v;
     vec_s16_t scalev, offsetv, denomv, roundv;
-    vec_s16_u loadv;
 
     int denom = weight->i_denom;
 
-    loadv.s[0] = weight->i_scale;
-    scalev = vec_splat( loadv.v, 0 );
-
-    loadv.s[0] = weight->i_offset;
-    offsetv = vec_splat( loadv.v, 0 );
+    scalev = vec_splats( (int16_t)weight->i_scale );
+    offsetv = vec_splats( (int16_t)weight->i_offset );
 
     if( denom >= 1 )
     {
@@ -1216,11 +1208,8 @@
             { round, round, round, round, 0, 0, 0, 0 },
         };
 
-        loadv.s[0] = denom;
-        denomv = vec_splat( loadv.v, 0 );
-
-        loadv.s[0] = round;
-        roundv = vec_splat( loadv.v, 0 );
+        denomv = vec_splats( (int16_t)denom );
+        roundv = vec_splats( (int16_t)(1 << (denom - 1)) );
 
         for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src )
         {
@@ -1274,7 +1263,7 @@
     }
 }
 
-static weight_fn_t x264_mc_weight_wtab_altivec[6] =
+static weight_fn_t mc_weight_wtab_altivec[6] =
 {
     mc_weight_w2_altivec,
     mc_weight_w4_altivec,
@@ -1384,18 +1373,19 @@
     pf->get_ref   = get_ref_altivec;
     pf->mc_chroma = mc_chroma_altivec;
 
-    pf->copy_16x16_unaligned = x264_mc_copy_w16_altivec;
-    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_altivec;
+    pf->copy_16x16_unaligned = mc_copy_w16_altivec;
+    pf->copy[PIXEL_16x16] = mc_copy_w16_aligned_altivec;
 
     pf->hpel_filter = x264_hpel_filter_altivec;
     pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
 
-    pf->weight = x264_mc_weight_wtab_altivec;
+    pf->weight = mc_weight_wtab_altivec;
 
-    pf->plane_copy_swap = x264_plane_copy_swap_altivec;
-    pf->plane_copy_interleave = x264_plane_copy_interleave_altivec;
+    pf->plane_copy_swap = plane_copy_swap_altivec;
+    pf->plane_copy_interleave = plane_copy_interleave_altivec;
     pf->store_interleave_chroma = x264_store_interleave_chroma_altivec;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_altivec;
+    pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc_altivec;
 #if HAVE_VSX
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_altivec;
 #endif // HAVE_VSX
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/mc.h x264-0.158.2988+git-20191101.7817004/common/ppc/mc.h
--- x264-0.152.2854+gite9a5903/common/ppc/mc.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/mc.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: ppc motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
@@ -26,6 +26,7 @@
 #ifndef X264_PPC_MC_H
 #define X264_PPC_MC_H
 
+#define x264_mc_init_altivec x264_template(mc_init_altivec)
 void x264_mc_init_altivec( x264_mc_functions_t *pf );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/pixel.c x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.c
--- x264-0.152.2854+gite9a5903/common/ppc/pixel.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: ppc pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
@@ -26,6 +26,7 @@
 
 #include "common/common.h"
 #include "ppccommon.h"
+#include "pixel.h"
 
 #if !HIGH_BIT_DEPTH
 /***********************************************************************
@@ -46,8 +47,7 @@
         pix1v = vec_vsx_ld( 0, pix1 );                 \
         pix2v = vec_vsx_ld( 0, pix2 );                 \
         sumv = (vec_s32_t) vec_sum4s(                  \
-                   vec_sub( vec_max( pix1v, pix2v ),   \
-                            vec_min( pix1v, pix2v ) ), \
+                   vec_absd( pix1v, pix2v ),           \
                    (vec_u32_t) sumv );                 \
         pix1 += i_pix1;                                \
         pix2 += i_pix2;                                \
@@ -114,6 +114,23 @@
     VEC_ABS( a );          \
     c = vec_sum4s( a, b )
 
+/* Absolute values of four s16 vectors, accumulated into s32 lanes: pairs are
+ * added with 16-bit saturation (vec_adds), then summed per lane by vec_sum4s. */
+static ALWAYS_INLINE vec_s32_t add_abs_4( vec_s16_t a, vec_s16_t b,
+                                          vec_s16_t c, vec_s16_t d )
+{
+    const vec_s32_t zero32 = vec_splat_s32( 0 );
+
+    /* |a| + |b| and |c| + |d|, saturating in 16 bits */
+    vec_s16_t abs_ab = vec_adds( vec_abs( a ), vec_abs( b ) );
+    vec_s16_t abs_cd = vec_adds( vec_abs( c ), vec_abs( d ) );
+
+    /* accumulate each pair sum into 32-bit lanes, then combine both halves */
+    vec_s32_t sum_ab = vec_sum4s( abs_ab, zero32 );
+    vec_s32_t sum_cd = vec_sum4s( abs_cd, zero32 );
+    return vec_add( sum_ab, sum_cd );
+}
+
 /***********************************************************************
  * SATD 4x4
  **********************************************************************/
@@ -142,10 +159,7 @@
     VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v,
                  temp0v, temp1v, temp2v, temp3v );
 
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
+    satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v );
 
     satdv = vec_sum2s( satdv, zero_s32v );
     satdv = vec_splat( satdv, 1 );
@@ -177,10 +191,8 @@
                      diff0v, diff1v, diff2v, diff3v );
     VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v,
                  temp0v, temp1v, temp2v, temp3v );
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
+
+    satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v );
 
     VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, diff0v );
     VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, diff1v );
@@ -192,10 +204,8 @@
                      diff0v, diff1v, diff2v, diff3v );
     VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v,
                  temp0v, temp1v, temp2v, temp3v );
-    VEC_ADD_ABS( temp0v, satdv,     satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
+
+    satdv = vec_add( satdv, add_abs_4( temp0v, temp1v, temp2v, temp3v ) );
 
     satdv = vec_sum2s( satdv, zero_s32v );
     satdv = vec_splat( satdv, 1 );
@@ -204,6 +214,36 @@
     return i_satd >> 1;
 }
 
+/* Accumulates the absolute values of eight s16 vectors into s32 lanes.
+ * Inputs are paired with saturating 16-bit adds (vec_adds); vec_sum4s then
+ * adds each pair sum into one of two 32-bit accumulators, which are merged
+ * with vec_add. The result is still a vector: callers reduce it further
+ * (e.g. with vec_sum2s / vec_sums). */
+static ALWAYS_INLINE vec_s32_t add_abs_8( vec_s16_t a, vec_s16_t b,
+                                          vec_s16_t c, vec_s16_t d,
+                                          vec_s16_t e, vec_s16_t f,
+                                          vec_s16_t g, vec_s16_t h )
+{
+    const vec_s32_t zero32 = vec_splat_s32( 0 );
+
+    /* |x| + |y| for each input pair, saturating in 16 bits */
+    vec_s16_t abs_ab = vec_adds( vec_abs( a ), vec_abs( b ) );
+    vec_s16_t abs_cd = vec_adds( vec_abs( c ), vec_abs( d ) );
+    vec_s16_t abs_ef = vec_adds( vec_abs( e ), vec_abs( f ) );
+    vec_s16_t abs_gh = vec_adds( vec_abs( g ), vec_abs( h ) );
+
+    /* a..d: start two independent 32-bit accumulators from zero */
+    vec_s32_t acc0 = vec_sum4s( abs_ab, zero32 );
+    vec_s32_t acc1 = vec_sum4s( abs_cd, zero32 );
+
+    /* e..h: fold the remaining pair sums into those accumulators */
+    acc0 = vec_sum4s( abs_ef, acc0 );
+    acc1 = vec_sum4s( abs_gh, acc1 );
+
+    /* merge the two accumulators */
+    return vec_add( acc0, acc1 );
+}
+
 /***********************************************************************
  * SATD 8x4
  **********************************************************************/
@@ -237,14 +277,8 @@
     VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v,
                  temp4v, temp5v, temp6v, temp7v );
 
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+    satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                       temp4v, temp5v, temp6v, temp7v );
 
     satdv = vec_sum2s( satdv, zero_s32v );
     satdv = vec_splat( satdv, 1 );
@@ -292,14 +326,8 @@
     VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v,
                  temp4v, temp5v, temp6v, temp7v );
 
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+    satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                       temp4v, temp5v, temp6v, temp7v );
 
     satdv = vec_sums( satdv, zero_s32v );
     satdv = vec_splat( satdv, 3 );
@@ -343,14 +371,9 @@
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v,
                  temp4v, temp5v, temp6v, temp7v );
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+
+    satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                       temp4v, temp5v, temp6v, temp7v );
 
     VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff0v );
     VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff1v );
@@ -372,14 +395,9 @@
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v,
                  temp4v, temp5v, temp6v, temp7v );
-    VEC_ADD_ABS( temp0v, satdv,     satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+
+    satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                                       temp4v, temp5v, temp6v, temp7v ) );
 
     satdv = vec_sums( satdv, zero_s32v );
     satdv = vec_splat( satdv, 3 );
@@ -430,14 +448,8 @@
     VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v,
                  temp4v, temp5v, temp6v, temp7v );
 
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+    satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                       temp4v, temp5v, temp6v, temp7v );
 
     VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v,
                  temp0v, temp1v, temp2v, temp3v );
@@ -454,14 +466,8 @@
     VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v,
                  temp4v, temp5v, temp6v, temp7v );
 
-    VEC_ADD_ABS( temp0v, satdv,     satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+    satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                                       temp4v, temp5v, temp6v, temp7v ) );
 
     satdv = vec_sums( satdv, zero_s32v );
     satdv = vec_splat( satdv, 3 );
@@ -508,14 +514,10 @@
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v,
                  temp4v, temp5v, temp6v, temp7v );
-    VEC_ADD_ABS( temp0v, zero_s32v, satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+
+    satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                       temp4v, temp5v, temp6v, temp7v );
+
     VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v,
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v,
@@ -528,14 +530,9 @@
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v,
                  temp4v, temp5v, temp6v, temp7v );
-    VEC_ADD_ABS( temp0v, satdv,     satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+
+    satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                                       temp4v, temp5v, temp6v, temp7v ) );
 
     VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, diffh0v, diffl0v );
     VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, diffh1v, diffl1v );
@@ -557,14 +554,10 @@
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v,
                  temp4v, temp5v, temp6v, temp7v );
-    VEC_ADD_ABS( temp0v, satdv,     satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+
+    satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                                       temp4v, temp5v, temp6v, temp7v ) );
+
     VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v,
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v,
@@ -577,14 +570,9 @@
                  temp0v, temp1v, temp2v, temp3v );
     VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v,
                  temp4v, temp5v, temp6v, temp7v );
-    VEC_ADD_ABS( temp0v, satdv,     satdv );
-    VEC_ADD_ABS( temp1v, satdv,     satdv );
-    VEC_ADD_ABS( temp2v, satdv,     satdv );
-    VEC_ADD_ABS( temp3v, satdv,     satdv );
-    VEC_ADD_ABS( temp4v, satdv,     satdv );
-    VEC_ADD_ABS( temp5v, satdv,     satdv );
-    VEC_ADD_ABS( temp6v, satdv,     satdv );
-    VEC_ADD_ABS( temp7v, satdv,     satdv );
+
+    satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v,
+                                       temp4v, temp5v, temp6v, temp7v ) );
 
     satdv = vec_sums( satdv, zero_s32v );
     satdv = vec_splat( satdv, 3 );
@@ -635,10 +623,10 @@
         pix3v = vec_vsx_ld( 0, pix3 );
         pix3 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
+        sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v );
 
         pix0v = vec_vsx_ld( 0, pix0 );
         pix0 += i_stride;
@@ -655,10 +643,11 @@
         pix3v = vec_vsx_ld( 0, pix3 );
         pix3 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
+        sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v );
+
     }
 
     sum0v = vec_sums( sum0v, zero_s32v );
@@ -712,10 +701,9 @@
         pix2v = vec_vsx_ld( 0, pix2 );
         pix2 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
         pix0v = vec_vsx_ld( 0, pix0 );
         pix0 += i_stride;
 
@@ -729,9 +717,9 @@
         pix2v = vec_vsx_ld( 0, pix2 );
         pix2 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
     }
 
     sum0v = vec_sums( sum0v, zero_s32v );
@@ -785,10 +773,10 @@
         pix3v = vec_vsx_ld( 0, pix3 );
         pix3 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
+        sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v );
 
         pix0v = vec_vsx_ld( 0, pix0 );
         pix0 += i_stride;
@@ -805,10 +793,10 @@
         pix3v = vec_vsx_ld( 0, pix3 );
         pix3 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
+        sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v );
     }
 
     sum0v = vec_sums( sum0v, zero_s32v );
@@ -832,6 +820,82 @@
     scores[3] = sum3;
 }
 
+#define PROCESS_PIXS /* Loads two rows each of fenc and pix0..pix2 per expansion. */   \
+        vec_u8_t pix0vH = vec_vsx_ld( 0, pix0 );                                      \
+        pix0 += i_stride;                                                             \
+                                                                                      \
+        vec_u8_t pix1vH = vec_vsx_ld( 0, pix1 );                                      \
+        pix1 += i_stride;                                                             \
+                                                                                      \
+        vec_u8_t fencvH = vec_vsx_ld( 0, fenc );                                      \
+        fenc += FENC_STRIDE;                                                          \
+                                                                                      \
+        vec_u8_t pix2vH = vec_vsx_ld( 0, pix2 );                                      \
+        pix2 += i_stride;                                                             \
+                                                                                      \
+        vec_u8_t pix0vL = vec_vsx_ld( 0, pix0 );                                      \
+        pix0 += i_stride;                                                             \
+                                                                                      \
+        vec_u8_t pix1vL = vec_vsx_ld( 0, pix1 );                                      \
+        pix1 += i_stride;                                                             \
+                                                                                      \
+        vec_u8_t fencvL = vec_vsx_ld( 0, fenc );                                      \
+        fenc += FENC_STRIDE;                                                          \
+                                                                                      \
+        vec_u8_t pix2vL = vec_vsx_ld( 0, pix2 );                                      \
+        pix2 += i_stride;                                                             \
+        /* Merge each H/L row pair into the caller-declared fencv/pixNv vectors. */   \
+        fencv = xxpermdi( fencvH, fencvL, 0 );                                        \
+        pix0v = xxpermdi( pix0vH, pix0vL, 0 );                                        \
+        pix1v = xxpermdi( pix1vH, pix1vL, 0 );                                        \
+        pix2v = xxpermdi( pix2vH, pix2vL, 0 );                                        \
+        /* Add byte-wise |fenc - pixN| into the caller-declared sumNv totals. */      \
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); \
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); \
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
+
+#define PIXEL_SAD_X3_ALTIVEC( name, ly )            \
+static void name( uint8_t *fenc, uint8_t *pix0,     \
+                  uint8_t *pix1, uint8_t *pix2,     \
+                  intptr_t i_stride, int scores[3] ) \
+{                                                   \
+    /* scores[i] = SAD( fenc, pix<i> ), i = 0..2 */ \
+    /* void, as is pixel_sad_x3_16x8_altivec.    */ \
+    ALIGNED_16( int sum0 );                         \
+    ALIGNED_16( int sum1 );                         \
+    ALIGNED_16( int sum2 );                         \
+    LOAD_ZERO;                                      \
+    vec_u8_t fencv, pix0v, pix1v, pix2v;            \
+    vec_s32_t sum0v, sum1v, sum2v;                  \
+                                                    \
+    sum0v = vec_splat_s32( 0 );                     \
+    sum1v = vec_splat_s32( 0 );                     \
+    sum2v = vec_splat_s32( 0 );                     \
+                                                    \
+    for( int y = 0; y < ly; y++ )                   \
+    {                                               \
+        PROCESS_PIXS                                \
+    }                                               \
+                                                    \
+    /* Sum across lanes; splat lane 3 afterwards. */\
+    sum0v = vec_sums( sum0v, zero_s32v );           \
+    sum1v = vec_sums( sum1v, zero_s32v );           \
+    sum2v = vec_sums( sum2v, zero_s32v );           \
+    sum0v = vec_splat( sum0v, 3 );                  \
+    sum1v = vec_splat( sum1v, 3 );                  \
+    sum2v = vec_splat( sum2v, 3 );                  \
+    /* vec_ste writes one lane to each sumN.     */ \
+    vec_ste( sum0v, 0, &sum0 );                     \
+    vec_ste( sum1v, 0, &sum1 );                     \
+    vec_ste( sum2v, 0, &sum2 );                     \
+    scores[0] = sum0;                               \
+    scores[1] = sum1;                               \
+    scores[2] = sum2;                               \
+}
+
+PIXEL_SAD_X3_ALTIVEC( pixel_sad_x3_8x8_altivec, 4 )
+PIXEL_SAD_X3_ALTIVEC( pixel_sad_x3_8x16_altivec, 8 )
+
 static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0,
                                        uint8_t *pix1, uint8_t *pix2,
                                        intptr_t i_stride, int scores[3] )
@@ -862,9 +926,9 @@
         pix2v = vec_vsx_ld(0, pix2);
         pix2 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
 
         pix0v = vec_vsx_ld(0, pix0);
         pix0 += i_stride;
@@ -878,9 +942,9 @@
         pix2v = vec_vsx_ld(0, pix2);
         pix2 += i_stride;
 
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
+        sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v );
+        sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v );
+        sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v );
     }
 
     sum0v = vec_sums( sum0v, zero_s32v );
@@ -900,308 +964,49 @@
     scores[2] = sum2;
 }
 
-
-static void pixel_sad_x4_8x16_altivec( uint8_t *fenc,
-                                       uint8_t *pix0, uint8_t *pix1,
-                                       uint8_t *pix2, uint8_t *pix3,
-                                       intptr_t i_stride, int scores[4] )
-{
-    ALIGNED_16( int sum0 );
-    ALIGNED_16( int sum1 );
-    ALIGNED_16( int sum2 );
-    ALIGNED_16( int sum3 );
-
-    LOAD_ZERO;
-    vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v;
-    vec_s32_t sum0v, sum1v, sum2v, sum3v;
-
-    sum0v = vec_splat_s32(0);
-    sum1v = vec_splat_s32(0);
-    sum2v = vec_splat_s32(0);
-    sum3v = vec_splat_s32(0);
-
-    for( int y = 0; y < 8; y++ )
-    {
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        pix3v = vec_vsx_ld(0, pix3);
-        pix3 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
-
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        pix3v = vec_vsx_ld(0, pix3);
-        pix3 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
-    }
-
-    sum0v = vec_sum2s( sum0v, zero_s32v );
-    sum1v = vec_sum2s( sum1v, zero_s32v );
-    sum2v = vec_sum2s( sum2v, zero_s32v );
-    sum3v = vec_sum2s( sum3v, zero_s32v );
-
-    sum0v = vec_splat( sum0v, 1 );
-    sum1v = vec_splat( sum1v, 1 );
-    sum2v = vec_splat( sum2v, 1 );
-    sum3v = vec_splat( sum3v, 1 );
-
-    vec_ste( sum0v, 0, &sum0);
-    vec_ste( sum1v, 0, &sum1);
-    vec_ste( sum2v, 0, &sum2);
-    vec_ste( sum3v, 0, &sum3);
-
-    scores[0] = sum0;
-    scores[1] = sum1;
-    scores[2] = sum2;
-    scores[3] = sum3;
+#define PIXEL_SAD_X4_ALTIVEC( name, ly )                                              \
+static void name( uint8_t *fenc,                                                      \
+                  uint8_t *pix0, uint8_t *pix1,                                       \
+                  uint8_t *pix2, uint8_t *pix3,                                       \
+                  intptr_t i_stride, int scores[4] )                                  \
+{                                                                                     \
+    /* scores[i] = SAD( fenc, pix<i> ) for i = 0..3; two rows per loop pass. */       \
+    /* Returns nothing: results are delivered through scores only. */                 \
+    LOAD_ZERO;                                                                        \
+    vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v;                                       \
+    vec_s32_t sum0v, sum1v, sum2v, sum3v;                                             \
+                                                                                      \
+    /* All four accumulators must start at zero, sum3v included. */                   \
+    sum0v = vec_splat_s32( 0 );                                                       \
+    sum1v = vec_splat_s32( 0 );                                                       \
+    sum2v = vec_splat_s32( 0 );                                                       \
+    sum3v = vec_splat_s32( 0 );                                                       \
+                                                                                      \
+    for( int y = 0; y < ly; y++ )                                                     \
+    {                                                                                 \
+        PROCESS_PIXS                                                                  \
+        vec_u8_t pix3vH = vec_vsx_ld( 0, pix3 );                                      \
+        pix3 += i_stride;                                                             \
+        vec_u8_t pix3vL = vec_vsx_ld( 0, pix3 );                                      \
+        pix3 += i_stride;                                                             \
+        pix3v = xxpermdi( pix3vH, pix3vL, 0 );                                        \
+        sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); \
+    }                                                                                 \
+                                                                                      \
+    sum0v = vec_sums( sum0v, zero_s32v );                                             \
+    sum1v = vec_sums( sum1v, zero_s32v );                                             \
+    sum2v = vec_sums( sum2v, zero_s32v );                                             \
+    sum3v = vec_sums( sum3v, zero_s32v );                                             \
+                                                                                      \
+    vec_s32_t s01 = vec_mergel( sum0v, sum1v );                                       \
+    vec_s32_t s23 = vec_mergel( sum2v, sum3v );                                       \
+    vec_s32_t s = xxpermdi( s01, s23, 3 );                                            \
+    /* s gathers the four totals; a single 16-byte store fills scores. */             \
+    vec_vsx_st( s, 0, scores );                                                       \
 }
 
-static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0,
-                                       uint8_t *pix1, uint8_t *pix2,
-                                       intptr_t i_stride, int scores[3] )
-{
-    ALIGNED_16( int sum0 );
-    ALIGNED_16( int sum1 );
-    ALIGNED_16( int sum2 );
-
-    LOAD_ZERO;
-    vec_u8_t fencv, pix0v, pix1v, pix2v;
-    vec_s32_t sum0v, sum1v, sum2v;
-
-    sum0v = vec_splat_s32(0);
-    sum1v = vec_splat_s32(0);
-    sum2v = vec_splat_s32(0);
-
-    for( int y = 0; y < 8; y++ )
-    {
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-    }
-
-    sum0v = vec_sum2s( sum0v, zero_s32v );
-    sum1v = vec_sum2s( sum1v, zero_s32v );
-    sum2v = vec_sum2s( sum2v, zero_s32v );
-
-    sum0v = vec_splat( sum0v, 1 );
-    sum1v = vec_splat( sum1v, 1 );
-    sum2v = vec_splat( sum2v, 1 );
-
-    vec_ste( sum0v, 0, &sum0);
-    vec_ste( sum1v, 0, &sum1);
-    vec_ste( sum2v, 0, &sum2);
-
-    scores[0] = sum0;
-    scores[1] = sum1;
-    scores[2] = sum2;
-}
-
-static void pixel_sad_x4_8x8_altivec( uint8_t *fenc,
-                                      uint8_t *pix0, uint8_t *pix1,
-                                      uint8_t *pix2, uint8_t *pix3,
-                                      intptr_t i_stride, int scores[4] )
-{
-    ALIGNED_16( int sum0 );
-    ALIGNED_16( int sum1 );
-    ALIGNED_16( int sum2 );
-    ALIGNED_16( int sum3 );
-
-    LOAD_ZERO;
-    vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v;
-    vec_s32_t sum0v, sum1v, sum2v, sum3v;
-
-    sum0v = vec_splat_s32(0);
-    sum1v = vec_splat_s32(0);
-    sum2v = vec_splat_s32(0);
-    sum3v = vec_splat_s32(0);
-
-    for( int y = 0; y < 4; y++ )
-    {
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        pix3v = vec_vsx_ld(0, pix3);
-        pix3 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
-
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        pix3v = vec_vsx_ld(0, pix3);
-        pix3 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-        sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v );
-    }
-
-    sum0v = vec_sum2s( sum0v, zero_s32v );
-    sum1v = vec_sum2s( sum1v, zero_s32v );
-    sum2v = vec_sum2s( sum2v, zero_s32v );
-    sum3v = vec_sum2s( sum3v, zero_s32v );
-
-    sum0v = vec_splat( sum0v, 1 );
-    sum1v = vec_splat( sum1v, 1 );
-    sum2v = vec_splat( sum2v, 1 );
-    sum3v = vec_splat( sum3v, 1 );
-
-    vec_ste( sum0v, 0, &sum0);
-    vec_ste( sum1v, 0, &sum1);
-    vec_ste( sum2v, 0, &sum2);
-    vec_ste( sum3v, 0, &sum3);
-
-    scores[0] = sum0;
-    scores[1] = sum1;
-    scores[2] = sum2;
-    scores[3] = sum3;
-}
-
-static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0,
-                                      uint8_t *pix1, uint8_t *pix2,
-                                      intptr_t i_stride, int scores[3] )
-{
-    ALIGNED_16( int sum0 );
-    ALIGNED_16( int sum1 );
-    ALIGNED_16( int sum2 );
-
-    LOAD_ZERO;
-    vec_u8_t fencv, pix0v, pix1v, pix2v;
-    vec_s32_t sum0v, sum1v, sum2v;
-
-    sum0v = vec_splat_s32(0);
-    sum1v = vec_splat_s32(0);
-    sum2v = vec_splat_s32(0);
-
-    for( int y = 0; y < 4; y++ )
-    {
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-
-        pix0v = vec_vsx_ld(0, pix0);
-        pix0 += i_stride;
-
-        pix1v = vec_vsx_ld(0, pix1);
-        pix1 += i_stride;
-
-        fencv = vec_vsx_ld(0, fenc);
-        fenc += FENC_STRIDE;
-
-        pix2v = vec_vsx_ld(0, pix2);
-        pix2 += i_stride;
-
-        sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v );
-        sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v );
-        sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v );
-    }
-
-    sum0v = vec_sum2s( sum0v, zero_s32v );
-    sum1v = vec_sum2s( sum1v, zero_s32v );
-    sum2v = vec_sum2s( sum2v, zero_s32v );
-
-    sum0v = vec_splat( sum0v, 1 );
-    sum1v = vec_splat( sum1v, 1 );
-    sum2v = vec_splat( sum2v, 1 );
-
-    vec_ste( sum0v, 0, &sum0);
-    vec_ste( sum1v, 0, &sum1);
-    vec_ste( sum2v, 0, &sum2);
-
-    scores[0] = sum0;
-    scores[1] = sum1;
-    scores[2] = sum2;
-}
+PIXEL_SAD_X4_ALTIVEC( pixel_sad_x4_8x8_altivec, 4 )
+PIXEL_SAD_X4_ALTIVEC( pixel_sad_x4_8x16_altivec, 8 )
 
 /***********************************************************************
 * SSD routines
@@ -1215,7 +1020,7 @@
     LOAD_ZERO;
     vec_u8_t  pix1vA, pix2vA, pix1vB, pix2vB;
     vec_u32_t sumv;
-    vec_u8_t maxA, minA, diffA, maxB, minB, diffB;
+    vec_u8_t diffA, diffB;
 
     sumv = vec_splat_u32(0);
 
@@ -1227,25 +1032,19 @@
         pix1 += i_stride_pix1;
         pix2 += i_stride_pix2;
 
-        maxA = vec_max(pix1vA, pix2vA);
-        minA = vec_min(pix1vA, pix2vA);
-
         pix2vB = vec_vsx_ld(0, pix2);
         pix1vB = vec_ld(0, pix1);
 
-        diffA = vec_sub(maxA, minA);
+        diffA = vec_absd(pix1vA, pix2vA);
         sumv = vec_msum(diffA, diffA, sumv);
 
         pix1 += i_stride_pix1;
         pix2 += i_stride_pix2;
 
-        maxB = vec_max(pix1vB, pix2vB);
-        minB = vec_min(pix1vB, pix2vB);
-
         pix2vA = vec_vsx_ld(0, pix2);
         pix1vA = vec_ld(0, pix1);
 
-        diffB = vec_sub(maxB, minB);
+        diffB = vec_absd(pix1vB, pix2vB);
         sumv = vec_msum(diffB, diffB, sumv);
     }
 
@@ -1255,16 +1054,10 @@
     pix2vB = vec_vsx_ld(0, pix2);
     pix1vB = vec_ld(0, pix1);
 
-    maxA = vec_max(pix1vA, pix2vA);
-    minA = vec_min(pix1vA, pix2vA);
-
-    maxB = vec_max(pix1vB, pix2vB);
-    minB = vec_min(pix1vB, pix2vB);
-
-    diffA = vec_sub(maxA, minA);
+    diffA = vec_absd(pix1vA, pix2vA);
     sumv = vec_msum(diffA, diffA, sumv);
 
-    diffB = vec_sub(maxB, minB);
+    diffB = vec_absd(pix1vB, pix2vB);
     sumv = vec_msum(diffB, diffB, sumv);
 
     sumv = (vec_u32_t) vec_sums((vec_s32_t) sumv, zero_s32v);
@@ -1281,7 +1074,7 @@
     LOAD_ZERO;
     vec_u8_t  pix1v, pix2v;
     vec_u32_t sumv;
-    vec_u8_t maxv, minv, diffv;
+    vec_u8_t diffv;
 
     const vec_u32_t sel = (vec_u32_t)CV(-1,-1,0,0);
 
@@ -1292,10 +1085,7 @@
         pix1v = vec_vsx_ld(0, pix1);
         pix2v = vec_vsx_ld(0, pix2);
 
-        maxv = vec_max(pix1v, pix2v);
-        minv = vec_min(pix1v, pix2v);
-
-        diffv = vec_sub(maxv, minv);
+        diffv = vec_absd( pix1v, pix2v );
         sumv = vec_msum(diffv, diffv, sumv);
 
         pix1 += i_stride_pix1;
@@ -1315,7 +1105,7 @@
 /****************************************************************************
  * variance
  ****************************************************************************/
-static uint64_t x264_pixel_var_16x16_altivec( uint8_t *pix, intptr_t i_stride )
+static uint64_t pixel_var_16x16_altivec( uint8_t *pix, intptr_t i_stride )
 {
     ALIGNED_16(uint32_t sum_tab[4]);
     ALIGNED_16(uint32_t sqr_tab[4]);
@@ -1342,7 +1132,7 @@
     return sum + ((uint64_t)sqr<<32);
 }
 
-static uint64_t x264_pixel_var_8x8_altivec( uint8_t *pix, intptr_t i_stride )
+static uint64_t pixel_var_8x8_altivec( uint8_t *pix, intptr_t i_stride )
 {
     ALIGNED_16(uint32_t sum_tab[4]);
     ALIGNED_16(uint32_t sqr_tab[4]);
@@ -1634,7 +1424,7 @@
        0x1C,0x0C,0x1D,0x0D, 0x1E,0x0E,0x1F,0x0F )
  };
 
-static uint64_t x264_pixel_hadamard_ac_16x16_altivec( uint8_t *pix, intptr_t stride )
+static uint64_t pixel_hadamard_ac_16x16_altivec( uint8_t *pix, intptr_t stride )
 {
     int idx =  ((uintptr_t)pix & 8) >> 3;
     vec_u8_t permh = hadamard_permtab[idx];
@@ -1646,7 +1436,7 @@
     return ((sum>>34)<<32) + ((uint32_t)sum>>1);
 }
 
-static uint64_t x264_pixel_hadamard_ac_16x8_altivec( uint8_t *pix, intptr_t stride )
+static uint64_t pixel_hadamard_ac_16x8_altivec( uint8_t *pix, intptr_t stride )
 {
     int idx =  ((uintptr_t)pix & 8) >> 3;
     vec_u8_t permh = hadamard_permtab[idx];
@@ -1656,7 +1446,7 @@
     return ((sum>>34)<<32) + ((uint32_t)sum>>1);
 }
 
-static uint64_t x264_pixel_hadamard_ac_8x16_altivec( uint8_t *pix, intptr_t stride )
+static uint64_t pixel_hadamard_ac_8x16_altivec( uint8_t *pix, intptr_t stride )
 {
     vec_u8_t perm = hadamard_permtab[ (((uintptr_t)pix & 8) >> 3) ];
     uint64_t sum = pixel_hadamard_ac_altivec( pix, stride, perm );
@@ -1664,7 +1454,7 @@
     return ((sum>>34)<<32) + ((uint32_t)sum>>1);
 }
 
-static uint64_t x264_pixel_hadamard_ac_8x8_altivec( uint8_t *pix, intptr_t stride )
+static uint64_t pixel_hadamard_ac_8x8_altivec( uint8_t *pix, intptr_t stride )
 {
     vec_u8_t perm = hadamard_permtab[ (((uintptr_t)pix & 8) >> 3) ];
     uint64_t sum = pixel_hadamard_ac_altivec( pix, stride, perm );
@@ -1739,7 +1529,7 @@
 
 
 #define INTRA_MBCMP_8x8( mbcmp )\
-void intra_##mbcmp##_x3_8x8_altivec( uint8_t *fenc, uint8_t edge[36], int res[3] )\
+static void intra_##mbcmp##_x3_8x8_altivec( uint8_t *fenc, uint8_t edge[36], int res[3] )\
 {\
     ALIGNED_8( uint8_t pix[8*FDEC_STRIDE] );\
     x264_predict_8x8_v_c( pix, edge );\
@@ -1754,7 +1544,7 @@
 INTRA_MBCMP_8x8(sa8d)
 
 #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma )\
-void intra_##mbcmp##_x3_##size##x##size##chroma##_altivec( uint8_t *fenc, uint8_t *fdec, int res[3] )\
+static void intra_##mbcmp##_x3_##size##x##size##chroma##_altivec( uint8_t *fenc, uint8_t *fdec, int res[3] )\
 {\
     x264_predict_##size##x##size##chroma##_##pred1##_c( fdec );\
     res[0] = pixel_##mbcmp##_##size##x##size##_altivec( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
@@ -1832,13 +1622,13 @@
 
     pixf->intra_sa8d_x3_8x8   = intra_sa8d_x3_8x8_altivec;
 
-    pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_altivec;
-    pixf->var[PIXEL_8x8]   = x264_pixel_var_8x8_altivec;
+    pixf->var[PIXEL_16x16] = pixel_var_16x16_altivec;
+    pixf->var[PIXEL_8x8]   = pixel_var_8x8_altivec;
 
-    pixf->hadamard_ac[PIXEL_16x16] = x264_pixel_hadamard_ac_16x16_altivec;
-    pixf->hadamard_ac[PIXEL_16x8]  = x264_pixel_hadamard_ac_16x8_altivec;
-    pixf->hadamard_ac[PIXEL_8x16]  = x264_pixel_hadamard_ac_8x16_altivec;
-    pixf->hadamard_ac[PIXEL_8x8]   = x264_pixel_hadamard_ac_8x8_altivec;
+    pixf->hadamard_ac[PIXEL_16x16] = pixel_hadamard_ac_16x16_altivec;
+    pixf->hadamard_ac[PIXEL_16x8]  = pixel_hadamard_ac_16x8_altivec;
+    pixf->hadamard_ac[PIXEL_8x16]  = pixel_hadamard_ac_8x16_altivec;
+    pixf->hadamard_ac[PIXEL_8x8]   = pixel_hadamard_ac_8x8_altivec;
 
     pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec;
 #endif // !HIGH_BIT_DEPTH
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/pixel.h x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.h
--- x264-0.152.2854+gite9a5903/common/ppc/pixel.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: ppc pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
@@ -26,6 +26,7 @@
 #ifndef X264_PPC_PIXEL_H
 #define X264_PPC_PIXEL_H
 
+#define x264_pixel_init_altivec x264_template(pixel_init_altivec)
 void x264_pixel_init_altivec( x264_pixel_function_t *pixf );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/ppccommon.h x264-0.158.2988+git-20191101.7817004/common/ppc/ppccommon.h
--- x264-0.152.2854+gite9a5903/common/ppc/ppccommon.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/ppccommon.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ppccommon.h: ppc utility macros
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
@@ -146,19 +146,14 @@
 #define vec_s32_to_u16(v) vec_packsu( v, zero_s32v )
 
 /***********************************************************************
- * PREP_STORE##n: declares required vectors to store n bytes to a
- *                potentially unaligned address
  * VEC_STORE##n:  stores n bytes from vector v to address p
  **********************************************************************/
-#define PREP_STORE8                                                    \
-    vec_u8_t _tmp3v;                                                   \
-    vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  \
-                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F } \
-
-#define VEC_STORE8( v, p )           \
-    _tmp3v = vec_vsx_ld( 0, p );     \
-    v = vec_perm( v, _tmp3v, mask ); \
-    vec_vsx_st( v, 0, p )
+#ifdef __POWER9_VECTOR__
+#define VEC_STORE8( v, p ) vec_xst_len( v, p, 8 )
+#else /* no vec_xst_len: combine v with the vector loaded from p, store it back */
+#define VEC_STORE8( v, p ) \
+    vec_vsx_st( vec_xxpermdi( v, vec_vsx_ld( 0, p ), 1 ), 0, p )
+#endif
 
 /***********************************************************************
  * VEC_TRANSPOSE_8
@@ -304,3 +299,38 @@
         vec_st(vec_perm(_e, _v, _m), off, _dst);           \
     } while( 0 )
 #endif
+
+#ifndef __POWER9_VECTOR__ /* presumably vec_absd comes from the compiler when this is defined - confirm */
+#define vec_absd( a, b ) vec_sub( vec_max( a, b ), vec_min( a, b ) ) /* max - min per element; a and b are each expanded twice */
+#endif
+
+// vec_xxpermdi is quite useful but some versions of clang do not expose it
+#if !HAVE_VSX || (defined(__clang__) && __clang_major__ < 6)
+static const vec_u8_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+                                         0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
+                                         0x14, 0x15, 0x16, 0x17 }; // selector 0: indices 0x00-0x07, then 0x10-0x17
+static const vec_u8_t xxpermdi1_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+                                         0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B,
+                                         0x1C, 0x1D, 0x1E, 0x1F }; // selector 1: indices 0x00-0x07, then 0x18-0x1F
+static const vec_u8_t xxpermdi2_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                         0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13,
+                                         0x14, 0x15, 0x16, 0x17 }; // selector 2: indices 0x08-0x0F, then 0x10-0x17
+static const vec_u8_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                         0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
+                                         0x1C, 0x1D, 0x1E, 0x1F }; // selector 3: indices 0x08-0x0F, then 0x18-0x1F
+#define xxpermdi(a, b, c) vec_perm(a, b, xxpermdi##c##_perm) // c is pasted into the table name, so it must be a literal 0..3
+#elif (defined(__GNUC__) && (__GNUC__ > 6 || (__GNUC__ == 6 && __GNUC_MINOR__ >= 3))) || \
+      (defined(__clang__) && __clang_major__ >= 7)
+#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c) // these compilers take the selector as-is
+#endif
+
+// vec_xxpermdi has its endianness bias exposed in early gcc and clang.
+// Little-endian: swap the operands, swap the two selector bits, then invert them.
+#ifndef xxpermdi
+#ifdef WORDS_BIGENDIAN
+#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c)
+#else
+#define xxpermdi(a, b, c) \
+    vec_xxpermdi(b, a, ((((c) >> 1) | (((c) & 1) << 1)) ^ 3))
+#endif
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/predict.c x264-0.158.2988+git-20191101.7817004/common/ppc/predict.c
--- x264-0.152.2854+gite9a5903/common/ppc/predict.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/predict.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: ppc intra prediction
  *****************************************************************************
- * Copyright (C) 2007-2017 x264 project
+ * Copyright (C) 2007-2019 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
@@ -24,9 +24,9 @@
  *****************************************************************************/
 
 #include "common/common.h"
+#include "ppccommon.h"
 #include "predict.h"
 #include "pixel.h"
-#include "ppccommon.h"
 
 #if !HIGH_BIT_DEPTH
 static void predict_8x8c_p_altivec( uint8_t *src )
@@ -58,8 +58,6 @@
     vec_s16_t induc_v  = (vec_s16_t) CV(0, 1, 2, 3, 4, 5, 6, 7);
     vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v);
 
-    PREP_STORE8;
-
     for( int i = 0; i < 8; ++i )
     {
         vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v);
@@ -67,7 +65,6 @@
         VEC_STORE8(com_sat_v, &src[0]);
         src += FDEC_STRIDE;
         add_i0_b_0v = vec_adds(add_i0_b_0v, c_v);
-
     }
 }
 
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/predict.h x264-0.158.2988+git-20191101.7817004/common/ppc/predict.h
--- x264-0.152.2854+gite9a5903/common/ppc/predict.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/predict.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: ppc intra prediction
  *****************************************************************************
- * Copyright (C) 2007-2017 x264 project
+ * Copyright (C) 2007-2019 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
@@ -26,7 +26,9 @@
 #ifndef X264_PPC_PREDICT_H
 #define X264_PPC_PREDICT_H
 
+#define x264_predict_16x16_init_altivec x264_template(predict_16x16_init_altivec)
 void x264_predict_16x16_init_altivec( x264_predict_t pf[7] );
+#define x264_predict_8x8c_init_altivec x264_template(predict_8x8c_init_altivec)
 void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] );
 
 #endif /* X264_PPC_PREDICT_H */
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/quant.c x264-0.158.2988+git-20191101.7817004/common/ppc/quant.c
--- x264-0.152.2854+gite9a5903/common/ppc/quant.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/quant.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.c: ppc quantization
  *****************************************************************************
- * Copyright (C) 2007-2017 x264 project
+ * Copyright (C) 2007-2019 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
@@ -39,8 +39,8 @@
     biasvB = vec_ld((idx1), bias);                                  \
     mskA = vec_cmplt(temp1v, zero_s16v);                            \
     mskB = vec_cmplt(temp2v, zero_s16v);                            \
-    coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);\
-    coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);\
+    coefvA = (vec_u16_t)vec_abs( temp1v );                          \
+    coefvB = (vec_u16_t)vec_abs( temp2v );                          \
     coefvA = vec_adds(coefvA, biasvA);                              \
     coefvB = vec_adds(coefvB, biasvB);                              \
     multEvenvA = vec_mule(coefvA, mfvA);                            \
@@ -51,8 +51,12 @@
     multOddvA = vec_sr(multOddvA, i_qbitsv);                        \
     multEvenvB = vec_sr(multEvenvB, i_qbitsv);                      \
     multOddvB = vec_sr(multOddvB, i_qbitsv);                        \
-    temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), vec_mergel(multEvenvA, multOddvA)); \
-    temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB), vec_mergel(multEvenvB, multOddvB)); \
+    temp1v = (vec_s16_t) vec_packs( multEvenvA, multOddvA );        \
+    tmpv = xxpermdi( temp1v, temp1v, 2 );                           \
+    temp1v = vec_mergeh( temp1v, tmpv );                            \
+    temp2v = (vec_s16_t) vec_packs( multEvenvB, multOddvB );        \
+    tmpv = xxpermdi( temp2v, temp2v, 2 );                           \
+    temp2v = vec_mergeh( temp2v, tmpv );                            \
     temp1v = vec_xor(temp1v, mskA);                                 \
     temp2v = vec_xor(temp2v, mskB);                                 \
     temp1v = vec_adds(temp1v, vec_and(mskA, one));                  \
@@ -66,7 +70,7 @@
 {
     LOAD_ZERO;
     vector bool short mskA;
-    vec_u32_t i_qbitsv;
+    vec_u32_t i_qbitsv = vec_splats( (uint32_t)16 );
     vec_u16_t coefvA;
     vec_u32_t multEvenvA, multOddvA;
     vec_u16_t mfvA;
@@ -80,16 +84,214 @@
     vec_u16_t mfvB;
     vec_u16_t biasvB;
 
-    vec_s16_t temp1v, temp2v;
-
-    vec_u32_u qbits_u;
-    qbits_u.s[0]=16;
-    i_qbitsv = vec_splat(qbits_u.v, 0);
+    vec_s16_t temp1v, temp2v, tmpv;
 
     QUANT_16_U( 0, 16 );
     return vec_any_ne(nz, zero_s16v);
 }
 
+int x264_quant_4x4x4_altivec( dctcoef dcta[4][16], udctcoef mf[16], udctcoef bias[16] )
+{   // Quantizes the four blocks dcta[0..3] in place; every block uses the same mf and bias.
+    LOAD_ZERO;
+    vec_u32_t i_qbitsv = vec_splats( (uint32_t)16 ); // shift count applied to every product
+    vec_s16_t one = vec_splat_s16( 1 );
+    vec_s16_t nz0, nz1, nz2, nz3; // nzN = OR of both result vectors of block N
+    // Block 0 working vectors (A: vector loaded at byte offset 0, B: at byte offset 16).
+    vector bool short mskA0;
+    vec_u16_t coefvA0;
+    vec_u32_t multEvenvA0, multOddvA0;
+    vec_u16_t mfvA0;
+    vec_u16_t biasvA0;
+    vector bool short mskB0;
+    vec_u16_t coefvB0;
+    vec_u32_t multEvenvB0, multOddvB0;
+    vec_u16_t mfvB0;
+    vec_u16_t biasvB0;
+    // Block 1 working vectors.
+    vector bool short mskA1;
+    vec_u16_t coefvA1;
+    vec_u32_t multEvenvA1, multOddvA1;
+    vec_u16_t mfvA1;
+    vec_u16_t biasvA1;
+    vector bool short mskB1;
+    vec_u16_t coefvB1;
+    vec_u32_t multEvenvB1, multOddvB1;
+    vec_u16_t mfvB1;
+    vec_u16_t biasvB1;
+    // Block 2 working vectors.
+    vector bool short mskA2;
+    vec_u16_t coefvA2;
+    vec_u32_t multEvenvA2, multOddvA2;
+    vec_u16_t mfvA2;
+    vec_u16_t biasvA2;
+    vector bool short mskB2;
+    vec_u16_t coefvB2;
+    vec_u32_t multEvenvB2, multOddvB2;
+    vec_u16_t mfvB2;
+    vec_u16_t biasvB2;
+    // Block 3 working vectors.
+    vector bool short mskA3;
+    vec_u16_t coefvA3;
+    vec_u32_t multEvenvA3, multOddvA3;
+    vec_u16_t mfvA3;
+    vec_u16_t biasvA3;
+    vector bool short mskB3;
+    vec_u16_t coefvB3;
+    vec_u32_t multEvenvB3, multOddvB3;
+    vec_u16_t mfvB3;
+    vec_u16_t biasvB3;
+    // Scratch vectors reused by every block.
+    vec_s16_t temp1v, temp2v;
+    vec_s16_t tmpv0;
+    vec_s16_t tmpv1;
+    // One pointer per block of dcta.
+    dctcoef *dct0 = dcta[0];
+    dctcoef *dct1 = dcta[1];
+    dctcoef *dct2 = dcta[2];
+    dctcoef *dct3 = dcta[3];
+    // Stage 1, all blocks: load both vectors plus mf/bias (same addresses each time), keep the sign mask (coef < 0) and |coef|.
+    temp1v = vec_ld( 0,  dct0 );
+    temp2v = vec_ld( 16, dct0 );
+    mfvA0 = vec_ld( 0,  mf );
+    mfvB0 = vec_ld( 16, mf );
+    biasvA0 = vec_ld( 0,  bias );
+    biasvB0 = vec_ld( 16, bias );
+    mskA0 = vec_cmplt( temp1v, zero_s16v );
+    mskB0 = vec_cmplt( temp2v, zero_s16v );
+    coefvA0 = (vec_u16_t)vec_abs( temp1v );
+    coefvB0 = (vec_u16_t)vec_abs( temp2v );
+    temp1v = vec_ld( 0,  dct1 );
+    temp2v = vec_ld( 16, dct1 );
+    mfvA1 = vec_ld( 0,  mf );
+    mfvB1 = vec_ld( 16, mf );
+    biasvA1 = vec_ld( 0,  bias );
+    biasvB1 = vec_ld( 16, bias );
+    mskA1 = vec_cmplt( temp1v, zero_s16v );
+    mskB1 = vec_cmplt( temp2v, zero_s16v );
+    coefvA1 = (vec_u16_t)vec_abs( temp1v );
+    coefvB1 = (vec_u16_t)vec_abs( temp2v );
+    temp1v = vec_ld( 0,  dct2 );
+    temp2v = vec_ld( 16, dct2 );
+    mfvA2 = vec_ld( 0,  mf );
+    mfvB2 = vec_ld( 16, mf );
+    biasvA2 = vec_ld( 0,  bias );
+    biasvB2 = vec_ld( 16, bias );
+    mskA2 = vec_cmplt( temp1v, zero_s16v );
+    mskB2 = vec_cmplt( temp2v, zero_s16v );
+    coefvA2 = (vec_u16_t)vec_abs( temp1v );
+    coefvB2 = (vec_u16_t)vec_abs( temp2v );
+    temp1v = vec_ld( 0,  dct3 );
+    temp2v = vec_ld( 16, dct3 );
+    mfvA3 = vec_ld( 0,  mf );
+    mfvB3 = vec_ld( 16, mf );
+    biasvA3 = vec_ld( 0,  bias );
+    biasvB3 = vec_ld( 16, bias );
+    mskA3 = vec_cmplt( temp1v, zero_s16v );
+    mskB3 = vec_cmplt( temp2v, zero_s16v );
+    coefvA3 = (vec_u16_t)vec_abs( temp1v );
+    coefvB3 = (vec_u16_t)vec_abs( temp2v );
+    // Stage 2, all blocks: add the bias to every absolute value (vec_adds is a saturating add).
+    coefvA0 = vec_adds( coefvA0, biasvA0 );
+    coefvB0 = vec_adds( coefvB0, biasvB0 );
+    coefvA1 = vec_adds( coefvA1, biasvA1 );
+    coefvB1 = vec_adds( coefvB1, biasvB1 );
+    coefvA2 = vec_adds( coefvA2, biasvA2 );
+    coefvB2 = vec_adds( coefvB2, biasvB2 );
+    coefvA3 = vec_adds( coefvA3, biasvA3 );
+    coefvB3 = vec_adds( coefvB3, biasvB3 );
+    // Stage 3, block 0: multiply even/odd lanes by mf, shift right by 16, pack to 16 bits, re-interleave, restore the sign, store.
+    multEvenvA0 = vec_mule( coefvA0, mfvA0 );
+    multOddvA0  = vec_mulo( coefvA0, mfvA0 );
+    multEvenvB0 = vec_mule( coefvB0, mfvB0 );
+    multOddvB0  = vec_mulo( coefvB0, mfvB0 );
+    multEvenvA0 = vec_sr( multEvenvA0, i_qbitsv );
+    multOddvA0  = vec_sr( multOddvA0,  i_qbitsv );
+    multEvenvB0 = vec_sr( multEvenvB0, i_qbitsv );
+    multOddvB0  = vec_sr( multOddvB0,  i_qbitsv );
+    temp1v = (vec_s16_t)vec_packs( multEvenvA0, multOddvA0 );
+    temp2v = (vec_s16_t)vec_packs( multEvenvB0, multOddvB0 );
+    tmpv0 = xxpermdi( temp1v, temp1v, 2 ); // same vector passed twice with selector 2
+    tmpv1 = xxpermdi( temp2v, temp2v, 2 );
+    temp1v = vec_mergeh( temp1v, tmpv0 ); // interleave the packed even and odd results
+    temp2v = vec_mergeh( temp2v, tmpv1 );
+    temp1v = vec_xor( temp1v, mskA0 ); // xor with the mask, then add (mask & 1): negates lanes whose input was negative
+    temp2v = vec_xor( temp2v, mskB0 );
+    temp1v = vec_adds( temp1v, vec_and( mskA0, one ) );
+    temp2v = vec_adds( temp2v, vec_and( mskB0, one ) );
+    vec_st( temp1v, 0,  dct0 );
+    vec_st( temp2v, 16, dct0 );
+    nz0 = vec_or( temp1v, temp2v );
+    // Stage 3, block 1: same sequence as block 0.
+    multEvenvA1 = vec_mule( coefvA1, mfvA1 );
+    multOddvA1  = vec_mulo( coefvA1, mfvA1 );
+    multEvenvB1 = vec_mule( coefvB1, mfvB1 );
+    multOddvB1  = vec_mulo( coefvB1, mfvB1 );
+    multEvenvA1 = vec_sr( multEvenvA1, i_qbitsv );
+    multOddvA1  = vec_sr( multOddvA1,  i_qbitsv );
+    multEvenvB1 = vec_sr( multEvenvB1, i_qbitsv );
+    multOddvB1  = vec_sr( multOddvB1,  i_qbitsv );
+    temp1v = (vec_s16_t)vec_packs( multEvenvA1, multOddvA1 );
+    temp2v = (vec_s16_t)vec_packs( multEvenvB1, multOddvB1 );
+    tmpv0 = xxpermdi( temp1v, temp1v, 2 );
+    tmpv1 = xxpermdi( temp2v, temp2v, 2 );
+    temp1v = vec_mergeh( temp1v, tmpv0 );
+    temp2v = vec_mergeh( temp2v, tmpv1 );
+    temp1v = vec_xor( temp1v, mskA1 );
+    temp2v = vec_xor( temp2v, mskB1 );
+    temp1v = vec_adds( temp1v, vec_and( mskA1, one ) );
+    temp2v = vec_adds( temp2v, vec_and( mskB1, one ) );
+    vec_st( temp1v, 0,  dct1 );
+    vec_st( temp2v, 16, dct1 );
+    nz1 = vec_or( temp1v, temp2v );
+    // Stage 3, block 2: same sequence as block 0.
+    multEvenvA2 = vec_mule( coefvA2, mfvA2 );
+    multOddvA2  = vec_mulo( coefvA2, mfvA2 );
+    multEvenvB2 = vec_mule( coefvB2, mfvB2 );
+    multOddvB2  = vec_mulo( coefvB2, mfvB2 );
+    multEvenvA2 = vec_sr( multEvenvA2, i_qbitsv );
+    multOddvA2  = vec_sr( multOddvA2,  i_qbitsv );
+    multEvenvB2 = vec_sr( multEvenvB2, i_qbitsv );
+    multOddvB2  = vec_sr( multOddvB2,  i_qbitsv );
+    temp1v = (vec_s16_t)vec_packs( multEvenvA2, multOddvA2 );
+    temp2v = (vec_s16_t)vec_packs( multEvenvB2, multOddvB2 );
+    tmpv0 = xxpermdi( temp1v, temp1v, 2 );
+    tmpv1 = xxpermdi( temp2v, temp2v, 2 );
+    temp1v = vec_mergeh( temp1v, tmpv0 );
+    temp2v = vec_mergeh( temp2v, tmpv1 );
+    temp1v = vec_xor( temp1v, mskA2 );
+    temp2v = vec_xor( temp2v, mskB2 );
+    temp1v = vec_adds( temp1v, vec_and( mskA2, one ) );
+    temp2v = vec_adds( temp2v, vec_and( mskB2, one ) );
+    vec_st( temp1v, 0,  dct2 );
+    vec_st( temp2v, 16, dct2 );
+    nz2 = vec_or( temp1v, temp2v );
+    // Stage 3, block 3: same sequence as block 0.
+    multEvenvA3 = vec_mule( coefvA3, mfvA3 );
+    multOddvA3  = vec_mulo( coefvA3, mfvA3 );
+    multEvenvB3 = vec_mule( coefvB3, mfvB3 );
+    multOddvB3  = vec_mulo( coefvB3, mfvB3 );
+    multEvenvA3 = vec_sr( multEvenvA3, i_qbitsv );
+    multOddvA3  = vec_sr( multOddvA3,  i_qbitsv );
+    multEvenvB3 = vec_sr( multEvenvB3, i_qbitsv );
+    multOddvB3  = vec_sr( multOddvB3,  i_qbitsv );
+    temp1v = (vec_s16_t)vec_packs( multEvenvA3, multOddvA3 );
+    temp2v = (vec_s16_t)vec_packs( multEvenvB3, multOddvB3 );
+    tmpv0 = xxpermdi( temp1v, temp1v, 2 );
+    tmpv1 = xxpermdi( temp2v, temp2v, 2 );
+    temp1v = vec_mergeh( temp1v, tmpv0 );
+    temp2v = vec_mergeh( temp2v, tmpv1 );
+    temp1v = vec_xor( temp1v, mskA3 );
+    temp2v = vec_xor( temp2v, mskB3 );
+    temp1v = vec_adds( temp1v, vec_and( mskA3, one ) );
+    temp2v = vec_adds( temp2v, vec_and( mskB3, one ) );
+    vec_st( temp1v, 0,  dct3 );
+    vec_st( temp2v, 16, dct3 );
+    nz3 = vec_or( temp1v, temp2v );
+    // Return a 4-bit mask: bit N is set when block N has at least one nonzero quantized coefficient.
+    return (vec_any_ne( nz0, zero_s16v ) << 0) | (vec_any_ne( nz1, zero_s16v ) << 1) |
+           (vec_any_ne( nz2, zero_s16v ) << 2) | (vec_any_ne( nz3, zero_s16v ) << 3);
+}
+
 // DC quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
 #define QUANT_16_U_DC( idx0, idx1 )                                 \
 {                                                                   \
@@ -139,17 +341,9 @@
     vec_u16_t mfv;
     vec_u16_t biasv;
 
-    vec_u16_u mf_u;
-    mf_u.s[0]=mf;
-    mfv = vec_splat( mf_u.v, 0 );
-
-    vec_u32_u qbits_u;
-    qbits_u.s[0]=16;
-    i_qbitsv = vec_splat(qbits_u.v, 0);
-
-    vec_u16_u bias_u;
-    bias_u.s[0]=bias;
-    biasv = vec_splat(bias_u.v, 0);
+    mfv = vec_splats( (uint16_t)mf );
+    i_qbitsv = vec_splats( (uint32_t) 16 );
+    biasv = vec_splats( (uint16_t)bias );
 
     QUANT_16_U_DC( 0, 16 );
     return vec_any_ne(nz, zero_s16v);
@@ -184,25 +378,17 @@
     vec_u32_t multEvenvA, multOddvA;
     vec_s16_t one = vec_splat_s16(1);
     vec_s16_t nz = zero_s16v;
+    static const vec_s16_t mask2 = CV(-1, -1, -1, -1,  0, 0, 0, 0);
 
     vec_s16_t temp1v, temp2v;
 
     vec_u16_t mfv;
     vec_u16_t biasv;
 
-    vec_u16_u mf_u;
-    mf_u.s[0]=mf;
-    mfv = vec_splat( mf_u.v, 0 );
-
-    vec_u32_u qbits_u;
-    qbits_u.s[0]=16;
-    i_qbitsv = vec_splat(qbits_u.v, 0);
-
-    vec_u16_u bias_u;
-    bias_u.s[0]=bias;
-    biasv = vec_splat(bias_u.v, 0);
+    mfv = vec_splats( (uint16_t)mf );
+    i_qbitsv = vec_splats( (uint32_t) 16 );
+    biasv = vec_splats( (uint16_t)bias );
 
-    static const vec_s16_t mask2 = CV(-1, -1, -1, -1,  0, 0, 0, 0);
     QUANT_4_U_DC(0);
     return vec_any_ne(vec_and(nz, mask2), zero_s16v);
 }
@@ -225,11 +411,9 @@
     vec_u16_t mfvB;
     vec_u16_t biasvB;
 
-    vec_s16_t temp1v, temp2v;
+    vec_s16_t temp1v, temp2v, tmpv;
 
-    vec_u32_u qbits_u;
-    qbits_u.s[0]=16;
-    i_qbitsv = vec_splat(qbits_u.v, 0);
+    i_qbitsv = vec_splats( (uint32_t)16 );
 
     for( int i = 0; i < 4; i++ )
         QUANT_16_U( i*2*16, i*2*16+16 );
@@ -245,8 +429,9 @@
                                                                      \
     multEvenvA = vec_mule(dctv, mfv);                                \
     multOddvA = vec_mulo(dctv, mfv);                                 \
-    dctv = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA),  \
-                                 vec_mergel(multEvenvA, multOddvA)); \
+    dctv = (vec_s16_t) vec_packs( multEvenvA, multOddvA );           \
+    tmpv = xxpermdi( dctv, dctv, 2 );                                \
+    dctv = vec_mergeh( dctv, tmpv );                                 \
     dctv = vec_sl(dctv, i_qbitsv);                                   \
     vec_st(dctv, 8*y, dct);                                          \
 }
@@ -288,7 +473,7 @@
     int i_mf = i_qp%6;
     int i_qbits = i_qp/6 - 4;
 
-    vec_s16_t dctv;
+    vec_s16_t dctv, tmpv;
     vec_s16_t dct1v, dct2v;
     vec_s32_t mf1v, mf2v;
     vec_s16_t mfv;
@@ -298,9 +483,7 @@
     if( i_qbits >= 0 )
     {
         vec_u16_t i_qbitsv;
-        vec_u16_u qbits_u;
-        qbits_u.s[0]=i_qbits;
-        i_qbitsv = vec_splat(qbits_u.v, 0);
+        i_qbitsv = vec_splats( (uint16_t) i_qbits );
 
         for( int y = 0; y < 4; y+=2 )
             DEQUANT_SHL();
@@ -310,19 +493,13 @@
         const int f = 1 << (-i_qbits-1);
 
         vec_s32_t fv;
-        vec_u32_u f_u;
-        f_u.s[0]=f;
-        fv = (vec_s32_t)vec_splat(f_u.v, 0);
+        fv = vec_splats( f );
 
         vec_u32_t i_qbitsv;
-        vec_u32_u qbits_u;
-        qbits_u.s[0]=-i_qbits;
-        i_qbitsv = vec_splat(qbits_u.v, 0);
+        i_qbitsv = vec_splats( (uint32_t)-i_qbits );
 
         vec_u32_t sixteenv;
-        vec_u32_u sixteen_u;
-        sixteen_u.s[0]=16;
-        sixteenv = vec_splat(sixteen_u.v, 0);
+        sixteenv = vec_splats( (uint32_t)16 );
 
         for( int y = 0; y < 4; y+=2 )
             DEQUANT_SHR();
@@ -334,7 +511,7 @@
     int i_mf = i_qp%6;
     int i_qbits = i_qp/6 - 6;
 
-    vec_s16_t dctv;
+    vec_s16_t dctv, tmpv;
     vec_s16_t dct1v, dct2v;
     vec_s32_t mf1v, mf2v;
     vec_s16_t mfv;
@@ -344,9 +521,7 @@
     if( i_qbits >= 0 )
     {
         vec_u16_t i_qbitsv;
-        vec_u16_u qbits_u;
-        qbits_u.s[0]=i_qbits;
-        i_qbitsv = vec_splat(qbits_u.v, 0);
+        i_qbitsv = vec_splats((uint16_t)i_qbits );
 
         for( int y = 0; y < 16; y+=2 )
             DEQUANT_SHL();
@@ -356,19 +531,13 @@
         const int f = 1 << (-i_qbits-1);
 
         vec_s32_t fv;
-        vec_u32_u f_u;
-        f_u.s[0]=f;
-        fv = (vec_s32_t)vec_splat(f_u.v, 0);
+        fv = vec_splats( f );
 
         vec_u32_t i_qbitsv;
-        vec_u32_u qbits_u;
-        qbits_u.s[0]=-i_qbits;
-        i_qbitsv = vec_splat(qbits_u.v, 0);
+        i_qbitsv = vec_splats( (uint32_t)-i_qbits );
 
         vec_u32_t sixteenv;
-        vec_u32_u sixteen_u;
-        sixteen_u.s[0]=16;
-        sixteenv = vec_splat(sixteen_u.v, 0);
+        sixteenv = vec_splats( (uint32_t)16 );
 
         for( int y = 0; y < 16; y+=2 )
             DEQUANT_SHR();
diff -Nru x264-0.152.2854+gite9a5903/common/ppc/quant.h x264-0.158.2988+git-20191101.7817004/common/ppc/quant.h
--- x264-0.152.2854+gite9a5903/common/ppc/quant.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/ppc/quant.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: ppc quantization
  *****************************************************************************
- * Copyright (C) 2007-2017 x264 project
+ * Copyright (C) 2007-2019 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
@@ -26,12 +26,21 @@
 #ifndef X264_PPC_QUANT_H
 #define X264_PPC_QUANT_H
 
+#define x264_quant_4x4x4_altivec x264_template(quant_4x4x4_altivec)
+int x264_quant_4x4x4_altivec( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] );
+#define x264_quant_4x4_altivec x264_template(quant_4x4_altivec)
 int x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
+#define x264_quant_8x8_altivec x264_template(quant_8x8_altivec)
 int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
 
+#define x264_quant_4x4_dc_altivec x264_template(quant_4x4_dc_altivec)
 int x264_quant_4x4_dc_altivec( int16_t dct[16], int mf, int bias );
+#define x264_quant_2x2_dc_altivec x264_template(quant_2x2_dc_altivec)
 int x264_quant_2x2_dc_altivec( int16_t dct[4], int mf, int bias );
 
+#define x264_dequant_4x4_altivec x264_template(dequant_4x4_altivec)
 void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_altivec x264_template(dequant_8x8_altivec)
 void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/predict.c x264-0.158.2988+git-20191101.7817004/common/predict.c
--- x264-0.152.2854+gite9a5903/common/predict.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/predict.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -34,16 +34,16 @@
 #if HAVE_MMX
 #   include "x86/predict.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/predict.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/predict.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/predict.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/predict.h"
 #endif
 
@@ -74,7 +74,7 @@
 
     PREDICT_16x16_DC( dcsplat );
 }
-static void x264_predict_16x16_dc_left_c( pixel *src )
+static void predict_16x16_dc_left_c( pixel *src )
 {
     int dc = 0;
 
@@ -84,7 +84,7 @@
 
     PREDICT_16x16_DC( dcsplat );
 }
-static void x264_predict_16x16_dc_top_c( pixel *src )
+static void predict_16x16_dc_top_c( pixel *src )
 {
     int dc = 0;
 
@@ -94,7 +94,7 @@
 
     PREDICT_16x16_DC( dcsplat );
 }
-static void x264_predict_16x16_dc_128_c( pixel *src )
+static void predict_16x16_dc_128_c( pixel *src )
 {
     PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
 }
@@ -161,7 +161,7 @@
  * 8x8 prediction for intra chroma block (4:2:0)
  ****************************************************************************/
 
-static void x264_predict_8x8c_dc_128_c( pixel *src )
+static void predict_8x8c_dc_128_c( pixel *src )
 {
     for( int y = 0; y < 8; y++ )
     {
@@ -170,7 +170,7 @@
         src += FDEC_STRIDE;
     }
 }
-static void x264_predict_8x8c_dc_left_c( pixel *src )
+static void predict_8x8c_dc_left_c( pixel *src )
 {
     int dc0 = 0, dc1 = 0;
 
@@ -196,7 +196,7 @@
     }
 
 }
-static void x264_predict_8x8c_dc_top_c( pixel *src )
+static void predict_8x8c_dc_top_c( pixel *src )
 {
     int dc0 = 0, dc1 = 0;
 
@@ -308,7 +308,7 @@
  * 8x16 prediction for intra chroma block (4:2:2)
  ****************************************************************************/
 
-static void x264_predict_8x16c_dc_128_c( pixel *src )
+static void predict_8x16c_dc_128_c( pixel *src )
 {
     for( int y = 0; y < 16; y++ )
     {
@@ -317,7 +317,7 @@
         src += FDEC_STRIDE;
     }
 }
-static void x264_predict_8x16c_dc_left_c( pixel *src )
+static void predict_8x16c_dc_left_c( pixel *src )
 {
     for( int i = 0; i < 4; i++ )
     {
@@ -336,7 +336,7 @@
         }
     }
 }
-static void x264_predict_8x16c_dc_top_c( pixel *src )
+static void predict_8x16c_dc_top_c( pixel *src )
 {
     int dc0 = 0, dc1 = 0;
 
@@ -475,16 +475,16 @@
 #define PREDICT_4x4_DC(v)\
     SRC_X4(0,0) = SRC_X4(0,1) = SRC_X4(0,2) = SRC_X4(0,3) = v;
 
-static void x264_predict_4x4_dc_128_c( pixel *src )
+static void predict_4x4_dc_128_c( pixel *src )
 {
     PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
 }
-static void x264_predict_4x4_dc_left_c( pixel *src )
+static void predict_4x4_dc_left_c( pixel *src )
 {
     pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + SRC(-1,3) + 2) >> 2 );
     PREDICT_4x4_DC( dc );
 }
-static void x264_predict_4x4_dc_top_c( pixel *src )
+static void predict_4x4_dc_top_c( pixel *src )
 {
     pixel4 dc = PIXEL_SPLAT_X4( (SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 2) >> 2 );
     PREDICT_4x4_DC( dc );
@@ -528,7 +528,7 @@
 #define F1(a,b)   (((a)+(b)+1)>>1)
 #define F2(a,b,c) (((a)+2*(b)+(c)+2)>>2)
 
-static void x264_predict_4x4_ddl_c( pixel *src )
+static void predict_4x4_ddl_c( pixel *src )
 {
     PREDICT_4x4_LOAD_TOP
     PREDICT_4x4_LOAD_TOP_RIGHT
@@ -540,7 +540,7 @@
     SRC(3,2)=SRC(2,3)= F2(t5,t6,t7);
     SRC(3,3)= F2(t6,t7,t7);
 }
-static void x264_predict_4x4_ddr_c( pixel *src )
+static void predict_4x4_ddr_c( pixel *src )
 {
     int lt = SRC(-1,-1);
     PREDICT_4x4_LOAD_LEFT
@@ -554,7 +554,7 @@
     SRC(0,3)= F2(l1,l2,l3);
 }
 
-static void x264_predict_4x4_vr_c( pixel *src )
+static void predict_4x4_vr_c( pixel *src )
 {
     int lt = SRC(-1,-1);
     PREDICT_4x4_LOAD_LEFT
@@ -571,7 +571,7 @@
     SRC(3,0)= F1(t2,t3);
 }
 
-static void x264_predict_4x4_hd_c( pixel *src )
+static void predict_4x4_hd_c( pixel *src )
 {
     int lt= SRC(-1,-1);
     PREDICT_4x4_LOAD_LEFT
@@ -588,7 +588,7 @@
     SRC(3,0)= F2(t2,t1,t0);
 }
 
-static void x264_predict_4x4_vl_c( pixel *src )
+static void predict_4x4_vl_c( pixel *src )
 {
     PREDICT_4x4_LOAD_TOP
     PREDICT_4x4_LOAD_TOP_RIGHT
@@ -604,7 +604,7 @@
     SRC(3,3)= F2(t4,t5,t6);
 }
 
-static void x264_predict_4x4_hu_c( pixel *src )
+static void predict_4x4_hu_c( pixel *src )
 {
     PREDICT_4x4_LOAD_LEFT
     SRC(0,0)= F1(l0,l1);
@@ -626,7 +626,7 @@
 #define PT(x) \
     edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1));
 
-static void x264_predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters )
+static void predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters )
 {
     /* edge[7..14] = l7..l0
      * edge[15] = lt
@@ -694,17 +694,17 @@
         src += FDEC_STRIDE; \
     }
 
-static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[36] )
+static void predict_8x8_dc_128_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
 }
-static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[36] )
+static void predict_8x8_dc_left_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_LEFT
     pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3 );
     PREDICT_8x8_DC( dc );
 }
-static void x264_predict_8x8_dc_top_c( pixel *src, pixel edge[36] )
+static void predict_8x8_dc_top_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_TOP
     pixel4 dc = PIXEL_SPLAT_X4( (t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3 );
@@ -735,7 +735,7 @@
         MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1];
     }
 }
-static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[36] )
+static void predict_8x8_ddl_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_TOP
     PREDICT_8x8_LOAD_TOPRIGHT
@@ -755,7 +755,7 @@
     SRC(6,7)=SRC(7,6)= F2(t13,t14,t15);
     SRC(7,7)= F2(t14,t15,t15);
 }
-static void x264_predict_8x8_ddr_c( pixel *src, pixel edge[36] )
+static void predict_8x8_ddr_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_TOP
     PREDICT_8x8_LOAD_LEFT
@@ -777,7 +777,7 @@
     SRC(7,0)= F2(t5,t6,t7);
 
 }
-static void x264_predict_8x8_vr_c( pixel *src, pixel edge[36] )
+static void predict_8x8_vr_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_TOP
     PREDICT_8x8_LOAD_LEFT
@@ -805,7 +805,7 @@
     SRC(7,1)= F2(t5,t6,t7);
     SRC(7,0)= F1(t6,t7);
 }
-static void x264_predict_8x8_hd_c( pixel *src, pixel edge[36] )
+static void predict_8x8_hd_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_TOP
     PREDICT_8x8_LOAD_LEFT
@@ -832,7 +832,7 @@
     SRC_X4(4,1)= pack_pixel_2to4(p9,p10);
     SRC_X4(4,0)= pack_pixel_2to4(p10,p11);
 }
-static void x264_predict_8x8_vl_c( pixel *src, pixel edge[36] )
+static void predict_8x8_vl_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_TOP
     PREDICT_8x8_LOAD_TOPRIGHT
@@ -859,7 +859,7 @@
     SRC(7,6)= F1(t10,t11);
     SRC(7,7)= F2(t10,t11,t12);
 }
-static void x264_predict_8x8_hu_c( pixel *src, pixel edge[36] )
+static void predict_8x8_hu_c( pixel *src, pixel edge[36] )
 {
     PREDICT_8x8_LOAD_LEFT
     int p1 = pack_pixel_1to2(F1(l0,l1), F2(l0,l1,l2));
@@ -889,9 +889,9 @@
     pf[I_PRED_16x16_H ]     = x264_predict_16x16_h_c;
     pf[I_PRED_16x16_DC]     = x264_predict_16x16_dc_c;
     pf[I_PRED_16x16_P ]     = x264_predict_16x16_p_c;
-    pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_c;
-    pf[I_PRED_16x16_DC_TOP ]= x264_predict_16x16_dc_top_c;
-    pf[I_PRED_16x16_DC_128 ]= x264_predict_16x16_dc_128_c;
+    pf[I_PRED_16x16_DC_LEFT]= predict_16x16_dc_left_c;
+    pf[I_PRED_16x16_DC_TOP ]= predict_16x16_dc_top_c;
+    pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128_c;
 
 #if HAVE_MMX
     x264_predict_16x16_init_mmx( cpu, pf );
@@ -906,7 +906,7 @@
     x264_predict_16x16_init_arm( cpu, pf );
 #endif
 
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     x264_predict_16x16_init_aarch64( cpu, pf );
 #endif
 
@@ -932,9 +932,9 @@
     pf[I_PRED_CHROMA_H ]     = x264_predict_8x8c_h_c;
     pf[I_PRED_CHROMA_DC]     = x264_predict_8x8c_dc_c;
     pf[I_PRED_CHROMA_P ]     = x264_predict_8x8c_p_c;
-    pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x8c_dc_left_c;
-    pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x8c_dc_top_c;
-    pf[I_PRED_CHROMA_DC_128 ]= x264_predict_8x8c_dc_128_c;
+    pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left_c;
+    pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top_c;
+    pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128_c;
 
 #if HAVE_MMX
     x264_predict_8x8c_init_mmx( cpu, pf );
@@ -949,7 +949,7 @@
     x264_predict_8x8c_init_arm( cpu, pf );
 #endif
 
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     x264_predict_8x8c_init_aarch64( cpu, pf );
 #endif
 
@@ -969,9 +969,9 @@
     pf[I_PRED_CHROMA_H ]     = x264_predict_8x16c_h_c;
     pf[I_PRED_CHROMA_DC]     = x264_predict_8x16c_dc_c;
     pf[I_PRED_CHROMA_P ]     = x264_predict_8x16c_p_c;
-    pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x16c_dc_left_c;
-    pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x16c_dc_top_c;
-    pf[I_PRED_CHROMA_DC_128 ]= x264_predict_8x16c_dc_128_c;
+    pf[I_PRED_CHROMA_DC_LEFT]= predict_8x16c_dc_left_c;
+    pf[I_PRED_CHROMA_DC_TOP ]= predict_8x16c_dc_top_c;
+    pf[I_PRED_CHROMA_DC_128 ]= predict_8x16c_dc_128_c;
 
 #if HAVE_MMX
     x264_predict_8x16c_init_mmx( cpu, pf );
@@ -981,7 +981,7 @@
     x264_predict_8x16c_init_arm( cpu, pf );
 #endif
 
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     x264_predict_8x16c_init_aarch64( cpu, pf );
 #endif
 }
@@ -991,16 +991,16 @@
     pf[I_PRED_8x8_V]      = x264_predict_8x8_v_c;
     pf[I_PRED_8x8_H]      = x264_predict_8x8_h_c;
     pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_c;
-    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_c;
-    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_c;
-    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_c;
-    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_c;
-    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_c;
-    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_c;
-    pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_c;
-    pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_c;
-    pf[I_PRED_8x8_DC_128] = x264_predict_8x8_dc_128_c;
-    *predict_filter       = x264_predict_8x8_filter_c;
+    pf[I_PRED_8x8_DDL]    = predict_8x8_ddl_c;
+    pf[I_PRED_8x8_DDR]    = predict_8x8_ddr_c;
+    pf[I_PRED_8x8_VR]     = predict_8x8_vr_c;
+    pf[I_PRED_8x8_HD]     = predict_8x8_hd_c;
+    pf[I_PRED_8x8_VL]     = predict_8x8_vl_c;
+    pf[I_PRED_8x8_HU]     = predict_8x8_hu_c;
+    pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left_c;
+    pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_c;
+    pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128_c;
+    *predict_filter       = predict_8x8_filter_c;
 
 #if HAVE_MMX
     x264_predict_8x8_init_mmx( cpu, pf, predict_filter );
@@ -1010,7 +1010,7 @@
     x264_predict_8x8_init_arm( cpu, pf, predict_filter );
 #endif
 
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     x264_predict_8x8_init_aarch64( cpu, pf, predict_filter );
 #endif
 
@@ -1029,15 +1029,15 @@
     pf[I_PRED_4x4_V]      = x264_predict_4x4_v_c;
     pf[I_PRED_4x4_H]      = x264_predict_4x4_h_c;
     pf[I_PRED_4x4_DC]     = x264_predict_4x4_dc_c;
-    pf[I_PRED_4x4_DDL]    = x264_predict_4x4_ddl_c;
-    pf[I_PRED_4x4_DDR]    = x264_predict_4x4_ddr_c;
-    pf[I_PRED_4x4_VR]     = x264_predict_4x4_vr_c;
-    pf[I_PRED_4x4_HD]     = x264_predict_4x4_hd_c;
-    pf[I_PRED_4x4_VL]     = x264_predict_4x4_vl_c;
-    pf[I_PRED_4x4_HU]     = x264_predict_4x4_hu_c;
-    pf[I_PRED_4x4_DC_LEFT]= x264_predict_4x4_dc_left_c;
-    pf[I_PRED_4x4_DC_TOP] = x264_predict_4x4_dc_top_c;
-    pf[I_PRED_4x4_DC_128] = x264_predict_4x4_dc_128_c;
+    pf[I_PRED_4x4_DDL]    = predict_4x4_ddl_c;
+    pf[I_PRED_4x4_DDR]    = predict_4x4_ddr_c;
+    pf[I_PRED_4x4_VR]     = predict_4x4_vr_c;
+    pf[I_PRED_4x4_HD]     = predict_4x4_hd_c;
+    pf[I_PRED_4x4_VL]     = predict_4x4_vl_c;
+    pf[I_PRED_4x4_HU]     = predict_4x4_hu_c;
+    pf[I_PRED_4x4_DC_LEFT]= predict_4x4_dc_left_c;
+    pf[I_PRED_4x4_DC_TOP] = predict_4x4_dc_top_c;
+    pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128_c;
 
 #if HAVE_MMX
     x264_predict_4x4_init_mmx( cpu, pf );
@@ -1047,7 +1047,7 @@
     x264_predict_4x4_init_arm( cpu, pf );
 #endif
 
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     x264_predict_4x4_init_aarch64( cpu, pf );
 #endif
 }
diff -Nru x264-0.152.2854+gite9a5903/common/predict.h x264-0.158.2988+git-20191101.7817004/common/predict.h
--- x264-0.152.2854+gite9a5903/common/predict.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/predict.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -109,30 +109,52 @@
     I_PRED_8x8_DC_128  = 11,
 };
 
+#define x264_predict_8x8_dc_c x264_template(predict_8x8_dc_c)
 void x264_predict_8x8_dc_c  ( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_h_c x264_template(predict_8x8_h_c)
 void x264_predict_8x8_h_c   ( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_v_c x264_template(predict_8x8_v_c)
 void x264_predict_8x8_v_c   ( pixel *src, pixel edge[36] );
+#define x264_predict_4x4_dc_c x264_template(predict_4x4_dc_c)
 void x264_predict_4x4_dc_c  ( pixel *src );
+#define x264_predict_4x4_h_c x264_template(predict_4x4_h_c)
 void x264_predict_4x4_h_c   ( pixel *src );
+#define x264_predict_4x4_v_c x264_template(predict_4x4_v_c)
 void x264_predict_4x4_v_c   ( pixel *src );
+#define x264_predict_16x16_dc_c x264_template(predict_16x16_dc_c)
 void x264_predict_16x16_dc_c( pixel *src );
+#define x264_predict_16x16_h_c x264_template(predict_16x16_h_c)
 void x264_predict_16x16_h_c ( pixel *src );
+#define x264_predict_16x16_v_c x264_template(predict_16x16_v_c)
 void x264_predict_16x16_v_c ( pixel *src );
+#define x264_predict_16x16_p_c x264_template(predict_16x16_p_c)
 void x264_predict_16x16_p_c ( pixel *src );
+#define x264_predict_8x8c_dc_c x264_template(predict_8x8c_dc_c)
 void x264_predict_8x8c_dc_c ( pixel *src );
+#define x264_predict_8x8c_h_c x264_template(predict_8x8c_h_c)
 void x264_predict_8x8c_h_c  ( pixel *src );
+#define x264_predict_8x8c_v_c x264_template(predict_8x8c_v_c)
 void x264_predict_8x8c_v_c  ( pixel *src );
+#define x264_predict_8x8c_p_c x264_template(predict_8x8c_p_c)
 void x264_predict_8x8c_p_c  ( pixel *src );
+#define x264_predict_8x16c_dc_c x264_template(predict_8x16c_dc_c)
 void x264_predict_8x16c_dc_c( pixel *src );
+#define x264_predict_8x16c_h_c x264_template(predict_8x16c_h_c)
 void x264_predict_8x16c_h_c ( pixel *src );
+#define x264_predict_8x16c_v_c x264_template(predict_8x16c_v_c)
 void x264_predict_8x16c_v_c ( pixel *src );
+#define x264_predict_8x16c_p_c x264_template(predict_8x16c_p_c)
 void x264_predict_8x16c_p_c ( pixel *src );
 
+#define x264_predict_16x16_init x264_template(predict_16x16_init)
 void x264_predict_16x16_init ( int cpu, x264_predict_t pf[7] );
+#define x264_predict_8x8c_init x264_template(predict_8x8c_init)
 void x264_predict_8x8c_init  ( int cpu, x264_predict_t pf[7] );
+#define x264_predict_8x16c_init x264_template(predict_8x16c_init)
 void x264_predict_8x16c_init ( int cpu, x264_predict_t pf[7] );
+#define x264_predict_4x4_init x264_template(predict_4x4_init)
 void x264_predict_4x4_init   ( int cpu, x264_predict_t pf[12] );
+#define x264_predict_8x8_init x264_template(predict_8x8_init)
 void x264_predict_8x8_init   ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
 
-
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/quant.c x264-0.158.2988+git-20191101.7817004/common/quant.c
--- x264-0.152.2854+gite9a5903/common/quant.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/quant.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.c: quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Fiona Glaser <fiona@x264.com>
@@ -31,16 +31,16 @@
 #if HAVE_MMX
 #include "x86/quant.h"
 #endif
-#if ARCH_PPC
+#if HAVE_ALTIVEC
 #   include "ppc/quant.h"
 #endif
-#if ARCH_ARM
+#if HAVE_ARMV6
 #   include "arm/quant.h"
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 #   include "aarch64/quant.h"
 #endif
-#if ARCH_MIPS
+#if HAVE_MSA
 #   include "mips/quant.h"
 #endif
 
@@ -298,7 +298,7 @@
     return optimize_chroma_dc_internal( dct, dequant_mf, 1 );
 }
 
-static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
+static void denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
 {
     for( int i = 0; i < size; i++ )
     {
@@ -320,19 +320,7 @@
  *  chroma: for the complete mb: if score < 7 -> null
  */
 
-const uint8_t x264_decimate_table4[16] =
-{
-    3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0
-};
-const uint8_t x264_decimate_table8[64] =
-{
-    3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
-    1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-};
-
-static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
+static ALWAYS_INLINE int decimate_score_internal( dctcoef *dct, int i_max )
 {
     const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4;
     int i_score = 0;
@@ -359,21 +347,21 @@
     return i_score;
 }
 
-static int x264_decimate_score15( dctcoef *dct )
+static int decimate_score15( dctcoef *dct )
 {
-    return x264_decimate_score_internal( dct+1, 15 );
+    return decimate_score_internal( dct+1, 15 );
 }
-static int x264_decimate_score16( dctcoef *dct )
+static int decimate_score16( dctcoef *dct )
 {
-    return x264_decimate_score_internal( dct, 16 );
+    return decimate_score_internal( dct, 16 );
 }
-static int x264_decimate_score64( dctcoef *dct )
+static int decimate_score64( dctcoef *dct )
 {
-    return x264_decimate_score_internal( dct, 64 );
+    return decimate_score_internal( dct, 64 );
 }
 
 #define last(num)\
-static int x264_coeff_last##num( dctcoef *l )\
+static int coeff_last##num( dctcoef *l )\
 {\
     int i_last = num-1;\
     while( i_last >= 0 && l[i_last] == 0 )\
@@ -388,9 +376,9 @@
 last(64)
 
 #define level_run(num)\
-static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\
+static int coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\
 {\
-    int i_last = runlevel->last = x264_coeff_last##num(dct);\
+    int i_last = runlevel->last = coeff_last##num(dct);\
     int i_total = 0;\
     int mask = 0;\
     do\
@@ -438,20 +426,20 @@
     pf->optimize_chroma_2x2_dc = optimize_chroma_2x2_dc;
     pf->optimize_chroma_2x4_dc = optimize_chroma_2x4_dc;
 
-    pf->denoise_dct = x264_denoise_dct;
-    pf->decimate_score15 = x264_decimate_score15;
-    pf->decimate_score16 = x264_decimate_score16;
-    pf->decimate_score64 = x264_decimate_score64;
-
-    pf->coeff_last4 = x264_coeff_last4;
-    pf->coeff_last8 = x264_coeff_last8;
-    pf->coeff_last[  DCT_LUMA_AC] = x264_coeff_last15;
-    pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16;
-    pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64;
-    pf->coeff_level_run4 = x264_coeff_level_run4;
-    pf->coeff_level_run8 = x264_coeff_level_run8;
-    pf->coeff_level_run[  DCT_LUMA_AC] = x264_coeff_level_run15;
-    pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
+    pf->denoise_dct = denoise_dct;
+    pf->decimate_score15 = decimate_score15;
+    pf->decimate_score16 = decimate_score16;
+    pf->decimate_score64 = decimate_score64;
+
+    pf->coeff_last4 = coeff_last4;
+    pf->coeff_last8 = coeff_last8;
+    pf->coeff_last[  DCT_LUMA_AC] = coeff_last15;
+    pf->coeff_last[ DCT_LUMA_4x4] = coeff_last16;
+    pf->coeff_last[ DCT_LUMA_8x8] = coeff_last64;
+    pf->coeff_level_run4 = coeff_level_run4;
+    pf->coeff_level_run8 = coeff_level_run8;
+    pf->coeff_level_run[  DCT_LUMA_AC] = coeff_level_run15;
+    pf->coeff_level_run[ DCT_LUMA_4x4] = coeff_level_run16;
 
 #if HIGH_BIT_DEPTH
 #if HAVE_MMX
@@ -753,6 +741,7 @@
         pf->quant_2x2_dc = x264_quant_2x2_dc_altivec;
         pf->quant_4x4_dc = x264_quant_4x4_dc_altivec;
         pf->quant_4x4 = x264_quant_4x4_altivec;
+        pf->quant_4x4x4 = x264_quant_4x4x4_altivec;
         pf->quant_8x8 = x264_quant_8x8_altivec;
 
         pf->dequant_4x4 = x264_dequant_4x4_altivec;
@@ -767,7 +756,7 @@
         pf->coeff_last8 = x264_coeff_last8_arm;
     }
 #endif
-#if HAVE_ARMV6 || ARCH_AARCH64
+#if HAVE_ARMV6 || HAVE_AARCH64
     if( cpu&X264_CPU_NEON )
     {
         pf->quant_2x2_dc   = x264_quant_2x2_dc_neon;
@@ -787,7 +776,7 @@
         pf->decimate_score64 = x264_decimate_score64_neon;
     }
 #endif
-#if ARCH_AARCH64
+#if HAVE_AARCH64
     if( cpu&X264_CPU_ARMV8 )
     {
         pf->coeff_last4 = x264_coeff_last4_aarch64;
diff -Nru x264-0.152.2854+gite9a5903/common/quant.h x264-0.158.2988+git-20191101.7817004/common/quant.h
--- x264-0.152.2854+gite9a5903/common/quant.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/quant.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Fiona Glaser <fiona@x264.com>
@@ -69,6 +69,7 @@
     int (*trellis_cabac_chroma_422_dc)( TRELLIS_PARAMS );
 } x264_quant_function_t;
 
+#define x264_quant_init x264_template(quant_init)
 void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/rectangle.c x264-0.158.2988+git-20191101.7817004/common/rectangle.c
--- x264-0.152.2854+gite9a5903/common/rectangle.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/rectangle.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rectangle.c: rectangle filling
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Fiona Glaser <fiona@x264.com>
  *
@@ -26,7 +26,7 @@
 #include "common.h"
 
 #define CACHE_FUNC(name,size,width,height)\
-static void x264_macroblock_cache_##name##_##width##_##height( void *target, uint32_t val )\
+static void macroblock_cache_##name##_##width##_##height( void *target, uint32_t val )\
 {\
     x264_macroblock_cache_rect( target, width*size, height, size, val );\
 }
@@ -41,16 +41,16 @@
 CACHE_FUNC(name,size,1,1)\
 void (*x264_cache_##name##_func_table[10])(void *, uint32_t) =\
 {\
-    x264_macroblock_cache_##name##_1_1,\
-    x264_macroblock_cache_##name##_2_1,\
-    x264_macroblock_cache_##name##_1_2,\
-    x264_macroblock_cache_##name##_2_2,\
+    macroblock_cache_##name##_1_1,\
+    macroblock_cache_##name##_2_1,\
+    macroblock_cache_##name##_1_2,\
+    macroblock_cache_##name##_2_2,\
     NULL,\
-    x264_macroblock_cache_##name##_4_2,\
+    macroblock_cache_##name##_4_2,\
     NULL,\
-    x264_macroblock_cache_##name##_2_4,\
+    macroblock_cache_##name##_2_4,\
     NULL,\
-    x264_macroblock_cache_##name##_4_4\
+    macroblock_cache_##name##_4_4\
 };\
 
 CACHE_FUNCS(mv, 4)
diff -Nru x264-0.152.2854+gite9a5903/common/rectangle.h x264-0.158.2988+git-20191101.7817004/common/rectangle.h
--- x264-0.152.2854+gite9a5903/common/rectangle.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/rectangle.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rectangle.h: rectangle filling
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Fiona Glaser <fiona@x264.com>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -118,9 +118,12 @@
         assert(0);
 }
 
-extern void (*x264_cache_mv_func_table[10])(void *, uint32_t);\
-extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);\
-extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);\
+#define x264_cache_mv_func_table x264_template(cache_mv_func_table)
+extern void (*x264_cache_mv_func_table[10])(void *, uint32_t);
+#define x264_cache_mvd_func_table x264_template(cache_mvd_func_table)
+extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);
+#define x264_cache_ref_func_table x264_template(cache_ref_func_table)
+extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);
 
 #define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
 static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
diff -Nru x264-0.152.2854+gite9a5903/common/set.c x264-0.158.2988+git-20191101.7817004/common/set.c
--- x264-0.152.2854+gite9a5903/common/set.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/set.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.c: quantization init
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -94,7 +94,7 @@
         int start = w == 8 ? 4 : 0;\
         int j;\
         for( j = 0; j < i; j++ )\
-            if( !memcmp( h->pps->scaling_list[i+start], h->pps->scaling_list[j+start], size*sizeof(uint8_t) ) )\
+            if( !memcmp( h->sps->scaling_list[i+start], h->sps->scaling_list[j+start], size*sizeof(uint8_t) ) )\
                 break;\
         if( j < i )\
         {\
@@ -110,7 +110,7 @@
         }\
         for( j = 0; j < i; j++ )\
             if( deadzone[j] == deadzone[i] &&\
-                !memcmp( h->pps->scaling_list[i+start], h->pps->scaling_list[j+start], size*sizeof(uint8_t) ) )\
+                !memcmp( h->sps->scaling_list[i+start], h->sps->scaling_list[j+start], size*sizeof(uint8_t) ) )\
                 break;\
         if( j < i )\
         {\
@@ -148,14 +148,14 @@
         for( int i_list = 0; i_list < 4; i_list++ )
             for( int i = 0; i < 16; i++ )
             {
-                h->dequant4_mf[i_list][q][i] = def_dequant4[q][i] * h->pps->scaling_list[i_list][i];
-                     quant4_mf[i_list][q][i] = DIV(def_quant4[q][i] * 16, h->pps->scaling_list[i_list][i]);
+                h->dequant4_mf[i_list][q][i] = def_dequant4[q][i] * h->sps->scaling_list[i_list][i];
+                     quant4_mf[i_list][q][i] = DIV(def_quant4[q][i] * 16, h->sps->scaling_list[i_list][i]);
             }
         for( int i_list = 0; i_list < num_8x8_lists; i_list++ )
             for( int i = 0; i < 64; i++ )
             {
-                h->dequant8_mf[i_list][q][i] = def_dequant8[q][i] * h->pps->scaling_list[4+i_list][i];
-                     quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
+                h->dequant8_mf[i_list][q][i] = def_dequant8[q][i] * h->sps->scaling_list[4+i_list][i];
+                     quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->sps->scaling_list[4+i_list][i]);
             }
     }
     for( int q = 0; q <= QP_MAX_SPEC; q++ )
@@ -300,8 +300,8 @@
     x264_free( h->nr_offset_emergency );
 }
 
-static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
-                                  uint8_t *cqm, const uint8_t *jvt, int length )
+static int cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
+                             uint8_t *cqm, const uint8_t *jvt, int length )
 {
     int i;
 
@@ -361,16 +361,16 @@
     while( (p = strchr( buf, '#' )) != NULL )
         memset( p, ' ', strcspn( p, "\n" ) );
 
-    b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_LUMA",   h->param.cqm_4iy, x264_cqm_jvt4i, 16 );
-    b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_LUMA",   h->param.cqm_4py, x264_cqm_jvt4p, 16 );
-    b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 );
-    b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_CHROMA", h->param.cqm_4pc, x264_cqm_jvt4p, 16 );
-    b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_LUMA",   h->param.cqm_8iy, x264_cqm_jvt8i, 64 );
-    b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_LUMA",   h->param.cqm_8py, x264_cqm_jvt8p, 64 );
+    b_error |= cqm_parse_jmlist( h, buf, "INTRA4X4_LUMA",   h->param.cqm_4iy, x264_cqm_jvt4i, 16 );
+    b_error |= cqm_parse_jmlist( h, buf, "INTER4X4_LUMA",   h->param.cqm_4py, x264_cqm_jvt4p, 16 );
+    b_error |= cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 );
+    b_error |= cqm_parse_jmlist( h, buf, "INTER4X4_CHROMA", h->param.cqm_4pc, x264_cqm_jvt4p, 16 );
+    b_error |= cqm_parse_jmlist( h, buf, "INTRA8X8_LUMA",   h->param.cqm_8iy, x264_cqm_jvt8i, 64 );
+    b_error |= cqm_parse_jmlist( h, buf, "INTER8X8_LUMA",   h->param.cqm_8py, x264_cqm_jvt8p, 64 );
     if( CHROMA444 )
     {
-        b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_CHROMA", h->param.cqm_8ic, x264_cqm_jvt8i, 64 );
-        b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_CHROMA", h->param.cqm_8pc, x264_cqm_jvt8p, 64 );
+        b_error |= cqm_parse_jmlist( h, buf, "INTRA8X8_CHROMA", h->param.cqm_8ic, x264_cqm_jvt8i, 64 );
+        b_error |= cqm_parse_jmlist( h, buf, "INTER8X8_CHROMA", h->param.cqm_8pc, x264_cqm_jvt8p, 64 );
     }
 
     x264_free( buf );
diff -Nru x264-0.152.2854+gite9a5903/common/set.h x264-0.158.2988+git-20191101.7817004/common/set.h
--- x264-0.152.2854+gite9a5903/common/set.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/set.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: quantization init
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -27,24 +27,6 @@
 #ifndef X264_SET_H
 #define X264_SET_H
 
-enum profile_e
-{
-    PROFILE_BASELINE = 66,
-    PROFILE_MAIN     = 77,
-    PROFILE_HIGH    = 100,
-    PROFILE_HIGH10  = 110,
-    PROFILE_HIGH422 = 122,
-    PROFILE_HIGH444_PREDICTIVE = 244,
-};
-
-enum chroma_format_e
-{
-    CHROMA_400 = 0,
-    CHROMA_420 = 1,
-    CHROMA_422 = 2,
-    CHROMA_444 = 3,
-};
-
 enum cqm4_e
 {
     CQM_4IY = 0,
@@ -158,6 +140,10 @@
     int b_qpprime_y_zero_transform_bypass;
     int i_chroma_format_idc;
 
+    int b_avcintra;
+    int i_cqm_preset;
+    const uint8_t *scaling_list[8]; /* could be 12, but we don't allow separate Cb/Cr lists */
+
 } x264_sps_t;
 
 typedef struct
@@ -187,161 +173,13 @@
 
     int b_transform_8x8_mode;
 
-    int i_cqm_preset;
-    const uint8_t *scaling_list[8]; /* could be 12, but we don't allow separate Cb/Cr lists */
-
 } x264_pps_t;
 
-/* default quant matrices */
-static const uint8_t x264_cqm_jvt4i[16] =
-{
-      6,13,20,28,
-     13,20,28,32,
-     20,28,32,37,
-     28,32,37,42
-};
-static const uint8_t x264_cqm_jvt4p[16] =
-{
-    10,14,20,24,
-    14,20,24,27,
-    20,24,27,30,
-    24,27,30,34
-};
-static const uint8_t x264_cqm_jvt8i[64] =
-{
-     6,10,13,16,18,23,25,27,
-    10,11,16,18,23,25,27,29,
-    13,16,18,23,25,27,29,31,
-    16,18,23,25,27,29,31,33,
-    18,23,25,27,29,31,33,36,
-    23,25,27,29,31,33,36,38,
-    25,27,29,31,33,36,38,40,
-    27,29,31,33,36,38,40,42
-};
-static const uint8_t x264_cqm_jvt8p[64] =
-{
-     9,13,15,17,19,21,22,24,
-    13,13,17,19,21,22,24,25,
-    15,17,19,21,22,24,25,27,
-    17,19,21,22,24,25,27,28,
-    19,21,22,24,25,27,28,30,
-    21,22,24,25,27,28,30,32,
-    22,24,25,27,28,30,32,33,
-    24,25,27,28,30,32,33,35
-};
-static const uint8_t x264_cqm_flat16[64] =
-{
-    16,16,16,16,16,16,16,16,
-    16,16,16,16,16,16,16,16,
-    16,16,16,16,16,16,16,16,
-    16,16,16,16,16,16,16,16,
-    16,16,16,16,16,16,16,16,
-    16,16,16,16,16,16,16,16,
-    16,16,16,16,16,16,16,16,
-    16,16,16,16,16,16,16,16
-};
-static const uint8_t * const x264_cqm_jvt[8] =
-{
-    x264_cqm_jvt4i, x264_cqm_jvt4p,
-    x264_cqm_jvt4i, x264_cqm_jvt4p,
-    x264_cqm_jvt8i, x264_cqm_jvt8p,
-    x264_cqm_jvt8i, x264_cqm_jvt8p
-};
-
-// 1080i25_avci50, 1080p25_avci50
-static const uint8_t x264_cqm_avci50_4ic[16] =
-{
-    16,22,28,40,
-    22,28,40,44,
-    28,40,44,48,
-    40,44,48,60
-};
-
-//  1080i25_avci50,
-static const uint8_t x264_cqm_avci50_1080i_8iy[64] =
-{
-    16,18,19,21,27,33,81,87,
-    18,19,21,24,30,33,81,87,
-    19,21,24,27,30,78,84,90,
-    21,24,27,30,33,78,84,90,
-    24,27,30,33,78,81,84,90,
-    24,27,30,33,78,81,84,93,
-    27,30,33,78,78,81,87,93,
-    30,33,33,78,81,84,87,96
-};
-
-//  1080p25_avci50, 720p25_avci50, 720p50_avci50
-static const uint8_t x264_cqm_avci50_p_8iy[64] =
-{
-    16,18,19,21,24,27,30,33,
-    18,19,21,24,27,30,33,78,
-    19,21,24,27,30,33,78,81,
-    21,24,27,30,33,78,81,84,
-    24,27,30,33,78,81,84,87,
-    27,30,33,78,81,84,87,90,
-    30,33,78,81,84,87,90,93,
-    33,78,81,84,87,90,93,96
-};
-
-//  1080i25_avci100, 1080p25_avci100
-static const uint8_t x264_cqm_avci100_1080_4ic[16] =
-{
-    16,20,26,32,
-    20,26,32,38,
-    26,32,38,44,
-    32,38,44,50
-};
-
-// 720p25_avci100, 720p50_avci100
-static const uint8_t x264_cqm_avci100_720p_4ic[16] =
-{
-    16,21,27,34,
-    21,27,34,41,
-    27,34,41,46,
-    34,41,46,54
-};
-
-//  1080i25_avci100,
-static const uint8_t x264_cqm_avci100_1080i_8iy[64] =
-{
-    16,19,20,23,24,26,32,42,
-    18,19,22,24,26,32,36,42,
-    18,20,23,24,26,32,36,63,
-    19,20,23,26,32,36,42,63,
-    20,22,24,26,32,36,59,63,
-    22,23,24,26,32,36,59,68,
-    22,23,24,26,32,42,59,68,
-    22,23,24,26,36,42,59,72
-};
-
-// 1080p25_avci100,
-static const uint8_t x264_cqm_avci100_1080p_8iy[64] =
-{
-    16,18,19,20,22,23,24,26,
-    18,19,20,22,23,24,26,32,
-    19,20,22,23,24,26,32,36,
-    20,22,23,24,26,32,36,42,
-    22,23,24,26,32,36,42,59,
-    23,24,26,32,36,42,59,63,
-    24,26,32,36,42,59,63,68,
-    26,32,36,42,59,63,68,72
-};
-
-// 720p25_avci100, 720p50_avci100
-static const uint8_t x264_cqm_avci100_720p_8iy[64] =
-{
-    16,18,19,21,22,24,26,32,
-    18,19,19,21,22,24,26,32,
-    19,19,21,22,22,24,26,32,
-    21,21,22,22,23,24,26,34,
-    22,22,22,23,24,25,26,34,
-    24,24,24,24,25,26,34,36,
-    26,26,26,26,26,34,36,38,
-    32,32,32,34,34,36,38,42
-};
-
+#define x264_cqm_init x264_template(cqm_init)
 int  x264_cqm_init( x264_t *h );
+#define x264_cqm_delete x264_template(cqm_delete)
 void x264_cqm_delete( x264_t *h );
+#define x264_cqm_parse_file x264_template(cqm_parse_file)
 int  x264_cqm_parse_file( x264_t *h, const char *filename );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/tables.c x264-0.158.2988+git-20191101.7817004/common/tables.c
--- x264-0.152.2854+gite9a5903/common/tables.c	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/tables.c	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,2539 @@
+/*****************************************************************************
+ * tables.c: const tables
+ *****************************************************************************
+ * Copyright (C) 2003-2019 x264 project
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "base.h"
+
+const x264_level_t x264_levels[] =
+{
+    { 10,     1485,     99,    396,     64,    175,   64, 64,  0, 2, 0, 0, 1 },
+    {  9,     1485,     99,    396,    128,    350,   64, 64,  0, 2, 0, 0, 1 }, /* "1b" */
+    { 11,     3000,    396,    900,    192,    500,  128, 64,  0, 2, 0, 0, 1 },
+    { 12,     6000,    396,   2376,    384,   1000,  128, 64,  0, 2, 0, 0, 1 },
+    { 13,    11880,    396,   2376,    768,   2000,  128, 64,  0, 2, 0, 0, 1 },
+    { 20,    11880,    396,   2376,   2000,   2000,  128, 64,  0, 2, 0, 0, 1 },
+    { 21,    19800,    792,   4752,   4000,   4000,  256, 64,  0, 2, 0, 0, 0 },
+    { 22,    20250,   1620,   8100,   4000,   4000,  256, 64,  0, 2, 0, 0, 0 },
+    { 30,    40500,   1620,   8100,  10000,  10000,  256, 32, 22, 2, 0, 1, 0 },
+    { 31,   108000,   3600,  18000,  14000,  14000,  512, 16, 60, 4, 1, 1, 0 },
+    { 32,   216000,   5120,  20480,  20000,  20000,  512, 16, 60, 4, 1, 1, 0 },
+    { 40,   245760,   8192,  32768,  20000,  25000,  512, 16, 60, 4, 1, 1, 0 },
+    { 41,   245760,   8192,  32768,  50000,  62500,  512, 16, 24, 2, 1, 1, 0 },
+    { 42,   522240,   8704,  34816,  50000,  62500,  512, 16, 24, 2, 1, 1, 1 },
+    { 50,   589824,  22080, 110400, 135000, 135000,  512, 16, 24, 2, 1, 1, 1 },
+    { 51,   983040,  36864, 184320, 240000, 240000,  512, 16, 24, 2, 1, 1, 1 },
+    { 52,  2073600,  36864, 184320, 240000, 240000,  512, 16, 24, 2, 1, 1, 1 },
+    { 60,  4177920, 139264, 696320, 240000, 240000, 8192, 16, 24, 2, 1, 1, 1 },
+    { 61,  8355840, 139264, 696320, 480000, 480000, 8192, 16, 24, 2, 1, 1, 1 },
+    { 62, 16711680, 139264, 696320, 800000, 800000, 8192, 16, 24, 2, 1, 1, 1 },
+    { 0 }
+};
+
+/*****************************************************************************
+ * MATH
+ *****************************************************************************/
+
+const uint8_t x264_exp2_lut[64] =
+{
+      0,   3,   6,   8,  11,  14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
+     48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98, 102,
+    106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
+    175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
+};
+
+const float x264_log2_lut[128] =
+{
+    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
+    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
+    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
+    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
+    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
+    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
+    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
+    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
+    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
+    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
+    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
+    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
+    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
+    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
+    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
+    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
+};
+
+/* Avoid an int/float conversion. */
+const float x264_log2_lz_lut[32] =
+{
+    31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+};
+
+/*****************************************************************************
+ * ANALYSE
+ *****************************************************************************/
+
+/* lambda = pow(2,qp/6-2) */
+const uint16_t x264_lambda_tab[QP_MAX_MAX+1] =
+{
+   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
+   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
+   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
+   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
+  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
+  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
+  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
+ 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
+ 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
+1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
+2580,2896,                               /* 80-81 */
+};
+
+/* lambda2 = pow(lambda,2) * .9 * 256 */
+/* Capped to avoid overflow */
+const int x264_lambda2_tab[QP_MAX_MAX+1] =
+{
+       14,       18,       22,       28,       36,       45,      57,      72, /*  0- 7 */
+       91,      115,      145,      182,      230,      290,     365,     460, /*  8-15 */
+      580,      731,      921,     1161,     1462,     1843,    2322,    2925, /* 16-23 */
+     3686,     4644,     5851,     7372,     9289,    11703,   14745,   18578, /* 24-31 */
+    23407,    29491,    37156,    46814,    58982,    74313,   93628,  117964, /* 32-39 */
+   148626,   187257,   235929,   297252,   374514,   471859,  594505,  749029, /* 40-47 */
+   943718,  1189010,  1498059,  1887436,  2378021,  2996119, 3774873, 4756042, /* 48-55 */
+  5992238,  7549747,  9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
+ 38048341, 47937906, 60397977, 76096683, 95875813,120795955,                   /* 64-69 */
+134217727,134217727,134217727,134217727,134217727,134217727,                   /* 70-75 */
+134217727,134217727,134217727,134217727,134217727,134217727,                   /* 76-81 */
+};
+
+// Open question: should the intra and inter lambdas be different?
+// For now this just matches the behaviour of deadzone quant.
+const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] =
+{
+    // row [0]: inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS); entries saturate at 134217727 (2**27 - 1)
+    {
+               46,       58,       73,       92,      117,      147,
+              185,      233,      294,      370,      466,      587,
+              740,      932,     1174,     1480,     1864,     2349,
+             2959,     3728,     4697,     5918,     7457,     9395,
+            11837,    14914,    18790,    23674,    29828,    37581,
+            47349,    59656,    75163,    94699,   119313,   150326,
+           189399,   238627,   300652,   378798,   477255,   601304,
+           757596,   954511,  1202608,  1515192,  1909022,  2405217,
+          3030384,  3818045,  4810435,  6060769,  7636091,  9620872,
+         12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
+         48486154, 61088726, 76966972, 96972308,
+        122177453,134217727,134217727,134217727,134217727,134217727,
+        134217727,134217727,134217727,134217727,134217727,134217727, // NOTE(review): this row has 76 initializers (indices 0-75) but the intra row below has 82, so inter entries 76+ are implicitly 0 -- confirm this is intended
+    },
+    // row [1]: intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS); entries saturate at 134217727 (2**27 - 1)
+    {
+               27,       34,       43,       54,       68,       86,
+              108,      136,      172,      216,      273,      343,
+              433,      545,      687,      865,     1090,     1374,
+             1731,     2180,     2747,     3461,     4361,     5494,
+             6922,     8721,    10988,    13844,    17442,    21976,
+            27688,    34885,    43953,    55377,    69771,    87906,
+           110755,   139543,   175813,   221511,   279087,   351627,
+           443023,   558174,   703255,   886046,  1116348,  1406511,
+          1772093,  2232697,  2813022,  3544186,  4465396,  5626046,
+          7088374,  8930791, 11252092, 14176748, 17861583, 22504184,
+         28353495, 35723165, 45008368, 56706990,
+         71446330, 90016736,113413980,134217727,134217727,134217727,
+        134217727,134217727,134217727,134217727,134217727,134217727,
+        134217727,134217727,134217727,134217727,134217727,134217727,
+    }
+};
+
+const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = // entry i follows 16 * 2**(i/3.), truncated to an integer
+{
+       16,    20,    25,    32,    40,    50,
+       64,    80,   101,   128,   161,   203,
+      256,   322,   406,   512,   645,   812,
+     1024,  1290,  1625,  2048,  2580,  3250,
+     4096,  5160,  6501,  8192, 10321, 13003,
+    16384, 20642, 26007, 32768, 41285, 52015,
+    65535 // the progression would give 65536 here; stored as 65535, the largest value a uint16_t can hold
+};
+
+/*****************************************************************************
+ * MC
+ *****************************************************************************/
+
+const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
+const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2};
+
+/*****************************************************************************
+ * CQM
+ *****************************************************************************/
+
+/* default quant matrices */
+const uint8_t x264_cqm_jvt4i[16] =
+{
+      6,13,20,28,
+     13,20,28,32,
+     20,28,32,37,
+     28,32,37,42
+};
+const uint8_t x264_cqm_jvt4p[16] =
+{
+    10,14,20,24,
+    14,20,24,27,
+    20,24,27,30,
+    24,27,30,34
+};
+const uint8_t x264_cqm_jvt8i[64] =
+{
+     6,10,13,16,18,23,25,27,
+    10,11,16,18,23,25,27,29,
+    13,16,18,23,25,27,29,31,
+    16,18,23,25,27,29,31,33,
+    18,23,25,27,29,31,33,36,
+    23,25,27,29,31,33,36,38,
+    25,27,29,31,33,36,38,40,
+    27,29,31,33,36,38,40,42
+};
+const uint8_t x264_cqm_jvt8p[64] =
+{
+     9,13,15,17,19,21,22,24,
+    13,13,17,19,21,22,24,25,
+    15,17,19,21,22,24,25,27,
+    17,19,21,22,24,25,27,28,
+    19,21,22,24,25,27,28,30,
+    21,22,24,25,27,28,30,32,
+    22,24,25,27,28,30,32,33,
+    24,25,27,28,30,32,33,35
+};
+const uint8_t x264_cqm_flat16[64] =
+{
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16
+};
+const uint8_t * const x264_cqm_jvt[8] =
+{
+    x264_cqm_jvt4i, x264_cqm_jvt4p,
+    x264_cqm_jvt4i, x264_cqm_jvt4p,
+    x264_cqm_jvt8i, x264_cqm_jvt8p,
+    x264_cqm_jvt8i, x264_cqm_jvt8p
+};
+
+// 1080i25_avci50, 1080p25_avci50
+const uint8_t x264_cqm_avci50_4ic[16] =
+{
+    16,22,28,40,
+    22,28,40,44,
+    28,40,44,48,
+    40,44,48,60
+};
+
+// 1080p25_avci50, 720p25_avci50, 720p50_avci50
+const uint8_t x264_cqm_avci50_p_8iy[64] =
+{
+    16,18,19,21,24,27,30,33,
+    18,19,21,24,27,30,33,78,
+    19,21,24,27,30,33,78,81,
+    21,24,27,30,33,78,81,84,
+    24,27,30,33,78,81,84,87,
+    27,30,33,78,81,84,87,90,
+    30,33,78,81,84,87,90,93,
+    33,78,81,84,87,90,93,96
+};
+
+// 1080i25_avci50
+const uint8_t x264_cqm_avci50_1080i_8iy[64] =
+{
+    16,18,19,21,27,33,81,87,
+    18,19,21,24,30,33,81,87,
+    19,21,24,27,30,78,84,90,
+    21,24,27,30,33,78,84,90,
+    24,27,30,33,78,81,84,90,
+    24,27,30,33,78,81,84,93,
+    27,30,33,78,78,81,87,93,
+    30,33,33,78,81,84,87,96
+};
+
+// 720p25_avci100, 720p50_avci100
+const uint8_t x264_cqm_avci100_720p_4ic[16] =
+{
+    16,21,27,34,
+    21,27,34,41,
+    27,34,41,46,
+    34,41,46,54
+};
+
+// 720p25_avci100, 720p50_avci100
+const uint8_t x264_cqm_avci100_720p_8iy[64] =
+{
+    16,18,19,21,22,24,26,32,
+    18,19,19,21,22,24,26,32,
+    19,19,21,22,22,24,26,32,
+    21,21,22,22,23,24,26,34,
+    22,22,22,23,24,25,26,34,
+    24,24,24,24,25,26,34,36,
+    26,26,26,26,26,34,36,38,
+    32,32,32,34,34,36,38,42
+};
+
+// 1080i25_avci100, 1080p25_avci100
+const uint8_t x264_cqm_avci100_1080_4ic[16] =
+{
+    16,20,26,32,
+    20,26,32,38,
+    26,32,38,44,
+    32,38,44,50
+};
+
+// 1080i25_avci100
+const uint8_t x264_cqm_avci100_1080i_8iy[64] =
+{
+    16,19,20,23,24,26,32,42,
+    18,19,22,24,26,32,36,42,
+    18,20,23,24,26,32,36,63,
+    19,20,23,26,32,36,42,63,
+    20,22,24,26,32,36,59,63,
+    22,23,24,26,32,36,59,68,
+    22,23,24,26,32,42,59,68,
+    22,23,24,26,36,42,59,72
+};
+
+// 1080p25_avci100
+const uint8_t x264_cqm_avci100_1080p_8iy[64] =
+{
+    16,18,19,20,22,23,24,26,
+    18,19,20,22,23,24,26,32,
+    19,20,22,23,24,26,32,36,
+    20,22,23,24,26,32,36,42,
+    22,23,24,26,32,36,42,59,
+    23,24,26,32,36,42,59,63,
+    24,26,32,36,42,59,63,68,
+    26,32,36,42,59,63,68,72
+};
+
+/*****************************************************************************
+ * QUANT
+ *****************************************************************************/
+
+const uint8_t x264_decimate_table4[16] =
+{
+    3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0
+};
+const uint8_t x264_decimate_table8[64] =
+{
+    3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
+    1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+/*****************************************************************************
+ * DCT
+ *****************************************************************************/
+
+/* The inverse of the scaling factors introduced by the 8x8 fdct; W(i) selects one of six FIX8() constants (0 for any other i). */
+/* Stored as uint32 for the asm implementation of trellis; the actual values fit in uint16. */
+#define W(i) (i==0 ? FIX8(1.0000) :\
+              i==1 ? FIX8(0.8859) :\
+              i==2 ? FIX8(1.6000) :\
+              i==3 ? FIX8(0.9415) :\
+              i==4 ? FIX8(1.2651) :\
+              i==5 ? FIX8(1.1910) :0)
+const uint32_t x264_dct8_weight_tab[64] = {
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
+};
+#undef W
+
+#define W(i) (i==0 ? FIX8(1.76777) :\
+              i==1 ? FIX8(1.11803) :\
+              i==2 ? FIX8(0.70711) :0)
+const uint32_t x264_dct4_weight_tab[16] = {
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2),
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2)
+};
+#undef W
+
+/* inverse squared: each factor is the square of the matching x264_dct4_weight_tab factor (e.g. 3.125 = 1.76777**2) */
+#define W(i) (i==0 ? FIX8(3.125) :\
+              i==1 ? FIX8(1.25) :\
+              i==2 ? FIX8(0.5) :0)
+const uint32_t x264_dct4_weight2_tab[16] = {
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2),
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2)
+};
+#undef W
+
+#define W(i) (i==0 ? FIX8(1.00000) :\
+              i==1 ? FIX8(0.78487) :\
+              i==2 ? FIX8(2.56132) :\
+              i==3 ? FIX8(0.88637) :\
+              i==4 ? FIX8(1.60040) :\
+              i==5 ? FIX8(1.41850) :0)
+const uint32_t x264_dct8_weight2_tab[64] = { // same W() index layout as x264_dct8_weight_tab; the factors are approximately the squares of that table's factors
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
+};
+#undef W
+
+/*****************************************************************************
+ * CABAC
+ *****************************************************************************/
+
+const int8_t x264_cabac_context_init_I[1024][2] =
+{
+    /* 0 - 10 */
+    { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
+    {  2,  54 }, {  3, 74 },  { -28,127 }, { -23, 104 },
+    { -6,  53 }, { -1, 54 },  {  7,  51 },
+
+    /* 11 - 23 unused for I */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },
+
+    /* 24 - 39 (all zero in the I table) */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+
+    /* 40 - 53 (all zero in the I table) */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },
+
+    /* 54 - 59 (all zero in the I table) */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },
+
+    /* 60 - 69 */
+    { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+    { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+    { 13, 41 },  { 3, 62 },
+
+    /* 70 -> 87 */
+    { 0, 11 },   { 1, 55 },   { 0, 69 },     { -17, 127 },
+    { -13, 102 },{ 0, 82 },   { -7, 74 },    { -21, 107 },
+    { -27, 127 },{ -31, 127 },{ -24, 127 },  { -18, 95 },
+    { -27, 127 },{ -21, 114 },{ -30, 127 },  { -17, 123 },
+    { -12, 115 },{ -16, 122 },
+
+    /* 88 -> 104 */
+    { -11, 115 },{ -12, 63 }, { -2, 68 },    { -15, 84 },
+    { -13, 104 },{ -3, 70 },  { -8, 93 },    { -10, 90 },
+    { -30, 127 },{ -1, 74 },  { -6, 97 },    { -7, 91 },
+    { -20, 127 },{ -4, 56 },  { -5, 82 },    { -7, 76 },
+    { -22, 125 },
+
+    /* 105 -> 135 */
+    { -7, 93 },  { -11, 87 }, { -3, 77 },    { -5, 71 },
+    { -4, 63 },  { -4, 68 },  { -12, 84 },   { -7, 62 },
+    { -7, 65 },  { 8, 61 },   { 5, 56 },     { -2, 66 },
+    { 1, 64 },   { 0, 61 },   { -2, 78 },    { 1, 50 },
+    { 7, 52 },   { 10, 35 },  { 0, 44 },     { 11, 38 },
+    { 1, 45 },   { 0, 46 },   { 5, 44 },     { 31, 17 },
+    { 1, 51 },   { 7, 50 },   { 28, 19 },    { 16, 33 },
+    { 14, 62 },  { -13, 108 },{ -15, 100 },
+
+    /* 136 -> 165 */
+    { -13, 101 },{ -13, 91 }, { -12, 94 },   { -10, 88 },
+    { -16, 84 }, { -10, 86 }, { -7, 83 },    { -13, 87 },
+    { -19, 94 }, { 1, 70 },   { 0, 72 },     { -5, 74 },
+    { 18, 59 },  { -8, 102 }, { -15, 100 },  { 0, 95 },
+    { -4, 75 },  { 2, 72 },   { -11, 75 },   { -3, 71 },
+    { 15, 46 },  { -13, 69 }, { 0, 62 },     { 0, 65 },
+    { 21, 37 },  { -15, 72 }, { 9, 57 },     { 16, 54 },
+    { 0, 62 },   { 12, 72 },
+
+    /* 166 -> 196 */
+    { 24, 0 },   { 15, 9 },   { 8, 25 },     { 13, 18 },
+    { 15, 9 },   { 13, 19 },  { 10, 37 },    { 12, 18 },
+    { 6, 29 },   { 20, 33 },  { 15, 30 },    { 4, 45 },
+    { 1, 58 },   { 0, 62 },   { 7, 61 },     { 12, 38 },
+    { 11, 45 },  { 15, 39 },  { 11, 42 },    { 13, 44 },
+    { 16, 45 },  { 12, 41 },  { 10, 49 },    { 30, 34 },
+    { 18, 42 },  { 10, 55 },  { 17, 51 },    { 17, 46 },
+    { 0, 89 },   { 26, -19 }, { 22, -17 },
+
+    /* 197 -> 226 */
+    { 26, -17 }, { 30, -25 }, { 28, -20 },   { 33, -23 },
+    { 37, -27 }, { 33, -23 }, { 40, -28 },   { 38, -17 },
+    { 33, -11 }, { 40, -15 }, { 41, -6 },    { 38, 1 },
+    { 41, 17 },  { 30, -6 },  { 27, 3 },     { 26, 22 },
+    { 37, -16 }, { 35, -4 },  { 38, -8 },    { 38, -3 },
+    { 37, 3 },   { 38, 5 },   { 42, 0 },     { 35, 16 },
+    { 39, 22 },  { 14, 48 },  { 27, 37 },    { 21, 60 },
+    { 12, 68 },  { 2, 97 },
+
+    /* 227 -> 251 */
+    { -3, 71 },  { -6, 42 },  { -5, 50 },    { -3, 54 },
+    { -2, 62 },  { 0, 58 },   { 1, 63 },     { -2, 72 },
+    { -1, 74 },  { -9, 91 },  { -5, 67 },    { -5, 27 },
+    { -3, 39 },  { -2, 44 },  { 0, 46 },     { -16, 64 },
+    { -8, 68 },  { -10, 78 }, { -6, 77 },    { -10, 86 },
+    { -12, 92 }, { -15, 55 }, { -10, 60 },   { -6, 62 },
+    { -4, 65 },
+
+    /* 252 -> 275 */
+    { -12, 73 }, { -8, 76 },  { -7, 80 },    { -9, 88 },
+    { -17, 110 },{ -11, 97 }, { -20, 84 },   { -11, 79 },
+    { -6, 73 },  { -4, 74 },  { -13, 86 },   { -13, 96 },
+    { -11, 97 }, { -19, 117 },{ -8, 78 },    { -5, 33 },
+    { -4, 48 },  { -2, 53 },  { -3, 62 },    { -13, 71 },
+    { -10, 79 }, { -12, 86 }, { -13, 90 },   { -14, 97 },
+
+    /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
+    { 0, 0 },
+
+    /* 277 -> 307 */
+    { -6, 93 },  { -6, 84 },  { -8, 79 },    { 0, 66 },
+    { -1, 71 },  { 0, 62 },   { -2, 60 },    { -2, 59 },
+    { -5, 75 },  { -3, 62 },  { -4, 58 },    { -9, 66 },
+    { -1, 79 },  { 0, 71 },   { 3, 68 },     { 10, 44 },
+    { -7, 62 },  { 15, 36 },  { 14, 40 },    { 16, 27 },
+    { 12, 29 },  { 1, 44 },   { 20, 36 },    { 18, 32 },
+    { 5, 42 },   { 1, 48 },   { 10, 62 },    { 17, 46 },
+    { 9, 64 },   { -12, 104 },{ -11, 97 },
+
+    /* 308 -> 337 */
+    { -16, 96 }, { -7, 88 },  { -8, 85 },    { -7, 85 },
+    { -9, 85 },  { -13, 88 }, { 4, 66 },     { -3, 77 },
+    { -3, 76 },  { -6, 76 },  { 10, 58 },    { -1, 76 },
+    { -1, 83 },  { -7, 99 },  { -14, 95 },   { 2, 95 },
+    { 0, 76 },   { -5, 74 },  { 0, 70 },     { -11, 75 },
+    { 1, 68 },   { 0, 65 },   { -14, 73 },   { 3, 62 },
+    { 4, 62 },   { -1, 68 },  { -13, 75 },   { 11, 55 },
+    { 5, 64 },   { 12, 70 },
+
+    /* 338 -> 368 */
+    { 15, 6 },   { 6, 19 },   { 7, 16 },     { 12, 14 },
+    { 18, 13 },  { 13, 11 },  { 13, 15 },    { 15, 16 },
+    { 12, 23 },  { 13, 23 },  { 15, 20 },    { 14, 26 },
+    { 14, 44 },  { 17, 40 },  { 17, 47 },    { 24, 17 },
+    { 21, 21 },  { 25, 22 },  { 31, 27 },    { 22, 29 },
+    { 19, 35 },  { 14, 50 },  { 10, 57 },    { 7, 63 },
+    { -2, 77 },  { -4, 82 },  { -3, 94 },    { 9, 69 },
+    { -12, 109 },{ 36, -35 }, { 36, -34 },
+
+    /* 369 -> 398 */
+    { 32, -26 }, { 37, -30 }, { 44, -32 },   { 34, -18 },
+    { 34, -15 }, { 40, -15 }, { 33, -7 },    { 35, -5 },
+    { 33, 0 },   { 38, 2 },   { 33, 13 },    { 23, 35 },
+    { 13, 58 },  { 29, -3 },  { 26, 0 },     { 22, 30 },
+    { 31, -7 },  { 35, -15 }, { 34, -3 },    { 34, 3 },
+    { 36, -1 },  { 34, 5 },   { 32, 11 },    { 35, 5 },
+    { 34, 12 },  { 39, 11 },  { 30, 29 },    { 34, 26 },
+    { 29, 39 },  { 19, 66 },
+
+    /* 399 -> 435 */
+    {  31,  21 }, {  31,  31 }, {  25,  50 },
+    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
+    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
+    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
+    { -23,  68 }, { -24,  50 }, { -11,  74 }, {  23, -13 },
+    {  26, -13 }, {  40, -15 }, {  49, -14 }, {  44,   3 },
+    {  45,   6 }, {  44,  34 }, {  33,  54 }, {  19,  82 },
+    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
+    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
+    {   0,  68 }, {  -9,  92 },
+
+    /* 436 -> 459 */
+    { -14, 106 }, { -13,  97 }, { -15,  90 }, { -12,  90 },
+    { -18,  88 }, { -10,  73 }, {  -9,  79 }, { -14,  86 },
+    { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
+    {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
+    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
+
+    /* 460 -> 1023 */
+    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 },
+    { -12,  63 }, {  -2,  68 }, { -15,  84 }, { -13, 104 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
+    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
+    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
+    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
+    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
+    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
+    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
+    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
+    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
+    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
+    {  -7,  93 }, { -11,  87 }, {  -3,  77 }, {  -5,  71 },
+    {  -4,  63 }, {  -4,  68 }, { -12,  84 }, {  -7,  62 },
+    {  -7,  65 }, {   8,  61 }, {   5,  56 }, {  -2,  66 },
+    {   1,  64 }, {   0,  61 }, {  -2,  78 }, {   1,  50 },
+    {   7,  52 }, {  10,  35 }, {   0,  44 }, {  11,  38 },
+    {   1,  45 }, {   0,  46 }, {   5,  44 }, {  31,  17 },
+    {   1,  51 }, {   7,  50 }, {  28,  19 }, {  16,  33 },
+    {  14,  62 }, { -13, 108 }, { -15, 100 }, { -13, 101 },
+    { -13,  91 }, { -12,  94 }, { -10,  88 }, { -16,  84 },
+    { -10,  86 }, {  -7,  83 }, { -13,  87 }, { -19,  94 },
+    {   1,  70 }, {   0,  72 }, {  -5,  74 }, {  18,  59 },
+    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
+    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
+    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
+    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
+    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
+    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
+    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
+    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
+    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
+    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
+    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
+    {  24,   0 }, {  15,   9 }, {   8,  25 }, {  13,  18 },
+    {  15,   9 }, {  13,  19 }, {  10,  37 }, {  12,  18 },
+    {   6,  29 }, {  20,  33 }, {  15,  30 }, {   4,  45 },
+    {   1,  58 }, {   0,  62 }, {   7,  61 }, {  12,  38 },
+    {  11,  45 }, {  15,  39 }, {  11,  42 }, {  13,  44 },
+    {  16,  45 }, {  12,  41 }, {  10,  49 }, {  30,  34 },
+    {  18,  42 }, {  10,  55 }, {  17,  51 }, {  17,  46 },
+    {   0,  89 }, {  26, -19 }, {  22, -17 }, {  26, -17 },
+    {  30, -25 }, {  28, -20 }, {  33, -23 }, {  37, -27 },
+    {  33, -23 }, {  40, -28 }, {  38, -17 }, {  33, -11 },
+    {  40, -15 }, {  41,  -6 }, {  38,   1 }, {  41,  17 },
+    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
+    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
+    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
+    { -23,  68 }, { -24,  50 }, { -11,  74 }, { -14, 106 },
+    { -13,  97 }, { -15,  90 }, { -12,  90 }, { -18,  88 },
+    { -10,  73 }, {  -9,  79 }, { -14,  86 }, { -10,  73 },
+    { -10,  70 }, { -10,  69 }, {  -5,  66 }, {  -9,  64 },
+    {  -5,  58 }, {   2,  59 }, {  23, -13 }, {  26, -13 },
+    {  40, -15 }, {  49, -14 }, {  44,   3 }, {  45,   6 },
+    {  44,  34 }, {  33,  54 }, {  19,  82 }, {  21, -10 },
+    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 },
+    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
+    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
+    {   0,  68 }, {  -9,  92 }, { -17, 120 }, { -20, 112 },
+    { -18, 114 }, { -11,  85 }, { -15,  92 }, { -14,  89 },
+    { -26,  71 }, { -15,  81 }, { -14,  80 }, {   0,  68 },
+    { -14,  70 }, { -24,  56 }, { -23,  68 }, { -24,  50 },
+    { -11,  74 }, { -14, 106 }, { -13,  97 }, { -15,  90 },
+    { -12,  90 }, { -18,  88 }, { -10,  73 }, {  -9,  79 },
+    { -14,  86 }, { -10,  73 }, { -10,  70 }, { -10,  69 },
+    {  -5,  66 }, {  -9,  64 }, {  -5,  58 }, {   2,  59 },
+    {  23, -13 }, {  26, -13 }, {  40, -15 }, {  49, -14 },
+    {  44,   3 }, {  45,   6 }, {  44,  34 }, {  33,  54 },
+    {  19,  82 }, {  21, -10 }, {  24, -11 }, {  28,  -8 },
+    {  28,  -1 }, {  29,   3 }, {  29,   9 }, {  35,  20 },
+    {  29,  36 }, {  14,  67 }, {  -3,  75 }, {  -1,  23 },
+    {   1,  34 }, {   1,  43 }, {   0,  54 }, {  -2,  55 },
+    {   0,  61 }, {   1,  64 }, {   0,  68 }, {  -9,  92 },
+    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
+    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
+    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
+    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
+    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
+    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
+    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
+    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
+    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
+    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
+    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
+    {  -6,  93 }, {  -6,  84 }, {  -8,  79 }, {   0,  66 },
+    {  -1,  71 }, {   0,  62 }, {  -2,  60 }, {  -2,  59 },
+    {  -5,  75 }, {  -3,  62 }, {  -4,  58 }, {  -9,  66 },
+    {  -1,  79 }, {   0,  71 }, {   3,  68 }, {  10,  44 },
+    {  -7,  62 }, {  15,  36 }, {  14,  40 }, {  16,  27 },
+    {  12,  29 }, {   1,  44 }, {  20,  36 }, {  18,  32 },
+    {   5,  42 }, {   1,  48 }, {  10,  62 }, {  17,  46 },
+    {   9,  64 }, { -12, 104 }, { -11,  97 }, { -16,  96 },
+    {  -7,  88 }, {  -8,  85 }, {  -7,  85 }, {  -9,  85 },
+    { -13,  88 }, {   4,  66 }, {  -3,  77 }, {  -3,  76 },
+    {  -6,  76 }, {  10,  58 }, {  -1,  76 }, {  -1,  83 },
+    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
+    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
+    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
+    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
+    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
+    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
+    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
+    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
+    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
+    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
+    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
+    {  15,   6 }, {   6,  19 }, {   7,  16 }, {  12,  14 },
+    {  18,  13 }, {  13,  11 }, {  13,  15 }, {  15,  16 },
+    {  12,  23 }, {  13,  23 }, {  15,  20 }, {  14,  26 },
+    {  14,  44 }, {  17,  40 }, {  17,  47 }, {  24,  17 },
+    {  21,  21 }, {  25,  22 }, {  31,  27 }, {  22,  29 },
+    {  19,  35 }, {  14,  50 }, {  10,  57 }, {   7,  63 },
+    {  -2,  77 }, {  -4,  82 }, {  -3,  94 }, {   9,  69 },
+    { -12, 109 }, {  36, -35 }, {  36, -34 }, {  32, -26 },
+    {  37, -30 }, {  44, -32 }, {  34, -18 }, {  34, -15 },
+    {  40, -15 }, {  33,  -7 }, {  35,  -5 }, {  33,   0 },
+    {  38,   2 }, {  33,  13 }, {  23,  35 }, {  13,  58 },
+    {  -3,  71 }, {  -6,  42 }, {  -5,  50 }, {  -3,  54 },
+    {  -2,  62 }, {   0,  58 }, {   1,  63 }, {  -2,  72 },
+    {  -1,  74 }, {  -9,  91 }, {  -5,  67 }, {  -5,  27 },
+    {  -3,  39 }, {  -2,  44 }, {   0,  46 }, { -16,  64 },
+    {  -8,  68 }, { -10,  78 }, {  -6,  77 }, { -10,  86 },
+    { -12,  92 }, { -15,  55 }, { -10,  60 }, {  -6,  62 },
+    {  -4,  65 }, { -12,  73 }, {  -8,  76 }, {  -7,  80 },
+    {  -9,  88 }, { -17, 110 }, {  -3,  71 }, {  -6,  42 },
+    {  -5,  50 }, {  -3,  54 }, {  -2,  62 }, {   0,  58 },
+    {   1,  63 }, {  -2,  72 }, {  -1,  74 }, {  -9,  91 },
+    {  -5,  67 }, {  -5,  27 }, {  -3,  39 }, {  -2,  44 },
+    {   0,  46 }, { -16,  64 }, {  -8,  68 }, { -10,  78 },
+    {  -6,  77 }, { -10,  86 }, { -12,  92 }, { -15,  55 },
+    { -10,  60 }, {  -6,  62 }, {  -4,  65 }, { -12,  73 },
+    {  -8,  76 }, {  -7,  80 }, {  -9,  88 }, { -17, 110 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 },
+    {  -3,  70 }, {  -8,  93 }, { -10,  90 }, { -30, 127 }
+};
+
+const int8_t x264_cabac_context_init_PB[3][1024][2] =
+{
+    /* i_cabac_init_idc == 0 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  23,  33 }, {  23,   2 }, {  21,   0 }, {   1,   9 },
+        {   0,  49 }, { -37, 118 }, {   5,  57 }, { -13,  78 },
+        { -11,  65 }, {   1,  62 }, {  12,  49 }, {  -4,  73 },
+        {  17,  50 },
+
+        /* 24 - 39 */
+        {  18,  64 }, {   9,  43 }, {  29,   0 }, {  26,  67 },
+        {  16,  90 }, {   9, 104 }, { -46, 127 }, { -20, 104 },
+        {   1,  67 }, { -13,  78 }, { -11,  65 }, {   1,  62 },
+        {  -6,  86 }, { -17,  95 }, {  -6,  61 }, {   9,  45 },
+
+        /* 40 - 53 */
+        {  -3,  69 }, {  -6,  81 }, { -11,  96 }, {   6,  55 },
+        {   7,  67 }, {  -5,  86 }, {   2,  88 }, {   0,  58 },
+        {  -3,  76 }, { -10,  94 }, {   5,  54 }, {   4,  69 },
+        {  -3,  81 }, {   0,  88 },
+
+        /* 54 - 59 */
+        {  -7,  67 }, {  -5,  74 }, {  -4,  74 }, {  -5,  80 },
+        {  -7,  72 }, {   1,  58 },
+
+        /* 60 - 69 */
+        {   0,  41 }, {   0,  63 }, {   0,  63 }, { 0, 63 },
+        {  -9,  83 }, {   4,  86 }, {   0,  97 }, { -7, 72 },
+        {  13,  41 }, {   3,  62 },
+
+        /* 70 - 104 */
+        {   0,  45 }, {  -4,  78 }, {  -3,  96 }, { -27,  126 },
+        { -28,  98 }, { -25, 101 }, { -23,  67 }, { -28,  82 },
+        { -20,  94 }, { -16,  83 }, { -22, 110 }, { -21,  91 },
+        { -18, 102 }, { -13,  93 }, { -29, 127 }, {  -7,  92 },
+        {  -5,  89 }, {  -7,  96 }, { -13, 108 }, {  -3,  46 },
+        {  -1,  65 }, {  -1,  57 }, {  -9,  93 }, {  -3,  74 },
+        {  -9,  92 }, {  -8,  87 }, { -23, 126 }, {   5,  54 },
+        {   6,  60 }, {   6,  59 }, {   6,  69 }, {  -1,  48 },
+        {   0,  68 }, {  -4,  69 }, {  -8,  88 },
+
+        /* 105 -> 165 */
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {   3,  64 }, {   1,  61 }, {   9,  63 }, {   7,  50 },
+        {  16,  39 }, {   5,  44 }, {   4,  52 }, {  11,  48 },
+        {  -5,  60 }, {  -1,  59 }, {   0,  59 }, {  22,  33 },
+        {   5,  44 }, {  14,  43 }, {  -1,  78 }, {   0,  60 },
+        {   9,  69 },
+
+        /* 166 - 226 */
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {   1,  67 }, {   5,  59 }, {   9,  67 }, {  16,  30 },
+        {  18,  32 }, {  18,  35 }, {  22,  29 }, {  24,  31 },
+        {  23,  38 }, {  18,  43 }, {  20,  41 }, {  11,  63 },
+        {   9,  59 }, {   9,  64 }, {  -1,  94 }, {  -2,  89 },
+        {  -9, 108 },
+
+        /* 227 - 275 */
+        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
+        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
+        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
+        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
+        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
+        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
+        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
+        {  -3,  74 }, { -10,  90 }, {   0,  70 }, {  -4,  29 },
+        {   5,  31 }, {   7,  42 }, {   1,  59 }, {  -2,  58 },
+        {  -3,  72 }, {  -3,  81 }, { -11,  97 }, {   0,  58 },
+        {   8,   5 }, {  10,  14 }, {  14,  18 }, {  13,  27 },
+        {   2,  40 }, {   0,  58 }, {  -3,  70 }, {  -6,  79 },
+        {  -8,  85 },
+
+        /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        {  -2,  69 }, {  -2,  59 }, {   6,  70 }, {  10,  44 },
+        {   9,  31 }, {  12,  43 }, {   3,  53 }, {  14,  34 },
+        {  10,  38 }, {  -3,  52 }, {  13,  40 }, {  17,  32 },
+        {   7,  44 }, {   7,  38 }, {  13,  50 }, {  10,  57 },
+        {  26,  43 },
+
+        /* 338 - 398 */
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {   8,  60 }, {   6,  63 }, {  17,  65 }, {  21,  24 },
+        {  23,  20 }, {  26,  23 }, {  27,  32 }, {  28,  23 },
+        {  28,  24 }, {  23,  40 }, {  24,  32 }, {  28,  29 },
+        {  23,  42 }, {  19,  57 }, {  22,  53 }, {  22,  61 },
+        {  11,  86 },
+
+        /* 399 -> 435 */
+        {  12,  40 }, {  11,  51 }, {  14,  59 },
+        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
+        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
+        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
+        { -16,  66 }, { -22,  65 }, { -20,  63 }, {   9,  -2 },
+        {  26,  -9 }, {  33,  -9 }, {  39,  -7 }, {  41,  -2 },
+        {  45,   3 }, {  49,   9 }, {  45,  27 }, {  36,  59 },
+        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
+        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
+        {  -8,  66 }, {  -8,  76 },
+
+        /* 436 -> 459 */
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
+        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
+        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+
+        /* 460 - 1023 */
+        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
+        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -7,  92 }, {  -5,  89 }, {  -7,  96 }, { -13, 108 },
+        {  -3,  46 }, {  -1,  65 }, {  -1,  57 }, {  -9,  93 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
+        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
+        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
+        { -16,  66 }, { -22,  65 }, { -20,  63 }, {  -5,  85 },
+        {  -6,  81 }, { -10,  77 }, {  -7,  81 }, { -17,  80 },
+        { -18,  73 }, {  -4,  74 }, { -10,  83 }, {  -9,  71 },
+        {  -9,  67 }, {  -1,  61 }, {  -8,  66 }, { -14,  66 },
+        {   0,  59 }, {   2,  59 }, {   9,  -2 }, {  26,  -9 },
+        {  33,  -9 }, {  39,  -7 }, {  41,  -2 }, {  45,   3 },
+        {  49,   9 }, {  45,  27 }, {  36,  59 }, {  21, -13 },
+        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
+        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
+        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
+        {  -8,  66 }, {  -8,  76 }, {  -4,  79 }, {  -7,  71 },
+        {  -5,  69 }, {  -9,  70 }, {  -8,  66 }, { -10,  68 },
+        { -19,  73 }, { -12,  69 }, { -16,  70 }, { -15,  67 },
+        { -20,  62 }, { -19,  70 }, { -16,  66 }, { -22,  65 },
+        { -20,  63 }, {  -5,  85 }, {  -6,  81 }, { -10,  77 },
+        {  -7,  81 }, { -17,  80 }, { -18,  73 }, {  -4,  74 },
+        { -10,  83 }, {  -9,  71 }, {  -9,  67 }, {  -1,  61 },
+        {  -8,  66 }, { -14,  66 }, {   0,  59 }, {   2,  59 },
+        {   9,  -2 }, {  26,  -9 }, {  33,  -9 }, {  39,  -7 },
+        {  41,  -2 }, {  45,   3 }, {  49,   9 }, {  45,  27 },
+        {  36,  59 }, {  21, -13 }, {  33, -14 }, {  39,  -7 },
+        {  46,  -2 }, {  51,   2 }, {  60,   6 }, {  61,  17 },
+        {  55,  34 }, {  42,  62 }, {  -6,  66 }, {  -7,  35 },
+        {  -7,  42 }, {  -8,  45 }, {  -5,  48 }, { -12,  56 },
+        {  -6,  60 }, {  -5,  62 }, {  -8,  66 }, {  -8,  76 },
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
+        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
+        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
+        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
+        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
+        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
+        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
+        {  -3,  74 }, { -10,  90 }, {  -6,  76 }, {  -2,  44 },
+        {   0,  45 }, {   0,  52 }, {  -3,  64 }, {  -2,  59 },
+        {  -4,  70 }, {  -4,  75 }, {  -8,  82 }, { -17, 102 },
+        {  -9,  77 }, {   3,  24 }, {   0,  42 }, {   0,  48 },
+        {   0,  55 }, {  -6,  59 }, {  -7,  71 }, { -12,  83 },
+        { -11,  87 }, { -30, 119 }, {   1,  58 }, {  -3,  29 },
+        {  -1,  36 }, {   1,  38 }, {   2,  43 }, {  -6,  55 },
+        {   0,  58 }, {   0,  64 }, {  -3,  74 }, { -10,  90 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 },
+        {  -3,  74 }, {  -9,  92 }, {  -8,  87 }, { -23, 126 }
+    },
+
+    /* i_cabac_init_idc == 1 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  22,  25 }, {  34,   0 }, {  16,   0 }, {  -2,   9 },
+        {   4,  41 }, { -29, 118 }, {   2,  65 }, {  -6,  71 },
+        { -13,  79 }, {   5,  52 }, {   9,  50 }, {  -3,  70 },
+        {  10,  54 },
+
+        /* 24 - 39 */
+        {  26,  34 }, {  19,  22 }, {  40,   0 }, {  57,   2 },
+        {  41,  36 }, {  26,  69 }, { -45, 127 }, { -15, 101 },
+        {  -4,  76 }, {  -6,  71 }, { -13,  79 }, {   5,  52 },
+        {   6,  69 }, { -13,  90 }, {   0,  52 }, {   8,  43 },
+
+        /* 40 - 53 */
+        {  -2,  69 },{  -5,  82 },{ -10,  96 },{   2,  59 },
+        {   2,  75 },{  -3,  87 },{  -3,  100 },{   1,  56 },
+        {  -3,  74 },{  -6,  85 },{   0,  59 },{  -3,  81 },
+        {  -7,  86 },{  -5,  95 },
+
+        /* 54 - 59 */
+        {  -1,  66 },{  -1,  77 },{   1,  70 },{  -2,  86 },
+        {  -5,  72 },{   0,  61 },
+
+        /* 60 - 69 */
+        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+        { 13, 41 },  { 3, 62 },
+
+        /* 70 - 104 */
+        {  13,  15 }, {   7,  51 }, {   2,  80 }, { -39, 127 },
+        { -18,  91 }, { -17,  96 }, { -26,  81 }, { -35,  98 },
+        { -24, 102 }, { -23,  97 }, { -27, 119 }, { -24,  99 },
+        { -21, 110 }, { -18, 102 }, { -36, 127 }, {   0,  80 },
+        {  -5,  89 }, {  -7,  94 }, {  -4,  92 }, {   0,  39 },
+        {   0,  65 }, { -15,  84 }, { -35, 127 }, {  -2,  73 },
+        { -12, 104 }, {  -9,  91 }, { -31, 127 }, {   3,  55 },
+        {   7,  56 }, {   7,  55 }, {   8,  61 }, {  -3,  53 },
+        {   0,  68 }, {  -7,  74 }, {  -9,  88 },
+
+        /* 105 -> 165 */
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        {  -4,  71 }, {   0,  58 }, {   7,  61 }, {   9,  41 },
+        {  18,  25 }, {   9,  32 }, {   5,  43 }, {   9,  47 },
+        {   0,  44 }, {   0,  51 }, {   2,  46 }, {  19,  38 },
+        {  -4,  66 }, {  15,  38 }, {  12,  42 }, {   9,  34 },
+        {   0,  89 },
+
+        /* 166 - 226 */
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {   0,  75 }, {   2,  72 }, {   8,  77 }, {  14,  35 },
+        {  18,  31 }, {  17,  35 }, {  21,  30 }, {  17,  45 },
+        {  20,  42 }, {  18,  45 }, {  27,  26 }, {  16,  54 },
+        {   7,  66 }, {  16,  56 }, {  11,  73 }, {  10,  67 },
+        { -10, 116 },
+
+        /* 227 - 275 */
+        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
+        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
+        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
+        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
+        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
+        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
+        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
+        {  -5,  74 }, {  -9,  86 }, {   2,  66 }, {  -9,  34 },
+        {   1,  32 }, {  11,  31 }, {   5,  52 }, {  -2,  55 },
+        {  -2,  67 }, {   0,  73 }, {  -8,  89 }, {   3,  52 },
+        {   7,   4 }, {  10,   8 }, {  17,   8 }, {  16,  19 },
+        {   3,  37 }, {  -1,  61 }, {  -5,  73 }, {  -1,  70 },
+        {  -4,  78 },
+
+        /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        {  -1,  70 }, {  -9,  72 }, {  14,  60 }, {  16,  37 },
+        {   0,  47 }, {  18,  35 }, {  11,  37 }, {  12,  41 },
+        {  10,  41 }, {   2,  48 }, {  12,  41 }, {  13,  41 },
+        {   0,  59 }, {   3,  50 }, {  19,  40 }, {   3,  66 },
+        {  18,  50 },
+
+        /* 338 - 398 */
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        {  12,  48 }, {  11,  49 }, {  26,  45 }, {  22,  22 },
+        {  23,  22 }, {  27,  21 }, {  33,  20 }, {  26,  28 },
+        {  30,  24 }, {  27,  34 }, {  18,  42 }, {  25,  39 },
+        {  18,  50 }, {  12,  70 }, {  21,  54 }, {  14,  71 },
+        {  11,  83 },
+
+        /* 399 -> 435 */
+        {  25,  32 }, {  21,  49 }, {  21,  54 },
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
+        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
+        {  -4,  67 }, {  -7,  82 },
+
+        /* 436 -> 459 */
+        {  -3,  81 }, {  -3,  76 }, {  -7,  72 }, {  -6,  78 },
+        { -12,  72 }, { -14,  68 }, {  -3,  70 }, {  -6,  76 },
+        {  -5,  66 }, {  -5,  62 }, {   0,  57 }, {  -4,  61 },
+        {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+
+        /* 460 - 1024 */
+        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
+        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {   0,  80 }, {  -5,  89 }, {  -7,  94 }, {  -4,  92 },
+        {   0,  39 }, {   0,  65 }, { -15,  84 }, { -35, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  -3,  81 },
+        {  -3,  76 }, {  -7,  72 }, {  -6,  78 }, { -12,  72 },
+        { -14,  68 }, {  -3,  70 }, {  -6,  76 }, {  -5,  66 },
+        {  -5,  62 }, {   0,  57 }, {  -4,  61 }, {  -9,  60 },
+        {   1,  54 }, {   2,  58 }, {  17, -10 }, {  32, -13 },
+        {  42,  -9 }, {  49,  -5 }, {  53,   0 }, {  64,   3 },
+        {  68,  10 }, {  66,  27 }, {  47,  57 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
+        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
+        {  -4,  67 }, {  -7,  82 }, {  -5,  85 }, {  -6,  81 },
+        { -10,  77 }, {  -7,  81 }, { -17,  80 }, { -18,  73 },
+        {  -4,  74 }, { -10,  83 }, {  -9,  71 }, {  -9,  67 },
+        {  -1,  61 }, {  -8,  66 }, { -14,  66 }, {   0,  59 },
+        {   2,  59 }, {  -3,  81 }, {  -3,  76 }, {  -7,  72 },
+        {  -6,  78 }, { -12,  72 }, { -14,  68 }, {  -3,  70 },
+        {  -6,  76 }, {  -5,  66 }, {  -5,  62 }, {   0,  57 },
+        {  -4,  61 }, {  -9,  60 }, {   1,  54 }, {   2,  58 },
+        {  17, -10 }, {  32, -13 }, {  42,  -9 }, {  49,  -5 },
+        {  53,   0 }, {  64,   3 }, {  68,  10 }, {  66,  27 },
+        {  47,  57 }, {  17, -10 }, {  32, -13 }, {  42,  -9 },
+        {  49,  -5 }, {  53,   0 }, {  64,   3 }, {  68,  10 },
+        {  66,  27 }, {  47,  57 }, {  -5,  71 }, {   0,  24 },
+        {  -1,  36 }, {  -2,  42 }, {  -2,  52 }, {  -9,  57 },
+        {  -6,  63 }, {  -4,  65 }, {  -4,  67 }, {  -7,  82 },
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
+        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
+        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
+        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
+        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
+        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
+        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
+        {  -5,  74 }, {  -9,  86 }, { -23, 112 }, { -15,  71 },
+        {  -7,  61 }, {   0,  53 }, {  -5,  66 }, { -11,  77 },
+        {  -9,  80 }, {  -9,  84 }, { -10,  87 }, { -34, 127 },
+        { -21, 101 }, {  -3,  39 }, {  -5,  53 }, {  -7,  61 },
+        { -11,  75 }, { -15,  77 }, { -17,  91 }, { -25, 107 },
+        { -25, 111 }, { -28, 122 }, { -11,  76 }, { -10,  44 },
+        { -10,  52 }, { -10,  57 }, {  -9,  58 }, { -16,  72 },
+        {  -7,  69 }, {  -4,  69 }, {  -5,  74 }, {  -9,  86 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 },
+        {  -2,  73 }, { -12, 104 }, {  -9,  91 }, { -31, 127 }
+    },
+
+    /* i_cabac_init_idc == 2 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  29,  16 }, {  25,   0 }, {  14,   0 }, { -10,  51 },
+        {  -3,  62 }, { -27,  99 }, {  26,  16 }, {  -4,  85 },
+        { -24, 102 }, {   5,  57 }, {   6,  57 }, { -17,  73 },
+        {  14,  57 },
+
+        /* 24 - 39 */
+        {  20,  40 }, {  20,  10 }, {  29,   0 }, {  54,   0 },
+        {  37,  42 }, {  12,  97 }, { -32, 127 }, { -22, 117 },
+        {  -2,  74 }, {  -4,  85 }, { -24, 102 }, {   5,  57 },
+        {  -6,  93 }, { -14,  88 }, {  -6,  44 }, {   4,  55 },
+
+        /* 40 - 53 */
+        { -11,  89 },{ -15,  103 },{ -21,  116 },{  19,  57 },
+        {  20,  58 },{   4,  84 },{   6,  96 },{   1,  63 },
+        {  -5,  85 },{ -13,  106 },{   5,  63 },{   6,  75 },
+        {  -3,  90 },{  -1,  101 },
+
+        /* 54 - 59 */
+        {   3,  55 },{  -4,  79 },{  -2,  75 },{ -12,  97 },
+        {  -7,  50 },{   1,  60 },
+
+        /* 60 - 69 */
+        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+        { 13, 41 },  { 3, 62 },
+
+        /* 70 - 104 */
+        {   7,  34 }, {  -9,  88 }, { -20, 127 }, { -36, 127 },
+        { -17,  91 }, { -14,  95 }, { -25,  84 }, { -25,  86 },
+        { -12,  89 }, { -17,  91 }, { -31, 127 }, { -14,  76 },
+        { -18, 103 }, { -13,  90 }, { -37, 127 }, {  11,  80 },
+        {   5,  76 }, {   2,  84 }, {   5,  78 }, {  -6,  55 },
+        {   4,  61 }, { -14,  83 }, { -37, 127 }, {  -5,  79 },
+        { -11, 104 }, { -11,  91 }, { -30, 127 }, {   0,  65 },
+        {  -2,  79 }, {   0,  72 }, {  -4,  92 }, {  -6,  56 },
+        {   3,  68 }, {  -8,  71 }, { -13,  98 },
+
+        /* 105 -> 165 */
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {   3,  65 }, {  -7,  69 }, {   8,  77 }, { -10,  66 },
+        {   3,  62 }, {  -3,  68 }, { -20,  81 }, {   0,  30 },
+        {   1,   7 }, {  -3,  23 }, { -21,  74 }, {  16,  66 },
+        { -23, 124 }, {  17,  37 }, {  44, -18 }, {  50, -34 },
+        { -22, 127 },
+
+        /* 166 - 226 */
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {  20,  34 }, {  19,  31 }, {  27,  44 }, {  19,  16 },
+        {  15,  36 }, {  15,  36 }, {  21,  28 }, {  25,  21 },
+        {  30,  20 }, {  31,  12 }, {  27,  16 }, {  24,  42 },
+        {   0,  93 }, {  14,  56 }, {  15,  57 }, {  26,  38 },
+        { -24, 127 },
+
+        /* 227 - 275 */
+        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
+        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
+        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
+        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
+        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
+        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
+        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
+        { -12,  92 }, { -18, 108 }, {  -4,  79 }, { -22,  69 },
+        { -16,  75 }, {  -2,  58 }, {   1,  58 }, { -13,  78 },
+        {  -9,  83 }, {  -4,  81 }, { -13,  99 }, { -13,  81 },
+        {  -6,  38 }, { -13,  62 }, {  -6,  58 }, {  -2,  59 },
+        { -16,  73 }, { -10,  76 }, { -13,  86 }, {  -9,  83 },
+        { -10,  87 },
+
+        /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        {  -2,  76 }, { -18,  86 }, {  12,  70 }, {   5,  64 },
+        { -12,  70 }, {  11,  55 }, {   5,  56 }, {   0,  69 },
+        {   2,  65 }, {  -6,  74 }, {   5,  54 }, {   7,  54 },
+        {  -6,  76 }, { -11,  82 }, {  -2,  77 }, {  -2,  77 },
+        {  25,  42 },
+
+        /* 338 - 398 */
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        {  18,  31 }, {  19,  26 }, {  36,  24 }, {  24,  23 },
+        {  27,  16 }, {  24,  30 }, {  31,  29 }, {  22,  41 },
+        {  22,  42 }, {  16,  60 }, {  15,  52 }, {  14,  60 },
+        {   3,  78 }, { -16, 123 }, {  21,  53 }, {  22,  56 },
+        {  25,  61 },
+
+        /* 399 -> 435 */
+        {  21,  33 }, {  19,  50 }, {  17,  61 },
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
+        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
+        {  -6,  68 }, { -10,  79 },
+
+        /* 436 -> 459 */
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+
+        /* 460 - 1023 */
+        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
+        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  11,  80 }, {   5,  76 }, {   2,  84 }, {   5,  78 },
+        {  -6,  55 }, {   4,  61 }, { -14,  83 }, { -37, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {  -3,  78 },
+        {  -8,  74 }, {  -9,  72 }, { -10,  72 }, { -18,  75 },
+        { -12,  71 }, { -11,  63 }, {  -5,  70 }, { -17,  75 },
+        { -14,  72 }, { -16,  67 }, {  -8,  53 }, { -14,  59 },
+        {  -9,  52 }, { -11,  68 }, {   9,  -2 }, {  30, -10 },
+        {  31,  -4 }, {  33,  -1 }, {  33,   7 }, {  31,  12 },
+        {  37,  23 }, {  31,  38 }, {  20,  64 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
+        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
+        {  -6,  68 }, { -10,  79 }, {  -3,  78 }, {  -8,  74 },
+        {  -9,  72 }, { -10,  72 }, { -18,  75 }, { -12,  71 },
+        { -11,  63 }, {  -5,  70 }, { -17,  75 }, { -14,  72 },
+        { -16,  67 }, {  -8,  53 }, { -14,  59 }, {  -9,  52 },
+        { -11,  68 }, {  -3,  78 }, {  -8,  74 }, {  -9,  72 },
+        { -10,  72 }, { -18,  75 }, { -12,  71 }, { -11,  63 },
+        {  -5,  70 }, { -17,  75 }, { -14,  72 }, { -16,  67 },
+        {  -8,  53 }, { -14,  59 }, {  -9,  52 }, { -11,  68 },
+        {   9,  -2 }, {  30, -10 }, {  31,  -4 }, {  33,  -1 },
+        {  33,   7 }, {  31,  12 }, {  37,  23 }, {  31,  38 },
+        {  20,  64 }, {   9,  -2 }, {  30, -10 }, {  31,  -4 },
+        {  33,  -1 }, {  33,   7 }, {  31,  12 }, {  37,  23 },
+        {  31,  38 }, {  20,  64 }, {  -9,  71 }, {  -7,  37 },
+        {  -8,  44 }, { -11,  49 }, { -10,  56 }, { -12,  59 },
+        {  -8,  63 }, {  -9,  67 }, {  -6,  68 }, { -10,  79 },
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
+        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
+        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
+        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
+        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
+        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
+        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
+        { -12,  92 }, { -18, 108 }, { -24, 115 }, { -22,  82 },
+        {  -9,  62 }, {   0,  53 }, {   0,  59 }, { -14,  85 },
+        { -13,  89 }, { -13,  94 }, { -11,  92 }, { -29, 127 },
+        { -21, 100 }, { -14,  57 }, { -12,  67 }, { -11,  71 },
+        { -10,  77 }, { -21,  85 }, { -16,  88 }, { -23, 104 },
+        { -15,  98 }, { -37, 127 }, { -10,  82 }, {  -8,  48 },
+        {  -8,  61 }, {  -8,  66 }, {  -7,  70 }, { -14,  75 },
+        { -10,  79 }, {  -9,  83 }, { -12,  92 }, { -18, 108 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 },
+        {  -5,  79 }, { -11, 104 }, { -11,  91 }, { -30, 127 }
+    }
+};
+
+const uint8_t x264_cabac_range_lps[64][4] = /* 64 rows of 4 8-bit range values. */
+{ /* NOTE(review): presumably indexed [probability state][quantized range]; confirm at the use site. */
+    {  2,   2,   2,   2}, {  6,   7,   8,   9}, {  6,   7,   9,  10}, {  6,   8,   9,  11},
+    {  7,   8,  10,  11}, {  7,   9,  10,  12}, {  7,   9,  11,  12}, {  8,   9,  11,  13},
+    {  8,  10,  12,  14}, {  9,  11,  12,  14}, {  9,  11,  13,  15}, { 10,  12,  14,  16},
+    { 10,  12,  15,  17}, { 11,  13,  15,  18}, { 11,  14,  16,  19}, { 12,  14,  17,  20},
+    { 12,  15,  18,  21}, { 13,  16,  19,  22}, { 14,  17,  20,  23}, { 14,  18,  21,  24},
+    { 15,  19,  22,  25}, { 16,  20,  23,  27}, { 17,  21,  25,  28}, { 18,  22,  26,  30},
+    { 19,  23,  27,  31}, { 20,  24,  29,  33}, { 21,  26,  30,  35}, { 22,  27,  32,  37},
+    { 23,  28,  33,  39}, { 24,  30,  35,  41}, { 26,  31,  37,  43}, { 27,  33,  39,  45},
+    { 29,  35,  41,  48}, { 30,  37,  43,  50}, { 32,  39,  46,  53}, { 33,  41,  48,  56},
+    { 35,  43,  51,  59}, { 37,  45,  54,  62}, { 39,  48,  56,  65}, { 41,  50,  59,  69},
+    { 43,  53,  63,  72}, { 46,  56,  66,  76}, { 48,  59,  69,  80}, { 51,  62,  73,  85},
+    { 53,  65,  77,  89}, { 56,  69,  81,  94}, { 59,  72,  86,  99}, { 62,  76,  90, 104},
+    { 66,  80,  95, 110}, { 69,  85, 100, 116}, { 73,  89, 105, 122}, { 77,  94, 111, 128},
+    { 81,  99, 117, 135}, { 85, 104, 123, 142}, { 90, 110, 130, 150}, { 95, 116, 137, 158},
+    {100, 122, 144, 166}, {105, 128, 152, 175}, {111, 135, 160, 185}, {116, 142, 169, 195},
+    {123, 150, 178, 205}, {128, 158, 187, 216}, {128, 167, 197, 227}, {128, 176, 208, 240}
+};
+
+const uint8_t x264_cabac_transition[128][2] = /* 128 rows of 2 values; every value lies in 0..127, i.e. is itself a valid row index. */
+{ /* NOTE(review): presumably row = current state and column = coded bin, giving the next state; confirm at the use site. */
+    {  0,   0}, {  1,   1}, {  2,  50}, { 51,   3}, {  2,  50}, { 51,   3}, {  4,  52}, { 53,   5},
+    {  6,  52}, { 53,   7}, {  8,  52}, { 53,   9}, { 10,  54}, { 55,  11}, { 12,  54}, { 55,  13},
+    { 14,  54}, { 55,  15}, { 16,  56}, { 57,  17}, { 18,  56}, { 57,  19}, { 20,  56}, { 57,  21},
+    { 22,  58}, { 59,  23}, { 24,  58}, { 59,  25}, { 26,  60}, { 61,  27}, { 28,  60}, { 61,  29},
+    { 30,  60}, { 61,  31}, { 32,  62}, { 63,  33}, { 34,  62}, { 63,  35}, { 36,  64}, { 65,  37},
+    { 38,  66}, { 67,  39}, { 40,  66}, { 67,  41}, { 42,  66}, { 67,  43}, { 44,  68}, { 69,  45},
+    { 46,  68}, { 69,  47}, { 48,  70}, { 71,  49}, { 50,  72}, { 73,  51}, { 52,  72}, { 73,  53},
+    { 54,  74}, { 75,  55}, { 56,  74}, { 75,  57}, { 58,  76}, { 77,  59}, { 60,  78}, { 79,  61},
+    { 62,  78}, { 79,  63}, { 64,  80}, { 81,  65}, { 66,  82}, { 83,  67}, { 68,  82}, { 83,  69},
+    { 70,  84}, { 85,  71}, { 72,  84}, { 85,  73}, { 74,  88}, { 89,  75}, { 76,  88}, { 89,  77},
+    { 78,  90}, { 91,  79}, { 80,  90}, { 91,  81}, { 82,  94}, { 95,  83}, { 84,  94}, { 95,  85},
+    { 86,  96}, { 97,  87}, { 88,  96}, { 97,  89}, { 90, 100}, {101,  91}, { 92, 100}, {101,  93},
+    { 94, 102}, {103,  95}, { 96, 104}, {105,  97}, { 98, 104}, {105,  99}, {100, 108}, {109, 101},
+    {102, 108}, {109, 103}, {104, 110}, {111, 105}, {106, 112}, {113, 107}, {108, 114}, {115, 109},
+    {110, 116}, {117, 111}, {112, 118}, {119, 113}, {114, 118}, {119, 115}, {116, 122}, {123, 117},
+    {118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125}
+};
+
+const uint8_t x264_cabac_renorm_shift[64] = /* Entry i is the number of leading zero bits of i seen as a 6-bit value (6 for i == 0). */
+{ /* NOTE(review): presumably the renormalization shift for a range value reduced to 6 bits; confirm at the use site. */
+    6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+
+/* -log2(probability) */
+const uint16_t x264_cabac_entropy[128] = /* 64 pairs of FIX8() values: even entries rise from 0.0273 to 1.0, odd entries fall from 5.7370 to 1.0. */
+{ /* NOTE(review): FIX8 is defined outside this view (presumably 8 fractional bits); each pair presumably holds the costs of the two symbols of one state -- confirm at the use site. */
+    FIX8(0.0273), FIX8(5.7370), FIX8(0.0288), FIX8(5.6618),
+    FIX8(0.0303), FIX8(5.5866), FIX8(0.0320), FIX8(5.5114),
+    FIX8(0.0337), FIX8(5.4362), FIX8(0.0355), FIX8(5.3610),
+    FIX8(0.0375), FIX8(5.2859), FIX8(0.0395), FIX8(5.2106),
+    FIX8(0.0416), FIX8(5.1354), FIX8(0.0439), FIX8(5.0602),
+    FIX8(0.0463), FIX8(4.9851), FIX8(0.0488), FIX8(4.9099),
+    FIX8(0.0515), FIX8(4.8347), FIX8(0.0543), FIX8(4.7595),
+    FIX8(0.0572), FIX8(4.6843), FIX8(0.0604), FIX8(4.6091),
+    FIX8(0.0637), FIX8(4.5339), FIX8(0.0671), FIX8(4.4588),
+    FIX8(0.0708), FIX8(4.3836), FIX8(0.0747), FIX8(4.3083),
+    FIX8(0.0788), FIX8(4.2332), FIX8(0.0832), FIX8(4.1580),
+    FIX8(0.0878), FIX8(4.0828), FIX8(0.0926), FIX8(4.0076),
+    FIX8(0.0977), FIX8(3.9324), FIX8(0.1032), FIX8(3.8572),
+    FIX8(0.1089), FIX8(3.7820), FIX8(0.1149), FIX8(3.7068),
+    FIX8(0.1214), FIX8(3.6316), FIX8(0.1282), FIX8(3.5565),
+    FIX8(0.1353), FIX8(3.4813), FIX8(0.1429), FIX8(3.4061),
+    FIX8(0.1510), FIX8(3.3309), FIX8(0.1596), FIX8(3.2557),
+    FIX8(0.1686), FIX8(3.1805), FIX8(0.1782), FIX8(3.1053),
+    FIX8(0.1884), FIX8(3.0301), FIX8(0.1992), FIX8(2.9549),
+    FIX8(0.2107), FIX8(2.8797), FIX8(0.2229), FIX8(2.8046),
+    FIX8(0.2358), FIX8(2.7294), FIX8(0.2496), FIX8(2.6542),
+    FIX8(0.2642), FIX8(2.5790), FIX8(0.2798), FIX8(2.5038),
+    FIX8(0.2964), FIX8(2.4286), FIX8(0.3142), FIX8(2.3534),
+    FIX8(0.3331), FIX8(2.2782), FIX8(0.3532), FIX8(2.2030),
+    FIX8(0.3748), FIX8(2.1278), FIX8(0.3979), FIX8(2.0527),
+    FIX8(0.4226), FIX8(1.9775), FIX8(0.4491), FIX8(1.9023),
+    FIX8(0.4776), FIX8(1.8271), FIX8(0.5082), FIX8(1.7519),
+    FIX8(0.5412), FIX8(1.6767), FIX8(0.5768), FIX8(1.6015),
+    FIX8(0.6152), FIX8(1.5263), FIX8(0.6568), FIX8(1.4511),
+    FIX8(0.7020), FIX8(1.3759), FIX8(0.7513), FIX8(1.3008),
+    FIX8(0.8050), FIX8(1.2256), FIX8(0.8638), FIX8(1.1504),
+    FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000)
+};
+
+/*****************************************************************************
+ * RDO
+ *****************************************************************************/
+
+/* Each of the two rows lists 63 values; rows are padded to [64] (last element implicitly zero) for easier addressing */
+const uint8_t x264_significant_coeff_flag_offset_8x8[2][64] = /* NOTE(review): the two rows presumably select frame vs. field coding -- confirm with callers */
+{{
+    0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
+    4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
+    7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
+   12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
+},{
+    0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
+    6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
+    9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
+    9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
+}};
+const uint8_t x264_last_coeff_flag_offset_8x8[63] = /* 63 entries, non-decreasing from 0 to 8. */
+{ /* NOTE(review): presumably indexed by coefficient position within an 8x8 block -- confirm with callers. */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+};
+const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* entry i == MIN( i/2, 2 ) with integer division, for i = 0..6 */
+const uint16_t x264_significant_coeff_flag_offset[2][16] = /* Two rows of 16 offsets; the last two entries of each row are 0. */
+{ /* NOTE(review): presumably [frame/field][block category] -> first CABAC context index -- confirm with callers. */
+    { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718, 0, 0 },
+    { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733, 0, 0 }
+};
+const uint16_t x264_last_coeff_flag_offset[2][16] = /* Two rows of 16 offsets; the last two entries of each row are 0. */
+{ /* NOTE(review): presumably [frame/field][block category] -> first CABAC context index -- confirm with callers. */
+    { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748, 0, 0 },
+    { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757, 0, 0 }
+};
+const uint16_t x264_coeff_abs_level_m1_offset[16] = /* Only 14 initializers are listed; the remaining 2 elements are zero-initialized. */
+{ /* NOTE(review): presumably indexed by block category -- confirm with callers. */
+    227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
+};
+const uint8_t x264_count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63}; /* NOTE(review): "m1" presumably means "minus 1" (coefficient count - 1 per block category) -- confirm with callers */
+
+/*****************************************************************************
+ * VLC
+ *****************************************************************************/
+
+/* [nC] -- each entry pairs a code value with its length in bits; the str= notes spell the code out as a bit string */
+const vlc_t x264_coeff0_token[6] =
+{
+    { 0x1, 1 }, /* str=1 */
+    { 0x3, 2 }, /* str=11 */
+    { 0xf, 4 }, /* str=1111 */
+    { 0x3, 6 }, /* str=000011 */
+    { 0x1, 2 }, /* str=01 */
+    { 0x1, 1 }, /* str=1 */
+};
+
+/* [nC][i_total_coeff-1][i_trailing] */
+const vlc_t x264_coeff_token[6][16][4] =
+{
+    { /* table 0 */
+        { /* i_total 1 */
+            { 0x5, 6 }, /* str=000101 */
+            { 0x1, 2 }, /* str=01 */
+        },
+        { /* i_total 2 */
+            { 0x7, 8 }, /* str=00000111 */
+            { 0x4, 6 }, /* str=000100 */
+            { 0x1, 3 }, /* str=001 */
+        },
+        { /* i_total 3 */
+            { 0x7, 9 }, /* str=000000111 */
+            { 0x6, 8 }, /* str=00000110 */
+            { 0x5, 7 }, /* str=0000101 */
+            { 0x3, 5 }, /* str=00011 */
+        },
+        { /* i_total 4 */
+            { 0x7, 10 }, /* str=0000000111 */
+            { 0x6, 9 },  /* str=000000110 */
+            { 0x5, 8 },  /* str=00000101 */
+            { 0x3, 6 },  /* str=000011 */
+        },
+        { /* i_total 5 */
+            { 0x7, 11 }, /* str=00000000111 */
+            { 0x6, 10 }, /* str=0000000110 */
+            { 0x5, 9 },  /* str=000000101 */
+            { 0x4, 7 },  /* str=0000100 */
+        },
+        { /* i_total 6 */
+            { 0xf, 13 }, /* str=0000000001111 */
+            { 0x6, 11 }, /* str=00000000110 */
+            { 0x5, 10 }, /* str=0000000101 */
+            { 0x4, 8 },  /* str=00000100 */
+        },
+        { /* i_total 7 */
+            { 0xb, 13 }, /* str=0000000001011 */
+            { 0xe, 13 }, /* str=0000000001110 */
+            { 0x5, 11 }, /* str=00000000101 */
+            { 0x4, 9 },  /* str=000000100 */
+        },
+        { /* i_total 8 */
+            { 0x8, 13 }, /* str=0000000001000 */
+            { 0xa, 13 }, /* str=0000000001010 */
+            { 0xd, 13 }, /* str=0000000001101 */
+            { 0x4, 10 }, /* str=0000000100 */
+        },
+        { /* i_total 9 */
+            { 0xf, 14 }, /* str=00000000001111 */
+            { 0xe, 14 }, /* str=00000000001110 */
+            { 0x9, 13 }, /* str=0000000001001 */
+            { 0x4, 11 }, /* str=00000000100 */
+        },
+        { /* i_total 10 */
+            { 0xb, 14 }, /* str=00000000001011 */
+            { 0xa, 14 }, /* str=00000000001010 */
+            { 0xd, 14 }, /* str=00000000001101 */
+            { 0xc, 13 }, /* str=0000000001100 */
+        },
+        { /* i_total 11 */
+            { 0xf, 15 }, /* str=000000000001111 */
+            { 0xe, 15 }, /* str=000000000001110 */
+            { 0x9, 14 }, /* str=00000000001001 */
+            { 0xc, 14 }, /* str=00000000001100 */
+        },
+        { /* i_total 12 */
+            { 0xb, 15 }, /* str=000000000001011 */
+            { 0xa, 15 }, /* str=000000000001010 */
+            { 0xd, 15 }, /* str=000000000001101 */
+            { 0x8, 14 }, /* str=00000000001000 */
+        },
+        { /* i_total 13 */
+            { 0xf, 16 }, /* str=0000000000001111 */
+            { 0x1, 15 }, /* str=000000000000001 */
+            { 0x9, 15 }, /* str=000000000001001 */
+            { 0xc, 15 }, /* str=000000000001100 */
+        },
+        { /* i_total 14 */
+            { 0xb, 16 }, /* str=0000000000001011 */
+            { 0xe, 16 }, /* str=0000000000001110 */
+            { 0xd, 16 }, /* str=0000000000001101 */
+            { 0x8, 15 }, /* str=000000000001000 */
+        },
+        { /* i_total 15 */
+            { 0x7, 16 }, /* str=0000000000000111 */
+            { 0xa, 16 }, /* str=0000000000001010 */
+            { 0x9, 16 }, /* str=0000000000001001 */
+            { 0xc, 16 }, /* str=0000000000001100 */
+        },
+        { /* i_total 16 */
+            { 0x4, 16 }, /* str=0000000000000100 */
+            { 0x6, 16 }, /* str=0000000000000110 */
+            { 0x5, 16 }, /* str=0000000000000101 */
+            { 0x8, 16 }, /* str=0000000000001000 */
+        },
+    },
+    { /* table 1 */
+        { /* i_total 1 */
+            { 0xb, 6 }, /* str=001011 */
+            { 0x2, 2 }, /* str=10 */
+        },
+        { /* i_total 2 */
+            { 0x7, 6 }, /* str=000111 */
+            { 0x7, 5 }, /* str=00111 */
+            { 0x3, 3 }, /* str=011 */
+        },
+        { /* i_total 3 */
+            { 0x7, 7 }, /* str=0000111 */
+            { 0xa, 6 }, /* str=001010 */
+            { 0x9, 6 }, /* str=001001 */
+            { 0x5, 4 }, /* str=0101 */
+        },
+        { /* i_total 4 */
+            { 0x7, 8 }, /* str=00000111 */
+            { 0x6, 6 }, /* str=000110 */
+            { 0x5, 6 }, /* str=000101 */
+            { 0x4, 4 }, /* str=0100 */
+        },
+        { /* i_total 5 */
+            { 0x4, 8 }, /* str=00000100 */
+            { 0x6, 7 }, /* str=0000110 */
+            { 0x5, 7 }, /* str=0000101 */
+            { 0x6, 5 }, /* str=00110 */
+        },
+        { /* i_total 6 */
+            { 0x7, 9 }, /* str=000000111 */
+            { 0x6, 8 }, /* str=00000110 */
+            { 0x5, 8 }, /* str=00000101 */
+            { 0x8, 6 }, /* str=001000 */
+        },
+        { /* i_total 7 */
+            { 0xf, 11 }, /* str=00000001111 */
+            { 0x6, 9 },  /* str=000000110 */
+            { 0x5, 9 },  /* str=000000101 */
+            { 0x4, 6 },  /* str=000100 */
+        },
+        { /* i_total 8 */
+            { 0xb, 11 }, /* str=00000001011 */
+            { 0xe, 11 }, /* str=00000001110 */
+            { 0xd, 11 }, /* str=00000001101 */
+            { 0x4, 7 },  /* str=0000100 */
+        },
+        { /* i_total 9 */
+            { 0xf, 12 }, /* str=000000001111 */
+            { 0xa, 11 }, /* str=00000001010 */
+            { 0x9, 11 }, /* str=00000001001 */
+            { 0x4, 9 },  /* str=000000100 */
+        },
+        { /* i_total 10 */
+            { 0xb, 12 }, /* str=000000001011 */
+            { 0xe, 12 }, /* str=000000001110 */
+            { 0xd, 12 }, /* str=000000001101 */
+            { 0xc, 11 }, /* str=00000001100 */
+        },
+        { /* i_total 11 */
+            { 0x8, 12 }, /* str=000000001000 */
+            { 0xa, 12 }, /* str=000000001010 */
+            { 0x9, 12 }, /* str=000000001001 */
+            { 0x8, 11 }, /* str=00000001000 */
+        },
+        { /* i_total 12 */
+            { 0xf, 13 }, /* str=0000000001111 */
+            { 0xe, 13 }, /* str=0000000001110 */
+            { 0xd, 13 }, /* str=0000000001101 */
+            { 0xc, 12 }, /* str=000000001100 */
+        },
+        { /* i_total 13 */
+            { 0xb, 13 }, /* str=0000000001011 */
+            { 0xa, 13 }, /* str=0000000001010 */
+            { 0x9, 13 }, /* str=0000000001001 */
+            { 0xc, 13 }, /* str=0000000001100 */
+        },
+        { /* i_total 14 */
+            { 0x7, 13 }, /* str=0000000000111 */
+            { 0xb, 14 }, /* str=00000000001011 */
+            { 0x6, 13 }, /* str=0000000000110 */
+            { 0x8, 13 }, /* str=0000000001000 */
+        },
+        { /* i_total 15 */
+            { 0x9, 14 }, /* str=00000000001001 */
+            { 0x8, 14 }, /* str=00000000001000 */
+            { 0xa, 14 }, /* str=00000000001010 */
+            { 0x1, 13 }, /* str=0000000000001 */
+        },
+        { /* i_total 16 */
+            { 0x7, 14 }, /* str=00000000000111 */
+            { 0x6, 14 }, /* str=00000000000110 */
+            { 0x5, 14 }, /* str=00000000000101 */
+            { 0x4, 14 }, /* str=00000000000100 */
+        },
+    },
+    { /* table 2 */
+        { /* i_total 1 */
+            { 0xf, 6 }, /* str=001111 */
+            { 0xe, 4 }, /* str=1110 */
+        },
+        { /* i_total 2 */
+            { 0xb, 6 }, /* str=001011 */
+            { 0xf, 5 }, /* str=01111 */
+            { 0xd, 4 }, /* str=1101 */
+        },
+        { /* i_total 3 */
+            { 0x8, 6 }, /* str=001000 */
+            { 0xc, 5 }, /* str=01100 */
+            { 0xe, 5 }, /* str=01110 */
+            { 0xc, 4 }, /* str=1100 */
+        },
+        { /* i_total 4 */
+            { 0xf, 7 }, /* str=0001111 */
+            { 0xa, 5 }, /* str=01010 */
+            { 0xb, 5 }, /* str=01011 */
+            { 0xb, 4 }, /* str=1011 */
+        },
+        { /* i_total 5 */
+            { 0xb, 7 }, /* str=0001011 */
+            { 0x8, 5 }, /* str=01000 */
+            { 0x9, 5 }, /* str=01001 */
+            { 0xa, 4 }, /* str=1010 */
+        },
+        { /* i_total 6 */
+            { 0x9, 7 }, /* str=0001001 */
+            { 0xe, 6 }, /* str=001110 */
+            { 0xd, 6 }, /* str=001101 */
+            { 0x9, 4 }, /* str=1001 */
+        },
+        { /* i_total 7 */
+            { 0x8, 7 }, /* str=0001000 */
+            { 0xa, 6 }, /* str=001010 */
+            { 0x9, 6 }, /* str=001001 */
+            { 0x8, 4 }, /* str=1000 */
+        },
+        { /* i_total 8 */
+            { 0xf, 8 }, /* str=00001111 */
+            { 0xe, 7 }, /* str=0001110 */
+            { 0xd, 7 }, /* str=0001101 */
+            { 0xd, 5 }, /* str=01101 */
+        },
+        { /* i_total 9 */
+            { 0xb, 8 }, /* str=00001011 */
+            { 0xe, 8 }, /* str=00001110 */
+            { 0xa, 7 }, /* str=0001010 */
+            { 0xc, 6 }, /* str=001100 */
+        },
+        { /* i_total 10 */
+            { 0xf, 9 }, /* str=000001111 */
+            { 0xa, 8 }, /* str=00001010 */
+            { 0xd, 8 }, /* str=00001101 */
+            { 0xc, 7 }, /* str=0001100 */
+        },
+        { /* i_total 11 */
+            { 0xb, 9 }, /* str=000001011 */
+            { 0xe, 9 }, /* str=000001110 */
+            { 0x9, 8 }, /* str=00001001 */
+            { 0xc, 8 }, /* str=00001100 */
+        },
+        { /* i_total 12 */
+            { 0x8, 9 }, /* str=000001000 */
+            { 0xa, 9 }, /* str=000001010 */
+            { 0xd, 9 }, /* str=000001101 */
+            { 0x8, 8 }, /* str=00001000 */
+        },
+        { /* i_total 13 */
+            { 0xd, 10 }, /* str=0000001101 */
+            { 0x7, 9 },  /* str=000000111 */
+            { 0x9, 9 },  /* str=000001001 */
+            { 0xc, 9 },  /* str=000001100 */
+        },
+        { /* i_total 14 */
+            { 0x9, 10 }, /* str=0000001001 */
+            { 0xc, 10 }, /* str=0000001100 */
+            { 0xb, 10 }, /* str=0000001011 */
+            { 0xa, 10 }, /* str=0000001010 */
+        },
+        { /* i_total 15 */
+            { 0x5, 10 }, /* str=0000000101 */
+            { 0x8, 10 }, /* str=0000001000 */
+            { 0x7, 10 }, /* str=0000000111 */
+            { 0x6, 10 }, /* str=0000000110 */
+        },
+        { /* i_total 16 */
+            { 0x1, 10 }, /* str=0000000001 */
+            { 0x4, 10 }, /* str=0000000100 */
+            { 0x3, 10 }, /* str=0000000011 */
+            { 0x2, 10 }, /* str=0000000010 */
+        },
+    },
+    { /* table 3 */
+        { /* i_total 1 */
+            { 0x0, 6 }, /* str=000000 */
+            { 0x1, 6 }, /* str=000001 */
+        },
+        { /* i_total 2 */
+            { 0x4, 6 }, /* str=000100 */
+            { 0x5, 6 }, /* str=000101 */
+            { 0x6, 6 }, /* str=000110 */
+        },
+        { /* i_total 3 */
+            { 0x8, 6 }, /* str=001000 */
+            { 0x9, 6 }, /* str=001001 */
+            { 0xa, 6 }, /* str=001010 */
+            { 0xb, 6 }, /* str=001011 */
+        },
+        { /* i_total 4 */
+            { 0xc, 6 }, /* str=001100 */
+            { 0xd, 6 }, /* str=001101 */
+            { 0xe, 6 }, /* str=001110 */
+            { 0xf, 6 }, /* str=001111 */
+        },
+        { /* i_total 5 */
+            { 0x10, 6 }, /* str=010000 */
+            { 0x11, 6 }, /* str=010001 */
+            { 0x12, 6 }, /* str=010010 */
+            { 0x13, 6 }, /* str=010011 */
+        },
+        { /* i_total 6 */
+            { 0x14, 6 }, /* str=010100 */
+            { 0x15, 6 }, /* str=010101 */
+            { 0x16, 6 }, /* str=010110 */
+            { 0x17, 6 }, /* str=010111 */
+        },
+        { /* i_total 7 */
+            { 0x18, 6 }, /* str=011000 */
+            { 0x19, 6 }, /* str=011001 */
+            { 0x1a, 6 }, /* str=011010 */
+            { 0x1b, 6 }, /* str=011011 */
+        },
+        { /* i_total 8 */
+            { 0x1c, 6 }, /* str=011100 */
+            { 0x1d, 6 }, /* str=011101 */
+            { 0x1e, 6 }, /* str=011110 */
+            { 0x1f, 6 }, /* str=011111 */
+        },
+        { /* i_total 9 */
+            { 0x20, 6 }, /* str=100000 */
+            { 0x21, 6 }, /* str=100001 */
+            { 0x22, 6 }, /* str=100010 */
+            { 0x23, 6 }, /* str=100011 */
+        },
+        { /* i_total 10 */
+            { 0x24, 6 }, /* str=100100 */
+            { 0x25, 6 }, /* str=100101 */
+            { 0x26, 6 }, /* str=100110 */
+            { 0x27, 6 }, /* str=100111 */
+        },
+        { /* i_total 11 */
+            { 0x28, 6 }, /* str=101000 */
+            { 0x29, 6 }, /* str=101001 */
+            { 0x2a, 6 }, /* str=101010 */
+            { 0x2b, 6 }, /* str=101011 */
+        },
+        { /* i_total 12 */
+            { 0x2c, 6 }, /* str=101100 */
+            { 0x2d, 6 }, /* str=101101 */
+            { 0x2e, 6 }, /* str=101110 */
+            { 0x2f, 6 }, /* str=101111 */
+        },
+        { /* i_total 13 */
+            { 0x30, 6 }, /* str=110000 */
+            { 0x31, 6 }, /* str=110001 */
+            { 0x32, 6 }, /* str=110010 */
+            { 0x33, 6 }, /* str=110011 */
+        },
+        { /* i_total 14 */
+            { 0x34, 6 }, /* str=110100 */
+            { 0x35, 6 }, /* str=110101 */
+            { 0x36, 6 }, /* str=110110 */
+            { 0x37, 6 }, /* str=110111 */
+        },
+        { /* i_total 15 */
+            { 0x38, 6 }, /* str=111000 */
+            { 0x39, 6 }, /* str=111001 */
+            { 0x3a, 6 }, /* str=111010 */
+            { 0x3b, 6 }, /* str=111011 */
+        },
+        { /* i_total 16 */
+            { 0x3c, 6 }, /* str=111100 */
+            { 0x3d, 6 }, /* str=111101 */
+            { 0x3e, 6 }, /* str=111110 */
+            { 0x3f, 6 }, /* str=111111 */
+        },
+    },
+    { /* table 4 */
+        { /* i_total 1 */
+            { 0x7, 6 }, /* str=000111 */
+            { 0x1, 1 }, /* str=1 */
+        },
+        { /* i_total 2 */
+            { 0x4, 6 }, /* str=000100 */
+            { 0x6, 6 }, /* str=000110 */
+            { 0x1, 3 }, /* str=001 */
+        },
+        { /* i_total 3 */
+            { 0x3, 6 }, /* str=000011 */
+            { 0x3, 7 }, /* str=0000011 */
+            { 0x2, 7 }, /* str=0000010 */
+            { 0x5, 6 }, /* str=000101 */
+        },
+        { /* i_total 4 */
+            { 0x2, 6 }, /* str=000010 */
+            { 0x3, 8 }, /* str=00000011 */
+            { 0x2, 8 }, /* str=00000010 */
+            { 0x0, 7 }, /* str=0000000 */
+        },
+    },
+    { /* table 5 */
+        { /* i_total 1 */
+            { 0xf, 7 }, /* str=0001111 */
+            { 0x1, 2 }, /* str=01 */
+        },
+        { /* i_total 2 */
+            { 0xe, 7 }, /* str=0001110 */
+            { 0xd, 7 }, /* str=0001101 */
+            { 0x1, 3 }, /* str=001 */
+        },
+        { /* i_total 3 */
+            { 0x7, 9 }, /* str=000000111 */
+            { 0xc, 7 }, /* str=0001100 */
+            { 0xb, 7 }, /* str=0001011 */
+            { 0x1, 5 }, /* str=00001 */
+        },
+        { /* i_total 4 */
+            { 0x6, 9 }, /* str=000000110 */
+            { 0x5, 9 }, /* str=000000101 */
+            { 0xa, 7 }, /* str=0001010 */
+            { 0x1, 6 }, /* str=000001 */
+        },
+        { /* i_total 5 */
+            { 0x7, 10 }, /* str=0000000111 */
+            { 0x6, 10 }, /* str=0000000110 */
+            { 0x4, 9 },  /* str=000000100 */
+            { 0x9, 7 },  /* str=0001001 */
+        },
+        { /* i_total 6 */
+            { 0x7, 11 }, /* str=00000000111 */
+            { 0x6, 11 }, /* str=00000000110 */
+            { 0x5, 10 }, /* str=0000000101 */
+            { 0x8, 7 },  /* str=0001000 */
+        },
+        { /* i_total 7 */
+            { 0x7, 12 }, /* str=000000000111 */
+            { 0x6, 12 }, /* str=000000000110 */
+            { 0x5, 11 }, /* str=00000000101 */
+            { 0x4, 10 }, /* str=0000000100 */
+        },
+        { /* i_total 8 */
+            { 0x7, 13 }, /* str=0000000000111 */
+            { 0x5, 12 }, /* str=000000000101 */
+            { 0x4, 12 }, /* str=000000000100 */
+            { 0x4, 11 }, /* str=00000000100 */
+        },
+    },
+};
+
+/* total_zeros code words as { i_bits, i_size }, indexed [i_total_coeff-1][i_total_zeros] */
+const vlc_t x264_total_zeros[15][16] =
+{
+    { /* i_total 1 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x3, 4 }, /* str=0011 */
+        { 0x2, 4 }, /* str=0010 */
+        { 0x3, 5 }, /* str=00011 */
+        { 0x2, 5 }, /* str=00010 */
+        { 0x3, 6 }, /* str=000011 */
+        { 0x2, 6 }, /* str=000010 */
+        { 0x3, 7 }, /* str=0000011 */
+        { 0x2, 7 }, /* str=0000010 */
+        { 0x3, 8 }, /* str=00000011 */
+        { 0x2, 8 }, /* str=00000010 */
+        { 0x3, 9 }, /* str=000000011 */
+        { 0x2, 9 }, /* str=000000010 */
+        { 0x1, 9 }, /* str=000000001 */
+    },
+    { /* i_total 2 */
+        { 0x7, 3 }, /* str=111 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x5, 4 }, /* str=0101 */
+        { 0x4, 4 }, /* str=0100 */
+        { 0x3, 4 }, /* str=0011 */
+        { 0x2, 4 }, /* str=0010 */
+        { 0x3, 5 }, /* str=00011 */
+        { 0x2, 5 }, /* str=00010 */
+        { 0x3, 6 }, /* str=000011 */
+        { 0x2, 6 }, /* str=000010 */
+        { 0x1, 6 }, /* str=000001 */
+        { 0x0, 6 }, /* str=000000 */
+    },
+    { /* i_total 3 */
+        { 0x5, 4 }, /* str=0101 */
+        { 0x7, 3 }, /* str=111 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 4 }, /* str=0100 */
+        { 0x3, 4 }, /* str=0011 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 4 }, /* str=0010 */
+        { 0x3, 5 }, /* str=00011 */
+        { 0x2, 5 }, /* str=00010 */
+        { 0x1, 6 }, /* str=000001 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x0, 6 }, /* str=000000 */
+    },
+    { /* i_total 4 */
+        { 0x3, 5 }, /* str=00011 */
+        { 0x7, 3 }, /* str=111 */
+        { 0x5, 4 }, /* str=0101 */
+        { 0x4, 4 }, /* str=0100 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x3, 4 }, /* str=0011 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 4 }, /* str=0010 */
+        { 0x2, 5 }, /* str=00010 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x0, 5 }, /* str=00000 */
+    },
+    { /* i_total 5 */
+        { 0x5, 4 }, /* str=0101 */
+        { 0x4, 4 }, /* str=0100 */
+        { 0x3, 4 }, /* str=0011 */
+        { 0x7, 3 }, /* str=111 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 4 }, /* str=0010 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x0, 5 }, /* str=00000 */
+    },
+    { /* i_total 6 */
+        { 0x1, 6 }, /* str=000001 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x7, 3 }, /* str=111 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x0, 6 }, /* str=000000 */
+    },
+    { /* i_total 7 */
+        { 0x1, 6 }, /* str=000001 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x0, 6 }, /* str=000000 */
+    },
+    { /* i_total 8 */
+        { 0x1, 6 }, /* str=000001 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x0, 6 }, /* str=000000 */
+    },
+    { /* i_total 9 */
+        { 0x1, 6 }, /* str=000001 */
+        { 0x0, 6 }, /* str=000000 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 5 }, /* str=00001 */
+    },
+    { /* i_total 10 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x0, 5 }, /* str=00000 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 4 }, /* str=0001 */
+    },
+    { /* i_total 11 */
+        { 0x0, 4 }, /* str=0000 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x3, 3 }, /* str=011 */
+    },
+    { /* i_total 12 */
+        { 0x0, 4 }, /* str=0000 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x1, 3 }, /* str=001 */
+    },
+    { /* i_total 13 */
+        { 0x0, 3 }, /* str=000 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x1, 2 }, /* str=01 */
+    },
+    { /* i_total 14 */
+        { 0x0, 2 }, /* str=00 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 1 }, /* str=1 */
+    },
+    { /* i_total 15 */
+        { 0x0, 1 }, /* str=0 */
+        { 0x1, 1 }, /* str=1 */
+    },
+};
+
+/* total_zeros code words for the 2x2 DC case as { i_bits, i_size }, indexed [i_total_coeff-1][i_total_zeros] */
+const vlc_t x264_total_zeros_2x2_dc[3][4] =
+{
+    { /* i_total 1 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x0, 3 }  /* str=000 */
+    },
+    { /* i_total 2 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x0, 2 }, /* str=00 */
+    },
+    { /* i_total 3 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x0, 1 }, /* str=0 */
+    },
+};
+
+/* total_zeros code words for the 2x4 DC case as { i_bits, i_size }, indexed [i_total_coeff-1][i_total_zeros] */
+const vlc_t x264_total_zeros_2x4_dc[7][8] =
+{
+    { /* i_total 1 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 4 }, /* str=0010 */
+        { 0x3, 4 }, /* str=0011 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x0, 5 }, /* str=00000 */
+    },
+    { /* i_total 2 */
+        { 0x0, 3 }, /* str=000 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x7, 3 }, /* str=111 */
+    },
+    { /* i_total 3 */
+        { 0x0, 3 }, /* str=000 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x7, 3 }, /* str=111 */
+    },
+    { /* i_total 4 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x0, 2 }, /* str=00 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x7, 3 }, /* str=111 */
+    },
+    { /* i_total 5 */
+        { 0x0, 2 }, /* str=00 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x3, 2 }, /* str=11 */
+    },
+    { /* i_total 6 */
+        { 0x0, 2 }, /* str=00 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 1 }, /* str=1 */
+    },
+    { /* i_total 7 */
+        { 0x0, 1 }, /* str=0 */
+        { 0x1, 1 }, /* str=1 */
+    }
+};
+
+/* run_before code words as { i_bits, i_size }, indexed [MIN( i_zero_left-1, 6 )][run_before] */
+const vlc_t x264_run_before_init[7][16] =
+{
+    { /* i_zero_left 1 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x0, 1 }, /* str=0 */
+    },
+    { /* i_zero_left 2 */
+        { 0x1, 1 }, /* str=1 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x0, 2 }, /* str=00 */
+    },
+    { /* i_zero_left 3 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x0, 2 }, /* str=00 */
+    },
+    { /* i_zero_left 4 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x1, 2 }, /* str=01 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x0, 3 }, /* str=000 */
+    },
+    { /* i_zero_left 5 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x2, 2 }, /* str=10 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x0, 3 }, /* str=000 */
+    },
+    { /* i_zero_left 6 */
+        { 0x3, 2 }, /* str=11 */
+        { 0x0, 3 }, /* str=000 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 3 }, /* str=100 */
+    },
+    { /* i_zero_left >6 */
+        { 0x7, 3 }, /* str=111 */
+        { 0x6, 3 }, /* str=110 */
+        { 0x5, 3 }, /* str=101 */
+        { 0x4, 3 }, /* str=100 */
+        { 0x3, 3 }, /* str=011 */
+        { 0x2, 3 }, /* str=010 */
+        { 0x1, 3 }, /* str=001 */
+        { 0x1, 4 }, /* str=0001 */
+        { 0x1, 5 }, /* str=00001 */
+        { 0x1, 6 }, /* str=000001 */
+        { 0x1, 7 }, /* str=0000001 */
+        { 0x1, 8 }, /* str=00000001 */
+        { 0x1, 9 }, /* str=000000001 */
+        { 0x1, 10 }, /* str=0000000001 */
+        { 0x1, 11 }, /* str=00000000001 */
+    },
+};
+
+/* Zero-initialized buffer; of its users, psy_trellis_init() has the largest size requirement: 16*FDEC_STRIDE*sizeof(pixel) */
+ALIGNED_64( uint8_t x264_zero[1024] ) = { 0 };
diff -Nru x264-0.152.2854+gite9a5903/common/tables.h x264-0.158.2988+git-20191101.7817004/common/tables.h
--- x264-0.152.2854+gite9a5903/common/tables.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/tables.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,99 @@
+/*****************************************************************************
+ * tables.h: const tables
+ *****************************************************************************
+ * Copyright (C) 2003-2019 x264 project
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_TABLES_H
+#define X264_TABLES_H
+
+typedef struct /* one variable-length code word */
+{
+    uint8_t i_bits; /* code word value, i.e. the bit string taken as a number */
+    uint8_t i_size; /* code word length in bits */
+} vlc_t;
+
+X264_API extern const x264_level_t x264_levels[];
+
+extern const uint8_t x264_exp2_lut[64];
+extern const float   x264_log2_lut[128];
+extern const float   x264_log2_lz_lut[32];
+
+#define QP_MAX_MAX (51+6*2+18)
+extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
+extern const int      x264_lambda2_tab[QP_MAX_MAX+1];
+extern const int      x264_trellis_lambda2_tab[2][QP_MAX_MAX+1];
+#define MAX_CHROMA_LAMBDA_OFFSET 36
+extern const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1];
+
+extern const uint8_t x264_hpel_ref0[16];
+extern const uint8_t x264_hpel_ref1[16];
+
+extern const uint8_t x264_cqm_jvt4i[16];
+extern const uint8_t x264_cqm_jvt4p[16];
+extern const uint8_t x264_cqm_jvt8i[64];
+extern const uint8_t x264_cqm_jvt8p[64];
+extern const uint8_t x264_cqm_flat16[64];
+extern const uint8_t * const x264_cqm_jvt[8];
+extern const uint8_t x264_cqm_avci50_4ic[16];
+extern const uint8_t x264_cqm_avci50_p_8iy[64];
+extern const uint8_t x264_cqm_avci50_1080i_8iy[64];
+extern const uint8_t x264_cqm_avci100_720p_4ic[16];
+extern const uint8_t x264_cqm_avci100_720p_8iy[64];
+extern const uint8_t x264_cqm_avci100_1080_4ic[16];
+extern const uint8_t x264_cqm_avci100_1080i_8iy[64];
+extern const uint8_t x264_cqm_avci100_1080p_8iy[64];
+
+extern const uint8_t x264_decimate_table4[16];
+extern const uint8_t x264_decimate_table8[64];
+
+extern const uint32_t x264_dct4_weight_tab[16];
+extern const uint32_t x264_dct8_weight_tab[64];
+extern const uint32_t x264_dct4_weight2_tab[16];
+extern const uint32_t x264_dct8_weight2_tab[64];
+
+extern const int8_t   x264_cabac_context_init_I[1024][2];
+extern const int8_t   x264_cabac_context_init_PB[3][1024][2];
+extern const uint8_t  x264_cabac_range_lps[64][4];
+extern const uint8_t  x264_cabac_transition[128][2];
+extern const uint8_t  x264_cabac_renorm_shift[64];
+extern const uint16_t x264_cabac_entropy[128];
+
+extern const uint8_t  x264_significant_coeff_flag_offset_8x8[2][64];
+extern const uint8_t  x264_last_coeff_flag_offset_8x8[63];
+extern const uint8_t  x264_coeff_flag_offset_chroma_422_dc[7];
+extern const uint16_t x264_significant_coeff_flag_offset[2][16];
+extern const uint16_t x264_last_coeff_flag_offset[2][16];
+extern const uint16_t x264_coeff_abs_level_m1_offset[16];
+extern const uint8_t  x264_count_cat_m1[14];
+
+extern const vlc_t x264_coeff0_token[6];
+extern const vlc_t x264_coeff_token[6][16][4];
+extern const vlc_t x264_total_zeros[15][16];
+extern const vlc_t x264_total_zeros_2x2_dc[3][4];
+extern const vlc_t x264_total_zeros_2x4_dc[7][8];
+extern const vlc_t x264_run_before_init[7][16];
+
+extern uint8_t x264_zero[1024];
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/threadpool.c x264-0.158.2988+git-20191101.7817004/common/threadpool.c
--- x264-0.152.2854+gite9a5903/common/threadpool.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/threadpool.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * threadpool.c: thread pooling
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -34,7 +34,7 @@
 
 struct x264_threadpool_t
 {
-    int            exit;
+    volatile int   exit;
     int            threads;
     x264_pthread_t *thread_handle;
     void           (*init_func)(void *);
@@ -47,7 +47,7 @@
     x264_sync_frame_list_t done;   /* list of jobs that have finished processing */
 };
 
-static void *x264_threadpool_thread( x264_threadpool_t *pool )
+REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool )
 {
     if( pool->init_func )
         pool->init_func( pool->init_arg );
@@ -66,7 +66,7 @@
         x264_pthread_mutex_unlock( &pool->run.mutex );
         if( !job )
             continue;
-        job->ret = (void*)x264_stack_align( job->func, job->arg ); /* execute the function */
+        job->ret = job->func( job->arg );
         x264_sync_frame_list_push( &pool->done, (void*)job );
     }
     return NULL;
@@ -78,6 +78,9 @@
     if( threads <= 0 )
         return -1;
 
+    if( x264_threading_init() < 0 )
+        return -1;
+
     x264_threadpool_t *pool;
     CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) );
     *p_pool = pool;
@@ -100,7 +103,7 @@
        x264_sync_frame_list_push( &pool->uninit, (void*)job );
     }
     for( int i = 0; i < pool->threads; i++ )
-        if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) )
+        if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)threadpool_thread, pool ) )
             goto fail;
 
     return 0;
@@ -137,7 +140,7 @@
     }
 }
 
-static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist )
+static void threadpool_list_delete( x264_sync_frame_list_t *slist )
 {
     for( int i = 0; slist->list[i]; i++ )
     {
@@ -156,9 +159,9 @@
     for( int i = 0; i < pool->threads; i++ )
         x264_pthread_join( pool->thread_handle[i], NULL );
 
-    x264_threadpool_list_delete( &pool->uninit );
-    x264_threadpool_list_delete( &pool->run );
-    x264_threadpool_list_delete( &pool->done );
+    threadpool_list_delete( &pool->uninit );
+    threadpool_list_delete( &pool->run );
+    threadpool_list_delete( &pool->done );
     x264_free( pool->thread_handle );
     x264_free( pool );
 }
diff -Nru x264-0.152.2854+gite9a5903/common/threadpool.h x264-0.158.2988+git-20191101.7817004/common/threadpool.h
--- x264-0.152.2854+gite9a5903/common/threadpool.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/threadpool.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * threadpool.h: thread pooling
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -29,11 +29,15 @@
 typedef struct x264_threadpool_t x264_threadpool_t;
 
 #if HAVE_THREAD
-int   x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
-                            void (*init_func)(void *), void *init_arg );
-void  x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
-void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
-void  x264_threadpool_delete( x264_threadpool_t *pool );
+#define x264_threadpool_init x264_template(threadpool_init)
+X264_API int   x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
+                                     void (*init_func)(void *), void *init_arg );
+#define x264_threadpool_run x264_template(threadpool_run)
+X264_API void  x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
+#define x264_threadpool_wait x264_template(threadpool_wait)
+X264_API void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
+#define x264_threadpool_delete x264_template(threadpool_delete)
+X264_API void  x264_threadpool_delete( x264_threadpool_t *pool );
 #else
 #define x264_threadpool_init(p,t,f,a) -1
 #define x264_threadpool_run(p,f,a)
diff -Nru x264-0.152.2854+gite9a5903/common/vlc.c x264-0.158.2988+git-20191101.7817004/common/vlc.c
--- x264-0.152.2854+gite9a5903/common/vlc.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/vlc.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * vlc.c : vlc tables
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Fiona Glaser <fiona@x264.com>
@@ -27,777 +27,6 @@
 
 #include "common.h"
 
-/* [nC] */
-const vlc_t x264_coeff0_token[6] =
-{
-    { 0x1, 1 }, /* str=1 */
-    { 0x3, 2 }, /* str=11 */
-    { 0xf, 4 }, /* str=1111 */
-    { 0x3, 6 }, /* str=000011 */
-    { 0x1, 2 }, /* str=01 */
-    { 0x1, 1 }, /* str=1 */
-};
-
-/* [nC][i_total_coeff-1][i_trailing] */
-const vlc_t x264_coeff_token[6][16][4] =
-{
-    { /* table 0 */
-        { /* i_total 1 */
-            { 0x5, 6 }, /* str=000101 */
-            { 0x1, 2 }, /* str=01 */
-        },
-        { /* i_total 2 */
-            { 0x7, 8 }, /* str=00000111 */
-            { 0x4, 6 }, /* str=000100 */
-            { 0x1, 3 }, /* str=001 */
-        },
-        { /* i_total 3 */
-            { 0x7, 9 }, /* str=000000111 */
-            { 0x6, 8 }, /* str=00000110 */
-            { 0x5, 7 }, /* str=0000101 */
-            { 0x3, 5 }, /* str=00011 */
-        },
-        { /* i_total 4 */
-            { 0x7, 10 }, /* str=0000000111 */
-            { 0x6, 9 },  /* str=000000110 */
-            { 0x5, 8 },  /* str=00000101 */
-            { 0x3, 6 },  /* str=000011 */
-        },
-        { /* i_total 5 */
-            { 0x7, 11 }, /* str=00000000111 */
-            { 0x6, 10 }, /* str=0000000110 */
-            { 0x5, 9 },  /* str=000000101 */
-            { 0x4, 7 },  /* str=0000100 */
-        },
-        { /* i_total 6 */
-            { 0xf, 13 }, /* str=0000000001111 */
-            { 0x6, 11 }, /* str=00000000110 */
-            { 0x5, 10 }, /* str=0000000101 */
-            { 0x4, 8 },  /* str=00000100 */
-        },
-        { /* i_total 7 */
-            { 0xb, 13 }, /* str=0000000001011 */
-            { 0xe, 13 }, /* str=0000000001110 */
-            { 0x5, 11 }, /* str=00000000101 */
-            { 0x4, 9 },  /* str=000000100 */
-        },
-        { /* i_total 8 */
-            { 0x8, 13 }, /* str=0000000001000 */
-            { 0xa, 13 }, /* str=0000000001010 */
-            { 0xd, 13 }, /* str=0000000001101 */
-            { 0x4, 10 }, /* str=0000000100 */
-        },
-        { /* i_total 9 */
-            { 0xf, 14 }, /* str=00000000001111 */
-            { 0xe, 14 }, /* str=00000000001110 */
-            { 0x9, 13 }, /* str=0000000001001 */
-            { 0x4, 11 }, /* str=00000000100 */
-        },
-        { /* i_total 10 */
-            { 0xb, 14 }, /* str=00000000001011 */
-            { 0xa, 14 }, /* str=00000000001010 */
-            { 0xd, 14 }, /* str=00000000001101 */
-            { 0xc, 13 }, /* str=0000000001100 */
-        },
-        { /* i_total 14 */
-            { 0xf, 15 }, /* str=000000000001111 */
-            { 0xe, 15 }, /* str=000000000001110 */
-            { 0x9, 14 }, /* str=00000000001001 */
-            { 0xc, 14 }, /* str=00000000001100 */
-        },
-        { /* i_total 12 */
-            { 0xb, 15 }, /* str=000000000001011 */
-            { 0xa, 15 }, /* str=000000000001010 */
-            { 0xd, 15 }, /* str=000000000001101 */
-            { 0x8, 14 }, /* str=00000000001000 */
-        },
-        { /* i_total 13 */
-            { 0xf, 16 }, /* str=0000000000001111 */
-            { 0x1, 15 }, /* str=000000000000001 */
-            { 0x9, 15 }, /* str=000000000001001 */
-            { 0xc, 15 }, /* str=000000000001100 */
-        },
-        { /* i_total 14 */
-            { 0xb, 16 }, /* str=0000000000001011 */
-            { 0xe, 16 }, /* str=0000000000001110 */
-            { 0xd, 16 }, /* str=0000000000001101 */
-            { 0x8, 15 }, /* str=000000000001000 */
-        },
-        { /* i_total 15 */
-            { 0x7, 16 }, /* str=0000000000000111 */
-            { 0xa, 16 }, /* str=0000000000001010 */
-            { 0x9, 16 }, /* str=0000000000001001 */
-            { 0xc, 16 }, /* str=0000000000001100 */
-        },
-        { /* i_total 16 */
-            { 0x4, 16 }, /* str=0000000000000100 */
-            { 0x6, 16 }, /* str=0000000000000110 */
-            { 0x5, 16 }, /* str=0000000000000101 */
-            { 0x8, 16 }, /* str=0000000000001000 */
-        },
-    },
-    { /* table 1 */
-        { /* i_total 1 */
-            { 0xb, 6 }, /* str=001011 */
-            { 0x2, 2 }, /* str=10 */
-        },
-        { /* i_total 2 */
-            { 0x7, 6 }, /* str=000111 */
-            { 0x7, 5 }, /* str=00111 */
-            { 0x3, 3 }, /* str=011 */
-        },
-        { /* i_total 3 */
-            { 0x7, 7 }, /* str=0000111 */
-            { 0xa, 6 }, /* str=001010 */
-            { 0x9, 6 }, /* str=001001 */
-            { 0x5, 4 }, /* str=0101 */
-        },
-        { /* i_total 4 */
-            { 0x7, 8 }, /* str=00000111 */
-            { 0x6, 6 }, /* str=000110 */
-            { 0x5, 6 }, /* str=000101 */
-            { 0x4, 4 }, /* str=0100 */
-        },
-        { /* i_total 5 */
-            { 0x4, 8 }, /* str=00000100 */
-            { 0x6, 7 }, /* str=0000110 */
-            { 0x5, 7 }, /* str=0000101 */
-            { 0x6, 5 }, /* str=00110 */
-        },
-        { /* i_total 6 */
-            { 0x7, 9 }, /* str=000000111 */
-            { 0x6, 8 }, /* str=00000110 */
-            { 0x5, 8 }, /* str=00000101 */
-            { 0x8, 6 }, /* str=001000 */
-        },
-        { /* i_total 7 */
-            { 0xf, 11 }, /* str=00000001111 */
-            { 0x6, 9 },  /* str=000000110 */
-            { 0x5, 9 },  /* str=000000101 */
-            { 0x4, 6 },  /* str=000100 */
-        },
-        { /* i_total 8 */
-            { 0xb, 11 }, /* str=00000001011 */
-            { 0xe, 11 }, /* str=00000001110 */
-            { 0xd, 11 }, /* str=00000001101 */
-            { 0x4, 7 },  /* str=0000100 */
-        },
-        { /* i_total 9 */
-            { 0xf, 12 }, /* str=000000001111 */
-            { 0xa, 11 }, /* str=00000001010 */
-            { 0x9, 11 }, /* str=00000001001 */
-            { 0x4, 9 },  /* str=000000100 */
-        },
-        { /* i_total 10 */
-            { 0xb, 12 }, /* str=000000001011 */
-            { 0xe, 12 }, /* str=000000001110 */
-            { 0xd, 12 }, /* str=000000001101 */
-            { 0xc, 11 }, /* str=00000001100 */
-        },
-        { /* i_total 11 */
-            { 0x8, 12 }, /* str=000000001000 */
-            { 0xa, 12 }, /* str=000000001010 */
-            { 0x9, 12 }, /* str=000000001001 */
-            { 0x8, 11 }, /* str=00000001000 */
-        },
-        { /* i_total 12 */
-            { 0xf, 13 }, /* str=0000000001111 */
-            { 0xe, 13 }, /* str=0000000001110 */
-            { 0xd, 13 }, /* str=0000000001101 */
-            { 0xc, 12 }, /* str=000000001100 */
-        },
-        { /* i_total 13 */
-            { 0xb, 13 }, /* str=0000000001011 */
-            { 0xa, 13 }, /* str=0000000001010 */
-            { 0x9, 13 }, /* str=0000000001001 */
-            { 0xc, 13 }, /* str=0000000001100 */
-        },
-        { /* i_total 14 */
-            { 0x7, 13 }, /* str=0000000000111 */
-            { 0xb, 14 }, /* str=00000000001011 */
-            { 0x6, 13 }, /* str=0000000000110 */
-            { 0x8, 13 }, /* str=0000000001000 */
-        },
-        { /* i_total 15 */
-            { 0x9, 14 }, /* str=00000000001001 */
-            { 0x8, 14 }, /* str=00000000001000 */
-            { 0xa, 14 }, /* str=00000000001010 */
-            { 0x1, 13 }, /* str=0000000000001 */
-        },
-        { /* i_total 16 */
-            { 0x7, 14 }, /* str=00000000000111 */
-            { 0x6, 14 }, /* str=00000000000110 */
-            { 0x5, 14 }, /* str=00000000000101 */
-            { 0x4, 14 }, /* str=00000000000100 */
-        },
-    },
-    { /* table 2 */
-        { /* i_total 1 */
-            { 0xf, 6 }, /* str=001111 */
-            { 0xe, 4 }, /* str=1110 */
-        },
-        { /* i_total 2 */
-            { 0xb, 6 }, /* str=001011 */
-            { 0xf, 5 }, /* str=01111 */
-            { 0xd, 4 }, /* str=1101 */
-        },
-        { /* i_total 3 */
-            { 0x8, 6 }, /* str=001000 */
-            { 0xc, 5 }, /* str=01100 */
-            { 0xe, 5 }, /* str=01110 */
-            { 0xc, 4 }, /* str=1100 */
-        },
-        { /* i_total 4 */
-            { 0xf, 7 }, /* str=0001111 */
-            { 0xa, 5 }, /* str=01010 */
-            { 0xb, 5 }, /* str=01011 */
-            { 0xb, 4 }, /* str=1011 */
-        },
-        { /* i_total 5 */
-            { 0xb, 7 }, /* str=0001011 */
-            { 0x8, 5 }, /* str=01000 */
-            { 0x9, 5 }, /* str=01001 */
-            { 0xa, 4 }, /* str=1010 */
-        },
-        { /* i_total 6 */
-            { 0x9, 7 }, /* str=0001001 */
-            { 0xe, 6 }, /* str=001110 */
-            { 0xd, 6 }, /* str=001101 */
-            { 0x9, 4 }, /* str=1001 */
-        },
-        { /* i_total 7 */
-            { 0x8, 7 }, /* str=0001000 */
-            { 0xa, 6 }, /* str=001010 */
-            { 0x9, 6 }, /* str=001001 */
-            { 0x8, 4 }, /* str=1000 */
-        },
-        { /* i_total 8 */
-            { 0xf, 8 }, /* str=00001111 */
-            { 0xe, 7 }, /* str=0001110 */
-            { 0xd, 7 }, /* str=0001101 */
-            { 0xd, 5 }, /* str=01101 */
-        },
-        { /* i_total 9 */
-            { 0xb, 8 }, /* str=00001011 */
-            { 0xe, 8 }, /* str=00001110 */
-            { 0xa, 7 }, /* str=0001010 */
-            { 0xc, 6 }, /* str=001100 */
-        },
-        { /* i_total 10 */
-            { 0xf, 9 }, /* str=000001111 */
-            { 0xa, 8 }, /* str=00001010 */
-            { 0xd, 8 }, /* str=00001101 */
-            { 0xc, 7 }, /* str=0001100 */
-        },
-        { /* i_total 11 */
-            { 0xb, 9 }, /* str=000001011 */
-            { 0xe, 9 }, /* str=000001110 */
-            { 0x9, 8 }, /* str=00001001 */
-            { 0xc, 8 }, /* str=00001100 */
-        },
-        { /* i_total 12 */
-            { 0x8, 9 }, /* str=000001000 */
-            { 0xa, 9 }, /* str=000001010 */
-            { 0xd, 9 }, /* str=000001101 */
-            { 0x8, 8 }, /* str=00001000 */
-        },
-        { /* i_total 13 */
-            { 0xd, 10 }, /* str=0000001101 */
-            { 0x7, 9 },  /* str=000000111 */
-            { 0x9, 9 },  /* str=000001001 */
-            { 0xc, 9 },  /* str=000001100 */
-        },
-        { /* i_total 14 */
-            { 0x9, 10 }, /* str=0000001001 */
-            { 0xc, 10 }, /* str=0000001100 */
-            { 0xb, 10 }, /* str=0000001011 */
-            { 0xa, 10 }, /* str=0000001010 */
-        },
-        { /* i_total 15 */
-            { 0x5, 10 }, /* str=0000000101 */
-            { 0x8, 10 }, /* str=0000001000 */
-            { 0x7, 10 }, /* str=0000000111 */
-            { 0x6, 10 }, /* str=0000000110 */
-        },
-        { /* i_total 16 */
-            { 0x1, 10 }, /* str=0000000001 */
-            { 0x4, 10 }, /* str=0000000100 */
-            { 0x3, 10 }, /* str=0000000011 */
-            { 0x2, 10 }, /* str=0000000010 */
-        },
-    },
-    { /* table 3 */
-        { /* i_total 1 */
-            { 0x0, 6 }, /* str=000000 */
-            { 0x1, 6 }, /* str=000001 */
-        },
-        { /* i_total 2 */
-            { 0x4, 6 }, /* str=000100 */
-            { 0x5, 6 }, /* str=000101 */
-            { 0x6, 6 }, /* str=000110 */
-        },
-        { /* i_total 3 */
-            { 0x8, 6 }, /* str=001000 */
-            { 0x9, 6 }, /* str=001001 */
-            { 0xa, 6 }, /* str=001010 */
-            { 0xb, 6 }, /* str=001011 */
-        },
-        { /* i_total 4 */
-            { 0xc, 6 }, /* str=001100 */
-            { 0xd, 6 }, /* str=001101 */
-            { 0xe, 6 }, /* str=001110 */
-            { 0xf, 6 }, /* str=001111 */
-        },
-        { /* i_total 5 */
-            { 0x10, 6 }, /* str=010000 */
-            { 0x11, 6 }, /* str=010001 */
-            { 0x12, 6 }, /* str=010010 */
-            { 0x13, 6 }, /* str=010011 */
-        },
-        { /* i_total 6 */
-            { 0x14, 6 }, /* str=010100 */
-            { 0x15, 6 }, /* str=010101 */
-            { 0x16, 6 }, /* str=010110 */
-            { 0x17, 6 }, /* str=010111 */
-        },
-        { /* i_total 7 */
-            { 0x18, 6 }, /* str=011000 */
-            { 0x19, 6 }, /* str=011001 */
-            { 0x1a, 6 }, /* str=011010 */
-            { 0x1b, 6 }, /* str=011011 */
-        },
-        { /* i_total 8 */
-            { 0x1c, 6 }, /* str=011100 */
-            { 0x1d, 6 }, /* str=011101 */
-            { 0x1e, 6 }, /* str=011110 */
-            { 0x1f, 6 }, /* str=011111 */
-        },
-        { /* i_total 9 */
-            { 0x20, 6 }, /* str=100000 */
-            { 0x21, 6 }, /* str=100001 */
-            { 0x22, 6 }, /* str=100010 */
-            { 0x23, 6 }, /* str=100011 */
-        },
-        { /* i_total 10 */
-            { 0x24, 6 }, /* str=100100 */
-            { 0x25, 6 }, /* str=100101 */
-            { 0x26, 6 }, /* str=100110 */
-            { 0x27, 6 }, /* str=100111 */
-        },
-        { /* i_total 11 */
-            { 0x28, 6 }, /* str=101000 */
-            { 0x29, 6 }, /* str=101001 */
-            { 0x2a, 6 }, /* str=101010 */
-            { 0x2b, 6 }, /* str=101011 */
-        },
-        { /* i_total 12 */
-            { 0x2c, 6 }, /* str=101100 */
-            { 0x2d, 6 }, /* str=101101 */
-            { 0x2e, 6 }, /* str=101110 */
-            { 0x2f, 6 }, /* str=101111 */
-        },
-        { /* i_total 13 */
-            { 0x30, 6 }, /* str=110000 */
-            { 0x31, 6 }, /* str=110001 */
-            { 0x32, 6 }, /* str=110010 */
-            { 0x33, 6 }, /* str=110011 */
-        },
-        { /* i_total 14 */
-            { 0x34, 6 }, /* str=110100 */
-            { 0x35, 6 }, /* str=110101 */
-            { 0x36, 6 }, /* str=110110 */
-            { 0x37, 6 }, /* str=110111 */
-        },
-        { /* i_total 15 */
-            { 0x38, 6 }, /* str=111000 */
-            { 0x39, 6 }, /* str=111001 */
-            { 0x3a, 6 }, /* str=111010 */
-            { 0x3b, 6 }, /* str=111011 */
-        },
-        { /* i_total 16 */
-            { 0x3c, 6 }, /* str=111100 */
-            { 0x3d, 6 }, /* str=111101 */
-            { 0x3e, 6 }, /* str=111110 */
-            { 0x3f, 6 }, /* str=111111 */
-        },
-    },
-    { /* table 4 */
-        { /* i_total 1 */
-            { 0x7, 6 }, /* str=000111 */
-            { 0x1, 1 }, /* str=1 */
-        },
-        { /* i_total 2 */
-            { 0x4, 6 }, /* str=000100 */
-            { 0x6, 6 }, /* str=000110 */
-            { 0x1, 3 }, /* str=001 */
-        },
-        { /* i_total 3 */
-            { 0x3, 6 }, /* str=000011 */
-            { 0x3, 7 }, /* str=0000011 */
-            { 0x2, 7 }, /* str=0000010 */
-            { 0x5, 6 }, /* str=000101 */
-        },
-        { /* i_total 4 */
-            { 0x2, 6 }, /* str=000010 */
-            { 0x3, 8 }, /* str=00000011 */
-            { 0x2, 8 }, /* str=00000010 */
-            { 0x0, 7 }, /* str=0000000 */
-        },
-    },
-    { /* table 5 */
-        { /* i_total 1 */
-            { 0xf, 7 }, /* str=0001111 */
-            { 0x1, 2 }, /* str=01 */
-        },
-        { /* i_total 2 */
-            { 0xe, 7 }, /* str=0001110 */
-            { 0xd, 7 }, /* str=0001101 */
-            { 0x1, 3 }, /* str=001 */
-        },
-        { /* i_total 3 */
-            { 0x7, 9 }, /* str=000000111 */
-            { 0xc, 7 }, /* str=0001100 */
-            { 0xb, 7 }, /* str=0001011 */
-            { 0x1, 5 }, /* str=00001 */
-        },
-        { /* i_total 4 */
-            { 0x6, 9 }, /* str=000000110 */
-            { 0x5, 9 }, /* str=000000101 */
-            { 0xa, 7 }, /* str=0001010 */
-            { 0x1, 6 }, /* str=000001 */
-        },
-        { /* i_total 5 */
-            { 0x7, 10 }, /* str=0000000111 */
-            { 0x6, 10 }, /* str=0000000110 */
-            { 0x4, 9 },  /* str=000000100 */
-            { 0x9, 7 },  /* str=0001001 */
-        },
-        { /* i_total 6 */
-            { 0x7, 11 }, /* str=00000000111 */
-            { 0x6, 11 }, /* str=00000000110 */
-            { 0x5, 10 }, /* str=0000000101 */
-            { 0x8, 7 },  /* str=0001000 */
-        },
-        { /* i_total 7 */
-            { 0x7, 12 }, /* str=000000000111 */
-            { 0x6, 12 }, /* str=000000000110 */
-            { 0x5, 11 }, /* str=00000000101 */
-            { 0x4, 10 }, /* str=0000000100 */
-        },
-        { /* i_total 8 */
-            { 0x7, 13 }, /* str=0000000000111 */
-            { 0x5, 12 }, /* str=000000000101 */
-            { 0x4, 12 }, /* str=000000000100 */
-            { 0x4, 11 }, /* str=00000000100 */
-        },
-    },
-};
-
-/* [i_total_coeff-1][i_total_zeros] */
-const vlc_t x264_total_zeros[15][16] =
-{
-    { /* i_total 1 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x3, 4 }, /* str=0011 */
-        { 0x2, 4 }, /* str=0010 */
-        { 0x3, 5 }, /* str=00011 */
-        { 0x2, 5 }, /* str=00010 */
-        { 0x3, 6 }, /* str=000011 */
-        { 0x2, 6 }, /* str=000010 */
-        { 0x3, 7 }, /* str=0000011 */
-        { 0x2, 7 }, /* str=0000010 */
-        { 0x3, 8 }, /* str=00000011 */
-        { 0x2, 8 }, /* str=00000010 */
-        { 0x3, 9 }, /* str=000000011 */
-        { 0x2, 9 }, /* str=000000010 */
-        { 0x1, 9 }, /* str=000000001 */
-    },
-    { /* i_total 2 */
-        { 0x7, 3 }, /* str=111 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x5, 4 }, /* str=0101 */
-        { 0x4, 4 }, /* str=0100 */
-        { 0x3, 4 }, /* str=0011 */
-        { 0x2, 4 }, /* str=0010 */
-        { 0x3, 5 }, /* str=00011 */
-        { 0x2, 5 }, /* str=00010 */
-        { 0x3, 6 }, /* str=000011 */
-        { 0x2, 6 }, /* str=000010 */
-        { 0x1, 6 }, /* str=000001 */
-        { 0x0, 6 }, /* str=000000 */
-    },
-    { /* i_total 3 */
-        { 0x5, 4 }, /* str=0101 */
-        { 0x7, 3 }, /* str=111 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 4 }, /* str=0100 */
-        { 0x3, 4 }, /* str=0011 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 4 }, /* str=0010 */
-        { 0x3, 5 }, /* str=00011 */
-        { 0x2, 5 }, /* str=00010 */
-        { 0x1, 6 }, /* str=000001 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x0, 6 }, /* str=000000 */
-    },
-    { /* i_total 4 */
-        { 0x3, 5 }, /* str=00011 */
-        { 0x7, 3 }, /* str=111 */
-        { 0x5, 4 }, /* str=0101 */
-        { 0x4, 4 }, /* str=0100 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x3, 4 }, /* str=0011 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 4 }, /* str=0010 */
-        { 0x2, 5 }, /* str=00010 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x0, 5 }, /* str=00000 */
-    },
-    { /* i_total 5 */
-        { 0x5, 4 }, /* str=0101 */
-        { 0x4, 4 }, /* str=0100 */
-        { 0x3, 4 }, /* str=0011 */
-        { 0x7, 3 }, /* str=111 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 4 }, /* str=0010 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x0, 5 }, /* str=00000 */
-    },
-    { /* i_total 6 */
-        { 0x1, 6 }, /* str=000001 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x7, 3 }, /* str=111 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x0, 6 }, /* str=000000 */
-    },
-    { /* i_total 7 */
-        { 0x1, 6 }, /* str=000001 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x0, 6 }, /* str=000000 */
-    },
-    { /* i_total 8 */
-        { 0x1, 6 }, /* str=000001 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x0, 6 }, /* str=000000 */
-    },
-    { /* i_total 9 */
-        { 0x1, 6 }, /* str=000001 */
-        { 0x0, 6 }, /* str=000000 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 5 }, /* str=00001 */
-    },
-    { /* i_total 10 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x0, 5 }, /* str=00000 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 4 }, /* str=0001 */
-    },
-    { /* i_total 11 */
-        { 0x0, 4 }, /* str=0000 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x3, 3 }, /* str=011 */
-    },
-    { /* i_total 12 */
-        { 0x0, 4 }, /* str=0000 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x1, 3 }, /* str=001 */
-    },
-    { /* i_total 13 */
-        { 0x0, 3 }, /* str=000 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x1, 2 }, /* str=01 */
-    },
-    { /* i_total 14 */
-        { 0x0, 2 }, /* str=00 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 1 }, /* str=1 */
-    },
-    { /* i_total 15 */
-        { 0x0, 1 }, /* str=0 */
-        { 0x1, 1 }, /* str=1 */
-    },
-};
-
-/* [i_total_coeff-1][i_total_zeros] */
-const vlc_t x264_total_zeros_2x2_dc[3][4] =
-{
-    { /* i_total 1 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x0, 3 }  /* str=000 */
-    },
-    { /* i_total 2 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x0, 2 }, /* str=00 */
-    },
-    { /* i_total 3 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x0, 1 }, /* str=0 */
-    },
-};
-
-/* [i_total_coeff-1][i_total_zeros] */
-const vlc_t x264_total_zeros_2x4_dc[7][8] =
-{
-    { /* i_total 1 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 4 }, /* str=0010 */
-        { 0x3, 4 }, /* str=0011 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x0, 5 }, /* str=00000 */
-    },
-    { /* i_total 2 */
-        { 0x0, 3 }, /* str=000 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x7, 3 }, /* str=111 */
-    },
-    { /* i_total 3 */
-        { 0x0, 3 }, /* str=000 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x7, 3 }, /* str=111 */
-    },
-    { /* i_total 4 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x0, 2 }, /* str=00 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x7, 3 }, /* str=111 */
-    },
-    { /* i_total 5 */
-        { 0x0, 2 }, /* str=00 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x3, 2 }, /* str=11 */
-    },
-    { /* i_total 6 */
-        { 0x0, 2 }, /* str=00 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 1 }, /* str=1 */
-    },
-    { /* i_total 7 */
-        { 0x0, 1 }, /* str=0 */
-        { 0x1, 1 }, /* str=1 */
-    }
-};
-
-/* [MIN( i_zero_left-1, 6 )][run_before] */
-static const vlc_t run_before[7][16] =
-{
-    { /* i_zero_left 1 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x0, 1 }, /* str=0 */
-    },
-    { /* i_zero_left 2 */
-        { 0x1, 1 }, /* str=1 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x0, 2 }, /* str=00 */
-    },
-    { /* i_zero_left 3 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x0, 2 }, /* str=00 */
-    },
-    { /* i_zero_left 4 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x1, 2 }, /* str=01 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x0, 3 }, /* str=000 */
-    },
-    { /* i_zero_left 5 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x2, 2 }, /* str=10 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x0, 3 }, /* str=000 */
-    },
-    { /* i_zero_left 6 */
-        { 0x3, 2 }, /* str=11 */
-        { 0x0, 3 }, /* str=000 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 3 }, /* str=100 */
-    },
-    { /* i_zero_left >6 */
-        { 0x7, 3 }, /* str=111 */
-        { 0x6, 3 }, /* str=110 */
-        { 0x5, 3 }, /* str=101 */
-        { 0x4, 3 }, /* str=100 */
-        { 0x3, 3 }, /* str=011 */
-        { 0x2, 3 }, /* str=010 */
-        { 0x1, 3 }, /* str=001 */
-        { 0x1, 4 }, /* str=0001 */
-        { 0x1, 5 }, /* str=00001 */
-        { 0x1, 6 }, /* str=000001 */
-        { 0x1, 7 }, /* str=0000001 */
-        { 0x1, 8 }, /* str=00000001 */
-        { 0x1, 9 }, /* str=000000001 */
-        { 0x1, 10 }, /* str=0000000001 */
-        { 0x1, 11 }, /* str=00000000001 */
-    },
-};
-
 vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
 uint32_t x264_run_before[1<<16];
 
@@ -842,7 +71,9 @@
             vlc->i_next = i_next;
         }
 
-    for( int i = 1; i < (1<<16); i++ )
+    x264_run_before[0] = 0;
+    x264_run_before[1] = 0;
+    for( uint32_t i = 2; i < (1<<16); i++ )
     {
         x264_run_level_t runlevel;
         ALIGNED_ARRAY_16( dctcoef, dct, [16] );
@@ -857,10 +88,10 @@
         {
             int idx = X264_MIN(zeros, 7) - 1;
             int run = x264_clz( mask );
-            int len = run_before[idx][run].i_size;
+            int len = x264_run_before_init[idx][run].i_size;
             size += len;
             bits <<= len;
-            bits |= run_before[idx][run].i_bits;
+            bits |= x264_run_before_init[idx][run].i_bits;
             zeros -= run;
             mask <<= run + 1;
         }
diff -Nru x264-0.152.2854+gite9a5903/common/win32thread.c x264-0.158.2988+git-20191101.7817004/common/win32thread.c
--- x264-0.152.2854+gite9a5903/common/win32thread.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/win32thread.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.c: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Pegasys Inc. <http://www.pegasys-inc.com>
@@ -31,7 +31,7 @@
 /* Based on the agreed standing that x264 does not need to utilize >64 logical cpus,
  * this API does not detect nor utilize more than 64 cpus for systems that have them. */
 
-#include "common.h"
+#include "base.h"
 
 #if HAVE_WINRT
 /* _beginthreadex() is technically the correct option, but it's only available for Desktop applications.
@@ -51,7 +51,7 @@
 static x264_pthread_mutex_t static_mutex;
 
 /* _beginthreadex requires that the start routine is __stdcall */
-static unsigned __stdcall x264_win32thread_worker( void *arg )
+static unsigned __stdcall win32thread_worker( void *arg )
 {
     x264_pthread_t *h = arg;
     *h->p_ret = h->func( h->arg );
@@ -65,7 +65,7 @@
     thread->arg    = arg;
     thread->p_ret  = &thread->ret;
     thread->ret    = NULL;
-    thread->handle = (void*)_beginthreadex( NULL, 0, x264_win32thread_worker, thread, 0, NULL );
+    thread->handle = (void*)_beginthreadex( NULL, 0, win32thread_worker, thread, 0, NULL );
     return !thread->handle;
 }
 
@@ -95,7 +95,15 @@
 {
     static const x264_pthread_mutex_t init = X264_PTHREAD_MUTEX_INITIALIZER;
     if( !memcmp( mutex, &init, sizeof(x264_pthread_mutex_t) ) )
-        *mutex = static_mutex;
+    {
+        int ret = 0;
+        EnterCriticalSection( &static_mutex );
+        if( !memcmp( mutex, &init, sizeof(x264_pthread_mutex_t) ) )
+            ret = x264_pthread_mutex_init( mutex, NULL );
+        LeaveCriticalSection( &static_mutex );
+        if( ret )
+            return ret;
+    }
     EnterCriticalSection( mutex );
     return 0;
 }
diff -Nru x264-0.152.2854+gite9a5903/common/win32thread.h x264-0.158.2988+git-20191101.7817004/common/win32thread.h
--- x264-0.152.2854+gite9a5903/common/win32thread.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/win32thread.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.h: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/common/x86/bitstream-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/bitstream-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/bitstream-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/bitstream-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* bitstream-a.asm: x86 bitstream functions
 ;*****************************************************************************
-;* Copyright (C) 2010-2017 x264 project
+;* Copyright (C) 2010-2019 x264 project
 ;*
 ;* Authors: Fiona Glaser <fiona@x264.com>
 ;*          Henrik Gramner <henrik@gramner.com>
diff -Nru x264-0.152.2854+gite9a5903/common/x86/bitstream.h x264-0.158.2988+git-20191101.7817004/common/x86/bitstream.h
--- x264-0.152.2854+gite9a5903/common/x86/bitstream.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/bitstream.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,64 @@
+/*****************************************************************************
+ * bitstream.h: x86 bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_X86_BITSTREAM_H
+#define X264_X86_BITSTREAM_H
+
+#define x264_nal_escape_mmx2 x264_template(nal_escape_mmx2)
+uint8_t *x264_nal_escape_mmx2( uint8_t *dst, uint8_t *src, uint8_t *end );
+#define x264_nal_escape_sse2 x264_template(nal_escape_sse2)
+uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
+#define x264_nal_escape_avx2 x264_template(nal_escape_avx2)
+uint8_t *x264_nal_escape_avx2( uint8_t *dst, uint8_t *src, uint8_t *end );
+#define x264_cabac_block_residual_rd_internal_sse2 x264_template(cabac_block_residual_rd_internal_sse2)
+void x264_cabac_block_residual_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_rd_internal_lzcnt x264_template(cabac_block_residual_rd_internal_lzcnt)
+void x264_cabac_block_residual_rd_internal_lzcnt      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_rd_internal_ssse3 x264_template(cabac_block_residual_rd_internal_ssse3)
+void x264_cabac_block_residual_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_rd_internal_ssse3_lzcnt x264_template(cabac_block_residual_rd_internal_ssse3_lzcnt)
+void x264_cabac_block_residual_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_rd_internal_avx512 x264_template(cabac_block_residual_rd_internal_avx512)
+void x264_cabac_block_residual_rd_internal_avx512     ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_8x8_rd_internal_sse2 x264_template(cabac_block_residual_8x8_rd_internal_sse2)
+void x264_cabac_block_residual_8x8_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_8x8_rd_internal_lzcnt x264_template(cabac_block_residual_8x8_rd_internal_lzcnt)
+void x264_cabac_block_residual_8x8_rd_internal_lzcnt      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_8x8_rd_internal_ssse3 x264_template(cabac_block_residual_8x8_rd_internal_ssse3)
+void x264_cabac_block_residual_8x8_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt x264_template(cabac_block_residual_8x8_rd_internal_ssse3_lzcnt)
+void x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_8x8_rd_internal_avx512 x264_template(cabac_block_residual_8x8_rd_internal_avx512)
+void x264_cabac_block_residual_8x8_rd_internal_avx512     ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_internal_sse2 x264_template(cabac_block_residual_internal_sse2)
+void x264_cabac_block_residual_internal_sse2  ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_internal_lzcnt x264_template(cabac_block_residual_internal_lzcnt)
+void x264_cabac_block_residual_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_internal_avx2 x264_template(cabac_block_residual_internal_avx2)
+void x264_cabac_block_residual_internal_avx2  ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+#define x264_cabac_block_residual_internal_avx512 x264_template(cabac_block_residual_internal_avx512)
+void x264_cabac_block_residual_internal_avx512( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/x86/cabac-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/cabac-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/cabac-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/cabac-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* cabac-a.asm: x86 cabac
 ;*****************************************************************************
-;* Copyright (C) 2008-2017 x264 project
+;* Copyright (C) 2008-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
@@ -28,28 +28,26 @@
 %include "x86inc.asm"
 %include "x86util.asm"
 
-SECTION_RODATA
-
-coeff_abs_level1_ctx:       db 1, 2, 3, 4, 0, 0, 0, 0
-coeff_abs_levelgt1_ctx:     db 5, 5, 5, 5, 6, 7, 8, 9
-coeff_abs_level_transition: db 1, 2, 3, 3, 4, 5, 6, 7
-                            db 4, 4, 4, 4, 5, 6, 7, 7
+SECTION_RODATA 64
 
 %if ARCH_X86_64
-%macro COEFF_LAST_TABLE 17
-    %define funccpu1 %1
-    %define funccpu2 %2
-    %define funccpu3 %3
+%macro COEFF_LAST_TABLE 4-18 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64
+    %xdefine %%funccpu1 %2 ; last4
+    %xdefine %%funccpu2 %3 ; last64
+    %xdefine %%funccpu3 %4 ; last15/last16
+    coeff_last_%1:
+    %xdefine %%base coeff_last_%1
     %rep 14
-        %ifidn %4, 4
-            dq mangle(private_prefix %+ _coeff_last%4_ %+ funccpu1)
-        %elifidn %4, 64
-            dq mangle(private_prefix %+ _coeff_last%4_ %+ funccpu2)
+        %ifidn %5, 4
+            dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu1) - %%base
+        %elifidn %5, 64
+            dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu2) - %%base
         %else
-            dq mangle(private_prefix %+ _coeff_last%4_ %+ funccpu3)
+            dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu3) - %%base
         %endif
         %rotate 1
     %endrep
+    dd 0, 0 ; 64-byte alignment padding
 %endmacro
 
 cextern coeff_last4_mmx2
@@ -68,33 +66,35 @@
 cextern coeff_last64_avx2
 cextern coeff_last64_avx512
 
-%ifdef PIC
-SECTION .data
-%endif
-coeff_last_sse2:   COEFF_LAST_TABLE mmx2,   sse2,   sse2,   16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64
-coeff_last_lzcnt:  COEFF_LAST_TABLE lzcnt,  lzcnt,  lzcnt,  16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64
-coeff_last_avx2:   COEFF_LAST_TABLE lzcnt,  avx2,   lzcnt,  16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64
+COEFF_LAST_TABLE sse2,   mmx2,   sse2,   sse2
+COEFF_LAST_TABLE lzcnt,  lzcnt,  lzcnt,  lzcnt
+COEFF_LAST_TABLE avx2,   lzcnt,  avx2,   lzcnt
 %if HIGH_BIT_DEPTH
-coeff_last_avx512: COEFF_LAST_TABLE avx512, avx512, avx512, 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64
+COEFF_LAST_TABLE avx512, avx512, avx512, avx512
 %else
-coeff_last_avx512: COEFF_LAST_TABLE lzcnt,  avx512, avx512, 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64
+COEFF_LAST_TABLE avx512, lzcnt,  avx512, avx512
 %endif
 %endif
 
+coeff_abs_level1_ctx:       db 1, 2, 3, 4, 0, 0, 0, 0
+coeff_abs_levelgt1_ctx:     db 5, 5, 5, 5, 6, 7, 8, 9
+coeff_abs_level_transition: db 1, 2, 3, 3, 4, 5, 6, 7
+                            db 4, 4, 4, 4, 5, 6, 7, 7
+
 SECTION .text
 
-cextern cabac_range_lps
-cextern cabac_transition
-cextern cabac_renorm_shift
-cextern cabac_entropy
+cextern_common cabac_range_lps
+cextern_common cabac_transition
+cextern_common cabac_renorm_shift
+cextern_common cabac_entropy
 cextern cabac_size_unary
 cextern cabac_transition_unary
-cextern significant_coeff_flag_offset
-cextern significant_coeff_flag_offset_8x8
-cextern last_coeff_flag_offset
-cextern last_coeff_flag_offset_8x8
-cextern coeff_abs_level_m1_offset
-cextern count_cat_m1
+cextern_common significant_coeff_flag_offset
+cextern_common significant_coeff_flag_offset_8x8
+cextern_common last_coeff_flag_offset
+cextern_common last_coeff_flag_offset_8x8
+cextern_common coeff_abs_level_m1_offset
+cextern_common count_cat_m1
 cextern cabac_encode_ue_bypass
 
 %if ARCH_X86_64
@@ -117,15 +117,13 @@
 endstruc
 
 %macro LOAD_GLOBAL 3-5 0 ; dst, base, off1, off2, tmp
-%ifdef PIC
-    %ifidn %4, 0
-        movzx %1, byte [%2+%3+r7-$$]
-    %else
-        lea   %5, [r7+%4]
-        movzx %1, byte [%2+%3+%5-$$]
-    %endif
-%else
+%if ARCH_X86_64 == 0
     movzx %1, byte [%2+%3+%4]
+%elifidn %4, 0
+    movzx %1, byte [%2+%3+r7-$$]
+%else
+    lea   %5, [r7+%4]
+    movzx %1, byte [%2+%3+%5-$$]
 %endif
 %endmacro
 
@@ -150,9 +148,9 @@
     shr   t5d, 6
     movifnidn t2d, r2m
 %if WIN64
-    PUSH r7
+    PUSH   r7
 %endif
-%ifdef PIC
+%if ARCH_X86_64
     lea    r7, [$$]
 %endif
     LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*2, t4
@@ -179,7 +177,7 @@
     shl   t6d, t3b
 %endif
 %if WIN64
-    POP r7
+    POP    r7
 %endif
     mov   [t0+cb.range], t4d
     add   t3d, [t0+cb.queue]
@@ -274,6 +272,7 @@
 CABAC asm
 CABAC bmi2
 
+%if ARCH_X86_64
 ; %1 = label name
 ; %2 = node_ctx init?
 %macro COEFF_ABS_LEVEL_GT1 2
@@ -404,6 +403,13 @@
 %endif
 %endmacro
 
+%macro COEFF_LAST 2 ; table, ctx_block_cat
+    lea    r1, [%1 GLOBAL]
+    movsxd r6, [r1+4*%2]
+    add    r6, r1
+    call   r6
+%endmacro
+
 ;-----------------------------------------------------------------------------
 ; void x264_cabac_block_residual_rd_internal_sse2 ( dctcoef *l, int b_interlaced,
 ;                                                   int ctx_block_cat, x264_cabac_t *cb );
@@ -421,15 +427,9 @@
     %define dct r4
 %endif
 
-%ifdef PIC
-    cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF
+cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF
     lea     r12, [$$]
     %define GLOBAL +r12-$$
-%else
-    cglobal func, 4,12,6,-maxcoeffs*SIZEOF_DCTCOEF
-    %define GLOBAL
-%endif
-
     shl     r1d, 4                                            ; MB_INTERLACED*16
 %if %1
     lea      r4, [significant_coeff_flag_offset_8x8+r1*4 GLOBAL]     ; r12 = sig offset 8x8
@@ -452,7 +452,7 @@
     add      r4, rsp                                          ; restore AC coefficient offset
 %endif
 ; for improved OOE performance, run coeff_last on the original coefficients.
-    call [%2+gprsize*r2 GLOBAL]                               ; coeff_last[ctx_block_cat]( dct )
+    COEFF_LAST %2, r2                                         ; coeff_last[ctx_block_cat]( dct )
 ; we know on 64-bit that the SSE2 versions of this function only
 ; overwrite r0, r1, and rax (r6). last64 overwrites r2 too, but we
 ; don't need r2 in 8x8 mode.
@@ -539,7 +539,6 @@
     RET
 %endmacro
 
-%if ARCH_X86_64
 INIT_XMM sse2
 CABAC_RESIDUAL_RD 0, coeff_last_sse2
 CABAC_RESIDUAL_RD 1, coeff_last_sse2
@@ -560,7 +559,6 @@
 CABAC_RESIDUAL_RD 0, coeff_last_avx512
 INIT_ZMM avx512
 CABAC_RESIDUAL_RD 1, coeff_last_avx512
-%endif
 
 ;-----------------------------------------------------------------------------
 ; void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced,
@@ -638,15 +636,10 @@
 
 %macro CABAC_RESIDUAL 1
 cglobal cabac_block_residual_internal, 4,15,0,-4*64
-%ifdef PIC
 ; if we use the same r7 as in cabac_encode_decision, we can cheat and save a register.
     lea     r7, [$$]
     %define lastm [rsp+4*1]
     %define GLOBAL +r7-$$
-%else
-    %define lastm r7d
-    %define GLOBAL
-%endif
     shl     r1d, 4
 
     %define sigoffq r8
@@ -673,7 +666,7 @@
     mov     dct, r0
     mov leveloffm, leveloffd
 
-    call [%1+gprsize*r2 GLOBAL]
+    COEFF_LAST %1, r2
     mov   lastm, eax
 ; put cabac in r0; needed for cabac_encode_decision
     mov      r0, r3
@@ -764,7 +757,6 @@
     RET
 %endmacro
 
-%if ARCH_X86_64
 INIT_XMM sse2
 CABAC_RESIDUAL coeff_last_sse2
 INIT_XMM lzcnt
diff -Nru x264-0.152.2854+gite9a5903/common/x86/const-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/const-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/const-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/const-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* const-a.asm: x86 global constants
 ;*****************************************************************************
-;* Copyright (C) 2010-2017 x264 project
+;* Copyright (C) 2010-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
diff -Nru x264-0.152.2854+gite9a5903/common/x86/cpu-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/cpu-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/cpu-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/cpu-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* cpu-a.asm: x86 cpu utilities
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
 ;*          Loren Merritt <lorenm@u.washington.edu>
@@ -64,28 +64,21 @@
 %endif
     ret
 
-%if ARCH_X86_64
-
 ;-----------------------------------------------------------------------------
-; void stack_align( void (*func)(void*), void *arg );
+; void cpu_emms( void )
 ;-----------------------------------------------------------------------------
-cglobal stack_align
-    push rbp
-    mov  rbp, rsp
-%if WIN64
-    sub  rsp, 32 ; shadow space
-%endif
-    and  rsp, ~(STACK_ALIGNMENT-1)
-    mov  rax, r0
-    mov   r0, r1
-    mov   r1, r2
-    mov   r2, r3
-    call rax
-    leave
+cglobal cpu_emms ; no arguments; executes a single EMMS and returns
+    emms         ; EMMS: empty the MMX state so the x87 FPU registers can be used again
     ret
 
-%else
+;-----------------------------------------------------------------------------
+; void cpu_sfence( void ) -- executes a single SFENCE and returns
+;-----------------------------------------------------------------------------
+cglobal cpu_sfence
+    sfence       ; SFENCE: store fence, earlier stores become globally visible before any later store
+    ret
 
+%if ARCH_X86_64 == 0
 ;-----------------------------------------------------------------------------
 ; int cpu_cpuid_test( void )
 ; return 0 if unsupported
@@ -111,35 +104,4 @@
     pop     ebx
     popfd
     ret
-
-cglobal stack_align
-    push ebp
-    mov  ebp, esp
-    sub  esp, 12
-    and  esp, ~(STACK_ALIGNMENT-1)
-    mov  ecx, [ebp+8]
-    mov  edx, [ebp+12]
-    mov  [esp], edx
-    mov  edx, [ebp+16]
-    mov  [esp+4], edx
-    mov  edx, [ebp+20]
-    mov  [esp+8], edx
-    call ecx
-    leave
-    ret
-
 %endif
-
-;-----------------------------------------------------------------------------
-; void cpu_emms( void )
-;-----------------------------------------------------------------------------
-cglobal cpu_emms
-    emms
-    ret
-
-;-----------------------------------------------------------------------------
-; void cpu_sfence( void )
-;-----------------------------------------------------------------------------
-cglobal cpu_sfence
-    sfence
-    ret
diff -Nru x264-0.152.2854+gite9a5903/common/x86/dct-32.asm x264-0.158.2988+git-20191101.7817004/common/x86/dct-32.asm
--- x264-0.152.2854+gite9a5903/common/x86/dct-32.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/dct-32.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-32.asm: x86_32 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -161,8 +161,7 @@
 
 %macro SUB8x8_DCT8 0
 cglobal sub8x8_dct8, 3,3,8
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     LOAD_DIFF8x4 0,1,2,3, none,none, r1, r2
     LOAD_DIFF8x4 4,5,6,7, none,none, r1, r2
 
@@ -211,8 +210,7 @@
 %macro ADD8x8_IDCT8 0
 cglobal add8x8_idct8, 2,2
     add r1, 128
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     UNSPILL_SHUFFLE r1, 1,2,3,5,6,7, -6,-4,-2,2,4,6
     IDCT8_1D d,0,1,2,3,4,5,6,7,[r1-128],[r1+0]
     mova   [r1+0], m4
@@ -443,8 +441,7 @@
 %macro DCT_SUB8 0
 cglobal sub8x8_dct, 3,3
     add r2, 4*FDEC_STRIDE
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
 %if cpuflag(ssse3)
     mova m7, [hsub_mul]
 %endif
@@ -476,8 +473,7 @@
 ;-----------------------------------------------------------------------------
 cglobal sub8x8_dct8, 3,3
     add r2, 4*FDEC_STRIDE
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
 %if cpuflag(ssse3)
     mova m7, [hsub_mul]
     LOAD_DIFF8x4 0, 1, 2, 3, 4, 7, r1, r2-4*FDEC_STRIDE
@@ -525,8 +521,7 @@
 %macro ADD8x8 0
 cglobal add8x8_idct, 2,2
     add r0, 4*FDEC_STRIDE
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     UNSPILL_SHUFFLE r1, 0,2,1,3, 0,1,2,3
     SBUTTERFLY qdq, 0, 1, 4
     SBUTTERFLY qdq, 2, 3, 4
@@ -569,8 +564,7 @@
 %macro ADD8x8_IDCT8 0
 cglobal add8x8_idct8, 2,2
     add r0, 4*FDEC_STRIDE
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     UNSPILL r1, 1,2,3,5,6,7
     IDCT8_1D   w,0,1,2,3,4,5,6,7,[r1+0],[r1+64]
     SPILL r1, 6
diff -Nru x264-0.152.2854+gite9a5903/common/x86/dct-64.asm x264-0.158.2988+git-20191101.7817004/common/x86/dct-64.asm
--- x264-0.152.2854+gite9a5903/common/x86/dct-64.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/dct-64.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-64.asm: x86_64 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -142,8 +142,7 @@
 %macro SUB8x8_DCT8 0
 cglobal sub8x8_dct8, 3,3,14
     TAIL_CALL .skip_prologue, 0
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     LOAD_DIFF8x4 0,1,2,3, none,none, r1, r2
     LOAD_DIFF8x4 4,5,6,7, none,none, r1, r2
 
@@ -192,8 +191,7 @@
 cglobal add8x8_idct8, 2,2,16
     add r1, 128
     TAIL_CALL .skip_prologue, 0
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     mova     m0, [r1-128]
     mova     m1, [r1-96]
     mova     m2, [r1-64]
@@ -255,8 +253,7 @@
     mova m7, [hsub_mul]
 %endif
     TAIL_CALL .skip_prologue, 0
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     SWAP 7, 9
     LOAD_DIFF8x4 0, 1, 2, 3, 8, 9, r1, r2-4*FDEC_STRIDE
     LOAD_DIFF8x4 4, 5, 6, 7, 8, 9, r1, r2-4*FDEC_STRIDE
@@ -279,8 +276,7 @@
     mova m7, [hsub_mul]
 %endif
     TAIL_CALL .skip_prologue, 0
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     SWAP 7, 10
     LOAD_DIFF8x4  0, 1, 2, 3, 4, 10, r1, r2-4*FDEC_STRIDE
     LOAD_DIFF8x4  4, 5, 6, 7, 8, 10, r1, r2-4*FDEC_STRIDE
@@ -355,8 +351,7 @@
     add r0, 4*FDEC_STRIDE
     pxor m7, m7
     TAIL_CALL .skip_prologue, 0
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     SWAP 7, 9
     movdqa  m0, [r1+0x00]
     movdqa  m1, [r1+0x10]
@@ -391,8 +386,7 @@
     add  r0, 4*FDEC_STRIDE
     pxor m7, m7
     TAIL_CALL .skip_prologue, 0
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     SWAP 7, 9
     mova   m0, [r1+ 0]
     mova   m2, [r1+16]
diff -Nru x264-0.152.2854+gite9a5903/common/x86/dct-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/dct-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/dct-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/dct-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-a.asm: x86 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Holger Lubitz <holger@lubitz.org>
 ;*          Loren Merritt <lorenm@u.washington.edu>
@@ -510,8 +510,7 @@
     add    r0, 4*FDEC_STRIDE
     pxor   m7, m7
     TAIL_CALL .skip_prologue, 0
-global current_function %+ .skip_prologue
-.skip_prologue:
+cglobal_label .skip_prologue
     ; TRANSPOSE4x4Q
     mova       xm0, [r1+ 0]
     mova       xm1, [r1+32]
@@ -622,8 +621,8 @@
     SBUTTERFLY wd, 1, 0, 2
     paddw      m2, m1, m0
     psubw      m3, m1, m0
-    paddw      m2 {k1}, m1       ; 0+1+2+3 0<<1+1-2-3<<1
-    psubw      m3 {k1}, m0       ; 0-1-2+3 0-1<<1+2<<1-3
+    vpaddw     m2 {k1}, m1       ; 0+1+2+3 0<<1+1-2-3<<1
+    vpsubw     m3 {k1}, m0       ; 0-1-2+3 0-1<<1+2<<1-3
     shufps     m1, m2, m3, q2323 ; a3 b3 a2 b2 c3 d3 c2 d2
     punpcklqdq m2, m3            ; a0 b0 a1 b1 c0 d0 c1 d1
     SUMSUB_BA   w, 1, 2, 3
@@ -631,8 +630,8 @@
     shufps     m1, m2, q2020     ; a0+a3 b0+b3 c0+c3 d0+d3 a0-a3 b0-b3 c0-c3 d0-d3
     paddw      m2, m1, m3
     psubw      m0, m1, m3
-    paddw      m2 {k2}, m1       ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1
-    psubw      m0 {k2}, m3       ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3'
+    vpaddw     m2 {k2}, m1       ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1
+    vpsubw     m0 {k2}, m3       ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3'
 %endmacro
 
 INIT_XMM avx512
@@ -744,7 +743,7 @@
     paddw      xmm0, xmm2       ; 0+1 0+1 2+3 2+3
     punpckldq  xmm0, xmm1       ; 0+1 0+1 0-1 0-1 2+3 2+3 2-3 2-3
     punpcklqdq xmm1, xmm0, xmm0
-    psubw      xmm0 {k1}, xm3, xmm0
+    vpsubw     xmm0 {k1}, xm3, xmm0
     paddw      xmm0, xmm1       ; 0+1+2+3 0+1-2-3 0-1+2-3 0-1-2+3
     movhps     [r0], xmm0
     RET
diff -Nru x264-0.152.2854+gite9a5903/common/x86/dct.h x264-0.158.2988+git-20191101.7817004/common/x86/dct.h
--- x264-0.152.2854+gite9a5903/common/x86/dct.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/dct.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: x86 transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -25,120 +25,225 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_I386_DCT_H
-#define X264_I386_DCT_H
+#ifndef X264_X86_DCT_H
+#define X264_X86_DCT_H
 
+#define x264_sub4x4_dct_mmx x264_template(sub4x4_dct_mmx)
 void x264_sub4x4_dct_mmx    ( dctcoef dct    [16], pixel   *pix1, pixel   *pix2 );
+#define x264_sub8x8_dct_mmx x264_template(sub8x8_dct_mmx)
 void x264_sub8x8_dct_mmx    ( dctcoef dct[ 4][16], pixel   *pix1, pixel   *pix2 );
+#define x264_sub16x16_dct_mmx x264_template(sub16x16_dct_mmx)
 void x264_sub16x16_dct_mmx  ( dctcoef dct[16][16], pixel   *pix1, pixel   *pix2 );
+#define x264_sub8x8_dct_sse2 x264_template(sub8x8_dct_sse2)
 void x264_sub8x8_dct_sse2   ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_sse2 x264_template(sub16x16_dct_sse2)
 void x264_sub16x16_dct_sse2 ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub4x4_dct_ssse3 x264_template(sub4x4_dct_ssse3)
 void x264_sub4x4_dct_ssse3  ( int16_t dct    [16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub4x4_dct_avx512 x264_template(sub4x4_dct_avx512)
 void x264_sub4x4_dct_avx512 ( int16_t dct    [16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_ssse3 x264_template(sub8x8_dct_ssse3)
 void x264_sub8x8_dct_ssse3  ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_ssse3 x264_template(sub16x16_dct_ssse3)
 void x264_sub16x16_dct_ssse3( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_avx x264_template(sub8x8_dct_avx)
 void x264_sub8x8_dct_avx    ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_avx x264_template(sub16x16_dct_avx)
 void x264_sub16x16_dct_avx  ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_xop x264_template(sub8x8_dct_xop)
 void x264_sub8x8_dct_xop    ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_xop x264_template(sub16x16_dct_xop)
 void x264_sub16x16_dct_xop  ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_avx2 x264_template(sub8x8_dct_avx2)
 void x264_sub8x8_dct_avx2   ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_avx512 x264_template(sub8x8_dct_avx512)
 void x264_sub8x8_dct_avx512 ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_avx2 x264_template(sub16x16_dct_avx2)
 void x264_sub16x16_dct_avx2 ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct_avx512 x264_template(sub16x16_dct_avx512)
 void x264_sub16x16_dct_avx512( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_dc_mmx2 x264_template(sub8x8_dct_dc_mmx2)
 void x264_sub8x8_dct_dc_mmx2   ( int16_t dct [ 4], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct_dc_sse2 x264_template(sub8x8_dct_dc_sse2)
 void x264_sub8x8_dct_dc_sse2   ( dctcoef dct [ 4], pixel   *pix1, pixel   *pix2 );
+#define x264_sub8x8_dct_dc_avx512 x264_template(sub8x8_dct_dc_avx512)
 void x264_sub8x8_dct_dc_avx512 ( int16_t dct [ 4], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x16_dct_dc_sse2 x264_template(sub8x16_dct_dc_sse2)
 void x264_sub8x16_dct_dc_sse2  ( dctcoef dct [ 8], pixel   *pix1, pixel   *pix2 );
+#define x264_sub8x16_dct_dc_ssse3 x264_template(sub8x16_dct_dc_ssse3)
 void x264_sub8x16_dct_dc_ssse3 ( int16_t dct [ 8], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x16_dct_dc_avx x264_template(sub8x16_dct_dc_avx)
 void x264_sub8x16_dct_dc_avx   ( dctcoef dct [ 8], pixel   *pix1, pixel   *pix2 );
+#define x264_sub8x16_dct_dc_avx512 x264_template(sub8x16_dct_dc_avx512)
 void x264_sub8x16_dct_dc_avx512( int16_t dct [ 8], uint8_t *pix1, uint8_t *pix2 );
 
+#define x264_add4x4_idct_mmx x264_template(add4x4_idct_mmx)
 void x264_add4x4_idct_mmx       ( uint8_t *p_dst, int16_t dct    [16] );
+#define x264_add4x4_idct_sse2 x264_template(add4x4_idct_sse2)
 void x264_add4x4_idct_sse2     ( uint16_t *p_dst, int32_t dct    [16] );
+#define x264_add4x4_idct_sse4 x264_template(add4x4_idct_sse4)
 void x264_add4x4_idct_sse4      ( uint8_t *p_dst, int16_t dct    [16] );
+#define x264_add4x4_idct_avx x264_template(add4x4_idct_avx)
 void x264_add4x4_idct_avx       ( pixel   *p_dst, dctcoef dct    [16] );
+#define x264_add8x8_idct_mmx x264_template(add8x8_idct_mmx)
 void x264_add8x8_idct_mmx       ( uint8_t *p_dst, int16_t dct[ 4][16] );
+#define x264_add8x8_idct_dc_mmx2 x264_template(add8x8_idct_dc_mmx2)
 void x264_add8x8_idct_dc_mmx2   ( uint8_t *p_dst, int16_t dct    [ 4] );
+#define x264_add16x16_idct_mmx x264_template(add16x16_idct_mmx)
 void x264_add16x16_idct_mmx     ( uint8_t *p_dst, int16_t dct[16][16] );
+#define x264_add16x16_idct_dc_mmx2 x264_template(add16x16_idct_dc_mmx2)
 void x264_add16x16_idct_dc_mmx2 ( uint8_t *p_dst, int16_t dct    [16] );
+#define x264_add8x8_idct_sse2 x264_template(add8x8_idct_sse2)
 void x264_add8x8_idct_sse2      ( pixel   *p_dst, dctcoef dct[ 4][16] );
+#define x264_add8x8_idct_avx x264_template(add8x8_idct_avx)
 void x264_add8x8_idct_avx       ( pixel   *p_dst, dctcoef dct[ 4][16] );
+#define x264_add8x8_idct_avx2 x264_template(add8x8_idct_avx2)
 void x264_add8x8_idct_avx2      ( pixel   *p_dst, dctcoef dct[ 4][16] );
+#define x264_add8x8_idct_avx512 x264_template(add8x8_idct_avx512)
 void x264_add8x8_idct_avx512    ( uint8_t *p_dst, int16_t dct[ 4][16] );
+#define x264_add16x16_idct_sse2 x264_template(add16x16_idct_sse2)
 void x264_add16x16_idct_sse2    ( pixel   *p_dst, dctcoef dct[16][16] );
+#define x264_add16x16_idct_avx x264_template(add16x16_idct_avx)
 void x264_add16x16_idct_avx     ( pixel   *p_dst, dctcoef dct[16][16] );
+#define x264_add16x16_idct_avx2 x264_template(add16x16_idct_avx2)
 void x264_add16x16_idct_avx2    ( pixel   *p_dst, dctcoef dct[16][16] );
+#define x264_add8x8_idct_dc_sse2 x264_template(add8x8_idct_dc_sse2)
 void x264_add8x8_idct_dc_sse2   ( pixel   *p_dst, dctcoef dct    [ 4] );
+#define x264_add16x16_idct_dc_sse2 x264_template(add16x16_idct_dc_sse2)
 void x264_add16x16_idct_dc_sse2 ( pixel   *p_dst, dctcoef dct    [16] );
+#define x264_add8x8_idct_dc_ssse3 x264_template(add8x8_idct_dc_ssse3)
 void x264_add8x8_idct_dc_ssse3  ( uint8_t *p_dst, int16_t dct    [ 4] );
+#define x264_add16x16_idct_dc_ssse3 x264_template(add16x16_idct_dc_ssse3)
 void x264_add16x16_idct_dc_ssse3( uint8_t *p_dst, int16_t dct    [16] );
+#define x264_add8x8_idct_dc_avx x264_template(add8x8_idct_dc_avx)
 void x264_add8x8_idct_dc_avx    ( pixel   *p_dst, dctcoef dct    [ 4] );
+#define x264_add16x16_idct_dc_avx x264_template(add16x16_idct_dc_avx)
 void x264_add16x16_idct_dc_avx  ( pixel   *p_dst, dctcoef dct    [16] );
+#define x264_add16x16_idct_dc_avx2 x264_template(add16x16_idct_dc_avx2)
 void x264_add16x16_idct_dc_avx2 ( uint8_t *p_dst, int16_t dct    [16] );
 
+#define x264_dct4x4dc_mmx2 x264_template(dct4x4dc_mmx2)
 void x264_dct4x4dc_mmx2      ( int16_t d[16] );
+#define x264_dct4x4dc_sse2 x264_template(dct4x4dc_sse2)
 void x264_dct4x4dc_sse2      ( int32_t d[16] );
+#define x264_dct4x4dc_avx x264_template(dct4x4dc_avx)
 void x264_dct4x4dc_avx       ( int32_t d[16] );
+#define x264_idct4x4dc_mmx x264_template(idct4x4dc_mmx)
 void x264_idct4x4dc_mmx      ( int16_t d[16] );
+#define x264_idct4x4dc_sse2 x264_template(idct4x4dc_sse2)
 void x264_idct4x4dc_sse2     ( int32_t d[16] );
+#define x264_idct4x4dc_avx x264_template(idct4x4dc_avx)
 void x264_idct4x4dc_avx      ( int32_t d[16] );
 
+#define x264_dct2x4dc_mmx2 x264_template(dct2x4dc_mmx2)
 void x264_dct2x4dc_mmx2( dctcoef dct[8], dctcoef dct4x4[8][16] );
+#define x264_dct2x4dc_sse2 x264_template(dct2x4dc_sse2)
 void x264_dct2x4dc_sse2( dctcoef dct[8], dctcoef dct4x4[8][16] );
+#define x264_dct2x4dc_avx x264_template(dct2x4dc_avx)
 void x264_dct2x4dc_avx ( dctcoef dct[8], dctcoef dct4x4[8][16] );
 
+#define x264_sub8x8_dct8_mmx x264_template(sub8x8_dct8_mmx)
 void x264_sub8x8_dct8_mmx    ( int16_t dct   [64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct8_mmx x264_template(sub16x16_dct8_mmx)
 void x264_sub16x16_dct8_mmx  ( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct8_sse2 x264_template(sub8x8_dct8_sse2)
 void x264_sub8x8_dct8_sse2   ( dctcoef dct   [64], pixel *pix1, pixel *pix2 );
+#define x264_sub16x16_dct8_sse2 x264_template(sub16x16_dct8_sse2)
 void x264_sub16x16_dct8_sse2 ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 );
+#define x264_sub8x8_dct8_ssse3 x264_template(sub8x8_dct8_ssse3)
 void x264_sub8x8_dct8_ssse3  ( int16_t dct   [64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub16x16_dct8_ssse3 x264_template(sub16x16_dct8_ssse3)
 void x264_sub16x16_dct8_ssse3( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 );
+#define x264_sub8x8_dct8_sse4 x264_template(sub8x8_dct8_sse4)
 void x264_sub8x8_dct8_sse4   ( int32_t dct   [64], uint16_t *pix1, uint16_t *pix2 );
+#define x264_sub16x16_dct8_sse4 x264_template(sub16x16_dct8_sse4)
 void x264_sub16x16_dct8_sse4 ( int32_t dct[4][64], uint16_t *pix1, uint16_t *pix2 );
+#define x264_sub8x8_dct8_avx x264_template(sub8x8_dct8_avx)
 void x264_sub8x8_dct8_avx    ( dctcoef dct   [64], pixel *pix1, pixel *pix2 );
+#define x264_sub16x16_dct8_avx x264_template(sub16x16_dct8_avx)
 void x264_sub16x16_dct8_avx  ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 );
+#define x264_sub16x16_dct8_avx2 x264_template(sub16x16_dct8_avx2)
 void x264_sub16x16_dct8_avx2 ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 );
 
 
+#define x264_add8x8_idct8_mmx x264_template(add8x8_idct8_mmx)
 void x264_add8x8_idct8_mmx   ( uint8_t *dst, int16_t dct   [64] );
+#define x264_add16x16_idct8_mmx x264_template(add16x16_idct8_mmx)
 void x264_add16x16_idct8_mmx ( uint8_t *dst, int16_t dct[4][64] );
+#define x264_add8x8_idct8_sse2 x264_template(add8x8_idct8_sse2)
 void x264_add8x8_idct8_sse2  ( pixel *dst, dctcoef dct   [64] );
+#define x264_add16x16_idct8_sse2 x264_template(add16x16_idct8_sse2)
 void x264_add16x16_idct8_sse2( pixel *dst, dctcoef dct[4][64] );
+#define x264_add8x8_idct8_avx x264_template(add8x8_idct8_avx)
 void x264_add8x8_idct8_avx   ( pixel *dst, dctcoef dct   [64] );
+#define x264_add16x16_idct8_avx x264_template(add16x16_idct8_avx)
 void x264_add16x16_idct8_avx ( pixel *dst, dctcoef dct[4][64] );
 
+#define x264_zigzag_scan_8x8_frame_mmx2 x264_template(zigzag_scan_8x8_frame_mmx2)
 void x264_zigzag_scan_8x8_frame_mmx2  ( int16_t level[64], int16_t dct[64] );
+#define x264_zigzag_scan_8x8_frame_sse2 x264_template(zigzag_scan_8x8_frame_sse2)
 void x264_zigzag_scan_8x8_frame_sse2  ( dctcoef level[64], dctcoef dct[64] );
+#define x264_zigzag_scan_8x8_frame_ssse3 x264_template(zigzag_scan_8x8_frame_ssse3)
 void x264_zigzag_scan_8x8_frame_ssse3 ( int16_t level[64], int16_t dct[64] );
+#define x264_zigzag_scan_8x8_frame_avx x264_template(zigzag_scan_8x8_frame_avx)
 void x264_zigzag_scan_8x8_frame_avx   ( dctcoef level[64], dctcoef dct[64] );
+#define x264_zigzag_scan_8x8_frame_xop x264_template(zigzag_scan_8x8_frame_xop)
 void x264_zigzag_scan_8x8_frame_xop   ( int16_t level[64], int16_t dct[64] );
+#define x264_zigzag_scan_8x8_frame_avx512 x264_template(zigzag_scan_8x8_frame_avx512)
 void x264_zigzag_scan_8x8_frame_avx512( dctcoef level[64], dctcoef dct[64] );
+#define x264_zigzag_scan_4x4_frame_mmx x264_template(zigzag_scan_4x4_frame_mmx)
 void x264_zigzag_scan_4x4_frame_mmx   ( int16_t level[16], int16_t dct[16] );
+#define x264_zigzag_scan_4x4_frame_sse2 x264_template(zigzag_scan_4x4_frame_sse2)
 void x264_zigzag_scan_4x4_frame_sse2  ( int32_t level[16], int32_t dct[16] );
+#define x264_zigzag_scan_4x4_frame_ssse3 x264_template(zigzag_scan_4x4_frame_ssse3)
 void x264_zigzag_scan_4x4_frame_ssse3 ( int16_t level[16], int16_t dct[16] );
+#define x264_zigzag_scan_4x4_frame_avx x264_template(zigzag_scan_4x4_frame_avx)
 void x264_zigzag_scan_4x4_frame_avx   ( dctcoef level[16], dctcoef dct[16] );
+#define x264_zigzag_scan_4x4_frame_xop x264_template(zigzag_scan_4x4_frame_xop)
 void x264_zigzag_scan_4x4_frame_xop   ( dctcoef level[16], dctcoef dct[16] );
+#define x264_zigzag_scan_4x4_frame_avx512 x264_template(zigzag_scan_4x4_frame_avx512)
 void x264_zigzag_scan_4x4_frame_avx512( dctcoef level[16], dctcoef dct[16] );
+#define x264_zigzag_scan_4x4_field_sse x264_template(zigzag_scan_4x4_field_sse)
 void x264_zigzag_scan_4x4_field_sse   ( int16_t level[16], int16_t dct[16] );
+#define x264_zigzag_scan_4x4_field_sse2 x264_template(zigzag_scan_4x4_field_sse2)
 void x264_zigzag_scan_4x4_field_sse2  ( int32_t level[16], int32_t dct[16] );
+#define x264_zigzag_scan_4x4_field_avx512 x264_template(zigzag_scan_4x4_field_avx512)
 void x264_zigzag_scan_4x4_field_avx512( dctcoef level[16], dctcoef dct[16] );
+#define x264_zigzag_scan_8x8_field_mmx2 x264_template(zigzag_scan_8x8_field_mmx2)
 void x264_zigzag_scan_8x8_field_mmx2  ( int16_t level[64], int16_t dct[64] );
+#define x264_zigzag_scan_8x8_field_sse4 x264_template(zigzag_scan_8x8_field_sse4)
 void x264_zigzag_scan_8x8_field_sse4  ( int32_t level[64], int32_t dct[64] );
+#define x264_zigzag_scan_8x8_field_avx x264_template(zigzag_scan_8x8_field_avx)
 void x264_zigzag_scan_8x8_field_avx   ( int32_t level[64], int32_t dct[64] );
+#define x264_zigzag_scan_8x8_field_xop x264_template(zigzag_scan_8x8_field_xop)
 void x264_zigzag_scan_8x8_field_xop   ( int16_t level[64], int16_t dct[64] );
+#define x264_zigzag_scan_8x8_field_avx512 x264_template(zigzag_scan_8x8_field_avx512)
 void x264_zigzag_scan_8x8_field_avx512( dctcoef level[64], dctcoef dct[64] );
+#define x264_zigzag_sub_4x4_frame_avx x264_template(zigzag_sub_4x4_frame_avx)
 int  x264_zigzag_sub_4x4_frame_avx    ( int16_t level[16], const uint8_t *src, uint8_t *dst );
+#define x264_zigzag_sub_4x4_frame_ssse3 x264_template(zigzag_sub_4x4_frame_ssse3)
 int  x264_zigzag_sub_4x4_frame_ssse3  ( int16_t level[16], const uint8_t *src, uint8_t *dst );
+#define x264_zigzag_sub_4x4ac_frame_avx x264_template(zigzag_sub_4x4ac_frame_avx)
 int  x264_zigzag_sub_4x4ac_frame_avx  ( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc );
+#define x264_zigzag_sub_4x4ac_frame_ssse3 x264_template(zigzag_sub_4x4ac_frame_ssse3)
 int  x264_zigzag_sub_4x4ac_frame_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc );
+#define x264_zigzag_sub_4x4_field_avx x264_template(zigzag_sub_4x4_field_avx)
 int  x264_zigzag_sub_4x4_field_avx    ( int16_t level[16], const uint8_t *src, uint8_t *dst );
+#define x264_zigzag_sub_4x4_field_ssse3 x264_template(zigzag_sub_4x4_field_ssse3)
 int  x264_zigzag_sub_4x4_field_ssse3  ( int16_t level[16], const uint8_t *src, uint8_t *dst );
+#define x264_zigzag_sub_4x4ac_field_avx x264_template(zigzag_sub_4x4ac_field_avx)
 int  x264_zigzag_sub_4x4ac_field_avx  ( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc );
+#define x264_zigzag_sub_4x4ac_field_ssse3 x264_template(zigzag_sub_4x4ac_field_ssse3)
 int  x264_zigzag_sub_4x4ac_field_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc );
+#define x264_zigzag_interleave_8x8_cavlc_mmx x264_template(zigzag_interleave_8x8_cavlc_mmx)
 void x264_zigzag_interleave_8x8_cavlc_mmx   ( int16_t *dst, int16_t *src, uint8_t *nnz );
+#define x264_zigzag_interleave_8x8_cavlc_sse2 x264_template(zigzag_interleave_8x8_cavlc_sse2)
 void x264_zigzag_interleave_8x8_cavlc_sse2  ( dctcoef *dst, dctcoef *src, uint8_t *nnz );
+#define x264_zigzag_interleave_8x8_cavlc_avx x264_template(zigzag_interleave_8x8_cavlc_avx)
 void x264_zigzag_interleave_8x8_cavlc_avx   ( dctcoef *dst, dctcoef *src, uint8_t *nnz );
+#define x264_zigzag_interleave_8x8_cavlc_avx2 x264_template(zigzag_interleave_8x8_cavlc_avx2)
 void x264_zigzag_interleave_8x8_cavlc_avx2  ( int16_t *dst, int16_t *src, uint8_t *nnz );
+#define x264_zigzag_interleave_8x8_cavlc_avx512 x264_template(zigzag_interleave_8x8_cavlc_avx512)
 void x264_zigzag_interleave_8x8_cavlc_avx512( dctcoef *dst, dctcoef *src, uint8_t *nnz );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/x86/deblock-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/deblock-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/deblock-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/deblock-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* deblock-a.asm: x86 deblocking
 ;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
diff -Nru x264-0.152.2854+gite9a5903/common/x86/deblock.h x264-0.158.2988+git-20191101.7817004/common/x86/deblock.h
--- x264-0.152.2854+gite9a5903/common/x86/deblock.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/deblock.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,146 @@
+/*****************************************************************************
+ * deblock.h: x86 deblocking
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_X86_DEBLOCK_H
+#define X264_X86_DEBLOCK_H
+
+#define x264_deblock_v_luma_sse2 x264_template(deblock_v_luma_sse2)
+void x264_deblock_v_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_luma_avx x264_template(deblock_v_luma_avx)
+void x264_deblock_v_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_sse2 x264_template(deblock_h_luma_sse2)
+void x264_deblock_h_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_avx x264_template(deblock_h_luma_avx)
+void x264_deblock_h_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_chroma_sse2 x264_template(deblock_v_chroma_sse2)
+void x264_deblock_v_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_chroma_avx x264_template(deblock_v_chroma_avx)
+void x264_deblock_v_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_sse2 x264_template(deblock_h_chroma_sse2)
+void x264_deblock_h_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_avx x264_template(deblock_h_chroma_avx)
+void x264_deblock_h_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_mbaff_sse2 x264_template(deblock_h_chroma_mbaff_sse2)
+void x264_deblock_h_chroma_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_mbaff_avx x264_template(deblock_h_chroma_mbaff_avx)
+void x264_deblock_h_chroma_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_422_mmx2 x264_template(deblock_h_chroma_422_mmx2)
+void x264_deblock_h_chroma_422_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_422_sse2 x264_template(deblock_h_chroma_422_sse2)
+void x264_deblock_h_chroma_422_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_422_avx x264_template(deblock_h_chroma_422_avx)
+void x264_deblock_h_chroma_422_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_luma_intra_sse2 x264_template(deblock_v_luma_intra_sse2)
+void x264_deblock_v_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_luma_intra_avx x264_template(deblock_v_luma_intra_avx)
+void x264_deblock_v_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_luma_intra_sse2 x264_template(deblock_h_luma_intra_sse2)
+void x264_deblock_h_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_luma_intra_avx x264_template(deblock_h_luma_intra_avx)
+void x264_deblock_h_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_chroma_intra_sse2 x264_template(deblock_v_chroma_intra_sse2)
+void x264_deblock_v_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_chroma_intra_avx x264_template(deblock_v_chroma_intra_avx)
+void x264_deblock_v_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_sse2 x264_template(deblock_h_chroma_intra_sse2)
+void x264_deblock_h_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_avx x264_template(deblock_h_chroma_intra_avx)
+void x264_deblock_h_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_422_intra_mmx2 x264_template(deblock_h_chroma_422_intra_mmx2)
+void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_422_intra_sse2 x264_template(deblock_h_chroma_422_intra_sse2)
+void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_422_intra_avx x264_template(deblock_h_chroma_422_intra_avx)
+void x264_deblock_h_chroma_422_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_strength_sse2 x264_template(deblock_strength_sse2)
+void x264_deblock_strength_sse2  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                   int mvy_limit, int bframe );
+#define x264_deblock_strength_ssse3 x264_template(deblock_strength_ssse3)
+void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                   int mvy_limit, int bframe );
+#define x264_deblock_strength_avx x264_template(deblock_strength_avx)
+void x264_deblock_strength_avx   ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                   int mvy_limit, int bframe );
+#define x264_deblock_strength_avx2 x264_template(deblock_strength_avx2)
+void x264_deblock_strength_avx2  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                   int mvy_limit, int bframe );
+#define x264_deblock_strength_avx512 x264_template(deblock_strength_avx512)
+void x264_deblock_strength_avx512( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
+                                   int mvy_limit, int bframe );
+
+#define x264_deblock_h_chroma_intra_mbaff_mmx2 x264_template(deblock_h_chroma_intra_mbaff_mmx2)
+void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_mbaff_sse2 x264_template(deblock_h_chroma_intra_mbaff_sse2)
+void x264_deblock_h_chroma_intra_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_mbaff_avx x264_template(deblock_h_chroma_intra_mbaff_avx)
+void x264_deblock_h_chroma_intra_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+#if ARCH_X86
+#define x264_deblock_h_luma_mmx2 x264_template(deblock_h_luma_mmx2)
+void x264_deblock_h_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v8_luma_mmx2 x264_template(deblock_v8_luma_mmx2)
+void x264_deblock_v8_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_v_chroma_mmx2 x264_template(deblock_v_chroma_mmx2)
+void x264_deblock_v_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_mmx2 x264_template(deblock_h_chroma_mmx2)
+void x264_deblock_h_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_chroma_mbaff_mmx2 x264_template(deblock_h_chroma_mbaff_mmx2)
+void x264_deblock_h_chroma_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#define x264_deblock_h_luma_intra_mmx2 x264_template(deblock_h_luma_intra_mmx2)
+void x264_deblock_h_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v8_luma_intra_mmx2 x264_template(deblock_v8_luma_intra_mmx2)
+void x264_deblock_v8_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_v_chroma_intra_mmx2 x264_template(deblock_v_chroma_intra_mmx2)
+void x264_deblock_v_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_mmx2 x264_template(deblock_h_chroma_intra_mmx2)
+void x264_deblock_h_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+#define x264_deblock_h_chroma_intra_mbaff_mmx2 x264_template(deblock_h_chroma_intra_mbaff_mmx2)
+void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); // same symbol as the unconditional declaration earlier in this header
+
+#define x264_deblock_v_luma_mmx2 x264_template(deblock_v_luma_mmx2)
+#define x264_deblock_v_luma_intra_mmx2 x264_template(deblock_v_luma_intra_mmx2)
+#if HIGH_BIT_DEPTH
+void x264_deblock_v_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+#else
+// Non-HIGH_BIT_DEPTH wrapper built from two calls to the v8 kernel. FIXME this wrapper has a significant cpu cost
+static ALWAYS_INLINE void x264_deblock_v_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
+{
+    x264_deblock_v8_luma_mmx2( pix,   stride, alpha, beta, tc0   );
+    x264_deblock_v8_luma_mmx2( pix+8, stride, alpha, beta, tc0+2 ); // second call: 8 pixels further along, tc0 advanced by 2 entries
+}
+static ALWAYS_INLINE void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta ) // intra counterpart: two v8 intra calls, no tc0 argument
+{
+    x264_deblock_v8_luma_intra_mmx2( pix,   stride, alpha, beta );
+    x264_deblock_v8_luma_intra_mmx2( pix+8, stride, alpha, beta ); // second call starts 8 pixels further along
+}
+#endif // HIGH_BIT_DEPTH
+#endif
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc-a2.asm x264-0.158.2988+git-20191101.7817004/common/x86/mc-a2.asm
--- x264-0.152.2854+gite9a5903/common/x86/mc-a2.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/mc-a2.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a2.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
@@ -64,10 +64,11 @@
 mbtree_prop_list_avx512_shuf: dw 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7
 mbtree_fix8_unpack_shuf: db -1,-1, 1, 0,-1,-1, 3, 2,-1,-1, 5, 4,-1,-1, 7, 6
                          db -1,-1, 9, 8,-1,-1,11,10,-1,-1,13,12,-1,-1,15,14
-mbtree_fix8_pack_shuf:   db  1, 0, 3, 2, 5, 4, 7, 6, 9, 8,11,10,13,12,15,14
+; bits 0-3: pshufb, bits 4-7: AVX-512 vpermq
+mbtree_fix8_pack_shuf:   db 0x01,0x20,0x43,0x62,0x15,0x34,0x57,0x76,0x09,0x08,0x0b,0x0a,0x0d,0x0c,0x0f,0x0e
 
-pf_256:    times 4 dd 256.0
-pf_inv256: times 4 dd 0.00390625
+pf_256:         times 4 dd 256.0
+pf_inv16777216: times 4 dd 0x1p-24
 
 pd_16: times 4 dd 16
 
@@ -1016,6 +1017,143 @@
 INIT_YMM avx2
 PLANE_COPY_CORE 1
 
+%macro PLANE_COPY_AVX512 1 ; swap (nonzero: emit plane_copy_swap, which runs pshufb with copy_swap_shuf on every vector)
+%if %1
+cglobal plane_copy_swap, 6,7
+    vbroadcasti32x4 m4, [copy_swap_shuf]
+%else
+cglobal plane_copy, 6,7
+%endif
+    movsxdifnidn r4, r4d
+%if %1 && HIGH_BIT_DEPTH
+    %define %%mload vmovdqu32
+    lea         r2, [r2+4*r4-64] ; r2 (loaded from) and r0 (stored to) point 64 bytes before the row end; 4 bytes per unit of r4
+    lea         r0, [r0+4*r4-64]
+    neg         r4
+    mov        r6d, r4d
+    shl         r4, 2 ; r4 = negative byte offset that counts up towards the row end
+    or         r6d, 0xffff0010
+    shrx       r6d, r6d, r6d ; (1 << (w & 15)) - 1
+    kmovw       k1, r6d
+%elif %1 || HIGH_BIT_DEPTH
+    %define %%mload vmovdqu16
+    lea         r2, [r2+2*r4-64] ; same setup with 2 bytes per unit of r4
+    lea         r0, [r0+2*r4-64]
+    mov        r6d, -1
+    neg         r4
+    shrx       r6d, r6d, r4d ; all-ones shifted right by the low bits of -r4: element mask for the final masked load
+    add         r4, r4 ; r4 = negative byte offset
+    kmovd       k1, r6d
+%else
+    %define %%mload vmovdqu8
+    lea         r2, [r2+1*r4-64] ; same setup with 1 byte per unit of r4
+    lea         r0, [r0+1*r4-64]
+    mov         r6, -1
+    neg         r4
+    shrx        r6, r6, r4
+%if ARCH_X86_64
+    kmovq       k1, r6
+%else
+    kmovd       k1, r6d
+    test       r4d, 32
+    jnz .l32
+    kxnord      k2, k2, k2 ; k2 = 32 one-bits
+    kunpckdq    k1, k1, k2 ; no 64-bit GPR here: build the 64-bit byte mask from the 32-bit mask plus 32 ones
+.l32:
+%endif
+%endif
+    FIX_STRIDES r3, r1
+    add         r4, 4*64
+    jge .small
+    mov         r6, r4 ; remember the row-start offset; reloaded into r4 after each row
+
+.loop: ; >256 bytes/row: four 64-byte vectors per iteration, non-temporal stores
+    PREFETCHNT_ITER r2+r4+64, 4*64
+    movu        m0, [r2+r4-3*64]
+    movu        m1, [r2+r4-2*64]
+    movu        m2, [r2+r4-1*64]
+    movu        m3, [r2+r4-0*64]
+%if %1
+    pshufb      m0, m4
+    pshufb      m1, m4
+    pshufb      m2, m4
+    pshufb      m3, m4
+%endif
+    movnta [r0+r4-3*64], m0
+    movnta [r0+r4-2*64], m1
+    movnta [r0+r4-1*64], m2
+    movnta [r0+r4-0*64], m3
+    add         r4, 4*64
+    jl .loop
+    PREFETCHNT_ITER r2+r4+64, 4*64
+    sub         r4, 3*64 ; switch to single-vector steps for what is left of the row
+    jge .tail
+.loop2:
+    movu        m0, [r2+r4]
+%if %1
+    pshufb      m0, m4
+%endif
+    movnta [r0+r4], m0
+    add         r4, 64
+    jl .loop2
+.tail:
+    %%mload     m0 {k1}{z}, [r2+r4]
+%if %1
+    pshufb      m0, m4
+%endif
+    movnta [r0+r4], m0 ; NOTE(review): load is masked by k1 but this store is a full 64 bytes - presumably the destination row is padded; confirm
+    add         r2, r3 ; advance the load pointer by r3 and the store pointer by r1 (one row each)
+    add         r0, r1
+    mov         r4, r6
+    dec        r5d ; r5d counts the remaining rows
+    jg .loop
+    sfence
+    RET
+
+.small: ; 65-256 bytes/row. skip non-temporal stores
+    sub         r4, 3*64
+    jge .tiny
+    mov         r6, r4 ; row-start offset for this path
+.small_loop:
+    PREFETCHNT_ITER r2+r4+64, 64
+    movu        m0, [r2+r4]
+%if %1
+    pshufb      m0, m4
+%endif
+    mova   [r0+r4], m0
+    add         r4, 64
+    jl .small_loop
+    PREFETCHNT_ITER r2+r4+64, 64
+    %%mload     m0 {k1}{z}, [r2+r4]
+%if %1
+    pshufb      m0, m4
+%endif
+    mova   [r0+r4], m0 ; final vector of the row: masked load, full-width store
+    add         r2, r3
+    add         r0, r1
+    mov         r4, r6
+    dec        r5d
+    jg .small_loop
+    RET
+
+.tiny: ; 1-64 bytes/row. skip non-temporal stores
+    PREFETCHNT_ITER r2+r4+64, 64
+    %%mload     m0 {k1}{z}, [r2+r4]
+%if %1
+    pshufb      m0, m4
+%endif
+    mova   [r0+r4], m0 ; one masked load and one full-width store per row; r4 is never modified in this loop
+    add         r2, r3
+    add         r0, r1
+    dec        r5d
+    jg .tiny
+    RET
+%endmacro
+
+INIT_ZMM avx512
+PLANE_COPY_AVX512 0
+PLANE_COPY_AVX512 1
+
 %macro INTERLEAVE 4-5 ; dst, srcu, srcv, is_aligned, nt_hint
 %if HIGH_BIT_DEPTH
 %assign x 0
@@ -1258,22 +1396,55 @@
     RET
 %endmacro ; LOAD_DEINTERLEAVE_CHROMA
 
+%macro LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512 0 ; emits load_deinterleave_chroma_fdec: 4 source rows (r1, stride r2) per iteration into 2*mmsize bytes at r0
+cglobal load_deinterleave_chroma_fdec, 4,5
+    vbroadcasti32x8 m0, [deinterleave_shuf32a]
+    mov            r4d, 0x3333ff00 ; bits 0-15 (0xff00) act as the load mask k1, bits 16-31 (0x3333) become the store mask k2
+    kmovd           k1, r4d
+    lea             r4, [r2*3] ; r4 = 3*r2, offset of the fourth row
+    kshiftrd        k2, k1, 16
+.loop:
+    vbroadcasti128 ym1, [r1] ; row 0 into the low 256 bits
+    vbroadcasti32x4 m1 {k1}, [r1+r2] ; row 1 merged into dwords 8-15 (the upper 256 bits)
+    vbroadcasti128 ym2, [r1+r2*2] ; row 2
+    vbroadcasti32x4 m2 {k1}, [r1+r4] ; row 3
+    lea             r1, [r1+r2*4]
+    pshufb          m1, m0
+    pshufb          m2, m0
+    vmovdqa32 [r0] {k2}, m1 ; k2 = 0x3333: only dwords 0-1 of every 16-byte lane are written
+    vmovdqa32 [r0+mmsize] {k2}, m2
+    add            r0, 2*mmsize
+    sub           r3d, 4 ; four rows consumed per iteration
+    jg .loop
+    RET
+%endmacro
+
 %macro LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2 0
 cglobal load_deinterleave_chroma_fenc, 4,5
     vbroadcasti128 m0, [deinterleave_shuf]
     lea            r4, [r2*3]
 .loop:
-    mova          xm1, [r1]
-    vinserti128    m1, m1, [r1+r2], 1
-    mova          xm2, [r1+r2*2]
-    vinserti128    m2, m2, [r1+r4], 1
+    mova          xm1, [r1]         ; 0
+    vinserti128   ym1, [r1+r2], 1   ; 1
+%if mmsize == 64
+    mova          xm2, [r1+r2*4]    ; 4
+    vinserti32x4   m1, [r1+r2*2], 2 ; 2
+    vinserti32x4   m2, [r1+r4*2], 2 ; 6
+    vinserti32x4   m1, [r1+r4], 3   ; 3
+    lea            r1, [r1+r2*4]
+    vinserti32x4   m2, [r1+r2], 1   ; 5
+    vinserti32x4   m2, [r1+r4], 3   ; 7
+%else
+    mova          xm2, [r1+r2*2]    ; 2
+    vinserti128    m2, [r1+r4], 1   ; 3
+%endif
+    lea            r1, [r1+r2*4]
     pshufb         m1, m0
     pshufb         m2, m0
-    mova [r0+0*FENC_STRIDE], m1
-    mova [r0+2*FENC_STRIDE], m2
-    lea            r1, [r1+r2*4]
-    add            r0, 4*FENC_STRIDE
-    sub           r3d, 4
+    mova         [r0], m1
+    mova  [r0+mmsize], m2
+    add            r0, 2*mmsize
+    sub           r3d, mmsize/8
     jg .loop
     RET
 %endmacro ; LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2
@@ -1498,6 +1669,9 @@
 INIT_YMM avx2
 LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2
 PLANE_DEINTERLEAVE_RGB
+INIT_ZMM avx512
+LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512
+LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2
 %endif
 
 ; These functions are not general-use; not only do they require aligned input, but memcpy
@@ -2481,8 +2655,8 @@
     paddd           m6, m7             ; i_mb_x += 8
     pand            m3, m8             ; {x, y}
     vprold          m1, m3, 20         ; {y, x} << 4
-    psubw           m3 {k4}, m9, m3    ; {32-x, 32-y}, {32-x, y}
-    psubw           m1 {k5}, m10, m1   ; ({32-y, x}, {y, x}) << 4
+    vpsubw          m3 {k4}, m9, m3    ; {32-x, 32-y}, {32-x, y}
+    vpsubw          m1 {k5}, m10, m1   ; ({32-y, x}, {y, x}) << 4
     pmullw          m3, m1
     paddsw          m3, m3             ; prevent signed overflow in idx0 (32*32<<5 == 0x8000)
     pmulhrsw        m2, m3, m4         ; idx01weight idx23weightp
@@ -2493,11 +2667,11 @@
     vpcmpuw         k2, ym1, ym20, 1    ; {mbx, mbx+1} < width
     kunpckwd        k2, k2, k2
     psrad           m1, m0, 16
-    paddd           m1 {k6}, m11
+    vpaddd          m1 {k6}, m11
     vpcmpud         k1 {k1}, m1, m13, 1 ; mby < height | mby+1 < height
 
     pmaddwd         m0, m15
-    paddd           m0 {k6}, m14        ; idx0 | idx2
+    vpaddd          m0 {k6}, m14        ; idx0 | idx2
     vmovdqu16       m2 {k2}{z}, m2      ; idx01weight | idx23weight
     vptestmd        k1 {k1}, m2, m2     ; mask out offsets with no changes
 
@@ -2589,9 +2763,9 @@
 ;-----------------------------------------------------------------------------
 cglobal mbtree_fix8_unpack, 3,4
 %if mmsize == 32
-    vbroadcastf128 m2, [pf_inv256]
+    vbroadcastf128 m2, [pf_inv16777216]
 %else
-    movaps       m2, [pf_inv256]
+    movaps       m2, [pf_inv16777216]
     mova         m4, [mbtree_fix8_unpack_shuf+16]
 %endif
     mova         m3, [mbtree_fix8_unpack_shuf]
@@ -2612,8 +2786,6 @@
     pshufb       m0, m1, m3
     pshufb       m1, m4
 %endif
-    psrad        m0, 16 ; sign-extend
-    psrad        m1, 16
     cvtdq2ps     m0, m0
     cvtdq2ps     m1, m1
     mulps        m0, m2
@@ -2627,8 +2799,7 @@
     jz .end
 .scalar:
     movzx       r3d, word [r1+2*r2+mmsize]
-    rol         r3w, 8
-    movsx       r3d, r3w
+    bswap       r3d
     ; Use 3-arg cvtsi2ss as a workaround for the fact that the instruction has a stupid dependency on
     ; dst which causes terrible performance when used in a loop otherwise. Blame Intel for poor design.
     cvtsi2ss    xm0, xm2, r3d
@@ -2644,3 +2815,69 @@
 MBTREE_FIX8
 INIT_YMM avx2
 MBTREE_FIX8
+
+%macro MBTREE_FIX8_AVX512_END 0 ; shared loop epilogue: expects a .loop label, r2 as the running index, r3d as the all-ones mask source
+    add      r2, mmsize/2 ; advance by the mmsize/2 elements handled per iteration
+    jle .loop ; index still <= 0: another full iteration fits
+    cmp     r2d, mmsize/2
+    jl .tail ; 0 < index < mmsize/2: some elements are left over
+    RET
+.tail:
+    ; Do the final loop iteration with partial masking to handle the remaining elements.
+    shrx    r3d, r3d, r2d ; (1 << count) - 1
+    kmovd    k1, r3d
+    kshiftrd k2, k1, 16 ; k2 = upper 16 bits of the mask, used for the second vector
+    jmp .loop
+%endmacro
+
+INIT_ZMM avx512
+cglobal mbtree_fix8_pack, 3,4 ; reads 32-bit floats at r1, writes 16-bit values at r0, r2d = element count (presumably dst, src, count - confirm against the C prototype)
+    vbroadcastf32x4 m2, [pf_256] ; scale factor 256.0
+    vbroadcasti32x4 m3, [mbtree_fix8_pack_shuf]
+    psrld       xm4, xm3, 4 ; the table keeps the vpermq indices in bits 4-7 of each byte; shift them down
+    pmovzxbq     m4, xm4 ; widen the low 8 bytes to qword indices for vpermq
+    sub         r2d, mmsize/2
+    mov         r3d, -1
+    movsxdifnidn r2, r2d
+    lea          r1, [r1+4*r2] ; both pointers now sit mmsize/2 elements before the end of their buffers
+    lea          r0, [r0+2*r2]
+    neg          r2 ; r2 = mmsize/2 - count, used as an index that counts upward
+    jg .tail ; fewer than mmsize/2 elements in total: go straight to the masked iteration
+    kmovd        k1, r3d ; full masks for whole iterations
+    kmovw        k2, k1
+.loop:
+    vmulps       m0 {k1}{z}, m2, [r1+4*r2] ; masked load fused with the multiply by 256.0
+    vmulps       m1 {k2}{z}, m2, [r1+4*r2+mmsize]
+    cvttps2dq    m0, m0 ; truncate to int32
+    cvttps2dq    m1, m1
+    packssdw     m0, m1 ; saturate to int16
+    pshufb       m0, m3 ; byte shuffle from the low nibbles of the table
+    vpermq       m0, m4, m0 ; qword permute using the indices prepared in m4
+    vmovdqu16 [r0+2*r2] {k1}, m0
+    MBTREE_FIX8_AVX512_END
+
+cglobal mbtree_fix8_unpack, 3,4 ; reads 16-bit values at r1, writes 32-bit floats at r0, r2d = element count (presumably dst, src, count - confirm against the C prototype)
+    vbroadcasti32x8 m3, [mbtree_fix8_unpack_shuf]
+    vbroadcastf32x4 m2, [pf_inv16777216] ; scale factor 2^-24
+    sub         r2d, mmsize/2
+    mov         r3d, -1
+    movsxdifnidn r2, r2d
+    lea          r1, [r1+2*r2] ; both pointers now sit mmsize/2 elements before the end of their buffers
+    lea          r0, [r0+4*r2]
+    neg          r2 ; r2 = mmsize/2 - count, used as an index that counts upward
+    jg .tail ; fewer than mmsize/2 elements in total: go straight to the masked iteration
+    kmovw        k1, r3d ; full 16-lane masks for whole iterations
+    kmovw        k2, k1
+.loop:
+    mova         m1, [r1+2*r2] ; NOTE(review): full unmasked 64-byte load even on the partial tail iteration - presumably the source buffer is padded; confirm
+    vshufi32x4   m0, m1, m1, q1100 ; m0 = 128-bit lanes 0,0,1,1 of the input
+    vshufi32x4   m1, m1, m1, q3322 ; m1 = 128-bit lanes 2,2,3,3 of the input
+    pshufb       m0, m3 ; table moves each 16-bit value, bytes swapped, into the top half of a zeroed dword
+    pshufb       m1, m3
+    cvtdq2ps     m0, m0
+    cvtdq2ps     m1, m1
+    mulps        m0, m2 ; the value sits 16 bits up, so 2^-24 yields value/256
+    mulps        m1, m2
+    vmovaps [r0+4*r2] {k1}, m0
+    vmovaps [r0+4*r2+mmsize] {k2}, m1
+    MBTREE_FIX8_AVX512_END
diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/mc-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/mc-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/mc-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
@@ -1331,7 +1331,7 @@
     sub    r4, r2
     shl    r6, 4         ;jump = (offset + align*2)*48
 %define avg_w16_addr avg_w16_align1_1_ssse3-(avg_w16_align2_2_ssse3-avg_w16_align1_1_ssse3)
-%ifdef PIC
+%if ARCH_X86_64
     lea    r7, [avg_w16_addr]
     add    r6, r7
 %else
@@ -1515,6 +1515,25 @@
 PREFETCH_FENC 420
 PREFETCH_FENC 422
 
+%if ARCH_X86_64
+    DECLARE_REG_TMP 4
+%else
+    DECLARE_REG_TMP 2
+%endif
+
+cglobal prefetch_fenc_400, 2,3 ; prefetches four consecutive rows starting at an offset derived from the r4m argument
+    movifnidn  t0d, r4m
+    FIX_STRIDES r1
+    and        t0d, 3 ; keep the low two bits of the argument
+    imul       t0d, r1d ; ... multiplied by the stride in r1
+    lea         r0, [r0+t0*4+64*SIZEOF_PIXEL] ; r0 += 4*t0 plus 64 pixels
+    prefetcht0 [r0]
+    prefetcht0 [r0+r1]
+    lea         r0, [r0+r1*2] ; step two rows down for the second pair
+    prefetcht0 [r0]
+    prefetcht0 [r0+r1]
+    RET
+
 ;-----------------------------------------------------------------------------
 ; void prefetch_ref( pixel *pix, intptr_t stride, int parity )
 ;-----------------------------------------------------------------------------
@@ -2001,7 +2020,7 @@
 %if cpuflag(cache64)
     mov       t0d, r3d
     and       t0d, 7
-%ifdef PIC
+%if ARCH_X86_64
     lea        t1, [ch_shuf_adj]
     movddup   xm5, [t1 + t0*4]
 %else
diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc-c.c x264-0.158.2988+git-20191101.7817004/common/x86/mc-c.c
--- x264-0.152.2854+gite9a5903/common/x86/mc-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/mc-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -28,6 +28,40 @@
 #include "common/common.h"
 #include "mc.h"
 
+#define x264_pixel_avg_16x16_avx2 x264_template(pixel_avg_16x16_avx2)
+#define x264_pixel_avg_16x16_avx512 x264_template(pixel_avg_16x16_avx512)
+#define x264_pixel_avg_16x16_mmx2 x264_template(pixel_avg_16x16_mmx2)
+#define x264_pixel_avg_16x16_sse2 x264_template(pixel_avg_16x16_sse2)
+#define x264_pixel_avg_16x16_ssse3 x264_template(pixel_avg_16x16_ssse3)
+#define x264_pixel_avg_16x8_avx2 x264_template(pixel_avg_16x8_avx2)
+#define x264_pixel_avg_16x8_avx512 x264_template(pixel_avg_16x8_avx512)
+#define x264_pixel_avg_16x8_mmx2 x264_template(pixel_avg_16x8_mmx2)
+#define x264_pixel_avg_16x8_sse2 x264_template(pixel_avg_16x8_sse2)
+#define x264_pixel_avg_16x8_ssse3 x264_template(pixel_avg_16x8_ssse3)
+#define x264_pixel_avg_4x16_mmx2 x264_template(pixel_avg_4x16_mmx2)
+#define x264_pixel_avg_4x16_sse2 x264_template(pixel_avg_4x16_sse2)
+#define x264_pixel_avg_4x16_ssse3 x264_template(pixel_avg_4x16_ssse3)
+#define x264_pixel_avg_4x2_mmx2 x264_template(pixel_avg_4x2_mmx2)
+#define x264_pixel_avg_4x2_sse2 x264_template(pixel_avg_4x2_sse2)
+#define x264_pixel_avg_4x2_ssse3 x264_template(pixel_avg_4x2_ssse3)
+#define x264_pixel_avg_4x4_mmx2 x264_template(pixel_avg_4x4_mmx2)
+#define x264_pixel_avg_4x4_sse2 x264_template(pixel_avg_4x4_sse2)
+#define x264_pixel_avg_4x4_ssse3 x264_template(pixel_avg_4x4_ssse3)
+#define x264_pixel_avg_4x8_mmx2 x264_template(pixel_avg_4x8_mmx2)
+#define x264_pixel_avg_4x8_sse2 x264_template(pixel_avg_4x8_sse2)
+#define x264_pixel_avg_4x8_ssse3 x264_template(pixel_avg_4x8_ssse3)
+#define x264_pixel_avg_8x16_avx512 x264_template(pixel_avg_8x16_avx512)
+#define x264_pixel_avg_8x16_mmx2 x264_template(pixel_avg_8x16_mmx2)
+#define x264_pixel_avg_8x16_sse2 x264_template(pixel_avg_8x16_sse2)
+#define x264_pixel_avg_8x16_ssse3 x264_template(pixel_avg_8x16_ssse3)
+#define x264_pixel_avg_8x4_avx512 x264_template(pixel_avg_8x4_avx512)
+#define x264_pixel_avg_8x4_mmx2 x264_template(pixel_avg_8x4_mmx2)
+#define x264_pixel_avg_8x4_sse2 x264_template(pixel_avg_8x4_sse2)
+#define x264_pixel_avg_8x4_ssse3 x264_template(pixel_avg_8x4_ssse3)
+#define x264_pixel_avg_8x8_avx512 x264_template(pixel_avg_8x8_avx512)
+#define x264_pixel_avg_8x8_mmx2 x264_template(pixel_avg_8x8_mmx2)
+#define x264_pixel_avg_8x8_sse2 x264_template(pixel_avg_8x8_sse2)
+#define x264_pixel_avg_8x8_ssse3 x264_template(pixel_avg_8x8_ssse3)
 #define DECL_SUF( func, args )\
     void func##_mmx2 args;\
     void func##_sse2 args;\
@@ -44,10 +78,43 @@
 DECL_SUF( x264_pixel_avg_4x8,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
 DECL_SUF( x264_pixel_avg_4x4,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
 DECL_SUF( x264_pixel_avg_4x2,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+#undef DECL_SUF
 
+#define x264_mc_weight_w12_mmx2 x264_template(mc_weight_w12_mmx2)
+#define x264_mc_weight_w12_sse2 x264_template(mc_weight_w12_sse2)
+#define x264_mc_weight_w16_avx2 x264_template(mc_weight_w16_avx2)
+#define x264_mc_weight_w16_mmx2 x264_template(mc_weight_w16_mmx2)
+#define x264_mc_weight_w16_sse2 x264_template(mc_weight_w16_sse2)
+#define x264_mc_weight_w16_ssse3 x264_template(mc_weight_w16_ssse3)
+#define x264_mc_weight_w20_avx2 x264_template(mc_weight_w20_avx2)
+#define x264_mc_weight_w20_mmx2 x264_template(mc_weight_w20_mmx2)
+#define x264_mc_weight_w20_sse2 x264_template(mc_weight_w20_sse2)
+#define x264_mc_weight_w20_ssse3 x264_template(mc_weight_w20_ssse3)
+#define x264_mc_weight_w4_mmx2 x264_template(mc_weight_w4_mmx2)
+#define x264_mc_weight_w4_ssse3 x264_template(mc_weight_w4_ssse3)
+#define x264_mc_weight_w8_avx2 x264_template(mc_weight_w8_avx2)
+#define x264_mc_weight_w8_mmx2 x264_template(mc_weight_w8_mmx2)
+#define x264_mc_weight_w8_sse2 x264_template(mc_weight_w8_sse2)
+#define x264_mc_weight_w8_ssse3 x264_template(mc_weight_w8_ssse3)
 #define MC_WEIGHT(w,type) \
     void x264_mc_weight_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
 
+#define x264_mc_offsetadd_w12_mmx2 x264_template(mc_offsetadd_w12_mmx2)
+#define x264_mc_offsetadd_w16_mmx2 x264_template(mc_offsetadd_w16_mmx2)
+#define x264_mc_offsetadd_w16_sse2 x264_template(mc_offsetadd_w16_sse2)
+#define x264_mc_offsetadd_w20_mmx2 x264_template(mc_offsetadd_w20_mmx2)
+#define x264_mc_offsetadd_w20_sse2 x264_template(mc_offsetadd_w20_sse2)
+#define x264_mc_offsetadd_w4_mmx2 x264_template(mc_offsetadd_w4_mmx2)
+#define x264_mc_offsetadd_w8_mmx2 x264_template(mc_offsetadd_w8_mmx2)
+#define x264_mc_offsetadd_w8_sse2 x264_template(mc_offsetadd_w8_sse2)
+#define x264_mc_offsetsub_w12_mmx2 x264_template(mc_offsetsub_w12_mmx2)
+#define x264_mc_offsetsub_w16_mmx2 x264_template(mc_offsetsub_w16_mmx2)
+#define x264_mc_offsetsub_w16_sse2 x264_template(mc_offsetsub_w16_sse2)
+#define x264_mc_offsetsub_w20_mmx2 x264_template(mc_offsetsub_w20_mmx2)
+#define x264_mc_offsetsub_w20_sse2 x264_template(mc_offsetsub_w20_sse2)
+#define x264_mc_offsetsub_w4_mmx2 x264_template(mc_offsetsub_w4_mmx2)
+#define x264_mc_offsetsub_w8_mmx2 x264_template(mc_offsetsub_w8_mmx2)
+#define x264_mc_offsetsub_w8_sse2 x264_template(mc_offsetsub_w8_sse2)
 #define MC_WEIGHT_OFFSET(w,type) \
     void x264_mc_offsetadd_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ); \
     void x264_mc_offsetsub_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ); \
@@ -73,113 +140,200 @@
 MC_WEIGHT( 8, avx2 )
 MC_WEIGHT( 16, avx2 )
 MC_WEIGHT( 20, avx2 )
-#undef MC_OFFSET
+#undef MC_WEIGHT_OFFSET
 #undef MC_WEIGHT
 
+#define x264_mc_copy_w4_mmx x264_template(mc_copy_w4_mmx)
 void x264_mc_copy_w4_mmx ( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_mc_copy_w8_mmx x264_template(mc_copy_w8_mmx)
 void x264_mc_copy_w8_mmx ( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_mc_copy_w8_sse x264_template(mc_copy_w8_sse)
 void x264_mc_copy_w8_sse ( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_mc_copy_w16_mmx x264_template(mc_copy_w16_mmx)
 void x264_mc_copy_w16_mmx( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_mc_copy_w16_sse x264_template(mc_copy_w16_sse)
 void x264_mc_copy_w16_sse( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_mc_copy_w16_aligned_sse x264_template(mc_copy_w16_aligned_sse)
 void x264_mc_copy_w16_aligned_sse( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_mc_copy_w16_avx x264_template(mc_copy_w16_avx)
 void x264_mc_copy_w16_avx( uint16_t *, intptr_t, uint16_t *, intptr_t, int );
+#define x264_mc_copy_w16_aligned_avx x264_template(mc_copy_w16_aligned_avx)
 void x264_mc_copy_w16_aligned_avx( uint16_t *, intptr_t, uint16_t *, intptr_t, int );
+#define x264_prefetch_fenc_400_mmx2 x264_template(prefetch_fenc_400_mmx2)
+void x264_prefetch_fenc_400_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_prefetch_fenc_420_mmx2 x264_template(prefetch_fenc_420_mmx2)
 void x264_prefetch_fenc_420_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_prefetch_fenc_422_mmx2 x264_template(prefetch_fenc_422_mmx2)
 void x264_prefetch_fenc_422_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
+#define x264_prefetch_ref_mmx2 x264_template(prefetch_ref_mmx2)
 void x264_prefetch_ref_mmx2( pixel *, intptr_t, int );
+#define x264_plane_copy_core_sse x264_template(plane_copy_core_sse)
 void x264_plane_copy_core_sse( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+#define x264_plane_copy_core_avx x264_template(plane_copy_core_avx)
 void x264_plane_copy_core_avx( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+#define x264_plane_copy_avx512 x264_template(plane_copy_avx512)
+void x264_plane_copy_avx512( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+#define x264_plane_copy_swap_core_ssse3 x264_template(plane_copy_swap_core_ssse3)
 void x264_plane_copy_swap_core_ssse3( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+#define x264_plane_copy_swap_core_avx2 x264_template(plane_copy_swap_core_avx2)
 void x264_plane_copy_swap_core_avx2 ( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+#define x264_plane_copy_swap_avx512 x264_template(plane_copy_swap_avx512)
+void x264_plane_copy_swap_avx512( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+#define x264_plane_copy_interleave_core_mmx2 x264_template(plane_copy_interleave_core_mmx2)
 void x264_plane_copy_interleave_core_mmx2( pixel *dst,  intptr_t i_dst,
                                            pixel *srcu, intptr_t i_srcu,
                                            pixel *srcv, intptr_t i_srcv, int w, int h );
+#define x264_plane_copy_interleave_core_sse2 x264_template(plane_copy_interleave_core_sse2)
 void x264_plane_copy_interleave_core_sse2( pixel *dst,  intptr_t i_dst,
                                            pixel *srcu, intptr_t i_srcu,
                                            pixel *srcv, intptr_t i_srcv, int w, int h );
+#define x264_plane_copy_interleave_core_avx x264_template(plane_copy_interleave_core_avx)
 void x264_plane_copy_interleave_core_avx( pixel *dst,  intptr_t i_dst,
                                           pixel *srcu, intptr_t i_srcu,
                                           pixel *srcv, intptr_t i_srcv, int w, int h );
+#define x264_plane_copy_deinterleave_sse2 x264_template(plane_copy_deinterleave_sse2)
 void x264_plane_copy_deinterleave_sse2( pixel *dsta, intptr_t i_dsta,
                                         pixel *dstb, intptr_t i_dstb,
                                         pixel *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_ssse3 x264_template(plane_copy_deinterleave_ssse3)
 void x264_plane_copy_deinterleave_ssse3( uint8_t *dsta, intptr_t i_dsta,
                                          uint8_t *dstb, intptr_t i_dstb,
                                          uint8_t *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_avx x264_template(plane_copy_deinterleave_avx)
 void x264_plane_copy_deinterleave_avx( uint16_t *dsta, intptr_t i_dsta,
                                        uint16_t *dstb, intptr_t i_dstb,
                                        uint16_t *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_avx2 x264_template(plane_copy_deinterleave_avx2)
 void x264_plane_copy_deinterleave_avx2( pixel *dsta, intptr_t i_dsta,
                                         pixel *dstb, intptr_t i_dstb,
                                         pixel *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_rgb_sse2 x264_template(plane_copy_deinterleave_rgb_sse2)
 void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
                                              pixel *dstb, intptr_t i_dstb,
                                              pixel *dstc, intptr_t i_dstc,
                                              pixel *src,  intptr_t i_src, int pw, int w, int h );
+#define x264_plane_copy_deinterleave_rgb_ssse3 x264_template(plane_copy_deinterleave_rgb_ssse3)
 void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta,
                                              pixel *dstb, intptr_t i_dstb,
                                              pixel *dstc, intptr_t i_dstc,
                                              pixel *src,  intptr_t i_src, int pw, int w, int h );
+#define x264_plane_copy_deinterleave_rgb_avx2 x264_template(plane_copy_deinterleave_rgb_avx2)
 void x264_plane_copy_deinterleave_rgb_avx2 ( pixel *dsta, intptr_t i_dsta,
                                              pixel *dstb, intptr_t i_dstb,
                                              pixel *dstc, intptr_t i_dstc,
                                              pixel *src,  intptr_t i_src, int pw, int w, int h );
+#define x264_plane_copy_deinterleave_v210_ssse3 x264_template(plane_copy_deinterleave_v210_ssse3)
 void x264_plane_copy_deinterleave_v210_ssse3 ( uint16_t *dstu, intptr_t i_dstu,
                                                uint16_t *dstv, intptr_t i_dstv,
                                                uint32_t *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_v210_avx x264_template(plane_copy_deinterleave_v210_avx)
 void x264_plane_copy_deinterleave_v210_avx   ( uint16_t *dstu, intptr_t i_dstu,
                                                uint16_t *dstv, intptr_t i_dstv,
                                                uint32_t *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_v210_avx2 x264_template(plane_copy_deinterleave_v210_avx2)
 void x264_plane_copy_deinterleave_v210_avx2  ( uint16_t *dstu, intptr_t i_dstu,
                                                uint16_t *dstv, intptr_t i_dstv,
                                                uint32_t *src,  intptr_t i_src, int w, int h );
+#define x264_plane_copy_deinterleave_v210_avx512 x264_template(plane_copy_deinterleave_v210_avx512)
 void x264_plane_copy_deinterleave_v210_avx512( uint16_t *dstu, intptr_t i_dstu,
                                                uint16_t *dstv, intptr_t i_dstv,
                                                uint32_t *src,  intptr_t i_src, int w, int h );
+#define x264_store_interleave_chroma_mmx2 x264_template(store_interleave_chroma_mmx2)
 void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_store_interleave_chroma_sse2 x264_template(store_interleave_chroma_sse2)
 void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_store_interleave_chroma_avx x264_template(store_interleave_chroma_avx)
 void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+#define x264_load_deinterleave_chroma_fenc_sse2 x264_template(load_deinterleave_chroma_fenc_sse2)
 void x264_load_deinterleave_chroma_fenc_sse2( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_ssse3 x264_template(load_deinterleave_chroma_fenc_ssse3)
 void x264_load_deinterleave_chroma_fenc_ssse3( uint8_t *dst, uint8_t *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_avx x264_template(load_deinterleave_chroma_fenc_avx)
 void x264_load_deinterleave_chroma_fenc_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_avx2 x264_template(load_deinterleave_chroma_fenc_avx2)
 void x264_load_deinterleave_chroma_fenc_avx2( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fenc_avx512 x264_template(load_deinterleave_chroma_fenc_avx512)
+void x264_load_deinterleave_chroma_fenc_avx512( uint8_t *dst, uint8_t *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fdec_sse2 x264_template(load_deinterleave_chroma_fdec_sse2)
 void x264_load_deinterleave_chroma_fdec_sse2( pixel *dst, pixel *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fdec_ssse3 x264_template(load_deinterleave_chroma_fdec_ssse3)
 void x264_load_deinterleave_chroma_fdec_ssse3( uint8_t *dst, uint8_t *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fdec_avx x264_template(load_deinterleave_chroma_fdec_avx)
 void x264_load_deinterleave_chroma_fdec_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fdec_avx2 x264_template(load_deinterleave_chroma_fdec_avx2)
 void x264_load_deinterleave_chroma_fdec_avx2( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
+#define x264_load_deinterleave_chroma_fdec_avx512 x264_template(load_deinterleave_chroma_fdec_avx512)
+void x264_load_deinterleave_chroma_fdec_avx512( uint8_t *dst, uint8_t *src, intptr_t i_src, int height );
+#define x264_memcpy_aligned_sse x264_template(memcpy_aligned_sse)
 void *x264_memcpy_aligned_sse   ( void *dst, const void *src, size_t n );
+#define x264_memcpy_aligned_avx x264_template(memcpy_aligned_avx)
 void *x264_memcpy_aligned_avx   ( void *dst, const void *src, size_t n );
+#define x264_memcpy_aligned_avx512 x264_template(memcpy_aligned_avx512)
 void *x264_memcpy_aligned_avx512( void *dst, const void *src, size_t n );
+#define x264_memzero_aligned_sse x264_template(memzero_aligned_sse)
 void x264_memzero_aligned_sse   ( void *dst, size_t n );
+#define x264_memzero_aligned_avx x264_template(memzero_aligned_avx)
 void x264_memzero_aligned_avx   ( void *dst, size_t n );
+#define x264_memzero_aligned_avx512 x264_template(memzero_aligned_avx512)
 void x264_memzero_aligned_avx512( void *dst, size_t n );
+#define x264_integral_init4h_sse4 x264_template(integral_init4h_sse4)
 void x264_integral_init4h_sse4( uint16_t *sum, uint8_t *pix, intptr_t stride );
+#define x264_integral_init4h_avx2 x264_template(integral_init4h_avx2)
 void x264_integral_init4h_avx2( uint16_t *sum, uint8_t *pix, intptr_t stride );
+#define x264_integral_init8h_sse4 x264_template(integral_init8h_sse4)
 void x264_integral_init8h_sse4( uint16_t *sum, uint8_t *pix, intptr_t stride );
+#define x264_integral_init8h_avx x264_template(integral_init8h_avx)
 void x264_integral_init8h_avx ( uint16_t *sum, uint8_t *pix, intptr_t stride );
+#define x264_integral_init8h_avx2 x264_template(integral_init8h_avx2)
 void x264_integral_init8h_avx2( uint16_t *sum, uint8_t *pix, intptr_t stride );
+#define x264_integral_init4v_mmx x264_template(integral_init4v_mmx)
 void x264_integral_init4v_mmx  ( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+#define x264_integral_init4v_sse2 x264_template(integral_init4v_sse2)
 void x264_integral_init4v_sse2 ( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+#define x264_integral_init4v_ssse3 x264_template(integral_init4v_ssse3)
 void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+#define x264_integral_init4v_avx2 x264_template(integral_init4v_avx2)
 void x264_integral_init4v_avx2( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+#define x264_integral_init8v_mmx x264_template(integral_init8v_mmx)
 void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
+#define x264_integral_init8v_sse2 x264_template(integral_init8v_sse2)
 void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
+#define x264_integral_init8v_avx2 x264_template(integral_init8v_avx2)
 void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride );
+#define x264_mbtree_propagate_cost_sse2 x264_template(mbtree_propagate_cost_sse2)
 void x264_mbtree_propagate_cost_sse2  ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                         uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+#define x264_mbtree_propagate_cost_avx x264_template(mbtree_propagate_cost_avx)
 void x264_mbtree_propagate_cost_avx   ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                         uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+#define x264_mbtree_propagate_cost_fma4 x264_template(mbtree_propagate_cost_fma4)
 void x264_mbtree_propagate_cost_fma4  ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                         uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+#define x264_mbtree_propagate_cost_avx2 x264_template(mbtree_propagate_cost_avx2)
 void x264_mbtree_propagate_cost_avx2  ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                         uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+#define x264_mbtree_propagate_cost_avx512 x264_template(mbtree_propagate_cost_avx512)
 void x264_mbtree_propagate_cost_avx512( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                         uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+#define x264_mbtree_fix8_pack_ssse3 x264_template(mbtree_fix8_pack_ssse3)
 void x264_mbtree_fix8_pack_ssse3( uint16_t *dst, float *src, int count );
+#define x264_mbtree_fix8_pack_avx2 x264_template(mbtree_fix8_pack_avx2)
 void x264_mbtree_fix8_pack_avx2 ( uint16_t *dst, float *src, int count );
+#define x264_mbtree_fix8_pack_avx512 x264_template(mbtree_fix8_pack_avx512)
+void x264_mbtree_fix8_pack_avx512( uint16_t *dst, float *src, int count );
+#define x264_mbtree_fix8_unpack_ssse3 x264_template(mbtree_fix8_unpack_ssse3)
 void x264_mbtree_fix8_unpack_ssse3( float *dst, uint16_t *src, int count );
+#define x264_mbtree_fix8_unpack_avx2 x264_template(mbtree_fix8_unpack_avx2)
 void x264_mbtree_fix8_unpack_avx2 ( float *dst, uint16_t *src, int count );
+#define x264_mbtree_fix8_unpack_avx512 x264_template(mbtree_fix8_unpack_avx512)
+void x264_mbtree_fix8_unpack_avx512( float *dst, uint16_t *src, int count );
 
+#define x264_mc_chroma_avx x264_template(mc_chroma_avx)
+#define x264_mc_chroma_avx2 x264_template(mc_chroma_avx2)
+#define x264_mc_chroma_cache64_ssse3 x264_template(mc_chroma_cache64_ssse3)
+#define x264_mc_chroma_mmx2 x264_template(mc_chroma_mmx2)
+#define x264_mc_chroma_sse2 x264_template(mc_chroma_sse2)
+#define x264_mc_chroma_ssse3 x264_template(mc_chroma_ssse3)
 #define MC_CHROMA(cpu)\
 void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, intptr_t i_dst, pixel *src, intptr_t i_src,\
                            int dx, int dy, int i_width, int i_height );
@@ -189,7 +343,15 @@
 MC_CHROMA(cache64_ssse3)
 MC_CHROMA(avx)
 MC_CHROMA(avx2)
+#undef MC_CHROMA
 
+#define x264_frame_init_lowres_core_avx x264_template(frame_init_lowres_core_avx)
+#define x264_frame_init_lowres_core_avx2 x264_template(frame_init_lowres_core_avx2)
+#define x264_frame_init_lowres_core_mmx2 x264_template(frame_init_lowres_core_mmx2)
+#define x264_frame_init_lowres_core_cache32_mmx2 x264_template(frame_init_lowres_core_cache32_mmx2)
+#define x264_frame_init_lowres_core_sse2 x264_template(frame_init_lowres_core_sse2)
+#define x264_frame_init_lowres_core_ssse3 x264_template(frame_init_lowres_core_ssse3)
+#define x264_frame_init_lowres_core_xop x264_template(frame_init_lowres_core_xop)
 #define LOWRES(cpu)\
 void x264_frame_init_lowres_core_##cpu( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,\
                                         intptr_t src_stride, intptr_t dst_stride, int width, int height );
@@ -200,7 +362,34 @@
 LOWRES(avx)
 LOWRES(xop)
 LOWRES(avx2)
+#undef LOWRES
 
+#define x264_pixel_avg2_w10_mmx2 x264_template(pixel_avg2_w10_mmx2)
+#define x264_pixel_avg2_w10_sse2 x264_template(pixel_avg2_w10_sse2)
+#define x264_pixel_avg2_w12_cache32_mmx2 x264_template(pixel_avg2_w12_cache32_mmx2)
+#define x264_pixel_avg2_w12_cache64_mmx2 x264_template(pixel_avg2_w12_cache64_mmx2)
+#define x264_pixel_avg2_w12_mmx2 x264_template(pixel_avg2_w12_mmx2)
+#define x264_pixel_avg2_w16_avx2 x264_template(pixel_avg2_w16_avx2)
+#define x264_pixel_avg2_w16_cache32_mmx2 x264_template(pixel_avg2_w16_cache32_mmx2)
+#define x264_pixel_avg2_w16_cache64_mmx2 x264_template(pixel_avg2_w16_cache64_mmx2)
+#define x264_pixel_avg2_w16_cache64_sse2 x264_template(pixel_avg2_w16_cache64_sse2)
+#define x264_pixel_avg2_w16_cache64_ssse3 x264_template(pixel_avg2_w16_cache64_ssse3)
+#define x264_pixel_avg2_w16_mmx2 x264_template(pixel_avg2_w16_mmx2)
+#define x264_pixel_avg2_w16_sse2 x264_template(pixel_avg2_w16_sse2)
+#define x264_pixel_avg2_w18_avx2 x264_template(pixel_avg2_w18_avx2)
+#define x264_pixel_avg2_w18_mmx2 x264_template(pixel_avg2_w18_mmx2)
+#define x264_pixel_avg2_w18_sse2 x264_template(pixel_avg2_w18_sse2)
+#define x264_pixel_avg2_w20_avx2 x264_template(pixel_avg2_w20_avx2)
+#define x264_pixel_avg2_w20_cache32_mmx2 x264_template(pixel_avg2_w20_cache32_mmx2)
+#define x264_pixel_avg2_w20_cache64_mmx2 x264_template(pixel_avg2_w20_cache64_mmx2)
+#define x264_pixel_avg2_w20_cache64_sse2 x264_template(pixel_avg2_w20_cache64_sse2)
+#define x264_pixel_avg2_w20_mmx2 x264_template(pixel_avg2_w20_mmx2)
+#define x264_pixel_avg2_w20_sse2 x264_template(pixel_avg2_w20_sse2)
+#define x264_pixel_avg2_w4_mmx2 x264_template(pixel_avg2_w4_mmx2)
+#define x264_pixel_avg2_w8_cache32_mmx2 x264_template(pixel_avg2_w8_cache32_mmx2)
+#define x264_pixel_avg2_w8_cache64_mmx2 x264_template(pixel_avg2_w8_cache64_mmx2)
+#define x264_pixel_avg2_w8_mmx2 x264_template(pixel_avg2_w8_mmx2)
+#define x264_pixel_avg2_w8_sse2 x264_template(pixel_avg2_w8_sse2)
 #define PIXEL_AVG_W(width,cpu)\
 void x264_pixel_avg2_w##width##_##cpu( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t );
 /* This declares some functions that don't exist, but that isn't a problem. */
@@ -214,9 +403,11 @@
 PIXEL_AVG_WALL(sse2)
 PIXEL_AVG_WALL(cache64_ssse3)
 PIXEL_AVG_WALL(avx2)
+#undef PIXEL_AVG_W
+#undef PIXEL_AVG_WALL
 
 #define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
-static void (* const x264_pixel_avg_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t ) =\
+static void (* const pixel_avg_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t ) =\
 {\
     NULL,\
     x264_pixel_avg2_w4_##name1,\
@@ -228,6 +419,10 @@
 
 #if HIGH_BIT_DEPTH
 /* we can replace w12/w20 with w10/w18 as only 9/17 pixels in fact are important */
+#undef x264_pixel_avg2_w12_mmx2
+#undef x264_pixel_avg2_w20_mmx2
+#undef x264_pixel_avg2_w20_sse2
+#undef x264_pixel_avg2_w20_avx2
 #define x264_pixel_avg2_w12_mmx2       x264_pixel_avg2_w10_mmx2
 #define x264_pixel_avg2_w20_mmx2       x264_pixel_avg2_w18_mmx2
 #define x264_pixel_avg2_w12_sse2         x264_pixel_avg2_w10_sse2
@@ -259,7 +454,7 @@
 #endif // HIGH_BIT_DEPTH
 
 #define MC_COPY_WTAB(instr, name1, name2, name3)\
-static void (* const x264_mc_copy_wtab_##instr[5])( pixel *, intptr_t, pixel *, intptr_t, int ) =\
+static void (* const mc_copy_wtab_##instr[5])( pixel *, intptr_t, pixel *, intptr_t, int ) =\
 {\
     NULL,\
     x264_mc_copy_w4_##name1,\
@@ -277,7 +472,7 @@
 #endif
 
 #define MC_WEIGHT_WTAB(function, instr, name1, name2, w12version)\
-    static void (* x264_mc_##function##_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ) =\
+static void (* mc_##function##_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ) =\
 {\
     x264_mc_##function##_w4_##name1,\
     x264_mc_##function##_w4_##name1,\
@@ -295,7 +490,7 @@
 MC_WEIGHT_WTAB(offsetadd,sse2,mmx2,sse2,16)
 MC_WEIGHT_WTAB(offsetsub,sse2,mmx2,sse2,16)
 
-static void x264_weight_cache_mmx2( x264_t *h, x264_weight_t *w )
+static void weight_cache_mmx2( x264_t *h, x264_weight_t *w )
 {
     if( w->i_scale == 1<<w->i_denom )
     {
@@ -327,7 +522,7 @@
 MC_WEIGHT_WTAB(weight,ssse3,ssse3,ssse3,16)
 MC_WEIGHT_WTAB(weight,avx2,ssse3,avx2,16)
 
-static void x264_weight_cache_mmx2( x264_t *h, x264_weight_t *w )
+static void weight_cache_mmx2( x264_t *h, x264_weight_t *w )
 {
     int i;
     int16_t den1;
@@ -350,7 +545,7 @@
     }
 }
 
-static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w )
+static void weight_cache_ssse3( x264_t *h, x264_weight_t *w )
 {
     int i, den1;
     if( w->i_scale == 1<<w->i_denom )
@@ -385,7 +580,7 @@
     if( qpel_idx & 5 ) /* qpel interpolation needed */\
     {\
         pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
-        x264_pixel_avg_wtab_##instr1[i_width>>2](\
+        pixel_avg_wtab_##instr1[i_width>>2](\
                 dst, i_dst_stride, src1, i_src_stride,\
                 src2, i_height );\
         if( weight->weightfn )\
@@ -394,7 +589,7 @@
     else if( weight->weightfn )\
         weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );\
     else\
-        x264_mc_copy_wtab_##instr2[i_width>>2](dst, i_dst_stride, src1, i_src_stride, i_height );\
+        mc_copy_wtab_##instr2[i_width>>2](dst, i_dst_stride, src1, i_src_stride, i_height );\
 }
 
 MC_LUMA(mmx2,mmx2,mmx)
@@ -423,7 +618,7 @@
     if( qpel_idx & 5 ) /* qpel interpolation needed */\
     {\
         pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
-        x264_pixel_avg_wtab_##name[i_width>>2](\
+        pixel_avg_wtab_##name[i_width>>2](\
                 dst, *i_dst_stride, src1, i_src_stride,\
                 src2, i_height );\
         if( weight->weightfn )\
@@ -455,6 +650,25 @@
 GET_REF(cache64_ssse3_atom)
 #endif // !HIGH_BIT_DEPTH
 
+#define x264_hpel_filter_avx x264_template(hpel_filter_avx)
+#define x264_hpel_filter_avx2 x264_template(hpel_filter_avx2)
+#define x264_hpel_filter_c_mmx2 x264_template(hpel_filter_c_mmx2)
+#define x264_hpel_filter_c_sse2 x264_template(hpel_filter_c_sse2)
+#define x264_hpel_filter_c_ssse3 x264_template(hpel_filter_c_ssse3)
+#define x264_hpel_filter_c_avx x264_template(hpel_filter_c_avx)
+#define x264_hpel_filter_c_avx2 x264_template(hpel_filter_c_avx2)
+#define x264_hpel_filter_h_mmx2 x264_template(hpel_filter_h_mmx2)
+#define x264_hpel_filter_h_sse2 x264_template(hpel_filter_h_sse2)
+#define x264_hpel_filter_h_ssse3 x264_template(hpel_filter_h_ssse3)
+#define x264_hpel_filter_h_avx x264_template(hpel_filter_h_avx)
+#define x264_hpel_filter_h_avx2 x264_template(hpel_filter_h_avx2)
+#define x264_hpel_filter_sse2 x264_template(hpel_filter_sse2)
+#define x264_hpel_filter_ssse3 x264_template(hpel_filter_ssse3)
+#define x264_hpel_filter_v_mmx2 x264_template(hpel_filter_v_mmx2)
+#define x264_hpel_filter_v_sse2 x264_template(hpel_filter_v_sse2)
+#define x264_hpel_filter_v_ssse3 x264_template(hpel_filter_v_ssse3)
+#define x264_hpel_filter_v_avx x264_template(hpel_filter_v_avx)
+#define x264_hpel_filter_v_avx2 x264_template(hpel_filter_v_avx2)
 #define HPEL(align, cpu, cpuv, cpuc, cpuh)\
 void x264_hpel_filter_v_##cpuv( pixel *dst, pixel *src, int16_t *buf, intptr_t stride, intptr_t width);\
 void x264_hpel_filter_c_##cpuc( pixel *dst, int16_t *buf, intptr_t width );\
@@ -550,18 +764,22 @@
 } while( 0 )
 #endif
 
+#define x264_mbtree_propagate_list_internal_ssse3 x264_template(mbtree_propagate_list_internal_ssse3)
 PROPAGATE_LIST(ssse3)
+#define x264_mbtree_propagate_list_internal_avx x264_template(mbtree_propagate_list_internal_avx)
 PROPAGATE_LIST(avx)
+#define x264_mbtree_propagate_list_internal_avx2 x264_template(mbtree_propagate_list_internal_avx2)
 PROPAGATE_LIST(avx2)
 
 #if ARCH_X86_64
+#define x264_mbtree_propagate_list_internal_avx512 x264_template(mbtree_propagate_list_internal_avx512)
 void x264_mbtree_propagate_list_internal_avx512( size_t len, uint16_t *ref_costs, int16_t (*mvs)[2], int16_t *propagate_amount,
                                                  uint16_t *lowres_costs, int bipred_weight, int mb_y,
                                                  int width, int height, int stride, int list_mask );
 
-static void x264_mbtree_propagate_list_avx512( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
-                                               int16_t *propagate_amount, uint16_t *lowres_costs,
-                                               int bipred_weight, int mb_y, int len, int list )
+static void mbtree_propagate_list_avx512( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                          int16_t *propagate_amount, uint16_t *lowres_costs,
+                                          int bipred_weight, int mb_y, int len, int list )
 {
     x264_mbtree_propagate_list_internal_avx512( len, ref_costs, mvs, propagate_amount, lowres_costs, bipred_weight << 9,
                                                 mb_y << 16, h->mb.i_mb_width, h->mb.i_mb_height, h->mb.i_mb_stride,
@@ -584,11 +802,12 @@
     if( !(cpu&X264_CPU_MMX2) )
         return;
 
+    pf->prefetch_fenc_400 = x264_prefetch_fenc_400_mmx2;
     pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2;
     pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2;
     pf->prefetch_ref  = x264_prefetch_ref_mmx2;
 
-    pf->plane_copy_interleave = x264_plane_copy_interleave_mmx2;
+    pf->plane_copy_interleave = plane_copy_interleave_mmx2;
     pf->store_interleave_chroma = x264_store_interleave_chroma_mmx2;
 
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmx2;
@@ -605,10 +824,10 @@
     pf->get_ref = get_ref_mmx2;
     pf->mc_chroma = x264_mc_chroma_mmx2;
     pf->hpel_filter = x264_hpel_filter_mmx2;
-    pf->weight = x264_mc_weight_wtab_mmx2;
-    pf->weight_cache = x264_weight_cache_mmx2;
-    pf->offsetadd = x264_mc_offsetadd_wtab_mmx2;
-    pf->offsetsub = x264_mc_offsetsub_wtab_mmx2;
+    pf->weight = mc_weight_wtab_mmx2;
+    pf->weight_cache = weight_cache_mmx2;
+    pf->offsetadd = mc_offsetadd_wtab_mmx2;
+    pf->offsetsub = mc_offsetsub_wtab_mmx2;
 
     pf->frame_init_lowres_core = x264_frame_init_lowres_core_mmx2;
 
@@ -616,7 +835,7 @@
     {
         pf->memcpy_aligned  = x264_memcpy_aligned_sse;
         pf->memzero_aligned = x264_memzero_aligned_sse;
-        pf->plane_copy = x264_plane_copy_sse;
+        pf->plane_copy = plane_copy_sse;
     }
 
 #if HIGH_BIT_DEPTH
@@ -633,9 +852,9 @@
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_sse2;
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_sse2;
 
-    pf->plane_copy_interleave   = x264_plane_copy_interleave_sse2;
+    pf->plane_copy_interleave   = plane_copy_interleave_sse2;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2;
-    pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_sse2;
+    pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_sse2;
 
     if( cpu&X264_CPU_SSE2_IS_FAST )
     {
@@ -648,8 +867,8 @@
     pf->integral_init8v = x264_integral_init8v_sse2;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
     pf->store_interleave_chroma = x264_store_interleave_chroma_sse2;
-    pf->offsetadd = x264_mc_offsetadd_wtab_sse2;
-    pf->offsetsub = x264_mc_offsetsub_wtab_sse2;
+    pf->offsetadd = mc_offsetadd_wtab_sse2;
+    pf->offsetsub = mc_offsetsub_wtab_sse2;
 
     if( cpu&X264_CPU_SSE2_IS_SLOW )
         return;
@@ -665,7 +884,7 @@
     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_sse2;
 
     pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse;
-    pf->weight = x264_mc_weight_wtab_sse2;
+    pf->weight = mc_weight_wtab_sse2;
 
     if( !(cpu&X264_CPU_STACK_MOD4) )
         pf->mc_chroma = x264_mc_chroma_sse2;
@@ -674,9 +893,9 @@
         return;
 
     pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3;
-    pf->plane_copy_swap = x264_plane_copy_swap_ssse3;
+    pf->plane_copy_swap = plane_copy_swap_ssse3;
     pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_ssse3;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
+    pf->mbtree_propagate_list = mbtree_propagate_list_ssse3;
     pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_ssse3;
     pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_ssse3;
 
@@ -689,9 +908,9 @@
     pf->frame_init_lowres_core = x264_frame_init_lowres_core_avx;
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx;
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx;
-    pf->plane_copy_interleave        = x264_plane_copy_interleave_avx;
+    pf->plane_copy_interleave        = plane_copy_interleave_avx;
     pf->plane_copy_deinterleave      = x264_plane_copy_deinterleave_avx;
-    pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_avx;
+    pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_avx;
     pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx;
     pf->store_interleave_chroma      = x264_store_interleave_chroma_avx;
     pf->copy[PIXEL_16x16]            = x264_mc_copy_w16_aligned_avx;
@@ -706,6 +925,7 @@
     {
         pf->mc_luma = mc_luma_avx2;
         pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx2;
+        pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx2;
         pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx2;
     }
 
@@ -738,18 +958,18 @@
     pf->hpel_filter = x264_hpel_filter_sse2_amd;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2;
-    pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_sse2;
+    pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_sse2;
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_sse2;
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_sse2;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2;
 
     if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
     {
-        pf->weight = x264_mc_weight_wtab_sse2;
+        pf->weight = mc_weight_wtab_sse2;
         if( !(cpu&X264_CPU_SLOW_ATOM) )
         {
-            pf->offsetadd = x264_mc_offsetadd_wtab_sse2;
-            pf->offsetsub = x264_mc_offsetsub_wtab_sse2;
+            pf->offsetadd = mc_offsetadd_wtab_sse2;
+            pf->offsetsub = mc_offsetsub_wtab_sse2;
         }
 
         pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse;
@@ -766,7 +986,7 @@
         if( cpu&X264_CPU_SSE2_IS_FAST )
         {
             pf->store_interleave_chroma = x264_store_interleave_chroma_sse2; // FIXME sse2fast? sse2medium?
-            pf->plane_copy_interleave   = x264_plane_copy_interleave_sse2;
+            pf->plane_copy_interleave   = plane_copy_interleave_sse2;
             pf->mc_luma = mc_luma_sse2;
             pf->get_ref = get_ref_sse2;
             if( cpu&X264_CPU_CACHELINE_64 )
@@ -789,9 +1009,9 @@
     pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_ssse3;
     pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_ssse3;
     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_ssse3;
-    pf->plane_copy_swap = x264_plane_copy_swap_ssse3;
+    pf->plane_copy_swap  = plane_copy_swap_ssse3;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_ssse3;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
+    pf->mbtree_propagate_list = mbtree_propagate_list_ssse3;
     pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_ssse3;
     pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_ssse3;
 
@@ -800,7 +1020,7 @@
         pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_ssse3;
         pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_ssse3;
         pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_ssse3;
-        pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_ssse3;
+        pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_ssse3;
     }
 
     if( !(cpu&X264_CPU_SLOW_PALIGNR) )
@@ -827,8 +1047,8 @@
         }
     }
 
-    pf->weight_cache = x264_weight_cache_ssse3;
-    pf->weight = x264_mc_weight_wtab_ssse3;
+    pf->weight_cache = weight_cache_ssse3;
+    pf->weight = mc_weight_wtab_ssse3;
 
     if( !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SLOW_ATOM|X264_CPU_SLOW_PALIGNR)) )
         pf->integral_init4v = x264_integral_init4v_ssse3;
@@ -856,7 +1076,7 @@
     {
         pf->hpel_filter = x264_hpel_filter_avx2;
         pf->mc_chroma = x264_mc_chroma_avx2;
-        pf->weight = x264_mc_weight_wtab_avx2;
+        pf->weight = mc_weight_wtab_avx2;
         pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_avx2;
         pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_avx2;
         pf->integral_init8v = x264_integral_init8v_avx2;
@@ -865,6 +1085,7 @@
         pf->integral_init4h = x264_integral_init4h_avx2;
         pf->frame_init_lowres_core = x264_frame_init_lowres_core_avx2;
         pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_avx2;
+        pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx2;
     }
 
     if( cpu&X264_CPU_AVX512 )
@@ -874,6 +1095,8 @@
         pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_avx512;
         pf->avg[PIXEL_8x8]   = x264_pixel_avg_8x8_avx512;
         pf->avg[PIXEL_8x4]   = x264_pixel_avg_8x4_avx512;
+        pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx512;
+        pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx512;
     }
 #endif // HIGH_BIT_DEPTH
 
@@ -881,22 +1104,21 @@
         return;
     pf->memcpy_aligned  = x264_memcpy_aligned_avx;
     pf->memzero_aligned = x264_memzero_aligned_avx;
-    pf->plane_copy = x264_plane_copy_avx;
+    pf->plane_copy = plane_copy_avx;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx;
+    pf->mbtree_propagate_list = mbtree_propagate_list_avx;
 
     if( cpu&X264_CPU_FMA4 )
         pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4;
 
     if( !(cpu&X264_CPU_AVX2) )
         return;
-    pf->plane_copy_swap = x264_plane_copy_swap_avx2;
+    pf->plane_copy_swap = plane_copy_swap_avx2;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx2;
-    pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_avx2;
-    pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx2;
+    pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_avx2;
     pf->get_ref = get_ref_avx2;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx2;
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx2;
+    pf->mbtree_propagate_list = mbtree_propagate_list_avx2;
     pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_avx2;
     pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_avx2;
 
@@ -904,8 +1126,12 @@
         return;
     pf->memcpy_aligned = x264_memcpy_aligned_avx512;
     pf->memzero_aligned = x264_memzero_aligned_avx512;
+    pf->plane_copy = x264_plane_copy_avx512;
+    pf->plane_copy_swap = x264_plane_copy_swap_avx512;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx512;
 #if ARCH_X86_64
-    pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx512;
+    pf->mbtree_propagate_list = mbtree_propagate_list_avx512;
 #endif
+    pf->mbtree_fix8_pack      = x264_mbtree_fix8_pack_avx512;
+    pf->mbtree_fix8_unpack    = x264_mbtree_fix8_unpack_avx512;
 }
diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc.h x264-0.158.2988+git-20191101.7817004/common/x86/mc.h
--- x264-0.152.2854+gite9a5903/common/x86/mc.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/mc.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -24,9 +24,10 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_I386_MC_H
-#define X264_I386_MC_H
+#ifndef X264_X86_MC_H
+#define X264_X86_MC_H
 
+#define x264_mc_init_mmx x264_template(mc_init_mmx)
 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/x86/pixel-32.asm x264-0.158.2988+git-20191101.7817004/common/x86/pixel-32.asm
--- x264-0.152.2854+gite9a5903/common/x86/pixel-32.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/pixel-32.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* pixel-32.asm: x86_32 pixel metrics
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Laurent Aimar <fenrir@via.ecp.fr>
@@ -33,6 +33,8 @@
 SECTION .text
 INIT_MMX mmx2
 
+%if HIGH_BIT_DEPTH == 0
+
 %macro LOAD_DIFF_4x8P 1 ; dx
     LOAD_DIFF  m0, m7, none, [r0+%1],      [r2+%1]
     LOAD_DIFF  m1, m6, none, [r0+%1+r1],   [r2+%1+r3]
@@ -418,3 +420,4 @@
     emms
     RET
 
+%endif ; !HIGH_BIT_DEPTH
diff -Nru x264-0.152.2854+gite9a5903/common/x86/pixel-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/pixel-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/pixel-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/pixel-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* pixel.asm: x86 pixel metrics
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -131,6 +131,7 @@
 
 sw_f0:     dq 0xfff0, 0
 pd_f0:     times 4 dd 0xffff0000
+pd_2:      times 4 dd 2
 
 pw_76543210: dw 0, 1, 2, 3, 4, 5, 6, 7
 
@@ -2223,7 +2224,7 @@
 ;-----------------------------------------------------------------------------
 ; void intra_sa8d_x3_8x8( uint8_t *fenc, uint8_t edge[36], int *res )
 ;-----------------------------------------------------------------------------
-cglobal intra_sa8d_x3_8x8, 3,3,14
+cglobal intra_sa8d_x3_8x8, 3,3,13
     ; 8x8 hadamard
     pxor        m8, m8
     movq        m0, [r0+0*FENC_STRIDE]
@@ -2245,77 +2246,80 @@
 
     HADAMARD8_2D 0, 1, 2, 3, 4, 5, 6, 7, 8
 
-    ABSW2       m8,  m9,  m2, m3, m2, m3
-    ABSW2       m10, m11, m4, m5, m4, m5
-    paddusw     m8,  m10
-    paddusw     m9,  m11
-    ABSW2       m10, m11, m6, m7, m6, m7
-    ABSW        m13, m1,  m1
-    paddusw     m10, m11
-    paddusw     m8,  m9
-    paddusw     m13, m10
-    paddusw     m13, m8
+    ABSW2       m8, m9, m2, m3, m2, m3
+    ABSW2      m10, m11, m4, m5, m4, m5
+    paddw       m8, m10
+    paddw       m9, m11
+    ABSW2      m10, m11, m6, m7, m6, m7
+    ABSW       m12, m1, m1
+    paddw      m10, m11
+    paddw       m8, m9
+    paddw      m12, m10
+    paddw      m12, m8
 
     ; 1D hadamard of edges
-    movq        m8,  [r1+7]
-    movq        m9,  [r1+16]
-    pxor        m10, m10
-    punpcklbw   m8,  m10
-    punpcklbw   m9,  m10
+    movq        m8, [r1+7]
+    movq        m9, [r1+16]
+    pxor       m10, m10
+    punpcklbw   m8, m10
+    punpcklbw   m9, m10
     HSUMSUB2 pmullw, m8, m9, m10, m11, m11, q1032, [pw_ppppmmmm]
     HSUMSUB2 pmullw, m8, m9, m10, m11, m11, q2301, [pw_ppmmppmm]
-    pshuflw     m10, m8,  q2301
-    pshuflw     m11, m9,  q2301
-    pshufhw     m10, m10, q2301
-    pshufhw     m11, m11, q2301
-    pmullw      m8,  [pw_pmpmpmpm]
-    pmullw      m11, [pw_pmpmpmpm]
-    paddw       m8,  m10
-    paddw       m9,  m11
+    pshuflw    m10, m8, q2301
+    pshuflw    m11, m9, q2301
+    pshufhw    m10, m10, q2301
+    pshufhw    m11, m11, q2301
+    pmullw      m8, [pw_pmpmpmpm]
+    pmullw     m11, [pw_pmpmpmpm]
+    paddw       m8, m10
+    paddw       m9, m11
 
     ; differences
-    paddw       m10, m8, m9
-    paddw       m10, [pw_8]
-    pand        m10, [sw_f0]
-    psllw       m10, 2 ; dc
-
-    psllw       m8,  3 ; left edge
-    psubw       m8,  m0
-    psubw       m10, m0
-    ABSW2       m8, m10, m8, m10, m11, m12 ; 1x8 sum
-    paddusw     m8,  m13
-    paddusw     m13, m10
-    punpcklwd   m0,  m1
-    punpcklwd   m2,  m3
-    punpcklwd   m4,  m5
-    punpcklwd   m6,  m7
-    punpckldq   m0,  m2
-    punpckldq   m4,  m6
-    punpcklqdq  m0,  m4 ; transpose
-    psllw       m9,  3 ; top edge
-    psrldq      m2,  m13, 2 ; 8x7 sum
-    psubw       m0,  m9  ; 8x1 sum
-    ABSW        m0,  m0,  m9
-    paddusw     m2,  m0
+    paddw      m10, m8, m9
+    paddw      m10, [pw_8]
+    pand       m10, [sw_f0]
+    psllw       m8, 3 ; left edge
+    psllw      m10, 2 ; dc
+    psubw       m8, m0
+    psubw      m10, m0
+    punpcklwd   m0, m1
+    punpcklwd   m2, m3
+    punpcklwd   m4, m5
+    punpcklwd   m6, m7
+    ABSW       m10, m10, m1
+    paddw      m10, m12
+    punpckldq   m0, m2
+    punpckldq   m4, m6
+    punpcklqdq  m0, m4 ; transpose
+    psllw       m9, 3 ; top edge
+    psrldq      m2, m10, 2 ; 8x7 sum
+    psubw       m0, m9  ; 8x1 sum
+    ABSW2       m8, m0, m8, m0, m1, m3 ; 1x8 sum
+    paddw       m8, m12
+    paddusw     m2, m0
 
     ; 3x HADDW
-    movdqa      m7,  [pw_1]
-    pmaddwd     m2,  m7
-    pmaddwd     m8,  m7
-    pmaddwd     m13, m7
-    punpckhdq   m3,  m2, m8
-    punpckldq   m2,  m8
-    pshufd      m5,  m13, q3311
-    paddd       m2,  m3
-    paddd       m5,  m13
-    punpckhqdq  m0,  m2, m5
-    punpcklqdq  m2,  m5
-    pavgw       m0,  m2
-    pxor        m1,  m1
-    pavgw       m0,  m1
-    movq      [r2], m0 ; i8x8_v, i8x8_h
-    psrldq      m0, 8
-    movd    [r2+8], m0 ; i8x8_dc
+    mova        m7, [pd_f0]
+    pandn       m0, m7, m10
+    psrld      m10, 16
+    pandn       m1, m7, m8
+    psrld       m8, 16
+    pandn       m7, m2
+    psrld       m2, 16
+    paddd       m0, m10
+    paddd       m1, m8
+    paddd       m2, m7
+    pshufd      m3, m0, q2301
+    punpckhdq   m4, m2, m1
+    punpckldq   m2, m1
+    paddd       m3, m0
+    paddd       m2, m4
+    punpckhqdq  m0, m2, m3
+    punpcklqdq  m2, m3
+    paddd       m0, [pd_2]
+    paddd       m0, m2
+    psrld       m0, 2
+    mova      [r2], m0
     RET
 %endif ; ARCH_X86_64
 %endmacro ; INTRA_SA8D_SSE2
@@ -2862,7 +2866,7 @@
     ; output the predicted samples
     mov       r3d, eax
     shr       r3d, 16
-%ifdef PIC
+%if ARCH_X86_64
     lea        r2, [%2_lut]
     movzx     r2d, byte [r2+r3]
 %else
@@ -4280,7 +4284,7 @@
 
 ; instantiate satds
 
-%if ARCH_X86_64 == 0
+%if ARCH_X86_64 == 0 && HIGH_BIT_DEPTH == 0
 cextern pixel_sa8d_8x8_internal_mmx2
 INIT_MMX mmx2
 SA8D
@@ -4744,7 +4748,7 @@
 %endmacro
 
 %macro SATD_AVX512_END 0-1 0 ; sa8d
-    paddw          m0 {k1}{z}, m1 ; zero-extend to dwords
+    vpaddw         m0 {k1}{z}, m1 ; zero-extend to dwords
 %if ARCH_X86_64
 %if mmsize == 64
     vextracti32x8 ym1, m0, 1
@@ -5099,7 +5103,7 @@
     je .skip ; faster only if this is the common case; remove branch if we use ssim on a macroblock level
     neg       r2
 
-%ifdef PIC
+%if ARCH_X86_64
     lea       r3, [mask_ff + 16]
     %xdefine %%mask r3
 %else
@@ -5549,7 +5553,7 @@
     add       r5, r6
     xor      r0d, r0d ; nmv
     mov     [r5], r0d
-%ifdef PIC
+%if ARCH_X86_64
     lea       r1, [$$]
     %define GLOBAL +r1-$$
 %else
diff -Nru x264-0.152.2854+gite9a5903/common/x86/pixel.h x264-0.158.2988+git-20191101.7817004/common/x86/pixel.h
--- x264-0.152.2854+gite9a5903/common/x86/pixel.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/pixel.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: x86 pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -25,9 +25,345 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_I386_PIXEL_H
-#define X264_I386_PIXEL_H
+#ifndef X264_X86_PIXEL_H
+#define X264_X86_PIXEL_H
 
+#define x264_pixel_ads1_avx x264_template(pixel_ads1_avx)
+#define x264_pixel_ads1_avx2 x264_template(pixel_ads1_avx2)
+#define x264_pixel_ads1_mmx2 x264_template(pixel_ads1_mmx2)
+#define x264_pixel_ads1_sse2 x264_template(pixel_ads1_sse2)
+#define x264_pixel_ads1_ssse3 x264_template(pixel_ads1_ssse3)
+#define x264_pixel_ads2_avx x264_template(pixel_ads2_avx)
+#define x264_pixel_ads2_avx2 x264_template(pixel_ads2_avx2)
+#define x264_pixel_ads2_mmx2 x264_template(pixel_ads2_mmx2)
+#define x264_pixel_ads2_sse2 x264_template(pixel_ads2_sse2)
+#define x264_pixel_ads2_ssse3 x264_template(pixel_ads2_ssse3)
+#define x264_pixel_ads4_avx x264_template(pixel_ads4_avx)
+#define x264_pixel_ads4_avx2 x264_template(pixel_ads4_avx2)
+#define x264_pixel_ads4_mmx2 x264_template(pixel_ads4_mmx2)
+#define x264_pixel_ads4_sse2 x264_template(pixel_ads4_sse2)
+#define x264_pixel_ads4_ssse3 x264_template(pixel_ads4_ssse3)
+#define x264_pixel_hadamard_ac_16x16_avx x264_template(pixel_hadamard_ac_16x16_avx)
+#define x264_pixel_hadamard_ac_16x16_avx2 x264_template(pixel_hadamard_ac_16x16_avx2)
+#define x264_pixel_hadamard_ac_16x16_mmx2 x264_template(pixel_hadamard_ac_16x16_mmx2)
+#define x264_pixel_hadamard_ac_16x16_sse2 x264_template(pixel_hadamard_ac_16x16_sse2)
+#define x264_pixel_hadamard_ac_16x16_sse4 x264_template(pixel_hadamard_ac_16x16_sse4)
+#define x264_pixel_hadamard_ac_16x16_ssse3 x264_template(pixel_hadamard_ac_16x16_ssse3)
+#define x264_pixel_hadamard_ac_16x16_ssse3_atom x264_template(pixel_hadamard_ac_16x16_ssse3_atom)
+#define x264_pixel_hadamard_ac_16x16_xop x264_template(pixel_hadamard_ac_16x16_xop)
+#define x264_pixel_hadamard_ac_16x8_avx x264_template(pixel_hadamard_ac_16x8_avx)
+#define x264_pixel_hadamard_ac_16x8_avx2 x264_template(pixel_hadamard_ac_16x8_avx2)
+#define x264_pixel_hadamard_ac_16x8_mmx2 x264_template(pixel_hadamard_ac_16x8_mmx2)
+#define x264_pixel_hadamard_ac_16x8_sse2 x264_template(pixel_hadamard_ac_16x8_sse2)
+#define x264_pixel_hadamard_ac_16x8_sse4 x264_template(pixel_hadamard_ac_16x8_sse4)
+#define x264_pixel_hadamard_ac_16x8_ssse3 x264_template(pixel_hadamard_ac_16x8_ssse3)
+#define x264_pixel_hadamard_ac_16x8_ssse3_atom x264_template(pixel_hadamard_ac_16x8_ssse3_atom)
+#define x264_pixel_hadamard_ac_16x8_xop x264_template(pixel_hadamard_ac_16x8_xop)
+#define x264_pixel_hadamard_ac_8x16_avx x264_template(pixel_hadamard_ac_8x16_avx)
+#define x264_pixel_hadamard_ac_8x16_mmx2 x264_template(pixel_hadamard_ac_8x16_mmx2)
+#define x264_pixel_hadamard_ac_8x16_sse2 x264_template(pixel_hadamard_ac_8x16_sse2)
+#define x264_pixel_hadamard_ac_8x16_sse4 x264_template(pixel_hadamard_ac_8x16_sse4)
+#define x264_pixel_hadamard_ac_8x16_ssse3 x264_template(pixel_hadamard_ac_8x16_ssse3)
+#define x264_pixel_hadamard_ac_8x16_ssse3_atom x264_template(pixel_hadamard_ac_8x16_ssse3_atom)
+#define x264_pixel_hadamard_ac_8x16_xop x264_template(pixel_hadamard_ac_8x16_xop)
+#define x264_pixel_hadamard_ac_8x8_avx x264_template(pixel_hadamard_ac_8x8_avx)
+#define x264_pixel_hadamard_ac_8x8_mmx2 x264_template(pixel_hadamard_ac_8x8_mmx2)
+#define x264_pixel_hadamard_ac_8x8_sse2 x264_template(pixel_hadamard_ac_8x8_sse2)
+#define x264_pixel_hadamard_ac_8x8_sse4 x264_template(pixel_hadamard_ac_8x8_sse4)
+#define x264_pixel_hadamard_ac_8x8_ssse3 x264_template(pixel_hadamard_ac_8x8_ssse3)
+#define x264_pixel_hadamard_ac_8x8_ssse3_atom x264_template(pixel_hadamard_ac_8x8_ssse3_atom)
+#define x264_pixel_hadamard_ac_8x8_xop x264_template(pixel_hadamard_ac_8x8_xop)
+#define x264_pixel_sa8d_16x16_mmx2 x264_template(pixel_sa8d_16x16_mmx2)
+#define x264_pixel_sa8d_16x16_avx x264_template(pixel_sa8d_16x16_avx)
+#define x264_pixel_sa8d_16x16_sse2 x264_template(pixel_sa8d_16x16_sse2)
+#define x264_pixel_sa8d_16x16_sse4 x264_template(pixel_sa8d_16x16_sse4)
+#define x264_pixel_sa8d_16x16_ssse3 x264_template(pixel_sa8d_16x16_ssse3)
+#define x264_pixel_sa8d_16x16_ssse3_atom x264_template(pixel_sa8d_16x16_ssse3_atom)
+#define x264_pixel_sa8d_16x16_xop x264_template(pixel_sa8d_16x16_xop)
+#define x264_pixel_sa8d_8x8_mmx2 x264_template(pixel_sa8d_8x8_mmx2)
+#define x264_pixel_sa8d_8x8_avx x264_template(pixel_sa8d_8x8_avx)
+#define x264_pixel_sa8d_8x8_avx2 x264_template(pixel_sa8d_8x8_avx2)
+#define x264_pixel_sa8d_8x8_avx512 x264_template(pixel_sa8d_8x8_avx512)
+#define x264_pixel_sa8d_8x8_sse2 x264_template(pixel_sa8d_8x8_sse2)
+#define x264_pixel_sa8d_8x8_sse4 x264_template(pixel_sa8d_8x8_sse4)
+#define x264_pixel_sa8d_8x8_ssse3 x264_template(pixel_sa8d_8x8_ssse3)
+#define x264_pixel_sa8d_8x8_ssse3_atom x264_template(pixel_sa8d_8x8_ssse3_atom)
+#define x264_pixel_sa8d_8x8_xop x264_template(pixel_sa8d_8x8_xop)
+#define x264_pixel_sad_16x16_avx2 x264_template(pixel_sad_16x16_avx2)
+#define x264_pixel_sad_16x16_avx512 x264_template(pixel_sad_16x16_avx512)
+#define x264_pixel_sad_16x16_cache32_mmx2 x264_template(pixel_sad_16x16_cache32_mmx2)
+#define x264_pixel_sad_16x16_cache64_mmx2 x264_template(pixel_sad_16x16_cache64_mmx2)
+#define x264_pixel_sad_16x16_cache64_sse2 x264_template(pixel_sad_16x16_cache64_sse2)
+#define x264_pixel_sad_16x16_cache64_ssse3 x264_template(pixel_sad_16x16_cache64_ssse3)
+#define x264_pixel_sad_16x16_mmx2 x264_template(pixel_sad_16x16_mmx2)
+#define x264_pixel_sad_16x16_sse2 x264_template(pixel_sad_16x16_sse2)
+#define x264_pixel_sad_16x16_sse2_aligned x264_template(pixel_sad_16x16_sse2_aligned)
+#define x264_pixel_sad_16x16_sse3 x264_template(pixel_sad_16x16_sse3)
+#define x264_pixel_sad_16x16_ssse3 x264_template(pixel_sad_16x16_ssse3)
+#define x264_pixel_sad_16x16_ssse3_aligned x264_template(pixel_sad_16x16_ssse3_aligned)
+#define x264_pixel_sad_16x8_avx2 x264_template(pixel_sad_16x8_avx2)
+#define x264_pixel_sad_16x8_avx512 x264_template(pixel_sad_16x8_avx512)
+#define x264_pixel_sad_16x8_cache32_mmx2 x264_template(pixel_sad_16x8_cache32_mmx2)
+#define x264_pixel_sad_16x8_cache64_mmx2 x264_template(pixel_sad_16x8_cache64_mmx2)
+#define x264_pixel_sad_16x8_cache64_sse2 x264_template(pixel_sad_16x8_cache64_sse2)
+#define x264_pixel_sad_16x8_cache64_ssse3 x264_template(pixel_sad_16x8_cache64_ssse3)
+#define x264_pixel_sad_16x8_mmx2 x264_template(pixel_sad_16x8_mmx2)
+#define x264_pixel_sad_16x8_sse2 x264_template(pixel_sad_16x8_sse2)
+#define x264_pixel_sad_16x8_sse2_aligned x264_template(pixel_sad_16x8_sse2_aligned)
+#define x264_pixel_sad_16x8_sse3 x264_template(pixel_sad_16x8_sse3)
+#define x264_pixel_sad_16x8_ssse3 x264_template(pixel_sad_16x8_ssse3)
+#define x264_pixel_sad_16x8_ssse3_aligned x264_template(pixel_sad_16x8_ssse3_aligned)
+#define x264_pixel_sad_4x16_avx512 x264_template(pixel_sad_4x16_avx512)
+#define x264_pixel_sad_4x16_mmx2 x264_template(pixel_sad_4x16_mmx2)
+#define x264_pixel_sad_4x4_avx512 x264_template(pixel_sad_4x4_avx512)
+#define x264_pixel_sad_4x4_mmx2 x264_template(pixel_sad_4x4_mmx2)
+#define x264_pixel_sad_4x4_ssse3 x264_template(pixel_sad_4x4_ssse3)
+#define x264_pixel_sad_4x8_avx512 x264_template(pixel_sad_4x8_avx512)
+#define x264_pixel_sad_4x8_mmx2 x264_template(pixel_sad_4x8_mmx2)
+#define x264_pixel_sad_4x8_ssse3 x264_template(pixel_sad_4x8_ssse3)
+#define x264_pixel_sad_8x16_avx512 x264_template(pixel_sad_8x16_avx512)
+#define x264_pixel_sad_8x16_cache32_mmx2 x264_template(pixel_sad_8x16_cache32_mmx2)
+#define x264_pixel_sad_8x16_cache64_mmx2 x264_template(pixel_sad_8x16_cache64_mmx2)
+#define x264_pixel_sad_8x16_mmx2 x264_template(pixel_sad_8x16_mmx2)
+#define x264_pixel_sad_8x16_sse2 x264_template(pixel_sad_8x16_sse2)
+#define x264_pixel_sad_8x16_sse2_aligned x264_template(pixel_sad_8x16_sse2_aligned)
+#define x264_pixel_sad_8x16_ssse3 x264_template(pixel_sad_8x16_ssse3)
+#define x264_pixel_sad_8x16_ssse3_aligned x264_template(pixel_sad_8x16_ssse3_aligned)
+#define x264_pixel_sad_8x4_avx512 x264_template(pixel_sad_8x4_avx512)
+#define x264_pixel_sad_8x4_cache32_mmx2 x264_template(pixel_sad_8x4_cache32_mmx2)
+#define x264_pixel_sad_8x4_cache64_mmx2 x264_template(pixel_sad_8x4_cache64_mmx2)
+#define x264_pixel_sad_8x4_mmx2 x264_template(pixel_sad_8x4_mmx2)
+#define x264_pixel_sad_8x4_sse2 x264_template(pixel_sad_8x4_sse2)
+#define x264_pixel_sad_8x4_ssse3 x264_template(pixel_sad_8x4_ssse3)
+#define x264_pixel_sad_8x8_avx512 x264_template(pixel_sad_8x8_avx512)
+#define x264_pixel_sad_8x8_cache32_mmx2 x264_template(pixel_sad_8x8_cache32_mmx2)
+#define x264_pixel_sad_8x8_cache64_mmx2 x264_template(pixel_sad_8x8_cache64_mmx2)
+#define x264_pixel_sad_8x8_mmx2 x264_template(pixel_sad_8x8_mmx2)
+#define x264_pixel_sad_8x8_sse2 x264_template(pixel_sad_8x8_sse2)
+#define x264_pixel_sad_8x8_sse2_aligned x264_template(pixel_sad_8x8_sse2_aligned)
+#define x264_pixel_sad_8x8_ssse3 x264_template(pixel_sad_8x8_ssse3)
+#define x264_pixel_sad_8x8_ssse3_aligned x264_template(pixel_sad_8x8_ssse3_aligned)
+#define x264_pixel_sad_x3_16x16_avx x264_template(pixel_sad_x3_16x16_avx)
+#define x264_pixel_sad_x3_16x16_avx2 x264_template(pixel_sad_x3_16x16_avx2)
+#define x264_pixel_sad_x3_16x16_avx512 x264_template(pixel_sad_x3_16x16_avx512)
+#define x264_pixel_sad_x3_16x16_cache32_mmx2 x264_template(pixel_sad_x3_16x16_cache32_mmx2)
+#define x264_pixel_sad_x3_16x16_cache64_mmx2 x264_template(pixel_sad_x3_16x16_cache64_mmx2)
+#define x264_pixel_sad_x3_16x16_cache64_sse2 x264_template(pixel_sad_x3_16x16_cache64_sse2)
+#define x264_pixel_sad_x3_16x16_cache64_ssse3 x264_template(pixel_sad_x3_16x16_cache64_ssse3)
+#define x264_pixel_sad_x3_16x16_mmx2 x264_template(pixel_sad_x3_16x16_mmx2)
+#define x264_pixel_sad_x3_16x16_sse2 x264_template(pixel_sad_x3_16x16_sse2)
+#define x264_pixel_sad_x3_16x16_sse3 x264_template(pixel_sad_x3_16x16_sse3)
+#define x264_pixel_sad_x3_16x16_ssse3 x264_template(pixel_sad_x3_16x16_ssse3)
+#define x264_pixel_sad_x3_16x16_xop x264_template(pixel_sad_x3_16x16_xop)
+#define x264_pixel_sad_x3_16x8_avx x264_template(pixel_sad_x3_16x8_avx)
+#define x264_pixel_sad_x3_16x8_avx2 x264_template(pixel_sad_x3_16x8_avx2)
+#define x264_pixel_sad_x3_16x8_avx512 x264_template(pixel_sad_x3_16x8_avx512)
+#define x264_pixel_sad_x3_16x8_cache32_mmx2 x264_template(pixel_sad_x3_16x8_cache32_mmx2)
+#define x264_pixel_sad_x3_16x8_cache64_mmx2 x264_template(pixel_sad_x3_16x8_cache64_mmx2)
+#define x264_pixel_sad_x3_16x8_cache64_sse2 x264_template(pixel_sad_x3_16x8_cache64_sse2)
+#define x264_pixel_sad_x3_16x8_cache64_ssse3 x264_template(pixel_sad_x3_16x8_cache64_ssse3)
+#define x264_pixel_sad_x3_16x8_mmx2 x264_template(pixel_sad_x3_16x8_mmx2)
+#define x264_pixel_sad_x3_16x8_sse2 x264_template(pixel_sad_x3_16x8_sse2)
+#define x264_pixel_sad_x3_16x8_sse3 x264_template(pixel_sad_x3_16x8_sse3)
+#define x264_pixel_sad_x3_16x8_ssse3 x264_template(pixel_sad_x3_16x8_ssse3)
+#define x264_pixel_sad_x3_16x8_xop x264_template(pixel_sad_x3_16x8_xop)
+#define x264_pixel_sad_x3_4x4_avx512 x264_template(pixel_sad_x3_4x4_avx512)
+#define x264_pixel_sad_x3_4x4_mmx2 x264_template(pixel_sad_x3_4x4_mmx2)
+#define x264_pixel_sad_x3_4x4_ssse3 x264_template(pixel_sad_x3_4x4_ssse3)
+#define x264_pixel_sad_x3_4x8_avx512 x264_template(pixel_sad_x3_4x8_avx512)
+#define x264_pixel_sad_x3_4x8_mmx2 x264_template(pixel_sad_x3_4x8_mmx2)
+#define x264_pixel_sad_x3_4x8_ssse3 x264_template(pixel_sad_x3_4x8_ssse3)
+#define x264_pixel_sad_x3_8x16_avx512 x264_template(pixel_sad_x3_8x16_avx512)
+#define x264_pixel_sad_x3_8x16_cache32_mmx2 x264_template(pixel_sad_x3_8x16_cache32_mmx2)
+#define x264_pixel_sad_x3_8x16_cache64_mmx2 x264_template(pixel_sad_x3_8x16_cache64_mmx2)
+#define x264_pixel_sad_x3_8x16_cache64_sse2 x264_template(pixel_sad_x3_8x16_cache64_sse2)
+#define x264_pixel_sad_x3_8x16_mmx2 x264_template(pixel_sad_x3_8x16_mmx2)
+#define x264_pixel_sad_x3_8x16_sse2 x264_template(pixel_sad_x3_8x16_sse2)
+#define x264_pixel_sad_x3_8x16_ssse3 x264_template(pixel_sad_x3_8x16_ssse3)
+#define x264_pixel_sad_x3_8x16_xop x264_template(pixel_sad_x3_8x16_xop)
+#define x264_pixel_sad_x3_8x4_avx512 x264_template(pixel_sad_x3_8x4_avx512)
+#define x264_pixel_sad_x3_8x4_mmx2 x264_template(pixel_sad_x3_8x4_mmx2)
+#define x264_pixel_sad_x3_8x4_sse2 x264_template(pixel_sad_x3_8x4_sse2)
+#define x264_pixel_sad_x3_8x4_ssse3 x264_template(pixel_sad_x3_8x4_ssse3)
+#define x264_pixel_sad_x3_8x4_xop x264_template(pixel_sad_x3_8x4_xop)
+#define x264_pixel_sad_x3_8x8_avx512 x264_template(pixel_sad_x3_8x8_avx512)
+#define x264_pixel_sad_x3_8x8_cache32_mmx2 x264_template(pixel_sad_x3_8x8_cache32_mmx2)
+#define x264_pixel_sad_x3_8x8_cache64_mmx2 x264_template(pixel_sad_x3_8x8_cache64_mmx2)
+#define x264_pixel_sad_x3_8x8_mmx2 x264_template(pixel_sad_x3_8x8_mmx2)
+#define x264_pixel_sad_x3_8x8_sse2 x264_template(pixel_sad_x3_8x8_sse2)
+#define x264_pixel_sad_x3_8x8_ssse3 x264_template(pixel_sad_x3_8x8_ssse3)
+#define x264_pixel_sad_x3_8x8_xop x264_template(pixel_sad_x3_8x8_xop)
+#define x264_pixel_sad_x4_16x16_avx x264_template(pixel_sad_x4_16x16_avx)
+#define x264_pixel_sad_x4_16x16_avx2 x264_template(pixel_sad_x4_16x16_avx2)
+#define x264_pixel_sad_x4_16x16_avx512 x264_template(pixel_sad_x4_16x16_avx512)
+#define x264_pixel_sad_x4_16x16_cache32_mmx2 x264_template(pixel_sad_x4_16x16_cache32_mmx2)
+#define x264_pixel_sad_x4_16x16_cache64_mmx2 x264_template(pixel_sad_x4_16x16_cache64_mmx2)
+#define x264_pixel_sad_x4_16x16_cache64_sse2 x264_template(pixel_sad_x4_16x16_cache64_sse2)
+#define x264_pixel_sad_x4_16x16_cache64_ssse3 x264_template(pixel_sad_x4_16x16_cache64_ssse3)
+#define x264_pixel_sad_x4_16x16_mmx2 x264_template(pixel_sad_x4_16x16_mmx2)
+#define x264_pixel_sad_x4_16x16_sse2 x264_template(pixel_sad_x4_16x16_sse2)
+#define x264_pixel_sad_x4_16x16_sse3 x264_template(pixel_sad_x4_16x16_sse3)
+#define x264_pixel_sad_x4_16x16_ssse3 x264_template(pixel_sad_x4_16x16_ssse3)
+#define x264_pixel_sad_x4_16x16_xop x264_template(pixel_sad_x4_16x16_xop)
+#define x264_pixel_sad_x4_16x8_avx x264_template(pixel_sad_x4_16x8_avx)
+#define x264_pixel_sad_x4_16x8_avx2 x264_template(pixel_sad_x4_16x8_avx2)
+#define x264_pixel_sad_x4_16x8_avx512 x264_template(pixel_sad_x4_16x8_avx512)
+#define x264_pixel_sad_x4_16x8_cache32_mmx2 x264_template(pixel_sad_x4_16x8_cache32_mmx2)
+#define x264_pixel_sad_x4_16x8_cache64_mmx2 x264_template(pixel_sad_x4_16x8_cache64_mmx2)
+#define x264_pixel_sad_x4_16x8_cache64_sse2 x264_template(pixel_sad_x4_16x8_cache64_sse2)
+#define x264_pixel_sad_x4_16x8_cache64_ssse3 x264_template(pixel_sad_x4_16x8_cache64_ssse3)
+#define x264_pixel_sad_x4_16x8_mmx2 x264_template(pixel_sad_x4_16x8_mmx2)
+#define x264_pixel_sad_x4_16x8_sse2 x264_template(pixel_sad_x4_16x8_sse2)
+#define x264_pixel_sad_x4_16x8_sse3 x264_template(pixel_sad_x4_16x8_sse3)
+#define x264_pixel_sad_x4_16x8_ssse3 x264_template(pixel_sad_x4_16x8_ssse3)
+#define x264_pixel_sad_x4_16x8_xop x264_template(pixel_sad_x4_16x8_xop)
+#define x264_pixel_sad_x4_4x4_avx512 x264_template(pixel_sad_x4_4x4_avx512)
+#define x264_pixel_sad_x4_4x4_mmx2 x264_template(pixel_sad_x4_4x4_mmx2)
+#define x264_pixel_sad_x4_4x4_ssse3 x264_template(pixel_sad_x4_4x4_ssse3)
+#define x264_pixel_sad_x4_4x8_avx512 x264_template(pixel_sad_x4_4x8_avx512)
+#define x264_pixel_sad_x4_4x8_mmx2 x264_template(pixel_sad_x4_4x8_mmx2)
+#define x264_pixel_sad_x4_4x8_ssse3 x264_template(pixel_sad_x4_4x8_ssse3)
+#define x264_pixel_sad_x4_8x16_avx512 x264_template(pixel_sad_x4_8x16_avx512)
+#define x264_pixel_sad_x4_8x16_cache32_mmx2 x264_template(pixel_sad_x4_8x16_cache32_mmx2)
+#define x264_pixel_sad_x4_8x16_cache64_mmx2 x264_template(pixel_sad_x4_8x16_cache64_mmx2)
+#define x264_pixel_sad_x4_8x16_cache64_sse2 x264_template(pixel_sad_x4_8x16_cache64_sse2)
+#define x264_pixel_sad_x4_8x16_mmx2 x264_template(pixel_sad_x4_8x16_mmx2)
+#define x264_pixel_sad_x4_8x16_sse2 x264_template(pixel_sad_x4_8x16_sse2)
+#define x264_pixel_sad_x4_8x16_ssse3 x264_template(pixel_sad_x4_8x16_ssse3)
+#define x264_pixel_sad_x4_8x16_xop x264_template(pixel_sad_x4_8x16_xop)
+#define x264_pixel_sad_x4_8x4_avx512 x264_template(pixel_sad_x4_8x4_avx512)
+#define x264_pixel_sad_x4_8x4_mmx2 x264_template(pixel_sad_x4_8x4_mmx2)
+#define x264_pixel_sad_x4_8x4_sse2 x264_template(pixel_sad_x4_8x4_sse2)
+#define x264_pixel_sad_x4_8x4_ssse3 x264_template(pixel_sad_x4_8x4_ssse3)
+#define x264_pixel_sad_x4_8x4_xop x264_template(pixel_sad_x4_8x4_xop)
+#define x264_pixel_sad_x4_8x8_avx512 x264_template(pixel_sad_x4_8x8_avx512)
+#define x264_pixel_sad_x4_8x8_cache32_mmx2 x264_template(pixel_sad_x4_8x8_cache32_mmx2)
+#define x264_pixel_sad_x4_8x8_cache64_mmx2 x264_template(pixel_sad_x4_8x8_cache64_mmx2)
+#define x264_pixel_sad_x4_8x8_mmx2 x264_template(pixel_sad_x4_8x8_mmx2)
+#define x264_pixel_sad_x4_8x8_sse2 x264_template(pixel_sad_x4_8x8_sse2)
+#define x264_pixel_sad_x4_8x8_ssse3 x264_template(pixel_sad_x4_8x8_ssse3)
+#define x264_pixel_sad_x4_8x8_xop x264_template(pixel_sad_x4_8x8_xop)
+#define x264_pixel_satd_16x16_avx x264_template(pixel_satd_16x16_avx)
+#define x264_pixel_satd_16x16_avx2 x264_template(pixel_satd_16x16_avx2)
+#define x264_pixel_satd_16x16_avx512 x264_template(pixel_satd_16x16_avx512)
+#define x264_pixel_satd_16x16_mmx2 x264_template(pixel_satd_16x16_mmx2)
+#define x264_pixel_satd_16x16_sse2 x264_template(pixel_satd_16x16_sse2)
+#define x264_pixel_satd_16x16_sse4 x264_template(pixel_satd_16x16_sse4)
+#define x264_pixel_satd_16x16_ssse3 x264_template(pixel_satd_16x16_ssse3)
+#define x264_pixel_satd_16x16_ssse3_atom x264_template(pixel_satd_16x16_ssse3_atom)
+#define x264_pixel_satd_16x16_xop x264_template(pixel_satd_16x16_xop)
+#define x264_pixel_satd_16x8_avx x264_template(pixel_satd_16x8_avx)
+#define x264_pixel_satd_16x8_avx2 x264_template(pixel_satd_16x8_avx2)
+#define x264_pixel_satd_16x8_avx512 x264_template(pixel_satd_16x8_avx512)
+#define x264_pixel_satd_16x8_mmx2 x264_template(pixel_satd_16x8_mmx2)
+#define x264_pixel_satd_16x8_sse2 x264_template(pixel_satd_16x8_sse2)
+#define x264_pixel_satd_16x8_sse4 x264_template(pixel_satd_16x8_sse4)
+#define x264_pixel_satd_16x8_ssse3 x264_template(pixel_satd_16x8_ssse3)
+#define x264_pixel_satd_16x8_ssse3_atom x264_template(pixel_satd_16x8_ssse3_atom)
+#define x264_pixel_satd_16x8_xop x264_template(pixel_satd_16x8_xop)
+#define x264_pixel_satd_4x16_avx x264_template(pixel_satd_4x16_avx)
+#define x264_pixel_satd_4x16_avx512 x264_template(pixel_satd_4x16_avx512)
+#define x264_pixel_satd_4x16_mmx2 x264_template(pixel_satd_4x16_mmx2)
+#define x264_pixel_satd_4x16_sse2 x264_template(pixel_satd_4x16_sse2)
+#define x264_pixel_satd_4x16_sse4 x264_template(pixel_satd_4x16_sse4)
+#define x264_pixel_satd_4x16_ssse3 x264_template(pixel_satd_4x16_ssse3)
+#define x264_pixel_satd_4x16_ssse3_atom x264_template(pixel_satd_4x16_ssse3_atom)
+#define x264_pixel_satd_4x4_avx x264_template(pixel_satd_4x4_avx)
+#define x264_pixel_satd_4x4_avx512 x264_template(pixel_satd_4x4_avx512)
+#define x264_pixel_satd_4x4_mmx2 x264_template(pixel_satd_4x4_mmx2)
+#define x264_pixel_satd_4x4_sse4 x264_template(pixel_satd_4x4_sse4)
+#define x264_pixel_satd_4x4_ssse3 x264_template(pixel_satd_4x4_ssse3)
+#define x264_pixel_satd_4x4_xop x264_template(pixel_satd_4x4_xop)
+#define x264_pixel_satd_4x8_avx x264_template(pixel_satd_4x8_avx)
+#define x264_pixel_satd_4x8_avx512 x264_template(pixel_satd_4x8_avx512)
+#define x264_pixel_satd_4x8_mmx2 x264_template(pixel_satd_4x8_mmx2)
+#define x264_pixel_satd_4x8_sse2 x264_template(pixel_satd_4x8_sse2)
+#define x264_pixel_satd_4x8_sse4 x264_template(pixel_satd_4x8_sse4)
+#define x264_pixel_satd_4x8_ssse3 x264_template(pixel_satd_4x8_ssse3)
+#define x264_pixel_satd_4x8_ssse3_atom x264_template(pixel_satd_4x8_ssse3_atom)
+#define x264_pixel_satd_4x8_xop x264_template(pixel_satd_4x8_xop)
+#define x264_pixel_satd_8x16_avx x264_template(pixel_satd_8x16_avx)
+#define x264_pixel_satd_8x16_avx2 x264_template(pixel_satd_8x16_avx2)
+#define x264_pixel_satd_8x16_avx512 x264_template(pixel_satd_8x16_avx512)
+#define x264_pixel_satd_8x16_mmx2 x264_template(pixel_satd_8x16_mmx2)
+#define x264_pixel_satd_8x16_sse2 x264_template(pixel_satd_8x16_sse2)
+#define x264_pixel_satd_8x16_sse4 x264_template(pixel_satd_8x16_sse4)
+#define x264_pixel_satd_8x16_ssse3 x264_template(pixel_satd_8x16_ssse3)
+#define x264_pixel_satd_8x16_ssse3_atom x264_template(pixel_satd_8x16_ssse3_atom)
+#define x264_pixel_satd_8x16_xop x264_template(pixel_satd_8x16_xop)
+#define x264_pixel_satd_8x4_avx x264_template(pixel_satd_8x4_avx)
+#define x264_pixel_satd_8x4_avx512 x264_template(pixel_satd_8x4_avx512)
+#define x264_pixel_satd_8x4_mmx2 x264_template(pixel_satd_8x4_mmx2)
+#define x264_pixel_satd_8x4_sse2 x264_template(pixel_satd_8x4_sse2)
+#define x264_pixel_satd_8x4_sse4 x264_template(pixel_satd_8x4_sse4)
+#define x264_pixel_satd_8x4_ssse3 x264_template(pixel_satd_8x4_ssse3)
+#define x264_pixel_satd_8x4_ssse3_atom x264_template(pixel_satd_8x4_ssse3_atom)
+#define x264_pixel_satd_8x4_xop x264_template(pixel_satd_8x4_xop)
+#define x264_pixel_satd_8x8_avx x264_template(pixel_satd_8x8_avx)
+#define x264_pixel_satd_8x8_avx2 x264_template(pixel_satd_8x8_avx2)
+#define x264_pixel_satd_8x8_avx512 x264_template(pixel_satd_8x8_avx512)
+#define x264_pixel_satd_8x8_mmx2 x264_template(pixel_satd_8x8_mmx2)
+#define x264_pixel_satd_8x8_sse2 x264_template(pixel_satd_8x8_sse2)
+#define x264_pixel_satd_8x8_sse4 x264_template(pixel_satd_8x8_sse4)
+#define x264_pixel_satd_8x8_ssse3 x264_template(pixel_satd_8x8_ssse3)
+#define x264_pixel_satd_8x8_ssse3_atom x264_template(pixel_satd_8x8_ssse3_atom)
+#define x264_pixel_satd_8x8_xop x264_template(pixel_satd_8x8_xop)
+#define x264_pixel_ssd_16x16_avx x264_template(pixel_ssd_16x16_avx)
+#define x264_pixel_ssd_16x16_avx2 x264_template(pixel_ssd_16x16_avx2)
+#define x264_pixel_ssd_16x16_mmx x264_template(pixel_ssd_16x16_mmx)
+#define x264_pixel_ssd_16x16_mmx2 x264_template(pixel_ssd_16x16_mmx2)
+#define x264_pixel_ssd_16x16_sse2 x264_template(pixel_ssd_16x16_sse2)
+#define x264_pixel_ssd_16x16_sse2slow x264_template(pixel_ssd_16x16_sse2slow)
+#define x264_pixel_ssd_16x16_ssse3 x264_template(pixel_ssd_16x16_ssse3)
+#define x264_pixel_ssd_16x16_xop x264_template(pixel_ssd_16x16_xop)
+#define x264_pixel_ssd_16x8_avx x264_template(pixel_ssd_16x8_avx)
+#define x264_pixel_ssd_16x8_avx2 x264_template(pixel_ssd_16x8_avx2)
+#define x264_pixel_ssd_16x8_mmx x264_template(pixel_ssd_16x8_mmx)
+#define x264_pixel_ssd_16x8_mmx2 x264_template(pixel_ssd_16x8_mmx2)
+#define x264_pixel_ssd_16x8_sse2 x264_template(pixel_ssd_16x8_sse2)
+#define x264_pixel_ssd_16x8_sse2slow x264_template(pixel_ssd_16x8_sse2slow)
+#define x264_pixel_ssd_16x8_ssse3 x264_template(pixel_ssd_16x8_ssse3)
+#define x264_pixel_ssd_16x8_xop x264_template(pixel_ssd_16x8_xop)
+#define x264_pixel_ssd_4x16_mmx x264_template(pixel_ssd_4x16_mmx)
+#define x264_pixel_ssd_4x16_mmx2 x264_template(pixel_ssd_4x16_mmx2)
+#define x264_pixel_ssd_4x16_ssse3 x264_template(pixel_ssd_4x16_ssse3)
+#define x264_pixel_ssd_4x4_mmx x264_template(pixel_ssd_4x4_mmx)
+#define x264_pixel_ssd_4x4_mmx2 x264_template(pixel_ssd_4x4_mmx2)
+#define x264_pixel_ssd_4x4_ssse3 x264_template(pixel_ssd_4x4_ssse3)
+#define x264_pixel_ssd_4x8_mmx x264_template(pixel_ssd_4x8_mmx)
+#define x264_pixel_ssd_4x8_mmx2 x264_template(pixel_ssd_4x8_mmx2)
+#define x264_pixel_ssd_4x8_ssse3 x264_template(pixel_ssd_4x8_ssse3)
+#define x264_pixel_ssd_8x16_avx x264_template(pixel_ssd_8x16_avx)
+#define x264_pixel_ssd_8x16_mmx x264_template(pixel_ssd_8x16_mmx)
+#define x264_pixel_ssd_8x16_mmx2 x264_template(pixel_ssd_8x16_mmx2)
+#define x264_pixel_ssd_8x16_sse2 x264_template(pixel_ssd_8x16_sse2)
+#define x264_pixel_ssd_8x16_sse2slow x264_template(pixel_ssd_8x16_sse2slow)
+#define x264_pixel_ssd_8x16_ssse3 x264_template(pixel_ssd_8x16_ssse3)
+#define x264_pixel_ssd_8x16_xop x264_template(pixel_ssd_8x16_xop)
+#define x264_pixel_ssd_8x4_avx x264_template(pixel_ssd_8x4_avx)
+#define x264_pixel_ssd_8x4_mmx x264_template(pixel_ssd_8x4_mmx)
+#define x264_pixel_ssd_8x4_mmx2 x264_template(pixel_ssd_8x4_mmx2)
+#define x264_pixel_ssd_8x4_sse2 x264_template(pixel_ssd_8x4_sse2)
+#define x264_pixel_ssd_8x4_sse2slow x264_template(pixel_ssd_8x4_sse2slow)
+#define x264_pixel_ssd_8x4_ssse3 x264_template(pixel_ssd_8x4_ssse3)
+#define x264_pixel_ssd_8x4_xop x264_template(pixel_ssd_8x4_xop)
+#define x264_pixel_ssd_8x8_avx x264_template(pixel_ssd_8x8_avx)
+#define x264_pixel_ssd_8x8_mmx x264_template(pixel_ssd_8x8_mmx)
+#define x264_pixel_ssd_8x8_mmx2 x264_template(pixel_ssd_8x8_mmx2)
+#define x264_pixel_ssd_8x8_sse2 x264_template(pixel_ssd_8x8_sse2)
+#define x264_pixel_ssd_8x8_sse2slow x264_template(pixel_ssd_8x8_sse2slow)
+#define x264_pixel_ssd_8x8_ssse3 x264_template(pixel_ssd_8x8_ssse3)
+#define x264_pixel_ssd_8x8_xop x264_template(pixel_ssd_8x8_xop)
+#define x264_pixel_var_16x16_avx x264_template(pixel_var_16x16_avx)
+#define x264_pixel_var_16x16_avx2 x264_template(pixel_var_16x16_avx2)
+#define x264_pixel_var_16x16_avx512 x264_template(pixel_var_16x16_avx512)
+#define x264_pixel_var_16x16_sse2 x264_template(pixel_var_16x16_sse2)
+#define x264_pixel_var_8x16_avx x264_template(pixel_var_8x16_avx)
+#define x264_pixel_var_8x16_avx512 x264_template(pixel_var_8x16_avx512)
+#define x264_pixel_var_8x16_sse2 x264_template(pixel_var_8x16_sse2)
+#define x264_pixel_var_8x8_avx x264_template(pixel_var_8x8_avx)
+#define x264_pixel_var_8x8_avx512 x264_template(pixel_var_8x8_avx512)
+#define x264_pixel_var_8x8_sse2 x264_template(pixel_var_8x8_sse2)
 #define DECL_PIXELS( ret, name, suffix, args ) \
     ret x264_pixel_##name##_16x16_##suffix args;\
     ret x264_pixel_##name##_16x8_##suffix args;\
@@ -110,86 +446,153 @@
 DECL_PIXELS( uint64_t, hadamard_ac, avx2,  ( pixel *pix, intptr_t i_stride ))
 
 
+#define x264_intra_satd_x3_4x4_mmx2 x264_template(intra_satd_x3_4x4_mmx2)
 void x264_intra_satd_x3_4x4_mmx2   ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_4x4_mmx2    ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_4x4_sse2    ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_4x4_ssse3   ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_4x4_avx     ( pixel   *, pixel   *, int * );
+#define x264_intra_sad_x3_4x4_mmx2 x264_template(intra_sad_x3_4x4_mmx2)
+void x264_intra_sad_x3_4x4_mmx2    ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_4x4_sse2 x264_template(intra_sad_x3_4x4_sse2)
+void x264_intra_sad_x3_4x4_sse2    ( uint16_t*, uint16_t*, int * );
+#define x264_intra_sad_x3_4x4_ssse3 x264_template(intra_sad_x3_4x4_ssse3)
+void x264_intra_sad_x3_4x4_ssse3   ( uint16_t*, uint16_t*, int * );
+#define x264_intra_sad_x3_4x4_avx x264_template(intra_sad_x3_4x4_avx)
+void x264_intra_sad_x3_4x4_avx     ( uint16_t*, uint16_t*, int * );
+#define x264_intra_satd_x3_8x8c_mmx2 x264_template(intra_satd_x3_8x8c_mmx2)
 void x264_intra_satd_x3_8x8c_mmx2  ( pixel   *, pixel   *, int * );
+#define x264_intra_satd_x3_8x8c_ssse3 x264_template(intra_satd_x3_8x8c_ssse3)
 void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * );
-void x264_intra_sad_x3_8x8c_mmx2   ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_8x8c_sse2   ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_8x8c_ssse3  ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_8x8c_avx2   ( pixel   *, pixel   *, int * );
+#define x264_intra_sad_x3_8x8c_mmx2 x264_template(intra_sad_x3_8x8c_mmx2)
+void x264_intra_sad_x3_8x8c_mmx2   ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_8x8c_ssse3 x264_template(intra_sad_x3_8x8c_ssse3)
+void x264_intra_sad_x3_8x8c_ssse3  ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_8x8c_avx2 x264_template(intra_sad_x3_8x8c_avx2)
+void x264_intra_sad_x3_8x8c_avx2   ( uint8_t *, uint8_t *, int * );
+#define x264_intra_satd_x3_16x16_mmx2 x264_template(intra_satd_x3_16x16_mmx2)
 void x264_intra_satd_x3_16x16_mmx2 ( pixel   *, pixel   *, int * );
+#define x264_intra_satd_x3_16x16_ssse3 x264_template(intra_satd_x3_16x16_ssse3)
 void x264_intra_satd_x3_16x16_ssse3( uint8_t *, uint8_t *, int * );
-void x264_intra_sad_x3_16x16_mmx2  ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_16x16_sse2  ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_16x16_ssse3 ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_16x16_avx2  ( pixel   *, pixel   *, int * );
+#define x264_intra_sad_x3_16x16_mmx2 x264_template(intra_sad_x3_16x16_mmx2)
+void x264_intra_sad_x3_16x16_mmx2  ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_16x16_sse2 x264_template(intra_sad_x3_16x16_sse2)
+void x264_intra_sad_x3_16x16_sse2  ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_16x16_ssse3 x264_template(intra_sad_x3_16x16_ssse3)
+void x264_intra_sad_x3_16x16_ssse3 ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_16x16_avx2 x264_template(intra_sad_x3_16x16_avx2)
+void x264_intra_sad_x3_16x16_avx2  ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sa8d_x3_8x8_mmx2 x264_template(intra_sa8d_x3_8x8_mmx2)
 void x264_intra_sa8d_x3_8x8_mmx2   ( uint8_t *, uint8_t *, int * );
-void x264_intra_sa8d_x3_8x8_sse2   ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_8x8_mmx2    ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_8x8_sse2    ( pixel   *, pixel   *, int * );
-void x264_intra_sad_x3_8x8_ssse3   ( pixel   *, pixel   *, int * );
+#define x264_intra_sa8d_x3_8x8_sse2 x264_template(intra_sa8d_x3_8x8_sse2)
+void x264_intra_sa8d_x3_8x8_sse2   ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_8x8_mmx2 x264_template(intra_sad_x3_8x8_mmx2)
+void x264_intra_sad_x3_8x8_mmx2    ( uint8_t *, uint8_t *, int * );
+#define x264_intra_sad_x3_8x8_sse2 x264_template(intra_sad_x3_8x8_sse2)
+void x264_intra_sad_x3_8x8_sse2    ( uint16_t*, uint16_t*, int * );
+#define x264_intra_sad_x3_8x8_ssse3 x264_template(intra_sad_x3_8x8_ssse3)
+void x264_intra_sad_x3_8x8_ssse3   ( uint16_t*, uint16_t*, int * );
+#define x264_intra_sad_x3_8x8_avx2 x264_template(intra_sad_x3_8x8_avx2)
 void x264_intra_sad_x3_8x8_avx2    ( uint16_t*, uint16_t*, int * );
+#define x264_intra_satd_x9_4x4_ssse3 x264_template(intra_satd_x9_4x4_ssse3)
 int x264_intra_satd_x9_4x4_ssse3( uint8_t *, uint8_t *, uint16_t * );
+#define x264_intra_satd_x9_4x4_sse4 x264_template(intra_satd_x9_4x4_sse4)
 int x264_intra_satd_x9_4x4_sse4 ( uint8_t *, uint8_t *, uint16_t * );
+#define x264_intra_satd_x9_4x4_avx x264_template(intra_satd_x9_4x4_avx)
 int x264_intra_satd_x9_4x4_avx  ( uint8_t *, uint8_t *, uint16_t * );
+#define x264_intra_satd_x9_4x4_xop x264_template(intra_satd_x9_4x4_xop)
 int x264_intra_satd_x9_4x4_xop  ( uint8_t *, uint8_t *, uint16_t * );
+#define x264_intra_sad_x9_4x4_ssse3 x264_template(intra_sad_x9_4x4_ssse3)
 int x264_intra_sad_x9_4x4_ssse3 ( uint8_t *, uint8_t *, uint16_t * );
+#define x264_intra_sad_x9_4x4_sse4 x264_template(intra_sad_x9_4x4_sse4)
 int x264_intra_sad_x9_4x4_sse4  ( uint8_t *, uint8_t *, uint16_t * );
+#define x264_intra_sad_x9_4x4_avx x264_template(intra_sad_x9_4x4_avx)
 int x264_intra_sad_x9_4x4_avx   ( uint8_t *, uint8_t *, uint16_t * );
+#define x264_intra_sa8d_x9_8x8_ssse3 x264_template(intra_sa8d_x9_8x8_ssse3)
 int x264_intra_sa8d_x9_8x8_ssse3( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
+#define x264_intra_sa8d_x9_8x8_sse4 x264_template(intra_sa8d_x9_8x8_sse4)
 int x264_intra_sa8d_x9_8x8_sse4 ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
+#define x264_intra_sa8d_x9_8x8_avx x264_template(intra_sa8d_x9_8x8_avx)
 int x264_intra_sa8d_x9_8x8_avx  ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
+#define x264_intra_sad_x9_8x8_ssse3 x264_template(intra_sad_x9_8x8_ssse3)
 int x264_intra_sad_x9_8x8_ssse3 ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
+#define x264_intra_sad_x9_8x8_sse4 x264_template(intra_sad_x9_8x8_sse4)
 int x264_intra_sad_x9_8x8_sse4  ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
+#define x264_intra_sad_x9_8x8_avx x264_template(intra_sad_x9_8x8_avx)
 int x264_intra_sad_x9_8x8_avx   ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
+#define x264_intra_sad_x9_8x8_avx2 x264_template(intra_sad_x9_8x8_avx2)
 int x264_intra_sad_x9_8x8_avx2  ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
 
+#define x264_pixel_ssd_nv12_core_sse2 x264_template(pixel_ssd_nv12_core_sse2)
 void x264_pixel_ssd_nv12_core_sse2( pixel *pixuv1, intptr_t stride1,
                                     pixel *pixuv2, intptr_t stride2, int width,
                                     int height, uint64_t *ssd_u, uint64_t *ssd_v );
+#define x264_pixel_ssd_nv12_core_avx x264_template(pixel_ssd_nv12_core_avx)
 void x264_pixel_ssd_nv12_core_avx ( pixel *pixuv1, intptr_t stride1,
                                     pixel *pixuv2, intptr_t stride2, int width,
                                     int height, uint64_t *ssd_u, uint64_t *ssd_v );
+#define x264_pixel_ssd_nv12_core_xop x264_template(pixel_ssd_nv12_core_xop)
 void x264_pixel_ssd_nv12_core_xop ( pixel *pixuv1, intptr_t stride1,
                                     pixel *pixuv2, intptr_t stride2, int width,
                                     int height, uint64_t *ssd_u, uint64_t *ssd_v );
+#define x264_pixel_ssd_nv12_core_avx2 x264_template(pixel_ssd_nv12_core_avx2)
 void x264_pixel_ssd_nv12_core_avx2( pixel *pixuv1, intptr_t stride1,
                                     pixel *pixuv2, intptr_t stride2, int width,
                                     int height, uint64_t *ssd_u, uint64_t *ssd_v );
+#define x264_pixel_ssim_4x4x2_core_mmx2 x264_template(pixel_ssim_4x4x2_core_mmx2)
 void x264_pixel_ssim_4x4x2_core_mmx2( const uint8_t *pix1, intptr_t stride1,
                                       const uint8_t *pix2, intptr_t stride2, int sums[2][4] );
+#define x264_pixel_ssim_4x4x2_core_sse2 x264_template(pixel_ssim_4x4x2_core_sse2)
 void x264_pixel_ssim_4x4x2_core_sse2( const pixel *pix1, intptr_t stride1,
                                       const pixel *pix2, intptr_t stride2, int sums[2][4] );
+#define x264_pixel_ssim_4x4x2_core_avx x264_template(pixel_ssim_4x4x2_core_avx)
 void x264_pixel_ssim_4x4x2_core_avx ( const pixel *pix1, intptr_t stride1,
                                       const pixel *pix2, intptr_t stride2, int sums[2][4] );
+#define x264_pixel_ssim_end4_sse2 x264_template(pixel_ssim_end4_sse2)
 float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width );
+#define x264_pixel_ssim_end4_avx x264_template(pixel_ssim_end4_avx)
 float x264_pixel_ssim_end4_avx ( int sum0[5][4], int sum1[5][4], int width );
+#define x264_pixel_var2_8x8_sse2 x264_template(pixel_var2_8x8_sse2)
 int  x264_pixel_var2_8x8_sse2   ( pixel   *fenc, pixel   *fdec, int ssd[2] );
+#define x264_pixel_var2_8x8_ssse3 x264_template(pixel_var2_8x8_ssse3)
 int  x264_pixel_var2_8x8_ssse3  ( uint8_t *fenc, uint8_t *fdec, int ssd[2] );
+#define x264_pixel_var2_8x8_avx2 x264_template(pixel_var2_8x8_avx2)
 int  x264_pixel_var2_8x8_avx2   ( pixel   *fenc, pixel   *fdec, int ssd[2] );
+#define x264_pixel_var2_8x8_avx512 x264_template(pixel_var2_8x8_avx512)
 int  x264_pixel_var2_8x8_avx512 ( pixel   *fenc, pixel   *fdec, int ssd[2] );
+#define x264_pixel_var2_8x16_sse2 x264_template(pixel_var2_8x16_sse2)
 int  x264_pixel_var2_8x16_sse2  ( pixel   *fenc, pixel   *fdec, int ssd[2] );
+#define x264_pixel_var2_8x16_ssse3 x264_template(pixel_var2_8x16_ssse3)
 int  x264_pixel_var2_8x16_ssse3 ( uint8_t *fenc, uint8_t *fdec, int ssd[2] );
+#define x264_pixel_var2_8x16_avx2 x264_template(pixel_var2_8x16_avx2)
 int  x264_pixel_var2_8x16_avx2  ( pixel   *fenc, pixel   *fdec, int ssd[2] );
+#define x264_pixel_var2_8x16_avx512 x264_template(pixel_var2_8x16_avx512)
 int  x264_pixel_var2_8x16_avx512( pixel   *fenc, pixel   *fdec, int ssd[2] );
+#define x264_pixel_vsad_mmx2 x264_template(pixel_vsad_mmx2)
 int  x264_pixel_vsad_mmx2 ( pixel *src, intptr_t stride, int height );
+#define x264_pixel_vsad_sse2 x264_template(pixel_vsad_sse2)
 int  x264_pixel_vsad_sse2 ( pixel *src, intptr_t stride, int height );
+#define x264_pixel_vsad_ssse3 x264_template(pixel_vsad_ssse3)
 int  x264_pixel_vsad_ssse3( pixel *src, intptr_t stride, int height );
+#define x264_pixel_vsad_xop x264_template(pixel_vsad_xop)
 int  x264_pixel_vsad_xop  ( pixel *src, intptr_t stride, int height );
+#define x264_pixel_vsad_avx2 x264_template(pixel_vsad_avx2)
 int  x264_pixel_vsad_avx2 ( uint16_t *src, intptr_t stride, int height );
+#define x264_pixel_asd8_sse2 x264_template(pixel_asd8_sse2)
 int x264_pixel_asd8_sse2 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
+#define x264_pixel_asd8_ssse3 x264_template(pixel_asd8_ssse3)
 int x264_pixel_asd8_ssse3( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
+#define x264_pixel_asd8_xop x264_template(pixel_asd8_xop)
 int x264_pixel_asd8_xop  ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height );
+#define x264_pixel_sa8d_satd_16x16_sse2 x264_template(pixel_sa8d_satd_16x16_sse2)
 uint64_t x264_pixel_sa8d_satd_16x16_sse2      ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 );
+#define x264_pixel_sa8d_satd_16x16_ssse3 x264_template(pixel_sa8d_satd_16x16_ssse3)
 uint64_t x264_pixel_sa8d_satd_16x16_ssse3     ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 );
+#define x264_pixel_sa8d_satd_16x16_ssse3_atom x264_template(pixel_sa8d_satd_16x16_ssse3_atom)
 uint64_t x264_pixel_sa8d_satd_16x16_ssse3_atom( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 );
+#define x264_pixel_sa8d_satd_16x16_sse4 x264_template(pixel_sa8d_satd_16x16_sse4)
 uint64_t x264_pixel_sa8d_satd_16x16_sse4      ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 );
+#define x264_pixel_sa8d_satd_16x16_avx x264_template(pixel_sa8d_satd_16x16_avx)
 uint64_t x264_pixel_sa8d_satd_16x16_avx       ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 );
+#define x264_pixel_sa8d_satd_16x16_xop x264_template(pixel_sa8d_satd_16x16_xop)
 uint64_t x264_pixel_sa8d_satd_16x16_xop       ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 );
+#define x264_pixel_sa8d_satd_16x16_avx2 x264_template(pixel_sa8d_satd_16x16_avx2)
 uint64_t x264_pixel_sa8d_satd_16x16_avx2      ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 );
 
 
diff -Nru x264-0.152.2854+gite9a5903/common/x86/predict-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/predict-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/predict-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/predict-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* predict-a.asm: x86 intra prediction
 ;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -688,7 +688,7 @@
     je .fix_lt_2
 .do_top:
     and        r2d, 4
-%ifdef PIC
+%if ARCH_X86_64
     lea         r3, [shuf_fixtr]
     pshufb      m3, [r3+r2*4]
 %else
diff -Nru x264-0.152.2854+gite9a5903/common/x86/predict-c.c x264-0.158.2988+git-20191101.7817004/common/x86/predict-c.c
--- x264-0.152.2854+gite9a5903/common/x86/predict-c.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/predict-c.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict-c.c: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -70,7 +70,7 @@
         x264_predict_16x16_p_core_##name( src, i00, b, c );
 
 #define PREDICT_16x16_P(name, name2)\
-static void x264_predict_16x16_p_##name( pixel *src )\
+static void predict_16x16_p_##name( pixel *src )\
 {\
     PREDICT_16x16_P_CORE\
     PREDICT_16x16_P_END(name2)\
@@ -128,7 +128,7 @@
       + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );
 
 #define PREDICT_16x16_P_INLINE(name, name2)\
-static void x264_predict_16x16_p_##name( pixel *src )\
+static void predict_16x16_p_##name( pixel *src )\
 {\
     PREDICT_16x16_P_CORE_INLINE\
     PREDICT_16x16_P_END(name2)\
@@ -174,7 +174,7 @@
 #endif // HIGH_BIT_DEPTH
 
 #define PREDICT_8x16C_P(name)\
-static void x264_predict_8x16c_p_##name( pixel *src )\
+static void predict_8x16c_p_##name( pixel *src )\
 {\
     PREDICT_8x16C_P_CORE\
     PREDICT_8x16C_P_END(name)\
@@ -211,7 +211,7 @@
 #endif // HIGH_BIT_DEPTH
 
 #define PREDICT_8x8C_P(name, name2)\
-static void x264_predict_8x8c_p_##name( pixel *src )\
+static void predict_8x8c_p_##name( pixel *src )\
 {\
     PREDICT_8x8C_P_CORE\
     PREDICT_8x8C_P_END(name2)\
@@ -257,7 +257,7 @@
     H += -4 * src[-1*FDEC_STRIDE -1];
 
 #define PREDICT_8x8C_P_INLINE(name, name2)\
-static void x264_predict_8x8c_p_##name( pixel *src )\
+static void predict_8x8c_p_##name( pixel *src )\
 {\
     PREDICT_8x8C_P_CORE_INLINE\
     PREDICT_8x8C_P_END(name2)\
@@ -281,7 +281,7 @@
 PREDICT_8x8C_P_INLINE( avx2, avx2 )
 
 #if ARCH_X86_64 && !HIGH_BIT_DEPTH
-static void x264_predict_8x8c_dc_left( uint8_t *src )
+static void predict_8x8c_dc_left( uint8_t *src )
 {
     int y;
     uint32_t s0 = 0, s1 = 0;
@@ -327,7 +327,7 @@
     pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
     pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
     pf[I_PRED_16x16_H]       = x264_predict_16x16_h_sse2;
-    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
+    pf[I_PRED_16x16_P]       = predict_16x16_p_sse2;
     if( !(cpu&X264_CPU_AVX) )
         return;
     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_avx;
@@ -336,7 +336,7 @@
     pf[I_PRED_16x16_H]       = x264_predict_16x16_h_avx2;
 #else
 #if !ARCH_X86_64
-    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_mmx2;
+    pf[I_PRED_16x16_P]       = predict_16x16_p_mmx2;
 #endif
     if( !(cpu&X264_CPU_SSE) )
         return;
@@ -348,22 +348,22 @@
         return;
     pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
     pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
-    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_sse2;
+    pf[I_PRED_16x16_P]       = predict_16x16_p_sse2;
     if( !(cpu&X264_CPU_SSSE3) )
         return;
     if( !(cpu&X264_CPU_SLOW_PSHUFB) )
         pf[I_PRED_16x16_H]       = x264_predict_16x16_h_ssse3;
 #if HAVE_X86_INLINE_ASM
-    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_ssse3;
+    pf[I_PRED_16x16_P]       = predict_16x16_p_ssse3;
 #endif
     if( !(cpu&X264_CPU_AVX) )
         return;
-    pf[I_PRED_16x16_P]       = x264_predict_16x16_p_avx;
+    pf[I_PRED_16x16_P]       = predict_16x16_p_avx;
 #endif // HIGH_BIT_DEPTH
 
     if( cpu&X264_CPU_AVX2 )
     {
-        pf[I_PRED_16x16_P]       = x264_predict_16x16_p_avx2;
+        pf[I_PRED_16x16_P]       = predict_16x16_p_avx2;
         pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_avx2;
         pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_avx2;
         pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_avx2;
@@ -388,16 +388,16 @@
     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_sse2;
     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_sse2;
     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_sse2;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
+    pf[I_PRED_CHROMA_P]       = predict_8x8c_p_sse2;
     if( !(cpu&X264_CPU_AVX) )
         return;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_avx;
+    pf[I_PRED_CHROMA_P]       = predict_8x8c_p_avx;
     if( !(cpu&X264_CPU_AVX2) )
         return;
     pf[I_PRED_CHROMA_H]   = x264_predict_8x8c_h_avx2;
 #else
 #if ARCH_X86_64
-    pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
+    pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left;
 #endif
     pf[I_PRED_CHROMA_V]       = x264_predict_8x8c_v_mmx;
     if( !(cpu&X264_CPU_MMX2) )
@@ -405,26 +405,26 @@
     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_mmx2;
     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_mmx2;
 #if !ARCH_X86_64
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_mmx2;
+    pf[I_PRED_CHROMA_P]       = predict_8x8c_p_mmx2;
 #endif
     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_mmx2;
     if( !(cpu&X264_CPU_SSE2) )
         return;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_sse2;
+    pf[I_PRED_CHROMA_P]       = predict_8x8c_p_sse2;
     if( !(cpu&X264_CPU_SSSE3) )
         return;
     pf[I_PRED_CHROMA_H]       = x264_predict_8x8c_h_ssse3;
 #if HAVE_X86_INLINE_ASM
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_ssse3;
+    pf[I_PRED_CHROMA_P]       = predict_8x8c_p_ssse3;
 #endif
     if( !(cpu&X264_CPU_AVX) )
         return;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_avx;
+    pf[I_PRED_CHROMA_P]       = predict_8x8c_p_avx;
 #endif // HIGH_BIT_DEPTH
 
     if( cpu&X264_CPU_AVX2 )
     {
-        pf[I_PRED_CHROMA_P]   = x264_predict_8x8c_p_avx2;
+        pf[I_PRED_CHROMA_P]   = predict_8x8c_p_avx2;
     }
 }
 
@@ -445,10 +445,10 @@
     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x16c_dc_top_sse2;
     pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_sse2;
     pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_sse2;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_sse2;
+    pf[I_PRED_CHROMA_P]       = predict_8x16c_p_sse2;
     if( !(cpu&X264_CPU_AVX) )
         return;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_avx;
+    pf[I_PRED_CHROMA_P]       = predict_8x16c_p_avx;
     if( !(cpu&X264_CPU_AVX2) )
         return;
     pf[I_PRED_CHROMA_H]   = x264_predict_8x16c_h_avx2;
@@ -460,22 +460,22 @@
     pf[I_PRED_CHROMA_DC]      = x264_predict_8x16c_dc_mmx2;
     pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_mmx2;
 #if !ARCH_X86_64
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_mmx2;
+    pf[I_PRED_CHROMA_P]       = predict_8x16c_p_mmx2;
 #endif
     if( !(cpu&X264_CPU_SSE2) )
         return;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_sse2;
+    pf[I_PRED_CHROMA_P]       = predict_8x16c_p_sse2;
     if( !(cpu&X264_CPU_SSSE3) )
         return;
     pf[I_PRED_CHROMA_H]       = x264_predict_8x16c_h_ssse3;
     if( !(cpu&X264_CPU_AVX) )
         return;
-    pf[I_PRED_CHROMA_P]       = x264_predict_8x16c_p_avx;
+    pf[I_PRED_CHROMA_P]       = predict_8x16c_p_avx;
 #endif // HIGH_BIT_DEPTH
 
     if( cpu&X264_CPU_AVX2 )
     {
-        pf[I_PRED_CHROMA_P]   = x264_predict_8x16c_p_avx2;
+        pf[I_PRED_CHROMA_P]   = predict_8x16c_p_avx2;
     }
 }
 
diff -Nru x264-0.152.2854+gite9a5903/common/x86/predict.h x264-0.158.2988+git-20191101.7817004/common/x86/predict.h
--- x264-0.152.2854+gite9a5903/common/x86/predict.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/predict.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: x86 intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -24,121 +24,233 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_I386_PREDICT_H
-#define X264_I386_PREDICT_H
+#ifndef X264_X86_PREDICT_H
+#define X264_X86_PREDICT_H
 
+#define x264_predict_16x16_init_mmx x264_template(predict_16x16_init_mmx)
 void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] );
+#define x264_predict_8x16c_init_mmx x264_template(predict_8x16c_init_mmx)
 void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] );
+#define x264_predict_8x8c_init_mmx x264_template(predict_8x8c_init_mmx)
 void x264_predict_8x8c_init_mmx ( int cpu, x264_predict_t pf[7] );
+#define x264_predict_4x4_init_mmx x264_template(predict_4x4_init_mmx)
 void x264_predict_4x4_init_mmx  ( int cpu, x264_predict_t pf[12] );
+#define x264_predict_8x8_init_mmx x264_template(predict_8x8_init_mmx)
 void x264_predict_8x8_init_mmx  ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter );
 
+#define x264_predict_16x16_v_mmx2 x264_template(predict_16x16_v_mmx2)
 void x264_predict_16x16_v_mmx2( pixel *src );
+#define x264_predict_16x16_v_sse x264_template(predict_16x16_v_sse)
 void x264_predict_16x16_v_sse ( pixel *src );
+#define x264_predict_16x16_v_avx x264_template(predict_16x16_v_avx)
 void x264_predict_16x16_v_avx ( uint16_t *src );
+#define x264_predict_16x16_h_mmx2 x264_template(predict_16x16_h_mmx2)
 void x264_predict_16x16_h_mmx2( pixel *src );
+#define x264_predict_16x16_h_sse2 x264_template(predict_16x16_h_sse2)
 void x264_predict_16x16_h_sse2( uint16_t *src );
+#define x264_predict_16x16_h_ssse3 x264_template(predict_16x16_h_ssse3)
 void x264_predict_16x16_h_ssse3( uint8_t *src );
+#define x264_predict_16x16_h_avx2 x264_template(predict_16x16_h_avx2)
 void x264_predict_16x16_h_avx2( uint16_t *src );
+#define x264_predict_16x16_dc_sse2 x264_template(predict_16x16_dc_sse2)
 void x264_predict_16x16_dc_sse2( pixel *src );
+#define x264_predict_16x16_dc_avx2 x264_template(predict_16x16_dc_avx2)
 void x264_predict_16x16_dc_avx2( pixel *src );
+#define x264_predict_16x16_dc_left_sse2 x264_template(predict_16x16_dc_left_sse2)
 void x264_predict_16x16_dc_left_sse2( pixel *src );
+#define x264_predict_16x16_dc_left_avx2 x264_template(predict_16x16_dc_left_avx2)
 void x264_predict_16x16_dc_left_avx2( pixel *src );
+#define x264_predict_16x16_dc_top_sse2 x264_template(predict_16x16_dc_top_sse2)
 void x264_predict_16x16_dc_top_sse2( pixel *src );
+#define x264_predict_16x16_dc_top_avx2 x264_template(predict_16x16_dc_top_avx2)
 void x264_predict_16x16_dc_top_avx2( pixel *src );
+#define x264_predict_16x16_p_core_mmx2 x264_template(predict_16x16_p_core_mmx2)
 void x264_predict_16x16_p_core_mmx2( uint8_t *src, int i00, int b, int c );
+#define x264_predict_16x16_p_core_sse2 x264_template(predict_16x16_p_core_sse2)
 void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c );
+#define x264_predict_16x16_p_core_avx x264_template(predict_16x16_p_core_avx)
 void x264_predict_16x16_p_core_avx( pixel *src, int i00, int b, int c );
+#define x264_predict_16x16_p_core_avx2 x264_template(predict_16x16_p_core_avx2)
 void x264_predict_16x16_p_core_avx2( pixel *src, int i00, int b, int c );
+#define x264_predict_8x16c_dc_mmx2 x264_template(predict_8x16c_dc_mmx2)
 void x264_predict_8x16c_dc_mmx2( pixel *src );
+#define x264_predict_8x16c_dc_sse2 x264_template(predict_8x16c_dc_sse2)
 void x264_predict_8x16c_dc_sse2( uint16_t *src );
+#define x264_predict_8x16c_dc_top_mmx2 x264_template(predict_8x16c_dc_top_mmx2)
 void x264_predict_8x16c_dc_top_mmx2( uint8_t *src );
+#define x264_predict_8x16c_dc_top_sse2 x264_template(predict_8x16c_dc_top_sse2)
 void x264_predict_8x16c_dc_top_sse2( uint16_t *src );
+#define x264_predict_8x16c_v_mmx x264_template(predict_8x16c_v_mmx)
 void x264_predict_8x16c_v_mmx( uint8_t *src );
+#define x264_predict_8x16c_v_sse x264_template(predict_8x16c_v_sse)
 void x264_predict_8x16c_v_sse( uint16_t *src );
+#define x264_predict_8x16c_h_mmx2 x264_template(predict_8x16c_h_mmx2)
 void x264_predict_8x16c_h_mmx2( pixel *src );
+#define x264_predict_8x16c_h_sse2 x264_template(predict_8x16c_h_sse2)
 void x264_predict_8x16c_h_sse2( uint16_t *src );
+#define x264_predict_8x16c_h_ssse3 x264_template(predict_8x16c_h_ssse3)
 void x264_predict_8x16c_h_ssse3( uint8_t *src );
+#define x264_predict_8x16c_h_avx2 x264_template(predict_8x16c_h_avx2)
 void x264_predict_8x16c_h_avx2( uint16_t *src );
+#define x264_predict_8x16c_p_core_mmx2 x264_template(predict_8x16c_p_core_mmx2)
 void x264_predict_8x16c_p_core_mmx2( uint8_t *src, int i00, int b, int c );
+#define x264_predict_8x16c_p_core_sse2 x264_template(predict_8x16c_p_core_sse2)
 void x264_predict_8x16c_p_core_sse2( pixel *src, int i00, int b, int c );
+#define x264_predict_8x16c_p_core_avx x264_template(predict_8x16c_p_core_avx)
 void x264_predict_8x16c_p_core_avx ( pixel *src, int i00, int b, int c );
+#define x264_predict_8x16c_p_core_avx2 x264_template(predict_8x16c_p_core_avx2)
 void x264_predict_8x16c_p_core_avx2( pixel *src, int i00, int b, int c );
+#define x264_predict_8x8c_p_core_mmx2 x264_template(predict_8x8c_p_core_mmx2)
 void x264_predict_8x8c_p_core_mmx2( uint8_t *src, int i00, int b, int c );
+#define x264_predict_8x8c_p_core_sse2 x264_template(predict_8x8c_p_core_sse2)
 void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c );
+#define x264_predict_8x8c_p_core_avx x264_template(predict_8x8c_p_core_avx)
 void x264_predict_8x8c_p_core_avx ( pixel *src, int i00, int b, int c );
+#define x264_predict_8x8c_p_core_avx2 x264_template(predict_8x8c_p_core_avx2)
 void x264_predict_8x8c_p_core_avx2( pixel *src, int i00, int b, int c );
+#define x264_predict_8x8c_dc_mmx2 x264_template(predict_8x8c_dc_mmx2)
 void x264_predict_8x8c_dc_mmx2( pixel *src );
+#define x264_predict_8x8c_dc_sse2 x264_template(predict_8x8c_dc_sse2)
 void x264_predict_8x8c_dc_sse2( uint16_t *src );
+#define x264_predict_8x8c_dc_top_mmx2 x264_template(predict_8x8c_dc_top_mmx2)
 void x264_predict_8x8c_dc_top_mmx2( uint8_t *src );
+#define x264_predict_8x8c_dc_top_sse2 x264_template(predict_8x8c_dc_top_sse2)
 void x264_predict_8x8c_dc_top_sse2( uint16_t *src );
+#define x264_predict_8x8c_v_mmx x264_template(predict_8x8c_v_mmx)
 void x264_predict_8x8c_v_mmx( pixel *src );
+#define x264_predict_8x8c_v_sse x264_template(predict_8x8c_v_sse)
 void x264_predict_8x8c_v_sse( uint16_t *src );
+#define x264_predict_8x8c_h_mmx2 x264_template(predict_8x8c_h_mmx2)
 void x264_predict_8x8c_h_mmx2( pixel *src );
+#define x264_predict_8x8c_h_sse2 x264_template(predict_8x8c_h_sse2)
 void x264_predict_8x8c_h_sse2( uint16_t *src );
+#define x264_predict_8x8c_h_ssse3 x264_template(predict_8x8c_h_ssse3)
 void x264_predict_8x8c_h_ssse3( uint8_t *src );
+#define x264_predict_8x8c_h_avx2 x264_template(predict_8x8c_h_avx2)
 void x264_predict_8x8c_h_avx2( uint16_t *src );
+#define x264_predict_8x8_v_mmx2 x264_template(predict_8x8_v_mmx2)
 void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_v_sse x264_template(predict_8x8_v_sse)
 void x264_predict_8x8_v_sse ( uint16_t *src, uint16_t edge[36] );
+#define x264_predict_8x8_h_mmx2 x264_template(predict_8x8_h_mmx2)
 void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_h_sse2 x264_template(predict_8x8_h_sse2)
 void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[36] );
+#define x264_predict_8x8_hd_mmx2 x264_template(predict_8x8_hd_mmx2)
 void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_hu_mmx2 x264_template(predict_8x8_hu_mmx2)
 void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_dc_mmx2 x264_template(predict_8x8_dc_mmx2)
 void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_dc_sse2 x264_template(predict_8x8_dc_sse2)
 void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[36] );
+#define x264_predict_8x8_dc_top_mmx2 x264_template(predict_8x8_dc_top_mmx2)
 void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_dc_top_sse2 x264_template(predict_8x8_dc_top_sse2)
 void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[36] );
+#define x264_predict_8x8_dc_left_mmx2 x264_template(predict_8x8_dc_left_mmx2)
 void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_dc_left_sse2 x264_template(predict_8x8_dc_left_sse2)
 void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[36] );
+#define x264_predict_8x8_ddl_mmx2 x264_template(predict_8x8_ddl_mmx2)
 void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_ddl_sse2 x264_template(predict_8x8_ddl_sse2)
 void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_ddl_ssse3 x264_template(predict_8x8_ddl_ssse3)
 void x264_predict_8x8_ddl_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_ddl_cache64_ssse3 x264_template(predict_8x8_ddl_cache64_ssse3)
 void x264_predict_8x8_ddl_cache64_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_ddl_avx x264_template(predict_8x8_ddl_avx)
 void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_ddr_mmx2 x264_template(predict_8x8_ddr_mmx2)
 void x264_predict_8x8_ddr_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_ddr_sse2 x264_template(predict_8x8_ddr_sse2)
 void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_ddr_ssse3 x264_template(predict_8x8_ddr_ssse3)
 void x264_predict_8x8_ddr_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_ddr_cache64_ssse3 x264_template(predict_8x8_ddr_cache64_ssse3)
 void x264_predict_8x8_ddr_cache64_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_ddr_avx x264_template(predict_8x8_ddr_avx)
 void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_vl_sse2 x264_template(predict_8x8_vl_sse2)
 void x264_predict_8x8_vl_sse2( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_vl_ssse3 x264_template(predict_8x8_vl_ssse3)
 void x264_predict_8x8_vl_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_vl_avx x264_template(predict_8x8_vl_avx)
 void x264_predict_8x8_vl_avx( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_vl_mmx2 x264_template(predict_8x8_vl_mmx2)
 void x264_predict_8x8_vl_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_vr_mmx2 x264_template(predict_8x8_vr_mmx2)
 void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] );
+#define x264_predict_8x8_vr_sse2 x264_template(predict_8x8_vr_sse2)
 void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_vr_ssse3 x264_template(predict_8x8_vr_ssse3)
 void x264_predict_8x8_vr_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_vr_avx x264_template(predict_8x8_vr_avx)
 void x264_predict_8x8_vr_avx( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_hu_sse2 x264_template(predict_8x8_hu_sse2)
 void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_hu_ssse3 x264_template(predict_8x8_hu_ssse3)
 void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_hu_avx x264_template(predict_8x8_hu_avx)
 void x264_predict_8x8_hu_avx( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_hd_sse2 x264_template(predict_8x8_hd_sse2)
 void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_hd_ssse3 x264_template(predict_8x8_hd_ssse3)
 void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_hd_avx x264_template(predict_8x8_hd_avx)
 void x264_predict_8x8_hd_avx( pixel *src, pixel edge[36] );
+#define x264_predict_8x8_filter_mmx2 x264_template(predict_8x8_filter_mmx2)
 void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbor, int i_filters );
+#define x264_predict_8x8_filter_sse2 x264_template(predict_8x8_filter_sse2)
 void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
+#define x264_predict_8x8_filter_ssse3 x264_template(predict_8x8_filter_ssse3)
 void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters );
+#define x264_predict_8x8_filter_avx x264_template(predict_8x8_filter_avx)
 void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
+#define x264_predict_4x4_h_avx2 x264_template(predict_4x4_h_avx2)
 void x264_predict_4x4_h_avx2( uint16_t *src );
+#define x264_predict_4x4_ddl_mmx2 x264_template(predict_4x4_ddl_mmx2)
 void x264_predict_4x4_ddl_mmx2( pixel *src );
+#define x264_predict_4x4_ddl_sse2 x264_template(predict_4x4_ddl_sse2)
 void x264_predict_4x4_ddl_sse2( uint16_t *src );
+#define x264_predict_4x4_ddl_avx x264_template(predict_4x4_ddl_avx)
 void x264_predict_4x4_ddl_avx( uint16_t *src );
+#define x264_predict_4x4_ddr_mmx2 x264_template(predict_4x4_ddr_mmx2)
 void x264_predict_4x4_ddr_mmx2( pixel *src );
+#define x264_predict_4x4_vl_mmx2 x264_template(predict_4x4_vl_mmx2)
 void x264_predict_4x4_vl_mmx2( pixel *src );
+#define x264_predict_4x4_vl_sse2 x264_template(predict_4x4_vl_sse2)
 void x264_predict_4x4_vl_sse2( uint16_t *src );
+#define x264_predict_4x4_vl_avx x264_template(predict_4x4_vl_avx)
 void x264_predict_4x4_vl_avx( uint16_t *src );
+#define x264_predict_4x4_vr_mmx2 x264_template(predict_4x4_vr_mmx2)
 void x264_predict_4x4_vr_mmx2( uint8_t *src );
+#define x264_predict_4x4_vr_sse2 x264_template(predict_4x4_vr_sse2)
 void x264_predict_4x4_vr_sse2( uint16_t *src );
+#define x264_predict_4x4_vr_ssse3 x264_template(predict_4x4_vr_ssse3)
 void x264_predict_4x4_vr_ssse3( pixel *src );
+#define x264_predict_4x4_vr_cache64_ssse3 x264_template(predict_4x4_vr_cache64_ssse3)
 void x264_predict_4x4_vr_cache64_ssse3( uint8_t *src );
+#define x264_predict_4x4_vr_avx x264_template(predict_4x4_vr_avx)
 void x264_predict_4x4_vr_avx( uint16_t *src );
+#define x264_predict_4x4_hd_mmx2 x264_template(predict_4x4_hd_mmx2)
 void x264_predict_4x4_hd_mmx2( pixel *src );
+#define x264_predict_4x4_hd_sse2 x264_template(predict_4x4_hd_sse2)
 void x264_predict_4x4_hd_sse2( uint16_t *src );
+#define x264_predict_4x4_hd_ssse3 x264_template(predict_4x4_hd_ssse3)
 void x264_predict_4x4_hd_ssse3( pixel *src );
+#define x264_predict_4x4_hd_avx x264_template(predict_4x4_hd_avx)
 void x264_predict_4x4_hd_avx( uint16_t *src );
+#define x264_predict_4x4_dc_mmx2 x264_template(predict_4x4_dc_mmx2)
 void x264_predict_4x4_dc_mmx2( pixel *src );
+#define x264_predict_4x4_ddr_sse2 x264_template(predict_4x4_ddr_sse2)
 void x264_predict_4x4_ddr_sse2( uint16_t *src );
+#define x264_predict_4x4_ddr_ssse3 x264_template(predict_4x4_ddr_ssse3)
 void x264_predict_4x4_ddr_ssse3( pixel *src );
+#define x264_predict_4x4_ddr_avx x264_template(predict_4x4_ddr_avx)
 void x264_predict_4x4_ddr_avx( uint16_t *src );
+#define x264_predict_4x4_hu_mmx2 x264_template(predict_4x4_hu_mmx2)
 void x264_predict_4x4_hu_mmx2( pixel *src );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/x86/quant-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/quant-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/quant-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/quant-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* quant-a.asm: x86 quantization and level-run
 ;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
@@ -673,7 +673,7 @@
     sub  t2d, t0d
     sub  t2d, t1d   ; i_mf = i_qp % 6
     shl  t2d, %2
-%ifdef PIC
+%if ARCH_X86_64
     lea  r1, [dequant%1_scale]
     add  r1, t2
 %else
@@ -761,7 +761,7 @@
     sub  t2d, t1d   ; i_mf = i_qp % 6
     shl  t2d, %1
 %if %2
-%ifdef PIC
+%if ARCH_X86_64
 %define dmf r1+t2
     lea   r1, [dequant8_scale]
 %else
@@ -1421,8 +1421,8 @@
 %endif
 %endmacro
 
-cextern decimate_table4
-cextern decimate_table8
+cextern_common decimate_table4
+cextern_common decimate_table8
 
 %macro DECIMATE4x4 1
 
@@ -1449,7 +1449,7 @@
     shr   edx, 1
 %endif
 %endif
-%ifdef PIC
+%if ARCH_X86_64
     lea    r4, [decimate_mask_table4]
     %define mask_table r4
 %else
@@ -1580,16 +1580,11 @@
     add   eax, r3d
     jnz .ret9
 %endif
-%ifdef PIC
-    lea r4, [decimate_table8]
-    %define table r4
-%else
-    %define table decimate_table8
-%endif
+    lea    r4, [decimate_table8]
     mov    al, -6
 .loop:
     tzcnt rcx, r1
-    add    al, byte [table + rcx]
+    add    al, byte [r4 + rcx]
     jge .ret9
     shr    r1, 1
     SHRX   r1, rcx
@@ -2165,7 +2160,7 @@
 
 %macro COEFF_LEVELRUN_LUT 1
 cglobal coeff_level_run%1,2,4+(%1/9)
-%ifdef PIC
+%if ARCH_X86_64
     lea       r5, [$$]
     %define GLOBAL +r5-$$
 %else
diff -Nru x264-0.152.2854+gite9a5903/common/x86/quant.h x264-0.158.2988+git-20191101.7817004/common/x86/quant.h
--- x264-0.152.2854+gite9a5903/common/x86/quant.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/quant.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: x86 quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Fiona Glaser <fiona@x264.com>
@@ -25,132 +25,254 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_I386_QUANT_H
-#define X264_I386_QUANT_H
+#ifndef X264_X86_QUANT_H
+#define X264_X86_QUANT_H
 
+#define x264_quant_2x2_dc_mmx2 x264_template(quant_2x2_dc_mmx2)
 int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias );
+#define x264_quant_4x4_dc_mmx2 x264_template(quant_4x4_dc_mmx2)
 int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias );
+#define x264_quant_4x4_mmx2 x264_template(quant_4x4_mmx2)
 int x264_quant_4x4_mmx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_8x8_mmx2 x264_template(quant_8x8_mmx2)
 int x264_quant_8x8_mmx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
+#define x264_quant_2x2_dc_sse2 x264_template(quant_2x2_dc_sse2)
 int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias );
+#define x264_quant_4x4_dc_sse2 x264_template(quant_4x4_dc_sse2)
 int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias );
+#define x264_quant_4x4_sse2 x264_template(quant_4x4_sse2)
 int x264_quant_4x4_sse2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_4x4x4_sse2 x264_template(quant_4x4x4_sse2)
 int x264_quant_4x4x4_sse2( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_8x8_sse2 x264_template(quant_8x8_sse2)
 int x264_quant_8x8_sse2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
+#define x264_quant_2x2_dc_ssse3 x264_template(quant_2x2_dc_ssse3)
 int x264_quant_2x2_dc_ssse3( dctcoef dct[4], int mf, int bias );
+#define x264_quant_4x4_dc_ssse3 x264_template(quant_4x4_dc_ssse3)
 int x264_quant_4x4_dc_ssse3( dctcoef dct[16], int mf, int bias );
+#define x264_quant_4x4_ssse3 x264_template(quant_4x4_ssse3)
 int x264_quant_4x4_ssse3( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_4x4x4_ssse3 x264_template(quant_4x4x4_ssse3)
 int x264_quant_4x4x4_ssse3( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_8x8_ssse3 x264_template(quant_8x8_ssse3)
 int x264_quant_8x8_ssse3( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
+#define x264_quant_2x2_dc_sse4 x264_template(quant_2x2_dc_sse4)
 int x264_quant_2x2_dc_sse4( dctcoef dct[16], int mf, int bias );
+#define x264_quant_4x4_dc_sse4 x264_template(quant_4x4_dc_sse4)
 int x264_quant_4x4_dc_sse4( dctcoef dct[16], int mf, int bias );
+#define x264_quant_4x4_sse4 x264_template(quant_4x4_sse4)
 int x264_quant_4x4_sse4( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_4x4x4_sse4 x264_template(quant_4x4x4_sse4)
 int x264_quant_4x4x4_sse4( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_8x8_sse4 x264_template(quant_8x8_sse4)
 int x264_quant_8x8_sse4( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
+#define x264_quant_4x4_avx2 x264_template(quant_4x4_avx2)
 int x264_quant_4x4_avx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_quant_4x4_dc_avx2 x264_template(quant_4x4_dc_avx2)
 int x264_quant_4x4_dc_avx2( dctcoef dct[16], int mf, int bias );
+#define x264_quant_8x8_avx2 x264_template(quant_8x8_avx2)
 int x264_quant_8x8_avx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
+#define x264_quant_4x4x4_avx2 x264_template(quant_4x4x4_avx2)
 int x264_quant_4x4x4_avx2( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] );
+#define x264_dequant_4x4_mmx x264_template(dequant_4x4_mmx)
 void x264_dequant_4x4_mmx( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_4x4dc_mmx2 x264_template(dequant_4x4dc_mmx2)
 void x264_dequant_4x4dc_mmx2( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_mmx x264_template(dequant_8x8_mmx)
 void x264_dequant_8x8_mmx( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_sse2 x264_template(dequant_4x4_sse2)
 void x264_dequant_4x4_sse2( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_4x4dc_sse2 x264_template(dequant_4x4dc_sse2)
 void x264_dequant_4x4dc_sse2( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_sse2 x264_template(dequant_8x8_sse2)
 void x264_dequant_8x8_sse2( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_avx x264_template(dequant_4x4_avx)
 void x264_dequant_4x4_avx( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_4x4dc_avx x264_template(dequant_4x4dc_avx)
 void x264_dequant_4x4dc_avx( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_avx x264_template(dequant_8x8_avx)
 void x264_dequant_8x8_avx( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_xop x264_template(dequant_4x4_xop)
 void x264_dequant_4x4_xop( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_4x4dc_xop x264_template(dequant_4x4dc_xop)
 void x264_dequant_4x4dc_xop( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_xop x264_template(dequant_8x8_xop)
 void x264_dequant_8x8_xop( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_avx2 x264_template(dequant_4x4_avx2)
 void x264_dequant_4x4_avx2( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_4x4dc_avx2 x264_template(dequant_4x4dc_avx2)
 void x264_dequant_4x4dc_avx2( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_avx2 x264_template(dequant_8x8_avx2)
 void x264_dequant_8x8_avx2( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_avx512 x264_template(dequant_4x4_avx512)
 void x264_dequant_4x4_avx512( dctcoef dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_avx512 x264_template(dequant_8x8_avx512)
 void x264_dequant_8x8_avx512( dctcoef dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_flat16_mmx x264_template(dequant_4x4_flat16_mmx)
 void x264_dequant_4x4_flat16_mmx( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_flat16_mmx x264_template(dequant_8x8_flat16_mmx)
 void x264_dequant_8x8_flat16_mmx( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_flat16_sse2 x264_template(dequant_4x4_flat16_sse2)
 void x264_dequant_4x4_flat16_sse2( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_flat16_sse2 x264_template(dequant_8x8_flat16_sse2)
 void x264_dequant_8x8_flat16_sse2( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_4x4_flat16_avx2 x264_template(dequant_4x4_flat16_avx2)
 void x264_dequant_4x4_flat16_avx2( int16_t dct[16], int dequant_mf[6][16], int i_qp );
+#define x264_dequant_8x8_flat16_avx2 x264_template(dequant_8x8_flat16_avx2)
 void x264_dequant_8x8_flat16_avx2( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_dequant_8x8_flat16_avx512 x264_template(dequant_8x8_flat16_avx512)
 void x264_dequant_8x8_flat16_avx512( int16_t dct[64], int dequant_mf[6][64], int i_qp );
+#define x264_idct_dequant_2x4_dc_sse2 x264_template(idct_dequant_2x4_dc_sse2)
 void x264_idct_dequant_2x4_dc_sse2( dctcoef dct[8], dctcoef dct4x4[8][16], int dequant_mf[6][16], int i_qp );
+#define x264_idct_dequant_2x4_dc_avx x264_template(idct_dequant_2x4_dc_avx)
 void x264_idct_dequant_2x4_dc_avx ( dctcoef dct[8], dctcoef dct4x4[8][16], int dequant_mf[6][16], int i_qp );
+#define x264_idct_dequant_2x4_dconly_sse2 x264_template(idct_dequant_2x4_dconly_sse2)
 void x264_idct_dequant_2x4_dconly_sse2( dctcoef dct[8], int dequant_mf[6][16], int i_qp );
+#define x264_idct_dequant_2x4_dconly_avx x264_template(idct_dequant_2x4_dconly_avx)
 void x264_idct_dequant_2x4_dconly_avx ( dctcoef dct[8], int dequant_mf[6][16], int i_qp );
+#define x264_optimize_chroma_2x2_dc_sse2 x264_template(optimize_chroma_2x2_dc_sse2)
 int x264_optimize_chroma_2x2_dc_sse2( dctcoef dct[4], int dequant_mf );
+#define x264_optimize_chroma_2x2_dc_ssse3 x264_template(optimize_chroma_2x2_dc_ssse3)
 int x264_optimize_chroma_2x2_dc_ssse3( dctcoef dct[4], int dequant_mf );
+#define x264_optimize_chroma_2x2_dc_sse4 x264_template(optimize_chroma_2x2_dc_sse4)
 int x264_optimize_chroma_2x2_dc_sse4( dctcoef dct[4], int dequant_mf );
+#define x264_optimize_chroma_2x2_dc_avx x264_template(optimize_chroma_2x2_dc_avx)
 int x264_optimize_chroma_2x2_dc_avx( dctcoef dct[4], int dequant_mf );
+#define x264_denoise_dct_mmx x264_template(denoise_dct_mmx)
 void x264_denoise_dct_mmx  ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
+#define x264_denoise_dct_sse2 x264_template(denoise_dct_sse2)
 void x264_denoise_dct_sse2 ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
+#define x264_denoise_dct_ssse3 x264_template(denoise_dct_ssse3)
 void x264_denoise_dct_ssse3( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
+#define x264_denoise_dct_avx x264_template(denoise_dct_avx)
 void x264_denoise_dct_avx  ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
+#define x264_denoise_dct_avx2 x264_template(denoise_dct_avx2)
 void x264_denoise_dct_avx2 ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
+#define x264_decimate_score15_sse2 x264_template(decimate_score15_sse2)
 int x264_decimate_score15_sse2( dctcoef *dct );
+#define x264_decimate_score15_ssse3 x264_template(decimate_score15_ssse3)
 int x264_decimate_score15_ssse3( dctcoef *dct );
+#define x264_decimate_score15_avx512 x264_template(decimate_score15_avx512)
 int x264_decimate_score15_avx512( dctcoef *dct );
+#define x264_decimate_score16_sse2 x264_template(decimate_score16_sse2)
 int x264_decimate_score16_sse2( dctcoef *dct );
+#define x264_decimate_score16_ssse3 x264_template(decimate_score16_ssse3)
 int x264_decimate_score16_ssse3( dctcoef *dct );
+#define x264_decimate_score16_avx512 x264_template(decimate_score16_avx512)
 int x264_decimate_score16_avx512( dctcoef *dct );
+#define x264_decimate_score64_sse2 x264_template(decimate_score64_sse2)
 int x264_decimate_score64_sse2( dctcoef *dct );
+#define x264_decimate_score64_ssse3 x264_template(decimate_score64_ssse3)
 int x264_decimate_score64_ssse3( dctcoef *dct );
+#define x264_decimate_score64_avx2 x264_template(decimate_score64_avx2)
 int x264_decimate_score64_avx2( int16_t *dct );
+#define x264_decimate_score64_avx512 x264_template(decimate_score64_avx512)
 int x264_decimate_score64_avx512( dctcoef *dct );
+#define x264_coeff_last4_mmx2 x264_template(coeff_last4_mmx2)
 int x264_coeff_last4_mmx2( dctcoef *dct );
+#define x264_coeff_last8_mmx2 x264_template(coeff_last8_mmx2)
 int x264_coeff_last8_mmx2( dctcoef *dct );
+#define x264_coeff_last15_mmx2 x264_template(coeff_last15_mmx2)
 int x264_coeff_last15_mmx2( dctcoef *dct );
+#define x264_coeff_last16_mmx2 x264_template(coeff_last16_mmx2)
 int x264_coeff_last16_mmx2( dctcoef *dct );
+#define x264_coeff_last64_mmx2 x264_template(coeff_last64_mmx2)
 int x264_coeff_last64_mmx2( dctcoef *dct );
+#define x264_coeff_last8_sse2 x264_template(coeff_last8_sse2)
 int x264_coeff_last8_sse2( dctcoef *dct );
+#define x264_coeff_last15_sse2 x264_template(coeff_last15_sse2)
 int x264_coeff_last15_sse2( dctcoef *dct );
+#define x264_coeff_last16_sse2 x264_template(coeff_last16_sse2)
 int x264_coeff_last16_sse2( dctcoef *dct );
+#define x264_coeff_last64_sse2 x264_template(coeff_last64_sse2)
 int x264_coeff_last64_sse2( dctcoef *dct );
+#define x264_coeff_last4_lzcnt x264_template(coeff_last4_lzcnt)
 int x264_coeff_last4_lzcnt( dctcoef *dct );
+#define x264_coeff_last8_lzcnt x264_template(coeff_last8_lzcnt)
 int x264_coeff_last8_lzcnt( dctcoef *dct );
+#define x264_coeff_last15_lzcnt x264_template(coeff_last15_lzcnt)
 int x264_coeff_last15_lzcnt( dctcoef *dct );
+#define x264_coeff_last16_lzcnt x264_template(coeff_last16_lzcnt)
 int x264_coeff_last16_lzcnt( dctcoef *dct );
+#define x264_coeff_last64_lzcnt x264_template(coeff_last64_lzcnt)
 int x264_coeff_last64_lzcnt( dctcoef *dct );
+#define x264_coeff_last64_avx2 x264_template(coeff_last64_avx2)
 int x264_coeff_last64_avx2 ( dctcoef *dct );
+#define x264_coeff_last4_avx512 x264_template(coeff_last4_avx512)
 int x264_coeff_last4_avx512( int32_t *dct );
+#define x264_coeff_last8_avx512 x264_template(coeff_last8_avx512)
 int x264_coeff_last8_avx512( dctcoef *dct );
+#define x264_coeff_last15_avx512 x264_template(coeff_last15_avx512)
 int x264_coeff_last15_avx512( dctcoef *dct );
+#define x264_coeff_last16_avx512 x264_template(coeff_last16_avx512)
 int x264_coeff_last16_avx512( dctcoef *dct );
+#define x264_coeff_last64_avx512 x264_template(coeff_last64_avx512)
 int x264_coeff_last64_avx512( dctcoef *dct );
+#define x264_coeff_level_run16_mmx2 x264_template(coeff_level_run16_mmx2)
 int x264_coeff_level_run16_mmx2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run16_sse2 x264_template(coeff_level_run16_sse2)
 int x264_coeff_level_run16_sse2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run16_lzcnt x264_template(coeff_level_run16_lzcnt)
 int x264_coeff_level_run16_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run16_ssse3 x264_template(coeff_level_run16_ssse3)
 int x264_coeff_level_run16_ssse3( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run16_ssse3_lzcnt x264_template(coeff_level_run16_ssse3_lzcnt)
 int x264_coeff_level_run16_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run16_avx2 x264_template(coeff_level_run16_avx2)
 int x264_coeff_level_run16_avx2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run15_mmx2 x264_template(coeff_level_run15_mmx2)
 int x264_coeff_level_run15_mmx2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run15_sse2 x264_template(coeff_level_run15_sse2)
 int x264_coeff_level_run15_sse2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run15_lzcnt x264_template(coeff_level_run15_lzcnt)
 int x264_coeff_level_run15_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run15_ssse3 x264_template(coeff_level_run15_ssse3)
 int x264_coeff_level_run15_ssse3( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run15_ssse3_lzcnt x264_template(coeff_level_run15_ssse3_lzcnt)
 int x264_coeff_level_run15_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run15_avx2 x264_template(coeff_level_run15_avx2)
 int x264_coeff_level_run15_avx2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run4_mmx2 x264_template(coeff_level_run4_mmx2)
 int x264_coeff_level_run4_mmx2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run4_lzcnt x264_template(coeff_level_run4_lzcnt)
 int x264_coeff_level_run4_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run4_ssse3 x264_template(coeff_level_run4_ssse3)
 int x264_coeff_level_run4_ssse3( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run4_ssse3_lzcnt x264_template(coeff_level_run4_ssse3_lzcnt)
 int x264_coeff_level_run4_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run8_mmx2 x264_template(coeff_level_run8_mmx2)
 int x264_coeff_level_run8_mmx2( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run8_lzcnt x264_template(coeff_level_run8_lzcnt)
 int x264_coeff_level_run8_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run8_sse2 x264_template(coeff_level_run8_sse2)
 int x264_coeff_level_run8_sse2( dctcoef *dct, x264_run_level_t *runlevel );
-int x264_coeff_level_run8_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run8_ssse3 x264_template(coeff_level_run8_ssse3)
 int x264_coeff_level_run8_ssse3( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_coeff_level_run8_ssse3_lzcnt x264_template(coeff_level_run8_ssse3_lzcnt)
 int x264_coeff_level_run8_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel );
+#define x264_trellis_cabac_4x4_sse2 x264_template(trellis_cabac_4x4_sse2)
 int x264_trellis_cabac_4x4_sse2 ( TRELLIS_PARAMS, int b_ac );
+#define x264_trellis_cabac_4x4_ssse3 x264_template(trellis_cabac_4x4_ssse3)
 int x264_trellis_cabac_4x4_ssse3( TRELLIS_PARAMS, int b_ac );
+#define x264_trellis_cabac_8x8_sse2 x264_template(trellis_cabac_8x8_sse2)
 int x264_trellis_cabac_8x8_sse2 ( TRELLIS_PARAMS, int b_interlaced );
+#define x264_trellis_cabac_8x8_ssse3 x264_template(trellis_cabac_8x8_ssse3)
 int x264_trellis_cabac_8x8_ssse3( TRELLIS_PARAMS, int b_interlaced );
+#define x264_trellis_cabac_4x4_psy_sse2 x264_template(trellis_cabac_4x4_psy_sse2)
 int x264_trellis_cabac_4x4_psy_sse2 ( TRELLIS_PARAMS, int b_ac, dctcoef *fenc_dct, int i_psy_trellis );
+#define x264_trellis_cabac_4x4_psy_ssse3 x264_template(trellis_cabac_4x4_psy_ssse3)
 int x264_trellis_cabac_4x4_psy_ssse3( TRELLIS_PARAMS, int b_ac, dctcoef *fenc_dct, int i_psy_trellis );
+#define x264_trellis_cabac_8x8_psy_sse2 x264_template(trellis_cabac_8x8_psy_sse2)
 int x264_trellis_cabac_8x8_psy_sse2 ( TRELLIS_PARAMS, int b_interlaced, dctcoef *fenc_dct, int i_psy_trellis );
+#define x264_trellis_cabac_8x8_psy_ssse3 x264_template(trellis_cabac_8x8_psy_ssse3)
 int x264_trellis_cabac_8x8_psy_ssse3( TRELLIS_PARAMS, int b_interlaced, dctcoef *fenc_dct, int i_psy_trellis );
+#define x264_trellis_cabac_dc_sse2 x264_template(trellis_cabac_dc_sse2)
 int x264_trellis_cabac_dc_sse2 ( TRELLIS_PARAMS, int i_coefs );
+#define x264_trellis_cabac_dc_ssse3 x264_template(trellis_cabac_dc_ssse3)
 int x264_trellis_cabac_dc_ssse3( TRELLIS_PARAMS, int i_coefs );
+#define x264_trellis_cabac_chroma_422_dc_sse2 x264_template(trellis_cabac_chroma_422_dc_sse2)
 int x264_trellis_cabac_chroma_422_dc_sse2 ( TRELLIS_PARAMS );
+#define x264_trellis_cabac_chroma_422_dc_ssse3 x264_template(trellis_cabac_chroma_422_dc_ssse3)
 int x264_trellis_cabac_chroma_422_dc_ssse3( TRELLIS_PARAMS );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/common/x86/sad16-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/sad16-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/sad16-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/sad16-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad16-a.asm: x86 high depth sad functions
 ;*****************************************************************************
-;* Copyright (C) 2010-2017 x264 project
+;* Copyright (C) 2010-2019 x264 project
 ;*
 ;* Authors: Oskar Arvidsson <oskar@irock.se>
 ;*          Henrik Gramner <henrik@gramner.com>
diff -Nru x264-0.152.2854+gite9a5903/common/x86/sad-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/sad-a.asm
--- x264-0.152.2854+gite9a5903/common/x86/sad-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/sad-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad-a.asm: x86 sad functions
 ;*****************************************************************************
-;* Copyright (C) 2003-2017 x264 project
+;* Copyright (C) 2003-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
@@ -1920,7 +1920,7 @@
     shl     r4d, 4  ; code size = 80
 %endif
 %define sad_w16_addr (sad_w16_align1_%1 + (sad_w16_align1_%1 - sad_w16_align2_%1))
-%ifdef PIC
+%if ARCH_X86_64
     lea     r5, [sad_w16_addr]
     add     r5, r4
 %else
diff -Nru x264-0.152.2854+gite9a5903/common/x86/trellis-64.asm x264-0.158.2988+git-20191101.7817004/common/x86/trellis-64.asm
--- x264-0.152.2854+gite9a5903/common/x86/trellis-64.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/trellis-64.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* trellis-64.asm: x86_64 trellis quantization
 ;*****************************************************************************
-;* Copyright (C) 2012-2017 x264 project
+;* Copyright (C) 2012-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*
@@ -61,17 +61,17 @@
 cextern pd_8
 cextern pd_0123
 cextern pd_4567
-cextern cabac_entropy
-cextern cabac_transition
+cextern_common cabac_entropy
+cextern_common cabac_transition
 cextern cabac_size_unary
 cextern cabac_transition_unary
-cextern dct4_weight_tab
-cextern dct8_weight_tab
-cextern dct4_weight2_tab
-cextern dct8_weight2_tab
-cextern last_coeff_flag_offset_8x8
-cextern significant_coeff_flag_offset_8x8
-cextern coeff_flag_offset_chroma_422_dc
+cextern_common dct4_weight_tab
+cextern_common dct8_weight_tab
+cextern_common dct4_weight2_tab
+cextern_common dct8_weight2_tab
+cextern_common last_coeff_flag_offset_8x8
+cextern_common significant_coeff_flag_offset_8x8
+cextern_common coeff_flag_offset_chroma_422_dc
 
 SECTION .text
 
@@ -202,7 +202,6 @@
     paddd      m6, m6
     %define unquant_mf m6
 %endif
-%ifdef PIC
 %if dc == 0
     mov unquant_mfm, unquant_mfq
 %endif
@@ -212,9 +211,6 @@
     ; (Any address in .text would work, this one was just convenient.)
     lea r0, [$$]
     %define GLOBAL +r0-$$
-%else
-    %define GLOBAL
-%endif
 
     TRELLIS_LOOP 0 ; node_ctx 0..3
     TRELLIS_LOOP 1 ; node_ctx 1..7
@@ -304,12 +300,8 @@
     mov    r10, cabac_state_sigm
 %if num_coefs == 64
     mov    r6d, b_interlacedm
-%ifdef PIC
     add    r6d, iid
     movzx  r6d, byte [significant_coeff_flag_offset_8x8 + r6 GLOBAL]
-%else
-    movzx  r6d, byte [significant_coeff_flag_offset_8x8 + r6 + iiq]
-%endif
     movzx  r10, byte [r10 + r6]
 %elif num_coefs == 8
     movzx  r13, byte [coeff_flag_offset_chroma_422_dc + iiq GLOBAL]
@@ -408,12 +400,8 @@
 %if dc
     pmuludq m0, unquant_mf
 %else
-%ifdef PIC
     mov    r10, unquant_mfm
     LOAD_DUP m3, [r10 + zigzagiq*4]
-%else
-    LOAD_DUP m3, [unquant_mfq + zigzagiq*4]
-%endif
     pmuludq m0, m3
 %endif
     paddd   m0, [pq_128]
diff -Nru x264-0.152.2854+gite9a5903/common/x86/util.h x264-0.158.2988+git-20191101.7817004/common/x86/util.h
--- x264-0.152.2854+gite9a5903/common/x86/util.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/util.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * util.h: x86 inline asm
  *****************************************************************************
- * Copyright (C) 2008-2017 x264 project
+ * Copyright (C) 2008-2019 x264 project
  *
  * Authors: Fiona Glaser <fiona@x264.com>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -122,7 +122,7 @@
 }
 
 #define x264_predictor_clip x264_predictor_clip_mmx2
-static int ALWAYS_INLINE x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
 {
     static const uint32_t pd_32 = 0x20;
     intptr_t tmp = (intptr_t)mv_limit, mvc_max = i_mvc, i = 0;
@@ -184,7 +184,7 @@
 
 /* Same as the above, except we do (mv + 2) >> 2 on the input. */
 #define x264_predictor_roundclip x264_predictor_roundclip_mmx2
-static int ALWAYS_INLINE x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
+static ALWAYS_INLINE int x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
 {
     static const uint64_t pw_2 = 0x0002000200020002ULL;
     static const uint32_t pd_32 = 0x20;
diff -Nru x264-0.152.2854+gite9a5903/common/x86/x86inc.asm x264-0.158.2988+git-20191101.7817004/common/x86/x86inc.asm
--- x264-0.152.2854+gite9a5903/common/x86/x86inc.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/x86inc.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,12 +1,12 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2017 x264 project
+;* Copyright (C) 2005-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
+;*          Henrik Gramner <henrik@gramner.com>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
 ;*          Fiona Glaser <fiona@x264.com>
-;*          Henrik Gramner <henrik@gramner.com>
 ;*
 ;* Permission to use, copy, modify, and/or distribute this software for any
 ;* purpose with or without fee is hereby granted, provided that the above
@@ -65,12 +65,19 @@
 %endif
 
 %define FORMAT_ELF 0
+%define FORMAT_MACHO 0
 %ifidn __OUTPUT_FORMAT__,elf
     %define FORMAT_ELF 1
 %elifidn __OUTPUT_FORMAT__,elf32
     %define FORMAT_ELF 1
 %elifidn __OUTPUT_FORMAT__,elf64
     %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,macho
+    %define FORMAT_MACHO 1
+%elifidn __OUTPUT_FORMAT__,macho32
+    %define FORMAT_MACHO 1
+%elifidn __OUTPUT_FORMAT__,macho64
+    %define FORMAT_MACHO 1
 %endif
 
 %ifdef PREFIX
@@ -80,23 +87,30 @@
 %endif
 
 %macro SECTION_RODATA 0-1 16
-    SECTION .rodata align=%1
+    %ifidn __OUTPUT_FORMAT__,win32
+        SECTION .rdata align=%1
+    %elif WIN64
+        SECTION .rdata align=%1
+    %else
+        SECTION .rodata align=%1
+    %endif
 %endmacro
 
-%if WIN64
-    %define PIC
-%elif ARCH_X86_64 == 0
-; x86_32 doesn't require PIC.
-; Some distros prefer shared objects to be PIC, but nothing breaks if
-; the code contains a few textrels, so we'll skip that complexity.
-    %undef PIC
-%endif
-%ifdef PIC
+%if ARCH_X86_64
+    %define PIC 1 ; always use PIC on x86-64
     default rel
+%elifidn __OUTPUT_FORMAT__,win32
+    %define PIC 0 ; PIC isn't used on 32-bit Windows
+%elifndef PIC
+    %define PIC 0
 %endif
 
+%define HAVE_PRIVATE_EXTERN 1
 %ifdef __NASM_VER__
     %use smartalign
+    %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14
+        %define HAVE_PRIVATE_EXTERN 0
+    %endif
 %endif
 
 ; Macros to eliminate most code duplication between x86_32 and x86_64:
@@ -214,6 +228,18 @@
     %define gprsize 4
 %endif
 
+%macro LEA 2
+%if ARCH_X86_64
+    lea %1, [%2]
+%elif PIC
+    call $+5 ; special-cased to not affect the RSB on most CPU:s
+    pop %1
+    add %1, (%2)-$+1
+%else
+    mov %1, %2
+%endif
+%endmacro
+
 %macro PUSH 1
     push %1
     %ifidn rstk, rsp
@@ -275,6 +301,10 @@
     %endif
 %endmacro
 
+%if ARCH_X86_64 == 0
+    %define movsxd movifnidn
+%endif
+
 %macro movsxdifnidn 2
     %ifnidn %1, %2
         movsxd %1, %2
@@ -663,7 +693,7 @@
 
 BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp
 
-%macro TAIL_CALL 2 ; callee, is_nonadjacent
+%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent
     %if has_epilogue
         call %1
         RET
@@ -693,22 +723,25 @@
 %endmacro
 %macro cglobal_internal 2-3+
     annotate_function_size
-    %if %1
-        %xdefine %%FUNCTION_PREFIX private_prefix
-        %xdefine %%VISIBILITY hidden
-    %else
-        %xdefine %%FUNCTION_PREFIX public_prefix
-        %xdefine %%VISIBILITY
-    %endif
     %ifndef cglobaled_%2
-        %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2)
+        %if %1
+            %xdefine %2 mangle(private_prefix %+ _ %+ %2)
+        %else
+            %xdefine %2 mangle(public_prefix %+ _ %+ %2)
+        %endif
         %xdefine %2.skip_prologue %2 %+ .skip_prologue
         CAT_XDEFINE cglobaled_, %2, 1
     %endif
     %xdefine current_function %2
     %xdefine current_function_section __SECT__
     %if FORMAT_ELF
-        global %2:function %%VISIBILITY
+        %if %1
+            global %2:function hidden
+        %else
+            global %2:function
+        %endif
+    %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN && %1
+        global %2:private_extern
     %else
         global %2
     %endif
@@ -725,6 +758,18 @@
     %endif
 %endmacro
 
+; Create a global symbol from a local label with the correct name mangling and type
+%macro cglobal_label 1
+    %if FORMAT_ELF
+        global current_function %+ %1:function hidden
+    %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN
+        global current_function %+ %1:private_extern
+    %else
+        global current_function %+ %1
+    %endif
+    %1:
+%endmacro
+
 %macro cextern 1
     %xdefine %1 mangle(private_prefix %+ _ %+ %1)
     CAT_XDEFINE cglobaled_, %1, 1
@@ -744,6 +789,8 @@
     %xdefine %1 mangle(private_prefix %+ _ %+ %1)
     %if FORMAT_ELF
         global %1:data hidden
+    %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN
+        global %1:private_extern
     %else
         global %1
     %endif
@@ -788,19 +835,20 @@
 %assign cpuflags_sse4     (1<<10)| cpuflags_ssse3
 %assign cpuflags_sse42    (1<<11)| cpuflags_sse4
 %assign cpuflags_aesni    (1<<12)| cpuflags_sse42
-%assign cpuflags_avx      (1<<13)| cpuflags_sse42
-%assign cpuflags_xop      (1<<14)| cpuflags_avx
-%assign cpuflags_fma4     (1<<15)| cpuflags_avx
-%assign cpuflags_fma3     (1<<16)| cpuflags_avx
-%assign cpuflags_bmi1     (1<<17)| cpuflags_avx|cpuflags_lzcnt
-%assign cpuflags_bmi2     (1<<18)| cpuflags_bmi1
-%assign cpuflags_avx2     (1<<19)| cpuflags_fma3|cpuflags_bmi2
-%assign cpuflags_avx512   (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
-
-%assign cpuflags_cache32  (1<<21)
-%assign cpuflags_cache64  (1<<22)
-%assign cpuflags_aligned  (1<<23) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<24)
+%assign cpuflags_gfni     (1<<13)| cpuflags_sse42
+%assign cpuflags_avx      (1<<14)| cpuflags_sse42
+%assign cpuflags_xop      (1<<15)| cpuflags_avx
+%assign cpuflags_fma4     (1<<16)| cpuflags_avx
+%assign cpuflags_fma3     (1<<17)| cpuflags_avx
+%assign cpuflags_bmi1     (1<<18)| cpuflags_avx|cpuflags_lzcnt
+%assign cpuflags_bmi2     (1<<19)| cpuflags_bmi1
+%assign cpuflags_avx2     (1<<20)| cpuflags_fma3|cpuflags_bmi2
+%assign cpuflags_avx512   (1<<21)| cpuflags_avx2 ; F, CD, BW, DQ, VL
+
+%assign cpuflags_cache32  (1<<22)
+%assign cpuflags_cache64  (1<<23)
+%assign cpuflags_aligned  (1<<24) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<25)
 
 ; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
 %define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
@@ -871,6 +919,36 @@
     %undef %1%2
 %endmacro
 
+%macro DEFINE_MMREGS 1 ; mmtype
+    %assign %%prev_mmregs 0
+    %ifdef num_mmregs
+        %assign %%prev_mmregs num_mmregs
+    %endif
+
+    %assign num_mmregs 8
+    %if ARCH_X86_64 && mmsize >= 16
+        %assign num_mmregs 16
+        %if cpuflag(avx512) || mmsize == 64
+            %assign num_mmregs 32
+        %endif
+    %endif
+
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, %1 %+ %%i
+        CAT_XDEFINE nn%1, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    %if %%prev_mmregs > num_mmregs
+        %rep %%prev_mmregs - num_mmregs
+            CAT_UNDEF m, %%i
+            CAT_UNDEF nn %+ mmtype, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+    %xdefine mmtype %1
+%endmacro
+
 ; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
 %macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
     %if ARCH_X86_64 && cpuflag(avx512)
@@ -887,47 +965,26 @@
     %assign avx_enabled 0
     %define RESET_MM_PERMUTATION INIT_MMX %1
     %define mmsize 8
-    %define num_mmregs 8
     %define mova movq
     %define movu movq
     %define movh movd
     %define movnta movntq
-    %assign %%i 0
-    %rep 8
-        CAT_XDEFINE m, %%i, mm %+ %%i
-        CAT_XDEFINE nnmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
-    %rep 24
-        CAT_UNDEF m, %%i
-        CAT_UNDEF nnmm, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS mm
 %endmacro
 
 %macro INIT_XMM 0-1+
     %assign avx_enabled 0
     %define RESET_MM_PERMUTATION INIT_XMM %1
     %define mmsize 16
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 32
-    %endif
     %define mova movdqa
     %define movu movdqu
     %define movh movq
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, xmm %+ %%i
-        CAT_XDEFINE nnxmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS xmm
     %if WIN64
-        ; Swap callee-saved registers with volatile registers
-        AVX512_MM_PERMUTATION 6
+        AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers
     %endif
 %endmacro
 
@@ -935,21 +992,12 @@
     %assign avx_enabled 1
     %define RESET_MM_PERMUTATION INIT_YMM %1
     %define mmsize 32
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 32
-    %endif
     %define mova movdqa
     %define movu movdqu
     %undef movh
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, ymm %+ %%i
-        CAT_XDEFINE nnymm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS ymm
     AVX512_MM_PERMUTATION
 %endmacro
 
@@ -957,21 +1005,12 @@
     %assign avx_enabled 1
     %define RESET_MM_PERMUTATION INIT_ZMM %1
     %define mmsize 64
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 32
-    %endif
     %define mova movdqa
     %define movu movdqu
     %undef movh
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, zmm %+ %%i
-        CAT_XDEFINE nnzmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS zmm
     AVX512_MM_PERMUTATION
 %endmacro
 
@@ -1070,19 +1109,32 @@
     %endif
     %assign %%i 0
     %rep num_mmregs
-        CAT_XDEFINE %%f, %%i, m %+ %%i
+        %xdefine %%tmp m %+ %%i
+        CAT_XDEFINE %%f, %%i, regnumof %+ %%tmp
         %assign %%i %%i+1
     %endrep
 %endmacro
 
-%macro LOAD_MM_PERMUTATION 1 ; name to load from
-    %ifdef %1_m0
+%macro LOAD_MM_PERMUTATION 0-1 ; name to load from
+    %if %0
+        %xdefine %%f %1_m
+    %else
+        %xdefine %%f current_function %+ _m
+    %endif
+    %xdefine %%tmp %%f %+ 0
+    %ifnum %%tmp
+        RESET_MM_PERMUTATION
         %assign %%i 0
         %rep num_mmregs
-            CAT_XDEFINE m, %%i, %1_m %+ %%i
-            CAT_XDEFINE nn, m %+ %%i, %%i
+            %xdefine %%tmp %%f %+ %%i
+            CAT_XDEFINE %%m, %%i, m %+ %%tmp
             %assign %%i %%i+1
         %endrep
+        %rep num_mmregs
+            %assign %%i %%i-1
+            CAT_XDEFINE m, %%i, %%m %+ %%i
+            CAT_XDEFINE nn, m %+ %%i, %%i
+        %endrep
     %endif
 %endmacro
 
@@ -1188,8 +1240,22 @@
         %ifdef cpuname
             %if notcpuflag(%2)
                 %error use of ``%1'' %2 instruction in cpuname function: current_function
-            %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8
+            %elif %3 == 0 && __sizeofreg == 16 && notcpuflag(sse2)
                 %error use of ``%1'' sse2 instruction in cpuname function: current_function
+            %elif %3 == 0 && __sizeofreg == 32 && notcpuflag(avx2)
+                %error use of ``%1'' avx2 instruction in cpuname function: current_function
+            %elif __sizeofreg == 16 && notcpuflag(sse)
+                %error use of ``%1'' sse instruction in cpuname function: current_function
+            %elif __sizeofreg == 32 && notcpuflag(avx)
+                %error use of ``%1'' avx instruction in cpuname function: current_function
+            %elif __sizeofreg == 64 && notcpuflag(avx512)
+                %error use of ``%1'' avx512 instruction in cpuname function: current_function
+            %elifidn %1, pextrw ; special case because the base instruction is mmx2,
+                %ifnid %6       ; but sse4 is required for memory operands
+                    %if notcpuflag(sse4)
+                        %error use of ``%1'' sse4 instruction in cpuname function: current_function
+                    %endif
+                %endif
             %endif
         %endif
     %endif
@@ -1233,9 +1299,40 @@
     %elif %0 >= 9
         __instr %6, %7, %8, %9
     %elif %0 == 8
-        __instr %6, %7, %8
+        %if avx_enabled && %5
+            %xdefine __src1 %7
+            %xdefine __src2 %8
+            %ifnum regnumof%7
+                %ifnum regnumof%8
+                    %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
+                        ; Most VEX-encoded instructions require an additional byte to encode when
+                        ; src2 is a high register (e.g. m8..15). If the instruction is commutative
+                        ; we can swap src1 and src2 when doing so reduces the instruction length.
+                        %xdefine __src1 %8
+                        %xdefine __src2 %7
+                    %endif
+                %endif
+            %endif
+            __instr %6, __src1, __src2
+        %else
+            __instr %6, %7, %8
+        %endif
     %elif %0 == 7
-        __instr %6, %7
+        %if avx_enabled && %5
+            %xdefine __src1 %6
+            %xdefine __src2 %7
+            %ifnum regnumof%6
+                %ifnum regnumof%7
+                    %if regnumof%6 < 8 && regnumof%7 >= 8 && regnumof%7 < 16 && sizeof%7 <= 32
+                        %xdefine __src1 %7
+                        %xdefine __src2 %6
+                    %endif
+                %endif
+            %endif
+            __instr %6, __src1, __src2
+        %else
+            __instr %6, %7
+        %endif
     %else
         __instr %6
     %endif
@@ -1270,12 +1367,12 @@
 AVX_INSTR addss, sse, 1, 0, 0
 AVX_INSTR addsubpd, sse3, 1, 0, 0
 AVX_INSTR addsubps, sse3, 1, 0, 0
-AVX_INSTR aesdec, fnord, 0, 0, 0
-AVX_INSTR aesdeclast, fnord, 0, 0, 0
-AVX_INSTR aesenc, fnord, 0, 0, 0
-AVX_INSTR aesenclast, fnord, 0, 0, 0
-AVX_INSTR aesimc
-AVX_INSTR aeskeygenassist
+AVX_INSTR aesdec, aesni, 0, 0, 0
+AVX_INSTR aesdeclast, aesni, 0, 0, 0
+AVX_INSTR aesenc, aesni, 0, 0, 0
+AVX_INSTR aesenclast, aesni, 0, 0, 0
+AVX_INSTR aesimc, aesni
+AVX_INSTR aeskeygenassist, aesni
 AVX_INSTR andnpd, sse2, 1, 0, 0
 AVX_INSTR andnps, sse, 1, 0, 0
 AVX_INSTR andpd, sse2, 1, 0, 1
@@ -1284,42 +1381,77 @@
 AVX_INSTR blendps, sse4, 1, 1, 0
 AVX_INSTR blendvpd, sse4 ; can't be emulated
 AVX_INSTR blendvps, sse4 ; can't be emulated
+AVX_INSTR cmpeqpd, sse2, 1, 0, 1
+AVX_INSTR cmpeqps, sse, 1, 0, 1
+AVX_INSTR cmpeqsd, sse2, 1, 0, 0
+AVX_INSTR cmpeqss, sse, 1, 0, 0
+AVX_INSTR cmplepd, sse2, 1, 0, 0
+AVX_INSTR cmpleps, sse, 1, 0, 0
+AVX_INSTR cmplesd, sse2, 1, 0, 0
+AVX_INSTR cmpless, sse, 1, 0, 0
+AVX_INSTR cmpltpd, sse2, 1, 0, 0
+AVX_INSTR cmpltps, sse, 1, 0, 0
+AVX_INSTR cmpltsd, sse2, 1, 0, 0
+AVX_INSTR cmpltss, sse, 1, 0, 0
+AVX_INSTR cmpneqpd, sse2, 1, 0, 1
+AVX_INSTR cmpneqps, sse, 1, 0, 1
+AVX_INSTR cmpneqsd, sse2, 1, 0, 0
+AVX_INSTR cmpneqss, sse, 1, 0, 0
+AVX_INSTR cmpnlepd, sse2, 1, 0, 0
+AVX_INSTR cmpnleps, sse, 1, 0, 0
+AVX_INSTR cmpnlesd, sse2, 1, 0, 0
+AVX_INSTR cmpnless, sse, 1, 0, 0
+AVX_INSTR cmpnltpd, sse2, 1, 0, 0
+AVX_INSTR cmpnltps, sse, 1, 0, 0
+AVX_INSTR cmpnltsd, sse2, 1, 0, 0
+AVX_INSTR cmpnltss, sse, 1, 0, 0
+AVX_INSTR cmpordpd, sse2, 1, 0, 1
+AVX_INSTR cmpordps, sse, 1, 0, 1
+AVX_INSTR cmpordsd, sse2, 1, 0, 0
+AVX_INSTR cmpordss, sse, 1, 0, 0
 AVX_INSTR cmppd, sse2, 1, 1, 0
 AVX_INSTR cmpps, sse, 1, 1, 0
 AVX_INSTR cmpsd, sse2, 1, 1, 0
 AVX_INSTR cmpss, sse, 1, 1, 0
-AVX_INSTR comisd, sse2
-AVX_INSTR comiss, sse
-AVX_INSTR cvtdq2pd, sse2
-AVX_INSTR cvtdq2ps, sse2
-AVX_INSTR cvtpd2dq, sse2
-AVX_INSTR cvtpd2ps, sse2
-AVX_INSTR cvtps2dq, sse2
-AVX_INSTR cvtps2pd, sse2
-AVX_INSTR cvtsd2si, sse2
+AVX_INSTR cmpunordpd, sse2, 1, 0, 1
+AVX_INSTR cmpunordps, sse, 1, 0, 1
+AVX_INSTR cmpunordsd, sse2, 1, 0, 0
+AVX_INSTR cmpunordss, sse, 1, 0, 0
+AVX_INSTR comisd, sse2, 1
+AVX_INSTR comiss, sse, 1
+AVX_INSTR cvtdq2pd, sse2, 1
+AVX_INSTR cvtdq2ps, sse2, 1
+AVX_INSTR cvtpd2dq, sse2, 1
+AVX_INSTR cvtpd2ps, sse2, 1
+AVX_INSTR cvtps2dq, sse2, 1
+AVX_INSTR cvtps2pd, sse2, 1
+AVX_INSTR cvtsd2si, sse2, 1
 AVX_INSTR cvtsd2ss, sse2, 1, 0, 0
 AVX_INSTR cvtsi2sd, sse2, 1, 0, 0
 AVX_INSTR cvtsi2ss, sse, 1, 0, 0
 AVX_INSTR cvtss2sd, sse2, 1, 0, 0
-AVX_INSTR cvtss2si, sse
-AVX_INSTR cvttpd2dq, sse2
-AVX_INSTR cvttps2dq, sse2
-AVX_INSTR cvttsd2si, sse2
-AVX_INSTR cvttss2si, sse
+AVX_INSTR cvtss2si, sse, 1
+AVX_INSTR cvttpd2dq, sse2, 1
+AVX_INSTR cvttps2dq, sse2, 1
+AVX_INSTR cvttsd2si, sse2, 1
+AVX_INSTR cvttss2si, sse, 1
 AVX_INSTR divpd, sse2, 1, 0, 0
 AVX_INSTR divps, sse, 1, 0, 0
 AVX_INSTR divsd, sse2, 1, 0, 0
 AVX_INSTR divss, sse, 1, 0, 0
 AVX_INSTR dppd, sse4, 1, 1, 0
 AVX_INSTR dpps, sse4, 1, 1, 0
-AVX_INSTR extractps, sse4
+AVX_INSTR extractps, sse4, 1
+AVX_INSTR gf2p8affineinvqb, gfni, 0, 1, 0
+AVX_INSTR gf2p8affineqb, gfni, 0, 1, 0
+AVX_INSTR gf2p8mulb, gfni, 0, 0, 0
 AVX_INSTR haddpd, sse3, 1, 0, 0
 AVX_INSTR haddps, sse3, 1, 0, 0
 AVX_INSTR hsubpd, sse3, 1, 0, 0
 AVX_INSTR hsubps, sse3, 1, 0, 0
 AVX_INSTR insertps, sse4, 1, 1, 0
 AVX_INSTR lddqu, sse3
-AVX_INSTR ldmxcsr, sse
+AVX_INSTR ldmxcsr, sse, 1
 AVX_INSTR maskmovdqu, sse2
 AVX_INSTR maxpd, sse2, 1, 0, 1
 AVX_INSTR maxps, sse, 1, 0, 1
@@ -1329,10 +1461,10 @@
 AVX_INSTR minps, sse, 1, 0, 1
 AVX_INSTR minsd, sse2, 1, 0, 0
 AVX_INSTR minss, sse, 1, 0, 0
-AVX_INSTR movapd, sse2
-AVX_INSTR movaps, sse
+AVX_INSTR movapd, sse2, 1
+AVX_INSTR movaps, sse, 1
 AVX_INSTR movd, mmx
-AVX_INSTR movddup, sse3
+AVX_INSTR movddup, sse3, 1
 AVX_INSTR movdqa, sse2
 AVX_INSTR movdqu, sse2
 AVX_INSTR movhlps, sse, 1, 0, 0
@@ -1341,19 +1473,19 @@
 AVX_INSTR movlhps, sse, 1, 0, 0
 AVX_INSTR movlpd, sse2, 1, 0, 0
 AVX_INSTR movlps, sse, 1, 0, 0
-AVX_INSTR movmskpd, sse2
-AVX_INSTR movmskps, sse
+AVX_INSTR movmskpd, sse2, 1
+AVX_INSTR movmskps, sse, 1
 AVX_INSTR movntdq, sse2
 AVX_INSTR movntdqa, sse4
-AVX_INSTR movntpd, sse2
-AVX_INSTR movntps, sse
+AVX_INSTR movntpd, sse2, 1
+AVX_INSTR movntps, sse, 1
 AVX_INSTR movq, mmx
 AVX_INSTR movsd, sse2, 1, 0, 0
-AVX_INSTR movshdup, sse3
-AVX_INSTR movsldup, sse3
+AVX_INSTR movshdup, sse3, 1
+AVX_INSTR movsldup, sse3, 1
 AVX_INSTR movss, sse, 1, 0, 0
-AVX_INSTR movupd, sse2
-AVX_INSTR movups, sse
+AVX_INSTR movupd, sse2, 1
+AVX_INSTR movups, sse, 1
 AVX_INSTR mpsadbw, sse4, 0, 1, 0
 AVX_INSTR mulpd, sse2, 1, 0, 1
 AVX_INSTR mulps, sse, 1, 0, 1
@@ -1486,27 +1618,27 @@
 AVX_INSTR punpckldq, mmx, 0, 0, 0
 AVX_INSTR punpcklqdq, sse2, 0, 0, 0
 AVX_INSTR pxor, mmx, 0, 0, 1
-AVX_INSTR rcpps, sse
+AVX_INSTR rcpps, sse, 1
 AVX_INSTR rcpss, sse, 1, 0, 0
-AVX_INSTR roundpd, sse4
-AVX_INSTR roundps, sse4
+AVX_INSTR roundpd, sse4, 1
+AVX_INSTR roundps, sse4, 1
 AVX_INSTR roundsd, sse4, 1, 1, 0
 AVX_INSTR roundss, sse4, 1, 1, 0
-AVX_INSTR rsqrtps, sse
+AVX_INSTR rsqrtps, sse, 1
 AVX_INSTR rsqrtss, sse, 1, 0, 0
 AVX_INSTR shufpd, sse2, 1, 1, 0
 AVX_INSTR shufps, sse, 1, 1, 0
-AVX_INSTR sqrtpd, sse2
-AVX_INSTR sqrtps, sse
+AVX_INSTR sqrtpd, sse2, 1
+AVX_INSTR sqrtps, sse, 1
 AVX_INSTR sqrtsd, sse2, 1, 0, 0
 AVX_INSTR sqrtss, sse, 1, 0, 0
-AVX_INSTR stmxcsr, sse
+AVX_INSTR stmxcsr, sse, 1
 AVX_INSTR subpd, sse2, 1, 0, 0
 AVX_INSTR subps, sse, 1, 0, 0
 AVX_INSTR subsd, sse2, 1, 0, 0
 AVX_INSTR subss, sse, 1, 0, 0
-AVX_INSTR ucomisd, sse2
-AVX_INSTR ucomiss, sse
+AVX_INSTR ucomisd, sse2, 1
+AVX_INSTR ucomiss, sse, 1
 AVX_INSTR unpckhpd, sse2, 1, 0, 0
 AVX_INSTR unpckhps, sse, 1, 0, 0
 AVX_INSTR unpcklpd, sse2, 1, 0, 0
@@ -1519,6 +1651,38 @@
 AVX_INSTR pfsub, 3dnow, 1, 0, 0
 AVX_INSTR pfmul, 3dnow, 1, 0, 1
 
+;%1 == instruction
+;%2 == minimal instruction set
+%macro GPR_INSTR 2
+    %macro %1 2-5 fnord, %1, %2
+        %ifdef cpuname
+            %if notcpuflag(%5)
+                %error use of ``%4'' %5 instruction in cpuname function: current_function
+            %endif
+        %endif
+        %ifidn %3, fnord
+            %4 %1, %2
+        %else
+            %4 %1, %2, %3
+        %endif
+    %endmacro
+%endmacro
+
+GPR_INSTR andn, bmi1
+GPR_INSTR bextr, bmi1
+GPR_INSTR blsi, bmi1
+GPR_INSTR blsr, bmi1
+GPR_INSTR blsmsk, bmi1
+GPR_INSTR bzhi, bmi2
+GPR_INSTR mulx, bmi2
+GPR_INSTR pdep, bmi2
+GPR_INSTR pext, bmi2
+GPR_INSTR popcnt, sse42
+GPR_INSTR rorx, bmi2
+GPR_INSTR sarx, bmi2
+GPR_INSTR shlx, bmi2
+GPR_INSTR shrx, bmi2
+
 ; base-4 constants for shuffles
 %assign i 0
 %rep 256
@@ -1615,6 +1779,11 @@
                 %assign %%evex_required 1
             %endif
         %endif
+        %ifnum regnumof%3
+            %if regnumof%3 >= 16 || sizeof%3 > 32
+                %assign %%evex_required 1
+            %endif
+        %endif
         %if %%evex_required
             %6 %%args
         %else
diff -Nru x264-0.152.2854+gite9a5903/common/x86/x86util.asm x264-0.158.2988+git-20191101.7817004/common/x86/x86util.asm
--- x264-0.152.2854+gite9a5903/common/x86/x86util.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/common/x86/x86util.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86util.asm: x86 utility macros
 ;*****************************************************************************
-;* Copyright (C) 2008-2017 x264 project
+;* Copyright (C) 2008-2019 x264 project
 ;*
 ;* Authors: Holger Lubitz <holger@lubitz.org>
 ;*          Loren Merritt <lorenm@u.washington.edu>
@@ -24,6 +24,23 @@
 ;* For more information, contact us at licensing@x264.com.
 ;*****************************************************************************
 
+; like cextern, but with a plain x264 prefix instead of a bitdepth-specific one
+%macro cextern_common 1
+    %xdefine %1 mangle(x264 %+ _ %+ %1)
+    CAT_XDEFINE cglobaled_, %1, 1
+    extern %1
+%endmacro
+
+%ifndef BIT_DEPTH
+    %assign BIT_DEPTH 0
+%endif
+
+%if BIT_DEPTH > 8
+    %assign HIGH_BIT_DEPTH 1
+%else
+    %assign HIGH_BIT_DEPTH 0
+%endif
+
 %assign FENC_STRIDE 16
 %assign FDEC_STRIDE 32
 
@@ -54,7 +71,6 @@
 %endif
 %endmacro
 
-
 %macro SBUTTERFLY 4
 %ifidn %1, dqqq
     vperm2i128  m%4, m%2, m%3, q0301 ; punpckh
diff -Nru x264-0.152.2854+gite9a5903/config.guess x264-0.158.2988+git-20191101.7817004/config.guess
--- x264-0.152.2854+gite9a5903/config.guess	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/config.guess	2019-11-09 05:16:29.000000000 +0000
@@ -1,12 +1,14 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
-#   Copyright 1992-2017 Free Software Foundation, Inc.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2017-11-07'
+timestamp='2012-09-25'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
+# the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
@@ -15,22 +17,24 @@
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
 # configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that
-# program.  This Exception is an additional permission under section 7
-# of the GNU General Public License, version 3 ("GPLv3").
+# the same distribution terms that you use for the rest of that program.
+
+
+# Originally written by Per Bothner.  Please send patches (context
+# diff format) to <config-patches@gnu.org> and include a ChangeLog
+# entry.
 #
-# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
+# This script attempts to guess a canonical system name similar to
+# config.sub.  If it succeeds, it prints the system name on stdout, and
+# exits with 0.  Otherwise, it exits with 1.
 #
 # You can get the latest version of this script from:
-# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
-#
-# Please send patches to <config-patches@gnu.org>.
-
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
 
 me=`echo "$0" | sed -e 's,.*/,,'`
 
@@ -39,7 +43,7 @@
 
 Output the configuration name of the system \`$me' is run on.
 
-Options:
+Operation modes:
   -h, --help         print this help, then exit
   -t, --time-stamp   print date of last modification, then exit
   -v, --version      print version number, then exit
@@ -50,7 +54,9 @@
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright 1992-2017 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -132,27 +138,6 @@
 UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
 UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 
-case "${UNAME_SYSTEM}" in
-Linux|GNU|GNU/*)
-	# If the system lacks a compiler, then just pick glibc.
-	# We could probably try harder.
-	LIBC=gnu
-
-	eval $set_cc_for_build
-	cat <<-EOF > $dummy.c
-	#include <features.h>
-	#if defined(__UCLIBC__)
-	LIBC=uclibc
-	#elif defined(__dietlibc__)
-	LIBC=dietlibc
-	#else
-	LIBC=gnu
-	#endif
-	EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
-	;;
-esac
-
 # Note: order is significant - the case branches are not exclusive.
 
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
@@ -168,29 +153,19 @@
 	# Note: NetBSD doesn't particularly care about the vendor
 	# portion of the name.  We always set it to "unknown".
 	sysctl="sysctl -n hw.machine_arch"
-	UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
-	    /sbin/$sysctl 2>/dev/null || \
-	    /usr/sbin/$sysctl 2>/dev/null || \
-	    echo unknown)`
+	UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
+	    /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
 	case "${UNAME_MACHINE_ARCH}" in
 	    armeb) machine=armeb-unknown ;;
 	    arm*) machine=arm-unknown ;;
 	    sh3el) machine=shl-unknown ;;
 	    sh3eb) machine=sh-unknown ;;
 	    sh5el) machine=sh5le-unknown ;;
-	    earmv*)
-		arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
-		endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'`
-		machine=${arch}${endian}-unknown
-		;;
 	    *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
 	esac
 	# The Operating System including object format, if it has switched
-	# to ELF recently (or will in the future) and ABI.
+	# to ELF recently, or will in the future.
 	case "${UNAME_MACHINE_ARCH}" in
-	    earm*)
-		os=netbsdelf
-		;;
 	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
 		eval $set_cc_for_build
 		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
@@ -207,13 +182,6 @@
 		os=netbsd
 		;;
 	esac
-	# Determine ABI tags.
-	case "${UNAME_MACHINE_ARCH}" in
-	    earm*)
-		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
-		abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"`
-		;;
-	esac
 	# The OS release
 	# Debian GNU/NetBSD machines have a different userland, and
 	# thus, need a distinct triplet. However, they do not need
@@ -224,13 +192,13 @@
 		release='-gnu'
 		;;
 	    *)
-		release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2`
+		release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
 		;;
 	esac
 	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
 	# contains redundant information, the shorter form:
 	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
-	echo "${machine}-${os}${release}${abi}"
+	echo "${machine}-${os}${release}"
 	exit ;;
     *:Bitrig:*:*)
 	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
@@ -240,13 +208,6 @@
 	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
 	echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
 	exit ;;
-    *:LibertyBSD:*:*)
-	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
-	echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE}
-	exit ;;
-    *:MidnightBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-midnightbsd${UNAME_RELEASE}
-	exit ;;
     *:ekkoBSD:*:*)
 	echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
 	exit ;;
@@ -259,12 +220,6 @@
     *:MirBSD:*:*)
 	echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
 	exit ;;
-    *:Sortix:*:*)
-	echo ${UNAME_MACHINE}-unknown-sortix
-	exit ;;
-    *:Redox:*:*)
-	echo ${UNAME_MACHINE}-unknown-redox
-	exit ;;
     alpha:OSF1:*:*)
 	case $UNAME_RELEASE in
 	*4.0)
@@ -281,46 +236,55 @@
 	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
 	case "$ALPHA_CPU_TYPE" in
 	    "EV4 (21064)")
-		UNAME_MACHINE=alpha ;;
+		UNAME_MACHINE="alpha" ;;
 	    "EV4.5 (21064)")
-		UNAME_MACHINE=alpha ;;
+		UNAME_MACHINE="alpha" ;;
 	    "LCA4 (21066/21068)")
-		UNAME_MACHINE=alpha ;;
+		UNAME_MACHINE="alpha" ;;
 	    "EV5 (21164)")
-		UNAME_MACHINE=alphaev5 ;;
+		UNAME_MACHINE="alphaev5" ;;
 	    "EV5.6 (21164A)")
-		UNAME_MACHINE=alphaev56 ;;
+		UNAME_MACHINE="alphaev56" ;;
 	    "EV5.6 (21164PC)")
-		UNAME_MACHINE=alphapca56 ;;
+		UNAME_MACHINE="alphapca56" ;;
 	    "EV5.7 (21164PC)")
-		UNAME_MACHINE=alphapca57 ;;
+		UNAME_MACHINE="alphapca57" ;;
 	    "EV6 (21264)")
-		UNAME_MACHINE=alphaev6 ;;
+		UNAME_MACHINE="alphaev6" ;;
 	    "EV6.7 (21264A)")
-		UNAME_MACHINE=alphaev67 ;;
+		UNAME_MACHINE="alphaev67" ;;
 	    "EV6.8CB (21264C)")
-		UNAME_MACHINE=alphaev68 ;;
+		UNAME_MACHINE="alphaev68" ;;
 	    "EV6.8AL (21264B)")
-		UNAME_MACHINE=alphaev68 ;;
+		UNAME_MACHINE="alphaev68" ;;
 	    "EV6.8CX (21264D)")
-		UNAME_MACHINE=alphaev68 ;;
+		UNAME_MACHINE="alphaev68" ;;
 	    "EV6.9A (21264/EV69A)")
-		UNAME_MACHINE=alphaev69 ;;
+		UNAME_MACHINE="alphaev69" ;;
 	    "EV7 (21364)")
-		UNAME_MACHINE=alphaev7 ;;
+		UNAME_MACHINE="alphaev7" ;;
 	    "EV7.9 (21364A)")
-		UNAME_MACHINE=alphaev79 ;;
+		UNAME_MACHINE="alphaev79" ;;
 	esac
 	# A Pn.n version is a patched version.
 	# A Vn.n version is a released version.
 	# A Tn.n version is a released field test version.
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
-	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
 	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
 	exitcode=$?
 	trap '' 0
 	exit $exitcode ;;
+    Alpha\ *:Windows_NT*:*)
+	# How do we know it's Interix rather than the generic POSIX subsystem?
+	# Should we change UNAME_MACHINE based on the output of uname instead
+	# of the specific Alpha model?
+	echo alpha-pc-interix
+	exit ;;
+    21064:Windows_NT:50:3)
+	echo alpha-dec-winnt3.5
+	exit ;;
     Amiga*:UNIX_System_V:4.0:*)
 	echo m68k-unknown-sysv4
 	exit ;;
@@ -380,16 +344,16 @@
 	exit ;;
     i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
 	eval $set_cc_for_build
-	SUN_ARCH=i386
+	SUN_ARCH="i386"
 	# If there is a compiler, see if it is configured for 64-bit objects.
 	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
 	# This test works for both compilers.
-	if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
 	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
-		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
 		grep IS_64BIT_ARCH >/dev/null
 	    then
-		SUN_ARCH=x86_64
+		SUN_ARCH="x86_64"
 	    fi
 	fi
 	echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
@@ -414,7 +378,7 @@
 	exit ;;
     sun*:*:4.2BSD:*)
 	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
-	test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3
+	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
 	case "`/bin/arch`" in
 	    sun3)
 		echo m68k-sun-sunos${UNAME_RELEASE}
@@ -482,13 +446,13 @@
 #endif
 	#if defined (host_mips) && defined (MIPSEB)
 	#if defined (SYSTYPE_SYSV)
-	  printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
+	  printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
 	#endif
 	#if defined (SYSTYPE_SVR4)
-	  printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
+	  printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
 	#endif
 	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
-	  printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
+	  printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
 	#endif
 	#endif
 	  exit (-1);
@@ -600,9 +564,8 @@
 	else
 		IBM_ARCH=powerpc
 	fi
-	if [ -x /usr/bin/lslpp ] ; then
-		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
-			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
 	else
 		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
 	fi
@@ -611,7 +574,7 @@
     *:AIX:*:*)
 	echo rs6000-ibm-aix
 	exit ;;
-    ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
+    ibmrt:4.4BSD:*|romp-ibm:BSD:*)
 	echo romp-ibm-bsd4.4
 	exit ;;
     ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
@@ -632,20 +595,20 @@
     9000/[34678]??:HP-UX:*:*)
 	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
 	case "${UNAME_MACHINE}" in
-	    9000/31?)            HP_ARCH=m68000 ;;
-	    9000/[34]??)         HP_ARCH=m68k ;;
+	    9000/31? )            HP_ARCH=m68000 ;;
+	    9000/[34]?? )         HP_ARCH=m68k ;;
 	    9000/[678][0-9][0-9])
 		if [ -x /usr/bin/getconf ]; then
 		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
 		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
 		    case "${sc_cpu_version}" in
-		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
-		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
 		      532)                      # CPU_PA_RISC2_0
 			case "${sc_kernel_bits}" in
-			  32) HP_ARCH=hppa2.0n ;;
-			  64) HP_ARCH=hppa2.0w ;;
-			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
+			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
 			esac ;;
 		    esac
 		fi
@@ -684,11 +647,11 @@
 		    exit (0);
 		}
 EOF
-		    (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
 		fi ;;
 	esac
-	if [ ${HP_ARCH} = hppa2.0w ]
+	if [ ${HP_ARCH} = "hppa2.0w" ]
 	then
 	    eval $set_cc_for_build
 
@@ -701,12 +664,12 @@
 	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
 	    # => hppa64-hp-hpux11.23
 
-	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
+	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
 		grep -q __LP64__
 	    then
-		HP_ARCH=hppa2.0w
+		HP_ARCH="hppa2.0w"
 	    else
-		HP_ARCH=hppa64
+		HP_ARCH="hppa64"
 	    fi
 	fi
 	echo ${HP_ARCH}-hp-hpux${HPUX_REV}
@@ -746,7 +709,7 @@
 		{ echo "$SYSTEM_NAME"; exit; }
 	echo unknown-hitachi-hiuxwe2
 	exit ;;
-    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
+    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
 	echo hppa1.1-hp-bsd
 	exit ;;
     9000/8??:4.3bsd:*:*)
@@ -755,7 +718,7 @@
     *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
 	echo hppa1.0-hp-mpeix
 	exit ;;
-    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
+    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
 	echo hppa1.1-hp-osf
 	exit ;;
     hp8??:OSF1:*:*)
@@ -811,14 +774,14 @@
 	echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
 	exit ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
-	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
-	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
 	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
 	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
 	exit ;;
     5000:UNIX_System_V:4.*:*)
-	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
-	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
 	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
 	exit ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
@@ -834,11 +797,10 @@
 	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	case ${UNAME_PROCESSOR} in
 	    amd64)
-		UNAME_PROCESSOR=x86_64 ;;
-	    i386)
-		UNAME_PROCESSOR=i586 ;;
+		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	    *)
+		echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	esac
-	echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
 	exit ;;
     i*:CYGWIN*:*)
 	echo ${UNAME_MACHINE}-pc-cygwin
@@ -849,9 +811,13 @@
     *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
 	exit ;;
-    *:MSYS*:*)
+    i*:MSYS*:*)
 	echo ${UNAME_MACHINE}-pc-msys
 	exit ;;
+    i*:windows32*:*)
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
+	exit ;;
     i*:PW*:*)
 	echo ${UNAME_MACHINE}-pc-pw32
 	exit ;;
@@ -867,32 +833,47 @@
 		echo ia64-unknown-interix${UNAME_RELEASE}
 		exit ;;
 	esac ;;
+    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+	echo i${UNAME_MACHINE}-pc-mks
+	exit ;;
+    8664:Windows_NT:*)
+	echo x86_64-pc-mks
+	exit ;;
+    i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+	# How do we know it's Interix rather than the generic POSIX subsystem?
+	# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we change
+	# UNAME_MACHINE based on the output of uname instead of i386?
+	echo i586-pc-interix
+	exit ;;
     i*:UWIN*:*)
 	echo ${UNAME_MACHINE}-pc-uwin
 	exit ;;
     amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
 	echo x86_64-unknown-cygwin
 	exit ;;
+    p*:CYGWIN*:*)
+	echo powerpcle-unknown-cygwin
+	exit ;;
     prep*:SunOS:5.*:*)
 	echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
 	exit ;;
     *:GNU:*:*)
 	# the GNU system
-	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
 	exit ;;
     *:GNU/*:*:*)
 	# other systems with GNU libc and userland
-	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
 	exit ;;
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
 	exit ;;
     aarch64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     aarch64_be:Linux:*:*)
 	UNAME_MACHINE=aarch64_be
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     alpha:Linux:*:*)
 	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
@@ -905,60 +886,59 @@
 	  EV68*) UNAME_MACHINE=alphaev68 ;;
 	esac
 	objdump --private-headers /bin/sh | grep -q ld.so.1
-	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
-	exit ;;
-    arc:Linux:*:* | arceb:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
 	exit ;;
     arm*:Linux:*:*)
 	eval $set_cc_for_build
 	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
 	    | grep -q __ARM_EABI__
 	then
-	    echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	    echo ${UNAME_MACHINE}-unknown-linux-gnu
 	else
 	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
 		| grep -q __ARM_PCS_VFP
 	    then
-		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabi
 	    else
-		echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
 	    fi
 	fi
 	exit ;;
     avr32*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     cris:Linux:*:*)
-	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     crisv32:Linux:*:*)
-	echo ${UNAME_MACHINE}-axis-linux-${LIBC}
-	exit ;;
-    e2k:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     frv:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     hexagon:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:Linux:*:*)
-	echo ${UNAME_MACHINE}-pc-linux-${LIBC}
+	LIBC=gnu
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#ifdef __dietlibc__
+	LIBC=dietlibc
+	#endif
+EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+	echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
 	exit ;;
     ia64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
-	exit ;;
-    k1om:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     m32r*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     m68*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     mips:Linux:*:* | mips64:Linux:*:*)
 	eval $set_cc_for_build
@@ -977,69 +957,57 @@
 	#endif
 EOF
 	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
-	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
+	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
 	;;
-    mips64el:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
-	exit ;;
-    openrisc*:Linux:*:*)
-	echo or1k-unknown-linux-${LIBC}
-	exit ;;
-    or32:Linux:*:* | or1k*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+    or32:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     padre:Linux:*:*)
-	echo sparc-unknown-linux-${LIBC}
+	echo sparc-unknown-linux-gnu
 	exit ;;
     parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-${LIBC}
+	echo hppa64-unknown-linux-gnu
 	exit ;;
     parisc:Linux:*:* | hppa:Linux:*:*)
 	# Look for CPU level
 	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
-	  PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
-	  PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
-	  *)    echo hppa-unknown-linux-${LIBC} ;;
+	  PA7*) echo hppa1.1-unknown-linux-gnu ;;
+	  PA8*) echo hppa2.0-unknown-linux-gnu ;;
+	  *)    echo hppa-unknown-linux-gnu ;;
 	esac
 	exit ;;
     ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-${LIBC}
-	exit ;;
-    ppc:Linux:*:*)
-	echo powerpc-unknown-linux-${LIBC}
+	echo powerpc64-unknown-linux-gnu
 	exit ;;
     ppc64le:Linux:*:*)
-	echo powerpc64le-unknown-linux-${LIBC}
+	echo powerpc64le-unknown-linux-gnu
 	exit ;;
-    ppcle:Linux:*:*)
-	echo powerpcle-unknown-linux-${LIBC}
-	exit ;;
-    riscv32:Linux:*:* | riscv64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-gnu
 	exit ;;
     s390:Linux:*:* | s390x:Linux:*:*)
-	echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
+	echo ${UNAME_MACHINE}-ibm-linux
 	exit ;;
     sh64*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     sh*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     sparc:Linux:*:* | sparc64:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     tile*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     vax:Linux:*:*)
-	echo ${UNAME_MACHINE}-dec-linux-${LIBC}
+	echo ${UNAME_MACHINE}-dec-linux-gnu
 	exit ;;
     x86_64:Linux:*:*)
-	echo ${UNAME_MACHINE}-pc-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     xtensa*:Linux:*:*)
-	echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -1075,7 +1043,7 @@
     i*86:*DOS:*:*)
 	echo ${UNAME_MACHINE}-pc-msdosdjgpp
 	exit ;;
-    i*86:*:4.*:*)
+    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
 	UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
 	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
 		echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
@@ -1115,7 +1083,7 @@
 	# uname -m prints for DJGPP always 'pc', but it prints nothing about
 	# the processor, so we play safe by assuming i586.
 	# Note: whatever this is, it MUST be the same as what config.sub
-	# prints for the "djgpp" host, or else GDB configure will decide that
+	# prints for the "djgpp" host, or else GDB configury will decide that
 	# this is a cross-build.
 	echo i586-pc-msdosdjgpp
 	exit ;;
@@ -1264,9 +1232,6 @@
     SX-8R:SUPER-UX:*:*)
 	echo sx8r-nec-superux${UNAME_RELEASE}
 	exit ;;
-    SX-ACE:SUPER-UX:*:*)
-	echo sxace-nec-superux${UNAME_RELEASE}
-	exit ;;
     Power*:Rhapsody:*:*)
 	echo powerpc-apple-rhapsody${UNAME_RELEASE}
 	exit ;;
@@ -1275,43 +1240,24 @@
 	exit ;;
     *:Darwin:*:*)
 	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
-	eval $set_cc_for_build
-	if test "$UNAME_PROCESSOR" = unknown ; then
-	    UNAME_PROCESSOR=powerpc
-	fi
-	if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
-	    if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
-		if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
-		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		       grep IS_64BIT_ARCH >/dev/null
-		then
-		    case $UNAME_PROCESSOR in
-			i386) UNAME_PROCESSOR=x86_64 ;;
-			powerpc) UNAME_PROCESSOR=powerpc64 ;;
-		    esac
-		fi
-		# On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
-		if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
-		       (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		       grep IS_PPC >/dev/null
-		then
-		    UNAME_PROCESSOR=powerpc
-		fi
-	    fi
-	elif test "$UNAME_PROCESSOR" = i386 ; then
-	    # Avoid executing cc on OS X 10.9, as it ships with a stub
-	    # that puts up a graphical alert prompting to install
-	    # developer tools.  Any system running Mac OS X 10.7 or
-	    # later (Darwin 11 and later) is required to have a 64-bit
-	    # processor. This is not true of the ARM version of Darwin
-	    # that Apple uses in portable devices.
-	    UNAME_PROCESSOR=x86_64
-	fi
+	case $UNAME_PROCESSOR in
+	    i386)
+		eval $set_cc_for_build
+		if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+		  if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		      (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		      grep IS_64BIT_ARCH >/dev/null
+		  then
+		      UNAME_PROCESSOR="x86_64"
+		  fi
+		fi ;;
+	    unknown) UNAME_PROCESSOR=powerpc ;;
+	esac
 	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
 	exit ;;
     *:procnto*:*:* | *:QNX:[0123456789]*:*)
 	UNAME_PROCESSOR=`uname -p`
-	if test "$UNAME_PROCESSOR" = x86; then
+	if test "$UNAME_PROCESSOR" = "x86"; then
 		UNAME_PROCESSOR=i386
 		UNAME_MACHINE=pc
 	fi
@@ -1320,18 +1266,15 @@
     *:QNX:*:4*)
 	echo i386-pc-qnx
 	exit ;;
-    NEO-*:NONSTOP_KERNEL:*:*)
+    NEO-?:NONSTOP_KERNEL:*:*)
 	echo neo-tandem-nsk${UNAME_RELEASE}
 	exit ;;
     NSE-*:NONSTOP_KERNEL:*:*)
 	echo nse-tandem-nsk${UNAME_RELEASE}
 	exit ;;
-    NSR-*:NONSTOP_KERNEL:*:*)
+    NSR-?:NONSTOP_KERNEL:*:*)
 	echo nsr-tandem-nsk${UNAME_RELEASE}
 	exit ;;
-    NSX-*:NONSTOP_KERNEL:*:*)
-	echo nsx-tandem-nsk${UNAME_RELEASE}
-	exit ;;
     *:NonStop-UX:*:*)
 	echo mips-compaq-nonstopux
 	exit ;;
@@ -1345,7 +1288,7 @@
 	# "uname -m" is not consistent, so use $cputype instead. 386
 	# is converted to i386 for consistency with other x86
 	# operating systems.
-	if test "$cputype" = 386; then
+	if test "$cputype" = "386"; then
 	    UNAME_MACHINE=i386
 	else
 	    UNAME_MACHINE="$cputype"
@@ -1387,7 +1330,7 @@
 	echo i386-pc-xenix
 	exit ;;
     i*86:skyos:*:*)
-	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'`
+	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
 	exit ;;
     i*86:rdos:*:*)
 	echo ${UNAME_MACHINE}-pc-rdos
@@ -1398,37 +1341,171 @@
     x86_64:VMkernel:*:*)
 	echo ${UNAME_MACHINE}-unknown-esx
 	exit ;;
-    amd64:Isilon\ OneFS:*:*)
-	echo x86_64-unknown-onefs
-	exit ;;
 esac
 
-echo "$0: unable to guess system type" >&2
+eval $set_cc_for_build
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
+     I don't know....  */
+  printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+  printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+	"4"
+#else
+	""
+#endif
+	); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+  printf ("arm-acorn-riscix\n"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+  printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+  int version;
+  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+  if (version < 4)
+    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+  else
+    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+  exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+  printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+  printf ("ns32k-encore-mach\n"); exit (0);
+#else
+  printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+  printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+  printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+  printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
 
-case "${UNAME_MACHINE}:${UNAME_SYSTEM}" in
-    mips:Linux | mips64:Linux)
-	# If we got here on MIPS GNU/Linux, output extra information.
-	cat >&2 <<EOF
+#if defined (_SEQUENT_)
+    struct utsname un;
 
-NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
-the system type. Please install a C compiler and try again.
+    uname(&un);
+
+    if (strncmp(un.version, "V2", 2) == 0) {
+	printf ("i386-sequent-ptx2\n"); exit (0);
+    }
+    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+	printf ("i386-sequent-ptx1\n"); exit (0);
+    }
+    printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+# if !defined (ultrix)
+#  include <sys/param.h>
+#  if defined (BSD)
+#   if BSD == 43
+      printf ("vax-dec-bsd4.3\n"); exit (0);
+#   else
+#    if BSD == 199006
+      printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#    else
+      printf ("vax-dec-bsd\n"); exit (0);
+#    endif
+#   endif
+#  else
+    printf ("vax-dec-bsd\n"); exit (0);
+#  endif
+# else
+    printf ("vax-dec-ultrix\n"); exit (0);
+# endif
+#endif
+
+#if defined (alliant) && defined (i860)
+  printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+  exit (1);
+}
 EOF
-	;;
-esac
+
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+	{ echo "$SYSTEM_NAME"; exit; }
+
+# Apollos put the system type in the environment.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+
+if [ -x /usr/convex/getsysinfo ]
+then
+    case `getsysinfo -f cpu_type` in
+    c1*)
+	echo c1-convex-bsd
+	exit ;;
+    c2*)
+	if getsysinfo -f scalar_acc
+	then echo c32-convex-bsd
+	else echo c2-convex-bsd
+	fi
+	exit ;;
+    c34*)
+	echo c34-convex-bsd
+	exit ;;
+    c38*)
+	echo c38-convex-bsd
+	exit ;;
+    c4*)
+	echo c4-convex-bsd
+	exit ;;
+    esac
+fi
 
 cat >&2 <<EOF
+$0: unable to guess system type
 
-This script (version $timestamp), has failed to recognize the
-operating system you are using. If your script is old, overwrite *all*
-copies of config.guess and config.sub with the latest versions from:
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
 
-  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
 and
-  https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
 
-If $0 has already been updated, send the following data and any
-information you think might be pertinent to config-patches@gnu.org to
-provide the necessary information to handle your system.
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
 
 config.guess timestamp = $timestamp
 
@@ -1456,7 +1533,7 @@
 exit 1
 
 # Local variables:
-# eval: (add-hook 'write-file-functions 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
 # time-stamp-start: "timestamp='"
 # time-stamp-format: "%:y-%02m-%02d"
 # time-stamp-end: "'"
diff -Nru x264-0.152.2854+gite9a5903/config.sub x264-0.158.2988+git-20191101.7817004/config.sub
--- x264-0.152.2854+gite9a5903/config.sub	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/config.sub	2019-11-09 05:16:29.000000000 +0000
@@ -1,31 +1,36 @@
 #! /bin/sh
 # Configuration validation subroutine script.
-#   Copyright 1992-2017 Free Software Foundation, Inc.
-
-timestamp='2017-11-23'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
+
+timestamp='2012-12-06'
+
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine.  It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
 # configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that
-# program.  This Exception is an additional permission under section 7
-# of the GNU General Public License, version 3 ("GPLv3").
+# the same distribution terms that you use for the rest of that program.
 
 
-# Please send patches to <config-patches@gnu.org>.
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted GNU ChangeLog entry.
 #
 # Configuration subroutine to validate and canonicalize a configuration type.
 # Supply the specified configuration type as an argument.
@@ -33,7 +38,7 @@
 # Otherwise, we print the canonical config type on stdout and succeed.
 
 # You can get the latest version of this script from:
-# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
 
 # This file is supposed to be the same for all GNU packages
 # and recognize all the CPU types, system types and aliases
@@ -53,11 +58,12 @@
 me=`echo "$0" | sed -e 's,.*/,,'`
 
 usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+       $0 [OPTION] ALIAS
 
 Canonicalize a configuration name.
 
-Options:
+Operation modes:
   -h, --help         print this help, then exit
   -t, --time-stamp   print date of last modification, then exit
   -v, --version      print version number, then exit
@@ -67,7 +73,9 @@
 version="\
 GNU config.sub ($timestamp)
 
-Copyright 1992-2017 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -116,8 +124,8 @@
 case $maybe_os in
   nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
   linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
-  knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
-  kopensolaris*-gnu* | cloudabi*-eabi* | \
+  knetbsd*-gnu* | netbsd*-gnu* | \
+  kopensolaris*-gnu* | \
   storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
@@ -229,6 +237,9 @@
 	-ptx*)
 		basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
 		;;
+	-windowsnt*)
+		os=`echo $os | sed -e 's/windowsnt/winnt/'`
+		;;
 	-psos*)
 		os=-psos
 		;;
@@ -248,21 +259,19 @@
 	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
 	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
 	| am33_2.0 \
-	| arc | arceb \
+	| arc \
 	| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
 	| avr | avr32 \
-	| ba \
 	| be32 | be64 \
 	| bfin \
-	| c4x | c8051 | clipper \
+	| c4x | clipper \
 	| d10v | d30v | dlx | dsp16xx \
-	| e2k | epiphany \
-	| fido | fr30 | frv | ft32 \
+	| epiphany \
+	| fido | fr30 | frv \
 	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
 	| hexagon \
-	| i370 | i860 | i960 | ia16 | ia64 \
+	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
-	| k1om \
 	| le32 | le64 \
 	| lm32 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
@@ -280,30 +289,26 @@
 	| mips64vr5900 | mips64vr5900el \
 	| mipsisa32 | mipsisa32el \
 	| mipsisa32r2 | mipsisa32r2el \
-	| mipsisa32r6 | mipsisa32r6el \
 	| mipsisa64 | mipsisa64el \
 	| mipsisa64r2 | mipsisa64r2el \
-	| mipsisa64r6 | mipsisa64r6el \
 	| mipsisa64sb1 | mipsisa64sb1el \
 	| mipsisa64sr71k | mipsisa64sr71kel \
-	| mipsr5900 | mipsr5900el \
 	| mipstx39 | mipstx39el \
 	| mn10200 | mn10300 \
 	| moxie \
 	| mt \
 	| msp430 \
 	| nds32 | nds32le | nds32be \
-	| nios | nios2 | nios2eb | nios2el \
+	| nios | nios2 \
 	| ns16k | ns32k \
-	| open8 | or1k | or1knd | or32 \
+	| open8 \
+	| or32 \
 	| pdp10 | pdp11 | pj | pjl \
 	| powerpc | powerpc64 | powerpc64le | powerpcle \
-	| pru \
 	| pyramid \
-	| riscv32 | riscv64 \
 	| rl78 | rx \
 	| score \
-	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
 	| sh64 | sh64le \
 	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
 	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
@@ -311,8 +316,7 @@
 	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
 	| ubicom32 \
 	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
-	| visium \
-	| wasm32 \
+	| we32k \
 	| x86 | xc16x | xstormy16 | xtensa \
 	| z8k | z80)
 		basic_machine=$basic_machine-unknown
@@ -326,10 +330,7 @@
 	c6x)
 		basic_machine=tic6x-unknown
 		;;
-	leon|leon[3-9])
-		basic_machine=sparc-$basic_machine
-		;;
-	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
 		basic_machine=$basic_machine-unknown
 		os=-none
 		;;
@@ -371,23 +372,21 @@
 	| aarch64-* | aarch64_be-* \
 	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
 	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
-	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
 	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
 	| avr-* | avr32-* \
-	| ba-* \
 	| be32-* | be64-* \
 	| bfin-* | bs2000-* \
 	| c[123]* | c30-* | [cjt]90-* | c4x-* \
-	| c8051-* | clipper-* | craynv-* | cydra-* \
+	| clipper-* | craynv-* | cydra-* \
 	| d10v-* | d30v-* | dlx-* \
-	| e2k-* | elxsi-* \
+	| elxsi-* \
 	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
 	| h8300-* | h8500-* \
 	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
 	| hexagon-* \
-	| i*86-* | i860-* | i960-* | ia16-* | ia64-* \
+	| i*86-* | i860-* | i960-* | ia64-* \
 	| ip2k-* | iq2000-* \
-	| k1om-* \
 	| le32-* | le64-* \
 	| lm32-* \
 	| m32c-* | m32r-* | m32rle-* \
@@ -407,34 +406,28 @@
 	| mips64vr5900-* | mips64vr5900el-* \
 	| mipsisa32-* | mipsisa32el-* \
 	| mipsisa32r2-* | mipsisa32r2el-* \
-	| mipsisa32r6-* | mipsisa32r6el-* \
 	| mipsisa64-* | mipsisa64el-* \
 	| mipsisa64r2-* | mipsisa64r2el-* \
-	| mipsisa64r6-* | mipsisa64r6el-* \
 	| mipsisa64sb1-* | mipsisa64sb1el-* \
 	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
-	| mipsr5900-* | mipsr5900el-* \
 	| mipstx39-* | mipstx39el-* \
 	| mmix-* \
 	| mt-* \
 	| msp430-* \
 	| nds32-* | nds32le-* | nds32be-* \
-	| nios-* | nios2-* | nios2eb-* | nios2el-* \
+	| nios-* | nios2-* \
 	| none-* | np1-* | ns16k-* | ns32k-* \
 	| open8-* \
-	| or1k*-* \
 	| orion-* \
 	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
 	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
-	| pru-* \
 	| pyramid-* \
-	| riscv32-* | riscv64-* \
 	| rl78-* | romp-* | rs6000-* | rx-* \
 	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
 	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
 	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
 	| sparclite-* \
-	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
 	| tahoe-* \
 	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
 	| tile*-* \
@@ -442,8 +435,6 @@
 	| ubicom32-* \
 	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
 	| vax-* \
-	| visium-* \
-	| wasm32-* \
 	| we32k-* \
 	| x86-* | x86_64-* | xc16x-* | xps100-* \
 	| xstormy16-* | xtensa*-* \
@@ -520,9 +511,6 @@
 		basic_machine=i386-pc
 		os=-aros
 		;;
-	asmjs)
-		basic_machine=asmjs-unknown
-		;;
 	aux)
 		basic_machine=m68k-apple
 		os=-aux
@@ -639,18 +627,10 @@
 		basic_machine=rs6000-bull
 		os=-bosx
 		;;
-	dpx2*)
+	dpx2* | dpx2*-bull)
 		basic_machine=m68k-bull
 		os=-sysv3
 		;;
-	e500v[12])
-		basic_machine=powerpc-unknown
-		os=$os"spe"
-		;;
-	e500v[12]-*)
-		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
-		os=$os"spe"
-		;;
 	ebmon29k)
 		basic_machine=a29k-amd
 		os=-ebmon
@@ -792,9 +772,6 @@
 		basic_machine=m68k-isi
 		os=-sysv
 		;;
-	leon-*|leon[3-9]-*)
-		basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'`
-		;;
 	m68knommu)
 		basic_machine=m68k-unknown
 		os=-linux
@@ -822,7 +799,7 @@
 		os=-mingw64
 		;;
 	mingw32)
-		basic_machine=i686-pc
+		basic_machine=i386-pc
 		os=-mingw32
 		;;
 	mingw32ce)
@@ -850,10 +827,6 @@
 		basic_machine=powerpc-unknown
 		os=-morphos
 		;;
-	moxiebox)
-		basic_machine=moxie-unknown
-		os=-moxiebox
-		;;
 	msdos)
 		basic_machine=i386-pc
 		os=-msdos
@@ -862,7 +835,7 @@
 		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
 		;;
 	msys)
-		basic_machine=i686-pc
+		basic_machine=i386-pc
 		os=-msys
 		;;
 	mvs)
@@ -901,7 +874,7 @@
 		basic_machine=v70-nec
 		os=-sysv
 		;;
-	next | m*-next)
+	next | m*-next )
 		basic_machine=m68k-next
 		case $os in
 		    -nextstep* )
@@ -946,9 +919,6 @@
 	nsr-tandem)
 		basic_machine=nsr-tandem
 		;;
-	nsx-tandem)
-		basic_machine=nsx-tandem
-		;;
 	op50n-* | op60c-*)
 		basic_machine=hppa1.1-oki
 		os=-proelf
@@ -1033,7 +1003,7 @@
 	ppc-* | ppcbe-*)
 		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
 		;;
-	ppcle | powerpclittle)
+	ppcle | powerpclittle | ppc-le | powerpc-little)
 		basic_machine=powerpcle-unknown
 		;;
 	ppcle-* | powerpclittle-*)
@@ -1043,7 +1013,7 @@
 		;;
 	ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
 		;;
-	ppc64le | powerpc64little)
+	ppc64le | powerpc64little | ppc64-le | powerpc64-little)
 		basic_machine=powerpc64le-unknown
 		;;
 	ppc64le-* | powerpc64little-*)
@@ -1244,9 +1214,6 @@
 		basic_machine=a29k-wrs
 		os=-vxworks
 		;;
-	wasm32)
-		basic_machine=wasm32-unknown
-		;;
 	w65*)
 		basic_machine=w65-wdc
 		os=-none
@@ -1255,9 +1222,6 @@
 		basic_machine=hppa1.1-winbond
 		os=-proelf
 		;;
-	x64)
-		basic_machine=x86_64-pc
-		;;
 	xbox)
 		basic_machine=i686-pc
 		os=-mingw32
@@ -1365,8 +1329,8 @@
 if [ x"$os" != x"" ]
 then
 case $os in
-	# First match some system type aliases that might get confused
-	# with valid system types.
+	# First match some system type aliases
+	# that might get confused with valid system types.
 	# -solaris* is a basic system type, with this one exception.
 	-auroraux)
 		os=-auroraux
@@ -1386,37 +1350,36 @@
 	-gnu/linux*)
 		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
 		;;
-	# Now accept the basic system types.
+	# First accept the basic system types.
 	# The portable systems comes first.
-	# Each alternative MUST end in a * to match a version number.
+	# Each alternative MUST END IN A *, to match a version number.
 	# -sysv* is not here because it comes later, after sysvr4.
 	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
 	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
 	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
-	      | -sym* | -kopensolaris* | -plan9* \
+	      | -sym* | -kopensolaris* \
 	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
-	      | -aos* | -aros* | -cloudabi* | -sortix* \
+	      | -aos* | -aros* \
 	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
 	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
 	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
-	      | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
+	      | -bitrig* | -openbsd* | -solidbsd* \
 	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
 	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
 	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
 	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
-	      | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \
+	      | -chorusos* | -chorusrdb* | -cegcc* \
 	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+	      | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
 	      | -linux-newlib* | -linux-musl* | -linux-uclibc* \
-	      | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
+	      | -uxpv* | -beos* | -mpeix* | -udk* \
 	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
 	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
 	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
 	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
 	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
 	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
-	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
-	      | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox*)
+	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
 	# Remember, each alternative MUST END IN *, to match a version number.
 		;;
 	-qnx*)
@@ -1491,7 +1454,7 @@
 	-nova*)
 		os=-rtmk-nova
 		;;
-	-ns2)
+	-ns2 )
 		os=-nextstep2
 		;;
 	-nsk*)
@@ -1540,29 +1503,17 @@
 	-aros*)
 		os=-aros
 		;;
+	-kaos*)
+		os=-kaos
+		;;
 	-zvmoe)
 		os=-zvmoe
 		;;
 	-dicos*)
 		os=-dicos
 		;;
-	-pikeos*)
-		# Until real need of OS specific support for
-		# particular features comes up, bare metal
-		# configurations are quite functional.
-		case $basic_machine in
-		    arm*)
-			os=-eabi
-			;;
-		    *)
-			os=-elf
-			;;
-		esac
-		;;
 	-nacl*)
 		;;
-	-ios)
-		;;
 	-none)
 		;;
 	*)
@@ -1603,9 +1554,6 @@
 	c4x-* | tic4x-*)
 		os=-coff
 		;;
-	c8051-*)
-		os=-elf
-		;;
 	hexagon-*)
 		os=-elf
 		;;
@@ -1658,9 +1606,6 @@
 	sparc-* | *-sun)
 		os=-sunos4.1.1
 		;;
-	pru-*)
-		os=-elf
-		;;
 	*-be)
 		os=-beos
 		;;
@@ -1706,7 +1651,7 @@
 	m88k-omron*)
 		os=-luna
 		;;
-	*-next)
+	*-next )
 		os=-nextstep
 		;;
 	*-sequent)
@@ -1841,7 +1786,7 @@
 exit
 
 # Local variables:
-# eval: (add-hook 'write-file-functions 'time-stamp)
+# eval: (add-hook 'write-file-hooks 'time-stamp)
 # time-stamp-start: "timestamp='"
 # time-stamp-format: "%:y-%02m-%02d"
 # time-stamp-end: "'"
diff -Nru x264-0.152.2854+gite9a5903/configure x264-0.158.2988+git-20191101.7817004/configure
--- x264-0.152.2854+gite9a5903/configure	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/configure	2019-11-09 05:16:29.000000000 +0000
@@ -30,8 +30,8 @@
   --disable-thread         disable multithreaded encoding
   --disable-win32thread    disable win32threads (windows only)
   --disable-interlaced     disable interlaced encoding support
-  --bit-depth=BIT_DEPTH    set output bit depth (8-10) [8]
-  --chroma-format=FORMAT   output chroma format (420, 422, 444, all) [all]
+  --bit-depth=BIT_DEPTH    set output bit depth (8, 10, all) [all]
+  --chroma-format=FORMAT   output chroma format (400, 420, 422, 444, all) [all]
 
 Advanced options:
   --disable-asm            disable platform-specific assembly optimizations
@@ -151,9 +151,9 @@
     done
     echo "int main (void) { $3 return 0; }" >> conftest.c
     if [ $compiler_style = MS ]; then
-        cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)"
+        cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)"
     else
-        cc_cmd="$CC conftest.c $CFLAGS $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest"
+        cc_cmd="$CC conftest.c $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest"
     fi
     if $cc_cmd >conftest.log 2>&1; then
         res=$?
@@ -253,6 +253,24 @@
     return $res
 }
 
+pkg_check() {
+    log_check "for packages: $1"
+    pkg_cmd="$PKGCONFIG --exists $1"
+    if $pkg_cmd >conftest.log 2>&1; then
+        res=$?
+        log_ok
+    else
+        res=$?
+        log_fail
+        log_msg "Failed commandline was:"
+        log_msg "--------------------------------------------------"
+        log_msg "$pkg_cmd"
+        cat conftest.log >> config.log
+        log_msg "--------------------------------------------------"
+    fi
+    return $res
+}
+
 define() {
     echo "#define $1$([ -n "$2" ] && echo " $2" || echo " 1")" >> config.h
 }
@@ -276,6 +294,7 @@
         fi
         arg="$(grep '#define X264_BIT_DEPTH ' $x264_config_path | sed -e 's/#define X264_BIT_DEPTH *//; s/ *$//')"
         if [ -n "$arg" ]; then
+            [ "$arg" = 0 ] && arg="all"
             if [ "$arg" != "$bit_depth" ]; then
                 echo "Override output bit depth with system libx264 configuration"
                 bit_depth="$arg"
@@ -353,7 +372,7 @@
 gprof="no"
 strip="no"
 pic="no"
-bit_depth="8"
+bit_depth="all"
 chroma_format="all"
 compiler="GNU"
 compiler_style="GNU"
@@ -361,6 +380,8 @@
 vsx="auto"
 
 CFLAGS="$CFLAGS -Wall -I. -I\$(SRCPATH)"
+CFLAGSSO="$CFLAGSSO"
+CFLAGSCLI="$CFLAGSCLI"
 LDFLAGS="$LDFLAGS"
 LDFLAGSCLI="$LDFLAGSCLI"
 ASFLAGS="$ASFLAGS -I. -I\$(SRCPATH)"
@@ -375,9 +396,9 @@
 "
 
 # list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
              LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
-             MSA MMAP WINRT VSX ARM_INLINE_ASM"
+             MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10"
 
 # parse options
 
@@ -491,16 +512,15 @@
             ;;
         --bit-depth=*)
             bit_depth="$optarg"
-            if [ "$bit_depth" -lt "8" -o "$bit_depth" -gt "10" ]; then
-                echo "Supplied bit depth must be in range [8,10]."
+            if [ "$bit_depth" != "8" -a "$bit_depth" != "10" -a "$bit_depth" != "all" ]; then
+                echo "Supplied bit depth must be 8, 10 or all."
                 exit 1
             fi
-            bit_depth=`expr $bit_depth + 0`
             ;;
         --chroma-format=*)
             chroma_format="$optarg"
-            if [ $chroma_format != "420" -a $chroma_format != "422" -a $chroma_format != "444" -a $chroma_format != "all" ]; then
-                echo "Supplied chroma format must be 420, 422, 444 or all."
+            if [ $chroma_format != "400" -a $chroma_format != "420" -a $chroma_format != "422" -a $chroma_format != "444" -a $chroma_format != "all" ]; then
+                echo "Supplied chroma format must be 400, 420, 422, 444 or all."
                 exit 1
             fi
             ;;
@@ -530,10 +550,10 @@
 fi
 
 if [ "x$host" = x ]; then
-    host=`${SRCPATH}/config.guess`
+    host="$(${SRCPATH}/config.guess)"
 fi
 # normalize a triplet into a quadruplet
-host=`${SRCPATH}/config.sub $host`
+host="$(${SRCPATH}/config.sub $host)"
 
 # split $host
 host_cpu="${host%%-*}"
@@ -544,7 +564,7 @@
 trap 'rm -rf conftest*' EXIT
 
 # test for use of compilers that require specific handling
-cc_base=`basename "$CC"`
+cc_base="$(basename "$CC")"
 QPRE="-"
 if [[ $host_os = mingw* || $host_os = cygwin* ]]; then
     if [[ "$cc_base" = icl || "$cc_base" = icl[\ .]* ]]; then
@@ -720,7 +740,7 @@
         stack_alignment=16
         [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
         if [ "$SYS" = MACOSX ]; then
-            ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX"
+            ASFLAGS="$ASFLAGS -f macho64 -DPREFIX"
             if cc_check '' "-arch x86_64"; then
                 CFLAGS="$CFLAGS -arch x86_64"
                 LDFLAGS="$LDFLAGS -arch x86_64"
@@ -780,8 +800,10 @@
                 CFLAGS="$CFLAGS -arch armv7"
                 LDFLAGS="$LDFLAGS -arch armv7"
             fi
-        elif [ "$SYS" = WINDOWS ] ; then
+        elif [ "$SYS" = WINDOWS ] && [ "$compiler" = CL ] ; then
             AS="${AS-${SRCPATH}/tools/gas-preprocessor.pl -arch arm -as-type armasm -force-thumb -- armasm -nologo -ignore 4509}"
+        elif [ "$SYS" = WINDOWS ] ; then
+            AS="${AS-${SRCPATH}/tools/gas-preprocessor.pl -arch arm -as-type clang -force-thumb -- ${CC} -mimplicit-it=always}"
         else
             AS="${AS-${CC}}"
         fi
@@ -789,9 +811,13 @@
     aarch64)
         ARCH="AARCH64"
         stack_alignment=16
-        AS="${AS-${CC}}"
         if [ "$SYS" = MACOSX ] ; then
+            AS="${AS-${CC}}"
             ASFLAGS="$ASFLAGS -DPREFIX -DPIC"
+        elif [ "$SYS" = WINDOWS ] && [ "$compiler" = CL ] ; then
+            AS="${AS-${SRCPATH}/tools/gas-preprocessor.pl -arch aarch64 -as-type armasm -- armasm64 -nologo}"
+        else
+            AS="${AS-${CC}}"
         fi
         ;;
     s390|s390x)
@@ -866,12 +892,17 @@
     if cc_check '' -mpreferred-stack-boundary=6 ; then
         CFLAGS="$CFLAGS -mpreferred-stack-boundary=6"
         stack_alignment=64
-    elif cc_check '' -mpreferred-stack-boundary=5 ; then
-        CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
-        stack_alignment=32
-    elif [ $stack_alignment -lt 16 ] && cc_check '' -mpreferred-stack-boundary=4 ; then
-        CFLAGS="$CFLAGS -mpreferred-stack-boundary=4"
-        stack_alignment=16
+    elif cc_check '' -mstack-alignment=64 ; then
+        CFLAGS="$CFLAGS -mstack-alignment=64"
+        stack_alignment=64
+    elif [ $stack_alignment -lt 16 ] ; then
+        if cc_check '' -mpreferred-stack-boundary=4 ; then
+            CFLAGS="$CFLAGS -mpreferred-stack-boundary=4"
+            stack_alignment=16
+        elif cc_check '' -mstack-alignment=16 ; then
+            CFLAGS="$CFLAGS -mstack-alignment=16"
+            stack_alignment=16
+        fi
     fi
 elif [ $compiler = ICC -a $ARCH = X86 ]; then
     # icc on linux has various degrees of mod16 stack support
@@ -890,7 +921,7 @@
 
 if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
     if ! as_check "vmovdqa32 [eax]{k1}{z}, zmm0" ; then
-        VER=`($AS --version || echo no assembler) 2>/dev/null | head -n 1`
+        VER="$( ($AS --version || echo no assembler) 2>/dev/null | head -n 1 )"
         echo "Found $VER"
         echo "Minimum version is nasm-2.13"
         echo "If you really want to compile without asm, configure with --disable-asm."
@@ -921,7 +952,12 @@
 fi
 
 if [ $asm = auto -a $ARCH = AARCH64 ] ; then
-    if  cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then define HAVE_NEON
+    if [ $compiler = CL ] && cpp_check '' '' 'defined(_M_ARM64)' ; then
+        define HAVE_AARCH64
+        define HAVE_NEON
+    elif cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then
+        define HAVE_AARCH64
+        define HAVE_NEON
         ASFLAGS="$ASFLAGS -c"
     else
         echo "no NEON support, try adding -mfpu=neon to CFLAGS"
@@ -973,8 +1009,9 @@
 
 if [ "$cli_libx264" = "system" -a "$shared" != "yes" ] ; then
     [ "$static" = "yes" ] && die "Option --system-libx264 can not be used together with --enable-static"
-    if $PKGCONFIG --exists x264 2>/dev/null; then
+    if pkg_check x264 ; then
         X264_LIBS="$($PKGCONFIG --libs x264)"
+        X264_CFLAGS="$($PKGCONFIG --cflags x264)"
         X264_INCLUDE_DIR="${X264_INCLUDE_DIR-$($PKGCONFIG --variable=includedir x264)}"
         configure_system_override "$X264_INCLUDE_DIR" || die "Detection of system libx264 configuration failed"
     else
@@ -1035,10 +1072,21 @@
 fi
 [ "$thread" != "no" ] && define HAVE_THREAD
 
-if cc_check "math.h" "-Werror" "return log2f(2);" ; then
+if cc_check 'math.h' '' 'volatile float x = 2; return log2f(x);' ; then
     define HAVE_LOG2F
 fi
 
+if cc_check 'string.h' '' 'strtok_r(0, 0, 0);' ; then
+    define HAVE_STRTOK_R
+fi
+
+if cc_check 'time.h' '' 'clock_gettime(CLOCK_MONOTONIC, 0);' ; then
+    define HAVE_CLOCK_GETTIME
+elif cc_check 'time.h' '-lrt' 'clock_gettime(CLOCK_MONOTONIC, 0);' ; then
+    define HAVE_CLOCK_GETTIME
+    LDFLAGS="$LDFLAGS -lrt"
+fi
+
 if [ "$SYS" != "WINDOWS" ] && cpp_check "sys/mman.h unistd.h" "" "defined(MAP_PRIVATE)"; then
     define HAVE_MMAP
 fi
@@ -1059,48 +1107,52 @@
 
 if [ "$swscale" = "auto" ] ; then
     swscale="no"
-    if $PKGCONFIG --exists libswscale 2>/dev/null; then
+    if pkg_check 'libswscale libavutil' ; then
         SWSCALE_LIBS="$SWSCALE_LIBS $($PKGCONFIG --libs libswscale libavutil)"
         SWSCALE_CFLAGS="$SWSCALE_CFLAGS $($PKGCONFIG --cflags libswscale libavutil)"
     fi
     [ -z "$SWSCALE_LIBS" ] && SWSCALE_LIBS="-lswscale -lavutil"
 
     if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_init_context(0,0,0);" ; then
-        if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(AV_PIX_FMT_FLAG_RGB)" ; then
+        if cc_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "AVComponentDescriptor x; x.depth = 8;" ; then
             swscale="yes"
         else
-            echo "Warning: AV_PIX_FMT_FLAG_RGB is missing from libavutil, update for swscale support"
+            echo "Warning: libswscale is too old"
         fi
     fi
 fi
 
 if [ "$lavf" = "auto" ] ; then
     lavf="no"
-    if $PKGCONFIG --exists libavformat libavcodec libswscale 2>/dev/null; then
-        LAVF_LIBS="$LAVF_LIBS $($PKGCONFIG --libs libavformat libavcodec libavutil libswscale)"
-        LAVF_CFLAGS="$LAVF_CFLAGS $($PKGCONFIG --cflags libavformat libavcodec libavutil libswscale)"
+    if pkg_check 'libavformat libavcodec libavutil' ; then
+        LAVF_LIBS="$LAVF_LIBS $($PKGCONFIG --libs libavformat libavcodec libavutil)"
+        LAVF_CFLAGS="$LAVF_CFLAGS $($PKGCONFIG --cflags libavformat libavcodec libavutil)"
     fi
-    if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then
+    if [ -z "$LAVF_LIBS" ] && cc_check '' -lavformat ; then
         LAVF_LIBS="-lavformat"
-        for lib in -lpostproc -lavcodec -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32 -lws2_32; do
+        for lib in -lavcodec -lavresample -lswresample -lavutil -lbz2 -lz $libpthread -lole32 -luser32 -lws2_32 -lsecur32 ; do
             cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib"
         done
     fi
-    LAVF_LIBS="-L. $LAVF_LIBS"
-    if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "av_frame_free(0);" ; then
-        if [ "$swscale" = "yes" ]; then
+
+    if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "av_register_all();" ; then
+        if cc_check libavcodec/avcodec.h "$LAVF_CFLAGS $LAVF_LIBS" "avcodec_send_packet(0,0);" ; then
             lavf="yes"
         else
-            echo "Warning: libavformat is not supported without swscale support"
+            echo "Warning: libavformat is too old"
         fi
     fi
+    if [ "$lavf" = "yes" -a "$swscale" = "no" ]; then
+        echo "Warning: libavformat is not supported without swscale support"
+        lavf="no"
+    fi
 fi
 
 if [ "$ffms" = "auto" ] ; then
     ffms_major="2"; ffms_minor="21"; ffms_micro="0"; ffms_bump="0"
     ffms="no"
 
-    if $PKGCONFIG --exists ffms2 2>/dev/null; then
+    if pkg_check ffms2 ; then
         FFMS2_LIBS="$FFMS2_LIBS $($PKGCONFIG --libs ffms2)"
         FFMS2_CFLAGS="$FFMS2_CFLAGS $($PKGCONFIG --cflags ffms2)"
     fi
@@ -1142,13 +1194,13 @@
 
 if [ "$lsmash" = "auto" ] ; then
     lsmash="no"
-    if $PKGCONFIG --exists liblsmash 2>/dev/null; then
+    if pkg_check liblsmash ; then
         LSMASH_LIBS="$LSMASH_LIBS $($PKGCONFIG --libs liblsmash)"
         LSMASH_CFLAGS="$LSMASH_CFLAGS $($PKGCONFIG --cflags liblsmash)"
     fi
     [ -z "$LSMASH_LIBS" ] && LSMASH_LIBS="-llsmash"
 
-    if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" ; then
+    if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" "lsmash_destroy_root(0);" ; then
         if cpp_check lsmash.h "$LSMASH_CFLAGS" "LSMASH_VERSION_MAJOR > 1 || (LSMASH_VERSION_MAJOR == 1 && LSMASH_VERSION_MINOR >= 5)" ; then
             lsmash="yes"
         else
@@ -1165,7 +1217,7 @@
         cc_check "" -lws2_32 && GPAC_LIBS="$GPAC_LIBS -lws2_32"
         cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm"
     fi
-    if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then
+    if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_close(0);" ; then
         if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
             gpac="yes"
         else
@@ -1181,8 +1233,8 @@
     define HAVE_LSMASH
 elif [ "$gpac" = "yes" ] ; then
     mp4="gpac"
-    define HAVE_GPAC
     LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI"
+    define HAVE_GPAC
 fi
 
 if [ "$avs" = "auto" ] ; then
@@ -1206,7 +1258,7 @@
 
 if [ "$pic" = "yes" ] ; then
     [ "$SYS" != WINDOWS -a "$SYS" != CYGWIN ] && CFLAGS="$CFLAGS -fPIC"
-    ASFLAGS="$ASFLAGS -DPIC"
+    [[ "$ASFLAGS" != *"-DPIC"* ]] && ASFLAGS="$ASFLAGS -DPIC"
     # resolve textrels in the x86 asm
     cc_check stdio.h "-shared -Wl,-Bsymbolic" && SOFLAGS="$SOFLAGS -Wl,-Bsymbolic"
     [ $SYS = SunOS -a "$ARCH" = "X86" ] && SOFLAGS="$SOFLAGS -mimpure-text"
@@ -1261,26 +1313,30 @@
     CFLAGS="-Wno-maybe-uninitialized $CFLAGS"
 fi
 
+if [ $compiler = GNU ] && cc_check '' -fvisibility=hidden ; then
+    CFLAGS="$CFLAGS -fvisibility=hidden"
+fi
+
 if [ $compiler = ICC -o $compiler = ICL ] ; then
     if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then
         define HAVE_INTEL_DISPATCHER
     fi
 fi
 
-if [ "$bit_depth" -gt "8" ]; then
-    define HIGH_BIT_DEPTH
-    ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=1"
+if [ "$bit_depth" = "all" ]; then
+    define HAVE_BITDEPTH8
+    define HAVE_BITDEPTH10
+elif [ "$bit_depth" -eq "8" ]; then
+    define HAVE_BITDEPTH8
+elif [ "$bit_depth" -eq "10" ]; then
+    define HAVE_BITDEPTH10
     opencl="no"
-else
-    ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=0"
 fi
 
 if [ "$chroma_format" != "all" ]; then
     define CHROMA_FORMAT CHROMA_$chroma_format
 fi
 
-ASFLAGS="$ASFLAGS -DBIT_DEPTH=$bit_depth"
-
 [ $gpl = yes ] && define HAVE_GPL && x264_gpl=1 || x264_gpl=0
 
 [ $interlaced = yes ] && define HAVE_INTERLACED && x264_interlaced=1 || x264_interlaced=0
@@ -1291,10 +1347,10 @@
     # cygwin can use opencl if it can use LoadLibrary
     if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
         opencl="yes"
-        define HAVE_OPENCL
+        define HAVE_OPENCL "(BIT_DEPTH==8)"
     elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then
         opencl="yes"
-        define HAVE_OPENCL
+        define HAVE_OPENCL "(BIT_DEPTH==8)"
         libdl="-ldl"
     fi
     LDFLAGS="$LDFLAGS $libdl"
@@ -1302,28 +1358,38 @@
 
 #define undefined vars as 0
 for var in $CONFIG_HAVE; do
-    grep -q "HAVE_$var 1" config.h || define HAVE_$var 0
+    grep -q "HAVE_$var " config.h || define HAVE_$var 0
 done
 
 # generate exported config file
 
-config_chroma_format="X264_CSP_I$chroma_format"
-[ "$config_chroma_format" == "X264_CSP_Iall" ] && config_chroma_format="0"
+[ "$bit_depth" = "all" ] && config_bit_depth="0" || config_bit_depth="$bit_depth"
+[ "$chroma_format" = "all" ] && config_chroma_format="0" || config_chroma_format="X264_CSP_I$chroma_format"
 cat > x264_config.h << EOF
-#define X264_BIT_DEPTH     $bit_depth
 #define X264_GPL           $x264_gpl
 #define X264_INTERLACED    $x264_interlaced
+#define X264_BIT_DEPTH     $config_bit_depth
 #define X264_CHROMA_FORMAT $config_chroma_format
 EOF
 
 ${SRCPATH}/version.sh >> x264_config.h
 
+if [ "$shared" = "yes" ]; then
+    CFLAGSSO="$CFLAGSSO -DX264_API_EXPORTS"
+fi
+
 if [ "$cli_libx264" = "system" ] ; then
     if [ "$shared" = "yes" ]; then
-        CLI_LIBX264='$(SONAME)'
+        if [ "$SYS" = "WINDOWS" -o "$SYS" = "CYGWIN" ]; then
+            CLI_LIBX264='$(IMPLIBNAME)'
+        else
+            CLI_LIBX264='$(SONAME)'
+        fi
+        CFLAGSCLI="$CFLAGSCLI -DX264_API_IMPORTS"
     else
         CLI_LIBX264=
         LDFLAGSCLI="$X264_LIBS $LDFLAGSCLI"
+        CFLAGSCLI="$CFLAGSCLI $X264_CFLAGS"
         cc_check 'stdint.h x264.h' '' 'x264_encoder_open(0);' || die "System libx264 can't be used for compilation of this version"
     fi
 else
@@ -1362,7 +1428,11 @@
     LIBX264=libx264.a
     [ -n "$RC" ] && RCFLAGS="$RCFLAGS -I. -o "
 fi
-[ $compiler != GNU ] && CFLAGS="$(cc_cflags $CFLAGS)"
+if [ $compiler != GNU ]; then
+    CFLAGS="$(cc_cflags $CFLAGS)"
+    CFLAGSSO="$(cc_cflags $CFLAGSSO)"
+    CFLAGSCLI="$(cc_cflags $CFLAGSCLI)"
+fi
 if [ $compiler = ICC -o $compiler = ICL ]; then
     # icc does not define __SSE__ until SSE2 optimization and icl never defines it or _M_IX86_FP
     [ \( $ARCH = X86_64 -o $ARCH = X86 \) -a $asm = yes ] && ! cpp_check "" "" "defined(__SSE__)" && define __SSE__
@@ -1401,13 +1471,17 @@
 SYS=$SYS
 CC=$CC
 CFLAGS=$CFLAGS
+CFLAGSSO=$CFLAGSSO
+CFLAGSCLI=$CFLAGSCLI
 COMPILER=$compiler
 COMPILER_STYLE=$compiler_style
 DEPMM=$DEPMM
 DEPMT=$DEPMT
 LD=$LD
 LDFLAGS=$LDFLAGS
+LDFLAGSCLI=$LDFLAGSCLI
 LIBX264=$LIBX264
+CLI_LIBX264=$CLI_LIBX264
 AR=$AR
 RANLIB=$RANLIB
 STRIP=$STRIP
@@ -1427,8 +1501,9 @@
 EOF
 
 if [ $compiler_style = MS ]; then
-    echo '%.o: %.c' >> config.mak
-    echo '	$(CC) $(CFLAGS) -c -Fo$@ $<' >> config.mak
+    echo 'CC_O=-Fo$@' >> config.mak
+else
+    echo 'CC_O=-o $@' >> config.mak
 fi
 
 if [ "$cli" = "yes" ]; then
@@ -1442,14 +1517,7 @@
         echo "SONAME=libx264-$API.dll" >> config.mak
         if [ $compiler_style = MS ]; then
             echo 'IMPLIBNAME=libx264.dll.lib' >> config.mak
-            # GNU ld on windows defaults to exporting all global functions if there are no explicit __declspec(dllexport) declarations
-            # MSVC link does not act similarly, so it is required to make an export definition out of x264.h and use it at link time
-            echo "SOFLAGS=-dll -def:x264.def -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak
-            echo "EXPORTS" > x264.def
-            # export API functions
-            grep "^\(int\|void\|x264_t\).*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264.*\)(.*/\1/;s/open/open_$API/g" >> x264.def
-            # export API variables/data. must be flagged with the DATA keyword
-            grep "extern.*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264\w*\)\W.*/\1 DATA/;" >> x264.def
+            echo "SOFLAGS=-dll -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak
         else
             echo 'IMPLIBNAME=libx264.dll.a' >> config.mak
             echo "SOFLAGS=-shared -Wl,--out-implib,\$(IMPLIBNAME) $SOFLAGS" >> config.mak
@@ -1476,9 +1544,6 @@
     echo 'install: install-lib-static' >> config.mak
 fi
 
-echo "LDFLAGSCLI = $LDFLAGSCLI" >> config.mak
-echo "CLI_LIBX264 = $CLI_LIBX264" >> config.mak
-
 cat > x264.pc << EOF
 prefix=$prefix
 exec_prefix=$exec_prefix
@@ -1487,16 +1552,14 @@
 
 Name: x264
 Description: H.264 (MPEG4 AVC) encoder library
-Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
+Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//; s/ .*//')
 Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl)
 Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl)
-Cflags: -I$includedir
+Cflags: -I$includedir $([ "$shared" = "yes" ] && echo "-DX264_API_IMPORTS")
 EOF
 
 filters="crop select_every"
-gpl_filters=""
 [ $swscale = yes ] && filters="resize $filters"
-[ $gpl = yes ] && filters="$filters $gpl_filters"
 
 cat > conftest.log <<EOF
 platform:      $ARCH
diff -Nru x264-0.152.2854+gite9a5903/debian/changelog x264-0.158.2988+git-20191101.7817004/debian/changelog
--- x264-0.152.2854+gite9a5903/debian/changelog	2018-01-19 11:36:28.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/changelog	2019-11-09 07:33:26.000000000 +0000
@@ -1,3 +1,58 @@
+x264 (2:0.158.2988+git-20191101.7817004-1ppa1) bionic; urgency=medium
+
+  * Patch for GPAC 0.8.
+  * Update to 0.158.2988+git7817004.
+
+ -- Hung-Yi Chen <gaod@hychen.org>  Sat, 9 Nov 2019 15:33:26 +0800
+
+x264 (2:0.158.2984+git3759fcb-1) bionic; urgency=medium
+
+  * Update to 0.158.2984+git3759fcb.
+
+ -- Hung-Yi Chen <gaod@hychen.org>  Thu, 15 Aug 2019 17:28:58 +0800
+
+x264 (2:0.155.2917+git0a84d98-2) unstable; urgency=medium
+
+  * Team upload.
+  * Upload to unstable.
+
+ -- Sebastian Ramacher <sramacher@debian.org>  Thu, 27 Sep 2018 22:03:40 +0200
+
+x264 (2:0.155.2917+git0a84d98-1) experimental; urgency=medium
+
+  [ Ondřej Nový ]
+  * d/copyright: Change Format URL to correct one
+  * d/control: Set Vcs-* to salsa.debian.org
+  * d/changelog: Remove trailing whitespaces
+
+  [ Felipe Sateler ]
+  * Change maintainer address to debian-multimedia@lists.debian.org
+
+  [ Ondřej Nový ]
+  * d/tests: Use AUTOPKGTEST_TMP instead of ADTTMP
+
+  [ Rico Tzschichholz ]
+  * Update to new stable upstream
+  * New upstream version 0.155.2917+git0a84d98
+  * Drop custom 10bit build, upstream supports this as runtime option now
+  * Update debian/control for soname bump
+  * Regenerate manpage
+  * Update copyright years
+
+  [ Sebastian Ramacher ]
+  * debian/{rules,confflags}:
+    - Use dpkg include for architecture variables.
+    - Fix dh_install --list-missing warning.
+    - Remove cruft.
+    - Handle CPPFLAGS.
+  * debian/patches: Export x264_stack_align.
+  * debian/: Bump debhelper compat to 11.
+  * debian/control:
+    - Drop obsolete dpkg-dev B-D.
+    - Bump Standards-Version.
+
+ -- Rico Tzschichholz <ricotz@ubuntu.com>  Fri, 24 Aug 2018 17:25:59 +0200
+
 x264 (2:0.152.2854+gite9a5903-2) unstable; urgency=medium
 
   * Team upload.
@@ -603,11 +658,11 @@
   * Drop --enable-pic, let's see what breaks, LP: #524859
 
   [ Reinhard Tartler ]
-  * New upstream snapshot, no new features, LP: #526396 
+  * New upstream snapshot, no new features, LP: #526396
   * remove quilt infrastructure
   * don't set CFLAGS in debian/rules, upstream build system overrides
     this anyways
-  
+
  -- Reinhard Tartler <siretart@tauware.de>  Sun, 21 Feb 2010 16:57:21 +0100
 
 x264 (2:0.85.1442.1+git781d30-1) lucid; urgency=low
diff -Nru x264-0.152.2854+gite9a5903/debian/compat x264-0.158.2988+git-20191101.7817004/debian/compat
--- x264-0.152.2854+gite9a5903/debian/compat	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/compat	2019-11-09 05:16:41.000000000 +0000
@@ -1 +1 @@
-9
+11
diff -Nru x264-0.152.2854+gite9a5903/debian/confflags x264-0.158.2988+git-20191101.7817004/debian/confflags
--- x264-0.152.2854+gite9a5903/debian/confflags	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/confflags	2019-11-09 05:16:41.000000000 +0000
@@ -2,14 +2,7 @@
 # vi:syntax=make
 # configure flags logic
 
-# Set CFLAGS from DEB_CFLAGS if defined, otherwise add build flags from
-# dpkg-buildflags excluding -O2.
-CFLAGS = $(or $(DEB_CFLAGS),$(shell dpkg-buildflags --get CFLAGS 2>/dev/null | sed -e 's/-O2//g'))
-ifeq (,$(CFLAGS))
-  # Handle case for versions of Debian/Ubuntu that have dpkg-dev (<< 1.15.7).
-  CFLAGS = -fstack-protector --param=ssp-buffer-size=4 -Wformat -Wformat-security -Werror=format-security
-endif
-shared_extra_cflags = $(CFLAGS)
+extra_cflags = $(CFLAGS) $(CPPFLAGS)
 
 LDFLAGS := $(filter-out %-Bsymbolic-functions,$(LDFLAGS))
 
@@ -119,14 +112,6 @@
 endif
 endif
 
-# See Bug#743713, the debian sparc and sh4 ports are currently stuck with gcc 4.6
-#  -fno-aggressive-loop-optimizations was introduced only in gcc 4.8
-# this conditional will also help any backporters.
-HAVEGCC4.8 :=$(shell dpkg --compare-versions `gcc --version | grep ^gcc | sed 's/^.* //g'` ge 4.8 && echo yes || echo no)
-ifeq (yes,$(HAVEGCC4.8))
-common_confflags += --extra-cflags=-fno-aggressive-loop-optimizations
-endif
-
 # MIPS upstream arch, mips, mipsel and mips64el Debian arches; no upstream flags by
 # default
 ifneq (,$(filter mips mipsel mips64el,$(DEB_HOST_GNU_CPU)))
@@ -151,7 +136,7 @@
 ifeq ($(toolchain_arch),)
 toolchain_arch := armv6t2
 endif
-shared_extra_cflags += -march=$(toolchain_arch)
+extra_cflags += -march=$(toolchain_arch)
 else
 do_opt := yes
 opt_libdir := /usr/lib/$(DEB_HOST_MULTIARCH)/neon/vfp
diff -Nru x264-0.152.2854+gite9a5903/debian/control x264-0.158.2988+git-20191101.7817004/debian/control
--- x264-0.152.2854+gite9a5903/debian/control	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/control	2019-11-09 07:33:26.000000000 +0000
@@ -1,21 +1,20 @@
 Source: x264
 Section: libs
 Priority: optional
-Maintainer: Debian Multimedia Maintainers <pkg-multimedia-maintainers@lists.alioth.debian.org>
+Maintainer: Debian Multimedia Maintainers <debian-multimedia@lists.debian.org>
 Uploaders:
  Reinhard Tartler <siretart@tauware.de>,
  Fabian Greffrath <fabian+debian@greffrath.com>,
  Rico Tzschichholz <ricotz@ubuntu.com>
 Build-Depends:
- debhelper (>= 9.20160115),
- dpkg-dev (>= 1.17.14),
+ debhelper (>= 11),
  libavformat-dev (>= 6:9) <!stage1>,
  libffms2-dev <!stage1>,
  libgpac-dev (>= 0.5.0+svn4288~) <!stage1>,
  nasm (>= 2.13) [any-i386 any-amd64]
-Standards-Version: 4.1.3
-Vcs-Git: https://anonscm.debian.org/git/pkg-multimedia/x264.git
-Vcs-Browser: https://anonscm.debian.org/cgit/pkg-multimedia/x264.git
+Standards-Version: 4.1.4
+Vcs-Git: https://salsa.debian.org/multimedia-team/x264.git
+Vcs-Browser: https://salsa.debian.org/multimedia-team/x264
 Homepage: http://www.videolan.org/developers/x264.html
 
 Package: x264
@@ -48,7 +47,7 @@
   * parallel encoding on multiple CPUs
   * interlaced streams
 
-Package: libx264-152
+Package: libx264-158
 Architecture: any
 Multi-Arch: same
 Depends:
@@ -65,7 +64,7 @@
 Architecture: any
 Multi-Arch: same
 Depends:
- libx264-152 (= ${binary:Version}),
+ libx264-158 (= ${binary:Version}),
  ${misc:Depends}
 Description: development files for libx264
  libx264 is an advanced encoding library for creating H.264 (MPEG-4 AVC)
diff -Nru x264-0.152.2854+gite9a5903/debian/control.in x264-0.158.2988+git-20191101.7817004/debian/control.in
--- x264-0.152.2854+gite9a5903/debian/control.in	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/control.in	2019-11-09 05:16:41.000000000 +0000
@@ -1,21 +1,20 @@
 Source: x264
 Section: libs
 Priority: optional
-Maintainer: Debian Multimedia Maintainers <pkg-multimedia-maintainers@lists.alioth.debian.org>
+Maintainer: Debian Multimedia Maintainers <debian-multimedia@lists.debian.org>
 Uploaders:
  Reinhard Tartler <siretart@tauware.de>,
  Fabian Greffrath <fabian+debian@greffrath.com>,
  Rico Tzschichholz <ricotz@ubuntu.com>
 Build-Depends:
- debhelper (>= 9.20160115),
- dpkg-dev (>= 1.17.14),
+ debhelper (>= 11),
  libavformat-dev (>= 6:9) <!stage1>,
  libffms2-dev <!stage1>,
  libgpac-dev (>= 0.5.0+svn4288~) <!stage1>,
  nasm (>= 2.13) [any-i386 any-amd64]
-Standards-Version: 4.1.3
-Vcs-Git: https://anonscm.debian.org/git/pkg-multimedia/x264.git
-Vcs-Browser: https://anonscm.debian.org/cgit/pkg-multimedia/x264.git
+Standards-Version: 4.1.4
+Vcs-Git: https://salsa.debian.org/multimedia-team/x264.git
+Vcs-Browser: https://salsa.debian.org/multimedia-team/x264
 Homepage: http://www.videolan.org/developers/x264.html
 
 Package: x264
diff -Nru x264-0.152.2854+gite9a5903/debian/copyright x264-0.158.2988+git-20191101.7817004/debian/copyright
--- x264-0.152.2854+gite9a5903/debian/copyright	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/copyright	2019-11-09 05:16:41.000000000 +0000
@@ -1,17 +1,17 @@
-Format: http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?revision=174&view=markup
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 Upstream-Name: x264
 Upstream-Contact: x264-devel@videolan.org
 Source: http://www.videolan.org/developers/x264.html
 
 Files: *
-Copyright: 2003-2017 x264 project
+Copyright: 2003-2018 x264 project
 License: GPL-2+
 Comment:
  This program is also available under a commercial proprietary license.
  For more information, contact us at licensing@x264.com.
 
 Files: common/x86/x86inc.asm
-Copyright: 2005-2017 x264 project
+Copyright: 2005-2018 x264 project
 License: ISC
  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
diff -Nru x264-0.152.2854+gite9a5903/debian/libx264N.install.in x264-0.158.2988+git-20191101.7817004/debian/libx264N.install.in
--- x264-0.152.2854+gite9a5903/debian/libx264N.install.in	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/libx264N.install.in	2019-11-09 05:16:41.000000000 +0000
@@ -1,2 +1 @@
 shared/usr/lib/@DEB_HOST_MULTIARCH@/libx264.so.*     usr/lib/@DEB_HOST_MULTIARCH@
-shared/usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit/libx264.so.*     usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit
diff -Nru x264-0.152.2854+gite9a5903/debian/patches/gpac-0.8.patch x264-0.158.2988+git-20191101.7817004/debian/patches/gpac-0.8.patch
--- x264-0.152.2854+gite9a5903/debian/patches/gpac-0.8.patch	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/patches/gpac-0.8.patch	2019-11-09 07:30:02.000000000 +0000
@@ -0,0 +1,31 @@
+Description: Patch for GPAC 0.8.
+ GPAC 0.8 added a sixth parameter to gf_isom_set_pixel_aspect_ratio().
+Author: Hung-Yi Chen <gaod@hychen.org>
+
+Index: x264-0.158.2988+git7817004/configure
+===================================================================
+--- x264-0.158.2988+git7817004.orig/configure
++++ x264-0.158.2988+git7817004/configure
+@@ -1218,7 +1218,7 @@
+         cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm"
+     fi
+     if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_close(0);" ; then
+-        if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
++        if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0,0);" ; then
+             gpac="yes"
+         else
+             echo "Warning: gpac is too old, update to 2007-06-21 UTC or later"
+ 
+Index: x264-0.158.2988+git7817004/output/mp4.c
+===================================================================
+--- x264-0.158.2988+git7817004.orig/output/mp4.c
++++ x264-0.158.2988+git7817004/output/mp4.c
+@@ -233,7 +233,7 @@
+             dw *= sar;
+         else
+             dh /= sar;
+-        gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height );
++        gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height, 0 );
+         gf_isom_set_track_layout_info( p_mp4->p_file, p_mp4->i_track, dw, dh, 0, 0, 0 );
+     }
+
diff -Nru x264-0.152.2854+gite9a5903/debian/patches/properly_detect_x32.patch x264-0.158.2988+git-20191101.7817004/debian/patches/properly_detect_x32.patch
--- x264-0.152.2854+gite9a5903/debian/patches/properly_detect_x32.patch	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/patches/properly_detect_x32.patch	2019-11-09 05:16:41.000000000 +0000
@@ -3,9 +3,11 @@
  This version doesn't use any assembly yet, a proper port is needed.
 Author: Adam Borowski <kilobyte@angband.pl>
 
---- x264-0.142.2431+gita5831aa.orig/common/common.h
-+++ x264-0.142.2431+gita5831aa/common/common.h
-@@ -1010,7 +1010,7 @@ static int ALWAYS_INLINE x264_predictor_
+Index: x264-0.158.2984+git3759fcb/common/common.h
+===================================================================
+--- x264-0.158.2984+git3759fcb.orig/common/common.h
++++ x264-0.158.2984+git3759fcb/common/common.h
+@@ -800,7 +800,7 @@ static ALWAYS_INLINE int x264_predictor_
      return cnt;
  }
  
@@ -14,9 +16,11 @@
  #include "x86/util.h"
  #endif
  
---- x264-0.142.2431+gita5831aa.orig/configure
-+++ x264-0.142.2431+gita5831aa/configure
-@@ -556,6 +556,10 @@ esac
+Index: x264-0.158.2984+git3759fcb/configure
+===================================================================
+--- x264-0.158.2984+git3759fcb.orig/configure
++++ x264-0.158.2984+git3759fcb/configure
+@@ -704,6 +704,10 @@ esac
  
  LDFLAGS="$LDFLAGS $libm"
  
@@ -27,7 +31,7 @@
  stack_alignment=4
  case $host_cpu in
      i*86)
-@@ -617,6 +621,10 @@ case $host_cpu in
+@@ -760,6 +764,10 @@ case $host_cpu in
              ASFLAGS="$ASFLAGS -f elf64"
          fi
          ;;
diff -Nru x264-0.152.2854+gite9a5903/debian/patches/series x264-0.158.2988+git-20191101.7817004/debian/patches/series
--- x264-0.152.2854+gite9a5903/debian/patches/series	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/patches/series	2019-11-09 07:31:50.000000000 +0000
@@ -1,2 +1,4 @@
 link_gpac_dynamically.patch
 properly_detect_x32.patch
+version.patch
+gpac-0.8.patch
diff -Nru x264-0.152.2854+gite9a5903/debian/patches/version.patch x264-0.158.2988+git-20191101.7817004/debian/patches/version.patch
--- x264-0.152.2854+gite9a5903/debian/patches/version.patch	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/patches/version.patch	2019-11-09 05:17:41.000000000 +0000
@@ -0,0 +1,43 @@
+Description: Update version.
+ Script modified from upstream source for Debian packaging.
+Author: Hung-Yi Chen <gaod@hychen.org>
+
+Index: x264-0.158.2988+git7817004/version.sh
+===================================================================
+--- x264-0.158.2988+git7817004.orig/version.sh
++++ x264-0.158.2988+git7817004/version.sh
+@@ -1,28 +1,7 @@
+ #!/bin/sh
+-
+-cd "$(dirname "$0")" >/dev/null && [ -f x264.h ] || exit 1
+-
+-api="$(grep '#define X264_BUILD' < x264.h | sed 's/^.* \([1-9][0-9]*\).*$/\1/')"
+-ver="x"
+-version=""
+-
+-if [ -d .git ] && command -v git >/dev/null 2>&1 ; then
+-    localver="$(($(git rev-list HEAD | wc -l)))"
+-    if [ "$localver" -gt 1 ] ; then
+-        ver_diff="$(($(git rev-list origin/master..HEAD | wc -l)))"
+-        ver="$((localver-ver_diff))"
+-        echo "#define X264_REV $ver"
+-        echo "#define X264_REV_DIFF $ver_diff"
+-        if [ "$ver_diff" -ne 0 ] ; then
+-            ver="$ver+$ver_diff"
+-        fi
+-        if git status | grep -q "modified:" ; then
+-            ver="${ver}M"
+-        fi
+-        ver="$ver $(git rev-list -n 1 HEAD | cut -c 1-7)"
+-        version=" r$ver"
+-    fi
+-fi
+-
+-echo "#define X264_VERSION \"$version\""
+-echo "#define X264_POINTVER \"0.$api.$ver\""
++# Hard-coded version information for Debian packaging: the packaged source
++# does not include the .git repository that the upstream script queries.
++echo '#define X264_REV 2988'
++echo '#define X264_REV_DIFF 0'
++echo '#define X264_VERSION " r2988 7817004"'
++echo '#define X264_POINTVER "0.158.2988 7817004"'
diff -Nru x264-0.152.2854+gite9a5903/debian/rules x264-0.158.2988+git-20191101.7817004/debian/rules
--- x264-0.152.2854+gite9a5903/debian/rules	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/rules	2019-11-09 05:16:41.000000000 +0000
@@ -2,18 +2,14 @@
 
 libx264N := libx264-$(shell awk '/\#define X264_BUILD/{print $$3}' x264.h)
 
-DEB_HOST_MULTIARCH  ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
-DEB_BUILD_GNU_TYPE  ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
-DEB_HOST_GNU_TYPE   ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
-DEB_HOST_GNU_CPU    ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU)
-
+include /usr/share/dpkg/architecture.mk
 include debian/confflags
 
 DH_INSTALL_FILES = debian/$(libx264N).install \
                    debian/libx264-dev.install
 
 %:
-	dh $@ --parallel
+	dh $@
 
 .PHONY: debian/control
 debian/control:
@@ -24,13 +20,13 @@
 
 override_dh_auto_build:
 	# Build static lib
-	LDFLAGS="$(LDFLAGS)" ./configure $(static_confflags) \
+	CFLAGS="$(extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(static_confflags) \
 	   || ( tail -v -n +0 config.log config.log ; exit 1 )
 	$(MAKE)
 	$(MAKE) install DESTDIR=$(CURDIR)/debian/install/static
 	$(MAKE) distclean
 	# Build shared lib
-	CFLAGS="$(shared_extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) \
+	CFLAGS="$(extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) \
 	   || ( tail -v -n +0 config.log config.log ; exit 1 )
 	$(MAKE)
 	$(MAKE) install DESTDIR=$(CURDIR)/debian/install/shared
@@ -42,25 +38,6 @@
 	$(MAKE)
 	$(MAKE) install DESTDIR=$(CURDIR)/debian/install/opt
 endif
-	# now do the 10 bit builds
-	$(MAKE) distclean
-	CFLAGS="$(shared_extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) --bit-depth=10 \
-	   || ( tail -v -n +0 config.log config.log ; exit 1 )
-	$(MAKE)
-	install -d -m755 $(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit
-	install -m755 libx264.so.* $(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit
-	sed -e 's,@DEB_HOST_MULTIARCH@,$(DEB_HOST_MULTIARCH),' \
-		debian/x264-10bit.in > $(CURDIR)/debian/install/shared/usr/bin/x264-10bit
-	chmod 755 $(CURDIR)/debian/install/shared/usr/bin/x264-10bit
-ifeq ($(do_opt),yes)
-	$(MAKE) distclean
-	# Build opt lib
-	LDFLAGS="$(LDFLAGS)" ./configure $(opt_confflags) --bit-depth=10 \
-	   || ( tail -v -n +0 config.log config.log ; exit 1 )
-	$(MAKE)
-	install -d -m755 $(CURDIR)/debian/install/opt/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit
-	install -m755 libx264.so.* $(CURDIR)/debian/install/opt/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit
-endif
 
 override_dh_auto_configure:
 	# dh_auto_configure phase handled via dh_auto_build.
@@ -74,12 +51,15 @@
 	dh_clean config.mak2 $(DH_INSTALL_FILES)
 
 override_dh_install: $(DH_INSTALL_FILES)
-	dh_install --list-missing --sourcedir=debian/install
+	dh_install --sourcedir=debian/install
 ifeq ($(do_opt),yes)
 	mkdir -p debian/$(libx264N)$(opt_libdir)
 	cp -a debian/install/opt$(opt_libdir)/*.so.* debian/$(libx264N)$(opt_libdir)
 endif
 
+override_dh_missing:
+	dh_missing --list-missing --sourcedir=debian/install
+
 debian/x264.1: build
 	env LD_LIBRARY_PATH="$(LD_LIBRARY_PATH):$(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)" \
 	help2man -n "fast h264 encoder" -N -s1 -S "Videolan project" -h '--fullhelp' \
diff -Nru x264-0.152.2854+gite9a5903/debian/tests/encode-testimage x264-0.158.2988+git-20191101.7817004/debian/tests/encode-testimage
--- x264-0.152.2854+gite9a5903/debian/tests/encode-testimage	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/tests/encode-testimage	2019-11-09 05:16:41.000000000 +0000
@@ -4,7 +4,7 @@
 
 set -e
 
-cd "$ADTTMP"
+cd "$AUTOPKGTEST_TMP"
 
 ffmpeg -y -filter_complex testsrc -t 10 in.avi
 x264 --crf 24 -o out.mkv in.avi
diff -Nru x264-0.152.2854+gite9a5903/debian/x264.1 x264-0.158.2988+git-20191101.7817004/debian/x264.1
--- x264-0.152.2854+gite9a5903/debian/x264.1	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/x264.1	2019-11-09 05:16:41.000000000 +0000
@@ -1,9 +1,9 @@
-.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.47.5.
-.TH X264 "1" "December 2017" "Videolan project" "User Commands"
+.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.47.6.
+.TH X264 "1" "August 2018" "Videolan project" "User Commands"
 .SH NAME
 x264 \- fast h264 encoder
 .SH DESCRIPTION
-x264 core:152 r2854 e9a5903
+x264 core:155 r2917 0a84d98
 Syntax: x264 [options] \fB\-o\fR outfile infile
 .PP
 Infile can be raw (in which case resolution is required),
@@ -18,8 +18,9 @@
 \&.flv \-> Flash Video
 \&.mp4 \-> MP4 if compiled with GPAC or L\-SMASH support (gpac)
 .PP
-Output bit depth: 10 (configured at compile time)
-.SH OPTIONS
+Output bit depth: 8/10
+\&.
+Options:
 .TP
 \fB\-h\fR, \fB\-\-help\fR
 List basic options
@@ -283,13 +284,13 @@
 Ratecontrol:
 .TP
 \fB\-q\fR, \fB\-\-qp\fR <integer>
-Force constant QP (0\-69, 0=lossless)
+Force constant QP (0\-81, 0=lossless)
 .TP
 \fB\-B\fR, \fB\-\-bitrate\fR <integer>
 Set bitrate (kbit/s)
 .TP
 \fB\-\-crf\fR <float>
-Quality\-based VBR (0\-51) [23.0]
+Quality\-based VBR (\fB\-12\-51\fR) [23.0]
 .HP
 \fB\-\-rc\-lookahead\fR <integer> Number of frames for frametype lookahead [40]
 .HP
@@ -517,18 +518,25 @@
 .TP
 smpte240m, linear, log100, log316,
 iec61966\-2\-4, bt1361e, iec61966\-2\-1,
-bt2020\-10, bt2020\-12, smpte2084, smpte428
+bt2020\-10, bt2020\-12, smpte2084, smpte428,
+arib\-std\-b67
 .TP
 \fB\-\-colormatrix\fR <string>
 Specify color matrix setting ["???"]
 \- undef, bt709, fcc, bt470bg, smpte170m,
 .TP
 smpte240m, GBR, YCgCo, bt2020nc, bt2020c,
-smpte2085
+smpte2085, chroma\-derived\-nc,
+chroma\-derived\-c, ICtCp
 .TP
 \fB\-\-chromaloc\fR <integer>
 Specify chroma sample location (0 to 5) [0]
 .TP
+\fB\-\-alternative\-transfer\fR <string> Specify an alternative transfer
+characteristics ["undef"]
+.IP
+\- same values as \fB\-\-transfer\fR
+.TP
 \fB\-\-nal\-hrd\fR <string>
 Signal HRD information (requires vbv\-bufsize)
 \- none, vbr, cbr (cbr not allowed in .mp4)
@@ -570,25 +578,22 @@
 \- valid csps for `lavf' demuxer:
 yuv420p, yuyv422, rgb24, bgr24, yuv422p,
 yuv444p, yuv410p, yuv411p, gray, monow, monob,
-pal8, yuvj420p, yuvj422p, yuvj444p, xvmcmc,
-xvmcidct, uyvy422, uyyvyy411, bgr8, bgr4,
-bgr4_byte, rgb8, rgb4, rgb4_byte, nv12, nv21,
-argb, rgba, abgr, bgra, gray16be, gray16le,
-yuv440p, yuvj440p, yuva420p, vdpau_h264,
-vdpau_mpeg1, vdpau_mpeg2, vdpau_wmv3,
-vdpau_vc1, rgb48be, rgb48le, rgb565be,
-rgb565le, rgb555be, rgb555le, bgr565be,
-bgr565le, bgr555be, bgr555le, vaapi_moco,
-vaapi_idct, vaapi_vld, yuv420p16le,
-yuv420p16be, yuv422p16le, yuv422p16be,
-yuv444p16le, yuv444p16be, vdpau_mpeg4,
-dxva2_vld, rgb444le, rgb444be, bgr444le,
-bgr444be, ya8, bgr48be, bgr48le, yuv420p9be,
-yuv420p9le, yuv420p10be, yuv420p10le,
-yuv422p10be, yuv422p10le, yuv444p9be,
-yuv444p9le, yuv444p10be, yuv444p10le,
-yuv422p9be, yuv422p9le, vda_vld, gbrp, gbrp9be,
-gbrp9le, gbrp10be, gbrp10le, gbrp16be,
+pal8, yuvj420p, yuvj422p, yuvj444p, uyvy422,
+uyyvyy411, bgr8, bgr4, bgr4_byte, rgb8, rgb4,
+rgb4_byte, nv12, nv21, argb, rgba, abgr, bgra,
+gray16be, gray16le, yuv440p, yuvj440p,
+yuva420p, rgb48be, rgb48le, rgb565be, rgb565le,
+rgb555be, rgb555le, bgr565be, bgr565le,
+bgr555be, bgr555le, vaapi_moco, vaapi_idct,
+vaapi_vld, yuv420p16le, yuv420p16be,
+yuv422p16le, yuv422p16be, yuv444p16le,
+yuv444p16be, dxva2_vld, rgb444le, rgb444be,
+bgr444le, bgr444be, ya8, bgr48be, bgr48le,
+yuv420p9be, yuv420p9le, yuv420p10be,
+yuv420p10le, yuv422p10be, yuv422p10le,
+yuv444p9be, yuv444p9le, yuv444p10be,
+yuv444p10le, yuv422p9be, yuv422p9le, gbrp,
+gbrp9be, gbrp9le, gbrp10be, gbrp10le, gbrp16be,
 gbrp16le, yuva422p, yuva444p, yuva420p9be,
 yuva420p9le, yuva422p9be, yuva422p9le,
 yuva444p9be, yuva444p9le, yuva420p10be,
@@ -597,10 +602,10 @@
 yuva420p16le, yuva422p16be, yuva422p16le,
 yuva444p16be, yuva444p16le, vdpau, xyz12le,
 xyz12be, nv16, nv20le, nv20be, rgba64be,
-rgba64le, bgra64be, bgra64le, yvyu422, vda,
-ya16be, ya16le, gbrap, gbrap16be, gbrap16le,
-qsv, mmal, d3d11va_vld, cuda, 0rgb, rgb0, 0bgr,
-bgr0, yuv420p12be, yuv420p12le, yuv420p14be,
+rgba64le, bgra64be, bgra64le, yvyu422, ya16be,
+ya16le, gbrap, gbrap16be, gbrap16le, qsv, mmal,
+d3d11va_vld, cuda, 0rgb, rgb0, 0bgr, bgr0,
+yuv420p12be, yuv420p12le, yuv420p14be,
 yuv420p14le, yuv422p12be, yuv422p12le,
 yuv422p14be, yuv422p14le, yuv444p12be,
 yuv444p12le, yuv444p14be, yuv444p14le,
@@ -609,19 +614,22 @@
 bayer_gbrg8, bayer_grbg8, bayer_bggr16le,
 bayer_bggr16be, bayer_rggb16le, bayer_rggb16be,
 bayer_gbrg16le, bayer_gbrg16be, bayer_grbg16le,
-bayer_grbg16be, yuv440p10le, yuv440p10be,
+bayer_grbg16be, xvmc, yuv440p10le, yuv440p10be,
 yuv440p12le, yuv440p12be, ayuv64le, ayuv64be,
 videotoolbox_vld, p010le, p010be, gbrap12be,
 gbrap12le, gbrap10be, gbrap10le, mediacodec,
 gray12be, gray12le, gray10be, gray10le, p016le,
 p016be, d3d11, gray9be, gray9le, gbrpf32be,
-gbrpf32le, gbrapf32be, gbrapf32le, drm_prime
+gbrpf32le, gbrapf32be, gbrapf32le, drm_prime,
+opencl
 .TP
 \fB\-\-output\-csp\fR <string>
 Specify output colorspace ["i420"]
 \- i420, i422, i444, rgb
 .HP
 \fB\-\-input\-depth\fR <integer> Specify input bit depth for raw input
+.HP
+\fB\-\-output\-depth\fR <integer> Specify output bit depth
 .TP
 \fB\-\-input\-range\fR <string>
 Specify input color range ["auto"]
@@ -778,11 +786,11 @@
 offsets: the offset into the step to select a frame
 see: http://avisynth.nl/index.php/Select#SelectEvery
 .PP
-(libswscale 4.8.100)
-(libavformat 57.83.100)
+(libswscale 5.1.100)
+(libavformat 58.12.100)
 (ffmpegsource 2.23.0.0)
-built on Dec 31 2017, gcc: 7.2.0
-x264 configuration: \fB\-\-bit\-depth\fR=\fI\,8\/\fR \fB\-\-chroma\-format\fR=\fI\,all\/\fR
-libx264 configuration: \fB\-\-bit\-depth\fR=\fI\,10\/\fR \fB\-\-chroma\-format\fR=\fI\,all\/\fR
+built on Aug 24 2018, gcc: 8.2.0
+x264 configuration: \fB\-\-chroma\-format\fR=\fI\,all\/\fR
+libx264 configuration: \fB\-\-chroma\-format\fR=\fI\,all\/\fR
 x264 license: GPL version 2 or later
 libswscale/libavformat/ffmpegsource license: GPL version 2 or later
diff -Nru x264-0.152.2854+gite9a5903/debian/x264-10bit.1 x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.1
--- x264-0.152.2854+gite9a5903/debian/x264-10bit.1	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.1	1970-01-01 00:00:00.000000000 +0000
@@ -1,13 +0,0 @@
-.TH x264-10bit 1
-.SH NAME
-x264-10bit \- wrapper script for the 10-bit variant of the libx264 shared library
-.SH SYNOPSIS
-x264-10bit <prog> <arguments>
-.SH DESCRIPTION
-This is a wrapper script that sets the LD_LIBRARY_PATH variable so that the 10-bit variant of the libx264 shared library is preferred over the regular 8-bit variant. It then calls the program <prog> with the arguments <arguments>.
-.SH OPTIONS
-This wrapper script has no options. All arguments are passed over to the called program.
-.SH SEE ALSO
-x264(1)
-.SH AUTHOR
-Reinhard Tartler <siretart@debian.org>
diff -Nru x264-0.152.2854+gite9a5903/debian/x264-10bit.in x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.in
--- x264-0.152.2854+gite9a5903/debian/x264-10bit.in	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.in	1970-01-01 00:00:00.000000000 +0000
@@ -1,27 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2014 Reinhard Tartler
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-# .
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# .
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-export LD_LIBRARY_PATH=/usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH:-}
-
-if [ $# -gt 0 ]; then
-    exec "$@"
-    echo "Failed to execute '$@'" >&2
-    exit 1
-fi
-
-echo "Usage `basename $0` <prog> <arguments>" >&2
-exit 2
diff -Nru x264-0.152.2854+gite9a5903/debian/x264-get-orig-source x264-0.158.2988+git-20191101.7817004/debian/x264-get-orig-source
--- x264-0.152.2854+gite9a5903/debian/x264-get-orig-source	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/x264-get-orig-source	2019-11-09 05:16:41.000000000 +0000
@@ -3,8 +3,8 @@
 # Script used to generate the orig source tarball for x264.
 
 X264_GIT_URL="git://git.videolan.org/x264.git"
-X264_GIT_COMMIT="e9a5903edf8ca59ef20e6f4894c196f135af735e"
-DATE_RETRIEVED="20171224"
+X264_GIT_COMMIT="0a84d986e7020f8344f00752e3600b9769cc1e85"
+DATE_RETRIEVED="20180806"
 COMMIT_SHORT_FORM="$(echo $X264_GIT_COMMIT | \
                      sed -e 's/^\([[:xdigit:]]\{,7\}\).*/\1/')"
 
diff -Nru x264-0.152.2854+gite9a5903/debian/x264.install x264-0.158.2988+git-20191101.7817004/debian/x264.install
--- x264-0.152.2854+gite9a5903/debian/x264.install	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/x264.install	2019-11-09 05:16:41.000000000 +0000
@@ -1,2 +1 @@
 shared/usr/bin/x264     usr/bin
-shared/usr/bin/x264-10bit     usr/bin
diff -Nru x264-0.152.2854+gite9a5903/debian/x264.manpages x264-0.158.2988+git-20191101.7817004/debian/x264.manpages
--- x264-0.152.2854+gite9a5903/debian/x264.manpages	2018-01-19 11:35:17.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/debian/x264.manpages	2019-11-09 05:16:41.000000000 +0000
@@ -1,2 +1 @@
 debian/x264.1
-debian/x264-10bit.1
diff -Nru x264-0.152.2854+gite9a5903/encoder/analyse.c x264-0.158.2988+git-20191101.7817004/encoder/analyse.c
--- x264-0.152.2854+gite9a5903/encoder/analyse.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/analyse.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.c: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -120,124 +120,6 @@
 
 } x264_mb_analysis_t;
 
-/* lambda = pow(2,qp/6-2) */
-const uint16_t x264_lambda_tab[QP_MAX_MAX+1] =
-{
-   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
-   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
-   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
-   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
-  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
-  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
-  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
- 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
- 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
-1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
-2580,2896,                               /* 80-81 */
-};
-
-/* lambda2 = pow(lambda,2) * .9 * 256 */
-/* Capped to avoid overflow */
-const int x264_lambda2_tab[QP_MAX_MAX+1] =
-{
-       14,       18,       22,       28,       36,       45,      57,      72, /*  0- 7 */
-       91,      115,      145,      182,      230,      290,     365,     460, /*  8-15 */
-      580,      731,      921,     1161,     1462,     1843,    2322,    2925, /* 16-23 */
-     3686,     4644,     5851,     7372,     9289,    11703,   14745,   18578, /* 24-31 */
-    23407,    29491,    37156,    46814,    58982,    74313,   93628,  117964, /* 32-39 */
-   148626,   187257,   235929,   297252,   374514,   471859,  594505,  749029, /* 40-47 */
-   943718,  1189010,  1498059,  1887436,  2378021,  2996119, 3774873, 4756042, /* 48-55 */
-  5992238,  7549747,  9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
- 38048341, 47937906, 60397977, 76096683, 95875813,120795955,                   /* 64-69 */
-134217727,134217727,134217727,134217727,134217727,134217727,                   /* 70-75 */
-134217727,134217727,134217727,134217727,134217727,134217727,                   /* 76-81 */
-};
-
-const uint8_t x264_exp2_lut[64] =
-{
-      0,   3,   6,   8,  11,  14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
-     48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98, 102,
-    106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
-    175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
-};
-
-const float x264_log2_lut[128] =
-{
-    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
-    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
-    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
-    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
-    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
-    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
-    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
-    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
-    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
-    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
-    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
-    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
-    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
-    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
-    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
-    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
-};
-
-/* Avoid an int/float conversion. */
-const float x264_log2_lz_lut[32] =
-{
-    31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
-};
-
-// should the intra and inter lambdas be different?
-// I'm just matching the behaviour of deadzone quant.
-static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] =
-{
-    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
-    {
-               46,       58,       73,       92,      117,      147,
-              185,      233,      294,      370,      466,      587,
-              740,      932,     1174,     1480,     1864,     2349,
-             2959,     3728,     4697,     5918,     7457,     9395,
-            11837,    14914,    18790,    23674,    29828,    37581,
-            47349,    59656,    75163,    94699,   119313,   150326,
-           189399,   238627,   300652,   378798,   477255,   601304,
-           757596,   954511,  1202608,  1515192,  1909022,  2405217,
-          3030384,  3818045,  4810435,  6060769,  7636091,  9620872,
-         12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
-         48486154, 61088726, 76966972, 96972308,
-        122177453,134217727,134217727,134217727,134217727,134217727,
-        134217727,134217727,134217727,134217727,134217727,134217727,
-    },
-    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
-    {
-               27,       34,       43,       54,       68,       86,
-              108,      136,      172,      216,      273,      343,
-              433,      545,      687,      865,     1090,     1374,
-             1731,     2180,     2747,     3461,     4361,     5494,
-             6922,     8721,    10988,    13844,    17442,    21976,
-            27688,    34885,    43953,    55377,    69771,    87906,
-           110755,   139543,   175813,   221511,   279087,   351627,
-           443023,   558174,   703255,   886046,  1116348,  1406511,
-          1772093,  2232697,  2813022,  3544186,  4465396,  5626046,
-          7088374,  8930791, 11252092, 14176748, 17861583, 22504184,
-         28353495, 35723165, 45008368, 56706990,
-         71446330, 90016736,113413980,134217727,134217727,134217727,
-        134217727,134217727,134217727,134217727,134217727,134217727,
-        134217727,134217727,134217727,134217727,134217727,134217727,
-    }
-};
-
-#define MAX_CHROMA_LAMBDA_OFFSET 36
-static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] =
-{
-       16,    20,    25,    32,    40,    50,
-       64,    80,   101,   128,   161,   203,
-      256,   322,   406,   512,   645,   812,
-     1024,  1290,  1625,  2048,  2580,  3250,
-     4096,  5160,  6501,  8192, 10321, 13003,
-    16384, 20642, 26007, 32768, 41285, 52015,
-    65535
-};
-
 /* TODO: calculate CABAC costs */
 static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] =
 {
@@ -256,18 +138,14 @@
     5, 3, 3, 1
 };
 
-static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
-
-static uint16_t x264_cost_ref[QP_MAX+1][3][33];
-static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
-static uint16_t x264_cost_i4x4_mode[(QP_MAX+2)*32];
+static void analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
 
 static int init_costs( x264_t *h, float *logs, int qp )
 {
     if( h->cost_mv[qp] )
         return 0;
 
-    int mv_range = h->param.analyse.i_mv_range;
+    int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED;
     int lambda = x264_lambda_tab[qp];
     /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
     CHECKED_MALLOC( h->cost_mv[qp], (4*4*mv_range + 1) * sizeof(uint16_t) );
@@ -277,11 +155,9 @@
         h->cost_mv[qp][-i] =
         h->cost_mv[qp][i]  = X264_MIN( (int)(lambda * logs[i] + .5f), UINT16_MAX );
     }
-    x264_pthread_mutex_lock( &cost_ref_mutex );
     for( int i = 0; i < 3; i++ )
         for( int j = 0; j < 33; j++ )
-            x264_cost_ref[qp][i][j] = i ? X264_MIN( lambda * bs_size_te( i, j ), UINT16_MAX ) : 0;
-    x264_pthread_mutex_unlock( &cost_ref_mutex );
+            h->cost_table->ref[qp][i][j] = i ? X264_MIN( lambda * bs_size_te( i, j ), UINT16_MAX ) : 0;
     if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
     {
         for( int j = 0; j < 4; j++ )
@@ -292,7 +168,7 @@
                 h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
         }
     }
-    uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + qp*32;
+    uint16_t *cost_i4x4_mode = h->cost_table->i4x4_mode[qp];
     for( int i = 0; i < 17; i++ )
         cost_i4x4_mode[i] = 3*lambda*(i!=8);
     return 0;
@@ -302,7 +178,7 @@
 
 int x264_analyse_init_costs( x264_t *h )
 {
-    int mv_range = h->param.analyse.i_mv_range;
+    int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED;
     float *logs = x264_malloc( (2*4*mv_range+1) * sizeof(float) );
     if( !logs )
         return -1;
@@ -327,14 +203,16 @@
 
 void x264_analyse_free_costs( x264_t *h )
 {
-    int mv_range = h->param.analyse.i_mv_range;
+    int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED;
     for( int i = 0; i < QP_MAX+1; i++ )
     {
         if( h->cost_mv[i] )
             x264_free( h->cost_mv[i] - 2*4*mv_range );
-        if( h->cost_mv_fpel[i][0] )
-            for( int j = 0; j < 4; j++ )
+        for( int j = 0; j < 4; j++ )
+        {
+            if( h->cost_mv_fpel[i][j] )
                 x264_free( h->cost_mv_fpel[i][j] - 2*mv_range );
+        }
     }
 }
 
@@ -367,14 +245,14 @@
 }
 
 /* initialize an array of lambda*nbits for all possible mvs */
-static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
 {
     a->p_cost_mv = h->cost_mv[a->i_qp];
-    a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
-    a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
+    a->p_cost_ref[0] = h->cost_table->ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
+    a->p_cost_ref[1] = h->cost_table->ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
 }
 
-static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
+static void mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
 {
     int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 );
     a->i_lambda = x264_lambda_tab[qp];
@@ -413,7 +291,7 @@
     h->mb.i_chroma_qp = h->chroma_qp_table[qp];
 }
 
-static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
+static void mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
 {
     int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);
 
@@ -424,15 +302,15 @@
     h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
     a->b_early_terminate = h->param.analyse.i_subpel_refine < 11;
 
-    x264_mb_analyse_init_qp( h, a, qp );
+    mb_analyse_init_qp( h, a, qp );
 
     h->mb.b_transform_8x8 = 0;
 
     /* I: Intra part */
     a->i_satd_i16x16 =
     a->i_satd_i8x8   =
-    a->i_satd_i4x4   =
-    a->i_satd_chroma = COST_MAX;
+    a->i_satd_i4x4   = COST_MAX;
+    a->i_satd_chroma = CHROMA_FORMAT ? COST_MAX : 0;
 
     /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
      * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
@@ -567,14 +445,12 @@
         /* Fast intra decision */
         if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
         {
-            /* Always run in fast-intra mode for subme < 3 */
-            if( h->mb.i_subpel_refine > 2 &&
-              ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
+            if( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
                 IS_INTRA( h->mb.i_mb_type_top ) ||
                 IS_INTRA( h->mb.i_mb_type_topleft ) ||
                 IS_INTRA( h->mb.i_mb_type_topright ) ||
                 (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref[0][0]->mb_type[h->mb.i_mb_xy] )) ||
-                (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) ) )
+                (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) )
             { /* intra is likely */ }
             else
             {
@@ -682,21 +558,19 @@
 }
 
 /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
-static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
+static inline void psy_trellis_init( x264_t *h, int do_both_dct )
 {
-    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};
-
     if( do_both_dct || h->mb.b_transform_8x8 )
-        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
+        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], (pixel*)x264_zero );
     if( do_both_dct || !h->mb.b_transform_8x8 )
-        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
+        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], (pixel*)x264_zero );
 }
 
 /* Reset fenc satd scores cache for psy RD */
-static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd )
+static inline void mb_init_fenc_cache( x264_t *h, int b_satd )
 {
     if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
-        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );
+        psy_trellis_init( h, h->param.analyse.b_transform_8x8 );
     if( !h->mb.i_psy_rd )
         return;
 
@@ -709,7 +583,7 @@
         h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) );
 }
 
-static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
 {
     if( a->i_satd_chroma < COST_MAX )
         return;
@@ -791,7 +665,7 @@
 }
 
 /* FIXME: should we do any sort of merged chroma analysis with 4:4:4? */
-static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
+static void mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
 {
     const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
     pixel *p_src = h->mb.pic.p_fenc[0];
@@ -867,7 +741,7 @@
             return;
     }
 
-    uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
+    uint16_t *cost_i4x4_mode = h->cost_table->i4x4_mode[a->i_qp] + 8;
     /* 8x8 prediction selection */
     if( flags & X264_ANALYSE_I8x8 )
     {
@@ -909,10 +783,11 @@
             {
                 if( !h->mb.b_lossless && predict_mode[5] >= 0 )
                 {
-                    ALIGNED_ARRAY_16( int32_t, satd,[9] );
+                    ALIGNED_ARRAY_16( int32_t, satd,[4] );
                     h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
                     int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
-                    satd[i_pred_mode] -= 3 * lambda;
+                    if( i_pred_mode < 3 )
+                        satd[i_pred_mode] -= 3 * lambda;
                     for( int i = 2; i >= 0; i-- )
                     {
                         int cost = satd[i];
@@ -1027,10 +902,11 @@
             {
                 if( !h->mb.b_lossless && predict_mode[5] >= 0 )
                 {
-                    ALIGNED_ARRAY_16( int32_t, satd,[9] );
+                    ALIGNED_ARRAY_16( int32_t, satd,[4] );
                     h->pixf.intra_mbcmp_x3_4x4( p_src_by, p_dst_by, satd );
                     int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
-                    satd[i_pred_mode] -= 3 * lambda;
+                    if( i_pred_mode < 3 )
+                        satd[i_pred_mode] -= 3 * lambda;
                     i_best = satd[I_PRED_4x4_DC]; a->i_predict4x4[idx] = I_PRED_4x4_DC;
                     COPY2_IF_LT( i_best, satd[I_PRED_4x4_H], a->i_predict4x4[idx], I_PRED_4x4_H );
                     COPY2_IF_LT( i_best, satd[I_PRED_4x4_V], a->i_predict4x4[idx], I_PRED_4x4_V );
@@ -1103,7 +979,7 @@
     }
 }
 
-static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
+static void intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
 {
     if( !a->b_early_terminate )
         i_satd_thresh = COST_MAX;
@@ -1111,8 +987,8 @@
     if( a->i_satd_i16x16 < i_satd_thresh )
     {
         h->mb.i_type = I_16x16;
-        x264_analyse_update_cache( h, a );
-        a->i_satd_i16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_satd_i16x16 = rd_cost_mb( h, a->i_lambda2 );
     }
     else
         a->i_satd_i16x16 = COST_MAX;
@@ -1120,8 +996,8 @@
     if( a->i_satd_i4x4 < i_satd_thresh )
     {
         h->mb.i_type = I_4x4;
-        x264_analyse_update_cache( h, a );
-        a->i_satd_i4x4 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_satd_i4x4 = rd_cost_mb( h, a->i_lambda2 );
     }
     else
         a->i_satd_i4x4 = COST_MAX;
@@ -1129,15 +1005,15 @@
     if( a->i_satd_i8x8 < i_satd_thresh )
     {
         h->mb.i_type = I_8x8;
-        x264_analyse_update_cache( h, a );
-        a->i_satd_i8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_satd_i8x8 = rd_cost_mb( h, a->i_lambda2 );
         a->i_cbp_i8x8_luma = h->mb.i_cbp_luma;
     }
     else
         a->i_satd_i8x8 = COST_MAX;
 }
 
-static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
+static void intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
 {
     uint64_t i_satd, i_best;
     int plane_count = CHROMA444 ? 3 : 1;
@@ -1155,13 +1031,13 @@
             if( i_mode == old_pred_mode || a->i_satd_i16x16_dir[i_mode] > i_thresh )
                 continue;
             h->mb.i_intra16x16_pred_mode = i_mode;
-            i_satd = x264_rd_cost_mb( h, a->i_lambda2 );
+            i_satd = rd_cost_mb( h, a->i_lambda2 );
             COPY2_IF_LT( i_best, i_satd, a->i_predict16x16, i_mode );
         }
     }
 
     /* RD selection for chroma prediction */
-    if( !CHROMA444 )
+    if( CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422 )
     {
         const int8_t *predict_mode = predict_chroma_mode_available( h->mb.i_neighbour_intra );
         if( predict_mode[1] >= 0 )
@@ -1181,10 +1057,10 @@
             {
                 int i_cbp_chroma_best = h->mb.i_cbp_chroma;
                 int i_chroma_lambda = x264_lambda2_tab[h->mb.i_chroma_qp];
-                /* the previous thing encoded was x264_intra_rd(), so the pixels and
+                /* the previous thing encoded was intra_rd(), so the pixels and
                  * coefs for the current chroma mode are still around, so we only
                  * have to recount the bits. */
-                i_best = x264_rd_cost_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 );
+                i_best = rd_cost_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 );
                 for( int i = 0; i < i_max; i++ )
                 {
                     int i_mode = predict_mode_sorted[i];
@@ -1198,7 +1074,7 @@
                     /* if we've already found a mode that needs no residual, then
                      * probably any mode with a residual will be worse.
                      * so avoid dct on the remaining modes to improve speed. */
-                    i_satd = x264_rd_cost_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 );
+                    i_satd = rd_cost_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 );
                     COPY3_IF_LT( i_best, i_satd, a->i_predict8x8chroma, i_mode, i_cbp_chroma_best, h->mb.i_cbp_chroma );
                 }
                 h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
@@ -1228,7 +1104,7 @@
             for( ; *predict_mode >= 0; predict_mode++ )
             {
                 int i_mode = *predict_mode;
-                i_satd = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode );
+                i_satd = rd_cost_i4x4( h, a->i_lambda2, idx, i_mode );
 
                 if( i_best > i_satd )
                 {
@@ -1287,7 +1163,7 @@
                     continue;
 
                 h->mb.i_cbp_luma = a->i_cbp_i8x8_luma;
-                i_satd = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode, edge );
+                i_satd = rd_cost_i8x8( h, a->i_lambda2, idx, i_mode, edge );
 
                 if( i_best > i_satd )
                 {
@@ -1366,7 +1242,7 @@
 #define REF_COST(list, ref) \
     (a->p_cost_ref[list][ref])
 
-static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     int i_mvc;
@@ -1415,7 +1291,7 @@
             && x264_macroblock_probe_pskip( h ) )
         {
             h->mb.i_type = P_SKIP;
-            x264_analyse_update_cache( h, a );
+            analyse_update_cache( h, a );
             assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
             return;
         }
@@ -1433,19 +1309,19 @@
     h->mb.i_type = P_L0;
     if( a->i_mbrd )
     {
-        x264_mb_init_fenc_cache( h, a->i_mbrd >= 2 || h->param.analyse.inter & X264_ANALYSE_PSUB8x8 );
+        mb_init_fenc_cache( h, a->i_mbrd >= 2 || h->param.analyse.inter & X264_ANALYSE_PSUB8x8 );
         if( a->l0.me16x16.i_ref == 0 && M32( a->l0.me16x16.mv ) == M32( h->mb.cache.pskip_mv ) && !a->b_force_intra )
         {
             h->mb.i_partition = D_16x16;
             x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
-            a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
+            a->l0.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 );
             if( !(h->mb.i_cbp_luma|h->mb.i_cbp_chroma) )
                 h->mb.i_type = P_SKIP;
         }
     }
 }
 
-static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     pixel **p_fenc = h->mb.pic.p_fenc;
@@ -1536,7 +1412,7 @@
     M32( h->mb.i_sub_partition ) = D_L0_8x8 * 0x01010101;
 }
 
-static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
 {
     /* Duplicate refs are rarely useful in p8x8 due to the high cost of the
      * reference frame flags.  Thus, if we're not doing mixedrefs, just
@@ -1591,7 +1467,7 @@
     M32( h->mb.i_sub_partition ) = D_L0_8x8 * 0x01010101;
 }
 
-static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
+static void mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
 {
     x264_me_t m;
     pixel **p_fenc = h->mb.pic.p_fenc;
@@ -1657,7 +1533,7 @@
     a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost;
 }
 
-static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
+static void mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
 {
     x264_me_t m;
     pixel **p_fenc = h->mb.pic.p_fenc;
@@ -1722,8 +1598,8 @@
     a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost;
 }
 
-static ALWAYS_INLINE int x264_mb_analyse_inter_p4x4_chroma_internal( x264_t *h, x264_mb_analysis_t *a,
-                                                                     pixel **p_fref, int i8x8, int size, int chroma )
+static ALWAYS_INLINE int mb_analyse_inter_p4x4_chroma_internal( x264_t *h, x264_mb_analysis_t *a,
+                                                                pixel **p_fref, int i8x8, int size, int chroma )
 {
     ALIGNED_ARRAY_32( pixel, pix1,[16*16] );
     pixel *pix2 = pix1+8;
@@ -1786,17 +1662,17 @@
          + h->pixf.mbcmp[chromapix]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 16 );
 }
 
-static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, pixel **p_fref, int i8x8, int size )
+static int mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, pixel **p_fref, int i8x8, int size )
 {
     if( CHROMA_FORMAT == CHROMA_444 )
-        return x264_mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_444 );
+        return mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_444 );
     else if( CHROMA_FORMAT == CHROMA_422 )
-        return x264_mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_422 );
+        return mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_422 );
     else
-        return x264_mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_420 );
+        return mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_420 );
 }
 
-static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
+static void mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 {
     pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
     pixel **p_fenc = h->mb.pic.p_fenc;
@@ -1832,10 +1708,10 @@
                             REF_COST( 0, i_ref ) +
                             a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x4];
     if( h->mb.b_chroma_me && !CHROMA444 )
-        a->l0.i_cost4x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x4 );
+        a->l0.i_cost4x4[i8x8] += mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x4 );
 }
 
-static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
+static void mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 {
     pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
     pixel **p_fenc = h->mb.pic.p_fenc;
@@ -1868,10 +1744,10 @@
                             REF_COST( 0, i_ref ) +
                             a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x4];
     if( h->mb.b_chroma_me && !CHROMA444 )
-        a->l0.i_cost8x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_8x4 );
+        a->l0.i_cost8x4[i8x8] += mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_8x4 );
 }
 
-static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
+static void mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
 {
     pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref];
     pixel **p_fenc = h->mb.pic.p_fenc;
@@ -1904,10 +1780,10 @@
                             REF_COST( 0, i_ref ) +
                             a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x8];
     if( h->mb.b_chroma_me && !CHROMA444 )
-        a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
+        a->l0.i_cost4x8[i8x8] += mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
 }
 
-static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
+static ALWAYS_INLINE int analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
 {
     ALIGNED_ARRAY_32( pixel, pix, [4],[16*16] );
     ALIGNED_ARRAY_32( pixel,  bi, [2],[16*16] );
@@ -1955,7 +1831,7 @@
     return i_chroma_cost;
 }
 
-static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
 {
     /* Assumes that fdec still contains the results of
      * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
@@ -2001,7 +1877,7 @@
     }
 }
 
-static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
 {
     ALIGNED_ARRAY_32( pixel, pix0,[16*16] );
     ALIGNED_ARRAY_32( pixel, pix1,[16*16] );
@@ -2073,7 +1949,7 @@
                 {
                     /* We already tested skip */
                     h->mb.i_type = B_SKIP;
-                    x264_analyse_update_cache( h, a );
+                    analyse_update_cache( h, a );
                     return;
                 }
             }
@@ -2105,7 +1981,7 @@
                      + a->l1.bi16x16.cost_mv;
 
     if( h->mb.b_chroma_me )
-        a->i_cost16x16bi += x264_analyse_bi_chroma( h, a, 0, PIXEL_16x16 );
+        a->i_cost16x16bi += analyse_bi_chroma( h, a, 0, PIXEL_16x16 );
 
     /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
     if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
@@ -2137,7 +2013,7 @@
             }
             else
             {
-                ALIGNED_ARRAY_32( pixel, pixuv, [2],[16*FENC_STRIDE] );
+                ALIGNED_ARRAY_64( pixel, pixuv, [2],[16*FENC_STRIDE] );
                 int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
                 int v_shift = CHROMA_V_SHIFT;
 
@@ -2187,7 +2063,7 @@
     a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
 }
 
-static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i )
+static inline void mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i )
 {
     int x = 2*(i&1);
     int y = i&2;
@@ -2217,7 +2093,7 @@
     }
 }
 
-static void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
+static void mb_load_mv_direct8x8( x264_t *h, int idx )
 {
     int x = 2*(idx&1);
     int y = idx&2;
@@ -2253,13 +2129,13 @@
             x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0 ); \
     }
 
-static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
+static inline void mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 {
     int x = 2*(i&1);
     int y = i&2;
     if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
     {
-        x264_mb_load_mv_direct8x8( h, i );
+        mb_load_mv_direct8x8( h, i );
         if( b_mvd )
         {
             x264_macroblock_cache_mvd(  h, x, y, 2, 2, 0, 0 );
@@ -2272,17 +2148,17 @@
         CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] );
     }
 }
-static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
+static inline void mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 {
     CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] );
 }
-static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
+static inline void mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
 {
     CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] );
 }
 #undef CACHE_MV_BI
 
-static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a )
 {
     ALIGNED_ARRAY_16( pixel, pix,[2],[8*8] );
     int i_maxref[2] = {h->mb.pic.i_fref[0]-1, h->mb.pic.i_fref[1]-1};
@@ -2371,7 +2247,7 @@
 
         if( h->mb.b_chroma_me )
         {
-            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
+            int i_chroma_cost = analyse_bi_chroma( h, a, i, PIXEL_8x8 );
             i_part_cost_bi += i_chroma_cost;
             a->i_satd8x8[2][i] += i_chroma_cost;
         }
@@ -2387,14 +2263,14 @@
         a->i_cost8x8bi += i_part_cost;
 
         /* XXX Needed for x264_mb_predict_mv */
-        x264_mb_cache_mv_b8x8( h, a, i, 0 );
+        mb_cache_mv_b8x8( h, a, i, 0 );
     }
 
     /* mb type cost */
     a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
 }
 
-static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
+static void mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
 {
     pixel **p_fref[2] =
         { h->mb.pic.p_fref[0][a->l0.me16x16.i_ref],
@@ -2451,7 +2327,7 @@
 
         if( h->mb.b_chroma_me )
         {
-            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
+            int i_chroma_cost = analyse_bi_chroma( h, a, i, PIXEL_8x8 );
             i_part_cost_bi += i_chroma_cost;
             a->i_satd8x8[2][i] += i_chroma_cost;
         }
@@ -2464,14 +2340,14 @@
         a->i_cost8x8bi += i_part_cost;
 
         /* XXX Needed for x264_mb_predict_mv */
-        x264_mb_cache_mv_b8x8( h, a, i, 0 );
+        mb_cache_mv_b8x8( h, a, i, 0 );
     }
 
     /* mb type cost */
     a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8];
 }
 
-static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
+static void mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
 {
     ALIGNED_ARRAY_32( pixel, pix,[2],[16*8] );
     ALIGNED_4( int16_t mvc[3][2] );
@@ -2529,7 +2405,7 @@
                         + a->l1.me16x8[i].i_ref_cost;
 
         if( h->mb.b_chroma_me )
-            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_16x8 );
+            i_part_cost_bi += analyse_bi_chroma( h, a, i, PIXEL_16x8 );
 
         i_part_cost = a->l0.me16x8[i].cost;
         a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
@@ -2555,7 +2431,7 @@
             return;
         }
 
-        x264_mb_cache_mv_b16x8( h, a, i, 0 );
+        mb_cache_mv_b16x8( h, a, i, 0 );
     }
 
     /* mb type cost */
@@ -2565,7 +2441,7 @@
     a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8];
 }
 
-static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
+static void mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd )
 {
     ALIGNED_ARRAY_16( pixel, pix,[2],[8*16] );
     ALIGNED_4( int16_t mvc[3][2] );
@@ -2622,7 +2498,7 @@
                         + a->l1.me8x16[i].i_ref_cost;
 
         if( h->mb.b_chroma_me )
-            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_8x16 );
+            i_part_cost_bi += analyse_bi_chroma( h, a, i, PIXEL_8x16 );
 
         i_part_cost = a->l0.me8x16[i].cost;
         a->i_mb_partition8x16[i] = D_L0_8x8;
@@ -2648,7 +2524,7 @@
             return;
         }
 
-        x264_mb_cache_mv_b8x16( h, a, i, 0 );
+        mb_cache_mv_b8x16( h, a, i, 0 );
     }
 
     /* mb type cost */
@@ -2658,7 +2534,7 @@
     a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
 }
 
-static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
+static void mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
 {
     int thresh = a->b_early_terminate ? i_satd * 5/4 + 1 : COST_MAX;
 
@@ -2666,15 +2542,15 @@
     if( a->l0.i_rd16x16 == COST_MAX && (!a->b_early_terminate || a->l0.me16x16.cost <= i_satd * 3/2) )
     {
         h->mb.i_partition = D_16x16;
-        x264_analyse_update_cache( h, a );
-        a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->l0.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 );
     }
 
     if( a->l0.i_cost16x8 < thresh )
     {
         h->mb.i_partition = D_16x8;
-        x264_analyse_update_cache( h, a );
-        a->l0.i_cost16x8 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->l0.i_cost16x8 = rd_cost_mb( h, a->i_lambda2 );
     }
     else
         a->l0.i_cost16x8 = COST_MAX;
@@ -2682,8 +2558,8 @@
     if( a->l0.i_cost8x16 < thresh )
     {
         h->mb.i_partition = D_8x16;
-        x264_analyse_update_cache( h, a );
-        a->l0.i_cost8x16 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->l0.i_cost8x16 = rd_cost_mb( h, a->i_lambda2 );
     }
     else
         a->l0.i_cost8x16 = COST_MAX;
@@ -2712,7 +2588,7 @@
                     if( costs[subtype] > sub8x8_thresh )
                         continue;
                     h->mb.i_sub_partition[i] = subtype;
-                    x264_mb_cache_mv_p8x8( h, a, i );
+                    mb_cache_mv_p8x8( h, a, i );
                     if( subtype == btype )
                         continue;
                     cost = x264_rd_cost_part( h, a->i_lambda2, i<<2, PIXEL_8x8 );
@@ -2721,19 +2597,19 @@
                 if( h->mb.i_sub_partition[i] != btype )
                 {
                     h->mb.i_sub_partition[i] = btype;
-                    x264_mb_cache_mv_p8x8( h, a, i );
+                    mb_cache_mv_p8x8( h, a, i );
                 }
             }
         }
         else
-            x264_analyse_update_cache( h, a );
-        a->l0.i_cost8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
+            analyse_update_cache( h, a );
+        a->l0.i_cost8x8 = rd_cost_mb( h, a->i_lambda2 );
     }
     else
         a->l0.i_cost8x8 = COST_MAX;
 }
 
-static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
+static void mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
 {
     int thresh = a->b_early_terminate ? i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1 : COST_MAX;
 
@@ -2743,8 +2619,8 @@
         /* Assumes direct/skip MC is still in fdec */
         /* Requires b-rdo to be done before intra analysis */
         h->mb.b_skip_mc = 1;
-        x264_analyse_update_cache( h, a );
-        a->i_rd16x16direct = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_rd16x16direct = rd_cost_mb( h, a->i_lambda2 );
         h->mb.b_skip_mc = 0;
     }
 
@@ -2754,24 +2630,24 @@
     if( a->l0.me16x16.cost < thresh && a->l0.i_rd16x16 == COST_MAX )
     {
         h->mb.i_type = B_L0_L0;
-        x264_analyse_update_cache( h, a );
-        a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->l0.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 );
     }
 
     /* L1 */
     if( a->l1.me16x16.cost < thresh && a->l1.i_rd16x16 == COST_MAX )
     {
         h->mb.i_type = B_L1_L1;
-        x264_analyse_update_cache( h, a );
-        a->l1.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->l1.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 );
     }
 
     /* BI */
     if( a->i_cost16x16bi < thresh && a->i_rd16x16bi == COST_MAX )
     {
         h->mb.i_type = B_BI_BI;
-        x264_analyse_update_cache( h, a );
-        a->i_rd16x16bi = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_rd16x16bi = rd_cost_mb( h, a->i_lambda2 );
     }
 
     /* 8x8 */
@@ -2779,8 +2655,8 @@
     {
         h->mb.i_type = B_8x8;
         h->mb.i_partition = D_8x8;
-        x264_analyse_update_cache( h, a );
-        a->i_rd8x8bi = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_rd8x8bi = rd_cost_mb( h, a->i_lambda2 );
         x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
     }
 
@@ -2789,8 +2665,8 @@
     {
         h->mb.i_type = a->i_mb_type16x8;
         h->mb.i_partition = D_16x8;
-        x264_analyse_update_cache( h, a );
-        a->i_rd16x8bi = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_rd16x8bi = rd_cost_mb( h, a->i_lambda2 );
     }
 
     /* 8x16 */
@@ -2798,12 +2674,12 @@
     {
         h->mb.i_type = a->i_mb_type8x16;
         h->mb.i_partition = D_8x16;
-        x264_analyse_update_cache( h, a );
-        a->i_rd8x16bi = x264_rd_cost_mb( h, a->i_lambda2 );
+        analyse_update_cache( h, a );
+        a->i_rd8x16bi = rd_cost_mb( h, a->i_lambda2 );
     }
 }
 
-static void x264_refine_bidir( x264_t *h, x264_mb_analysis_t *a )
+static void refine_bidir( x264_t *h, x264_mb_analysis_t *a )
 {
     int i_biweight;
 
@@ -2846,7 +2722,7 @@
     }
 }
 
-static inline void x264_mb_analyse_transform( x264_t *h )
+static inline void mb_analyse_transform( x264_t *h )
 {
     if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 && !h->mb.b_lossless )
     {
@@ -2884,7 +2760,7 @@
     }
 }
 
-static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *a, int *i_satd, int *i_rd )
+static inline void mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *a, int *i_satd, int *i_rd )
 {
     if( h->param.analyse.b_transform_8x8 && h->pps->b_transform_8x8_mode )
     {
@@ -2895,10 +2771,10 @@
         else if( !x264_transform_allowed[h->mb.i_type] )
             return;
 
-        x264_analyse_update_cache( h, a );
+        analyse_update_cache( h, a );
         h->mb.b_transform_8x8 ^= 1;
         /* FIXME only luma is needed for 4:2:0, but the score for comparison already includes chroma */
-        int i_rd8 = x264_rd_cost_mb( h, a->i_lambda2 );
+        int i_rd8 = rd_cost_mb( h, a->i_lambda2 );
 
         if( *i_rd >= i_rd8 )
         {
@@ -2921,12 +2797,12 @@
  * There must be a more efficient way to get that portion of the benefit
  * without doing full QP-RD, but RD-decimation doesn't seem to do the
  * trick. */
-static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
+static inline void mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
 {
     int bcost, cost, failures, prevcost, origcost;
     int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
     int last_qp_tried = 0;
-    origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
+    origcost = bcost = rd_cost_mb( h, a->i_lambda2 );
     int origcbp = h->mb.cbp[h->mb.i_mb_xy];
 
     /* If CBP is already zero, don't raise the quantizer any higher. */
@@ -2959,7 +2835,7 @@
             {
                 h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, SPEC_QP( h->param.rc.i_qp_min ) );
                 h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
-                already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 );
+                already_checked_cost = rd_cost_mb( h, a->i_lambda2 );
                 if( !h->mb.cbp[h->mb.i_mb_xy] )
                 {
                     /* If our empty-CBP block is lower QP than the last QP,
@@ -2983,7 +2859,7 @@
             else
             {
                 h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
-                cost = x264_rd_cost_mb( h, a->i_lambda2 );
+                cost = rd_cost_mb( h, a->i_lambda2 );
                 COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
             }
 
@@ -3008,7 +2884,7 @@
     {
         h->mb.i_qp = h->mb.i_last_qp;
         h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
-        cost = x264_rd_cost_mb( h, a->i_lambda2 );
+        cost = rd_cost_mb( h, a->i_lambda2 );
         COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
     }
 
@@ -3020,7 +2896,7 @@
         x264_mb_transform_8x8_allowed( h ) )
     {
         h->mb.b_transform_8x8 ^= 1;
-        cost = x264_rd_cost_mb( h, a->i_lambda2 );
+        cost = rd_cost_mb( h, a->i_lambda2 );
         if( cost > bcost )
             h->mb.b_transform_8x8 ^= 1;
     }
@@ -3042,17 +2918,17 @@
 
     if( h->param.analyse.b_mb_info )
         h->fdec->effective_qp[h->mb.i_mb_xy] = h->mb.i_qp; /* Store the real analysis QP. */
-    x264_mb_analyse_init( h, &analysis, h->mb.i_qp );
+    mb_analyse_init( h, &analysis, h->mb.i_qp );
 
     /*--------------------------- Do the analysis ---------------------------*/
     if( h->sh.i_type == SLICE_TYPE_I )
     {
 intra_analysis:
         if( analysis.i_mbrd )
-            x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );
-        x264_mb_analyse_intra( h, &analysis, COST_MAX );
+            mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );
+        mb_analyse_intra( h, &analysis, COST_MAX );
         if( analysis.i_mbrd )
-            x264_intra_rd( h, &analysis, COST_MAX );
+            intra_rd( h, &analysis, COST_MAX );
 
         i_cost = analysis.i_satd_i16x16;
         h->mb.i_type = I_16x16;
@@ -3062,7 +2938,7 @@
             h->mb.i_type = I_PCM;
 
         else if( analysis.i_mbrd >= 2 )
-            x264_intra_rd_refine( h, &analysis );
+            intra_rd_refine( h, &analysis );
     }
     else if( h->sh.i_type == SLICE_TYPE_P )
     {
@@ -3075,7 +2951,7 @@
         {
             if( !h->param.analyse.b_psy )
             {
-                x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) );
+                mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) );
                 goto intra_analysis;
             }
         }
@@ -3147,9 +3023,9 @@
             int i_partition;
             int i_satd_inter, i_satd_intra;
 
-            x264_mb_analyse_load_costs( h, &analysis );
+            mb_analyse_load_costs( h, &analysis );
 
-            x264_mb_analyse_inter_p16x16( h, &analysis );
+            mb_analyse_inter_p16x16( h, &analysis );
 
             if( h->mb.i_type == P_SKIP )
             {
@@ -3161,9 +3037,9 @@
             if( flags & X264_ANALYSE_PSUB16x16 )
             {
                 if( h->param.analyse.b_mixed_references )
-                    x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis );
+                    mb_analyse_inter_p8x8_mixed_ref( h, &analysis );
                 else
-                    x264_mb_analyse_inter_p8x8( h, &analysis );
+                    mb_analyse_inter_p8x8( h, &analysis );
             }
 
             /* Select best inter mode */
@@ -3183,24 +3059,24 @@
                 {
                     for( int i = 0; i < 4; i++ )
                     {
-                        x264_mb_analyse_inter_p4x4( h, &analysis, i );
+                        mb_analyse_inter_p4x4( h, &analysis, i );
                         int i_thresh8x4 = analysis.l0.me4x4[i][1].cost_mv + analysis.l0.me4x4[i][2].cost_mv;
                         if( !analysis.b_early_terminate || analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost + i_thresh8x4 )
                         {
                             int i_cost8x8 = analysis.l0.i_cost4x4[i];
                             h->mb.i_sub_partition[i] = D_L0_4x4;
 
-                            x264_mb_analyse_inter_p8x4( h, &analysis, i );
+                            mb_analyse_inter_p8x4( h, &analysis, i );
                             COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost8x4[i],
                                          h->mb.i_sub_partition[i], D_L0_8x4 );
 
-                            x264_mb_analyse_inter_p4x8( h, &analysis, i );
+                            mb_analyse_inter_p4x8( h, &analysis, i );
                             COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost4x8[i],
                                          h->mb.i_sub_partition[i], D_L0_4x8 );
 
                             i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
                         }
-                        x264_mb_cache_mv_p8x8( h, &analysis, i );
+                        mb_cache_mv_p8x8( h, &analysis, i );
                     }
                     analysis.l0.i_cost8x8 = i_cost;
                 }
@@ -3215,14 +3091,14 @@
                                       + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
                 analysis.i_cost_est16x8[1] = analysis.i_satd8x8[0][2] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost;
 
-                x264_mb_analyse_inter_p16x8( h, &analysis, i_cost );
+                mb_analyse_inter_p16x8( h, &analysis, i_cost );
                 COPY3_IF_LT( i_cost, analysis.l0.i_cost16x8, i_type, P_L0, i_partition, D_16x8 );
 
                 i_avg_mv_ref_cost = (analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[1].i_ref_cost
                                   + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
                 analysis.i_cost_est8x16[1] = analysis.i_satd8x8[0][1] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost;
 
-                x264_mb_analyse_inter_p8x16( h, &analysis, i_cost );
+                mb_analyse_inter_p8x16( h, &analysis, i_cost );
                 COPY3_IF_LT( i_cost, analysis.l0.i_cost8x16, i_type, P_L0, i_partition, D_8x16 );
             }
 
@@ -3296,20 +3172,20 @@
             {
                 if( CHROMA444 )
                 {
-                    x264_mb_analyse_intra( h, &analysis, i_cost );
-                    x264_mb_analyse_intra_chroma( h, &analysis );
+                    mb_analyse_intra( h, &analysis, i_cost );
+                    mb_analyse_intra_chroma( h, &analysis );
                 }
                 else
                 {
-                    x264_mb_analyse_intra_chroma( h, &analysis );
-                    x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_chroma );
+                    mb_analyse_intra_chroma( h, &analysis );
+                    mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_chroma );
                 }
                 analysis.i_satd_i16x16 += analysis.i_satd_chroma;
                 analysis.i_satd_i8x8   += analysis.i_satd_chroma;
                 analysis.i_satd_i4x4   += analysis.i_satd_chroma;
             }
             else
-                x264_mb_analyse_intra( h, &analysis, i_cost );
+                mb_analyse_intra( h, &analysis, i_cost );
 
             i_satd_inter = i_cost;
             i_satd_intra = X264_MIN3( analysis.i_satd_i16x16,
@@ -3318,7 +3194,7 @@
 
             if( analysis.i_mbrd )
             {
-                x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) );
+                mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) );
                 i_type = P_L0;
                 i_partition = D_16x16;
                 i_cost = analysis.l0.i_rd16x16;
@@ -3328,8 +3204,8 @@
                 h->mb.i_type = i_type;
                 h->mb.i_partition = i_partition;
                 if( i_cost < COST_MAX )
-                    x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );
-                x264_intra_rd( h, &analysis, i_satd_inter * 5/4 + 1 );
+                    mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );
+                intra_rd( h, &analysis, i_satd_inter * 5/4 + 1 );
             }
 
             COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
@@ -3343,7 +3219,7 @@
             {
                 /* Intra masking: copy fdec to fenc and re-encode the block as intra in order to make it appear as if
                  * it was an inter block. */
-                x264_analyse_update_cache( h, &analysis );
+                analyse_update_cache( h, &analysis );
                 x264_macroblock_encode( h );
                 for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
                     h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[p], FENC_STRIDE, h->mb.pic.p_fdec[p], FDEC_STRIDE, 16 );
@@ -3353,7 +3229,7 @@
                     h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, height );
                     h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, height );
                 }
-                x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) );
+                mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) );
                 goto intra_analysis;
             }
 
@@ -3361,7 +3237,7 @@
             {
                 if( IS_INTRA( h->mb.i_type ) )
                 {
-                    x264_intra_rd_refine( h, &analysis );
+                    intra_rd_refine( h, &analysis );
                 }
                 else if( i_partition == D_16x16 )
                 {
@@ -3387,7 +3263,7 @@
                 }
                 else if( i_partition == D_8x8 )
                 {
-                    x264_analyse_update_cache( h, &analysis );
+                    analyse_update_cache( h, &analysis );
                     for( int i8x8 = 0; i8x8 < 4; i8x8++ )
                     {
                         if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )
@@ -3422,7 +3298,7 @@
         int b_skip = 0;
 
         if( analysis.i_mbrd )
-            x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );
+            mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );
 
         h->mb.i_type = B_SKIP;
         if( h->mb.b_direct_auto_write )
@@ -3490,14 +3366,14 @@
             h->mb.b_skip_mc = 0;
             h->mb.i_type = B_DIRECT;
 
-            x264_mb_analyse_load_costs( h, &analysis );
+            mb_analyse_load_costs( h, &analysis );
 
             /* select best inter mode */
             /* direct must be first */
             if( analysis.b_direct_available )
-                x264_mb_analyse_inter_direct( h, &analysis );
+                mb_analyse_inter_direct( h, &analysis );
 
-            x264_mb_analyse_inter_b16x16( h, &analysis );
+            mb_analyse_inter_b16x16( h, &analysis );
 
             if( h->mb.i_type == B_SKIP )
             {
@@ -3517,14 +3393,14 @@
 
             if( analysis.i_mbrd && analysis.b_early_terminate && analysis.i_cost16x16direct <= i_cost * 33/32 )
             {
-                x264_mb_analyse_b_rd( h, &analysis, i_cost );
+                mb_analyse_b_rd( h, &analysis, i_cost );
                 if( i_bskip_cost < analysis.i_rd16x16direct &&
                     i_bskip_cost < analysis.i_rd16x16bi &&
                     i_bskip_cost < analysis.l0.i_rd16x16 &&
                     i_bskip_cost < analysis.l1.i_rd16x16 )
                 {
                     h->mb.i_type = B_SKIP;
-                    x264_analyse_update_cache( h, &analysis );
+                    analyse_update_cache( h, &analysis );
                     return;
                 }
             }
@@ -3532,9 +3408,9 @@
             if( flags & X264_ANALYSE_BSUB16x16 )
             {
                 if( h->param.analyse.b_mixed_references )
-                    x264_mb_analyse_inter_b8x8_mixed_ref( h, &analysis );
+                    mb_analyse_inter_b8x8_mixed_ref( h, &analysis );
                 else
-                    x264_mb_analyse_inter_b8x8( h, &analysis );
+                    mb_analyse_inter_b8x8( h, &analysis );
 
                 COPY3_IF_LT( i_cost, analysis.i_cost8x8bi, i_type, B_8x8, i_partition, D_8x8 );
 
@@ -3584,17 +3460,17 @@
                 int try_16x8_first = i_cost_est16x8bi_total < i_cost_est8x16bi_total;
                 if( try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
                 {
-                    x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
+                    mb_analyse_inter_b16x8( h, &analysis, i_cost );
                     COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
                 }
                 if( !analysis.b_early_terminate || i_cost_est8x16bi_total < i_cost )
                 {
-                    x264_mb_analyse_inter_b8x16( h, &analysis, i_cost );
+                    mb_analyse_inter_b8x16( h, &analysis, i_cost );
                     COPY3_IF_LT( i_cost, analysis.i_cost8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 );
                 }
                 if( !try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
                 {
-                    x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
+                    mb_analyse_inter_b16x8( h, &analysis, i_cost );
                     COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
                 }
             }
@@ -3686,7 +3562,7 @@
 
             if( analysis.i_mbrd )
             {
-                x264_mb_analyse_b_rd( h, &analysis, i_satd_inter );
+                mb_analyse_b_rd( h, &analysis, i_satd_inter );
                 i_type = B_SKIP;
                 i_cost = i_bskip_cost;
                 i_partition = D_16x16;
@@ -3706,25 +3582,25 @@
             {
                 if( CHROMA444 )
                 {
-                    x264_mb_analyse_intra( h, &analysis, i_satd_inter );
-                    x264_mb_analyse_intra_chroma( h, &analysis );
+                    mb_analyse_intra( h, &analysis, i_satd_inter );
+                    mb_analyse_intra_chroma( h, &analysis );
                 }
                 else
                 {
-                    x264_mb_analyse_intra_chroma( h, &analysis );
-                    x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_chroma );
+                    mb_analyse_intra_chroma( h, &analysis );
+                    mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_chroma );
                 }
                 analysis.i_satd_i16x16 += analysis.i_satd_chroma;
                 analysis.i_satd_i8x8   += analysis.i_satd_chroma;
                 analysis.i_satd_i4x4   += analysis.i_satd_chroma;
             }
             else
-                x264_mb_analyse_intra( h, &analysis, i_satd_inter );
+                mb_analyse_intra( h, &analysis, i_satd_inter );
 
             if( analysis.i_mbrd )
             {
-                x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );
-                x264_intra_rd( h, &analysis, i_satd_inter * 17/16 + 1 );
+                mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );
+                intra_rd( h, &analysis, i_satd_inter * 17/16 + 1 );
             }
 
             COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
@@ -3736,14 +3612,14 @@
             h->mb.i_partition = i_partition;
 
             if( analysis.i_mbrd >= 2 && IS_INTRA( i_type ) && i_type != I_PCM )
-                x264_intra_rd_refine( h, &analysis );
+                intra_rd_refine( h, &analysis );
             if( h->mb.i_subpel_refine >= 5 )
-                x264_refine_bidir( h, &analysis );
+                refine_bidir( h, &analysis );
 
             if( analysis.i_mbrd >= 2 && i_type > B_DIRECT && i_type < B_SKIP )
             {
                 int i_biweight;
-                x264_analyse_update_cache( h, &analysis );
+                analyse_update_cache( h, &analysis );
 
                 if( i_partition == D_16x16 )
                 {
@@ -3814,7 +3690,7 @@
         }
     }
 
-    x264_analyse_update_cache( h, &analysis );
+    analyse_update_cache( h, &analysis );
 
     /* In rare cases we can end up qpel-RDing our way back to a larger partition size
      * without realizing it.  Check for this and account for it if necessary. */
@@ -3830,22 +3706,22 @@
     }
 
     if( !analysis.i_mbrd )
-        x264_mb_analyse_transform( h );
+        mb_analyse_transform( h );
 
     if( analysis.i_mbrd == 3 && !IS_SKIP(h->mb.i_type) )
-        x264_mb_analyse_qp_rd( h, &analysis );
+        mb_analyse_qp_rd( h, &analysis );
 
     h->mb.b_trellis = h->param.analyse.i_trellis;
     h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type ));
 
     if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
-        x264_psy_trellis_init( h, 0 );
+        psy_trellis_init( h, 0 );
     if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
         h->mb.i_skip_intra = 0;
 }
 
 /*-------------------- Update MB from the analysis ----------------------*/
-static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
+static void analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
 {
     switch( h->mb.i_type )
     {
@@ -3853,17 +3729,17 @@
             for( int i = 0; i < 16; i++ )
                 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = a->i_predict4x4[i];
 
-            x264_mb_analyse_intra_chroma( h, a );
+            mb_analyse_intra_chroma( h, a );
             break;
         case I_8x8:
             for( int i = 0; i < 4; i++ )
                 x264_macroblock_cache_intra8x8_pred( h, 2*(i&1), 2*(i>>1), a->i_predict8x8[i] );
 
-            x264_mb_analyse_intra_chroma( h, a );
+            mb_analyse_intra_chroma( h, a );
             break;
         case I_16x16:
             h->mb.i_intra16x16_pred_mode = a->i_predict16x16;
-            x264_mb_analyse_intra_chroma( h, a );
+            mb_analyse_intra_chroma( h, a );
             break;
 
         case I_PCM:
@@ -3903,7 +3779,7 @@
             x264_macroblock_cache_ref( h, 0, 2, 2, 2, 0, a->l0.me8x8[2].i_ref );
             x264_macroblock_cache_ref( h, 2, 2, 2, 2, 0, a->l0.me8x8[3].i_ref );
             for( int i = 0; i < 4; i++ )
-                x264_mb_cache_mv_p8x8( h, a, i );
+                mb_cache_mv_p8x8( h, a, i );
             break;
 
         case P_SKIP:
@@ -3917,16 +3793,16 @@
         case B_SKIP:
         case B_DIRECT:
             h->mb.i_partition = h->mb.cache.direct_partition;
-            x264_mb_load_mv_direct8x8( h, 0 );
-            x264_mb_load_mv_direct8x8( h, 1 );
-            x264_mb_load_mv_direct8x8( h, 2 );
-            x264_mb_load_mv_direct8x8( h, 3 );
+            mb_load_mv_direct8x8( h, 0 );
+            mb_load_mv_direct8x8( h, 1 );
+            mb_load_mv_direct8x8( h, 2 );
+            mb_load_mv_direct8x8( h, 3 );
             break;
 
         case B_8x8:
             /* optimize: cache might not need to be rewritten */
             for( int i = 0; i < 4; i++ )
-                x264_mb_cache_mv_b8x8( h, a, i, 1 );
+                mb_cache_mv_b8x8( h, a, i, 1 );
             break;
 
         default: /* the rest of the B types */
@@ -3961,12 +3837,12 @@
                 }
                 break;
             case D_16x8:
-                x264_mb_cache_mv_b16x8( h, a, 0, 1 );
-                x264_mb_cache_mv_b16x8( h, a, 1, 1 );
+                mb_cache_mv_b16x8( h, a, 0, 1 );
+                mb_cache_mv_b16x8( h, a, 1, 1 );
                 break;
             case D_8x16:
-                x264_mb_cache_mv_b8x16( h, a, 0, 1 );
-                x264_mb_cache_mv_b8x16( h, a, 1, 1 );
+                mb_cache_mv_b8x16( h, a, 0, 1 );
+                mb_cache_mv_b8x16( h, a, 1, 1 );
                 break;
             default:
                 x264_log( h, X264_LOG_ERROR, "internal error (invalid MB type)\n" );
@@ -3995,10 +3871,10 @@
                 x264_log( h, X264_LOG_DEBUG, "mb_xy: %d,%d \n", h->mb.i_mb_x, h->mb.i_mb_y);
                 x264_log( h, X264_LOG_DEBUG, "completed: %d \n", completed );
                 x264_log( h, X264_LOG_WARNING, "recovering by using intra mode\n");
-                x264_mb_analyse_intra( h, a, COST_MAX );
+                mb_analyse_intra( h, a, COST_MAX );
                 h->mb.i_type = I_16x16;
                 h->mb.i_intra16x16_pred_mode = a->i_predict16x16;
-                x264_mb_analyse_intra_chroma( h, a );
+                mb_analyse_intra_chroma( h, a );
             }
         }
     }
diff -Nru x264-0.152.2854+gite9a5903/encoder/analyse.h x264-0.158.2988+git-20191101.7817004/encoder/analyse.h
--- x264-0.152.2854+gite9a5903/encoder/analyse.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/analyse.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.h: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -24,21 +24,32 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_ANALYSE_H
-#define X264_ANALYSE_H
+#ifndef X264_ENCODER_ANALYSE_H
+#define X264_ENCODER_ANALYSE_H
 
+#define x264_analyse_init_costs x264_template(analyse_init_costs)
 int x264_analyse_init_costs( x264_t *h );
+#define x264_analyse_free_costs x264_template(analyse_free_costs)
 void x264_analyse_free_costs( x264_t *h );
+#define x264_analyse_weight_frame x264_template(analyse_weight_frame)
 void x264_analyse_weight_frame( x264_t *h, int end );
+#define x264_macroblock_analyse x264_template(macroblock_analyse)
 void x264_macroblock_analyse( x264_t *h );
+#define x264_slicetype_decide x264_template(slicetype_decide)
 void x264_slicetype_decide( x264_t *h );
 
+#define x264_slicetype_analyse x264_template(slicetype_analyse)
 void x264_slicetype_analyse( x264_t *h, int intra_minigop );
 
+#define x264_lookahead_init x264_template(lookahead_init)
 int  x264_lookahead_init( x264_t *h, int i_slicetype_length );
+#define x264_lookahead_is_empty x264_template(lookahead_is_empty)
 int  x264_lookahead_is_empty( x264_t *h );
+#define x264_lookahead_put_frame x264_template(lookahead_put_frame)
 void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame );
+#define x264_lookahead_get_frames x264_template(lookahead_get_frames)
 void x264_lookahead_get_frames( x264_t *h );
+#define x264_lookahead_delete x264_template(lookahead_delete)
 void x264_lookahead_delete( x264_t *h );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/encoder/api.c x264-0.158.2988+git-20191101.7817004/encoder/api.c
--- x264-0.152.2854+gite9a5903/encoder/api.c	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/api.c	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,194 @@
+/*****************************************************************************
+ * api.c: bit depth independent interface
+ *****************************************************************************
+ * Copyright (C) 2003-2019 x264 project
+ *
+ * Authors: Vittorio Giovara <vittorio.giovara@gmail.com>
+ *          Luca Barbato <lu_zero@gentoo.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "common/base.h"
+
+/****************************************************************************
+ * global symbols
+ ****************************************************************************/
+const int x264_chroma_format = X264_CHROMA_FORMAT;
+
+x264_t *x264_8_encoder_open( x264_param_t * );
+void x264_8_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
+int  x264_8_encoder_reconfig( x264_t *, x264_param_t * );
+void x264_8_encoder_parameters( x264_t *, x264_param_t * );
+int  x264_8_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
+int  x264_8_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
+void x264_8_encoder_close( x264_t * );
+int  x264_8_encoder_delayed_frames( x264_t * );
+int  x264_8_encoder_maximum_delayed_frames( x264_t * );
+void x264_8_encoder_intra_refresh( x264_t * );
+int  x264_8_encoder_invalidate_reference( x264_t *, int64_t pts );
+
+x264_t *x264_10_encoder_open( x264_param_t * );
+void x264_10_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
+int  x264_10_encoder_reconfig( x264_t *, x264_param_t * );
+void x264_10_encoder_parameters( x264_t *, x264_param_t * );
+int  x264_10_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
+int  x264_10_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
+void x264_10_encoder_close( x264_t * );
+int  x264_10_encoder_delayed_frames( x264_t * );
+int  x264_10_encoder_maximum_delayed_frames( x264_t * );
+void x264_10_encoder_intra_refresh( x264_t * );
+int  x264_10_encoder_invalidate_reference( x264_t *, int64_t pts );
+
+typedef struct x264_api_t
+{
+    /* Internal reference to x264_t data */
+    x264_t *x264;
+
+    /* API entry points */
+    void (*nal_encode)( x264_t *h, uint8_t *dst, x264_nal_t *nal );
+    int  (*encoder_reconfig)( x264_t *, x264_param_t * );
+    void (*encoder_parameters)( x264_t *, x264_param_t * );
+    int  (*encoder_headers)( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
+    int  (*encoder_encode)( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
+    void (*encoder_close)( x264_t * );
+    int  (*encoder_delayed_frames)( x264_t * );
+    int  (*encoder_maximum_delayed_frames)( x264_t * );
+    void (*encoder_intra_refresh)( x264_t * );
+    int  (*encoder_invalidate_reference)( x264_t *, int64_t pts );
+} x264_api_t;
+
+REALIGN_STACK x264_t *x264_encoder_open( x264_param_t *param )
+{
+    x264_api_t *api = calloc( 1, sizeof( x264_api_t ) );
+    if( !api )
+        return NULL;
+
+    if( HAVE_BITDEPTH8 && param->i_bitdepth == 8 )
+    {
+        api->nal_encode = x264_8_nal_encode;
+        api->encoder_reconfig = x264_8_encoder_reconfig;
+        api->encoder_parameters = x264_8_encoder_parameters;
+        api->encoder_headers = x264_8_encoder_headers;
+        api->encoder_encode = x264_8_encoder_encode;
+        api->encoder_close = x264_8_encoder_close;
+        api->encoder_delayed_frames = x264_8_encoder_delayed_frames;
+        api->encoder_maximum_delayed_frames = x264_8_encoder_maximum_delayed_frames;
+        api->encoder_intra_refresh = x264_8_encoder_intra_refresh;
+        api->encoder_invalidate_reference = x264_8_encoder_invalidate_reference;
+
+        api->x264 = x264_8_encoder_open( param );
+    }
+    else if( HAVE_BITDEPTH10 && param->i_bitdepth == 10 )
+    {
+        api->nal_encode = x264_10_nal_encode;
+        api->encoder_reconfig = x264_10_encoder_reconfig;
+        api->encoder_parameters = x264_10_encoder_parameters;
+        api->encoder_headers = x264_10_encoder_headers;
+        api->encoder_encode = x264_10_encoder_encode;
+        api->encoder_close = x264_10_encoder_close;
+        api->encoder_delayed_frames = x264_10_encoder_delayed_frames;
+        api->encoder_maximum_delayed_frames = x264_10_encoder_maximum_delayed_frames;
+        api->encoder_intra_refresh = x264_10_encoder_intra_refresh;
+        api->encoder_invalidate_reference = x264_10_encoder_invalidate_reference;
+
+        api->x264 = x264_10_encoder_open( param );
+    }
+    else
+        x264_log_internal( X264_LOG_ERROR, "not compiled with %d bit depth support\n", param->i_bitdepth );
+
+    if( !api->x264 )
+    {
+        free( api );
+        return NULL;
+    }
+
+    /* x264_t is opaque */
+    return (x264_t *)api;
+}
+
+REALIGN_STACK void x264_encoder_close( x264_t *h )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    api->encoder_close( api->x264 );
+    free( api );
+}
+
+REALIGN_STACK void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    api->nal_encode( api->x264, dst, nal );
+}
+
+REALIGN_STACK int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    return api->encoder_reconfig( api->x264, param );
+}
+
+REALIGN_STACK void x264_encoder_parameters( x264_t *h, x264_param_t *param )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    api->encoder_parameters( api->x264, param );
+}
+
+REALIGN_STACK int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    return api->encoder_headers( api->x264, pp_nal, pi_nal );
+}
+
+REALIGN_STACK int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out );
+}
+
+REALIGN_STACK int x264_encoder_delayed_frames( x264_t *h )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    return api->encoder_delayed_frames( api->x264 );
+}
+
+REALIGN_STACK int x264_encoder_maximum_delayed_frames( x264_t *h )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    return api->encoder_maximum_delayed_frames( api->x264 );
+}
+
+REALIGN_STACK void x264_encoder_intra_refresh( x264_t *h )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    api->encoder_intra_refresh( api->x264 );
+}
+
+REALIGN_STACK int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
+{
+    x264_api_t *api = (x264_api_t *)h;
+
+    return api->encoder_invalidate_reference( api->x264, pts );
+}
diff -Nru x264-0.152.2854+gite9a5903/encoder/cabac.c x264-0.158.2988+git-20191101.7817004/encoder/cabac.c
--- x264-0.152.2854+gite9a5903/encoder/cabac.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/cabac.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.c: cabac bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -32,8 +32,8 @@
 #define RDO_SKIP_BS 0
 #endif
 
-static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
-                    int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
+static inline void cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
+                                        int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
 {
     if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
     {
@@ -67,7 +67,7 @@
 }
 
 #if !RDO_SKIP_BS
-static void x264_cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb )
+static void cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb )
 {
     int ctx = 0;
     ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x;
@@ -80,7 +80,7 @@
 }
 #endif
 
-static void x264_cabac_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
+static void cabac_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
 {
     if( i_pred == i_mode )
         x264_cabac_encode_decision( cb, 68, 1 );
@@ -95,7 +95,7 @@
     }
 }
 
-static void x264_cabac_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
+static void cabac_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
 {
     int i_mode = x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode];
     int ctx = 0;
@@ -115,7 +115,7 @@
     }
 }
 
-static void x264_cabac_cbp_luma( x264_t *h, x264_cabac_t *cb )
+static void cabac_cbp_luma( x264_t *h, x264_cabac_t *cb )
 {
     int cbp = h->mb.i_cbp_luma;
     int cbp_l = h->mb.cache.i_cbp_left;
@@ -126,7 +126,7 @@
     x264_cabac_encode_decision_noup( cb, 76 - ((cbp   >> 2) & 1) - ((cbp   >> 0) & 2), (cbp >> 3) & 1 );
 }
 
-static void x264_cabac_cbp_chroma( x264_t *h, x264_cabac_t *cb )
+static void cabac_cbp_chroma( x264_t *h, x264_cabac_t *cb )
 {
     int cbp_a = h->mb.cache.i_cbp_left & 0x30;
     int cbp_b = h->mb.cache.i_cbp_top  & 0x30;
@@ -147,7 +147,7 @@
     }
 }
 
-static void x264_cabac_qp_delta( x264_t *h, x264_cabac_t *cb )
+static void cabac_qp_delta( x264_t *h, x264_cabac_t *cb )
 {
     int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
     int ctx;
@@ -195,7 +195,7 @@
 }
 #endif
 
-static inline void x264_cabac_subpartition_p( x264_cabac_t *cb, int i_sub )
+static inline void cabac_subpartition_p( x264_cabac_t *cb, int i_sub )
 {
     if( i_sub == D_L0_8x8 )
     {
@@ -212,7 +212,7 @@
     }
 }
 
-static ALWAYS_INLINE void x264_cabac_subpartition_b( x264_cabac_t *cb, int i_sub )
+static ALWAYS_INLINE void cabac_subpartition_b( x264_cabac_t *cb, int i_sub )
 {
     if( i_sub == D_DIRECT_8x8 )
     {
@@ -232,13 +232,13 @@
     x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
 }
 
-static ALWAYS_INLINE void x264_cabac_transform_size( x264_t *h, x264_cabac_t *cb )
+static ALWAYS_INLINE void cabac_transform_size( x264_t *h, x264_cabac_t *cb )
 {
     int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
     x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
 }
 
-static ALWAYS_INLINE void x264_cabac_ref_internal( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int bframe )
+static ALWAYS_INLINE void cabac_ref_internal( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int bframe )
 {
     const int i8 = x264_scan8[idx];
     const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
@@ -258,16 +258,16 @@
     x264_cabac_encode_decision( cb, 54 + ctx, 0 );
 }
 
-static NOINLINE void x264_cabac_ref_p( x264_t *h, x264_cabac_t *cb, int idx )
+static NOINLINE void cabac_ref_p( x264_t *h, x264_cabac_t *cb, int idx )
 {
-    x264_cabac_ref_internal( h, cb, 0, idx, 0 );
+    cabac_ref_internal( h, cb, 0, idx, 0 );
 }
-static NOINLINE void x264_cabac_ref_b( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
+static NOINLINE void cabac_ref_b( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
 {
-    x264_cabac_ref_internal( h, cb, i_list, idx, 1 );
+    cabac_ref_internal( h, cb, i_list, idx, 1 );
 }
 
-static ALWAYS_INLINE int x264_cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
+static ALWAYS_INLINE int cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
 {
     int ctxbase = l ? 47 : 40;
 
@@ -326,7 +326,7 @@
     return X264_MIN( i_abs, 66 );
 }
 
-static NOINLINE uint16_t x264_cabac_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
+static NOINLINE uint16_t cabac_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
 {
     ALIGNED_4( int16_t mvp[2] );
     int mdx, mdy;
@@ -339,46 +339,46 @@
                                        h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
 
     /* encode */
-    mdx = x264_cabac_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF );
-    mdy = x264_cabac_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 );
+    mdx = cabac_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF );
+    mdy = cabac_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 );
 
     return pack8to16(mdx,mdy);
 }
 
-#define x264_cabac_mvd(h,cb,i_list,idx,width,height)\
+#define cabac_mvd(h,cb,i_list,idx,width,height)\
 do\
 {\
-    uint16_t mvd = x264_cabac_mvd(h,cb,i_list,idx,width);\
+    uint16_t mvd = cabac_mvd(h,cb,i_list,idx,width);\
     x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\
 } while( 0 )
 
-static inline void x264_cabac_8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
+static inline void cabac_8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
 {
     switch( h->mb.i_sub_partition[i] )
     {
         case D_L0_8x8:
-            x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 );
+            cabac_mvd( h, cb, 0, 4*i, 2, 2 );
             break;
         case D_L0_8x4:
-            x264_cabac_mvd( h, cb, 0, 4*i+0, 2, 1 );
-            x264_cabac_mvd( h, cb, 0, 4*i+2, 2, 1 );
+            cabac_mvd( h, cb, 0, 4*i+0, 2, 1 );
+            cabac_mvd( h, cb, 0, 4*i+2, 2, 1 );
             break;
         case D_L0_4x8:
-            x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 2 );
-            x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 2 );
+            cabac_mvd( h, cb, 0, 4*i+0, 1, 2 );
+            cabac_mvd( h, cb, 0, 4*i+1, 1, 2 );
             break;
         case D_L0_4x4:
-            x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 1 );
-            x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 1 );
-            x264_cabac_mvd( h, cb, 0, 4*i+2, 1, 1 );
-            x264_cabac_mvd( h, cb, 0, 4*i+3, 1, 1 );
+            cabac_mvd( h, cb, 0, 4*i+0, 1, 1 );
+            cabac_mvd( h, cb, 0, 4*i+1, 1, 1 );
+            cabac_mvd( h, cb, 0, 4*i+2, 1, 1 );
+            cabac_mvd( h, cb, 0, 4*i+3, 1, 1 );
             break;
         default:
             assert(0);
     }
 }
 
-static ALWAYS_INLINE void x264_cabac_mb_header_i( x264_t *h, x264_cabac_t *cb, int i_mb_type, int slice_type, int chroma )
+static ALWAYS_INLINE void cabac_mb_header_i( x264_t *h, x264_cabac_t *cb, int i_mb_type, int slice_type, int chroma )
 {
     if( slice_type == SLICE_TYPE_I )
     {
@@ -388,7 +388,7 @@
         if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
             ctx++;
 
-        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
+        cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
     }
     else if( slice_type == SLICE_TYPE_P )
     {
@@ -396,7 +396,7 @@
         x264_cabac_encode_decision_noup( cb, 14, 1 );
 
         /* suffix */
-        x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
+        cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
     }
     else if( slice_type == SLICE_TYPE_B )
     {
@@ -408,7 +408,7 @@
         x264_cabac_encode_decision( cb, 27+5,   1 );
 
         /* suffix */
-        x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
+        cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
     }
 
     if( i_mb_type == I_PCM )
@@ -417,22 +417,22 @@
     if( i_mb_type != I_16x16 )
     {
         if( h->pps->b_transform_8x8_mode )
-            x264_cabac_transform_size( h, cb );
+            cabac_transform_size( h, cb );
 
         int di = h->mb.b_transform_8x8 ? 4 : 1;
         for( int i = 0; i < 16; i += di )
         {
             const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
             const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
-            x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
+            cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
         }
     }
 
     if( chroma )
-        x264_cabac_intra_chroma_pred_mode( h, cb );
+        cabac_intra_chroma_pred_mode( h, cb );
 }
 
-static ALWAYS_INLINE void x264_cabac_mb_header_p( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
+static ALWAYS_INLINE void cabac_mb_header_p( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
 {
     if( i_mb_type == P_L0 )
     {
@@ -442,8 +442,8 @@
             x264_cabac_encode_decision_noup( cb, 15, 0 );
             x264_cabac_encode_decision_noup( cb, 16, 0 );
             if( h->mb.pic.i_fref[0] > 1 )
-                x264_cabac_ref_p( h, cb, 0 );
-            x264_cabac_mvd( h, cb, 0, 0, 4, 4 );
+                cabac_ref_p( h, cb, 0 );
+            cabac_mvd( h, cb, 0, 0, 4, 4 );
         }
         else if( h->mb.i_partition == D_16x8 )
         {
@@ -451,11 +451,11 @@
             x264_cabac_encode_decision_noup( cb, 17, 1 );
             if( h->mb.pic.i_fref[0] > 1 )
             {
-                x264_cabac_ref_p( h, cb, 0 );
-                x264_cabac_ref_p( h, cb, 8 );
+                cabac_ref_p( h, cb, 0 );
+                cabac_ref_p( h, cb, 8 );
             }
-            x264_cabac_mvd( h, cb, 0, 0, 4, 2 );
-            x264_cabac_mvd( h, cb, 0, 8, 4, 2 );
+            cabac_mvd( h, cb, 0, 0, 4, 2 );
+            cabac_mvd( h, cb, 0, 8, 4, 2 );
         }
         else //if( h->mb.i_partition == D_8x16 )
         {
@@ -463,11 +463,11 @@
             x264_cabac_encode_decision_noup( cb, 17, 0 );
             if( h->mb.pic.i_fref[0] > 1 )
             {
-                x264_cabac_ref_p( h, cb, 0 );
-                x264_cabac_ref_p( h, cb, 4 );
+                cabac_ref_p( h, cb, 0 );
+                cabac_ref_p( h, cb, 4 );
             }
-            x264_cabac_mvd( h, cb, 0, 0, 2, 4 );
-            x264_cabac_mvd( h, cb, 0, 4, 2, 4 );
+            cabac_mvd( h, cb, 0, 0, 2, 4 );
+            cabac_mvd( h, cb, 0, 4, 2, 4 );
         }
     }
     else if( i_mb_type == P_8x8 )
@@ -478,25 +478,25 @@
 
         /* sub mb type */
         for( int i = 0; i < 4; i++ )
-            x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i] );
+            cabac_subpartition_p( cb, h->mb.i_sub_partition[i] );
 
         /* ref 0 */
         if( h->mb.pic.i_fref[0] > 1 )
         {
-            x264_cabac_ref_p( h, cb,  0 );
-            x264_cabac_ref_p( h, cb,  4 );
-            x264_cabac_ref_p( h, cb,  8 );
-            x264_cabac_ref_p( h, cb, 12 );
+            cabac_ref_p( h, cb,  0 );
+            cabac_ref_p( h, cb,  4 );
+            cabac_ref_p( h, cb,  8 );
+            cabac_ref_p( h, cb, 12 );
         }
 
         for( int i = 0; i < 4; i++ )
-            x264_cabac_8x8_mvd( h, cb, i );
+            cabac_8x8_mvd( h, cb, i );
     }
     else /* intra */
-        x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_P, chroma );
+        cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_P, chroma );
 }
 
-static ALWAYS_INLINE void x264_cabac_mb_header_b( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
+static ALWAYS_INLINE void cabac_mb_header_b( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
 {
     int ctx = 0;
     if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
@@ -521,26 +521,26 @@
 
         /* sub mb type */
         for( int i = 0; i < 4; i++ )
-            x264_cabac_subpartition_b( cb, h->mb.i_sub_partition[i] );
+            cabac_subpartition_b( cb, h->mb.i_sub_partition[i] );
 
         /* ref */
         if( h->mb.pic.i_fref[0] > 1 )
             for( int i = 0; i < 4; i++ )
                 if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
-                    x264_cabac_ref_b( h, cb, 0, 4*i );
+                    cabac_ref_b( h, cb, 0, 4*i );
 
         if( h->mb.pic.i_fref[1] > 1 )
             for( int i = 0; i < 4; i++ )
                 if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
-                    x264_cabac_ref_b( h, cb, 1, 4*i );
+                    cabac_ref_b( h, cb, 1, 4*i );
 
         for( int i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
-                x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 );
+                cabac_mvd( h, cb, 0, 4*i, 2, 2 );
 
         for( int i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
-                x264_cabac_mvd( h, cb, 1, 4*i, 2, 2 );
+                cabac_mvd( h, cb, 1, 4*i, 2, 2 );
     }
     else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI )
     {
@@ -576,40 +576,40 @@
         if( h->mb.pic.i_fref[0] > 1 )
         {
             if( b_list[0][0] )
-                x264_cabac_ref_b( h, cb, 0, 0 );
+                cabac_ref_b( h, cb, 0, 0 );
             if( b_list[0][1] && h->mb.i_partition != D_16x16 )
-                x264_cabac_ref_b( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
+                cabac_ref_b( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
         }
         if( h->mb.pic.i_fref[1] > 1 )
         {
             if( b_list[1][0] )
-                x264_cabac_ref_b( h, cb, 1, 0 );
+                cabac_ref_b( h, cb, 1, 0 );
             if( b_list[1][1] && h->mb.i_partition != D_16x16 )
-                x264_cabac_ref_b( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
+                cabac_ref_b( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
         }
         for( int i_list = 0; i_list < 2; i_list++ )
         {
             if( h->mb.i_partition == D_16x16 )
             {
-                if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 4 );
+                if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 4, 4 );
             }
             else if( h->mb.i_partition == D_16x8 )
             {
-                if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 2 );
-                if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 8, 4, 2 );
+                if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 4, 2 );
+                if( b_list[i_list][1] ) cabac_mvd( h, cb, i_list, 8, 4, 2 );
             }
             else //if( h->mb.i_partition == D_8x16 )
             {
-                if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 2, 4 );
-                if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 4, 2, 4 );
+                if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 2, 4 );
+                if( b_list[i_list][1] ) cabac_mvd( h, cb, i_list, 4, 2, 4 );
             }
         }
     }
     else /* intra */
-        x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma );
+        cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma );
 }
 
-static int ALWAYS_INLINE x264_cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc )
+static ALWAYS_INLINE int cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc )
 {
     static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
 
@@ -644,53 +644,6 @@
     }
 }
 
-#if !RDO_SKIP_BS
-extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][64];
-extern const uint8_t x264_last_coeff_flag_offset_8x8[63];
-extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7];
-extern const uint16_t x264_significant_coeff_flag_offset[2][16];
-extern const uint16_t x264_last_coeff_flag_offset[2][16];
-extern const uint16_t x264_coeff_abs_level_m1_offset[16];
-extern const uint8_t x264_count_cat_m1[14];
-#else
-/* Padded to [64] for easier addressing */
-const uint8_t x264_significant_coeff_flag_offset_8x8[2][64] =
-{{
-    0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
-    4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
-    7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
-   12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
-},{
-    0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
-    6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
-    9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
-    9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
-}};
-const uint8_t x264_last_coeff_flag_offset_8x8[63] =
-{
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
-};
-const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */
-const uint16_t x264_significant_coeff_flag_offset[2][16] =
-{
-    { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718, 0, 0 },
-    { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733, 0, 0 }
-};
-const uint16_t x264_last_coeff_flag_offset[2][16] =
-{
-    { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748, 0, 0 },
-    { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757, 0, 0 }
-};
-const uint16_t x264_coeff_abs_level_m1_offset[16] =
-{
-    227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
-};
-const uint8_t x264_count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63};
-#endif
-
 // node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
 //           4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
 /* map node ctx => cabac ctx for level=1 */
@@ -709,7 +662,7 @@
 };
 
 #if !RDO_SKIP_BS
-static ALWAYS_INLINE void x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc )
+static ALWAYS_INLINE void cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc )
 {
     int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
     int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
@@ -796,10 +749,10 @@
 
 void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
-    x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 );
+    cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 );
 }
 
-static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+static ALWAYS_INLINE void cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
 #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
     h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
@@ -807,19 +760,19 @@
     x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
 #endif
 }
-static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+static void cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
     /* Template a version specifically for chroma 4:2:2 DC in order to avoid
      * slowing down everything else due to the added complexity. */
-    x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 1 );
+    cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 1 );
 }
-#define x264_cabac_block_residual_8x8( h, cb, cat, l ) x264_cabac_block_residual( h, cb, cat, l )
+#define cabac_block_residual_8x8( h, cb, cat, l ) cabac_block_residual( h, cb, cat, l )
 #else
 
 /* Faster RDO by merging sigmap and level coding. Note that for 8x8dct and chroma 4:2:2 dc this is
  * slightly incorrect because the sigmap is not reversible (contexts are repeated). However, there
  * is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */
-static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
+static ALWAYS_INLINE void cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
 {
     const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
     int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
@@ -906,14 +859,14 @@
 
 void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
-    x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 );
+    cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 );
 }
 void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
-    x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 );
+    cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 );
 }
 
-static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+static ALWAYS_INLINE void cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
 #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
     h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
@@ -921,7 +874,7 @@
     x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
 #endif
 }
-static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+static ALWAYS_INLINE void cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
 #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
     h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
@@ -930,38 +883,38 @@
 #endif
 }
 
-static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
+static void cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
 {
-    x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 );
+    cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 );
 }
 #endif
 
-#define x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, b_dc, name )\
+#define cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, b_dc, name )\
 do\
 {\
-    int ctxidxinc = x264_cabac_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra, b_dc );\
+    int ctxidxinc = cabac_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra, b_dc );\
     if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
     {\
         x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
-        x264_cabac_block_residual##name( h, cb, ctx_block_cat, l );\
+        cabac_block_residual##name( h, cb, ctx_block_cat, l );\
     }\
     else\
         x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
 } while( 0 )
 
-#define x264_cabac_block_residual_dc_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
-    x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 1, )
+#define cabac_block_residual_dc_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
+    cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 1, )
 
-#define x264_cabac_block_residual_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
-    x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, )
+#define cabac_block_residual_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
+    cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, )
 
-#define x264_cabac_block_residual_8x8_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
-    x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, _8x8 )
+#define cabac_block_residual_8x8_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\
+    cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, _8x8 )
 
-#define x264_cabac_block_residual_422_dc_cbf( h, cb, ch, b_intra )\
-    x264_cabac_block_residual_cbf_internal( h, cb, DCT_CHROMA_DC, CHROMA_DC+(ch), h->dct.chroma_dc[ch], b_intra, 1, _422_dc )
+#define cabac_block_residual_422_dc_cbf( h, cb, ch, b_intra )\
+    cabac_block_residual_cbf_internal( h, cb, DCT_CHROMA_DC, CHROMA_DC+(ch), h->dct.chroma_dc[ch], b_intra, 1, _422_dc )
 
-static ALWAYS_INLINE void x264_macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma )
+static ALWAYS_INLINE void macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma )
 {
     const int i_mb_type = h->mb.i_type;
 
@@ -972,16 +925,16 @@
     if( SLICE_MBAFF &&
         (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
     {
-        x264_cabac_field_decoding_flag( h, cb );
+        cabac_field_decoding_flag( h, cb );
     }
 #endif
 
     if( h->sh.i_type == SLICE_TYPE_P )
-        x264_cabac_mb_header_p( h, cb, i_mb_type, chroma );
+        cabac_mb_header_p( h, cb, i_mb_type, chroma );
     else if( h->sh.i_type == SLICE_TYPE_B )
-        x264_cabac_mb_header_b( h, cb, i_mb_type, chroma );
+        cabac_mb_header_b( h, cb, i_mb_type, chroma );
     else //if( h->sh.i_type == SLICE_TYPE_I )
-        x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_I, chroma );
+        cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_I, chroma );
 
 #if !RDO_SKIP_BS
     i_mb_pos_tex = x264_cabac_pos( cb );
@@ -1012,20 +965,20 @@
 
     if( i_mb_type != I_16x16 )
     {
-        x264_cabac_cbp_luma( h, cb );
+        cabac_cbp_luma( h, cb );
         if( chroma )
-            x264_cabac_cbp_chroma( h, cb );
+            cabac_cbp_chroma( h, cb );
     }
 
     if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
     {
-        x264_cabac_transform_size( h, cb );
+        cabac_transform_size( h, cb );
     }
 
     if( h->mb.i_cbp_luma || (chroma && h->mb.i_cbp_chroma) || i_mb_type == I_16x16 )
     {
         const int b_intra = IS_INTRA( i_mb_type );
-        x264_cabac_qp_delta( h, cb );
+        cabac_qp_delta( h, cb );
 
         /* write residual */
         if( i_mb_type == I_16x16 )
@@ -1033,12 +986,12 @@
             /* DC Luma */
             for( int p = 0; p < plane_count; p++ )
             {
-                x264_cabac_block_residual_dc_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 );
+                cabac_block_residual_dc_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 );
 
                 /* AC Luma */
                 if( h->mb.i_cbp_luma )
                     for( int i = p*16; i < p*16+16; i++ )
-                        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 );
+                        cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 );
             }
         }
         else if( h->mb.b_transform_8x8 )
@@ -1086,14 +1039,14 @@
 
                 for( int p = 0; p < 3; p++ )
                     FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
-                        x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );
+                        cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );
 
                 MUNGE_8x8_NNZ( RESTORE )
             }
             else
             {
                 FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
-                    x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
+                    cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
             }
         }
         else
@@ -1101,20 +1054,20 @@
             for( int p = 0; p < plane_count; p++ )
                 FOREACH_BIT( i8x8, 0, h->mb.i_cbp_luma )
                     for( int i = 0; i < 4; i++ )
-                        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+i8x8*4+p*16, h->dct.luma4x4[i+i8x8*4+p*16], b_intra );
+                        cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+i8x8*4+p*16, h->dct.luma4x4[i+i8x8*4+p*16], b_intra );
         }
 
         if( chroma && h->mb.i_cbp_chroma ) /* Chroma DC residual present */
         {
             if( CHROMA_FORMAT == CHROMA_422 )
             {
-                x264_cabac_block_residual_422_dc_cbf( h, cb, 0, b_intra );
-                x264_cabac_block_residual_422_dc_cbf( h, cb, 1, b_intra );
+                cabac_block_residual_422_dc_cbf( h, cb, 0, b_intra );
+                cabac_block_residual_422_dc_cbf( h, cb, 1, b_intra );
             }
             else
             {
-                x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra );
-                x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra );
+                cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra );
+                cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra );
             }
 
             if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */
@@ -1122,7 +1075,7 @@
                 int step = 8 << CHROMA_V_SHIFT;
                 for( int i = 16; i < 3*16; i += step )
                     for( int j = i; j < i+4; j++ )
-                        x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, b_intra );
+                        cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, b_intra );
             }
         }
     }
@@ -1135,9 +1088,11 @@
 void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
 {
     if( CHROMA444 )
-        x264_macroblock_write_cabac_internal( h, cb, 3, 0 );
+        macroblock_write_cabac_internal( h, cb, 3, 0 );
+    else if( CHROMA_FORMAT )
+        macroblock_write_cabac_internal( h, cb, 1, 1 );
     else
-        x264_macroblock_write_cabac_internal( h, cb, 1, 1 );
+        macroblock_write_cabac_internal( h, cb, 1, 0 );
 }
 
 #if RDO_SKIP_BS
@@ -1148,7 +1103,7 @@
  * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
  * works on all partition sizes except 16x16
  *****************************************************************************/
-static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
+static void partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
 {
     const int i_mb_type = h->mb.i_type;
     int b_8x16 = h->mb.i_partition == D_8x16;
@@ -1156,22 +1111,22 @@
 
     if( i_mb_type == P_8x8 )
     {
-        x264_cabac_8x8_mvd( h, cb, i8 );
-        x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i8] );
+        cabac_8x8_mvd( h, cb, i8 );
+        cabac_subpartition_p( cb, h->mb.i_sub_partition[i8] );
     }
     else if( i_mb_type == P_L0 )
-        x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
+        cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
     else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
     {
-        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
-        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cabac_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
     }
     else //if( i_mb_type == B_8x8 )
     {
         if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
-            x264_cabac_mvd( h, cb, 0, 4*i8, 2, 2 );
+            cabac_mvd( h, cb, 0, 4*i8, 2, 2 );
         if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
-            x264_cabac_mvd( h, cb, 1, 4*i8, 2, 2 );
+            cabac_mvd( h, cb, 1, 4*i8, 2, 2 );
     }
 
     for( int j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
@@ -1182,14 +1137,14 @@
             {
                 if( CHROMA444 )
                     for( int p = 0; p < 3; p++ )
-                        x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 );
+                        cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 );
                 else
-                    x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
+                    cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
             }
             else
                 for( int p = 0; p < plane_count; p++ )
                     for( int i4 = 0; i4 < 4; i4++ )
-                        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 );
+                        cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 );
         }
 
         if( h->mb.i_cbp_chroma )
@@ -1197,15 +1152,15 @@
             if( CHROMA_FORMAT == CHROMA_422 )
             {
                 int offset = (5*i8) & 0x09;
-                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+offset, h->dct.luma4x4[16+offset]+1, 0 );
-                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 18+offset, h->dct.luma4x4[18+offset]+1, 0 );
-                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+offset, h->dct.luma4x4[32+offset]+1, 0 );
-                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 34+offset, h->dct.luma4x4[34+offset]+1, 0 );
+                cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+offset, h->dct.luma4x4[16+offset]+1, 0 );
+                cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 18+offset, h->dct.luma4x4[18+offset]+1, 0 );
+                cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+offset, h->dct.luma4x4[32+offset]+1, 0 );
+                cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 34+offset, h->dct.luma4x4[34+offset]+1, 0 );
             }
             else
             {
-                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
-                x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 );
+                cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
+                cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 );
             }
         }
 
@@ -1213,63 +1168,63 @@
     }
 }
 
-static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
+static void subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
 {
     int b_8x4 = i_pixel == PIXEL_8x4;
     int plane_count = CHROMA444 ? 3 : 1;
     if( i_pixel == PIXEL_4x4 )
-        x264_cabac_mvd( h, cb, 0, i4, 1, 1 );
+        cabac_mvd( h, cb, 0, i4, 1, 1 );
     else
-        x264_cabac_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
+        cabac_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
     for( int p = 0; p < plane_count; p++ )
     {
-        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 );
+        cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 );
         if( i_pixel != PIXEL_4x4 )
-            x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 );
+            cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 );
     }
 }
 
-static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
+static void partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
 {
     const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 );
     i_mode = x264_mb_pred_mode4x4_fix( i_mode );
-    x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
-    x264_cabac_cbp_luma( h, cb );
+    cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
+    cabac_cbp_luma( h, cb );
     if( h->mb.i_cbp_luma & (1 << i8) )
     {
         if( CHROMA444 )
             for( int p = 0; p < 3; p++ )
-                x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 );
+                cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 );
         else
-            x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
+            cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
     }
 }
 
-static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
+static void partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
 {
     const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
     int plane_count = CHROMA444 ? 3 : 1;
     i_mode = x264_mb_pred_mode4x4_fix( i_mode );
-    x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
+    cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
     for( int p = 0; p < plane_count; p++ )
-        x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 );
+        cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 );
 }
 
-static void x264_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
+static void chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
 {
-    x264_cabac_intra_chroma_pred_mode( h, cb );
-    x264_cabac_cbp_chroma( h, cb );
+    cabac_intra_chroma_pred_mode( h, cb );
+    cabac_cbp_chroma( h, cb );
     if( h->mb.i_cbp_chroma )
     {
         if( CHROMA_FORMAT == CHROMA_422 )
         {
-            x264_cabac_block_residual_422_dc_cbf( h, cb, 0, 1 );
-            x264_cabac_block_residual_422_dc_cbf( h, cb, 1, 1 );
+            cabac_block_residual_422_dc_cbf( h, cb, 0, 1 );
+            cabac_block_residual_422_dc_cbf( h, cb, 1, 1 );
         }
         else
         {
-            x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 );
-            x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 );
+            cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 );
+            cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 );
         }
 
         if( h->mb.i_cbp_chroma == 2 )
@@ -1277,7 +1232,7 @@
             int step = 8 << CHROMA_V_SHIFT;
             for( int i = 16; i < 3*16; i += step )
                 for( int j = i; j < i+4; j++ )
-                    x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, 1 );
+                    cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, 1 );
         }
     }
 }
diff -Nru x264-0.152.2854+gite9a5903/encoder/cavlc.c x264-0.158.2988+git-20191101.7817004/encoder/cavlc.c
--- x264-0.152.2854+gite9a5903/encoder/cavlc.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/cavlc.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cavlc.c: cavlc bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -67,7 +67,7 @@
 /****************************************************************************
  * x264_cavlc_block_residual:
  ****************************************************************************/
-static inline int x264_cavlc_block_residual_escape( x264_t *h, int i_suffix_length, int level )
+static inline int cavlc_block_residual_escape( x264_t *h, int i_suffix_length, int level )
 {
     bs_t *s = &h->out.bs;
     static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
@@ -118,7 +118,7 @@
     return i_suffix_length;
 }
 
-static int x264_cavlc_block_residual_internal( x264_t *h, int ctx_block_cat, dctcoef *l, int nC )
+static int cavlc_block_residual_internal( x264_t *h, int ctx_block_cat, dctcoef *l, int nC )
 {
     bs_t *s = &h->out.bs;
     static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
@@ -163,7 +163,7 @@
             i_suffix_length = x264_level_token[i_suffix_length][val_original].i_next;
         }
         else
-            i_suffix_length = x264_cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
+            i_suffix_length = cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
         for( int i = i_trailing+1; i < i_total; i++ )
         {
             val = runlevel.level[i] + LEVEL_TABLE_SIZE/2;
@@ -173,7 +173,7 @@
                 i_suffix_length = x264_level_token[i_suffix_length][val].i_next;
             }
             else
-                i_suffix_length = x264_cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
+                i_suffix_length = cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
         }
     }
 
@@ -205,10 +205,10 @@
     if( !*nnz )\
         bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
     else\
-        *nnz = x264_cavlc_block_residual_internal(h,cat,l,nC);\
+        *nnz = cavlc_block_residual_internal(h,cat,l,nC);\
 }
 
-static void x264_cavlc_qp_delta( x264_t *h )
+static void cavlc_qp_delta( x264_t *h )
 {
     bs_t *s = &h->out.bs;
     int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
@@ -238,7 +238,7 @@
     bs_write_se( s, i_dqp );
 }
 
-static void x264_cavlc_mvd( x264_t *h, int i_list, int idx, int width )
+static void cavlc_mvd( x264_t *h, int i_list, int idx, int width )
 {
     bs_t *s = &h->out.bs;
     ALIGNED_4( int16_t mvp[2] );
@@ -247,31 +247,31 @@
     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
 }
 
-static inline void x264_cavlc_8x8_mvd( x264_t *h, int i )
+static inline void cavlc_8x8_mvd( x264_t *h, int i )
 {
     switch( h->mb.i_sub_partition[i] )
     {
         case D_L0_8x8:
-            x264_cavlc_mvd( h, 0, 4*i, 2 );
+            cavlc_mvd( h, 0, 4*i, 2 );
             break;
         case D_L0_8x4:
-            x264_cavlc_mvd( h, 0, 4*i+0, 2 );
-            x264_cavlc_mvd( h, 0, 4*i+2, 2 );
+            cavlc_mvd( h, 0, 4*i+0, 2 );
+            cavlc_mvd( h, 0, 4*i+2, 2 );
             break;
         case D_L0_4x8:
-            x264_cavlc_mvd( h, 0, 4*i+0, 1 );
-            x264_cavlc_mvd( h, 0, 4*i+1, 1 );
+            cavlc_mvd( h, 0, 4*i+0, 1 );
+            cavlc_mvd( h, 0, 4*i+1, 1 );
             break;
         case D_L0_4x4:
-            x264_cavlc_mvd( h, 0, 4*i+0, 1 );
-            x264_cavlc_mvd( h, 0, 4*i+1, 1 );
-            x264_cavlc_mvd( h, 0, 4*i+2, 1 );
-            x264_cavlc_mvd( h, 0, 4*i+3, 1 );
+            cavlc_mvd( h, 0, 4*i+0, 1 );
+            cavlc_mvd( h, 0, 4*i+1, 1 );
+            cavlc_mvd( h, 0, 4*i+2, 1 );
+            cavlc_mvd( h, 0, 4*i+3, 1 );
             break;
     }
 }
 
-static ALWAYS_INLINE void x264_cavlc_macroblock_luma_residual( x264_t *h, int plane_count )
+static ALWAYS_INLINE void cavlc_macroblock_luma_residual( x264_t *h, int plane_count )
 {
     if( h->mb.b_transform_8x8 )
     {
@@ -290,9 +290,9 @@
 }
 
 #if RDO_SKIP_BS
-static ALWAYS_INLINE void x264_cavlc_partition_luma_residual( x264_t *h, int i8, int p )
+static ALWAYS_INLINE void cavlc_partition_luma_residual( x264_t *h, int i8, int p )
 {
-    if( h->mb.b_transform_8x8 && h->mb.cache.non_zero_count[x264_scan8[i8*4]] )
+    if( h->mb.b_transform_8x8 && h->mb.cache.non_zero_count[x264_scan8[i8*4+p*16]] )
         h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4+p*16], h->dct.luma8x8[i8+p*4],
                                          &h->mb.cache.non_zero_count[x264_scan8[i8*4+p*16]] );
 
@@ -302,7 +302,7 @@
 }
 #endif
 
-static void x264_cavlc_mb_header_i( x264_t *h, int i_mb_type, int i_mb_i_offset, int chroma )
+static void cavlc_mb_header_i( x264_t *h, int i_mb_type, int i_mb_i_offset, int chroma )
 {
     bs_t *s = &h->out.bs;
     if( i_mb_type == I_16x16 )
@@ -334,7 +334,7 @@
         bs_write_ue( s, x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode] );
 }
 
-static ALWAYS_INLINE void x264_cavlc_mb_header_p( x264_t *h, int i_mb_type, int chroma )
+static ALWAYS_INLINE void cavlc_mb_header_p( x264_t *h, int i_mb_type, int chroma )
 {
     bs_t *s = &h->out.bs;
     if( i_mb_type == P_L0 )
@@ -345,7 +345,7 @@
 
             if( h->mb.pic.i_fref[0] > 1 )
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
-            x264_cavlc_mvd( h, 0, 0, 4 );
+            cavlc_mvd( h, 0, 0, 4 );
         }
         else if( h->mb.i_partition == D_16x8 )
         {
@@ -355,8 +355,8 @@
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
             }
-            x264_cavlc_mvd( h, 0, 0, 4 );
-            x264_cavlc_mvd( h, 0, 8, 4 );
+            cavlc_mvd( h, 0, 0, 4 );
+            cavlc_mvd( h, 0, 8, 4 );
         }
         else if( h->mb.i_partition == D_8x16 )
         {
@@ -366,8 +366,8 @@
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
             }
-            x264_cavlc_mvd( h, 0, 0, 2 );
-            x264_cavlc_mvd( h, 0, 4, 2 );
+            cavlc_mvd( h, 0, 0, 2 );
+            cavlc_mvd( h, 0, 4, 2 );
         }
     }
     else if( i_mb_type == P_8x8 )
@@ -402,13 +402,13 @@
         }
 
         for( int i = 0; i < 4; i++ )
-            x264_cavlc_8x8_mvd( h, i );
+            cavlc_8x8_mvd( h, i );
     }
     else //if( IS_INTRA( i_mb_type ) )
-        x264_cavlc_mb_header_i( h, i_mb_type, 5, chroma );
+        cavlc_mb_header_i( h, i_mb_type, 5, chroma );
 }
 
-static ALWAYS_INLINE void x264_cavlc_mb_header_b( x264_t *h, int i_mb_type, int chroma )
+static ALWAYS_INLINE void cavlc_mb_header_b( x264_t *h, int i_mb_type, int chroma )
 {
     bs_t *s = &h->out.bs;
     if( i_mb_type == B_8x8 )
@@ -432,10 +432,10 @@
         /* mvd */
         for( int i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
-                x264_cavlc_mvd( h, 0, 4*i, 2 );
+                cavlc_mvd( h, 0, 4*i, 2 );
         for( int i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
-                x264_cavlc_mvd( h, 1, 4*i, 2 );
+                cavlc_mvd( h, 1, 4*i, 2 );
     }
     else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI )
     {
@@ -450,8 +450,8 @@
         {
             if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
             if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
-            if( b_list[0][0] ) x264_cavlc_mvd( h, 0, 0, 4 );
-            if( b_list[1][0] ) x264_cavlc_mvd( h, 1, 0, 4 );
+            if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 4 );
+            if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 4 );
         }
         else
         {
@@ -461,24 +461,24 @@
             if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
             if( h->mb.i_partition == D_16x8 )
             {
-                if( b_list[0][0] ) x264_cavlc_mvd( h, 0, 0, 4 );
-                if( b_list[0][1] ) x264_cavlc_mvd( h, 0, 8, 4 );
-                if( b_list[1][0] ) x264_cavlc_mvd( h, 1, 0, 4 );
-                if( b_list[1][1] ) x264_cavlc_mvd( h, 1, 8, 4 );
+                if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 4 );
+                if( b_list[0][1] ) cavlc_mvd( h, 0, 8, 4 );
+                if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 4 );
+                if( b_list[1][1] ) cavlc_mvd( h, 1, 8, 4 );
             }
             else //if( h->mb.i_partition == D_8x16 )
             {
-                if( b_list[0][0] ) x264_cavlc_mvd( h, 0, 0, 2 );
-                if( b_list[0][1] ) x264_cavlc_mvd( h, 0, 4, 2 );
-                if( b_list[1][0] ) x264_cavlc_mvd( h, 1, 0, 2 );
-                if( b_list[1][1] ) x264_cavlc_mvd( h, 1, 4, 2 );
+                if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 2 );
+                if( b_list[0][1] ) cavlc_mvd( h, 0, 4, 2 );
+                if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 2 );
+                if( b_list[1][1] ) cavlc_mvd( h, 1, 4, 2 );
             }
         }
     }
     else if( i_mb_type == B_DIRECT )
         bs_write1( s, 1 );
     else //if( IS_INTRA( i_mb_type ) )
-        x264_cavlc_mb_header_i( h, i_mb_type, 23, chroma );
+        cavlc_mb_header_i( h, i_mb_type, 23, chroma );
 }
 
 /*****************************************************************************
@@ -489,7 +489,7 @@
     bs_t *s = &h->out.bs;
     const int i_mb_type = h->mb.i_type;
     int plane_count = CHROMA444 ? 3 : 1;
-    int chroma = !CHROMA444;
+    int chroma = CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422;
 
 #if RDO_SKIP_BS
     s->i_bits_encoded = 0;
@@ -536,11 +536,11 @@
 #endif
 
     if( h->sh.i_type == SLICE_TYPE_P )
-        x264_cavlc_mb_header_p( h, i_mb_type, chroma );
+        cavlc_mb_header_p( h, i_mb_type, chroma );
     else if( h->sh.i_type == SLICE_TYPE_B )
-        x264_cavlc_mb_header_b( h, i_mb_type, chroma );
+        cavlc_mb_header_b( h, i_mb_type, chroma );
     else //if( h->sh.i_type == SLICE_TYPE_I )
-        x264_cavlc_mb_header_i( h, i_mb_type, 0, chroma );
+        cavlc_mb_header_i( h, i_mb_type, 0, chroma );
 
 #if !RDO_SKIP_BS
     i_mb_pos_tex = bs_pos( s );
@@ -557,7 +557,7 @@
 
     if( i_mb_type == I_16x16 )
     {
-        x264_cavlc_qp_delta( h );
+        cavlc_qp_delta( h );
 
         /* DC Luma */
         for( int p = 0; p < plane_count; p++ )
@@ -572,8 +572,8 @@
     }
     else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
     {
-        x264_cavlc_qp_delta( h );
-        x264_cavlc_macroblock_luma_residual( h, plane_count );
+        cavlc_qp_delta( h );
+        cavlc_macroblock_luma_residual( h, plane_count );
     }
     if( h->mb.i_cbp_chroma )
     {
@@ -602,7 +602,7 @@
  * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
  * works on all partition sizes except 16x16
  *****************************************************************************/
-static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
+static int partition_size_cavlc( x264_t *h, int i8, int i_pixel )
 {
     bs_t *s = &h->out.bs;
     const int i_mb_type = h->mb.i_type;
@@ -614,28 +614,28 @@
 
     if( i_mb_type == P_8x8 )
     {
-        x264_cavlc_8x8_mvd( h, i8 );
+        cavlc_8x8_mvd( h, i8 );
         bs_write_ue( s, subpartition_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
     }
     else if( i_mb_type == P_L0 )
-        x264_cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 );
+        cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 );
     else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
     {
-        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 );
-        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mvd( h, 1, 4*i8, 4>>b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mvd( h, 1, 4*i8, 4>>b_8x16 );
     }
     else //if( i_mb_type == B_8x8 )
     {
         if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
-            x264_cavlc_mvd( h, 0, 4*i8, 2 );
+            cavlc_mvd( h, 0, 4*i8, 2 );
         if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
-            x264_cavlc_mvd( h, 1, 4*i8, 2 );
+            cavlc_mvd( h, 1, 4*i8, 2 );
     }
 
     for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
     {
         for( int p = 0; p < plane_count; p++ )
-            x264_cavlc_partition_luma_residual( h, i8, p );
+            cavlc_partition_luma_residual( h, i8, p );
         if( h->mb.i_cbp_chroma )
         {
             if( CHROMA_FORMAT == CHROMA_422 )
@@ -658,12 +658,12 @@
     return h->out.bs.i_bits_encoded;
 }
 
-static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
+static int subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
 {
     int plane_count = CHROMA444 ? 3 : 1;
     int b_8x4 = i_pixel == PIXEL_8x4;
     h->out.bs.i_bits_encoded = 0;
-    x264_cavlc_mvd( h, 0, i4, 1+b_8x4 );
+    cavlc_mvd( h, 0, i4, 1+b_8x4 );
     for( int p = 0; p < plane_count; p++ )
     {
         x264_cavlc_block_residual( h, DCT_LUMA_4x4, p*16+i4, h->dct.luma4x4[p*16+i4] );
@@ -674,7 +674,7 @@
     return h->out.bs.i_bits_encoded;
 }
 
-static int x264_cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
+static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
 {
     if( x264_mb_predict_intra4x4_mode( h, i4 ) == x264_mb_pred_mode4x4_fix( i_mode ) )
         return 1;
@@ -682,26 +682,26 @@
         return 4;
 }
 
-static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
+static int partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
 {
     int plane_count = CHROMA444 ? 3 : 1;
-    h->out.bs.i_bits_encoded = x264_cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
+    h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
     bs_write_ue( &h->out.bs, cbp_to_golomb[!CHROMA444][1][(h->mb.i_cbp_chroma << 4)|h->mb.i_cbp_luma] );
     for( int p = 0; p < plane_count; p++ )
-        x264_cavlc_partition_luma_residual( h, i8, p );
+        cavlc_partition_luma_residual( h, i8, p );
     return h->out.bs.i_bits_encoded;
 }
 
-static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
+static int partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
 {
     int plane_count = CHROMA444 ? 3 : 1;
-    h->out.bs.i_bits_encoded = x264_cavlc_intra4x4_pred_size( h, i4, i_mode );
+    h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
     for( int p = 0; p < plane_count; p++ )
         x264_cavlc_block_residual( h, DCT_LUMA_4x4, p*16+i4, h->dct.luma4x4[p*16+i4] );
     return h->out.bs.i_bits_encoded;
 }
 
-static int x264_chroma_size_cavlc( x264_t *h )
+static int chroma_size_cavlc( x264_t *h )
 {
     h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode] );
     if( h->mb.i_cbp_chroma )
diff -Nru x264-0.152.2854+gite9a5903/encoder/encoder.c x264-0.158.2988+git-20191101.7817004/encoder/encoder.c
--- x264-0.152.2854+gite9a5903/encoder/encoder.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/encoder.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * encoder.c: top-level encoder functions
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -40,16 +40,20 @@
 
 #define bs_write_ue bs_write_ue_big
 
-static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
-                                   x264_nal_t **pp_nal, int *pi_nal,
-                                   x264_picture_t *pic_out );
+// forward declaration needed for template usage
+void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
+void x264_macroblock_cache_load_progressive( x264_t *h, int i_mb_x, int i_mb_y );
+
+static int encoder_frame_end( x264_t *h, x264_t *thread_current,
+                              x264_nal_t **pp_nal, int *pi_nal,
+                              x264_picture_t *pic_out );
 
 /****************************************************************************
  *
  ******************************* x264 libs **********************************
  *
  ****************************************************************************/
-static double x264_psnr( double sqe, double size )
+static double calc_psnr( double sqe, double size )
 {
     double mse = sqe / (PIXEL_MAX*PIXEL_MAX * size);
     if( mse <= 0.0000000001 ) /* Max 100dB */
@@ -58,7 +62,7 @@
     return -10.0 * log10( mse );
 }
 
-static double x264_ssim( double ssim )
+static double calc_ssim_db( double ssim )
 {
     double inv_ssim = 1 - ssim;
     if( inv_ssim <= 0.0000000001 ) /* Max 100dB */
@@ -67,7 +71,7 @@
     return -10.0 * log10( inv_ssim );
 }
 
-static int x264_threadpool_wait_all( x264_t *h )
+static int threadpool_wait_all( x264_t *h )
 {
     for( int i = 0; i < h->param.i_threads; i++ )
         if( h->thread[i]->b_thread_active )
@@ -79,7 +83,7 @@
     return 0;
 }
 
-static void x264_frame_dump( x264_t *h )
+static void frame_dump( x264_t *h )
 {
     FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" );
     if( !f )
@@ -87,7 +91,7 @@
 
     /* Wait for the threads to finish deblocking */
     if( h->param.b_sliced_threads )
-        x264_threadpool_wait_all( h );
+        threadpool_wait_all( h );
 
     /* Write the frame in display order */
     int frame_size = FRAME_SIZE( h->param.i_height * h->param.i_width * sizeof(pixel) );
@@ -96,7 +100,7 @@
         for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
             for( int y = 0; y < h->param.i_height; y++ )
                 fwrite( &h->fdec->plane[p][y*h->fdec->i_stride[p]], sizeof(pixel), h->param.i_width, f );
-        if( !CHROMA444 )
+        if( CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422 )
         {
             int cw = h->param.i_width>>1;
             int ch = h->param.i_height>>CHROMA_V_SHIFT;
@@ -115,9 +119,9 @@
 }
 
 /* Fill "default" values */
-static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
-                                    x264_sps_t *sps, x264_pps_t *pps,
-                                    int i_idr_pic_id, int i_frame, int i_qp )
+static void slice_header_init( x264_t *h, x264_slice_header_t *sh,
+                               x264_sps_t *sps, x264_pps_t *pps,
+                               int i_idr_pic_id, int i_frame, int i_qp )
 {
     x264_param_t *param = &h->param;
 
@@ -206,7 +210,7 @@
     sh->i_beta_offset = param->i_deblocking_filter_beta << 1;
 }
 
-static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal_ref_idc )
+static void slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal_ref_idc )
 {
     if( sh->b_mbaff )
     {
@@ -289,25 +293,29 @@
     {
         sh->b_weighted_pred = sh->weight[0][0].weightfn || sh->weight[0][1].weightfn || sh->weight[0][2].weightfn;
         /* pred_weight_table() */
-        bs_write_ue( s, sh->weight[0][0].i_denom );
-        bs_write_ue( s, sh->weight[0][1].i_denom );
+        bs_write_ue( s, sh->weight[0][0].i_denom ); /* luma_log2_weight_denom */
+        if( sh->sps->i_chroma_format_idc )
+            bs_write_ue( s, sh->weight[0][1].i_denom ); /* chroma_log2_weight_denom */
         for( int i = 0; i < sh->i_num_ref_idx_l0_active; i++ )
         {
             int luma_weight_l0_flag = !!sh->weight[i][0].weightfn;
-            int chroma_weight_l0_flag = !!sh->weight[i][1].weightfn || !!sh->weight[i][2].weightfn;
             bs_write1( s, luma_weight_l0_flag );
             if( luma_weight_l0_flag )
             {
                 bs_write_se( s, sh->weight[i][0].i_scale );
                 bs_write_se( s, sh->weight[i][0].i_offset );
             }
-            bs_write1( s, chroma_weight_l0_flag );
-            if( chroma_weight_l0_flag )
+            if( sh->sps->i_chroma_format_idc )
             {
-                for( int j = 1; j < 3; j++ )
+                int chroma_weight_l0_flag = sh->weight[i][1].weightfn || sh->weight[i][2].weightfn;
+                bs_write1( s, chroma_weight_l0_flag );
+                if( chroma_weight_l0_flag )
                 {
-                    bs_write_se( s, sh->weight[i][j].i_scale );
-                    bs_write_se( s, sh->weight[i][j].i_offset );
+                    for( int j = 1; j < 3; j++ )
+                    {
+                        bs_write_se( s, sh->weight[i][j].i_scale );
+                        bs_write_se( s, sh->weight[i][j].i_offset );
+                    }
                 }
             }
         }
@@ -357,7 +365,7 @@
 
 /* If we are within a reasonable distance of the end of the memory allocated for the bitstream, */
 /* reallocate, adding an arbitrary amount of space. */
-static int x264_bitstream_check_buffer_internal( x264_t *h, int size, int b_cabac, int i_nal )
+static int bitstream_check_buffer_internal( x264_t *h, int size, int b_cabac, int i_nal )
 {
     if( (b_cabac && (h->cabac.p_end - h->cabac.p < size)) ||
         (h->out.bs.p_end - h->out.bs.p < size) )
@@ -390,20 +398,20 @@
     return 0;
 }
 
-static int x264_bitstream_check_buffer( x264_t *h )
+static int bitstream_check_buffer( x264_t *h )
 {
     int max_row_size = (2500 << SLICE_MBAFF) * h->mb.i_mb_width;
-    return x264_bitstream_check_buffer_internal( h, max_row_size, h->param.b_cabac, h->out.i_nal );
+    return bitstream_check_buffer_internal( h, max_row_size, h->param.b_cabac, h->out.i_nal );
 }
 
-static int x264_bitstream_check_buffer_filler( x264_t *h, int filler )
+static int bitstream_check_buffer_filler( x264_t *h, int filler )
 {
     filler += 32; // add padding for safety
-    return x264_bitstream_check_buffer_internal( h, filler, 0, -1 );
+    return bitstream_check_buffer_internal( h, filler, 0, -1 );
 }
 
 #if HAVE_THREAD
-static void x264_encoder_thread_init( x264_t *h )
+static void encoder_thread_init( x264_t *h )
 {
     if( h->param.i_sync_lookahead )
         x264_lower_thread_priority( 10 );
@@ -418,11 +426,11 @@
  *
  ****************************************************************************/
 
-static int x264_validate_parameters( x264_t *h, int b_open )
+static int validate_parameters( x264_t *h, int b_open )
 {
     if( !h->param.pf_log )
     {
-        x264_log( NULL, X264_LOG_ERROR, "pf_log not set! did you forget to call x264_param_default?\n" );
+        x264_log_internal( X264_LOG_ERROR, "pf_log not set! did you forget to call x264_param_default?\n" );
         return -1;
     }
 
@@ -471,7 +479,12 @@
 
     int i_csp = h->param.i_csp & X264_CSP_MASK;
 #if X264_CHROMA_FORMAT
-    if( CHROMA_FORMAT != CHROMA_420 && i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 )
+    if( CHROMA_FORMAT != CHROMA_400 && i_csp == X264_CSP_I400 )
+    {
+        x264_log( h, X264_LOG_ERROR, "not compiled with 4:0:0 support\n" );
+        return -1;
+    }
+    else if( CHROMA_FORMAT != CHROMA_420 && i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 )
     {
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" );
         return -1;
@@ -489,13 +502,26 @@
 #endif
     if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
     {
-        x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/YUYV/UYVY/"
+        x264_log( h, X264_LOG_ERROR, "invalid CSP (only I400/I420/YV12/NV12/NV21/I422/YV16/NV16/YUYV/UYVY/"
                                      "I444/YV24/BGR/BGRA/RGB supported)\n" );
         return -1;
     }
 
-    int w_mod = i_csp < X264_CSP_I444 ? 2 : 1;
-    int h_mod = (i_csp < X264_CSP_I422 ? 2 : 1) << PARAM_INTERLACED;
+    int w_mod = 1;
+    int h_mod = 1 << (PARAM_INTERLACED || h->param.b_fake_interlaced);
+    if( i_csp == X264_CSP_I400 )
+    {
+        h->param.analyse.i_chroma_qp_offset = 0;
+        h->param.analyse.b_chroma_me = 0;
+        h->param.vui.i_colmatrix = 2; /* undefined */
+    }
+    else if( i_csp < X264_CSP_I444 )
+    {
+        w_mod = 2;
+        if( i_csp < X264_CSP_I422 )
+            h_mod *= 2;
+    }
+
     if( h->param.i_width % w_mod )
     {
         x264_log( h, X264_LOG_ERROR, "width not divisible by %d (%dx%d)\n",
@@ -791,21 +817,36 @@
         memcpy( h->param.cqm_4ic, avcintra_lut[type][res][i].cqm_4ic, sizeof(h->param.cqm_4ic) );
         memcpy( h->param.cqm_8iy, avcintra_lut[type][res][i].cqm_8iy, sizeof(h->param.cqm_8iy) );
 
-        /* Need exactly 10 slices of equal MB count... why?  $deity knows... */
-        h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10;
-        h->param.i_slice_max_size = 0;
-        /* The slice structure only allows a maximum of 2 threads for 1080i/p
-         * and 1 or 5 threads for 720p */
-        if( h->param.b_sliced_threads )
+        /* The Sony XAVC flavor is much simpler */
+        if( h->param.i_avcintra_flavor == X264_AVCINTRA_FLAVOR_SONY )
         {
-            if( res )
-                h->param.i_threads = X264_MIN( 2, h->param.i_threads );
-            else
+            h->param.i_slice_count = 8;
+            if( h->param.b_sliced_threads )
+                h->param.i_threads = h->param.i_slice_count;
+            /* Sony XAVC unlike AVC-Intra doesn't seem to have a QP floor */
+        }
+        else
+        {
+            /* Need exactly 10 slices of equal MB count... why?  $deity knows... */
+            h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10;
+            h->param.i_slice_max_size = 0;
+            /* The slice structure only allows a maximum of 2 threads for 1080i/p
+             * and 1 or 5 threads for 720p */
+            if( h->param.b_sliced_threads )
             {
-                h->param.i_threads = X264_MIN( 5, h->param.i_threads );
-                if( h->param.i_threads < 5 )
-                    h->param.i_threads = 1;
+                if( res )
+                    h->param.i_threads = X264_MIN( 2, h->param.i_threads );
+                else
+                {
+                    h->param.i_threads = X264_MIN( 5, h->param.i_threads );
+                    if( h->param.i_threads < 5 )
+                        h->param.i_threads = 1;
+                }
             }
+
+            /* Official encoder doesn't appear to go under 13
+             * and Avid cannot handle negative QPs */
+            h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 );
         }
 
         if( type )
@@ -815,15 +856,11 @@
             h->param.vui.i_sar_width  = 4;
             h->param.vui.i_sar_height = 3;
         }
-
-        /* Official encoder doesn't appear to go under 13
-         * and Avid cannot handle negative QPs */
-        h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 );
     }
 
     h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
     h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
-    h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
+    h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, -1, QP_MAX );
     h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 11 );
     h->param.rc.f_ip_factor = X264_MAX( h->param.rc.f_ip_factor, 0.01f );
     h->param.rc.f_pb_factor = X264_MAX( h->param.rc.f_pb_factor, 0.01f );
@@ -852,19 +889,18 @@
         /* 8x8dct is not useful without RD in CAVLC lossless */
         if( !h->param.b_cabac && h->param.analyse.i_subpel_refine < 6 )
             h->param.analyse.b_transform_8x8 = 0;
-        h->param.analyse.inter &= ~X264_ANALYSE_I8x8;
-        h->param.analyse.intra &= ~X264_ANALYSE_I8x8;
-    }
-    if( i_csp >= X264_CSP_I444 && h->param.b_cabac )
-    {
-        /* Disable 8x8dct during 4:4:4+CABAC encoding for compatibility with libavcodec */
-        h->param.analyse.b_transform_8x8 = 0;
     }
     if( h->param.rc.i_rc_method == X264_RC_CQP )
     {
         float qp_p = h->param.rc.i_qp_constant;
         float qp_i = qp_p - 6*log2f( h->param.rc.f_ip_factor );
         float qp_b = qp_p + 6*log2f( h->param.rc.f_pb_factor );
+        if( qp_p < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "qp not specified\n" );
+            return -1;
+        }
+
         h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX );
         h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
         h->param.rc.i_aq_mode = 0;
@@ -1338,6 +1374,9 @@
 
     switch( CHROMA_FORMAT )
     {
+        case CHROMA_400:
+            h->mc.prefetch_fenc = h->mc.prefetch_fenc_400;
+            break;
         case CHROMA_420:
             memcpy( h->predict_chroma, h->predict_8x8c, sizeof(h->predict_chroma) );
             h->mc.prefetch_fenc = h->mc.prefetch_fenc_420;
@@ -1368,7 +1407,7 @@
     }
 }
 
-static void x264_set_aspect_ratio( x264_t *h, x264_param_t *param, int initial )
+static void set_aspect_ratio( x264_t *h, x264_param_t *param, int initial )
 {
     /* VUI */
     if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
@@ -1431,7 +1470,7 @@
         goto fail;
     }
 
-    if( x264_validate_parameters( h, 1 ) < 0 )
+    if( validate_parameters( h, 1 ) < 0 )
         goto fail;
 
     if( h->param.psz_cqm_file )
@@ -1461,9 +1500,10 @@
         goto fail;
     }
 
-    x264_set_aspect_ratio( h, &h->param, 1 );
+    set_aspect_ratio( h, &h->param, 1 );
 
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
+    x264_sps_init_scaling_list( h->sps, &h->param );
     x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps );
 
     x264_validate_levels( h, 1 );
@@ -1517,6 +1557,7 @@
     h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
     h->frames.i_poc_last_open_gop = -1;
 
+    CHECKED_MALLOCZERO( h->cost_table, sizeof(*h->cost_table) );
     CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
     /* Allocate room for max refs plus a few extra just in case. */
     CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + X264_REF_MAX + 4) * sizeof(x264_frame_t *) );
@@ -1553,7 +1594,7 @@
     if( h->param.b_cabac )
         x264_cabac_init( h );
     else
-        x264_stack_align( x264_cavlc_init, h );
+        x264_cavlc_init( h );
 
     mbcmp_init( h );
     chroma_dsp_init( h );
@@ -1616,7 +1657,7 @@
     CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) );
 
     if( h->param.i_threads > 1 &&
-        x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
+        x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)encoder_thread_init, h ) )
         goto fail;
     if( h->param.i_lookahead_threads > 1 &&
         x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, NULL, NULL ) )
@@ -1717,26 +1758,20 @@
     const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" :
                           h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
                           h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
-                          h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") :
-                          h->sps->i_profile_idc == PROFILE_HIGH422 ? (h->sps->b_constraint_set3 == 1 ? "High 4:2:2 Intra" : "High 4:2:2") :
-                          h->sps->b_constraint_set3 == 1 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive";
+                          h->sps->i_profile_idc == PROFILE_HIGH10 ?
+                              (h->sps->b_constraint_set3 ? "High 10 Intra" : "High 10") :
+                          h->sps->i_profile_idc == PROFILE_HIGH422 ?
+                              (h->sps->b_constraint_set3 ? "High 4:2:2 Intra" : "High 4:2:2") :
+                          h->sps->b_constraint_set3 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive";
     char level[4];
     snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
     if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 &&
         (h->sps->i_profile_idc == PROFILE_BASELINE || h->sps->i_profile_idc == PROFILE_MAIN) ) )
         strcpy( level, "1b" );
 
-    if( h->sps->i_profile_idc < PROFILE_HIGH10 )
-    {
-        x264_log( h, X264_LOG_INFO, "profile %s, level %s\n",
-            profile, level );
-    }
-    else
-    {
-        static const char * const subsampling[4] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" };
-        x264_log( h, X264_LOG_INFO, "profile %s, level %s, %s %d-bit\n",
-            profile, level, subsampling[CHROMA_FORMAT], BIT_DEPTH );
-    }
+    static const char * const subsampling[4] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" };
+    x264_log( h, X264_LOG_INFO, "profile %s, level %s, %s, %d-bit\n",
+              profile, level, subsampling[CHROMA_FORMAT], BIT_DEPTH );
 
     return h;
 fail:
@@ -1745,10 +1780,10 @@
 }
 
 /****************************************************************************/
-static int x264_encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig )
+static int encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig )
 {
     *rc_reconfig = 0;
-    x264_set_aspect_ratio( h, param, 0 );
+    set_aspect_ratio( h, param, 0 );
 #define COPY(var) h->param.var = param->var
     COPY( i_frame_reference ); // but never uses more refs than initially specified
     COPY( i_bframe_bias );
@@ -1809,13 +1844,13 @@
     COPY( rc.f_rf_constant_max );
 #undef COPY
 
-    return x264_validate_parameters( h, 0 );
+    return validate_parameters( h, 0 );
 }
 
 int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param )
 {
     int rc_reconfig;
-    int ret = x264_encoder_try_reconfig( h, param, &rc_reconfig );
+    int ret = encoder_try_reconfig( h, param, &rc_reconfig );
 
     mbcmp_init( h );
     if( !ret )
@@ -1842,7 +1877,7 @@
     h->reconfig_h->param = h->param;
 
     int rc_reconfig;
-    int ret = x264_encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig );
+    int ret = encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig );
     if( !ret )
         h->reconfig = 1;
     else
@@ -1860,7 +1895,7 @@
 }
 
 /* internal usage */
-static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc )
+static void nal_start( x264_t *h, int i_type, int i_ref_idc )
 {
     x264_nal_t *nal = &h->out.nal[h->out.i_nal];
 
@@ -1874,7 +1909,7 @@
 }
 
 /* if number of allocated nals is not enough, re-allocate a larger one. */
-static int x264_nal_check_buffer( x264_t *h )
+static int nal_check_buffer( x264_t *h )
 {
     if( h->out.i_nal >= h->out.i_nals_allocated )
     {
@@ -1889,7 +1924,7 @@
     return 0;
 }
 
-static int x264_nal_end( x264_t *h )
+static int nal_end( x264_t *h )
 {
     x264_nal_t *nal = &h->out.nal[h->out.i_nal];
     uint8_t *end = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
@@ -1901,11 +1936,11 @@
         h->param.nalu_process( h, nal, h->fenc->opaque );
     h->out.i_nal++;
 
-    return x264_nal_check_buffer( h );
+    return nal_check_buffer( h );
 }
 
-static int x264_check_encapsulated_buffer( x264_t *h, x264_t *h0, int start,
-                                           int previous_nal_size, int necessary_size )
+static int check_encapsulated_buffer( x264_t *h, x264_t *h0, int start,
+                                      int previous_nal_size, int necessary_size )
 {
     if( h0->nal_buffer_size < necessary_size )
     {
@@ -1928,7 +1963,7 @@
     return 0;
 }
 
-static int x264_encoder_encapsulate_nals( x264_t *h, int start )
+static int encoder_encapsulate_nals( x264_t *h, int start )
 {
     x264_t *h0 = h->thread[0];
     int nal_size = 0, previous_nal_size = 0;
@@ -1950,7 +1985,7 @@
     int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64;
     for( int i = start; i < h->out.i_nal; i++ )
         necessary_size += h->out.nal[i].i_padding;
-    if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) )
+    if( check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) )
         return -1;
 
     uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size;
@@ -1981,25 +2016,25 @@
     /* Write SEI, SPS and PPS. */
 
     /* generate sequence parameters */
-    x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST );
+    nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST );
     x264_sps_write( &h->out.bs, h->sps );
-    if( x264_nal_end( h ) )
+    if( nal_end( h ) )
         return -1;
 
     /* generate picture parameters */
-    x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
+    nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
     x264_pps_write( &h->out.bs, h->sps, h->pps );
-    if( x264_nal_end( h ) )
+    if( nal_end( h ) )
         return -1;
 
     /* identify ourselves */
-    x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+    nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
     if( x264_sei_version_write( h, &h->out.bs ) )
         return -1;
-    if( x264_nal_end( h ) )
+    if( nal_end( h ) )
         return -1;
 
-    frame_size = x264_encoder_encapsulate_nals( h, 0 );
+    frame_size = encoder_encapsulate_nals( h, 0 );
     if( frame_size < 0 )
         return -1;
 
@@ -2013,7 +2048,7 @@
 
 /* Check to see whether we have chosen a reference list ordering different
  * from the standard's default. */
-static inline void x264_reference_check_reorder( x264_t *h )
+static inline void reference_check_reorder( x264_t *h )
 {
     /* The reorder check doesn't check for missing frames, so just
      * force a reorder if one of the reference list is corrupt. */
@@ -2038,7 +2073,7 @@
 }
 
 /* return -1 on failure, else return the index of the new reference frame */
-static int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w )
+static int weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w )
 {
     int i = h->i_ref[0];
     int j = 1;
@@ -2077,7 +2112,7 @@
     return j;
 }
 
-static void x264_weighted_pred_init( x264_t *h )
+static void weighted_pred_init( x264_t *h )
 {
     /* for now no analysis and set all weights to nothing */
     for( int i_ref = 0; i_ref < h->i_ref[0]; i_ref++ )
@@ -2161,7 +2196,7 @@
     h->sh.weight[0][2].i_denom = h->sh.weight[0][1].i_denom;
 }
 
-static inline int x264_reference_distance( x264_t *h, x264_frame_t *frame )
+static inline int reference_distance( x264_t *h, x264_frame_t *frame )
 {
     if( h->param.i_frame_packing == 5 )
         return abs((h->fenc->i_frame&~1) - (frame->i_frame&~1)) +
@@ -2170,7 +2205,7 @@
         return abs(h->fenc->i_frame - frame->i_frame);
 }
 
-static inline void x264_reference_build_list( x264_t *h, int i_poc )
+static inline void reference_build_list( x264_t *h, int i_poc )
 {
     int b_ok;
 
@@ -2227,7 +2262,7 @@
                 if( list ? h->fref[list][i+1]->i_poc < h->fref_nearest[list]->i_poc
                          : h->fref[list][i+1]->i_poc > h->fref_nearest[list]->i_poc )
                     h->fref_nearest[list] = h->fref[list][i+1];
-                if( x264_reference_distance( h, h->fref[list][i] ) > x264_reference_distance( h, h->fref[list][i+1] ) )
+                if( reference_distance( h, h->fref[list][i] ) > reference_distance( h, h->fref[list][i+1] ) )
                 {
                     XCHG( x264_frame_t*, h->fref[list][i], h->fref[list][i+1] );
                     b_ok = 0;
@@ -2237,7 +2272,7 @@
         } while( !b_ok );
     }
 
-    x264_reference_check_reorder( h );
+    reference_check_reorder( h );
 
     h->i_ref[1] = X264_MIN( h->i_ref[1], h->frames.i_max_ref1 );
     h->i_ref[0] = X264_MIN( h->i_ref[0], h->frames.i_max_ref0 );
@@ -2262,7 +2297,7 @@
             {
                 h->fenc->weight[0][0].i_denom = 0;
                 SET_WEIGHT( w[0], 1, 1, 0, -1 );
-                idx = x264_weighted_reference_duplicate( h, 0, w );
+                idx = weighted_reference_duplicate( h, 0, w );
             }
             else
             {
@@ -2270,13 +2305,13 @@
                 {
                     SET_WEIGHT( h->fenc->weight[0][0], 1, 1, 0, h->fenc->weight[0][0].i_offset );
                 }
-                x264_weighted_reference_duplicate( h, 0, x264_weight_none );
+                weighted_reference_duplicate( h, 0, x264_weight_none );
                 if( h->fenc->weight[0][0].i_offset > -128 )
                 {
                     w[0] = h->fenc->weight[0][0];
                     w[0].i_offset--;
                     h->mc.weight_cache( h, &w[0] );
-                    idx = x264_weighted_reference_duplicate( h, 0, w );
+                    idx = weighted_reference_duplicate( h, 0, w );
                 }
             }
         }
@@ -2288,7 +2323,7 @@
     h->mb.pic.i_fref[1] = h->i_ref[1];
 }
 
-static void x264_fdec_filter_row( x264_t *h, int mb_y, int pass )
+static void fdec_filter_row( x264_t *h, int mb_y, int pass )
 {
     /* mb_y is the mb to be encoded next, not the mb to be filtered here */
     int b_hpel = h->fdec->b_kept_as_ref;
@@ -2408,7 +2443,7 @@
     }
 }
 
-static inline int x264_reference_update( x264_t *h )
+static inline int reference_update( x264_t *h )
 {
     if( !h->fdec->b_kept_as_ref )
     {
@@ -2438,7 +2473,7 @@
     return 0;
 }
 
-static inline void x264_reference_reset( x264_t *h )
+static inline void reference_reset( x264_t *h )
 {
     while( h->frames.reference[0] )
         x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) );
@@ -2446,7 +2481,7 @@
     h->fenc->i_poc = 0;
 }
 
-static inline void x264_reference_hierarchy_reset( x264_t *h )
+static inline void reference_hierarchy_reset( x264_t *h )
 {
     int ref;
     int b_hasdelayframe = 0;
@@ -2483,12 +2518,12 @@
         h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
 }
 
-static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
+static inline void slice_init( x264_t *h, int i_nal_type, int i_global_qp )
 {
     /* ------------------------ Create slice header  ----------------------- */
     if( i_nal_type == NAL_SLICE_IDR )
     {
-        x264_slice_header_init( h, &h->sh, h->sps, h->pps, h->i_idr_pic_id, h->i_frame_num, i_global_qp );
+        slice_header_init( h, &h->sh, h->sps, h->pps, h->i_idr_pic_id, h->i_frame_num, i_global_qp );
 
         /* alternate id */
         if( h->param.i_avcintra_class )
@@ -2512,7 +2547,7 @@
     }
     else
     {
-        x264_slice_header_init( h, &h->sh, h->sps, h->pps, -1, h->i_frame_num, i_global_qp );
+        slice_header_init( h, &h->sh, h->sps, h->pps, -1, h->i_frame_num, i_global_qp );
 
         h->sh.i_num_ref_idx_l0_active = h->i_ref[0] <= 0 ? 1 : h->i_ref[0];
         h->sh.i_num_ref_idx_l1_active = h->i_ref[1] <= 0 ? 1 : h->i_ref[1];
@@ -2564,7 +2599,7 @@
     int field_decoding_flag;
 } x264_bs_bak_t;
 
-static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full )
+static ALWAYS_INLINE void bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full )
 {
     if( full )
     {
@@ -2598,7 +2633,7 @@
     }
 }
 
-static ALWAYS_INLINE void x264_bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full )
+static ALWAYS_INLINE void bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full )
 {
     if( full )
     {
@@ -2627,7 +2662,7 @@
     }
 }
 
-static intptr_t x264_slice_write( x264_t *h )
+static intptr_t slice_write( x264_t *h )
 {
     int i_skip;
     int mb_xy, i_mb_x, i_mb_y;
@@ -2654,7 +2689,7 @@
     bs_realign( &h->out.bs );
 
     /* Slice */
-    x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
+    nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
     h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;
 
     /* Slice header */
@@ -2666,7 +2701,7 @@
     h->sh.i_qp = SPEC_QP( h->sh.i_qp );
     h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp;
 
-    x264_slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
+    slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
     if( h->param.b_cabac )
     {
         /* alignment needed */
@@ -2694,23 +2729,23 @@
 
         if( i_mb_x == 0 )
         {
-            if( x264_bitstream_check_buffer( h ) )
+            if( bitstream_check_buffer( h ) )
                 return -1;
             if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )
-                x264_bitstream_backup( h, &bs_bak[BS_BAK_ROW_VBV], i_skip, 1 );
+                bitstream_backup( h, &bs_bak[BS_BAK_ROW_VBV], i_skip, 1 );
             if( !h->mb.b_reencode_mb )
-                x264_fdec_filter_row( h, i_mb_y, 0 );
+                fdec_filter_row( h, i_mb_y, 0 );
         }
 
         if( back_up_bitstream )
         {
             if( back_up_bitstream_cavlc )
-                x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
+                bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
             if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
             {
-                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
+                bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
                 if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
-                    x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
+                    bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
             }
         }
 
@@ -2775,7 +2810,7 @@
                     h->mb.i_skip_intra = 0;
                     h->mb.b_skip_mc = 0;
                     h->mb.b_overflow = 0;
-                    x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
+                    bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
                     goto reencode;
                 }
             }
@@ -2815,14 +2850,14 @@
                             slice_max_size = 0;
                             goto cont;
                         }
-                        x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 );
+                        bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 );
                         h->mb.b_reencode_mb = 1;
                         h->sh.i_last_mb = thread_last_mb-h->param.i_slice_min_mbs;
                         break;
                     }
                     if( mb_xy-SLICE_MBAFF*h->mb.i_mb_stride != h->sh.i_first_mb )
                     {
-                        x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
+                        bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
                         h->mb.b_reencode_mb = 1;
                         if( SLICE_MBAFF )
                         {
@@ -2851,7 +2886,7 @@
 
         if( x264_ratecontrol_mb( h, mb_size ) < 0 )
         {
-            x264_bitstream_restore( h, &bs_bak[BS_BAK_ROW_VBV], &i_skip, 1 );
+            bitstream_restore( h, &bs_bak[BS_BAK_ROW_VBV], &i_skip, 1 );
             h->mb.b_reencode_mb = 1;
             i_mb_x = 0;
             i_mb_y = i_mb_y - SLICE_MBAFF;
@@ -2968,7 +3003,7 @@
         bs_rbsp_trailing( &h->out.bs );
         bs_flush( &h->out.bs );
     }
-    if( x264_nal_end( h ) )
+    if( nal_end( h ) )
         return -1;
 
     if( h->sh.i_last_mb == (h->i_threadslice_end * h->mb.i_mb_width - 1) )
@@ -2977,7 +3012,7 @@
                                   + (h->out.i_nal*NALU_OVERHEAD * 8)
                                   - h->stat.frame.i_tex_bits
                                   - h->stat.frame.i_mv_bits;
-        x264_fdec_filter_row( h, h->i_threadslice_end, 0 );
+        fdec_filter_row( h, h->i_threadslice_end, 0 );
 
         if( h->param.b_sliced_threads )
         {
@@ -2985,13 +3020,13 @@
             x264_threadslice_cond_broadcast( h, 1 );
             /* Do hpel now */
             for( int mb_y = h->i_threadslice_start; mb_y <= h->i_threadslice_end; mb_y++ )
-                x264_fdec_filter_row( h, mb_y, 1 );
+                fdec_filter_row( h, mb_y, 1 );
             x264_threadslice_cond_broadcast( h, 2 );
             /* Do the first row of hpel, now that the previous slice is done */
             if( h->i_thread_idx > 0 )
             {
                 x264_threadslice_cond_wait( h->thread[h->i_thread_idx-1], 2 );
-                x264_fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 );
+                fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 );
             }
         }
 
@@ -3007,7 +3042,7 @@
     return 0;
 }
 
-static void x264_thread_sync_context( x264_t *dst, x264_t *src )
+static void thread_sync_context( x264_t *dst, x264_t *src )
 {
     if( dst == src )
         return;
@@ -3028,16 +3063,17 @@
     dst->reconfig = src->reconfig;
 }
 
-static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
+static void thread_sync_stat( x264_t *dst, x264_t *src )
 {
     if( dst != src )
         memcpy( &dst->stat, &src->stat, offsetof(x264_t, stat.frame) - offsetof(x264_t, stat) );
 }
 
-static void *x264_slices_write( x264_t *h )
+static void *slices_write( x264_t *h )
 {
     int i_slice_num = 0;
     int last_thread_mb = h->sh.i_last_mb;
+    int round_bias = h->param.i_avcintra_class ? 0 : h->param.i_slice_count/2;
 
     /* init stats */
     memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
@@ -3072,11 +3108,11 @@
                 int height = h->mb.i_mb_height >> PARAM_INTERLACED;
                 int width = h->mb.i_mb_width << PARAM_INTERLACED;
                 i_slice_num++;
-                h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1;
+                h->sh.i_last_mb = (height * i_slice_num + round_bias) / h->param.i_slice_count * width - 1;
             }
         }
         h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb );
-        if( x264_stack_align( x264_slice_write, h ) )
+        if( slice_write( h ) )
             goto fail;
         h->sh.i_first_mb = h->sh.i_last_mb + 1;
         // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
@@ -3093,8 +3129,10 @@
     return (void *)-1;
 }
 
-static int x264_threaded_slices_write( x264_t *h )
+static int threaded_slices_write( x264_t *h )
 {
+    int round_bias = h->param.i_avcintra_class ? 0 : h->param.i_slice_count/2;
+
     /* set first/last mb and sync contexts */
     for( int i = 0; i < h->param.i_threads; i++ )
     {
@@ -3105,13 +3143,13 @@
             memcpy( &t->i_frame, &h->i_frame, offsetof(x264_t, rc) - offsetof(x264_t, i_frame) );
         }
         int height = h->mb.i_mb_height >> PARAM_INTERLACED;
-        t->i_threadslice_start = ((height *  i    + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED;
-        t->i_threadslice_end   = ((height * (i+1) + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED;
+        t->i_threadslice_start = ((height *  i    + round_bias) / h->param.i_threads) << PARAM_INTERLACED;
+        t->i_threadslice_end   = ((height * (i+1) + round_bias) / h->param.i_threads) << PARAM_INTERLACED;
         t->sh.i_first_mb = t->i_threadslice_start * h->mb.i_mb_width;
         t->sh.i_last_mb  =   t->i_threadslice_end * h->mb.i_mb_width - 1;
     }
 
-    x264_stack_align( x264_analyse_weight_frame, h, h->mb.i_mb_height*16 + 16 );
+    x264_analyse_weight_frame( h, h->mb.i_mb_height*16 + 16 );
 
     x264_threads_distribute_ratecontrol( h );
 
@@ -3124,7 +3162,7 @@
     }
     /* dispatch */
     for( int i = 0; i < h->param.i_threads; i++ )
-        x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
+        x264_threadpool_run( h->threadpool, (void*)slices_write, h->thread[i] );
     /* wait */
     for( int i = 0; i < h->param.i_threads; i++ )
         x264_threadslice_cond_wait( h->thread[i], 1 );
@@ -3138,7 +3176,7 @@
         {
             h->out.nal[h->out.i_nal] = t->out.nal[j];
             h->out.i_nal++;
-            x264_nal_check_buffer( h );
+            nal_check_buffer( h );
         }
         /* All entries in stat.frame are ints except for ssd/ssim. */
         for( int j = 0; j < (offsetof(x264_t,stat.frame.i_ssd) - offsetof(x264_t,stat.frame.i_mv_bits)) / sizeof(int); j++ )
@@ -3215,7 +3253,7 @@
         h->i_thread_phase = (h->i_thread_phase + 1) % h->i_thread_frames;
         thread_current = h->thread[ h->i_thread_phase ];
         thread_oldest  = h->thread[ (h->i_thread_phase + 1) % h->i_thread_frames ];
-        x264_thread_sync_context( thread_current, thread_prev );
+        thread_sync_context( thread_current, thread_prev );
         x264_thread_sync_ratecontrol( thread_current, thread_prev, thread_oldest );
         h = thread_current;
     }
@@ -3289,7 +3327,7 @@
                 return -1;
         }
         else
-            x264_stack_align( x264_adaptive_quant_frame, h, fenc, pic_in->prop.quant_offsets );
+            x264_adaptive_quant_frame( h, fenc, pic_in->prop.quant_offsets );
 
         if( pic_in->prop.quant_offsets_free )
             pic_in->prop.quant_offsets_free( pic_in->prop.quant_offsets );
@@ -3322,7 +3360,7 @@
         x264_lookahead_get_frames( h );
 
     if( !h->frames.current[0] && x264_lookahead_is_empty( h ) )
-        return x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
+        return encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
 
     /* ------------------- Get frame to be encoded ------------------------- */
     /* 4: get picture to encode */
@@ -3330,7 +3368,7 @@
 
     /* If applicable, wait for previous frame reconstruction to finish */
     if( h->param.b_sliced_threads )
-        if( x264_threadpool_wait_all( h ) < 0 )
+        if( threadpool_wait_all( h ) < 0 )
             return -1;
 
     if( h->i_frame == 0 )
@@ -3352,7 +3390,7 @@
     x264_ratecontrol_zone_init( h );
 
     // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
-    if( x264_reference_update( h ) )
+    if( reference_update( h ) )
         return -1;
     h->fdec->i_lines_completed = -1;
 
@@ -3394,7 +3432,7 @@
         i_nal_type    = NAL_SLICE_IDR;
         i_nal_ref_idc = NAL_PRIORITY_HIGHEST;
         h->sh.i_type = SLICE_TYPE_I;
-        x264_reference_reset( h );
+        reference_reset( h );
         h->frames.i_poc_last_open_gop = -1;
     }
     else if( h->fenc->i_type == X264_TYPE_I )
@@ -3402,7 +3440,7 @@
         i_nal_type    = NAL_SLICE;
         i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
         h->sh.i_type = SLICE_TYPE_I;
-        x264_reference_hierarchy_reset( h );
+        reference_hierarchy_reset( h );
         if( h->param.b_open_gop )
             h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
     }
@@ -3411,7 +3449,7 @@
         i_nal_type    = NAL_SLICE;
         i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
         h->sh.i_type = SLICE_TYPE_P;
-        x264_reference_hierarchy_reset( h );
+        reference_hierarchy_reset( h );
         h->frames.i_poc_last_open_gop = -1;
     }
     else if( h->fenc->i_type == X264_TYPE_BREF )
@@ -3419,7 +3457,7 @@
         i_nal_type    = NAL_SLICE;
         i_nal_ref_idc = h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT ? NAL_PRIORITY_LOW : NAL_PRIORITY_HIGH;
         h->sh.i_type = SLICE_TYPE_B;
-        x264_reference_hierarchy_reset( h );
+        reference_hierarchy_reset( h );
     }
     else    /* B frame */
     {
@@ -3454,7 +3492,7 @@
 
     /* ------------------- Init                ----------------------------- */
     /* build ref list 0/1 */
-    x264_reference_build_list( h, h->fdec->i_poc );
+    reference_build_list( h, h->fdec->i_poc );
 
     /* ---------------------- Write the bitstream -------------------------- */
     /* Init bitstream context */
@@ -3485,10 +3523,11 @@
         else
             pic_type = 7;
 
-        x264_nal_start( h, NAL_AUD, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_AUD, NAL_PRIORITY_DISPOSABLE );
         bs_write( &h->out.bs, 3, pic_type );
         bs_rbsp_trailing( &h->out.bs );
-        if( x264_nal_end( h ) )
+        bs_flush( &h->out.bs );
+        if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
     }
@@ -3538,9 +3577,9 @@
         if( h->param.b_repeat_headers )
         {
             /* generate sequence parameters */
-            x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST );
+            nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST );
             x264_sps_write( &h->out.bs, h->sps );
-            if( x264_nal_end( h ) )
+            if( nal_end( h ) )
                 return -1;
             /* Pad AUD/SPS to 256 bytes like Panasonic */
             if( h->param.i_avcintra_class )
@@ -3548,22 +3587,28 @@
             overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
 
             /* generate picture parameters */
-            x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
+            nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
             x264_pps_write( &h->out.bs, h->sps, h->pps );
-            if( x264_nal_end( h ) )
+            if( nal_end( h ) )
                 return -1;
             if( h->param.i_avcintra_class )
-                h->out.nal[h->out.i_nal-1].i_padding = 256 - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD;
+            {
+                int total_len = 256;
+                /* Sony XAVC uses an oversized PPS instead of SEI padding */
+                if( h->param.i_avcintra_flavor == X264_AVCINTRA_FLAVOR_SONY )
+                    total_len += h->param.i_height == 1080 ? 18*512 : 10*512;
+                h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD;
+            }
             overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
         }
 
-        /* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */
+        /* when frame threading is used, buffering period sei is written in encoder_frame_end */
         if( h->i_thread_frames == 1 && h->sps->vui.b_nal_hrd_parameters_present )
         {
             x264_hrd_fullness( h );
-            x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+            nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
             x264_sei_buffering_period_write( h, &h->out.bs );
-            if( x264_nal_end( h ) )
+            if( nal_end( h ) )
                return -1;
             overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
@@ -3572,10 +3617,10 @@
     /* write extra sei */
     for( int i = 0; i < h->fenc->extra_sei.num_payloads; i++ )
     {
-        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         x264_sei_write( &h->out.bs, h->fenc->extra_sei.payloads[i].payload, h->fenc->extra_sei.payloads[i].payload_size,
                         h->fenc->extra_sei.payloads[i].payload_type );
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         if( h->fenc->extra_sei.sei_free )
@@ -3598,10 +3643,10 @@
         if( h->param.b_repeat_headers && h->fenc->i_frame == 0 && !h->param.i_avcintra_class )
         {
             /* identify ourself */
-            x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+            nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
             if( x264_sei_version_write( h, &h->out.bs ) )
                 return -1;
-            if( x264_nal_end( h ) )
+            if( nal_end( h ) )
                 return -1;
             overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
@@ -3609,9 +3654,9 @@
         if( h->fenc->i_type != X264_TYPE_IDR )
         {
             int time_to_recovery = h->param.b_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe - 1;
-            x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+            nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
             x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
-            if( x264_nal_end( h ) )
+            if( nal_end( h ) )
                 return -1;
             overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
@@ -3619,9 +3664,18 @@
 
     if( h->param.i_frame_packing >= 0 && (h->fenc->b_keyframe || h->param.i_frame_packing == 5) )
     {
-        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         x264_sei_frame_packing_write( h, &h->out.bs );
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
+            return -1;
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
+    }
+
+    if( h->param.i_alternative_transfer != 2 )
+    {
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        x264_sei_alternative_transfer_write( h, &h->out.bs );
+        if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
     }
@@ -3629,9 +3683,9 @@
     /* generate sei pic timing */
     if( h->sps->vui.b_pic_struct_present || h->sps->vui.b_nal_hrd_parameters_present )
     {
-        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         x264_sei_pic_timing_write( h, &h->out.bs );
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
     }
@@ -3640,9 +3694,9 @@
     if( !IS_X264_TYPE_B( h->fenc->i_type ) && h->b_sh_backup )
     {
         h->b_sh_backup = 0;
-        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         x264_sei_dec_ref_pic_marking_write( h, &h->out.bs );
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
     }
@@ -3651,21 +3705,21 @@
         h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay;
 
     /* Filler space: 10 or 18 SEIs' worth of space, depending on resolution */
-    if( h->param.i_avcintra_class )
+    if( h->param.i_avcintra_class && h->param.i_avcintra_flavor != X264_AVCINTRA_FLAVOR_SONY )
     {
         /* Write an empty filler NAL to mimic the AUD in the P2 format*/
-        x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
         x264_filler_write( h, &h->out.bs, 0 );
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
 
         /* All lengths are magic lengths that decoders expect to see */
         /* "UMID" SEI */
-        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         if( x264_sei_avcintra_umid_write( h, &h->out.bs ) < 0 )
             return -1;
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
 
@@ -3682,10 +3736,10 @@
             total_len = 9*512;
         }
         /* "VANC" SEI */
-        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         if( x264_sei_avcintra_vanc_write( h, &h->out.bs, unpadded_len ) < 0 )
             return -1;
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
             return -1;
 
         h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - SEI_OVERHEAD;
@@ -3703,20 +3757,20 @@
     if( h->param.rc.b_stat_read && h->sh.i_type != SLICE_TYPE_I )
     {
         x264_reference_build_list_optimal( h );
-        x264_reference_check_reorder( h );
+        reference_check_reorder( h );
     }
 
     if( h->i_ref[0] )
         h->fdec->i_poc_l0ref0 = h->fref[0][0]->i_poc;
 
     /* ------------------------ Create slice header  ----------------------- */
-    x264_slice_init( h, i_nal_type, i_global_qp );
+    slice_init( h, i_nal_type, i_global_qp );
 
     /*------------------------- Weights -------------------------------------*/
     if( h->sh.i_type == SLICE_TYPE_B )
         x264_macroblock_bipred_init( h );
 
-    x264_weighted_pred_init( h );
+    weighted_pred_init( h );
 
     if( i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE )
         h->i_frame_num++;
@@ -3726,24 +3780,24 @@
     h->i_threadslice_end = h->mb.i_mb_height;
     if( h->i_thread_frames > 1 )
     {
-        x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
+        x264_threadpool_run( h->threadpool, (void*)slices_write, h );
         h->b_thread_active = 1;
     }
     else if( h->param.b_sliced_threads )
     {
-        if( x264_threaded_slices_write( h ) )
+        if( threaded_slices_write( h ) )
             return -1;
     }
     else
-        if( (intptr_t)x264_slices_write( h ) )
+        if( (intptr_t)slices_write( h ) )
             return -1;
 
-    return x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
+    return encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
 }
 
-static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
-                                   x264_nal_t **pp_nal, int *pi_nal,
-                                   x264_picture_t *pic_out )
+static int encoder_frame_end( x264_t *h, x264_t *thread_current,
+                              x264_nal_t **pp_nal, int *pi_nal,
+                              x264_picture_t *pic_out )
 {
     char psz_message[80];
 
@@ -3765,9 +3819,9 @@
     if( h->i_thread_frames > 1 && h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
     {
         x264_hrd_fullness( h );
-        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
         x264_sei_buffering_period_write( h, &h->out.bs );
-        if( x264_nal_end( h ) )
+        if( nal_end( h ) )
            return -1;
         /* buffering period sei must follow AUD, SPS and PPS and precede all other SEIs */
         int idx = 0;
@@ -3780,7 +3834,7 @@
         h->out.nal[idx] = nal_tmp;
     }
 
-    int frame_size = x264_encoder_encapsulate_nals( h, 0 );
+    int frame_size = encoder_encapsulate_nals( h, 0 );
     if( frame_size < 0 )
         return -1;
 
@@ -3825,7 +3879,7 @@
      * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */
     if( h->param.i_avcintra_class )
     {
-        if( x264_check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, frame_size + filler ) < 0 )
+        if( check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, frame_size + filler ) < 0 )
             return -1;
 
         x264_nal_t *nal = &h->out.nal[h->out.i_nal-1];
@@ -3860,13 +3914,13 @@
             else
                 f = X264_MAX( 0, filler - overhead );
 
-            if( x264_bitstream_check_buffer_filler( h, f ) )
+            if( bitstream_check_buffer_filler( h, f ) )
                 return -1;
-            x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
+            nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
             x264_filler_write( h, &h->out.bs, f );
-            if( x264_nal_end( h ) )
+            if( nal_end( h ) )
                 return -1;
-            int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
+            int total_size = encoder_encapsulate_nals( h, h->out.i_nal-1 );
             if( total_size < 0 )
                 return -1;
             frame_size += total_size;
@@ -3883,7 +3937,7 @@
     x264_noise_reduction_update( h );
 
     /* ---------------------- Compute/Print statistics --------------------- */
-    x264_thread_sync_stat( h, h->thread[0] );
+    thread_sync_stat( h, h->thread[0] );
 
     /* Slice stat */
     h->stat.i_frame_count[h->sh.i_type]++;
@@ -3941,10 +3995,10 @@
         };
         int luma_size = h->param.i_width * h->param.i_height;
         int chroma_size = CHROMA_SIZE( luma_size );
-        pic_out->prop.f_psnr[0] = x264_psnr( ssd[0], luma_size );
-        pic_out->prop.f_psnr[1] = x264_psnr( ssd[1], chroma_size );
-        pic_out->prop.f_psnr[2] = x264_psnr( ssd[2], chroma_size );
-        pic_out->prop.f_psnr_avg = x264_psnr( ssd[0] + ssd[1] + ssd[2], luma_size + chroma_size*2 );
+        pic_out->prop.f_psnr[0] = calc_psnr( ssd[0], luma_size );
+        pic_out->prop.f_psnr[1] = calc_psnr( ssd[1], chroma_size );
+        pic_out->prop.f_psnr[2] = calc_psnr( ssd[2], chroma_size );
+        pic_out->prop.f_psnr_avg = calc_psnr( ssd[0] + ssd[1] + ssd[2], luma_size + chroma_size*2 );
 
         h->stat.f_ssd_global[h->sh.i_type]   += dur * (ssd[0] + ssd[1] + ssd[2]);
         h->stat.f_psnr_average[h->sh.i_type] += dur * pic_out->prop.f_psnr_avg;
@@ -3980,9 +4034,9 @@
               psz_message );
 
     // keep stats all in one place
-    x264_thread_sync_stat( h->thread[0], h );
+    thread_sync_stat( h->thread[0], h );
     // for the use of the next frame
-    x264_thread_sync_stat( thread_current, h );
+    thread_sync_stat( thread_current, h );
 
 #ifdef DEBUG_MB_TYPE
 {
@@ -4011,13 +4065,13 @@
         }
 
     if( h->param.psz_dump_yuv )
-        x264_frame_dump( h );
+        frame_dump( h );
     x264_emms();
 
     return frame_size;
 }
 
-static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra )
+static void print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra )
 {
     intra += sprintf( intra, "I16..4%s: %4.1f%% %4.1f%% %4.1f%%",
         b_print_pcm ? "..PCM" : "",
@@ -4048,7 +4102,7 @@
 #endif
 
     if( h->param.b_sliced_threads )
-        x264_threadpool_wait_all( h );
+        threadpool_wait_all( h );
     if( h->param.i_threads > 1 )
         x264_threadpool_delete( h->threadpool );
     if( h->param.i_lookahead_threads > 1 )
@@ -4089,7 +4143,7 @@
                           (double)h->stat.i_frame_size[i_slice] / i_count,
                           h->stat.f_psnr_mean_y[i_slice] / dur, h->stat.f_psnr_mean_u[i_slice] / dur, h->stat.f_psnr_mean_v[i_slice] / dur,
                           h->stat.f_psnr_average[i_slice] / dur,
-                          x264_psnr( h->stat.f_ssd_global[i_slice], dur * i_yuv_size ) );
+                          calc_psnr( h->stat.f_ssd_global[i_slice], dur * i_yuv_size ) );
             }
             else
             {
@@ -4126,7 +4180,7 @@
     {
         int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I];
         double i_count = (double)h->stat.i_frame_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0;
-        x264_print_intra( i_mb_count, i_count, b_print_pcm, buf );
+        print_intra( i_mb_count, i_count, b_print_pcm, buf );
         x264_log( h, X264_LOG_INFO, "mb I  %s\n", buf );
     }
     if( h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
@@ -4134,7 +4188,7 @@
         int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_P];
         double i_count = (double)h->stat.i_frame_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0;
         int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_P];
-        x264_print_intra( i_mb_count, i_count, b_print_pcm, buf );
+        print_intra( i_mb_count, i_count, b_print_pcm, buf );
         x264_log( h, X264_LOG_INFO,
                   "mb P  %s  P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%%    skip:%4.1f%%\n",
                   buf,
@@ -4152,7 +4206,7 @@
         double i_mb_list_count;
         int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_B];
         int64_t list_count[3] = {0}; /* 0 == L0, 1 == L1, 2 == BI */
-        x264_print_intra( i_mb_count, i_count, b_print_pcm, buf );
+        print_intra( i_mb_count, i_count, b_print_pcm, buf );
         for( int i = 0; i < X264_PARTTYPE_MAX; i++ )
             for( int j = 0; j < 2; j++ )
             {
@@ -4232,17 +4286,27 @@
         }
 
         buf[0] = 0;
-        int csize = CHROMA444 ? 4 : 1;
-        if( i_mb_count != i_all_intra )
-            sprintf( buf, " inter: %.1f%% %.1f%% %.1f%%",
-                     h->stat.i_mb_cbp[1] * 100.0 / ((i_mb_count - i_all_intra)*4),
-                     h->stat.i_mb_cbp[3] * 100.0 / ((i_mb_count - i_all_intra)*csize),
-                     h->stat.i_mb_cbp[5] * 100.0 / ((i_mb_count - i_all_intra)*csize) );
-        x264_log( h, X264_LOG_INFO, "coded y,%s,%s intra: %.1f%% %.1f%% %.1f%%%s\n",
-                  CHROMA444?"u":"uvDC", CHROMA444?"v":"uvAC",
-                  h->stat.i_mb_cbp[0] * 100.0 / (i_all_intra*4),
-                  h->stat.i_mb_cbp[2] * 100.0 / (i_all_intra*csize),
-                  h->stat.i_mb_cbp[4] * 100.0 / (i_all_intra*csize), buf );
+        if( CHROMA_FORMAT )
+        {
+            int csize = CHROMA444 ? 4 : 1;
+            if( i_mb_count != i_all_intra )
+                sprintf( buf, " inter: %.1f%% %.1f%% %.1f%%",
+                         h->stat.i_mb_cbp[1] * 100.0 / ((i_mb_count - i_all_intra)*4),
+                         h->stat.i_mb_cbp[3] * 100.0 / ((i_mb_count - i_all_intra)*csize),
+                         h->stat.i_mb_cbp[5] * 100.0 / ((i_mb_count - i_all_intra)*csize) );
+            x264_log( h, X264_LOG_INFO, "coded y,%s,%s intra: %.1f%% %.1f%% %.1f%%%s\n",
+                      CHROMA444?"u":"uvDC", CHROMA444?"v":"uvAC",
+                      h->stat.i_mb_cbp[0] * 100.0 / (i_all_intra*4),
+                      h->stat.i_mb_cbp[2] * 100.0 / (i_all_intra*csize),
+                      h->stat.i_mb_cbp[4] * 100.0 / (i_all_intra*csize), buf );
+        }
+        else
+        {
+            if( i_mb_count != i_all_intra )
+                sprintf( buf, " inter: %.1f%%", h->stat.i_mb_cbp[1] * 100.0 / ((i_mb_count - i_all_intra)*4) );
+            x264_log( h, X264_LOG_INFO, "coded y intra: %.1f%%%s\n",
+                      h->stat.i_mb_cbp[0] * 100.0 / (i_all_intra*4), buf );
+        }
 
         int64_t fixed_pred_modes[4][9] = {{0}};
         int64_t sum_pred_modes[4] = {0};
@@ -4289,9 +4353,13 @@
                       fixed_pred_modes[3][3] * 100.0 / sum_pred_modes[3] );
 
         if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
-            x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
-                      h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
-                      h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
+        {
+            buf[0] = 0;
+            if( CHROMA_FORMAT )
+                sprintf( buf, " UV:%.1f%%", h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
+            x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%%s\n",
+                      h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P], buf );
+        }
 
         for( int i_list = 0; i_list < 2; i_list++ )
             for( int i_slice = 0; i_slice < 2; i_slice++ )
@@ -4315,7 +4383,7 @@
         if( h->param.analyse.b_ssim )
         {
             float ssim = SUM3( h->stat.f_ssim_mean_y ) / duration;
-            x264_log( h, X264_LOG_INFO, "SSIM Mean Y:%.7f (%6.3fdb)\n", ssim, x264_ssim( ssim ) );
+            x264_log( h, X264_LOG_INFO, "SSIM Mean Y:%.7f (%6.3fdb)\n", ssim, calc_ssim_db( ssim ) );
         }
         if( h->param.analyse.b_psnr )
         {
@@ -4325,7 +4393,7 @@
                       SUM3( h->stat.f_psnr_mean_u ) / duration,
                       SUM3( h->stat.f_psnr_mean_v ) / duration,
                       SUM3( h->stat.f_psnr_average ) / duration,
-                      x264_psnr( SUM3( h->stat.f_ssd_global ), duration * i_yuv_size ),
+                      calc_psnr( SUM3( h->stat.f_ssd_global ), duration * i_yuv_size ),
                       f_bitrate );
         }
         else
@@ -4345,6 +4413,7 @@
     x264_free( h->nal_buffer );
     x264_free( h->reconfig_h );
     x264_analyse_free_costs( h );
+    x264_free( h->cost_table );
 
     if( h->i_thread_frames > 1 )
         h = h->thread[h->i_thread_phase];
diff -Nru x264-0.152.2854+gite9a5903/encoder/lookahead.c x264-0.158.2988+git-20191101.7817004/encoder/lookahead.c
--- x264-0.152.2854+gite9a5903/encoder/lookahead.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/lookahead.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lookahead.c: high-level lookahead functions
  *****************************************************************************
- * Copyright (C) 2010-2017 Avail Media and x264 project
+ * Copyright (C) 2010-2019 Avail Media and x264 project
  *
  * Authors: Michael Kazmier <mkazmier@availmedia.com>
  *          Alex Giladi <agiladi@availmedia.com>
@@ -39,7 +39,7 @@
 #include "common/common.h"
 #include "analyse.h"
 
-static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
+static void lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
 {
     int i = count;
     while( i-- )
@@ -56,7 +56,7 @@
     }
 }
 
-static void x264_lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb )
+static void lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb )
 {
     if( h->lookahead->last_nonb )
         x264_frame_push_unused( h, h->lookahead->last_nonb );
@@ -65,11 +65,11 @@
 }
 
 #if HAVE_THREAD
-static void x264_lookahead_slicetype_decide( x264_t *h )
+static void lookahead_slicetype_decide( x264_t *h )
 {
-    x264_stack_align( x264_slicetype_decide, h );
+    x264_slicetype_decide( h );
 
-    x264_lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
+    lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
     int shift_frames = h->lookahead->next.list[0]->i_bframes + 1;
 
     x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
@@ -77,24 +77,24 @@
         x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_empty, &h->lookahead->ofbuf.mutex );
 
     x264_pthread_mutex_lock( &h->lookahead->next.mutex );
-    x264_lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames );
+    lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames );
     x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
 
     /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
     if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
-        x264_stack_align( x264_slicetype_analyse, h, shift_frames );
+        x264_slicetype_analyse( h, shift_frames );
 
     x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
 }
 
-static void *x264_lookahead_thread( x264_t *h )
+REALIGN_STACK static void *lookahead_thread( x264_t *h )
 {
     while( !h->lookahead->b_exit_thread )
     {
         x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
         x264_pthread_mutex_lock( &h->lookahead->next.mutex );
         int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
-        x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
+        lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
         x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
         if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )
         {
@@ -105,22 +105,23 @@
         else
         {
             x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
-            x264_lookahead_slicetype_decide( h );
+            lookahead_slicetype_decide( h );
         }
     }   /* end of input frames */
     x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
     x264_pthread_mutex_lock( &h->lookahead->next.mutex );
-    x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, h->lookahead->ifbuf.i_size );
+    lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, h->lookahead->ifbuf.i_size );
     x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
     x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
     while( h->lookahead->next.i_size )
-        x264_lookahead_slicetype_decide( h );
+        lookahead_slicetype_decide( h );
     x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
     h->lookahead->b_thread_active = 0;
     x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_fill );
     x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
     return NULL;
 }
+
 #endif
 
 int x264_lookahead_init( x264_t *h, int i_slicetype_length )
@@ -152,7 +153,7 @@
     if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
         goto fail;
 
-    if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
+    if( x264_pthread_create( &look->thread_handle, NULL, (void*)lookahead_thread, look_h ) )
         goto fail;
     look->b_thread_active = 1;
 
@@ -201,7 +202,7 @@
     return b_empty;
 }
 
-static void x264_lookahead_encoder_shift( x264_t *h )
+static void lookahead_encoder_shift( x264_t *h )
 {
     if( !h->lookahead->ofbuf.i_size )
         return;
@@ -221,7 +222,7 @@
         x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
         while( !h->lookahead->ofbuf.i_size && h->lookahead->b_thread_active )
             x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_fill, &h->lookahead->ofbuf.mutex );
-        x264_lookahead_encoder_shift( h );
+        lookahead_encoder_shift( h );
         x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
     }
     else
@@ -230,15 +231,15 @@
         if( h->frames.current[0] || !h->lookahead->next.i_size )
             return;
 
-        x264_stack_align( x264_slicetype_decide, h );
-        x264_lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
+        x264_slicetype_decide( h );
+        lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
         int shift_frames = h->lookahead->next.list[0]->i_bframes + 1;
-        x264_lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames );
+        lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames );
 
         /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
         if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
-            x264_stack_align( x264_slicetype_analyse, h, shift_frames );
+            x264_slicetype_analyse( h, shift_frames );
 
-        x264_lookahead_encoder_shift( h );
+        lookahead_encoder_shift( h );
     }
 }
diff -Nru x264-0.152.2854+gite9a5903/encoder/macroblock.c x264-0.158.2988+git-20191101.7817004/encoder/macroblock.c
--- x264-0.152.2854+gite9a5903/encoder/macroblock.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/macroblock.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.c: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -123,7 +123,7 @@
 /* This means that decimation can be done merely by adjusting the CBP and NNZ
  * rather than memsetting the coefficients. */
 
-static void x264_mb_encode_i16x16( x264_t *h, int p, int i_qp )
+static void mb_encode_i16x16( x264_t *h, int p, int i_qp )
 {
     pixel *p_src = h->mb.pic.p_fenc[p];
     pixel *p_dst = h->mb.pic.p_fdec[p];
@@ -242,7 +242,7 @@
  * Unlike luma blocks, this can't be done with a lookup table or
  * other shortcut technique because of the interdependencies
  * between the coefficients due to the chroma DC transform. */
-static ALWAYS_INLINE int x264_mb_optimize_chroma_dc( x264_t *h, dctcoef *dct_dc, int dequant_mf[6][16], int i_qp, int chroma422 )
+static ALWAYS_INLINE int mb_optimize_chroma_dc( x264_t *h, dctcoef *dct_dc, int dequant_mf[6][16], int i_qp, int chroma422 )
 {
     int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
 
@@ -256,7 +256,7 @@
         return h->quantf.optimize_chroma_2x2_dc( dct_dc, dmf );
 }
 
-static ALWAYS_INLINE void x264_mb_encode_chroma_internal( x264_t *h, int b_inter, int i_qp, int chroma422 )
+static ALWAYS_INLINE void mb_encode_chroma_internal( x264_t *h, int b_inter, int i_qp, int chroma422 )
 {
     int nz, nz_dc;
     int b_decimate = b_inter && h->mb.b_dct_decimate;
@@ -316,7 +316,7 @@
 
                     if( nz_dc )
                     {
-                        if( !x264_mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) )
+                        if( !mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) )
                             continue;
                         h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 1;
                         if( chroma422 )
@@ -441,7 +441,7 @@
 
             if( !nz_dc ) /* Whole block is empty */
                 continue;
-            if( !x264_mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) )
+            if( !mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) )
             {
                 h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 0;
                 continue;
@@ -492,12 +492,12 @@
 void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp )
 {
     if( CHROMA_FORMAT == CHROMA_420 )
-        x264_mb_encode_chroma_internal( h, b_inter, i_qp, 0 );
+        mb_encode_chroma_internal( h, b_inter, i_qp, 0 );
     else
-        x264_mb_encode_chroma_internal( h, b_inter, i_qp, 1 );
+        mb_encode_chroma_internal( h, b_inter, i_qp, 1 );
 }
 
-static void x264_macroblock_encode_skip( x264_t *h )
+static void macroblock_encode_skip( x264_t *h )
 {
     M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = 0;
     M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = 0;
@@ -615,7 +615,7 @@
 /*****************************************************************************
  * x264_macroblock_encode:
  *****************************************************************************/
-static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_count, int chroma )
+static ALWAYS_INLINE void macroblock_encode_internal( x264_t *h, int plane_count, int chroma )
 {
     int i_qp = h->mb.i_qp;
     int b_decimate = h->mb.b_dct_decimate;
@@ -691,7 +691,7 @@
             }
         }
 
-        x264_macroblock_encode_skip( h );
+        macroblock_encode_skip( h );
         return;
     }
     if( h->mb.i_type == B_SKIP )
@@ -699,7 +699,7 @@
         /* don't do bskip motion compensation if it was already done in macroblock_analyse */
         if( !h->mb.b_skip_mc )
             x264_mb_mc( h );
-        x264_macroblock_encode_skip( h );
+        macroblock_encode_skip( h );
         return;
     }
 
@@ -708,7 +708,7 @@
         h->mb.b_transform_8x8 = 0;
 
         for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
-            x264_mb_encode_i16x16( h, p, i_qp );
+            mb_encode_i16x16( h, p, i_qp );
     }
     else if( h->mb.i_type == I_8x8 )
     {
@@ -974,16 +974,18 @@
 void x264_macroblock_encode( x264_t *h )
 {
     if( CHROMA444 )
-        x264_macroblock_encode_internal( h, 3, 0 );
+        macroblock_encode_internal( h, 3, 0 );
+    else if( CHROMA_FORMAT )
+        macroblock_encode_internal( h, 1, 1 );
     else
-        x264_macroblock_encode_internal( h, 1, 1 );
+        macroblock_encode_internal( h, 1, 0 );
 }
 
 /*****************************************************************************
  * x264_macroblock_probe_skip:
  *  Check if the current MB could be encoded as a [PB]_SKIP
  *****************************************************************************/
-static ALWAYS_INLINE int x264_macroblock_probe_skip_internal( x264_t *h, int b_bidir, int plane_count, int chroma )
+static ALWAYS_INLINE int macroblock_probe_skip_internal( x264_t *h, int b_bidir, int plane_count, int chroma )
 {
     ALIGNED_ARRAY_64( dctcoef, dct4x4,[8],[16] );
     ALIGNED_ARRAY_64( dctcoef, dctscan,[16] );
@@ -1126,12 +1128,14 @@
 
 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
 {
-    if( CHROMA_FORMAT == CHROMA_444 )
-        return x264_macroblock_probe_skip_internal( h, b_bidir, 3, CHROMA_444 );
+    if( CHROMA_FORMAT == CHROMA_420 )
+        return macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_420 );
     else if( CHROMA_FORMAT == CHROMA_422 )
-        return x264_macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_422 );
+        return macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_422 );
+    else if( CHROMA_FORMAT == CHROMA_444 )
+        return macroblock_probe_skip_internal( h, b_bidir, 3, CHROMA_444 );
     else
-        return x264_macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_420 );
+        return macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_400 );
 }
 
 /****************************************************************************
@@ -1172,7 +1176,7 @@
  * RD only; 4 calls to this do not make up for one macroblock_encode.
  * doesn't transform chroma dc.
  *****************************************************************************/
-static ALWAYS_INLINE void x264_macroblock_encode_p8x8_internal( x264_t *h, int i8, int plane_count, int chroma )
+static ALWAYS_INLINE void macroblock_encode_p8x8_internal( x264_t *h, int i8, int plane_count, int chroma )
 {
     int b_decimate = h->mb.b_dct_decimate;
     int i_qp = h->mb.i_qp;
@@ -1365,18 +1369,20 @@
 
 void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
 {
-    if( CHROMA444 )
-        x264_macroblock_encode_p8x8_internal( h, i8, 3, CHROMA_444 );
+    if( CHROMA_FORMAT == CHROMA_420 )
+        macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_420 );
     else if( CHROMA_FORMAT == CHROMA_422 )
-        x264_macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_422 );
+        macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_422 );
+    else if( CHROMA_FORMAT == CHROMA_444 )
+        macroblock_encode_p8x8_internal( h, i8, 3, CHROMA_444 );
     else
-        x264_macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_420 );
+        macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_400 );
 }
 
 /*****************************************************************************
  * RD only, luma only (for 4:2:0)
  *****************************************************************************/
-static ALWAYS_INLINE void x264_macroblock_encode_p4x4_internal( x264_t *h, int i4, int plane_count )
+static ALWAYS_INLINE void macroblock_encode_p4x4_internal( x264_t *h, int i4, int plane_count )
 {
     int i_qp = h->mb.i_qp;
 
@@ -1413,7 +1419,7 @@
 void x264_macroblock_encode_p4x4( x264_t *h, int i8 )
 {
     if( CHROMA444 )
-        x264_macroblock_encode_p4x4_internal( h, i8, 3 );
+        macroblock_encode_p4x4_internal( h, i8, 3 );
     else
-        x264_macroblock_encode_p4x4_internal( h, i8, 1 );
+        macroblock_encode_p4x4_internal( h, i8, 1 );
 }
diff -Nru x264-0.152.2854+gite9a5903/encoder/macroblock.h x264-0.158.2988+git-20191101.7817004/encoder/macroblock.h
--- x264-0.152.2854+gite9a5903/encoder/macroblock.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/macroblock.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -29,11 +29,10 @@
 
 #include "common/macroblock.h"
 
-extern const int x264_lambda2_tab[QP_MAX_MAX+1];
-extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
-
+#define x264_rdo_init x264_template(rdo_init)
 void x264_rdo_init( void );
 
+#define x264_macroblock_probe_skip x264_template(macroblock_probe_skip)
 int x264_macroblock_probe_skip( x264_t *h, int b_bidir );
 
 #define x264_macroblock_probe_pskip( h )\
@@ -41,32 +40,51 @@
 #define x264_macroblock_probe_bskip( h )\
     x264_macroblock_probe_skip( h, 1 )
 
+#define x264_predict_lossless_4x4 x264_template(predict_lossless_4x4)
 void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode );
+#define x264_predict_lossless_8x8 x264_template(predict_lossless_8x8)
 void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] );
+#define x264_predict_lossless_16x16 x264_template(predict_lossless_16x16)
 void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode );
+#define x264_predict_lossless_chroma x264_template(predict_lossless_chroma)
 void x264_predict_lossless_chroma( x264_t *h, int i_mode );
 
+#define x264_macroblock_encode x264_template(macroblock_encode)
 void x264_macroblock_encode      ( x264_t *h );
+#define x264_macroblock_write_cabac x264_template(macroblock_write_cabac)
 void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
+#define x264_macroblock_write_cavlc x264_template(macroblock_write_cavlc)
 void x264_macroblock_write_cavlc ( x264_t *h );
 
+#define x264_macroblock_encode_p8x8 x264_template(macroblock_encode_p8x8)
 void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
+#define x264_macroblock_encode_p4x4 x264_template(macroblock_encode_p4x4)
 void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
+#define x264_mb_encode_chroma x264_template(mb_encode_chroma)
 void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp );
 
+#define x264_cabac_mb_skip x264_template(cabac_mb_skip)
 void x264_cabac_mb_skip( x264_t *h, int b_skip );
+#define x264_cabac_block_residual_c x264_template(cabac_block_residual_c)
 void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
+#define x264_cabac_block_residual_8x8_rd_c x264_template(cabac_block_residual_8x8_rd_c)
 void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
+#define x264_cabac_block_residual_rd_c x264_template(cabac_block_residual_rd_c)
 void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
 
+#define x264_quant_luma_dc_trellis x264_template(quant_luma_dc_trellis)
 int x264_quant_luma_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp,
                                 int ctx_block_cat, int b_intra, int idx );
+#define x264_quant_chroma_dc_trellis x264_template(quant_chroma_dc_trellis)
 int x264_quant_chroma_dc_trellis( x264_t *h, dctcoef *dct, int i_qp, int b_intra, int idx );
+#define x264_quant_4x4_trellis x264_template(quant_4x4_trellis)
 int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
                              int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
+#define x264_quant_8x8_trellis x264_template(quant_8x8_trellis)
 int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
                              int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
 
+#define x264_noise_reduction_update x264_template(noise_reduction_update)
 void x264_noise_reduction_update( x264_t *h );
 
 static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
@@ -195,4 +213,3 @@
 }
 
 #endif
-
diff -Nru x264-0.152.2854+gite9a5903/encoder/me.c x264-0.158.2988+git-20191101.7817004/encoder/me.c
--- x264-0.152.2854+gite9a5903/encoder/me.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/me.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.c: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -424,7 +424,7 @@
             /* Uneven-cross Multi-Hexagon-grid Search
              * as in JM, except with different early termination */
 
-            static const uint8_t x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 };
+            static const uint8_t pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 };
 
             int ucost1, ucost2;
             int cross_start = 1;
@@ -446,7 +446,7 @@
             omx = bmx; omy = bmy;
 
             /* early termination */
-#define SAD_THRESH(v) ( bcost < ( v >> x264_pixel_size_shift[i_pixel] ) )
+#define SAD_THRESH(v) ( bcost < ( v >> pixel_size_shift[i_pixel] ) )
             if( bcost == ucost2 && SAD_THRESH(2000) )
             {
                 COST_MV_X4( 0,-2, -1,-1, 1,-1, -2,0 );
@@ -633,7 +633,6 @@
             /* successive elimination by comparing DC before a full SAD,
              * because sum(abs(diff)) >= abs(diff(sum)). */
             uint16_t *sums_base = m->integral;
-            ALIGNED_16( static pixel zero[8*FENC_STRIDE] ) = {0};
             ALIGNED_ARRAY_16( int, enc_dc,[4] );
             int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
             int delta = x264_pixel_size[sad_size].w;
@@ -641,7 +640,7 @@
             int xn;
             uint16_t *cost_fpel_mvx = h->cost_mv_fpel[h->mb.i_qp][-m->mvp[0]&3] + (-m->mvp[0]>>2);
 
-            h->pixf.sad_x4[sad_size]( zero, p_fenc, p_fenc+delta,
+            h->pixf.sad_x4[sad_size]( (pixel*)x264_zero, p_fenc, p_fenc+delta,
                 p_fenc+delta*FENC_STRIDE, p_fenc+delta+delta*FENC_STRIDE,
                 FENC_STRIDE, enc_dc );
             if( delta == 4 )
@@ -1012,7 +1011,7 @@
             src[2][list][i] = h->mc.get_ref( pixv_buf[list][i], &stride[2][list][i], &m->p_fref[8],\
                                              m->i_stride[2], mvx, mvy, bw, bh, x264_weight_none );\
         }\
-        else\
+        else if( CHROMA_FORMAT )\
             h->mc.mc_chroma( pixu_buf[list][i], pixv_buf[list][i], 8, m->p_fref[4], m->i_stride[1],\
                              mvx, 2*(mvy+mv##list##y_offset)>>chroma_v_shift, bw>>1, bh>>chroma_v_shift );\
     }\
@@ -1022,9 +1021,10 @@
 
 /* Don't unroll the BIME_CACHE loop. I couldn't find any way to force this
  * other than making its iteration count not a compile-time constant. */
+#define x264_iter_kludge x264_template(iter_kludge)
 int x264_iter_kludge = 0;
 
-static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2, int rd )
+static ALWAYS_INLINE void me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2, int rd )
 {
     int x = i8&1;
     int y = i8>>1;
@@ -1134,7 +1134,7 @@
                             h->mc.avg[i_pixel]( pixu, FDEC_STRIDE, src[1][0][i0], stride[1][0][i0], src[1][1][i1], stride[1][1][i1], i_weight );
                             h->mc.avg[i_pixel]( pixv, FDEC_STRIDE, src[2][0][i0], stride[2][0][i0], src[2][1][i1], stride[2][1][i1], i_weight );
                         }
-                        else
+                        else if( CHROMA_FORMAT )
                         {
                             h->mc.avg[chromapix]( pixu, FDEC_STRIDE, pixu_buf[0][i0], 8, pixu_buf[1][i1], 8, i_weight );
                             h->mc.avg[chromapix]( pixv, FDEC_STRIDE, pixv_buf[0][i0], 8, pixv_buf[1][i1], 8, i_weight );
@@ -1179,7 +1179,7 @@
 
 void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight )
 {
-    x264_me_refine_bidir( h, m0, m1, i_weight, 0, 0, 0 );
+    me_refine_bidir( h, m0, m1, i_weight, 0, 0, 0 );
 }
 
 void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2 )
@@ -1187,7 +1187,7 @@
     /* Motion compensation is done as part of bidir_rd; don't repeat
      * it in encoding. */
     h->mb.b_skip_mc = 1;
-    x264_me_refine_bidir( h, m0, m1, i_weight, i8, i_lambda2, 1 );
+    me_refine_bidir( h, m0, m1, i_weight, i8, i_lambda2, 1 );
     h->mb.b_skip_mc = 0;
 }
 
@@ -1216,7 +1216,7 @@
             h->mc.mc_luma( pixu, FDEC_STRIDE, &m->p_fref[4], m->i_stride[1], mx, my, bw, bh, &m->weight[1] ); \
             h->mc.mc_luma( pixv, FDEC_STRIDE, &m->p_fref[8], m->i_stride[2], mx, my, bw, bh, &m->weight[2] ); \
         } \
-        else if( m->i_pixel <= PIXEL_8x8 ) \
+        else if( CHROMA_FORMAT && m->i_pixel <= PIXEL_8x8 ) \
         { \
             h->mc.mc_chroma( pixu, pixv, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], \
                              mx, 2*(my+mvy_offset)>>chroma_v_shift, bw>>1, bh>>chroma_v_shift ); \
diff -Nru x264-0.152.2854+gite9a5903/encoder/me.h x264-0.158.2988+git-20191101.7817004/encoder/me.h
--- x264-0.152.2854+gite9a5903/encoder/me.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/me.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.h: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -24,8 +24,8 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_ME_H
-#define X264_ME_H
+#ifndef X264_ENCODER_ME_H
+#define X264_ENCODER_ME_H
 
 #define COST_MAX (1<<28)
 #define COST_MAX64 (1ULL<<60)
@@ -55,15 +55,22 @@
     ALIGNED_4( int16_t mv[2] );
 } ALIGNED_64( x264_me_t );
 
+#define x264_me_search_ref x264_template(me_search_ref)
 void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_fullpel_thresh );
 #define x264_me_search( h, m, mvc, i_mvc )\
     x264_me_search_ref( h, m, mvc, i_mvc, NULL )
 
+#define x264_me_refine_qpel x264_template(me_refine_qpel)
 void x264_me_refine_qpel( x264_t *h, x264_me_t *m );
+#define x264_me_refine_qpel_refdupe x264_template(me_refine_qpel_refdupe)
 void x264_me_refine_qpel_refdupe( x264_t *h, x264_me_t *m, int *p_halfpel_thresh );
+#define x264_me_refine_qpel_rd x264_template(me_refine_qpel_rd)
 void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int i_list );
+#define x264_me_refine_bidir_rd x264_template(me_refine_bidir_rd)
 void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2 );
+#define x264_me_refine_bidir_satd x264_template(me_refine_bidir_satd)
 void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
+#define x264_rd_cost_part x264_template(rd_cost_part)
 uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
 
 #define COPY1_IF_LT(x,y)\
diff -Nru x264-0.152.2854+gite9a5903/encoder/ratecontrol.c x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.c
--- x264-0.152.2854+gite9a5903/encoder/ratecontrol.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.c: ratecontrol
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Michael Niedermayer <michaelni@gmx.at>
@@ -154,8 +154,8 @@
     } mbtree;
 
     /* MBRC stuff */
-    float frame_size_estimated; /* Access to this variable must be atomic: double is
-                                 * not atomic on all arches we care about */
+    volatile float frame_size_estimated; /* Access to this variable must be atomic: double is
+                                          * not atomic on all arches we care about */
     double frame_size_maximum;  /* Maximum frame size due to MinCR */
     double frame_size_planned;
     double slice_size_planned;
@@ -243,7 +243,7 @@
     stride <<= b_field;
     if( b_chroma )
     {
-        ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] );
+        ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] );
         int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
         int shift = 7 - CHROMA_V_SHIFT;
 
@@ -256,7 +256,7 @@
 }
 
 // Find the total AC energy of the block in all planes.
-static NOINLINE uint32_t x264_ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
+static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
 {
     /* This function contains annoying hacks because GCC has a habit of reordering emms
      * and putting it after floating point ops.  As a result, we put the emms at the end of the
@@ -278,7 +278,7 @@
             var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 );
             var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 );
         }
-        else
+        else if( CHROMA_FORMAT )
         {
             var_interlaced  += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 );
             var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 );
@@ -293,7 +293,7 @@
             var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 );
             var += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 );
         }
-        else
+        else if( CHROMA_FORMAT )
             var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 );
     }
     x264_emms();
@@ -337,7 +337,7 @@
         {
             for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
                 for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
-                    x264_ac_energy_mb( h, mb_x, mb_y, frame );
+                    ac_energy_mb( h, mb_x, mb_y, frame );
         }
         else
             return;
@@ -358,7 +358,7 @@
             for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
                 for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
                 {
-                    uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
+                    uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
                     float qp_adj = powf( energy * bit_depth_correction + 1, 0.125f );
                     frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
                     avg_adj += qp_adj;
@@ -390,7 +390,7 @@
                 }
                 else
                 {
-                    uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
+                    uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
                     qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8)));
                 }
                 if( quant_offsets )
@@ -413,7 +413,7 @@
     }
 }
 
-static int x264_macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc )
+static int macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc )
 {
     /* Use fractional QP array dimensions to compensate for edge padding */
     float srcdim[2] = {rc->mbtree.srcdim[0] / 16.f, rc->mbtree.srcdim[1] / 16.f};
@@ -486,7 +486,7 @@
     return -1;
 }
 
-static void x264_macroblock_tree_rescale_destroy( x264_ratecontrol_t *rc )
+static void macroblock_tree_rescale_destroy( x264_ratecontrol_t *rc )
 {
     for( int i = 0; i < 2; i++ )
     {
@@ -505,7 +505,7 @@
     return sum;
 }
 
-static void x264_macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst )
+static void macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst )
 {
     float *input, *output;
     int filtersize, stride, height;
@@ -567,14 +567,14 @@
         float *dst = rc->mbtree.rescale_enabled ? rc->mbtree.scale_buffer[0] : frame->f_qp_offset;
         h->mc.mbtree_fix8_unpack( dst, rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], rc->mbtree.src_mb_count );
         if( rc->mbtree.rescale_enabled )
-            x264_macroblock_tree_rescale( h, rc, frame->f_qp_offset );
+            macroblock_tree_rescale( h, rc, frame->f_qp_offset );
         if( h->frames.b_have_lowres )
             for( int i = 0; i < h->mb.i_mb_count; i++ )
                 frame->i_inv_qscale_factor[i] = x264_exp2fix8( frame->f_qp_offset[i] );
         rc->mbtree.qpbuf_pos--;
     }
     else
-        x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets );
+        x264_adaptive_quant_frame( h, frame, quant_offsets );
     return 0;
 fail:
     x264_log( h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n" );
@@ -618,7 +618,7 @@
     return 0;
 }
 
-static char *x264_strcat_filename( char *input, char *suffix )
+static char *strcat_filename( char *input, char *suffix )
 {
     char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 );
     if( !output )
@@ -771,9 +771,9 @@
     rc->last_non_b_pict_type = -1;
     rc->cbr_decay = 1.0;
 
-    if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read )
+    if( h->param.rc.i_rc_method != X264_RC_ABR && h->param.rc.b_stat_read )
     {
-        x264_log( h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n" );
+        x264_log( h, X264_LOG_ERROR, "CRF/CQP is incompatible with 2pass.\n" );
         return -1;
     }
 
@@ -878,7 +878,7 @@
         }
         if( h->param.rc.b_mb_tree )
         {
-            char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
+            char *mbtree_stats_in = strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
             if( !mbtree_stats_in )
                 return -1;
             rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" );
@@ -941,6 +941,7 @@
             CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
             CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop );
             CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat );
+            CMP_OPT_FIRST_PASS( "mbtree", h->param.rc.b_mb_tree );
 
             if( (p = strstr( opts, "interlaced=" )) )
             {
@@ -1154,7 +1155,7 @@
     if( h->param.rc.b_stat_write )
     {
         char *p;
-        rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" );
+        rc->psz_stat_file_tmpname = strcat_filename( h->param.rc.psz_stat_out, ".temp" );
         if( !rc->psz_stat_file_tmpname )
             return -1;
 
@@ -1171,8 +1172,8 @@
         x264_free( p );
         if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
         {
-            rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" );
-            rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" );
+            rc->psz_mbtree_stat_file_tmpname = strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" );
+            rc->psz_mbtree_stat_file_name = strcat_filename( h->param.rc.psz_stat_out, ".mbtree" );
             if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name )
                 return -1;
 
@@ -1192,7 +1193,7 @@
             rc->mbtree.srcdim[0] = h->param.i_width;
             rc->mbtree.srcdim[1] = h->param.i_height;
         }
-        if( x264_macroblock_tree_rescale_init( h, rc ) < 0 )
+        if( macroblock_tree_rescale_init( h, rc ) < 0 )
             return -1;
     }
 
@@ -1327,9 +1328,10 @@
 
 static x264_zone_t *get_zone( x264_t *h, int frame_num )
 {
-    for( int i = h->rc->i_zones - 1; i >= 0; i-- )
+    x264_ratecontrol_t *rc = h->rc;
+    for( int i = rc->i_zones - 1; i >= 0; i-- )
     {
-        x264_zone_t *z = &h->rc->zones[i];
+        x264_zone_t *z = &rc->zones[i];
         if( frame_num >= z->i_start && frame_num <= z->i_end )
             return z;
     }
@@ -1385,7 +1387,7 @@
     x264_free( rc->pred_b_from_p );
     x264_free( rc->entry );
     x264_free( rc->entry_out );
-    x264_macroblock_tree_rescale_destroy( rc );
+    macroblock_tree_rescale_destroy( rc );
     if( rc->zones )
     {
         x264_free( rc->zones[0].param );
@@ -1432,7 +1434,7 @@
     {
         int frame = h->fenc->i_frame;
         assert( frame >= 0 && frame < rc->num_entries );
-        rce = h->rc->rce = &h->rc->entry[frame];
+        rce = rc->rce = &rc->entry[frame];
 
         if( h->sh.i_type == SLICE_TYPE_B
             && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO )
@@ -1693,7 +1695,7 @@
             b1 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices;
         }
 
-        h->rc->frame_size_estimated = b1 - size_of_other_slices;
+        rc->frame_size_estimated = b1 - size_of_other_slices;
 
         /* If the current row was large enough to cause a large QP jump, try re-encoding it. */
         if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row )
@@ -1709,12 +1711,12 @@
     }
     else
     {
-        h->rc->frame_size_estimated = bits_so_far;
+        rc->frame_size_estimated = bits_so_far;
 
         /* Last-ditch attempt: if the last row of the frame underflowed the VBV,
          * try again. */
         if( rc->qpm < qp_max && can_reencode_row
-            && (h->rc->frame_size_estimated + size_of_other_slices > X264_MIN( rc->frame_size_maximum, rc->buffer_fill )) )
+            && (bits_so_far + size_of_other_slices > X264_MIN( rc->frame_size_maximum, rc->buffer_fill )) )
         {
             rc->qpm = qp_max;
             rc->qpa_rc = rc->qpa_rc_prev;
@@ -2211,7 +2213,7 @@
     rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final_min / h->sps->vui.i_time_scale;
     if( h->i_thread_frames > 1 )
     {
-        int j = h->rc - h->thread[0]->rc;
+        int j = rcc - h->thread[0]->rc;
         for( int i = 1; i < h->i_thread_frames; i++ )
         {
             x264_t *t = h->thread[ (j+i)%h->i_thread_frames ];
@@ -2445,7 +2447,7 @@
         /* Limit planned size by MinCR */
         if( rcc->b_vbv )
             rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
-        h->rc->frame_size_estimated = rcc->frame_size_planned;
+        rcc->frame_size_estimated = rcc->frame_size_planned;
 
         /* For row SATDs */
         if( rcc->b_vbv )
@@ -2458,7 +2460,7 @@
         double predicted_bits = total_bits;
         if( h->i_thread_frames > 1 )
         {
-            int j = h->rc - h->thread[0]->rc;
+            int j = rcc - h->thread[0]->rc;
             for( int i = 1; i < h->i_thread_frames; i++ )
             {
                 x264_t *t = h->thread[(j+i) % h->i_thread_frames];
@@ -2627,12 +2629,12 @@
         /* Limit planned size by MinCR */
         if( rcc->b_vbv )
             rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum );
-        h->rc->frame_size_estimated = rcc->frame_size_planned;
+        rcc->frame_size_estimated = rcc->frame_size_planned;
         return q;
     }
 }
 
-static void x264_threads_normalize_predictors( x264_t *h )
+static void threads_normalize_predictors( x264_t *h )
 {
     double totalsize = 0;
     for( int i = 0; i < h->param.i_threads; i++ )
@@ -2677,7 +2679,7 @@
     }
     if( rc->b_vbv && rc->frame_size_planned )
     {
-        x264_threads_normalize_predictors( h );
+        threads_normalize_predictors( h );
 
         if( rc->single_frame_vbv )
         {
@@ -2688,7 +2690,7 @@
                 float max_frame_error = x264_clip3f( 1.0 / (t->i_threadslice_end - t->i_threadslice_start), 0.05, 0.25 );
                 t->rc->slice_size_planned += 2 * max_frame_error * rc->frame_size_planned;
             }
-            x264_threads_normalize_predictors( h );
+            threads_normalize_predictors( h );
         }
 
         for( int i = 0; i < h->param.i_threads; i++ )
diff -Nru x264-0.152.2854+gite9a5903/encoder/ratecontrol.h x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.h
--- x264-0.152.2854+gite9a5903/encoder/ratecontrol.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.h: ratecontrol
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -24,8 +24,8 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#ifndef X264_RATECONTROL_H
-#define X264_RATECONTROL_H
+#ifndef X264_ENCODER_RATECONTROL_H
+#define X264_ENCODER_RATECONTROL_H
 
 /* Completely arbitrary.  Ratecontrol lowers relative quality at higher framerates
  * and the reverse at lower framerates; this serves as the center of the curve.
@@ -39,28 +39,49 @@
 
 #define CLIP_DURATION(f) x264_clip3f(f,MIN_FRAME_DURATION,MAX_FRAME_DURATION)
 
+#define x264_ratecontrol_new x264_template(ratecontrol_new)
 int  x264_ratecontrol_new   ( x264_t * );
+#define x264_ratecontrol_delete x264_template(ratecontrol_delete)
 void x264_ratecontrol_delete( x264_t * );
 
+#define x264_ratecontrol_init_reconfigurable x264_template(ratecontrol_init_reconfigurable)
 void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
+#define x264_encoder_reconfig_apply x264_template(encoder_reconfig_apply)
 int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param );
 
+#define x264_adaptive_quant_frame x264_template(adaptive_quant_frame)
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets );
+#define x264_macroblock_tree_read x264_template(macroblock_tree_read)
 int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets );
+#define x264_reference_build_list_optimal x264_template(reference_build_list_optimal)
 int  x264_reference_build_list_optimal( x264_t *h );
+#define x264_thread_sync_ratecontrol x264_template(thread_sync_ratecontrol)
 void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
+#define x264_ratecontrol_zone_init x264_template(ratecontrol_zone_init)
 void x264_ratecontrol_zone_init( x264_t * );
+#define x264_ratecontrol_start x264_template(ratecontrol_start)
 void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead );
+#define x264_ratecontrol_slice_type x264_template(ratecontrol_slice_type)
 int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
+#define x264_ratecontrol_set_weights x264_template(ratecontrol_set_weights)
 void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm );
+#define x264_ratecontrol_mb x264_template(ratecontrol_mb)
 int  x264_ratecontrol_mb( x264_t *, int bits );
+#define x264_ratecontrol_qp x264_template(ratecontrol_qp)
 int  x264_ratecontrol_qp( x264_t * );
+#define x264_ratecontrol_mb_qp x264_template(ratecontrol_mb_qp)
 int  x264_ratecontrol_mb_qp( x264_t *h );
+#define x264_ratecontrol_end x264_template(ratecontrol_end)
 int  x264_ratecontrol_end( x264_t *, int bits, int *filler );
+#define x264_ratecontrol_summary x264_template(ratecontrol_summary)
 void x264_ratecontrol_summary( x264_t * );
+#define x264_rc_analyse_slice x264_template(rc_analyse_slice)
 int  x264_rc_analyse_slice( x264_t *h );
+#define x264_threads_distribute_ratecontrol x264_template(threads_distribute_ratecontrol)
 void x264_threads_distribute_ratecontrol( x264_t *h );
+#define x264_threads_merge_ratecontrol x264_template(threads_merge_ratecontrol)
 void x264_threads_merge_ratecontrol( x264_t *h );
+#define x264_hrd_fullness x264_template(hrd_fullness)
 void x264_hrd_fullness( x264_t *h );
-#endif
 
+#endif
diff -Nru x264-0.152.2854+gite9a5903/encoder/rdo.c x264-0.158.2988+git-20191101.7817004/encoder/rdo.c
--- x264-0.152.2854+gite9a5903/encoder/rdo.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/rdo.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rdo.c: rate-distortion optimization
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Fiona Glaser <fiona@x264.com>
@@ -32,7 +32,9 @@
 
 /* Transition and size tables for abs<9 MVD and residual coding */
 /* Consist of i_prefix-2 1s, one zero, and a bypass sign bit */
+#define x264_cabac_transition_unary x264_template(cabac_transition_unary)
 uint8_t x264_cabac_transition_unary[15][128];
+#define x264_cabac_size_unary x264_template(cabac_size_unary)
 uint16_t x264_cabac_size_unary[15][128];
 /* Transition and size tables for abs>9 MVD */
 /* Consist of 5 1s and a bypass sign bit */
@@ -46,7 +48,8 @@
 #define bs_write_ue(s,v)   ((s)->i_bits_encoded += bs_size_ue(v))
 #define bs_write_se(s,v)   ((s)->i_bits_encoded += bs_size_se(v))
 #define bs_write_te(s,v,l) ((s)->i_bits_encoded += bs_size_te(v,l))
-#define x264_macroblock_write_cavlc  static x264_macroblock_size_cavlc
+#undef  x264_macroblock_write_cavlc
+#define x264_macroblock_write_cavlc  static macroblock_size_cavlc
 #include "cavlc.c"
 
 /* CABAC: not exactly the same. x264_cabac_size_decision() keeps track of
@@ -55,12 +58,14 @@
 #undef  x264_cabac_encode_decision_noup
 #undef  x264_cabac_encode_bypass
 #undef  x264_cabac_encode_terminal
+#undef  x264_cabac_encode_ue_bypass
 #define x264_cabac_encode_decision(c,x,v) x264_cabac_size_decision(c,x,v)
 #define x264_cabac_encode_decision_noup(c,x,v) x264_cabac_size_decision_noup(c,x,v)
 #define x264_cabac_encode_terminal(c)     ((c)->f8_bits_encoded += 7)
 #define x264_cabac_encode_bypass(c,v)     ((c)->f8_bits_encoded += 256)
 #define x264_cabac_encode_ue_bypass(c,e,v) ((c)->f8_bits_encoded += (bs_size_ue_big(v+(1<<e)-1)-e)<<8)
-#define x264_macroblock_write_cabac  static x264_macroblock_size_cabac
+#undef  x264_macroblock_write_cabac
+#define x264_macroblock_write_cabac  static macroblock_size_cabac
 #include "cabac.c"
 
 #define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \
@@ -91,7 +96,6 @@
     static const uint8_t satd_shift_x[3] = {3,   2,   2};
     static const uint8_t satd_shift_y[3] = {2-1, 3-2, 2-2};
     static const uint8_t  satd_offset[3] = {0,   8,   16};
-    ALIGNED_16( static pixel zero[16] ) = {0};
     int cache_index = (x >> satd_shift_x[size - PIXEL_8x4]) + (y >> satd_shift_y[size - PIXEL_8x4])
                     + satd_offset[size - PIXEL_8x4];
     int res = h->mb.pic.fenc_satd_cache[cache_index];
@@ -100,8 +104,8 @@
     else
     {
         pixel *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
-        int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, zero, 0 ) >> 1;
-        res = h->pixf.satd[size]( fenc, FENC_STRIDE, zero, 0 ) - dc;
+        int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, (pixel*)x264_zero, 0 ) >> 1;
+        res = h->pixf.satd[size]( fenc, FENC_STRIDE, (pixel*)x264_zero, 0 ) - dc;
         h->mb.pic.fenc_satd_cache[cache_index] = res + 1;
         return res;
     }
@@ -118,7 +122,6 @@
 
 static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
 {
-    ALIGNED_16( static pixel zero[16] ) = {0};
     int satd = 0;
     pixel *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
     pixel *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
@@ -135,8 +138,8 @@
         }
         else
         {
-            int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, zero, 0 ) >> 1;
-            satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, zero, 0 ) - dc - cached_satd( h, size, x, y ));
+            int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) >> 1;
+            satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) - dc - cached_satd( h, size, x, y ));
         }
         satd = (satd * h->mb.i_psy_rd * h->mb.i_psy_rd_lambda + 128) >> 8;
     }
@@ -145,13 +148,17 @@
 
 static inline int ssd_mb( x264_t *h )
 {
-    int chroma_size = h->luma2chroma_pixel[PIXEL_16x16];
-    int chroma_ssd = ssd_plane(h, chroma_size, 1, 0, 0) + ssd_plane(h, chroma_size, 2, 0, 0);
-    chroma_ssd = ((uint64_t)chroma_ssd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
-    return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + chroma_ssd;
+    int i_ssd = ssd_plane( h, PIXEL_16x16, 0, 0, 0 );
+    if( CHROMA_FORMAT )
+    {
+        int chroma_size = h->luma2chroma_pixel[PIXEL_16x16];
+        int chroma_ssd = ssd_plane( h, chroma_size, 1, 0, 0 ) + ssd_plane( h, chroma_size, 2, 0, 0 );
+        i_ssd += ((uint64_t)chroma_ssd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
+    }
+    return i_ssd;
 }
 
-static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
+static int rd_cost_mb( x264_t *h, int i_lambda2 )
 {
     int b_transform_bak = h->mb.b_transform_8x8;
     int i_ssd;
@@ -173,12 +180,12 @@
     {
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
-        x264_macroblock_size_cabac( h, &cabac_tmp );
+        macroblock_size_cabac( h, &cabac_tmp );
         i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 32768 ) >> 16;
     }
     else
     {
-        x264_macroblock_size_cavlc( h );
+        macroblock_size_cavlc( h );
         i_bits = ( (uint64_t)h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
 
@@ -190,7 +197,7 @@
 
 /* partition RD functions use 8 bits more precision to avoid large rounding errors at low QPs */
 
-static uint64_t x264_rd_cost_subpart( x264_t *h, int i_lambda2, int i4, int i_pixel )
+static uint64_t rd_cost_subpart( x264_t *h, int i_lambda2, int i4, int i_pixel )
 {
     uint64_t i_ssd, i_bits;
 
@@ -213,11 +220,11 @@
     {
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
-        x264_subpartition_size_cabac( h, &cabac_tmp, i4, i_pixel );
+        subpartition_size_cabac( h, &cabac_tmp, i4, i_pixel );
         i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
-        i_bits = x264_subpartition_size_cavlc( h, i4, i_pixel );
+        i_bits = subpartition_size_cavlc( h, i4, i_pixel );
 
     return (i_ssd<<8) + i_bits;
 }
@@ -229,12 +236,12 @@
 
     if( i_pixel == PIXEL_16x16 )
     {
-        int i_cost = x264_rd_cost_mb( h, i_lambda2 );
+        int i_cost = rd_cost_mb( h, i_lambda2 );
         return i_cost;
     }
 
     if( i_pixel > PIXEL_8x8 )
-        return x264_rd_cost_subpart( h, i_lambda2, i4, i_pixel );
+        return rd_cost_subpart( h, i_lambda2, i4, i_pixel );
 
     h->mb.i_cbp_luma = 0;
 
@@ -247,25 +254,28 @@
     int ssd_x = 8*(i8&1);
     int ssd_y = 8*(i8>>1);
     i_ssd = ssd_plane( h, i_pixel, 0, ssd_x, ssd_y );
-    int chromapix = h->luma2chroma_pixel[i_pixel];
-    int chromassd = ssd_plane( h, chromapix, 1, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT )
-                  + ssd_plane( h, chromapix, 2, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT );
-    i_ssd += ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
+    if( CHROMA_FORMAT )
+    {
+        int chroma_size = h->luma2chroma_pixel[i_pixel];
+        int chroma_ssd = ssd_plane( h, chroma_size, 1, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT )
+                       + ssd_plane( h, chroma_size, 2, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT );
+        i_ssd += ((uint64_t)chroma_ssd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
+    }
 
     if( h->param.b_cabac )
     {
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
-        x264_partition_size_cabac( h, &cabac_tmp, i8, i_pixel );
+        partition_size_cabac( h, &cabac_tmp, i8, i_pixel );
         i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
-        i_bits = (uint64_t)x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2;
+        i_bits = (uint64_t)partition_size_cavlc( h, i8, i_pixel ) * i_lambda2;
 
     return (i_ssd<<8) + i_bits;
 }
 
-static uint64_t x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode, pixel edge[4][32] )
+static uint64_t rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode, pixel edge[4][32] )
 {
     uint64_t i_ssd, i_bits;
     int plane_count = CHROMA444 ? 3 : 1;
@@ -292,16 +302,16 @@
     {
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
-        x264_partition_i8x8_size_cabac( h, &cabac_tmp, i8, i_mode );
+        partition_i8x8_size_cabac( h, &cabac_tmp, i8, i_mode );
         i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
-        i_bits = (uint64_t)x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2;
+        i_bits = (uint64_t)partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2;
 
     return (i_ssd<<8) + i_bits;
 }
 
-static uint64_t x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode )
+static uint64_t rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode )
 {
     uint64_t i_ssd, i_bits;
     int plane_count = CHROMA444 ? 3 : 1;
@@ -326,16 +336,16 @@
     {
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
-        x264_partition_i4x4_size_cabac( h, &cabac_tmp, i4, i_mode );
+        partition_i4x4_size_cabac( h, &cabac_tmp, i4, i_mode );
         i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
-        i_bits = (uint64_t)x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2;
+        i_bits = (uint64_t)partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2;
 
     return (i_ssd<<8) + i_bits;
 }
 
-static uint64_t x264_rd_cost_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct )
+static uint64_t rd_cost_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct )
 {
     uint64_t i_ssd, i_bits;
 
@@ -352,11 +362,11 @@
     {
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
-        x264_chroma_size_cabac( h, &cabac_tmp );
+        chroma_size_cabac( h, &cabac_tmp );
         i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
-        i_bits = (uint64_t)x264_chroma_size_cavlc( h ) * i_lambda2;
+        i_bits = (uint64_t)chroma_size_cavlc( h ) * i_lambda2;
 
     return (i_ssd<<8) + i_bits;
 }
@@ -907,8 +917,8 @@
                          const uint8_t *zigzag, int ctx_block_cat, int lambda2, int b_ac,
                          int b_chroma, int dc, int num_coefs, int idx, int b_8x8 )
 {
-    ALIGNED_16( dctcoef quant_coefs[2][16] );
-    ALIGNED_16( dctcoef coefs[16] ) = {0};
+    ALIGNED_ARRAY_16( dctcoef, quant_coefs,[2],[16] );
+    ALIGNED_ARRAY_16( dctcoef, coefs,[16] );
     const uint32_t *coef_weight1 = b_8x8 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
     const uint32_t *coef_weight2 = b_8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
     int delta_distortion[16];
@@ -918,6 +928,9 @@
     int nC = b_chroma && dc ? 3 + (num_coefs>>2)
                             : ct_index[x264_mb_predict_non_zero_code( h, !b_chroma && dc ? (idx - LUMA_DC)*16 : idx )];
 
+    for( i = 0; i < 16; i += 16/sizeof(*coefs) )
+        M128( &coefs[i] ) = M128_ZERO;
+
     /* Code for handling 8x8dct -> 4x4dct CAVLC munging.  Input/output use a different
      * step/start/end than internal processing. */
     int step = 1;
@@ -997,7 +1010,7 @@
     if( !coef_mask )
         bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );
     else
-        x264_cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC );
+        cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC );
     score = (int64_t)h->out.bs.i_bits_encoded * lambda2;
 
     /* QNS loop: pick the change that improves RD the most, apply it, repeat.
@@ -1030,7 +1043,7 @@
             if( !cur_mask )
                 bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );
             else
-                x264_cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC );
+                cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC );
             cur_score += (int64_t)h->out.bs.i_bits_encoded * lambda2;
 
             coefs[i] = old_coef;
@@ -1089,8 +1102,8 @@
         DCT_LUMA_DC, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 1, 16, idx, 0 );
 }
 
-static const uint8_t x264_zigzag_scan2x2[4] = { 0, 1, 2, 3 };
-static const uint8_t x264_zigzag_scan2x4[8] = { 0, 2, 1, 4, 6, 3, 5, 7 };
+static const uint8_t zigzag_scan2x2[4] = { 0, 1, 2, 3 };
+static const uint8_t zigzag_scan2x4[8] = { 0, 2, 1, 4, 6, 3, 5, 7 };
 
 int x264_quant_chroma_dc_trellis( x264_t *h, dctcoef *dct, int i_qp, int b_intra, int idx )
 {
@@ -1100,12 +1113,12 @@
 
     if( CHROMA_FORMAT == CHROMA_422 )
     {
-        zigzag = x264_zigzag_scan2x4;
+        zigzag = zigzag_scan2x4;
         num_coefs = 8;
     }
     else
     {
-        zigzag = x264_zigzag_scan2x2;
+        zigzag = zigzag_scan2x2;
         num_coefs = 4;
     }
 
diff -Nru x264-0.152.2854+gite9a5903/encoder/set.c x264-0.158.2988+git-20191101.7817004/encoder/set.c
--- x264-0.152.2854+gite9a5903/encoder/set.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/set.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set: header writing
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -31,7 +31,7 @@
 
 // Indexed by pic_struct values
 static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
-const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
+static const uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
 
 static void transpose( uint8_t *buf, int w )
 {
@@ -40,15 +40,15 @@
             XCHG( uint8_t, buf[w*i+j], buf[w*j+i] );
 }
 
-static void scaling_list_write( bs_t *s, x264_pps_t *pps, int idx )
+static void scaling_list_write( bs_t *s, x264_sps_t *sps, int idx )
 {
     const int len = idx<4 ? 16 : 64;
     const uint8_t *zigzag = idx<4 ? x264_zigzag_scan4[0] : x264_zigzag_scan8[0];
-    const uint8_t *list = pps->scaling_list[idx];
-    const uint8_t *def_list = (idx==CQM_4IC) ? pps->scaling_list[CQM_4IY]
-                            : (idx==CQM_4PC) ? pps->scaling_list[CQM_4PY]
-                            : (idx==CQM_8IC+4) ? pps->scaling_list[CQM_8IY+4]
-                            : (idx==CQM_8PC+4) ? pps->scaling_list[CQM_8PY+4]
+    const uint8_t *list = sps->scaling_list[idx];
+    const uint8_t *def_list = (idx==CQM_4IC) ? sps->scaling_list[CQM_4IY]
+                            : (idx==CQM_4PC) ? sps->scaling_list[CQM_4PY]
+                            : (idx==CQM_8IC+4) ? sps->scaling_list[CQM_8IY+4]
+                            : (idx==CQM_8PC+4) ? sps->scaling_list[CQM_8PY+4]
                             : x264_cqm_jvt[idx];
     if( !memcmp( list, def_list, len ) )
         bs_write1( s, 0 );   // scaling_list_present_flag
@@ -105,8 +105,12 @@
     sps->i_id = i_id;
     sps->i_mb_width = ( param->i_width + 15 ) / 16;
     sps->i_mb_height= ( param->i_height + 15 ) / 16;
+    sps->b_frame_mbs_only = !(param->b_interlaced || param->b_fake_interlaced);
+    if( !sps->b_frame_mbs_only )
+        sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1;
     sps->i_chroma_format_idc = csp >= X264_CSP_I444 ? CHROMA_444 :
-                               csp >= X264_CSP_I422 ? CHROMA_422 : CHROMA_420;
+                               csp >= X264_CSP_I422 ? CHROMA_422 :
+                               csp >= X264_CSP_I420 ? CHROMA_420 : CHROMA_400;
 
     sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
     if( sps->b_qpprime_y_zero_transform_bypass || sps->i_chroma_format_idc == CHROMA_444 )
@@ -115,7 +119,7 @@
         sps->i_profile_idc  = PROFILE_HIGH422;
     else if( BIT_DEPTH > 8 )
         sps->i_profile_idc  = PROFILE_HIGH10;
-    else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT )
+    else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT || sps->i_chroma_format_idc == CHROMA_400 )
         sps->i_profile_idc  = PROFILE_HIGH;
     else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 )
         sps->i_profile_idc  = PROFILE_MAIN;
@@ -178,9 +182,6 @@
     sps->b_vui = 1;
 
     sps->b_gaps_in_frame_num_value_allowed = 0;
-    sps->b_frame_mbs_only = !(param->b_interlaced || param->b_fake_interlaced);
-    if( !sps->b_frame_mbs_only )
-        sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1;
     sps->b_mb_adaptive_frame_field = param->b_interlaced;
     sps->b_direct8x8_inference = 1;
 
@@ -197,22 +198,14 @@
     sps->vui.b_color_description_present = 0;
 
     sps->vui.i_colorprim = ( param->vui.i_colorprim >= 0 && param->vui.i_colorprim <= 12 ? param->vui.i_colorprim : 2 );
-    sps->vui.i_transfer  = ( param->vui.i_transfer  >= 0 && param->vui.i_transfer  <= 17 ? param->vui.i_transfer  : 2 );
-    sps->vui.i_colmatrix = ( param->vui.i_colmatrix >= 0 && param->vui.i_colmatrix <= 11 ? param->vui.i_colmatrix :
+    sps->vui.i_transfer  = ( param->vui.i_transfer  >= 0 && param->vui.i_transfer  <= 18 ? param->vui.i_transfer  : 2 );
+    sps->vui.i_colmatrix = ( param->vui.i_colmatrix >= 0 && param->vui.i_colmatrix <= 14 ? param->vui.i_colmatrix :
                            ( csp >= X264_CSP_BGR ? 0 : 2 ) );
-    if( sps->vui.i_colorprim != 2 ||
-        sps->vui.i_transfer  != 2 ||
-        sps->vui.i_colmatrix != 2 )
-    {
+    if( sps->vui.i_colorprim != 2 || sps->vui.i_transfer != 2 || sps->vui.i_colmatrix != 2 )
         sps->vui.b_color_description_present = 1;
-    }
 
-    if( sps->vui.i_vidformat != 5 ||
-        sps->vui.b_fullrange ||
-        sps->vui.b_color_description_present )
-    {
+    if( sps->vui.i_vidformat != 5 || sps->vui.b_fullrange || sps->vui.b_color_description_present )
         sps->vui.b_signal_type_present = 1;
-    }
 
     /* FIXME: not sufficient for interlaced video */
     sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
@@ -247,6 +240,9 @@
         sps->vui.i_log2_max_mv_length_horizontal =
         sps->vui.i_log2_max_mv_length_vertical = (int)log2f( X264_MAX( 1, param->analyse.i_mv_range*4-1 ) ) + 1;
     }
+
+    sps->b_avcintra = !!param->i_avcintra_class;
+    sps->i_cqm_preset = param->i_cqm_preset;
 }
 
 void x264_sps_init_reconfigurable( x264_sps_t *sps, x264_param_t *param )
@@ -254,7 +250,7 @@
     sps->crop.i_left   = param->crop_rect.i_left;
     sps->crop.i_top    = param->crop_rect.i_top;
     sps->crop.i_right  = param->crop_rect.i_right + sps->i_mb_width*16 - param->i_width;
-    sps->crop.i_bottom = (param->crop_rect.i_bottom + sps->i_mb_height*16 - param->i_height) >> !sps->b_frame_mbs_only;
+    sps->crop.i_bottom = param->crop_rect.i_bottom + sps->i_mb_height*16 - param->i_height;
     sps->b_crop = sps->crop.i_left  || sps->crop.i_top ||
                   sps->crop.i_right || sps->crop.i_bottom;
 
@@ -267,6 +263,44 @@
     }
 }
 
+void x264_sps_init_scaling_list( x264_sps_t *sps, x264_param_t *param )
+{
+    switch( sps->i_cqm_preset )
+    {
+    case X264_CQM_FLAT:
+        for( int i = 0; i < 8; i++ )
+            sps->scaling_list[i] = x264_cqm_flat16;
+        break;
+    case X264_CQM_JVT:
+        for( int i = 0; i < 8; i++ )
+            sps->scaling_list[i] = x264_cqm_jvt[i];
+        break;
+    case X264_CQM_CUSTOM:
+        /* match the transposed DCT & zigzag */
+        transpose( param->cqm_4iy, 4 );
+        transpose( param->cqm_4py, 4 );
+        transpose( param->cqm_4ic, 4 );
+        transpose( param->cqm_4pc, 4 );
+        transpose( param->cqm_8iy, 8 );
+        transpose( param->cqm_8py, 8 );
+        transpose( param->cqm_8ic, 8 );
+        transpose( param->cqm_8pc, 8 );
+        sps->scaling_list[CQM_4IY] = param->cqm_4iy;
+        sps->scaling_list[CQM_4PY] = param->cqm_4py;
+        sps->scaling_list[CQM_4IC] = param->cqm_4ic;
+        sps->scaling_list[CQM_4PC] = param->cqm_4pc;
+        sps->scaling_list[CQM_8IY+4] = param->cqm_8iy;
+        sps->scaling_list[CQM_8PY+4] = param->cqm_8py;
+        sps->scaling_list[CQM_8IC+4] = param->cqm_8ic;
+        sps->scaling_list[CQM_8PC+4] = param->cqm_8pc;
+        for( int i = 0; i < 8; i++ )
+            for( int j = 0; j < (i < 4 ? 16 : 64); j++ )
+                if( sps->scaling_list[i][j] == 0 )
+                    sps->scaling_list[i] = x264_cqm_jvt[i];
+        break;
+    }
+}
+
 void x264_sps_write( bs_t *s, x264_sps_t *sps )
 {
     bs_realign( s );
@@ -290,7 +324,26 @@
         bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
         bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
         bs_write1( s, sps->b_qpprime_y_zero_transform_bypass );
-        bs_write1( s, 0 ); // seq_scaling_matrix_present_flag
+        /* Exactly match the AVC-Intra bitstream */
+        bs_write1( s, sps->b_avcintra ); // seq_scaling_matrix_present_flag
+        if( sps->b_avcintra )
+        {
+            scaling_list_write( s, sps, CQM_4IY );
+            scaling_list_write( s, sps, CQM_4IC );
+            scaling_list_write( s, sps, CQM_4IC );
+            bs_write1( s, 0 ); // no inter
+            bs_write1( s, 0 ); // no inter
+            bs_write1( s, 0 ); // no inter
+            scaling_list_write( s, sps, CQM_8IY+4 );
+            bs_write1( s, 0 ); // no inter
+            if( sps->i_chroma_format_idc == CHROMA_444 )
+            {
+                scaling_list_write( s, sps, CQM_8IC+4 );
+                bs_write1( s, 0 ); // no inter
+                scaling_list_write( s, sps, CQM_8IC+4 );
+                bs_write1( s, 0 ); // no inter
+            }
+        }
     }
 
     bs_write_ue( s, sps->i_log2_max_frame_num - 4 );
@@ -310,7 +363,7 @@
     if( sps->b_crop )
     {
         int h_shift = sps->i_chroma_format_idc == CHROMA_420 || sps->i_chroma_format_idc == CHROMA_422;
-        int v_shift = sps->i_chroma_format_idc == CHROMA_420;
+        int v_shift = (sps->i_chroma_format_idc == CHROMA_420) + !sps->b_frame_mbs_only;
         bs_write_ue( s, sps->crop.i_left   >> h_shift );
         bs_write_ue( s, sps->crop.i_right  >> h_shift );
         bs_write_ue( s, sps->crop.i_top    >> v_shift );
@@ -446,43 +499,6 @@
     pps->b_redundant_pic_cnt = 0;
 
     pps->b_transform_8x8_mode = param->analyse.b_transform_8x8 ? 1 : 0;
-
-    pps->i_cqm_preset = param->i_cqm_preset;
-
-    switch( pps->i_cqm_preset )
-    {
-    case X264_CQM_FLAT:
-        for( int i = 0; i < 8; i++ )
-            pps->scaling_list[i] = x264_cqm_flat16;
-        break;
-    case X264_CQM_JVT:
-        for( int i = 0; i < 8; i++ )
-            pps->scaling_list[i] = x264_cqm_jvt[i];
-        break;
-    case X264_CQM_CUSTOM:
-        /* match the transposed DCT & zigzag */
-        transpose( param->cqm_4iy, 4 );
-        transpose( param->cqm_4py, 4 );
-        transpose( param->cqm_4ic, 4 );
-        transpose( param->cqm_4pc, 4 );
-        transpose( param->cqm_8iy, 8 );
-        transpose( param->cqm_8py, 8 );
-        transpose( param->cqm_8ic, 8 );
-        transpose( param->cqm_8pc, 8 );
-        pps->scaling_list[CQM_4IY] = param->cqm_4iy;
-        pps->scaling_list[CQM_4PY] = param->cqm_4py;
-        pps->scaling_list[CQM_4IC] = param->cqm_4ic;
-        pps->scaling_list[CQM_4PC] = param->cqm_4pc;
-        pps->scaling_list[CQM_8IY+4] = param->cqm_8iy;
-        pps->scaling_list[CQM_8PY+4] = param->cqm_8py;
-        pps->scaling_list[CQM_8IC+4] = param->cqm_8ic;
-        pps->scaling_list[CQM_8PC+4] = param->cqm_8pc;
-        for( int i = 0; i < 8; i++ )
-            for( int j = 0; j < (i < 4 ? 16 : 64); j++ )
-                if( pps->scaling_list[i][j] == 0 )
-                    pps->scaling_list[i] = x264_cqm_jvt[i];
-        break;
-    }
 }
 
 void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps )
@@ -508,26 +524,27 @@
     bs_write1( s, pps->b_constrained_intra_pred );
     bs_write1( s, pps->b_redundant_pic_cnt );
 
-    if( pps->b_transform_8x8_mode || pps->i_cqm_preset != X264_CQM_FLAT )
+    int b_scaling_list = !sps->b_avcintra && sps->i_cqm_preset != X264_CQM_FLAT;
+    if( pps->b_transform_8x8_mode || b_scaling_list )
     {
         bs_write1( s, pps->b_transform_8x8_mode );
-        bs_write1( s, (pps->i_cqm_preset != X264_CQM_FLAT) );
-        if( pps->i_cqm_preset != X264_CQM_FLAT )
+        bs_write1( s, b_scaling_list );
+        if( b_scaling_list )
         {
-            scaling_list_write( s, pps, CQM_4IY );
-            scaling_list_write( s, pps, CQM_4IC );
+            scaling_list_write( s, sps, CQM_4IY );
+            scaling_list_write( s, sps, CQM_4IC );
             bs_write1( s, 0 ); // Cr = Cb
-            scaling_list_write( s, pps, CQM_4PY );
-            scaling_list_write( s, pps, CQM_4PC );
+            scaling_list_write( s, sps, CQM_4PY );
+            scaling_list_write( s, sps, CQM_4PC );
             bs_write1( s, 0 ); // Cr = Cb
             if( pps->b_transform_8x8_mode )
             {
-                scaling_list_write( s, pps, CQM_8IY+4 );
-                scaling_list_write( s, pps, CQM_8PY+4 );
+                scaling_list_write( s, sps, CQM_8IY+4 );
+                scaling_list_write( s, sps, CQM_8PY+4 );
                 if( sps->i_chroma_format_idc == CHROMA_444 )
                 {
-                    scaling_list_write( s, pps, CQM_8IC+4 );
-                    scaling_list_write( s, pps, CQM_8PC+4 );
+                    scaling_list_write( s, sps, CQM_8IC+4 );
+                    scaling_list_write( s, sps, CQM_8PC+4 );
                     bs_write1( s, 0 ); // Cr = Cb
                     bs_write1( s, 0 ); // Cr = Cb
                 }
@@ -555,7 +572,6 @@
     bs_write( &q, 2, 0 ); //changing_slice_group 0
 
     bs_align_10( &q );
-    bs_flush( &q );
 
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_RECOVERY_POINT );
 }
@@ -578,7 +594,7 @@
 
     memcpy( payload, uuid, 16 );
     sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
-             "Copy%s 2003-2017 - http://www.videolan.org/x264.html - options: %s",
+             "Copy%s 2003-2019 - http://www.videolan.org/x264.html - options: %s",
              X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
     length = strlen(payload)+1;
 
@@ -610,7 +626,6 @@
     }
 
     bs_align_10( &q );
-    bs_flush( &q );
 
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_BUFFERING_PERIOD );
 }
@@ -642,7 +657,6 @@
     }
 
     bs_align_10( &q );
-    bs_flush( &q );
 
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_PIC_TIMING );
 }
@@ -685,11 +699,26 @@
     bs_write1( &q, 0 );                           // frame_packing_arrangement_extension_flag
 
     bs_align_10( &q );
-    bs_flush( &q );
 
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_FRAME_PACKING );
 }
 
+void x264_sei_alternative_transfer_write( x264_t *h, bs_t *s )
+{
+    bs_t q;
+    ALIGNED_4( uint8_t tmp_buf[100] );
+    M32( tmp_buf ) = 0; // shut up gcc
+    bs_init( &q, tmp_buf, 100 );
+
+    bs_realign( &q );
+
+    bs_write ( &q, 8, h->param.i_alternative_transfer ); // preferred_transfer_characteristics
+
+    bs_align_10( &q );
+
+    x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_ALTERNATIVE_TRANSFER );
+}
+
 void x264_filler_write( x264_t *h, bs_t *s, int filler )
 {
     bs_realign( s );
@@ -729,7 +758,6 @@
     }
 
     bs_align_10( &q );
-    bs_flush( &q );
 
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
 }
@@ -781,31 +809,6 @@
     return 0;
 }
 
-const x264_level_t x264_levels[] =
-{
-    { 10,     1485,     99,    396,     64,    175,   64, 64,  0, 2, 0, 0, 1 },
-    {  9,     1485,     99,    396,    128,    350,   64, 64,  0, 2, 0, 0, 1 }, /* "1b" */
-    { 11,     3000,    396,    900,    192,    500,  128, 64,  0, 2, 0, 0, 1 },
-    { 12,     6000,    396,   2376,    384,   1000,  128, 64,  0, 2, 0, 0, 1 },
-    { 13,    11880,    396,   2376,    768,   2000,  128, 64,  0, 2, 0, 0, 1 },
-    { 20,    11880,    396,   2376,   2000,   2000,  128, 64,  0, 2, 0, 0, 1 },
-    { 21,    19800,    792,   4752,   4000,   4000,  256, 64,  0, 2, 0, 0, 0 },
-    { 22,    20250,   1620,   8100,   4000,   4000,  256, 64,  0, 2, 0, 0, 0 },
-    { 30,    40500,   1620,   8100,  10000,  10000,  256, 32, 22, 2, 0, 1, 0 },
-    { 31,   108000,   3600,  18000,  14000,  14000,  512, 16, 60, 4, 1, 1, 0 },
-    { 32,   216000,   5120,  20480,  20000,  20000,  512, 16, 60, 4, 1, 1, 0 },
-    { 40,   245760,   8192,  32768,  20000,  25000,  512, 16, 60, 4, 1, 1, 0 },
-    { 41,   245760,   8192,  32768,  50000,  62500,  512, 16, 24, 2, 1, 1, 0 },
-    { 42,   522240,   8704,  34816,  50000,  62500,  512, 16, 24, 2, 1, 1, 1 },
-    { 50,   589824,  22080, 110400, 135000, 135000,  512, 16, 24, 2, 1, 1, 1 },
-    { 51,   983040,  36864, 184320, 240000, 240000,  512, 16, 24, 2, 1, 1, 1 },
-    { 52,  2073600,  36864, 184320, 240000, 240000,  512, 16, 24, 2, 1, 1, 1 },
-    { 60,  4177920, 139264, 696320, 240000, 240000, 8192, 16, 24, 2, 1, 1, 1 },
-    { 61,  8355840, 139264, 696320, 480000, 480000, 8192, 16, 24, 2, 1, 1, 1 },
-    { 62, 16711680, 139264, 696320, 800000, 800000, 8192, 16, 24, 2, 1, 1, 1 },
-    { 0 }
-};
-
 #define ERROR(...)\
 {\
     if( verbose )\
diff -Nru x264-0.152.2854+gite9a5903/encoder/set.h x264-0.158.2988+git-20191101.7817004/encoder/set.h
--- x264-0.152.2854+gite9a5903/encoder/set.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/set.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: header writing
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,21 +27,41 @@
 #ifndef X264_ENCODER_SET_H
 #define X264_ENCODER_SET_H
 
+#define x264_sps_init x264_template(sps_init)
 void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param );
+#define x264_sps_init_reconfigurable x264_template(sps_init_reconfigurable)
 void x264_sps_init_reconfigurable( x264_sps_t *sps, x264_param_t *param );
+#define x264_sps_init_scaling_list x264_template(sps_init_scaling_list)
+void x264_sps_init_scaling_list( x264_sps_t *sps, x264_param_t *param );
+#define x264_sps_write x264_template(sps_write)
 void x264_sps_write( bs_t *s, x264_sps_t *sps );
+#define x264_pps_init x264_template(pps_init)
 void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *sps );
+#define x264_pps_write x264_template(pps_write)
 void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps );
+#define x264_sei_recovery_point_write x264_template(sei_recovery_point_write)
 void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt );
+#define x264_sei_version_write x264_template(sei_version_write)
 int  x264_sei_version_write( x264_t *h, bs_t *s );
+#define x264_validate_levels x264_template(validate_levels)
 int  x264_validate_levels( x264_t *h, int verbose );
+#define x264_sei_buffering_period_write x264_template(sei_buffering_period_write)
 void x264_sei_buffering_period_write( x264_t *h, bs_t *s );
+#define x264_sei_pic_timing_write x264_template(sei_pic_timing_write)
 void x264_sei_pic_timing_write( x264_t *h, bs_t *s );
+#define x264_sei_dec_ref_pic_marking_write x264_template(sei_dec_ref_pic_marking_write)
 void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s );
+#define x264_sei_frame_packing_write x264_template(sei_frame_packing_write)
 void x264_sei_frame_packing_write( x264_t *h, bs_t *s );
+#define x264_sei_alternative_transfer_write x264_template(sei_alternative_transfer_write)
+void x264_sei_alternative_transfer_write( x264_t *h, bs_t *s );
+#define x264_sei_avcintra_umid_write x264_template(sei_avcintra_umid_write)
 int  x264_sei_avcintra_umid_write( x264_t *h, bs_t *s );
+#define x264_sei_avcintra_vanc_write x264_template(sei_avcintra_vanc_write)
 int  x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len );
+#define x264_sei_write x264_template(sei_write)
 void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type );
+#define x264_filler_write x264_template(filler_write)
 void x264_filler_write( x264_t *h, bs_t *s, int filler );
 
 #endif
diff -Nru x264-0.152.2854+gite9a5903/encoder/slicetype.c x264-0.158.2988+git-20191101.7817004/encoder/slicetype.c
--- x264-0.152.2854+gite9a5903/encoder/slicetype.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/slicetype.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype.c: lookahead analysis
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Fiona Glaser <fiona@x264.com>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -32,26 +32,21 @@
 // Indexed by pic_struct values
 static const uint8_t delta_tfi_divisor[10] = { 0, 2, 1, 1, 2, 2, 3, 3, 4, 6 };
 
-static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
-                                      x264_frame_t **frames, int p0, int p1, int b );
+static int slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
+                                 x264_frame_t **frames, int p0, int p1, int b );
 
+#define x264_weights_analyse x264_template(weights_analyse)
 void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead );
 
 #if HAVE_OPENCL
-int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda );
-int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w );
-int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor );
-int x264_opencl_precalculate_frame_cost( x264_t *h, x264_frame_t **frames, int lambda, int p0, int p1, int b );
-void x264_opencl_flush( x264_t *h );
-void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frames, int lambda );
-void x264_opencl_slicetype_end( x264_t *h );
+#include "slicetype-cl.h"
 #endif
 
-static void x264_lowres_context_init( x264_t *h, x264_mb_analysis_t *a )
+static void lowres_context_init( x264_t *h, x264_mb_analysis_t *a )
 {
     a->i_qp = X264_LOOKAHEAD_QP;
     a->i_lambda = x264_lambda_tab[ a->i_qp ];
-    x264_mb_analyse_load_costs( h, a );
+    mb_analyse_load_costs( h, a );
     if( h->param.analyse.i_subpel_refine > 1 )
     {
         h->mb.i_me_method = X264_MIN( X264_ME_HEX, h->param.analyse.i_me_method );
@@ -66,7 +61,7 @@
 }
 
 /* makes a non-h264 weight (i.e. fix7), into an h264 weight */
-static void x264_weight_get_h264( int weight_nonh264, int offset, x264_weight_t *w )
+static void weight_get_h264( int weight_nonh264, int offset, x264_weight_t *w )
 {
     w->i_offset = offset;
     w->i_denom = 7;
@@ -79,7 +74,7 @@
     w->i_scale = X264_MIN( w->i_scale, 127 );
 }
 
-static NOINLINE pixel *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dest )
+static NOINLINE pixel *weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dest )
 {
     int ref0_distance = fenc->i_frame - ref->i_frame - 1;
     /* Note: this will never run during lookahead as weights_analyse is only called if no
@@ -113,7 +108,7 @@
  * fenc = ref + offset
  * v = u + stride * chroma height */
 
-static NOINLINE void x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dstu, pixel *dstv )
+static NOINLINE void weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dstu, pixel *dstv )
 {
     int ref0_distance = fenc->i_frame - ref->i_frame - 1;
     int i_stride = fenc->i_stride[1];
@@ -145,7 +140,7 @@
     x264_emms();
 }
 
-static NOINLINE pixel *x264_weight_cost_init_chroma444( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dst, int p )
+static NOINLINE pixel *weight_cost_init_chroma444( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dst, int p )
 {
     int ref0_distance = fenc->i_frame - ref->i_frame - 1;
     int i_stride = fenc->i_stride[p];
@@ -173,7 +168,7 @@
     return ref->plane[p];
 }
 
-static int x264_weight_slice_header_cost( x264_t *h, x264_weight_t *w, int b_chroma )
+static int weight_slice_header_cost( x264_t *h, x264_weight_t *w, int b_chroma )
 {
     /* Add cost of weights in the slice header. */
     int lambda = x264_lambda_tab[X264_LOOKAHEAD_QP];
@@ -194,7 +189,7 @@
     return lambda * numslices * ( 10 + denom_cost + 2 * (bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset )) );
 }
 
-static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
+static NOINLINE unsigned int weight_cost_luma( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
 {
     unsigned int cost = 0;
     int i_stride = fenc->i_stride_lowres;
@@ -214,7 +209,7 @@
                 int cmp = h->pixf.mbcmp[PIXEL_8x8]( buf, 8, &fenc_plane[pixoff], i_stride );
                 cost += X264_MIN( cmp, fenc->i_intra_cost[i_mb] );
             }
-        cost += x264_weight_slice_header_cost( h, w, 0 );
+        cost += weight_slice_header_cost( h, w, 0 );
     }
     else
         for( int y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
@@ -227,7 +222,7 @@
     return cost;
 }
 
-static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w )
+static NOINLINE unsigned int weight_cost_chroma( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w )
 {
     unsigned int cost = 0;
     int i_stride = fenc->i_stride[1];
@@ -250,7 +245,7 @@
                  * pixels. */
                 cost += h->pixf.asd8( buf, 8, &src[pixoff], i_stride, height );
             }
-        cost += x264_weight_slice_header_cost( h, w, 1 );
+        cost += weight_slice_header_cost( h, w, 1 );
     }
     else
         for( int y = 0; y < i_lines; y += height, pixoff = y*i_stride )
@@ -260,7 +255,7 @@
     return cost;
 }
 
-static NOINLINE unsigned int x264_weight_cost_chroma444( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w, int p )
+static NOINLINE unsigned int weight_cost_chroma444( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w, int p )
 {
     unsigned int cost = 0;
     int i_stride = fenc->i_stride[p];
@@ -277,7 +272,7 @@
                 w->weightfn[16>>2]( buf, 16, &ref[pixoff], i_stride, w, 16 );
                 cost += h->pixf.mbcmp[PIXEL_16x16]( buf, 16, &src[pixoff], i_stride );
             }
-        cost += x264_weight_slice_header_cost( h, w, 1 );
+        cost += weight_slice_header_cost( h, w, 1 );
     }
     else
         for( int y = 0; y < i_lines; y += 16, pixoff = y*i_stride )
@@ -302,11 +297,12 @@
     float ref_mean[3];
     for( int plane = 0; plane <= 2*!b_lookahead; plane++ )
     {
-        float fenc_var = fenc->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
-        float ref_var  =  ref->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
+        int zero_bias = !ref->i_pixel_ssd[plane];
+        float fenc_var = fenc->i_pixel_ssd[plane] + zero_bias;
+        float ref_var  =  ref->i_pixel_ssd[plane] + zero_bias;
         guess_scale[plane] = sqrtf( fenc_var / ref_var );
-        fenc_mean[plane] = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
-        ref_mean[plane]  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
+        fenc_mean[plane] = (float)(fenc->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
+        ref_mean[plane]  = (float)( ref->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
     }
 
     int chroma_denom = 7;
@@ -323,7 +319,7 @@
     }
 
     /* Don't check chroma in lookahead, or if there wasn't a luma weight. */
-    for( int plane = 0; plane <= 2 && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
+    for( int plane = 0; plane < (CHROMA_FORMAT ? 3 : 1) && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
     {
         int minoff, minscale, mindenom;
         unsigned int minscore, origscore;
@@ -347,7 +343,7 @@
             }
         }
         else
-            x264_weight_get_h264( round( guess_scale[plane] * 128 ), 0, &weights[plane] );
+            weight_get_h264( round( guess_scale[plane] * 128 ), 0, &weights[plane] );
 
         found = 0;
         mindenom = weights[plane].i_denom;
@@ -360,27 +356,27 @@
             if( !fenc->b_intra_calculated )
             {
                 x264_mb_analysis_t a;
-                x264_lowres_context_init( h, &a );
-                x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0 );
+                lowres_context_init( h, &a );
+                slicetype_frame_cost( h, &a, &fenc, 0, 0, 0 );
             }
-            mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
-            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, NULL );
+            mcbuf = weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
+            origscore = minscore = weight_cost_luma( h, fenc, mcbuf, NULL );
         }
         else
         {
             if( CHROMA444 )
             {
-                mcbuf = x264_weight_cost_init_chroma444( h, fenc, ref, h->mb.p_weight_buf[0], plane );
-                origscore = minscore = x264_weight_cost_chroma444( h, fenc, mcbuf, NULL, plane );
+                mcbuf = weight_cost_init_chroma444( h, fenc, ref, h->mb.p_weight_buf[0], plane );
+                origscore = minscore = weight_cost_chroma444( h, fenc, mcbuf, NULL, plane );
             }
             else
             {
                 pixel *dstu = h->mb.p_weight_buf[0];
                 pixel *dstv = h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1];
                 if( !chroma_initted++ )
-                    x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
+                    weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
                 mcbuf = plane == 1 ? dstu : dstv;
-                origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, NULL );
+                origscore = minscore = weight_cost_chroma( h, fenc, mcbuf, NULL );
             }
         }
 
@@ -409,8 +405,7 @@
                  * because scale has a much wider range than offset (because of denom), so
                  * it should almost never need to be clamped. */
                 cur_offset = x264_clip3( cur_offset, -128, 127 );
-                cur_scale = (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f;
-                cur_scale = x264_clip3( cur_scale, 0, 127 );
+                cur_scale = x264_clip3f( (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f, 0, 127 );
             }
             int start_offset = x264_clip3( cur_offset - offset_dist, -128, 127 );
             int end_offset   = x264_clip3( cur_offset + offset_dist, -128, 127 );
@@ -421,12 +416,12 @@
                 if( plane )
                 {
                     if( CHROMA444 )
-                        s = x264_weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane );
+                        s = weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane );
                     else
-                        s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
+                        s = weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
                 }
                 else
-                    s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
+                    s = weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
                 COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, i_off, found, 1 );
 
                 // Don't check any more offsets if the previous one had a lower cost than the current one
@@ -487,7 +482,7 @@
     if( weights[0].weightfn && b_lookahead )
     {
         //scale lowres in lookahead for slicetype_frame_cost
-        pixel *src = ref->buffer_lowres[0];
+        pixel *src = ref->buffer_lowres;
         pixel *dst = h->mb.p_weight_buf[0];
         int width = ref->i_width_lowres + PADH*2;
         int height = ref->i_lines_lowres + PADV*2;
@@ -508,10 +503,10 @@
 #define NUM_ROWS 3
 #define ROW_SATD (NUM_INTS + (h->mb.i_mb_y - h->i_threadslice_start))
 
-static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
-                                    x264_frame_t **frames, int p0, int p1, int b,
-                                    int dist_scale_factor, int do_search[2], const x264_weight_t *w,
-                                    int *output_inter, int *output_intra )
+static void slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
+                               x264_frame_t **frames, int p0, int p1, int b,
+                               int dist_scale_factor, int do_search[2], const x264_weight_t *w,
+                               int *output_inter, int *output_intra )
 {
     x264_frame_t *fref0 = frames[p0];
     x264_frame_t *fref1 = frames[p1];
@@ -808,7 +803,7 @@
     int *output_intra;
 } x264_slicetype_slice_t;
 
-static void x264_slicetype_slice_cost( x264_slicetype_slice_t *s )
+static void slicetype_slice_cost( x264_slicetype_slice_t *s )
 {
     x264_t *h = s->h;
 
@@ -826,12 +821,12 @@
 
     for( h->mb.i_mb_y = start_y; h->mb.i_mb_y >= end_y; h->mb.i_mb_y-- )
         for( h->mb.i_mb_x = start_x; h->mb.i_mb_x >= end_x; h->mb.i_mb_x-- )
-            x264_slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor,
-                                    s->do_search, s->w, s->output_inter, s->output_intra );
+            slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor,
+                               s->do_search, s->w, s->output_inter, s->output_intra );
 }
 
-static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
-                                      x264_frame_t **frames, int p0, int p1, int b )
+static int slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
+                                 x264_frame_t **frames, int p0, int p1, int b )
 {
     int i_score = 0;
     int do_search[2];
@@ -923,7 +918,7 @@
                     output_inter[i+1] = output_inter[i] + thread_output_size + PAD_SIZE;
                     output_intra[i+1] = output_intra[i] + thread_output_size + PAD_SIZE;
 
-                    x264_threadpool_run( h->lookaheadpool, (void*)x264_slicetype_slice_cost, &s[i] );
+                    x264_threadpool_run( h->lookaheadpool, (void*)slicetype_slice_cost, &s[i] );
                 }
                 for( int i = 0; i < h->param.i_lookahead_threads; i++ )
                     x264_threadpool_wait( h->lookaheadpool, &s[i] );
@@ -937,7 +932,7 @@
                 output_inter[0][NUM_ROWS] = output_intra[0][NUM_ROWS] = h->mb.i_mb_height;
                 x264_slicetype_slice_t s = (x264_slicetype_slice_t){ h, a, frames, p0, p1, b, dist_scale_factor, do_search, w,
                     output_inter[0], output_intra[0] };
-                x264_slicetype_slice_cost( &s );
+                slicetype_slice_cost( &s );
             }
 
             /* Sum up accumulators */
@@ -993,7 +988,7 @@
 
 /* If MB-tree changes the quantizers, we need to recalculate the frame cost without
  * re-running lookahead. */
-static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
+static int slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
 {
     int i_score = 0;
     int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
@@ -1023,7 +1018,7 @@
 /* Trade off precision in mbtree for increased range */
 #define MBTREE_PRECISION 0.5f
 
-static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
+static void macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
 {
     int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );
     float weightdelta = 0.0;
@@ -1045,7 +1040,7 @@
     }
 }
 
-static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, float average_duration, int p0, int p1, int b, int referenced )
+static void macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, float average_duration, int p0, int p1, int b, int referenced )
 {
     uint16_t *ref_costs[2] = {frames[p0]->i_propagate_cost,frames[p1]->i_propagate_cost};
     int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
@@ -1082,10 +1077,10 @@
     }
 
     if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
-        x264_macroblock_tree_finish( h, frames[b], average_duration, b == p1 ? b - p0 : 0 );
+        macroblock_tree_finish( h, frames[b], average_duration, b == p1 ? b - p0 : 0 );
 }
 
-static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
+static void macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
 {
     int idx = !b_intra;
     int last_nonb, cur_nonb = 1;
@@ -1100,7 +1095,7 @@
     int i = num_frames;
 
     if( b_intra )
-        x264_slicetype_frame_cost( h, a, frames, 0, 0, 0 );
+        slicetype_frame_cost( h, a, frames, 0, 0, 0 );
 
     while( i > 0 && IS_X264_TYPE_B( frames[i]->i_type ) )
         i--;
@@ -1134,13 +1129,13 @@
             cur_nonb--;
         if( cur_nonb < idx )
             break;
-        x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb );
+        slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb );
         memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
         bframes = last_nonb - cur_nonb - 1;
         if( h->param.i_bframe_pyramid && bframes > 1 )
         {
             int middle = (bframes + 1)/2 + cur_nonb;
-            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, middle );
+            slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, middle );
             memset( frames[middle]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
             while( i > cur_nonb )
             {
@@ -1148,52 +1143,52 @@
                 int p1 = i < middle ? middle : last_nonb;
                 if( i != middle )
                 {
-                    x264_slicetype_frame_cost( h, a, frames, p0, p1, i );
-                    x264_macroblock_tree_propagate( h, frames, average_duration, p0, p1, i, 0 );
+                    slicetype_frame_cost( h, a, frames, p0, p1, i );
+                    macroblock_tree_propagate( h, frames, average_duration, p0, p1, i, 0 );
                 }
                 i--;
             }
-            x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, middle, 1 );
+            macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, middle, 1 );
         }
         else
         {
             while( i > cur_nonb )
             {
-                x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i );
-                x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, i, 0 );
+                slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i );
+                macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, i, 0 );
                 i--;
             }
         }
-        x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, last_nonb, 1 );
+        macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, last_nonb, 1 );
         last_nonb = cur_nonb;
     }
 
     if( !h->param.rc.i_lookahead )
     {
-        x264_slicetype_frame_cost( h, a, frames, 0, last_nonb, last_nonb );
-        x264_macroblock_tree_propagate( h, frames, average_duration, 0, last_nonb, last_nonb, 1 );
+        slicetype_frame_cost( h, a, frames, 0, last_nonb, last_nonb );
+        macroblock_tree_propagate( h, frames, average_duration, 0, last_nonb, last_nonb, 1 );
         XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
     }
 
-    x264_macroblock_tree_finish( h, frames[last_nonb], average_duration, last_nonb );
+    macroblock_tree_finish( h, frames[last_nonb], average_duration, last_nonb );
     if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
-        x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], average_duration, 0 );
+        macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], average_duration, 0 );
 }
 
-static int x264_vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b )
+static int vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b )
 {
-    int cost = x264_slicetype_frame_cost( h, a, frames, p0, p1, b );
+    int cost = slicetype_frame_cost( h, a, frames, p0, p1, b );
     if( h->param.rc.i_aq_mode )
     {
         if( h->param.rc.b_mb_tree )
-            return x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
+            return slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
         else
             return frames[b]->i_cost_est_aq[b-p0][p1-b];
     }
     return cost;
 }
 
-static void x264_calculate_durations( x264_t *h, x264_frame_t *cur_frame, x264_frame_t *prev_frame, int64_t *i_cpb_delay, int64_t *i_coded_fields )
+static void calculate_durations( x264_t *h, x264_frame_t *cur_frame, x264_frame_t *prev_frame, int64_t *i_cpb_delay, int64_t *i_coded_fields )
 {
     cur_frame->i_cpb_delay = *i_cpb_delay;
     cur_frame->i_dpb_output_delay = cur_frame->i_field_cnt - *i_coded_fields;
@@ -1219,7 +1214,7 @@
     cur_frame->i_cpb_duration = cur_frame->i_duration;
 }
 
-static void x264_vbv_lookahead( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int keyframe )
+static void vbv_lookahead( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int keyframe )
 {
     int last_nonb = 0, cur_nonb = 1, idx = 0;
     x264_frame_t *prev_frame = NULL;
@@ -1240,11 +1235,11 @@
         if( next_nonb != cur_nonb )
         {
             int p0 = IS_X264_TYPE_I( frames[cur_nonb]->i_type ) ? cur_nonb : last_nonb;
-            frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, p0, cur_nonb, cur_nonb );
+            frames[next_nonb]->i_planned_satd[idx] = vbv_frame_cost( h, a, frames, p0, cur_nonb, cur_nonb );
             frames[next_nonb]->i_planned_type[idx] = frames[cur_nonb]->i_type;
             frames[cur_nonb]->i_coded_fields_lookahead = h->i_coded_fields_lookahead;
             frames[cur_nonb]->i_cpb_delay_lookahead = h->i_cpb_delay_lookahead;
-            x264_calculate_durations( h, frames[cur_nonb], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead );
+            calculate_durations( h, frames[cur_nonb], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead );
             if( prev_frame )
             {
                 frames[next_nonb]->f_planned_cpb_duration[prev_frame_idx] = (double)prev_frame->i_cpb_duration *
@@ -1259,11 +1254,11 @@
         /* Handle the B-frames: coded order */
         for( int i = last_nonb+1; i < cur_nonb; i++, idx++ )
         {
-            frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, last_nonb, cur_nonb, i );
+            frames[next_nonb]->i_planned_satd[idx] = vbv_frame_cost( h, a, frames, last_nonb, cur_nonb, i );
             frames[next_nonb]->i_planned_type[idx] = X264_TYPE_B;
             frames[i]->i_coded_fields_lookahead = h->i_coded_fields_lookahead;
             frames[i]->i_cpb_delay_lookahead = h->i_cpb_delay_lookahead;
-            x264_calculate_durations( h, frames[i], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead );
+            calculate_durations( h, frames[i], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead );
             if( prev_frame )
             {
                 frames[next_nonb]->f_planned_cpb_duration[prev_frame_idx] = (double)prev_frame->i_cpb_duration *
@@ -1282,10 +1277,10 @@
     frames[next_nonb]->i_planned_type[idx] = X264_TYPE_AUTO;
 }
 
-static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
+static uint64_t slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, uint64_t threshold )
 {
+    uint64_t cost = 0;
     int loc = 1;
-    int cost = 0;
     int cur_nonb = 0;
     path--; /* Since the 1st path element is really the second frame */
     while( path[loc] )
@@ -1297,9 +1292,9 @@
 
         /* Add the cost of the non-B-frame found above */
         if( path[next_nonb] == 'P' )
-            cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_nonb );
+            cost += slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_nonb );
         else /* I-frame */
-            cost += x264_slicetype_frame_cost( h, a, frames, next_nonb, next_nonb, next_nonb );
+            cost += slicetype_frame_cost( h, a, frames, next_nonb, next_nonb, next_nonb );
         /* Early terminate if the cost we have found is larger than the best path cost so far */
         if( cost > threshold )
             break;
@@ -1307,15 +1302,15 @@
         if( h->param.i_bframe_pyramid && next_nonb - cur_nonb > 2 )
         {
             int middle = cur_nonb + (next_nonb - cur_nonb)/2;
-            cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, middle );
+            cost += slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, middle );
             for( int next_b = loc; next_b < middle && cost < threshold; next_b++ )
-                cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, middle, next_b );
+                cost += slicetype_frame_cost( h, a, frames, cur_nonb, middle, next_b );
             for( int next_b = middle+1; next_b < next_nonb && cost < threshold; next_b++ )
-                cost += x264_slicetype_frame_cost( h, a, frames, middle, next_nonb, next_b );
+                cost += slicetype_frame_cost( h, a, frames, middle, next_nonb, next_b );
         }
         else
             for( int next_b = loc; next_b < next_nonb && cost < threshold; next_b++ )
-                cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_b );
+                cost += slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_b );
 
         loc = next_nonb + 1;
         cur_nonb = next_nonb;
@@ -1327,11 +1322,11 @@
 /* Uses strings due to the fact that the speed of the control functions is
    negligible compared to the cost of running slicetype_frame_cost, and because
    it makes debugging easier. */
-static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, char (*best_paths)[X264_LOOKAHEAD_MAX+1] )
+static void slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, char (*best_paths)[X264_LOOKAHEAD_MAX+1] )
 {
     char paths[2][X264_LOOKAHEAD_MAX+1];
     int num_paths = X264_MIN( h->param.i_bframe+1, length );
-    int best_cost = COST_MAX;
+    uint64_t best_cost = COST_MAX64;
     int best_possible = 0;
     int idx = 0;
 
@@ -1362,9 +1357,9 @@
         if( possible || !best_possible )
         {
             if( possible && !best_possible )
-                best_cost = COST_MAX;
+                best_cost = COST_MAX64;
             /* Calculate the actual cost of the current path */
-            int cost = x264_slicetype_path_cost( h, a, frames, paths[idx], best_cost );
+            uint64_t cost = slicetype_path_cost( h, a, frames, paths[idx], best_cost );
             if( cost < best_cost )
             {
                 best_cost = cost;
@@ -1386,7 +1381,7 @@
     if( real_scenecut && h->param.i_frame_packing == 5 && (frame->i_frame&1) )
         return 0;
 
-    x264_slicetype_frame_cost( h, a, frames, p0, p1, p1 );
+    slicetype_frame_cost( h, a, frames, p0, p1, p1 );
 
     int icost = frame->i_cost_est[0][0];
     int pcost = frame->i_cost_est[p1-p0][0];
@@ -1473,7 +1468,7 @@
     x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
     int num_frames, orig_num_frames, keyint_limit, framecnt;
     int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
-    int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
+    int b_vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
     /* For determinism we should limit the search to the number of frames lookahead has for sure
      * in h->lookahead->next.list buffer, except at the end of stream.
      * For normal calls with (intra_minigop == 0) that is h->lookahead->i_slicetype_length + 1 frames.
@@ -1490,12 +1485,12 @@
     for( framecnt = 0; framecnt < i_max_search; framecnt++ )
         frames[framecnt+1] = h->lookahead->next.list[framecnt];
 
-    x264_lowres_context_init( h, &a );
+    lowres_context_init( h, &a );
 
     if( !framecnt )
     {
         if( h->param.rc.b_mb_tree )
-            x264_macroblock_tree( h, &a, frames, 0, keyframe );
+            macroblock_tree( h, &a, frames, 0, keyframe );
         return;
     }
 
@@ -1506,7 +1501,7 @@
      * there will be significant visual artifacts if the frames just before
      * go down in quality due to being referenced less, despite it being
      * more RD-optimal. */
-    if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
+    if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || b_vbv_lookahead )
         num_frames = framecnt;
     else if( h->param.b_open_gop && num_frames < framecnt )
         num_frames++;
@@ -1556,7 +1551,7 @@
 
                 /* Perform the frametype analysis. */
                 for( int j = 2; j <= num_frames; j++ )
-                    x264_slicetype_path( h, &a, frames, j, best_paths );
+                    slicetype_path( h, &a, frames, j, best_paths );
 
                 /* Load the results of the analysis into the frame types. */
                 for( int j = 1; j < num_frames; j++ )
@@ -1607,9 +1602,9 @@
                 int bframes = j - last_nonb - 1;
                 memset( path, 'B', bframes );
                 strcpy( path+bframes, "PP" );
-                int cost_p = x264_slicetype_path_cost( h, &a, frames+last_nonb, path, COST_MAX );
+                uint64_t cost_p = slicetype_path_cost( h, &a, frames+last_nonb, path, COST_MAX64 );
                 strcpy( path+bframes, "BP" );
-                int cost_b = x264_slicetype_path_cost( h, &a, frames+last_nonb, path, cost_p );
+                uint64_t cost_b = slicetype_path_cost( h, &a, frames+last_nonb, path, cost_p );
 
                 if( cost_b < cost_p )
                     frames[j]->i_type = X264_TYPE_B;
@@ -1672,7 +1667,7 @@
     /* Perform the actual macroblock tree analysis.
      * Don't go farther than the maximum keyframe interval; this helps in short GOPs. */
     if( h->param.rc.b_mb_tree )
-        x264_macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.i_keyint_max), keyframe );
+        macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.i_keyint_max), keyframe );
 
     /* Enforce keyframe limit. */
     if( !h->param.b_intra_refresh )
@@ -1727,8 +1722,8 @@
         }
     }
 
-    if( vbv_lookahead )
-        x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
+    if( b_vbv_lookahead )
+        vbv_lookahead( h, &a, frames, num_frames, keyframe );
 
     /* Restore frametypes for all frames that haven't actually been decided yet. */
     for( int j = reset_start; j <= num_frames; j++ )
@@ -1899,7 +1894,7 @@
         int p0, p1, b;
         p1 = b = bframes + 1;
 
-        x264_lowres_context_init( h, &a );
+        lowres_context_init( h, &a );
 
         frames[0] = h->lookahead->last_nonb;
         memcpy( &frames[1], h->lookahead->next.list, (bframes+1) * sizeof(x264_frame_t*) );
@@ -1908,12 +1903,12 @@
         else // P
             p0 = 0;
 
-        x264_slicetype_frame_cost( h, &a, frames, p0, p1, b );
+        slicetype_frame_cost( h, &a, frames, p0, p1, b );
 
         if( (p0 != p1 || bframes) && h->param.rc.i_vbv_buffer_size )
         {
             /* We need the intra costs for row SATDs. */
-            x264_slicetype_frame_cost( h, &a, frames, b, b, b );
+            slicetype_frame_cost( h, &a, frames, b, b, b );
 
             /* We need B-frame costs for row SATDs. */
             p0 = 0;
@@ -1924,7 +1919,7 @@
                         p1++;
                 else
                     p1 = bframes + 1;
-                x264_slicetype_frame_cost( h, &a, frames, p0, p1, b );
+                slicetype_frame_cost( h, &a, frames, p0, p1, b );
                 if( frames[b]->i_type == X264_TYPE_BREF )
                     p0 = b;
             }
@@ -1961,12 +1956,12 @@
         h->lookahead->next.list[i]->i_coded = i_coded++;
         if( i )
         {
-            x264_calculate_durations( h, h->lookahead->next.list[i], h->lookahead->next.list[i-1], &h->i_cpb_delay, &h->i_coded_fields );
+            calculate_durations( h, h->lookahead->next.list[i], h->lookahead->next.list[i-1], &h->i_cpb_delay, &h->i_coded_fields );
             h->lookahead->next.list[0]->f_planned_cpb_duration[i-1] = (double)h->lookahead->next.list[i]->i_cpb_duration *
                                                                       h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
         }
         else
-            x264_calculate_durations( h, h->lookahead->next.list[i], NULL, &h->i_cpb_delay, &h->i_coded_fields );
+            calculate_durations( h, h->lookahead->next.list[i], NULL, &h->i_cpb_delay, &h->i_coded_fields );
     }
 }
 
@@ -1994,9 +1989,9 @@
 
     if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
     {
-        cost = x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
+        cost = slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
         if( b && h->param.rc.i_vbv_buffer_size )
-            x264_slicetype_frame_cost_recalculate( h, frames, b, b, b );
+            slicetype_frame_cost_recalculate( h, frames, b, b, b );
     }
     /* In AQ, use the weighted score instead. */
     else if( h->param.rc.i_aq_mode )
diff -Nru x264-0.152.2854+gite9a5903/encoder/slicetype-cl.c x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.c
--- x264-0.152.2854+gite9a5903/encoder/slicetype-cl.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead)
  *****************************************************************************
- * Copyright (C) 2012-2017 x264 project
+ * Copyright (C) 2012-2019 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *
@@ -26,12 +26,14 @@
 #include "common/common.h"
 #include "macroblock.h"
 #include "me.h"
+#include "slicetype-cl.h"
 
 #if HAVE_OPENCL
 #ifdef _WIN32
 #include <windows.h>
 #endif
 
+#define x264_weights_analyse x264_template(weights_analyse)
 void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead );
 
 /* We define CL_QUEUE_THREAD_HANDLE_AMD here because it is not defined
@@ -67,7 +69,7 @@
     h->opencl.pl_occupancy = 0;
 }
 
-static void *x264_opencl_alloc_locked( x264_t *h, int bytes )
+static void *opencl_alloc_locked( x264_t *h, int bytes )
 {
     if( h->opencl.pl_occupancy + bytes >= PAGE_LOCKED_BUF_SIZE )
         x264_opencl_flush( h );
@@ -161,7 +163,7 @@
 
     /* Copy image to the GPU, downscale to unpadded 8x8, then continue for all scales */
 
-    char *locked = x264_opencl_alloc_locked( h, luma_length );
+    char *locked = opencl_alloc_locked( h, luma_length );
     memcpy( locked, fenc->plane[0], luma_length );
     OCLCHECK( clEnqueueWriteBuffer, h->opencl.queue,  h->opencl.luma_16x16_image[h->opencl.last_buf], CL_FALSE, 0, luma_length, locked, 0, NULL, NULL );
 
@@ -169,7 +171,7 @@
     if( h->param.rc.i_aq_mode && fenc->i_inv_qscale_factor )
     {
         int size = h->mb.i_mb_count * sizeof(int16_t);
-        locked = x264_opencl_alloc_locked( h, size );
+        locked = opencl_alloc_locked( h, size );
         memcpy( locked, fenc->i_inv_qscale_factor, size );
         OCLCHECK( clEnqueueWriteBuffer, h->opencl.queue, fenc->opencl.inv_qscale_factor, CL_FALSE, 0, size, locked, 0, NULL, NULL );
     }
@@ -250,7 +252,7 @@
         x264_opencl_flush( h );
 
     int size = h->mb.i_mb_count * sizeof(int16_t);
-    locked = x264_opencl_alloc_locked( h, size );
+    locked = opencl_alloc_locked( h, size );
     OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, fenc->opencl.intra_cost, CL_FALSE, 0, size, locked, 0, NULL, NULL );
     h->opencl.copies[h->opencl.num_copies].dest = fenc->lowres_costs[0][0];
     h->opencl.copies[h->opencl.num_copies].src = locked;
@@ -258,7 +260,7 @@
     h->opencl.num_copies++;
 
     size = h->mb.i_mb_height * sizeof(int);
-    locked = x264_opencl_alloc_locked( h, size );
+    locked = opencl_alloc_locked( h, size );
     OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.row_satds[h->opencl.last_buf], CL_FALSE, 0, size, locked, 0, NULL, NULL );
     h->opencl.copies[h->opencl.num_copies].dest = fenc->i_row_satds[0][0];
     h->opencl.copies[h->opencl.num_copies].src = locked;
@@ -266,7 +268,7 @@
     h->opencl.num_copies++;
 
     size = sizeof(int) * 4;
-    locked = x264_opencl_alloc_locked( h, size );
+    locked = opencl_alloc_locked( h, size );
     OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.frame_stats[h->opencl.last_buf], CL_FALSE, 0, size, locked, 0, NULL, NULL );
     h->opencl.copies[h->opencl.num_copies].dest = &fenc->i_cost_est[0][0];
     h->opencl.copies[h->opencl.num_copies].src = locked;
@@ -286,7 +288,7 @@
  * applications will have self-tuning code to try many possible variables and
  * measure the runtime.  Here we simply make an educated guess based on what we
  * know GPUs typically prefer.  */
-static void x264_optimal_launch_dims( x264_t *h, size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device )
+static void optimal_launch_dims( x264_t *h, size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device )
 {
     x264_opencl_function_t *ocl = h->opencl.ocl;
     size_t max_work_group = 256;    /* reasonable defaults for OpenCL 1.0 devices, below APIs may fail */
@@ -425,7 +427,7 @@
         if( gdims[0] < 2 || gdims[1] < 2 )
             continue;
         gdims[0] <<= 2;
-        x264_optimal_launch_dims( h, gdims, ldims, h->opencl.hme_kernel, h->opencl.device );
+        optimal_launch_dims( h, gdims, ldims, h->opencl.hme_kernel, h->opencl.device );
 
         mb_per_group = (ldims[0] >> 2) * ldims[1];
         cost_local_size = 4 * mb_per_group * sizeof(int16_t);
@@ -513,7 +515,7 @@
     if( h->opencl.num_copies >= MAX_FINISH_COPIES - 1 )
         x264_opencl_flush( h );
 
-    char *locked = x264_opencl_alloc_locked( h, mvlen );
+    char *locked = opencl_alloc_locked( h, mvlen );
     h->opencl.copies[h->opencl.num_copies].src = locked;
     h->opencl.copies[h->opencl.num_copies].bytes = mvlen;
 
@@ -560,7 +562,7 @@
         /* For B frames, use 4 threads per MB for BIDIR checks */
         ldims = ldim_bidir;
         gdims[0] <<= 2;
-        x264_optimal_launch_dims( h, gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device );
+        optimal_launch_dims( h, gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device );
         int mb_per_group = (ldims[0] >> 2) * ldims[1];
         cost_local_size = 4 * mb_per_group * sizeof(int16_t);
         satd_local_size = 16 * mb_per_group * sizeof(uint32_t);
@@ -609,7 +611,7 @@
         x264_opencl_flush( h );
 
     int size =  h->mb.i_mb_count * sizeof(int16_t);
-    char *locked = x264_opencl_alloc_locked( h, size );
+    char *locked = opencl_alloc_locked( h, size );
     h->opencl.copies[h->opencl.num_copies].src = locked;
     h->opencl.copies[h->opencl.num_copies].dest = fenc->lowres_costs[b - p0][p1 - b];
     h->opencl.copies[h->opencl.num_copies].bytes = size;
@@ -617,7 +619,7 @@
     h->opencl.num_copies++;
 
     size =  h->mb.i_mb_height * sizeof(int);
-    locked = x264_opencl_alloc_locked( h, size );
+    locked = opencl_alloc_locked( h, size );
     h->opencl.copies[h->opencl.num_copies].src = locked;
     h->opencl.copies[h->opencl.num_copies].dest = fenc->i_row_satds[b - p0][p1 - b];
     h->opencl.copies[h->opencl.num_copies].bytes = size;
@@ -625,7 +627,7 @@
     h->opencl.num_copies++;
 
     size =  4 * sizeof(int);
-    locked = x264_opencl_alloc_locked( h, size );
+    locked = opencl_alloc_locked( h, size );
     OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.frame_stats[h->opencl.last_buf], CL_FALSE, 0, size, locked, 0, NULL, NULL );
     h->opencl.last_buf = !h->opencl.last_buf;
 
diff -Nru x264-0.152.2854+gite9a5903/encoder/slicetype-cl.h x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.h
--- x264-0.152.2854+gite9a5903/encoder/slicetype-cl.h	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.h	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,44 @@
+/*****************************************************************************
+ * slicetype-cl.h: OpenCL slicetype decision code (lowres lookahead)
+ *****************************************************************************
+ * Copyright (C) 2017-2019 x264 project
+ *
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#ifndef X264_ENCODER_SLICETYPE_CL_H
+#define X264_ENCODER_SLICETYPE_CL_H
+
+#define x264_opencl_lowres_init x264_template(opencl_lowres_init)
+int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda );
+#define x264_opencl_motionsearch x264_template(opencl_motionsearch)
+int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w );
+#define x264_opencl_finalize_cost x264_template(opencl_finalize_cost)
+int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor );
+#define x264_opencl_precalculate_frame_cost x264_template(opencl_precalculate_frame_cost)
+int x264_opencl_precalculate_frame_cost( x264_t *h, x264_frame_t **frames, int lambda, int p0, int p1, int b );
+#define x264_opencl_flush x264_template(opencl_flush)
+void x264_opencl_flush( x264_t *h );
+#define x264_opencl_slicetype_prep x264_template(opencl_slicetype_prep)
+void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frames, int lambda );
+#define x264_opencl_slicetype_end x264_template(opencl_slicetype_end)
+void x264_opencl_slicetype_end( x264_t *h );
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/example.c x264-0.158.2988+git-20191101.7817004/example.c
--- x264-0.152.2854+gite9a5903/example.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/example.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * example.c: libx264 API usage example
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Anton Mitrofanov <BugMaster@narod.ru>
  *
@@ -68,6 +68,7 @@
         goto fail;
 
     /* Configure non-default params */
+    param.i_bitdepth = 8;
     param.i_csp = X264_CSP_I420;
     param.i_width  = width;
     param.i_height = height;
diff -Nru x264-0.152.2854+gite9a5903/extras/intel_dispatcher.h x264-0.158.2988+git-20191101.7817004/extras/intel_dispatcher.h
--- x264-0.152.2854+gite9a5903/extras/intel_dispatcher.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/extras/intel_dispatcher.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * intel_dispatcher.h: intel compiler cpu dispatcher override
  *****************************************************************************
- * Copyright (C) 2014-2017 x264 project
+ * Copyright (C) 2014-2019 x264 project
  *
  * Authors: Anton Mitrofanov <BugMaster@narod.ru>
  *
diff -Nru x264-0.152.2854+gite9a5903/filters/filters.c x264-0.158.2988+git-20191101.7817004/filters/filters.c
--- x264-0.152.2854+gite9a5903/filters/filters.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/filters.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * filters.c: common filter functions
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Diogo Franco <diogomfranco@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
@@ -26,6 +26,7 @@
  *****************************************************************************/
 
 #include "filters.h"
+
 #define RETURN_IF_ERROR( cond, ... ) RETURN_IF_ERR( cond, "options", NULL, __VA_ARGS__ )
 
 char **x264_split_options( const char *opt_str, const char * const *options )
diff -Nru x264-0.152.2854+gite9a5903/filters/filters.h x264-0.158.2988+git-20191101.7817004/filters/filters.h
--- x264-0.152.2854+gite9a5903/filters/filters.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/filters.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * filters.h: common filter functions
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Diogo Franco <diogomfranco@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
diff -Nru x264-0.152.2854+gite9a5903/filters/video/cache.c x264-0.158.2988+git-20191101.7817004/filters/video/cache.c
--- x264-0.152.2854+gite9a5903/filters/video/cache.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/cache.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cache.c: cache video filter
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -25,7 +25,15 @@
 
 #include "video.h"
 #include "internal.h"
-#define NAME "cache"
+#include "common/common.h"
+
+#define cache_filter x264_glue3(cache, BIT_DEPTH, filter)
+#if BIT_DEPTH == 8
+#define NAME "cache_8"
+#else
+#define NAME "cache_10"
+#endif
+
 #define LAST_FRAME (h->first_frame + h->cur_size - 1)
 
 typedef struct
diff -Nru x264-0.152.2854+gite9a5903/filters/video/crop.c x264-0.158.2988+git-20191101.7817004/filters/video/crop.c
--- x264-0.152.2854+gite9a5903/filters/video/crop.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/crop.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * crop.c: crop video filter
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          James Darnley <james.darnley@gmail.com>
@@ -25,6 +25,7 @@
  *****************************************************************************/
 
 #include "video.h"
+
 #define NAME "crop"
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
 
diff -Nru x264-0.152.2854+gite9a5903/filters/video/depth.c x264-0.158.2988+git-20191101.7817004/filters/video/depth.c
--- x264-0.152.2854+gite9a5903/filters/video/depth.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/depth.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * depth.c: bit-depth conversion video filter
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Oskar Arvidsson <oskar@irock.se>
  *
@@ -24,7 +24,15 @@
  *****************************************************************************/
 
 #include "video.h"
-#define NAME "depth"
+#include "common/common.h"
+
+#define depth_filter x264_glue3(depth, BIT_DEPTH, filter)
+#if BIT_DEPTH == 8
+#define NAME "depth_8"
+#else
+#define NAME "depth_10"
+#endif
+
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
 
 cli_vid_filter_t depth_filter;
@@ -43,7 +51,8 @@
 static int depth_filter_csp_is_supported( int csp )
 {
     int csp_mask = csp & X264_CSP_MASK;
-    return csp_mask == X264_CSP_I420 ||
+    return csp_mask == X264_CSP_I400 ||
+           csp_mask == X264_CSP_I420 ||
            csp_mask == X264_CSP_I422 ||
            csp_mask == X264_CSP_I444 ||
            csp_mask == X264_CSP_YV12 ||
@@ -74,10 +83,10 @@
 static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \
                                   int width, int height, int16_t *errors ) \
 { \
-    const int lshift = 16-X264_BIT_DEPTH; \
-    const int rshift = 16-X264_BIT_DEPTH+2; \
-    const int half = 1 << (16-X264_BIT_DEPTH+1); \
-    const int pixel_max = (1 << X264_BIT_DEPTH)-1; \
+    const int lshift = 16-BIT_DEPTH; \
+    const int rshift = 16-BIT_DEPTH+2; \
+    const int half = 1 << (16-BIT_DEPTH+1); \
+    const int pixel_max = (1 << BIT_DEPTH)-1; \
     memset( errors, 0, (width+1) * sizeof(int16_t) ); \
     for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \
     { \
@@ -137,7 +146,7 @@
 static void scale_image( cli_image_t *output, cli_image_t *img )
 {
     int csp_mask = img->csp & X264_CSP_MASK;
-    const int shift = X264_BIT_DEPTH - 8;
+    const int shift = BIT_DEPTH - 8;
     for( int i = 0; i < img->planes; i++ )
     {
         uint8_t *src = img->plane[i];
@@ -217,7 +226,7 @@
             ret = 1;
     }
 
-    FAIL_IF_ERROR( bit_depth != X264_BIT_DEPTH, "this build supports only bit depth %d\n", X264_BIT_DEPTH );
+    FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this filter supports only bit depth %d\n", BIT_DEPTH );
     FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" );
 
     /* only add the filter to the chain if it's needed */
diff -Nru x264-0.152.2854+gite9a5903/filters/video/fix_vfr_pts.c x264-0.158.2988+git-20191101.7817004/filters/video/fix_vfr_pts.c
--- x264-0.152.2854+gite9a5903/filters/video/fix_vfr_pts.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/fix_vfr_pts.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * fix_vfr_pts.c: vfr pts fixing video filter
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/filters/video/internal.c x264-0.158.2988+git-20191101.7817004/filters/video/internal.c
--- x264-0.152.2854+gite9a5903/filters/video/internal.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/internal.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * internal.c: video filter utilities
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -24,6 +24,7 @@
  *****************************************************************************/
 
 #include "internal.h"
+
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
 
 void x264_cli_plane_copy( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h )
diff -Nru x264-0.152.2854+gite9a5903/filters/video/internal.h x264-0.158.2988+git-20191101.7817004/filters/video/internal.h
--- x264-0.152.2854+gite9a5903/filters/video/internal.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/internal.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * internal.h: video filter utilities
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -25,6 +25,7 @@
 
 #ifndef X264_FILTER_VIDEO_INTERNAL_H
 #define X264_FILTER_VIDEO_INTERNAL_H
+
 #include "video.h"
 
 void x264_cli_plane_copy( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h );
diff -Nru x264-0.152.2854+gite9a5903/filters/video/resize.c x264-0.158.2988+git-20191101.7817004/filters/video/resize.c
--- x264-0.152.2854+gite9a5903/filters/video/resize.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/resize.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * resize.c: resize video filter
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -24,6 +24,7 @@
  *****************************************************************************/
 
 #include "video.h"
+
 #define NAME "resize"
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
 
@@ -71,6 +72,7 @@
     /* state of swapping chroma planes pre and post resize */
     int pre_swap_chroma;
     int post_swap_chroma;
+    int fast_mono;      /* yuv with planar luma can be "converted" to monochrome by simply ignoring chroma */
     int variable_input; /* input is capable of changing properties */
     int working;        /* we have already started working with frames */
     frame_prop_t dst;   /* desired output properties */
@@ -145,6 +147,7 @@
         return csp&X264_CSP_MASK;
     switch( csp&X264_CSP_MASK )
     {
+        case X264_CSP_I400: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_GRAY16    : AV_PIX_FMT_GRAY8;
         case X264_CSP_YV12: /* specially handled via swapping chroma */
         case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P;
         case X264_CSP_YV16: /* specially handled via swapping chroma */
@@ -201,7 +204,7 @@
     {
         // yuv-based
         if( pix_desc->nb_components == 1 || pix_desc->nb_components == 2 ) // no chroma
-            ret = X264_CSP_I420;
+            ret = X264_CSP_I400;
         else if( pix_desc->log2_chroma_w && pix_desc->log2_chroma_h ) // reduced chroma width & height
             ret = (pix_number_of_planes( pix_desc ) == 2) ? X264_CSP_NV12 : X264_CSP_I420;
         else if( pix_desc->log2_chroma_w ) // reduced chroma width only
@@ -211,7 +214,7 @@
     }
     // now determine high depth
     for( int i = 0; i < pix_desc->nb_components; i++ )
-        if( pix_desc->comp[i].depth_minus1 >= 8 )
+        if( pix_desc->comp[i].depth > 8 )
             ret |= X264_CSP_HIGH_DEPTH;
     return ret;
 }
@@ -362,7 +365,7 @@
     return 0;
 }
 
-static int x264_init_sws_context( resizer_hnd_t *h )
+static int init_sws_context( resizer_hnd_t *h )
 {
     if( h->ctx )
         sws_freeContext( h->ctx );
@@ -397,15 +400,18 @@
         return 0;
     /* also warn if the resizer was initialized after the first frame */
     if( h->ctx || h->working )
+    {
         x264_cli_log( NAME, X264_LOG_WARNING, "stream properties changed at pts %"PRId64"\n", in->pts );
+        h->fast_mono = 0;
+    }
     h->scale = input_prop;
-    if( !h->buffer_allocated )
+    if( !h->buffer_allocated && !h->fast_mono )
     {
         if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
             return -1;
         h->buffer_allocated = 1;
     }
-    FAIL_IF_ERROR( x264_init_sws_context( h ), "swscale init failed\n" );
+    FAIL_IF_ERROR( init_sws_context( h ), "swscale init failed\n" );
     return 0;
 }
 
@@ -503,6 +509,11 @@
                       h->input_range ? "PC" : "TV", h->dst.range ? "PC" : "TV" );
     h->dst_csp |= info->csp & X264_CSP_VFLIP; // preserve vflip
 
+    if( dst_csp == X264_CSP_I400 &&
+        ((src_csp >= X264_CSP_I420 && src_csp <= X264_CSP_NV16) || src_csp == X264_CSP_I444 || src_csp == X264_CSP_YV24) &&
+        h->dst.width == info->width && h->dst.height == info->height && h->dst.range == h->input_range )
+        h->fast_mono = 1; /* use the input luma plane as is */
+
     /* if the input is not variable, initialize the context */
     if( !h->variable_input )
     {
@@ -535,7 +546,7 @@
     h->working = 1;
     if( h->pre_swap_chroma )
         XCHG( uint8_t*, output->img.plane[1], output->img.plane[2] );
-    if( h->ctx )
+    if( h->ctx && !h->fast_mono )
     {
         sws_scale( h->ctx, (const uint8_t* const*)output->img.plane, output->img.stride,
                    0, output->img.height, h->buffer.img.plane, h->buffer.img.stride );
diff -Nru x264-0.152.2854+gite9a5903/filters/video/select_every.c x264-0.158.2988+git-20191101.7817004/filters/video/select_every.c
--- x264-0.152.2854+gite9a5903/filters/video/select_every.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/select_every.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * select_every.c: select-every video filter
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -24,6 +24,7 @@
  *****************************************************************************/
 
 #include "video.h"
+
 #define NAME "select_every"
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
 
@@ -62,7 +63,7 @@
     h->pattern_len = 0;
     h->step_size = 0;
     int offsets[MAX_PATTERN_SIZE];
-    for( char *tok, *p = opt_string; (tok = strtok( p, "," )); p = NULL )
+    for( char *tok, *p = opt_string, UNUSED *saveptr = NULL; (tok = strtok_r( p, ",", &saveptr )); p = NULL )
     {
         int val = x264_otoi( tok, -1 );
         if( p )
@@ -95,7 +96,9 @@
          if( max_rewind == h->step_size )
              break;
     }
-    if( x264_init_vid_filter( "cache", handle, filter, info, param, (void*)max_rewind ) )
+    char name[20];
+    sprintf( name, "cache_%d", param->i_bitdepth );
+    if( x264_init_vid_filter( name, handle, filter, info, param, (void*)max_rewind ) )
         return -1;
 
     /* done initing, overwrite properties */
diff -Nru x264-0.152.2854+gite9a5903/filters/video/source.c x264-0.158.2988+git-20191101.7817004/filters/video/source.c
--- x264-0.152.2854+gite9a5903/filters/video/source.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/source.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * source.c: source video filter
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/filters/video/video.c x264-0.158.2988+git-20191101.7817004/filters/video/video.c
--- x264-0.152.2854+gite9a5903/filters/video/video.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/video.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * video.c: video filters
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -46,12 +46,18 @@
 {
     extern cli_vid_filter_t source_filter;
     first_filter = &source_filter;
-    REGISTER_VFILTER( cache );
+#if HAVE_BITDEPTH8
+    REGISTER_VFILTER( cache_8 );
+    REGISTER_VFILTER( depth_8 );
+#endif
+#if HAVE_BITDEPTH10
+    REGISTER_VFILTER( cache_10 );
+    REGISTER_VFILTER( depth_10 );
+#endif
     REGISTER_VFILTER( crop );
     REGISTER_VFILTER( fix_vfr_pts );
     REGISTER_VFILTER( resize );
     REGISTER_VFILTER( select_every );
-    REGISTER_VFILTER( depth );
 #if HAVE_GPL
 #endif
 }
diff -Nru x264-0.152.2854+gite9a5903/filters/video/video.h x264-0.158.2988+git-20191101.7817004/filters/video/video.h
--- x264-0.152.2854+gite9a5903/filters/video/video.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/filters/video/video.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * video.h: video filters
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/.gitignore x264-0.158.2988+git-20191101.7817004/.gitignore
--- x264-0.152.2854+gite9a5903/.gitignore	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/.gitignore	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,50 @@
+*~
+*.a
+*.diff
+*.orig
+*.rej
+*.dll*
+*.exe
+*.def
+*.lib
+*.pdb
+*.mo
+*.o
+*.patch
+*.pc
+*.pot
+*.so*
+*.dylib
+.*.swp
+.depend
+.DS_Store
+TAGS
+config.h
+config.mak
+config.log
+x264_config.h
+x264
+checkasm
+
+*.264
+*.h264
+*.2pass
+*.ffindex
+*.avs
+*.mkv
+*.flv
+*.mp4
+*.y4m
+*.yuv
+*.log
+*.mbtree
+*.temp
+*.pyc
+*.pgd
+*.pgc
+
+.digress_x264
+dataDec.txt
+log.dec
+common/oclobj.h
+x264_lookahead.clbin
diff -Nru x264-0.152.2854+gite9a5903/.gitlab-ci.yml x264-0.158.2988+git-20191101.7817004/.gitlab-ci.yml
--- x264-0.152.2854+gite9a5903/.gitlab-ci.yml	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/.gitlab-ci.yml	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,200 @@
+stages:
+    - build
+    - test
+    - release
+
+.variables-debian-amd64: &variables-debian-amd64
+    _TRIPLET: ""
+    _PLATFORMSUFFIX: ""
+    _PATH: "debian-x86_64"
+    _WRAPPER: ""
+
+.variables-debian-aarch64: &variables-debian-aarch64
+    _TRIPLET: ""
+    _PLATFORMSUFFIX: ""
+    _PATH: "debian-aarch64"
+    _WRAPPER: ""
+
+.variables-win32: &variables-win32
+    _TRIPLET: "i686-w64-mingw32"
+    _PLATFORMSUFFIX: ".exe"
+    _PATH: "win32"
+    _WRAPPER: "wine"
+
+.variables-win64: &variables-win64
+    _TRIPLET: "x86_64-w64-mingw32"
+    _PLATFORMSUFFIX: ".exe"
+    _PATH: "win64"
+    _WRAPPER: "wine64"
+
+.variables-macos: &variables-macos
+    _TRIPLET: "x86_64-apple-darwin18"
+    _PLATFORMSUFFIX: ""
+    _PATH: "macos-x86_64"
+    _WRAPPER: ""
+
+.build:
+    stage: build
+    script: |
+        set -x
+        curl -L -- https://download.videolan.org/videolan/x264/av.tar.gz > av.tar.gz && tar xfzv av.tar.gz
+        cd av
+        ./bootstrap
+        make -j$(getconf _NPROCESSORS_ONLN)
+        cd ..
+        export PKG_CONFIG_PATH=`/bin/ls -d $PWD/av/*/lib/pkgconfig`
+        ./configure --enable-pic --enable-strip --extra-ldflags="-static"
+        make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm
+    artifacts:
+        name: "$CI_PROJECT_PATH_SLUG-$CI_JOB_NAME-$CI_COMMIT_SHORT_SHA"
+        paths:
+            - x264${_PLATFORMSUFFIX}
+            - checkasm8${_PLATFORMSUFFIX}
+            - checkasm10${_PLATFORMSUFFIX}
+        expire_in: 1 week
+
+build-debian-amd64:
+    extends: .build
+    image: registry.videolan.org/x264-debian-unstable:20190404162201
+    tags:
+        - docker
+        - amd64
+    variables: *variables-debian-amd64
+
+build-debian-aarch64:
+    extends: .build
+    image: registry.videolan.org/x264-debian-unstable-aarch64:20190716192245
+    tags:
+        - docker
+        - aarch64
+    variables: *variables-debian-aarch64
+
+.build-win:
+    extends: build-debian-amd64
+    script: |
+        set -x
+        curl -f -o vlc-contrib-${_TRIPLET}-latest.tar.bz2 https://nightlies.videolan.org/build/${_PATH}/last/vlc-contrib-${_TRIPLET}-`date +%Y%m%d`.tar.bz2 || curl -f -o vlc-contrib-${_TRIPLET}-latest.tar.bz2 https://nightlies.videolan.org/build/${_PATH}/last/vlc-contrib-${_TRIPLET}-`date --date=yesterday +%Y%m%d`.tar.bz2
+        bunzip2 vlc-contrib-${_TRIPLET}-latest.tar.bz2
+        tar xvf vlc-contrib-${_TRIPLET}-latest.tar
+        sed -i "s#@@CONTRIB_PREFIX@@#`pwd`/${_TRIPLET}#g" ${_TRIPLET}/lib/pkgconfig/*.pc
+        export PKG_CONFIG_LIBDIR=`pwd`/${_TRIPLET}/lib/pkgconfig
+        ./configure --host=${_TRIPLET} --cross-prefix=${_TRIPLET}- --enable-pic --enable-strip
+        make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm
+
+build-win32:
+    extends: .build-win
+    variables: *variables-win32
+
+build-win64:
+    extends: .build-win
+    variables: *variables-win64
+
+build-macos:
+    extends: .build
+    tags:
+        - macos
+    script: |
+        set -x
+        curl -O https://nightlies.videolan.org/build/contribs/vlc-contrib-${_TRIPLET}-latest.tar.bz2
+        bunzip2 vlc-contrib-${_TRIPLET}-latest.tar.bz2
+        tar xvf vlc-contrib-${_TRIPLET}-latest.tar
+        sed -i.bak "s#@@CONTRIB_PREFIX@@#`pwd`/${_TRIPLET}#g" ${_TRIPLET}/lib/pkgconfig/*.pc
+        export PKG_CONFIG_LIBDIR=`pwd`/${_TRIPLET}/lib/pkgconfig
+        ./configure --enable-strip
+        make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm
+    variables: *variables-macos
+
+.test: &test
+    stage: test
+    script: |
+        set -x
+        ${_WRAPPER} ./checkasm8${_PLATFORMSUFFIX}
+        ${_WRAPPER} ./checkasm10${_PLATFORMSUFFIX}
+    artifacts:
+        expire_in: 10 minutes
+
+test-debian-amd64:
+    <<: *test
+    extends: build-debian-amd64
+    dependencies:
+        - build-debian-amd64
+    variables: *variables-debian-amd64
+
+test-debian-aarch64:
+    <<: *test
+    extends: build-debian-aarch64
+    dependencies:
+        - build-debian-aarch64
+    variables: *variables-debian-aarch64
+
+test-win32:
+    <<: *test
+    extends: build-win32
+    dependencies:
+        - build-win32
+    variables: *variables-win32
+
+test-win64:
+    <<: *test
+    extends: build-win64
+    dependencies:
+        - build-win64
+    variables: *variables-win64
+
+test-macos:
+    <<: *test
+    extends: build-macos
+    dependencies:
+        - build-macos
+    variables: *variables-macos
+
+.release: &release
+    stage: release
+    script: |
+        set -x
+        _VERSION=$(./version.sh | grep _VERSION -| cut -d\  -f4-| sed 's, ,-,g' | sed 's,",,')
+        mv x264${_PLATFORMSUFFIX} x264-${_VERSION}${_PLATFORMSUFFIX}
+    when: manual
+    only:
+        - master@videolan/x264
+        - stable@videolan/x264
+    artifacts:
+        name: "$CI_PROJECT_PATH_SLUG-$CI_JOB_NAME-$CI_COMMIT_SHORT_SHA"
+        paths:
+            - x264-*${_PLATFORMSUFFIX}
+        expire_in: '10 minutes'
+
+release-debian-amd64:
+    <<: *release
+    extends: build-debian-amd64
+    dependencies:
+        - build-debian-amd64
+    variables: *variables-debian-amd64
+
+release-debian-aarch64:
+    <<: *release
+    extends: build-debian-aarch64
+    dependencies:
+        - build-debian-aarch64
+    variables: *variables-debian-aarch64
+
+release-win32:
+    <<: *release
+    extends: build-win32
+    dependencies:
+        - build-win32
+    variables: *variables-win32
+
+release-win64:
+    <<: *release
+    extends: build-win64
+    dependencies:
+        - build-win64
+    variables: *variables-win64
+
+release-macos:
+    <<: *release
+    extends: build-macos
+    dependencies:
+        - build-macos
+    variables: *variables-macos
diff -Nru x264-0.152.2854+gite9a5903/input/avs.c x264-0.158.2988+git-20191101.7817004/input/avs.c
--- x264-0.152.2854+gite9a5903/input/avs.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/avs.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * avs.c: avisynth input
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
@@ -25,6 +25,7 @@
  *****************************************************************************/
 
 #include "input.h"
+
 #if USE_AVXSYNTH
 #include <dlfcn.h>
 #if SYS_MACOSX
@@ -39,7 +40,6 @@
 #define avs_close FreeLibrary
 #define avs_address GetProcAddress
 #endif
-#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ )
 
 #define AVSC_NO_DECLSPEC
 #undef EXTERN_C
@@ -50,6 +50,8 @@
 #endif
 #define AVSC_DECLARE_FUNC(name) name##_func name
 
+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ )
+
 /* AVS uses a versioned interface to control backwards compatibility */
 /* YV12 support is required, which was added in 2.5 */
 #define AVS_INTERFACE_25 2
@@ -123,7 +125,7 @@
 } avs_hnd_t;
 
 /* load the library and functions we require from it */
-static int x264_avs_load_library( avs_hnd_t *h )
+static int custom_avs_load_library( avs_hnd_t *h )
 {
     h->library = avs_open();
     if( !h->library )
@@ -188,6 +190,8 @@
 #define AVS_IS_YUV420P16( vi ) (0)
 #define AVS_IS_YUV422P16( vi ) (0)
 #define AVS_IS_YUV444P16( vi ) (0)
+#define AVS_IS_Y( vi ) (0)
+#define AVS_IS_Y16( vi ) (0)
 #else
 #define AVS_IS_AVISYNTHPLUS (h->func.avs_is_420 && h->func.avs_is_422 && h->func.avs_is_444)
 #define AVS_IS_420( vi ) (h->func.avs_is_420 ? h->func.avs_is_420( vi ) : avs_is_yv12( vi ))
@@ -198,6 +202,8 @@
 #define AVS_IS_YUV420P16( vi ) (h->func.avs_is_yuv420p16 && h->func.avs_is_yuv420p16( vi ))
 #define AVS_IS_YUV422P16( vi ) (h->func.avs_is_yuv422p16 && h->func.avs_is_yuv422p16( vi ))
 #define AVS_IS_YUV444P16( vi ) (h->func.avs_is_yuv444p16 && h->func.avs_is_yuv444p16( vi ))
+#define AVS_IS_Y( vi ) (h->func.avs_is_y ? h->func.avs_is_y( vi ) : avs_is_y8( vi ))
+#define AVS_IS_Y16( vi ) (h->func.avs_is_y16 && h->func.avs_is_y16( vi ))
 #endif
 
 /* generate a filter sequence to try based on the filename extension */
@@ -259,7 +265,7 @@
     avs_hnd_t *h = calloc( 1, sizeof(avs_hnd_t) );
     if( !h )
         return -1;
-    FAIL_IF_ERROR( x264_avs_load_library( h ), "failed to load avisynth\n" );
+    FAIL_IF_ERROR( custom_avs_load_library( h ), "failed to load avisynth\n" );
     h->env = h->func.avs_create_script_environment( AVS_INTERFACE_25 );
     if( h->func.avs_get_error )
     {
@@ -286,7 +292,7 @@
     if( !strcasecmp( filename_ext, "avs" ) )
     {
         res = h->func.avs_invoke( h->env, "Import", arg, NULL );
-        FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) );
+        FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_error( res ) );
         /* check if the user is using a multi-threaded script and apply distributor if necessary.
            adapted from avisynth's vfw interface */
         AVS_Value mt_test = h->func.avs_invoke( h->env, "GetMTMode", avs_new_value_bool( 0 ), NULL );
@@ -337,16 +343,17 @@
     {
         x264_cli_log( "avs", X264_LOG_WARNING, "detected fieldbased (separated) input, weaving to frames\n" );
         AVS_Value tmp = h->func.avs_invoke( h->env, "Weave", res, NULL );
-        FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" );
+        FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames: %s\n", avs_as_error( tmp ) );
         res = update_clip( h, &vi, tmp, res );
         info->interlaced = 1;
         info->tff = avs_is_tff( vi );
     }
 #if !HAVE_SWSCALE
     /* if swscale is not available, convert the CSP if necessary */
-    FAIL_IF_ERROR( avs_version < 2.6f && (opt->output_csp == X264_CSP_I422 || opt->output_csp == X264_CSP_I444),
-                   "avisynth >= 2.6 is required for i422/i444 output\n" );
-    if( (opt->output_csp == X264_CSP_I420 && !AVS_IS_420( vi )) ||
+    FAIL_IF_ERROR( avs_version < 2.6f && (opt->output_csp == X264_CSP_I400 || opt->output_csp == X264_CSP_I422 || opt->output_csp == X264_CSP_I444),
+                   "avisynth >= 2.6 is required for i400/i422/i444 output\n" );
+    if( (opt->output_csp == X264_CSP_I400 && !AVS_IS_Y( vi )) ||
+        (opt->output_csp == X264_CSP_I420 && !AVS_IS_420( vi )) ||
         (opt->output_csp == X264_CSP_I422 && !AVS_IS_422( vi )) ||
         (opt->output_csp == X264_CSP_I444 && !AVS_IS_444( vi )) ||
         (opt->output_csp == X264_CSP_RGB && !avs_is_rgb( vi )) )
@@ -354,46 +361,58 @@
         const char *csp;
         if( AVS_IS_AVISYNTHPLUS )
         {
-            csp = opt->output_csp == X264_CSP_I420 ? "YUV420" :
+            csp = opt->output_csp == X264_CSP_I400 ? "Y" :
+                  opt->output_csp == X264_CSP_I420 ? "YUV420" :
                   opt->output_csp == X264_CSP_I422 ? "YUV422" :
                   opt->output_csp == X264_CSP_I444 ? "YUV444" :
                   "RGB";
         }
         else
         {
-            csp = opt->output_csp == X264_CSP_I420 ? "YV12" :
+            csp = opt->output_csp == X264_CSP_I400 ? "Y8" :
+                  opt->output_csp == X264_CSP_I420 ? "YV12" :
                   opt->output_csp == X264_CSP_I422 ? "YV16" :
                   opt->output_csp == X264_CSP_I444 ? "YV24" :
                   "RGB";
         }
         x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to %s\n", csp );
-        FAIL_IF_ERROR( opt->output_csp < X264_CSP_I444 && (vi->width&1),
-                       "input clip width not divisible by 2 (%dx%d)\n", vi->width, vi->height );
-        FAIL_IF_ERROR( opt->output_csp == X264_CSP_I420 && info->interlaced && (vi->height&3),
-                       "input clip height not divisible by 4 (%dx%d)\n", vi->width, vi->height );
-        FAIL_IF_ERROR( (opt->output_csp == X264_CSP_I420 || info->interlaced) && (vi->height&1),
-                       "input clip height not divisible by 2 (%dx%d)\n", vi->width, vi->height );
+        if( opt->output_csp != X264_CSP_I400 )
+        {
+            FAIL_IF_ERROR( opt->output_csp < X264_CSP_I444 && (vi->width&1),
+                           "input clip width not divisible by 2 (%dx%d)\n", vi->width, vi->height );
+            FAIL_IF_ERROR( opt->output_csp == X264_CSP_I420 && info->interlaced && (vi->height&3),
+                           "input clip height not divisible by 4 (%dx%d)\n", vi->width, vi->height );
+            FAIL_IF_ERROR( (opt->output_csp == X264_CSP_I420 || info->interlaced) && (vi->height&1),
+                           "input clip height not divisible by 2 (%dx%d)\n", vi->width, vi->height );
+        }
         char conv_func[16];
         snprintf( conv_func, sizeof(conv_func), "ConvertTo%s", csp );
-        char matrix[7] = "";
-        int arg_count = 2;
+        AVS_Value arg_arr[3];
+        const char *arg_name[3];
+        int arg_count = 1;
+        arg_arr[0] = res;
+        arg_name[0] = NULL;
+        if( opt->output_csp != X264_CSP_I400 )
+        {
+            arg_arr[arg_count] = avs_new_value_bool( info->interlaced );
+            arg_name[arg_count] = "interlaced";
+            arg_count++;
+        }
         /* if doing a rgb <-> yuv conversion then range is handled via 'matrix'. though it's only supported in 2.56+ */
+        char matrix[7];
         if( avs_version >= 2.56f && ((opt->output_csp == X264_CSP_RGB && avs_is_yuv( vi )) || (opt->output_csp != X264_CSP_RGB && avs_is_rgb( vi ))) )
         {
             // if converting from yuv, then we specify the matrix for the input, otherwise use the output's.
             int use_pc_matrix = avs_is_yuv( vi ) ? opt->input_range == RANGE_PC : opt->output_range == RANGE_PC;
             snprintf( matrix, sizeof(matrix), "%s601", use_pc_matrix ? "PC." : "Rec" ); /* FIXME: use correct coefficients */
+            arg_arr[arg_count] = avs_new_value_string( matrix );
+            arg_name[arg_count] = "matrix";
             arg_count++;
             // notification that the input range has changed to the desired one
             opt->input_range = opt->output_range;
         }
-        const char *arg_name[] = { NULL, "interlaced", "matrix" };
-        AVS_Value arg_arr[3];
-        arg_arr[0] = res;
-        arg_arr[1] = avs_new_value_bool( info->interlaced );
-        arg_arr[2] = avs_new_value_string( matrix );
         AVS_Value res2 = h->func.avs_invoke( h->env, conv_func, avs_new_value_array( arg_arr, arg_count ), arg_name );
-        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to %s\n", csp );
+        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to %s: %s\n", csp, avs_as_error( res2 ) );
         res = update_clip( h, &vi, res2, res );
     }
     /* if swscale is not available, change the range if necessary. This only applies to YUV-based CSPs however */
@@ -441,13 +460,15 @@
         info->csp = X264_CSP_I420 | X264_CSP_HIGH_DEPTH;
     else if( avs_is_yv12( vi ) )
         info->csp = X264_CSP_I420;
-#if HAVE_SWSCALE
+    else if( AVS_IS_Y16( vi ) )
+        info->csp = X264_CSP_I400 | X264_CSP_HIGH_DEPTH;
+    else if( avs_is_y8( vi ) )
+        info->csp = X264_CSP_I400;
     else if( avs_is_yuy2( vi ) )
-        info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER;
+        info->csp = X264_CSP_YUYV;
+#if HAVE_SWSCALE
     else if( avs_is_yv411( vi ) )
         info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER;
-    else if( avs_is_y8( vi ) )
-        info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
 #endif
     else
     {
diff -Nru x264-0.152.2854+gite9a5903/input/ffms.c x264-0.158.2988+git-20191101.7817004/input/ffms.c
--- x264-0.152.2854+gite9a5903/input/ffms.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/ffms.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ffms.c: ffmpegsource input
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
@@ -27,12 +27,13 @@
 
 #include "input.h"
 #include <ffms.h>
-#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ )
 
 #undef DECLARE_ALIGNED
 #include <libavcodec/avcodec.h>
 #include <libswscale/swscale.h>
 
+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ )
+
 #define PROGRESS_LENGTH 36
 
 typedef struct
diff -Nru x264-0.152.2854+gite9a5903/input/input.c x264-0.158.2988+git-20191101.7817004/input/input.c
--- x264-0.152.2854+gite9a5903/input/input.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/input.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.c: common input functions
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Henrik Gramner <henrik@gramner.com>
@@ -34,6 +34,7 @@
 #endif
 
 const x264_cli_csp_t x264_cli_csps[] = {
+    [X264_CSP_I400] = { "i400", 1, { 1 },         { 1 },         1, 1 },
     [X264_CSP_I420] = { "i420", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
     [X264_CSP_I422] = { "i422", 3, { 1, .5, .5 }, { 1,  1,  1 }, 2, 1 },
     [X264_CSP_I444] = { "i444", 3, { 1,  1,  1 }, { 1,  1,  1 }, 1, 1 },
@@ -86,7 +87,7 @@
     return size;
 }
 
-static int x264_cli_pic_init_internal( cli_pic_t *pic, int csp, int width, int height, int align, int alloc )
+static int cli_pic_init_internal( cli_pic_t *pic, int csp, int width, int height, int align, int alloc )
 {
     memset( pic, 0, sizeof(cli_pic_t) );
     int csp_mask = csp & X264_CSP_MASK;
@@ -118,17 +119,17 @@
 
 int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
 {
-    return x264_cli_pic_init_internal( pic, csp, width, height, 1, 1 );
+    return cli_pic_init_internal( pic, csp, width, height, 1, 1 );
 }
 
 int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
 {
-    return x264_cli_pic_init_internal( pic, csp, width, height, NATIVE_ALIGN, 1 );
+    return cli_pic_init_internal( pic, csp, width, height, NATIVE_ALIGN, 1 );
 }
 
 int x264_cli_pic_init_noalloc( cli_pic_t *pic, int csp, int width, int height )
 {
-    return x264_cli_pic_init_internal( pic, csp, width, height, 1, 0 );
+    return cli_pic_init_internal( pic, csp, width, height, 1, 0 );
 }
 
 void x264_cli_pic_clean( cli_pic_t *pic )
@@ -148,35 +149,71 @@
 /* Functions for handling memory-mapped input frames */
 int x264_cli_mmap_init( cli_mmap_t *h, FILE *fh )
 {
-#ifdef _WIN32
-    HANDLE osfhandle = (HANDLE)_get_osfhandle( _fileno( fh ) );
-    if( osfhandle != INVALID_HANDLE_VALUE )
+#if defined(_WIN32) || HAVE_MMAP
+    int fd = fileno( fh );
+    x264_struct_stat file_stat;
+    if( !x264_fstat( fd, &file_stat ) )
     {
-        SYSTEM_INFO si;
-        GetSystemInfo( &si );
-        h->align_mask = si.dwAllocationGranularity - 1;
-        h->prefetch_virtual_memory = (void*)GetProcAddress( GetModuleHandleW( L"kernel32.dll" ), "PrefetchVirtualMemory" );
-        h->process_handle = GetCurrentProcess();
-        h->map_handle = CreateFileMappingW( osfhandle, NULL, PAGE_READONLY, 0, 0, NULL );
-        return !h->map_handle;
-    }
+        h->file_size = file_stat.st_size;
+#ifdef _WIN32
+        HANDLE osfhandle = (HANDLE)_get_osfhandle( fd );
+        if( osfhandle != INVALID_HANDLE_VALUE )
+        {
+            SYSTEM_INFO si;
+            GetSystemInfo( &si );
+            h->page_mask = si.dwPageSize - 1;
+            h->align_mask = si.dwAllocationGranularity - 1;
+            h->prefetch_virtual_memory = (void*)GetProcAddress( GetModuleHandleW( L"kernel32.dll" ), "PrefetchVirtualMemory" );
+            h->process_handle = GetCurrentProcess();
+            h->map_handle = CreateFileMappingW( osfhandle, NULL, PAGE_READONLY, 0, 0, NULL );
+            return !h->map_handle;
+        }
 #elif HAVE_MMAP && defined(_SC_PAGESIZE)
-    h->align_mask = sysconf( _SC_PAGESIZE ) - 1;
-    h->fd = fileno( fh );
-    return h->align_mask < 0 || h->fd < 0;
+        h->align_mask = sysconf( _SC_PAGESIZE ) - 1;
+        h->fd = fd;
+        return h->align_mask < 0 || fd < 0;
+#endif
+    }
 #endif
     return -1;
 }
 
+/* Third-party filters such as swscale can overread the input buffer which may result
+ * in segfaults. We have to pad the buffer size as a workaround to avoid that. */
+#define MMAP_PADDING 64
+
 void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, size_t size )
 {
 #if defined(_WIN32) || HAVE_MMAP
+    uint8_t *base;
     int align = offset & h->align_mask;
     offset -= align;
     size   += align;
 #ifdef _WIN32
-    uint8_t *base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, size );
-    if( base )
+    /* If the padding crosses a page boundary we need to increase the mapping size. */
+    size_t padded_size = (-size & h->page_mask) < MMAP_PADDING ? size + MMAP_PADDING : size;
+    if( offset + padded_size > h->file_size )
+    {
+        /* It's not possible to do the POSIX mmap() remapping trick on Windows, so if the padding crosses a
+         * page boundary past the end of the file we have to copy the entire frame into a padded buffer. */
+        if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, size )) )
+        {
+            uint8_t *buf = NULL;
+            HANDLE anon_map = CreateFileMappingW( INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, padded_size, NULL );
+            if( anon_map )
+            {
+                if( (buf = MapViewOfFile( anon_map, FILE_MAP_WRITE, 0, 0, 0 )) )
+                {
+                    buf += align;
+                    memcpy( buf, base + align, size - align );
+                }
+                CloseHandle( anon_map );
+            }
+            UnmapViewOfFile( base );
+            return buf;
+        }
+    }
+    else if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, padded_size )) )
     {
         /* PrefetchVirtualMemory() is only available on Windows 8 and newer. */
         if( h->prefetch_virtual_memory )
@@ -187,8 +224,8 @@
         return base + align;
     }
 #else
-    uint8_t *base = mmap( NULL, size, PROT_READ, MAP_PRIVATE, h->fd, offset );
-    if( base != MAP_FAILED )
+    size_t padded_size = size + MMAP_PADDING;
+    if( (base = mmap( NULL, padded_size, PROT_READ, MAP_PRIVATE, h->fd, offset )) != MAP_FAILED )
     {
         /* Ask the OS to readahead pages. This improves performance whereas
          * forcing page faults by manually accessing every page does not.
@@ -199,6 +236,12 @@
 #elif defined(POSIX_MADV_WILLNEED)
         posix_madvise( base, size, POSIX_MADV_WILLNEED );
 #endif
+        /* Remap the file mapping of any padding that crosses a page boundary past the end of
+         * the file into a copy of the last valid page to prevent reads from invalid memory. */
+        size_t aligned_size = (padded_size - 1) & ~h->align_mask;
+        if( offset + aligned_size >= h->file_size )
+            mmap( base + aligned_size, padded_size - aligned_size, PROT_READ, MAP_PRIVATE|MAP_FIXED, h->fd, (offset + size - 1) & ~h->align_mask );
+
         return base + align;
     }
 #endif
@@ -213,7 +256,7 @@
 #ifdef _WIN32
     return !UnmapViewOfFile( base );
 #else
-    return munmap( base, size + (intptr_t)addr - (intptr_t)base );
+    return munmap( base, size + MMAP_PADDING + (intptr_t)addr - (intptr_t)base );
 #endif
 #endif
     return -1;
diff -Nru x264-0.152.2854+gite9a5903/input/input.h x264-0.158.2988+git-20191101.7817004/input/input.h
--- x264-0.152.2854+gite9a5903/input/input.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/input.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.h: file input
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -102,7 +102,8 @@
 extern const cli_input_t raw_input;
 extern const cli_input_t y4m_input;
 extern const cli_input_t avs_input;
-extern const cli_input_t thread_input;
+extern const cli_input_t thread_8_input;
+extern const cli_input_t thread_10_input;
 extern const cli_input_t lavf_input;
 extern const cli_input_t ffms_input;
 extern const cli_input_t timecode_input;
@@ -137,8 +138,10 @@
 
 typedef struct
 {
+    int64_t file_size;
     int align_mask;
 #ifdef _WIN32
+    int page_mask;
     BOOL (WINAPI *prefetch_virtual_memory)( HANDLE, ULONG_PTR, PVOID, ULONG );
     HANDLE process_handle;
     HANDLE map_handle;
diff -Nru x264-0.152.2854+gite9a5903/input/lavf.c x264-0.158.2988+git-20191101.7817004/input/lavf.c
--- x264-0.152.2854+gite9a5903/input/lavf.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/lavf.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lavf.c: libavformat input
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
@@ -25,16 +25,20 @@
  *****************************************************************************/
 
 #include "input.h"
-#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ )
+
 #undef DECLARE_ALIGNED
 #include <libavformat/avformat.h>
+#include <libavutil/dict.h>
+#include <libavutil/error.h>
 #include <libavutil/mem.h>
 #include <libavutil/pixdesc.h>
-#include <libavutil/dict.h>
+
+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ )
 
 typedef struct
 {
     AVFormatContext *lavf;
+    AVCodecContext *lavc;
     AVFrame *frame;
     int stream_id;
     int next_frame;
@@ -54,6 +58,25 @@
     }
 }
 
+/* Allocate a decoder context for the stream's codec and fill it from stream->codecpar.
+ * Returns NULL if no decoder is found, allocation fails or the parameters can't be applied.
+ * codec is const-qualified since newer FFmpeg returns const AVCodec * from avcodec_find_decoder(). */
+static AVCodecContext *codec_from_stream( AVStream *stream )
+{
+    const AVCodec *codec = avcodec_find_decoder( stream->codecpar->codec_id );
+    if( !codec )
+        return NULL;
+    AVCodecContext *c = avcodec_alloc_context3( codec );
+    if( !c )
+        return NULL;
+    if( avcodec_parameters_to_context( c, stream->codecpar ) < 0 )
+    {
+        avcodec_free_context( &c );
+        return NULL;
+    }
+    return c;
+}
+
 static int read_frame_internal( cli_pic_t *p_pic, lavf_hnd_t *h, int i_frame, video_info_t *info )
 {
     if( h->first_pic && !info )
@@ -72,8 +95,6 @@
             return 0;
     }
 
-    AVCodecContext *c = h->lavf->streams[h->stream_id]->codec;
-
     AVPacket pkt;
     av_init_packet( &pkt );
     pkt.data = NULL;
@@ -81,31 +102,32 @@
 
     while( i_frame >= h->next_frame )
     {
-        int finished = 0;
-        int ret = 0;
-        do
-        {
-            ret = av_read_frame( h->lavf, &pkt );
+        int ret;
 
-            if( ret < 0 )
+        while( (ret = avcodec_receive_frame( h->lavc, h->frame )) )
+        {
+            if( ret == AVERROR(EAGAIN) )
             {
-                av_init_packet( &pkt );
-                pkt.data = NULL;
-                pkt.size = 0;
+                while( !(ret = av_read_frame( h->lavf, &pkt )) && pkt.stream_index != h->stream_id )
+                    av_packet_unref( &pkt );
+
+                if( ret )
+                    ret = avcodec_send_packet( h->lavc, NULL );
+                else
+                {
+                    ret = avcodec_send_packet( h->lavc, &pkt );
+                    av_packet_unref( &pkt );
+                }
             }
+            else if( ret == AVERROR_EOF )
+                return -1;
 
-            if( ret < 0 || pkt.stream_index == h->stream_id )
+            if( ret )
             {
-                if( avcodec_decode_video2( c, h->frame, &finished, &pkt ) < 0 )
-                    x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
+                x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
+                return -1;
             }
-
-            if( ret >= 0 )
-                av_free_packet( &pkt );
-        } while( !finished && ret >= 0 );
-
-        if( !finished )
-            return -1;
+        }
 
         h->next_frame++;
     }
@@ -113,9 +135,9 @@
     memcpy( p_pic->img.stride, h->frame->linesize, sizeof(p_pic->img.stride) );
     memcpy( p_pic->img.plane, h->frame->data, sizeof(p_pic->img.plane) );
     int is_fullrange   = 0;
-    p_pic->img.width   = c->width;
-    p_pic->img.height  = c->height;
-    p_pic->img.csp     = handle_jpeg( c->pix_fmt, &is_fullrange ) | X264_CSP_OTHER;
+    p_pic->img.width   = h->lavc->width;
+    p_pic->img.height  = h->lavc->height;
+    p_pic->img.csp     = handle_jpeg( h->lavc->pix_fmt, &is_fullrange ) | X264_CSP_OTHER;
 
     if( info )
     {
@@ -127,8 +149,8 @@
     if( h->vfr_input )
     {
         p_pic->pts = p_pic->duration = 0;
-        if( h->frame->pkt_pts != AV_NOPTS_VALUE )
-            p_pic->pts = h->frame->pkt_pts;
+        if( h->frame->pts != AV_NOPTS_VALUE )
+            p_pic->pts = h->frame->pts;
         else if( h->frame->pkt_dts != AV_NOPTS_VALUE )
             p_pic->pts = h->frame->pkt_dts; // for AVI files
         else if( info )
@@ -174,12 +196,15 @@
     FAIL_IF_ERROR( avformat_find_stream_info( h->lavf, NULL ) < 0, "could not find input stream info\n" );
 
     int i = 0;
-    while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != AVMEDIA_TYPE_VIDEO )
+    while( i < h->lavf->nb_streams && h->lavf->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO )
         i++;
     FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" );
     h->stream_id       = i;
     h->next_frame      = 0;
-    AVCodecContext *c  = h->lavf->streams[i]->codec;
+    h->lavc            = codec_from_stream( h->lavf->streams[i] );
+    if( !h->lavc )
+        return -1;
+
     info->fps_num      = h->lavf->streams[i]->avg_frame_rate.num;
     info->fps_den      = h->lavf->streams[i]->avg_frame_rate.den;
     info->timebase_num = h->lavf->streams[i]->time_base.num;
@@ -187,7 +212,7 @@
     /* lavf is thread unsafe as calling av_read_frame invalidates previously read AVPackets */
     info->thread_safe  = 0;
     h->vfr_input       = info->vfr;
-    FAIL_IF_ERROR( avcodec_open2( c, avcodec_find_decoder( c->codec_id ), NULL ),
+    FAIL_IF_ERROR( avcodec_open2( h->lavc, avcodec_find_decoder( h->lavc->codec_id ), NULL ),
                    "could not find decoder for video stream\n" );
 
     /* prefetch the first frame and set/confirm flags */
@@ -197,17 +222,17 @@
     if( read_frame_internal( h->first_pic, h, 0, info ) )
         return -1;
 
-    info->width      = c->width;
-    info->height     = c->height;
+    info->width      = h->lavc->width;
+    info->height     = h->lavc->height;
     info->csp        = h->first_pic->img.csp;
     info->num_frames = h->lavf->streams[i]->nb_frames;
-    info->sar_height = c->sample_aspect_ratio.den;
-    info->sar_width  = c->sample_aspect_ratio.num;
-    info->fullrange |= c->color_range == AVCOL_RANGE_JPEG;
+    info->sar_height = h->lavc->sample_aspect_ratio.den;
+    info->sar_width  = h->lavc->sample_aspect_ratio.num;
+    info->fullrange |= h->lavc->color_range == AVCOL_RANGE_JPEG;
 
     /* avisynth stores rgb data vertically flipped. */
     if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) &&
-        (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) )
+        (h->lavc->pix_fmt == AV_PIX_FMT_BGRA || h->lavc->pix_fmt == AV_PIX_FMT_BGR24) )
         info->csp |= X264_CSP_VFLIP;
 
     *p_handle = h;
@@ -237,7 +262,7 @@
 static int close_file( hnd_t handle )
 {
     lavf_hnd_t *h = handle;
-    avcodec_close( h->lavf->streams[h->stream_id]->codec );
+    avcodec_free_context( &h->lavc );
     avformat_close_input( &h->lavf );
     av_frame_free( &h->frame );
     free( h );
diff -Nru x264-0.152.2854+gite9a5903/input/raw.c x264-0.158.2988+git-20191101.7817004/input/raw.c
--- x264-0.152.2854+gite9a5903/input/raw.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/raw.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw input
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -26,6 +26,7 @@
  *****************************************************************************/
 
 #include "input.h"
+
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "raw", __VA_ARGS__ )
 
 typedef struct
diff -Nru x264-0.152.2854+gite9a5903/input/thread.c x264-0.158.2988+git-20191101.7817004/input/thread.c
--- x264-0.152.2854+gite9a5903/input/thread.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/thread.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * thread.c: threaded input
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -25,6 +25,9 @@
  *****************************************************************************/
 
 #include "input.h"
+#include "common/common.h"
+
+#define thread_input x264_glue3(thread, BIT_DEPTH, input)
 
 typedef struct
 {
diff -Nru x264-0.152.2854+gite9a5903/input/timecode.c x264-0.158.2988+git-20191101.7817004/input/timecode.c
--- x264-0.152.2854+gite9a5903/input/timecode.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/timecode.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * timecode.c: timecode file input
  *****************************************************************************
- * Copyright (C) 2010-2017 x264 project
+ * Copyright (C) 2010-2019 x264 project
  *
  * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
  *
@@ -24,6 +24,7 @@
  *****************************************************************************/
 
 #include "input.h"
+
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "timecode", __VA_ARGS__ )
 
 typedef struct
diff -Nru x264-0.152.2854+gite9a5903/input/y4m.c x264-0.158.2988+git-20191101.7817004/input/y4m.c
--- x264-0.152.2854+gite9a5903/input/y4m.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/input/y4m.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * y4m.c: y4m input
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -25,6 +25,7 @@
  *****************************************************************************/
 
 #include "input.h"
+
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "y4m", __VA_ARGS__ )
 
 typedef struct
@@ -47,10 +48,12 @@
 
 static int parse_csp_and_depth( char *csp_name, int *bit_depth )
 {
-    int csp    = X264_CSP_MAX;
+    int csp = X264_CSP_MAX;
 
     /* Set colorspace from known variants */
-    if( !strncmp( "420", csp_name, 3 ) )
+    if( !strncmp( "mono", csp_name, 4 ) )
+        csp = X264_CSP_I400;
+    else if( !strncmp( "420", csp_name, 3 ) )
         csp = X264_CSP_I420;
     else if( !strncmp( "422", csp_name, 3 ) )
         csp = X264_CSP_I422;
@@ -58,7 +61,8 @@
         csp = X264_CSP_I444;
 
     /* Set high bit depth from known extensions */
-    if( sscanf( csp_name, "%*d%*[pP]%d", bit_depth ) != 1 )
+    if( sscanf( csp_name, "mono%d", bit_depth ) != 1 &&
+        sscanf( csp_name, "%*d%*[pP]%d", bit_depth ) != 1 )
         *bit_depth = 8;
 
     return csp;
diff -Nru x264-0.152.2854+gite9a5903/Makefile x264-0.158.2988+git-20191101.7817004/Makefile
--- x264-0.152.2854+gite9a5903/Makefile	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/Makefile	2019-11-09 05:16:29.000000000 +0000
@@ -8,51 +8,62 @@
 vpath %.asm $(SRCPATH)
 vpath %.rc $(SRCPATH)
 
+CFLAGS += $(CFLAGSPROF)
+LDFLAGS += $(LDFLAGSPROF)
+
 GENERATED =
 
 all: default
 default:
 
-SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \
-       common/frame.c common/dct.c common/cpu.c common/cabac.c \
-       common/common.c common/osdep.c common/rectangle.c \
-       common/set.c common/quant.c common/deblock.c common/vlc.c \
-       common/mvpred.c common/bitstream.c \
-       encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
-       encoder/set.c encoder/macroblock.c encoder/cabac.c \
-       encoder/cavlc.c encoder/encoder.c encoder/lookahead.c
+SRCS = common/osdep.c common/base.c common/cpu.c common/tables.c \
+       encoder/api.c
+
+SRCS_X = common/mc.c common/predict.c common/pixel.c common/macroblock.c \
+         common/frame.c common/dct.c common/cabac.c \
+         common/common.c common/rectangle.c \
+         common/set.c common/quant.c common/deblock.c common/vlc.c \
+         common/mvpred.c common/bitstream.c \
+         encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
+         encoder/set.c encoder/macroblock.c encoder/cabac.c \
+         encoder/cavlc.c encoder/encoder.c encoder/lookahead.c
 
-SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
-         output/raw.c output/matroska.c output/matroska_ebml.c \
+SRCS_8 =
+
+SRCCLI = x264.c autocomplete.c input/input.c input/timecode.c input/raw.c \
+         input/y4m.c output/raw.c output/matroska.c output/matroska_ebml.c \
          output/flv.c output/flv_bytestream.c filters/filters.c \
          filters/video/video.c filters/video/source.c filters/video/internal.c \
-         filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
-         filters/video/select_every.c filters/video/crop.c filters/video/depth.c
+         filters/video/resize.c filters/video/fix_vfr_pts.c \
+         filters/video/select_every.c filters/video/crop.c
+
+SRCCLI_X = filters/video/cache.c filters/video/depth.c
 
 SRCSO =
+
+SRCCHK_X = tools/checkasm.c
+
+SRCEXAMPLE = example.c
+
 OBJS =
+OBJASM =
 OBJSO =
 OBJCLI =
-
-OBJCHK = tools/checkasm.o
-
-OBJEXAMPLE = example.o
+OBJCHK =
+OBJCHK_8 =
+OBJCHK_10 =
+OBJEXAMPLE =
 
 CONFIG := $(shell cat config.h)
 
-# GPL-only files
-ifneq ($(findstring HAVE_GPL 1, $(CONFIG)),)
-SRCCLI +=
-endif
-
 # Optional module sources
 ifneq ($(findstring HAVE_AVS 1, $(CONFIG)),)
 SRCCLI += input/avs.c
 endif
 
 ifneq ($(findstring HAVE_THREAD 1, $(CONFIG)),)
-SRCCLI += input/thread.c
-SRCS   += common/threadpool.c
+SRCS_X   += common/threadpool.c
+SRCCLI_X += input/thread.c
 endif
 
 ifneq ($(findstring HAVE_WIN32THREAD 1, $(CONFIG)),)
@@ -75,83 +86,116 @@
 SRCCLI += output/mp4_lsmash.c
 endif
 
-# MMX/SSE optims
 ifneq ($(AS),)
-X86SRC0 = const-a.asm cabac-a.asm dct-a.asm deblock-a.asm mc-a.asm \
-          mc-a2.asm pixel-a.asm predict-a.asm quant-a.asm \
-          cpu-a.asm dct-32.asm bitstream-a.asm
-ifneq ($(findstring HIGH_BIT_DEPTH, $(CONFIG)),)
-X86SRC0 += sad16-a.asm
-else
-X86SRC0 += sad-a.asm
-endif
-X86SRC = $(X86SRC0:%=common/x86/%)
 
+# MMX/SSE optims
+SRCASM_X =
 ifeq ($(SYS_ARCH),X86)
 ARCH_X86 = yes
-ASMSRC   = $(X86SRC) common/x86/pixel-32.asm
+SRCASM_X += common/x86/dct-32.asm \
+            common/x86/pixel-32.asm
 endif
-
 ifeq ($(SYS_ARCH),X86_64)
 ARCH_X86 = yes
-ASMSRC   = $(X86SRC:-32.asm=-64.asm) common/x86/trellis-64.asm
+SRCASM_X += common/x86/dct-64.asm \
+            common/x86/trellis-64.asm
 endif
 
 ifdef ARCH_X86
-SRCS   += common/x86/mc-c.c common/x86/predict-c.c
-OBJASM  = $(ASMSRC:%.asm=%.o)
-$(OBJASM): common/x86/x86inc.asm common/x86/x86util.asm
-OBJCHK += tools/checkasm-a.o
+SRCASM_X += common/x86/bitstream-a.asm \
+            common/x86/const-a.asm \
+            common/x86/cabac-a.asm \
+            common/x86/dct-a.asm \
+            common/x86/deblock-a.asm \
+            common/x86/mc-a.asm \
+            common/x86/mc-a2.asm \
+            common/x86/pixel-a.asm \
+            common/x86/predict-a.asm \
+            common/x86/quant-a.asm
+SRCS_X   += common/x86/mc-c.c \
+            common/x86/predict-c.c
+
+OBJASM += common/x86/cpu-a.o
+ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.asm=%-8.o) common/x86/sad-a-8.o
 endif
+ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.asm=%-10.o) common/x86/sad16-a-10.o
+endif
+
+OBJCHK += tools/checkasm-a.o
 endif
 
 # AltiVec optims
 ifeq ($(SYS_ARCH),PPC)
-ifneq ($(AS),)
-SRCS += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \
-        common/ppc/quant.c common/ppc/deblock.c \
-        common/ppc/predict.c
-endif
+SRCS_X += common/ppc/dct.c \
+          common/ppc/deblock.c \
+          common/ppc/mc.c \
+          common/ppc/pixel.c \
+          common/ppc/predict.c \
+          common/ppc/quant.c
 endif
 
 # NEON optims
 ifeq ($(SYS_ARCH),ARM)
-ifneq ($(AS),)
-ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
-          common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \
-          common/arm/predict-a.S common/arm/bitstream-a.S
-SRCS   += common/arm/mc-c.c common/arm/predict-c.c
-OBJASM  = $(ASMSRC:%.S=%.o)
-OBJCHK += tools/checkasm-arm.o
+SRCASM_X  = common/arm/bitstream-a.S \
+            common/arm/dct-a.S \
+            common/arm/deblock-a.S \
+            common/arm/mc-a.S \
+            common/arm/pixel-a.S \
+            common/arm/predict-a.S \
+            common/arm/quant-a.S
+SRCS_X   += common/arm/mc-c.c \
+            common/arm/predict-c.c
+
+OBJASM += common/arm/cpu-a.o
+ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.S=%-8.o)
 endif
+ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.S=%-10.o)
+endif
+
+OBJCHK += tools/checkasm-arm.o
 endif
 
 # AArch64 NEON optims
 ifeq ($(SYS_ARCH),AARCH64)
-ifneq ($(AS),)
-ASMSRC += common/aarch64/bitstream-a.S \
-          common/aarch64/cabac-a.S     \
-          common/aarch64/dct-a.S     \
-          common/aarch64/deblock-a.S \
-          common/aarch64/mc-a.S      \
-          common/aarch64/pixel-a.S   \
-          common/aarch64/predict-a.S \
-          common/aarch64/quant-a.S
-SRCS   += common/aarch64/asm-offsets.c \
-          common/aarch64/mc-c.c        \
-          common/aarch64/predict-c.c
-OBJASM  = $(ASMSRC:%.S=%.o)
-OBJCHK += tools/checkasm-aarch64.o
+SRCASM_X  = common/aarch64/bitstream-a.S \
+            common/aarch64/cabac-a.S \
+            common/aarch64/dct-a.S \
+            common/aarch64/deblock-a.S \
+            common/aarch64/mc-a.S \
+            common/aarch64/pixel-a.S \
+            common/aarch64/predict-a.S \
+            common/aarch64/quant-a.S
+SRCS_X   += common/aarch64/asm-offsets.c \
+            common/aarch64/mc-c.c \
+            common/aarch64/predict-c.c
+
+OBJASM +=
+ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.S=%-8.o)
 endif
+ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
+OBJASM += $(SRCASM_X:%.S=%-10.o)
+endif
+
+OBJCHK += tools/checkasm-aarch64.o
 endif
 
 # MSA optims
 ifeq ($(SYS_ARCH),MIPS)
 ifneq ($(findstring HAVE_MSA 1, $(CONFIG)),)
-SRCS += common/mips/mc-c.c common/mips/dct-c.c \
-        common/mips/deblock-c.c common/mips/pixel-c.c \
-        common/mips/predict-c.c common/mips/quant-c.c
+SRCS_X += common/mips/dct-c.c \
+          common/mips/deblock-c.c \
+          common/mips/mc-c.c \
+          common/mips/pixel-c.c \
+          common/mips/predict-c.c \
+          common/mips/quant-c.c
+endif
 endif
+
 endif
 
 ifneq ($(HAVE_GETOPT_LONG),1)
@@ -170,14 +214,28 @@
 common/oclobj.h: common/opencl/x264-cl.h $(wildcard $(SRCPATH)/common/opencl/*.cl)
 	cat $^ | $(SRCPATH)/tools/cltostr.sh $@
 GENERATED += common/oclobj.h
-SRCS += common/opencl.c encoder/slicetype-cl.c
+SRCS_8 += common/opencl.c encoder/slicetype-cl.c
 endif
 
 OBJS   += $(SRCS:%.c=%.o)
 OBJCLI += $(SRCCLI:%.c=%.o)
 OBJSO  += $(SRCSO:%.c=%.o)
+OBJEXAMPLE += $(SRCEXAMPLE:%.c=%.o)
 
-.PHONY: all default fprofiled clean distclean install install-* uninstall cli lib-* etags
+ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
+OBJS      += $(SRCS_X:%.c=%-8.o) $(SRCS_8:%.c=%-8.o)
+OBJCLI    += $(SRCCLI_X:%.c=%-8.o)
+OBJCHK_8  += $(SRCCHK_X:%.c=%-8.o)
+checkasm: checkasm8$(EXE)
+endif
+ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
+OBJS      += $(SRCS_X:%.c=%-10.o)
+OBJCLI    += $(SRCCLI_X:%.c=%-10.o)
+OBJCHK_10 += $(SRCCHK_X:%.c=%-10.o)
+checkasm: checkasm10$(EXE)
+endif
+
+.PHONY: all default fprofiled clean distclean install install-* uninstall cli lib-* checkasm etags
 
 cli: x264$(EXE)
 lib-static: $(LIBX264)
@@ -191,32 +249,66 @@
 $(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO)
 	$(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
 
+$(IMPLIBNAME): $(SONAME)
+
 ifneq ($(EXE),)
-.PHONY: x264 checkasm example
+.PHONY: x264 checkasm8 checkasm10 example
 x264: x264$(EXE)
-checkasm: checkasm$(EXE)
+checkasm8: checkasm8$(EXE)
+checkasm10: checkasm10$(EXE)
 example: example$(EXE)
 endif
 
 x264$(EXE): $(GENERATED) .depend $(OBJCLI) $(CLI_LIBX264)
 	$(LD)$@ $(OBJCLI) $(CLI_LIBX264) $(LDFLAGSCLI) $(LDFLAGS)
 
-checkasm$(EXE): $(GENERATED) .depend $(OBJCHK) $(LIBX264)
-	$(LD)$@ $(OBJCHK) $(LIBX264) $(LDFLAGS)
+checkasm8$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_8) $(LIBX264)
+	$(LD)$@ $(OBJCHK) $(OBJCHK_8) $(LIBX264) $(LDFLAGS)
+
+checkasm10$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_10) $(LIBX264)
+	$(LD)$@ $(OBJCHK) $(OBJCHK_10) $(LIBX264) $(LDFLAGS)
 
 example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264)
 	$(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS)
 
-$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJEXAMPLE): .depend
+$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO)
+$(OBJCLI): CFLAGS += $(CFLAGSCLI)
+
+$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $< $(CC_O)
+
+%-8.o: %.c
+	$(CC) $(CFLAGS) -c $< $(CC_O) -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8
+
+%-10.o: %.c
+	$(CC) $(CFLAGS) -c $< $(CC_O) -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10
 
 %.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm
 	$(AS) $(ASFLAGS) -o $@ $<
 	-@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
 
+%-8.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm
+	$(AS) $(ASFLAGS) -o $@ $< -DBIT_DEPTH=8 -Dprivate_prefix=x264_8
+	-@ $(if $(STRIP), $(STRIP) -x $@)
+
+%-10.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm
+	$(AS) $(ASFLAGS) -o $@ $< -DBIT_DEPTH=10 -Dprivate_prefix=x264_10
+	-@ $(if $(STRIP), $(STRIP) -x $@)
+
 %.o: %.S
 	$(AS) $(ASFLAGS) -o $@ $<
 	-@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile
 
+%-8.o: %.S
+	$(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8
+	-@ $(if $(STRIP), $(STRIP) -x $@)
+
+%-10.o: %.S
+	$(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10
+	-@ $(if $(STRIP), $(STRIP) -x $@)
+
 %.dll.o: %.rc x264.h
 	$(RC) $(RCFLAGS)$@ -DDLL $<
 
@@ -227,9 +319,21 @@
 	@rm -f .depend
 	@echo 'dependency file generation...'
 ifeq ($(COMPILER),CL)
-	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%.o)" 1>> .depend;)
+	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO) $(SRCEXAMPLE)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%.o)" 1>> .depend;)
+ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
+	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCS_8) $(SRCCLI_X) $(SRCCHK_X)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%-8.o)" 1>> .depend;)
+endif
+ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
+	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCCLI_X) $(SRCCHK_X)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%-10.o)" 1>> .depend;)
+endif
 else
-	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;)
+	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO) $(SRCEXAMPLE)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;)
+ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),)
+	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCS_8) $(SRCCLI_X) $(SRCCHK_X)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%-8.o) $(DEPMM) 1>> .depend;)
+endif
+ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),)
+	@$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCCLI_X) $(SRCCHK_X)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%-10.o) $(DEPMM) 1>> .depend;)
+endif
 endif
 
 config.mak:
@@ -240,7 +344,7 @@
 include .depend
 endif
 
-SRC2 = $(SRCS) $(SRCCLI)
+OBJPROF = $(OBJS) $(OBJSO) $(OBJCLI)
 # These should cover most of the important codepaths
 OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb
 OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0  --slice-max-mbs 50
@@ -257,25 +361,25 @@
 	@echo 'where infiles are anything that x264 understands,'
 	@echo 'i.e. YUV with resolution in the filename, y4m, or avisynth.'
 else
-fprofiled:
-	$(MAKE) clean
-	$(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)"
+fprofiled: clean
+	$(MAKE) x264$(EXE) CFLAGSPROF="$(PROF_GEN_CC)" LDFLAGSPROF="$(PROF_GEN_LD)"
 	$(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;))
 ifeq ($(COMPILER),CL)
 # Because Visual Studio timestamps the object files within the PGD, it fails to build if they change - only the executable should be deleted
 	rm -f x264$(EXE)
 else
-	rm -f $(SRC2:%.c=%.o)
+	rm -f $(OBJPROF)
 endif
-	$(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)"
-	rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
+	$(MAKE) CFLAGSPROF="$(PROF_USE_CC)" LDFLAGSPROF="$(PROF_USE_LD)"
+	rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
 endif
 
 clean:
-	rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(SONAME) *.a *.lib *.exp *.pdb x264 x264.exe .depend TAGS
-	rm -f checkasm checkasm.exe $(OBJCHK) $(GENERATED) x264_lookahead.clbin
-	rm -f example example.exe $(OBJEXAMPLE)
-	rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
+	rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(GENERATED) .depend TAGS
+	rm -f $(SONAME) *.a *.lib *.exp *.pdb x264$(EXE) x264_lookahead.clbin
+	rm -f checkasm8$(EXE) checkasm10$(EXE) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10)
+	rm -f example$(EXE) $(OBJEXAMPLE)
+	rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
 
 distclean: clean
 	rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
@@ -286,18 +390,17 @@
 	$(INSTALL) x264$(EXE) $(DESTDIR)$(bindir)
 
 install-lib-dev:
-	$(INSTALL) -d $(DESTDIR)$(includedir)
-	$(INSTALL) -d $(DESTDIR)$(libdir)
-	$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
-	$(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
-	$(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir)
+	$(INSTALL) -d $(DESTDIR)$(includedir) $(DESTDIR)$(libdir)/pkgconfig
+	$(INSTALL) -m 644 $(SRCPATH)/x264.h x264_config.h $(DESTDIR)$(includedir)
 	$(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
 
 install-lib-static: lib-static install-lib-dev
+	$(INSTALL) -d $(DESTDIR)$(libdir)
 	$(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir)
 	$(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
 
 install-lib-shared: lib-shared install-lib-dev
+	$(INSTALL) -d $(DESTDIR)$(libdir)
 ifneq ($(IMPLIBNAME),)
 	$(INSTALL) -d $(DESTDIR)$(bindir)
 	$(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir)
@@ -316,7 +419,5 @@
 	rm -f $(DESTDIR)$(libdir)/$(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX)
 endif
 
-etags: TAGS
-
-TAGS:
-	etags $(SRCS)
+etags TAGS:
+	etags $(SRCS) $(SRCS_X) $(SRCS_8)
diff -Nru x264-0.152.2854+gite9a5903/output/flv_bytestream.c x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.c
--- x264-0.152.2854+gite9a5903/output/flv_bytestream.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv_bytestream.c: flv muxer utilities
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
diff -Nru x264-0.152.2854+gite9a5903/output/flv_bytestream.h x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.h
--- x264-0.152.2854+gite9a5903/output/flv_bytestream.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv_bytestream.h: flv muxer utilities
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
@@ -90,8 +90,8 @@
 
 enum
 {
-    FLV_FRAME_KEY   = 1 << FLV_VIDEO_FRAMETYPE_OFFSET | 7,
-    FLV_FRAME_INTER = 2 << FLV_VIDEO_FRAMETYPE_OFFSET | 7,
+    FLV_FRAME_KEY   = 1 << FLV_VIDEO_FRAMETYPE_OFFSET,
+    FLV_FRAME_INTER = 2 << FLV_VIDEO_FRAMETYPE_OFFSET,
 };
 
 typedef enum
diff -Nru x264-0.152.2854+gite9a5903/output/flv.c x264-0.158.2988+git-20191101.7817004/output/flv.c
--- x264-0.152.2854+gite9a5903/output/flv.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/flv.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv.c: flv muxer
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
@@ -196,7 +196,7 @@
     flv_put_be24( c, 0 ); // StreamID - Always 0
     p_flv->start = c->d_cur; // needed for overwriting length
 
-    flv_put_byte( c, 7 | FLV_FRAME_KEY ); // Frametype and CodecID
+    flv_put_byte( c, FLV_FRAME_KEY | FLV_CODECID_H264 ); // FrameType and CodecID
     flv_put_byte( c, 0 ); // AVC sequence header
     flv_put_be24( c, 0 ); // composition time
 
@@ -279,7 +279,7 @@
     flv_put_be24( c, 0 );
 
     p_flv->start = c->d_cur;
-    flv_put_byte( c, p_picture->b_keyframe ? FLV_FRAME_KEY : FLV_FRAME_INTER );
+    flv_put_byte( c, (p_picture->b_keyframe ? FLV_FRAME_KEY : FLV_FRAME_INTER) | FLV_CODECID_H264 );
     flv_put_byte( c, 1 ); // AVC NALU
     flv_put_be24( c, offset );
 
@@ -322,7 +322,12 @@
 
     CHECK( flv_flush_data( c ) );
 
-    double total_duration = (2 * largest_pts - second_largest_pts) * p_flv->d_timebase;
+    double total_duration;
+    /* duration algorithm fails with one frame */
+    if( p_flv->i_framenum == 1 )
+        total_duration = p_flv->i_fps_num ? (double)p_flv->i_fps_den / p_flv->i_fps_num : 0;
+    else
+        total_duration = (2 * largest_pts - second_largest_pts) * p_flv->d_timebase;
 
     if( x264_is_regular_file( c->fp ) && total_duration > 0 )
     {
diff -Nru x264-0.152.2854+gite9a5903/output/matroska.c x264-0.158.2988+git-20191101.7817004/output/matroska.c
--- x264-0.152.2854+gite9a5903/output/matroska.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/matroska.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska.c: matroska muxer
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
diff -Nru x264-0.152.2854+gite9a5903/output/matroska_ebml.c x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.c
--- x264-0.152.2854+gite9a5903/output/matroska_ebml.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.c: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
diff -Nru x264-0.152.2854+gite9a5903/output/matroska_ebml.h x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.h
--- x264-0.152.2854+gite9a5903/output/matroska_ebml.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.h: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2017 x264 project
+ * Copyright (C) 2005-2019 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
diff -Nru x264-0.152.2854+gite9a5903/output/mp4.c x264-0.158.2988+git-20191101.7817004/output/mp4.c
--- x264-0.152.2854+gite9a5903/output/mp4.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/mp4.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mp4.c: mp4 muxer
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
diff -Nru x264-0.152.2854+gite9a5903/output/mp4_lsmash.c x264-0.158.2988+git-20191101.7817004/output/mp4_lsmash.c
--- x264-0.152.2854+gite9a5903/output/mp4_lsmash.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/mp4_lsmash.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mp4_lsmash.c: mp4 muxer using L-SMASH
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
diff -Nru x264-0.152.2854+gite9a5903/output/output.h x264-0.158.2988+git-20191101.7817004/output/output.h
--- x264-0.152.2854+gite9a5903/output/output.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/output.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * output.h: x264 file output modules
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
diff -Nru x264-0.152.2854+gite9a5903/output/raw.c x264-0.158.2988+git-20191101.7817004/output/raw.c
--- x264-0.152.2854+gite9a5903/output/raw.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/output/raw.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw muxer
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
diff -Nru x264-0.152.2854+gite9a5903/tools/bash-autocomplete.sh x264-0.158.2988+git-20191101.7817004/tools/bash-autocomplete.sh
--- x264-0.152.2854+gite9a5903/tools/bash-autocomplete.sh	1970-01-01 00:00:00.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/tools/bash-autocomplete.sh	2019-11-09 05:16:29.000000000 +0000
@@ -0,0 +1,15 @@
+_x264()
+{
+    local path args cur prev
+
+    path="${COMP_LINE%%[[:blank:]]*}"
+    args="${COMP_LINE:${#path}:$((COMP_POINT-${#path}))}"
+    cur="${args##*[[:blank:]=]}"
+    prev="$(sed 's/[[:blank:]=]*$//; s/^.*[[:blank:]]//' <<< "${args%%"$cur"}")"
+
+    # Expand ~
+    printf -v path '%q' "$path" && eval path="${path/#'\~'/'~'}"
+
+    COMPREPLY=($("$path" --autocomplete "$prev" "$cur")) && compopt +o default
+} 2>/dev/null
+complete -o default -F _x264 x264
diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm-aarch64.S x264-0.158.2988+git-20191101.7817004/tools/checkasm-aarch64.S
--- x264-0.152.2854+gite9a5903/tools/checkasm-aarch64.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/tools/checkasm-aarch64.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /****************************************************************************
  * checkasm-aarch64.S: assembly check tool
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Martin Storsjo <martin@martin.st>
  *
@@ -58,7 +58,7 @@
 
 #define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
 
-function x264_checkasm_stack_clobber, export=1
+function checkasm_stack_clobber, export=1
     mov         x3,  sp
     mov         x2,  #CLOBBER_STACK
 1:
@@ -71,7 +71,7 @@
 
 #define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15)
 
-function x264_checkasm_call, export=1
+function checkasm_call, export=1
     stp         x29, x30, [sp, #-16]!
     mov         x29, sp
     stp         x19, x20, [sp, #-16]!
@@ -149,7 +149,7 @@
     mov         w9,  #0
     str         w9,  [x2]
     movrel      x0, error_message
-    bl          X(puts)
+    bl          EXT(puts)
 0:
     ldp         x0,  x1,  [sp], #16
     ldp         d14, d15, [sp], #16
diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm-a.asm x264-0.158.2988+git-20191101.7817004/tools/checkasm-a.asm
--- x264-0.152.2854+gite9a5903/tools/checkasm-a.asm	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/tools/checkasm-a.asm	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* checkasm-a.asm: assembly check tool
 ;*****************************************************************************
-;* Copyright (C) 2008-2017 x264 project
+;* Copyright (C) 2008-2019 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Henrik Gramner <henrik@gramner.com>
diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm-arm.S x264-0.158.2988+git-20191101.7817004/tools/checkasm-arm.S
--- x264-0.152.2854+gite9a5903/tools/checkasm-arm.S	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/tools/checkasm-arm.S	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /****************************************************************************
  * checkasm-arm.S: assembly check tool
  *****************************************************************************
- * Copyright (C) 2015-2017 x264 project
+ * Copyright (C) 2015-2019 x264 project
  *
  * Authors: Martin Storsjo <martin@martin.st>
  *
@@ -52,7 +52,7 @@
 
 .macro clobbercheck variant
 .equ pushed, 4*10
-function x264_checkasm_call_\variant
+function checkasm_call_\variant
     push        {r4-r11, lr}
 .ifc \variant, neon
     vpush       {q4-q7}
@@ -128,7 +128,7 @@
     mov         r12, #0
     str         r12, [r2]
     movrel      r0, error_message
-    blx         X(puts)
+    blx         EXT(puts)
 0:
     pop         {r0, r1}
 .ifc \variant, neon
diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm.c x264-0.158.2988+git-20191101.7817004/tools/checkasm.c
--- x264-0.152.2854+gite9a5903/tools/checkasm.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/tools/checkasm.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * checkasm.c: assembly check tool
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -27,7 +27,6 @@
 
 #include <ctype.h>
 #include "common/common.h"
-#include "common/cpu.h"
 #include "encoder/macroblock.h"
 
 #ifdef _WIN32
@@ -41,15 +40,15 @@
 #endif
 
 /* buf1, buf2: initialised to random data and shouldn't write into them */
-uint8_t *buf1, *buf2;
+static uint8_t *buf1, *buf2;
 /* buf3, buf4: used to store output */
-uint8_t *buf3, *buf4;
+static uint8_t *buf3, *buf4;
 /* pbuf1, pbuf2: initialised to random pixel data and shouldn't write into them. */
-pixel *pbuf1, *pbuf2;
+static pixel *pbuf1, *pbuf2;
 /* pbuf3, pbuf4: point to buf3, buf4, just for type convenience */
-pixel *pbuf3, *pbuf4;
+static pixel *pbuf3, *pbuf4;
 
-int quiet = 0;
+static int quiet = 0;
 
 #define report( name ) { \
     if( used_asm && !quiet ) \
@@ -75,10 +74,10 @@
     bench_t vers[MAX_CPUS];
 } bench_func_t;
 
-int do_bench = 0;
-int bench_pattern_len = 0;
-const char *bench_pattern = "";
-char func_name[100];
+static int do_bench = 0;
+static int bench_pattern_len = 0;
+static const char *bench_pattern = "";
+static char func_name[100];
 static bench_func_t benchs[MAX_FUNCS];
 
 static const char *pixel_names[12] = { "16x16", "16x8", "8x16", "8x8", "8x4", "4x8", "4x4", "4x16", "4x2", "2x8", "2x4", "2x2" };
@@ -176,7 +175,7 @@
             if( k < j )
                 continue;
             printf( "%s_%s%s: %"PRId64"\n", benchs[i].name,
-#if HAVE_MMX
+#if ARCH_X86 || ARCH_X86_64
                     b->cpu&X264_CPU_AVX512 ? "avx512" :
                     b->cpu&X264_CPU_AVX2 ? "avx2" :
                     b->cpu&X264_CPU_BMI2 ? "bmi2" :
@@ -207,7 +206,7 @@
                     b->cpu&X264_CPU_MSA ? "msa" :
 #endif
                     "c",
-#if HAVE_MMX
+#if ARCH_X86 || ARCH_X86_64
                     b->cpu&X264_CPU_CACHELINE_32 ? "_c32" :
                     b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" :
                     b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
@@ -230,7 +229,7 @@
 static void (*simd_warmup_func)( void ) = NULL;
 #define simd_warmup() do { if( simd_warmup_func ) simd_warmup_func(); } while( 0 )
 
-#if ARCH_X86 || ARCH_X86_64
+#if HAVE_MMX
 int x264_stack_pagealign( int (*func)(), int align );
 void x264_checkasm_warmup_avx( void );
 void x264_checkasm_warmup_avx512( void );
@@ -242,11 +241,11 @@
 #define x264_stack_pagealign( func, align ) func()
 #endif
 
-#if ARCH_AARCH64
+#if HAVE_AARCH64
 intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
 #endif
 
-#if ARCH_ARM
+#if HAVE_ARMV6
 intptr_t x264_checkasm_call_neon( intptr_t (*func)(), int *ok, ... );
 intptr_t x264_checkasm_call_noneon( intptr_t (*func)(), int *ok, ... );
 intptr_t (*x264_checkasm_call)( intptr_t (*func)(), int *ok, ... ) = x264_checkasm_call_noneon;
@@ -254,7 +253,7 @@
 
 #define call_c1(func,...) func(__VA_ARGS__)
 
-#if ARCH_X86_64
+#if HAVE_MMX && ARCH_X86_64
 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
  * This is done by clobbering the stack with junk around the stack pointer and calling the
  * assembly function through x264_checkasm_call with added dummy arguments which forces all
@@ -270,19 +269,19 @@
     x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
     simd_warmup(); \
     x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_AARCH64 && !defined(__APPLE__)
+#elif HAVE_AARCH64 && !defined(__APPLE__)
 void x264_checkasm_stack_clobber( uint64_t clobber, ... );
 #define call_a1(func,...) ({ \
     uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
     x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+8 */ \
     x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_X86 || ARCH_ARM
+#elif HAVE_MMX || HAVE_ARMV6
 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
 #else
 #define call_a1 call_c1
 #endif
 
-#if ARCH_ARM
+#if HAVE_ARMV6
 #define call_a1_64(func,...) ((uint64_t (*)(intptr_t(*)(), int*, ...))x264_checkasm_call)( (intptr_t(*)())func, &ok, __VA_ARGS__ )
 #else
 #define call_a1_64 call_a1
@@ -597,13 +596,13 @@
 #define TEST_INTRA_X3( name, i8x8, ... ) \
     if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
     { \
-        ALIGNED_16( int res_c[3] ); \
-        ALIGNED_16( int res_asm[3] ); \
+        ALIGNED_16( int res_c[4] ); \
+        ALIGNED_16( int res_asm[4] ); \
         set_func_name( #name ); \
         used_asm = 1; \
         call_c( pixel_c.name, pbuf1+48, i8x8 ? edge : pbuf3+48, res_c ); \
         call_a( pixel_asm.name, pbuf1+48, i8x8 ? edge : pbuf3+48, res_asm ); \
-        if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
+        if( memcmp(res_c, res_asm, 3 * sizeof(*res_c)) ) \
         { \
             ok = 0; \
             fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \
@@ -862,7 +861,7 @@
     h->param.analyse.i_luma_deadzone[1] = 0;
     h->param.analyse.b_transform_8x8 = 1;
     for( int i = 0; i < 6; i++ )
-        h->pps->scaling_list[i] = x264_cqm_flat16;
+        h->sps->scaling_list[i] = x264_cqm_flat16;
     x264_cqm_init( h );
     x264_quant_init( h, 0, &qf );
 
@@ -1579,13 +1578,14 @@
             intptr_t src_stride = plane_specs[i].src_stride;
             intptr_t dst_stride = ALIGN( w, 16 );
             intptr_t offv = dst_stride*h + 16;
+            pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
 
             for( int pw = 3; pw <= 4; pw++ )
             {
                 memset( pbuf3, 0, 0x1000 );
                 memset( pbuf4, 0, 0x1000 );
-                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
-                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, src1, src_stride, pw, w, h );
+                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, src1, src_stride, pw, w, h );
                 for( int y = 0; y < h; y++ )
                     if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
                         memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
@@ -1800,6 +1800,8 @@
         }
     }
 
+    static const uint16_t mbtree_fix8_counts[] = { 5, 384, 392, 400, 415 };
+
     if( mc_a.mbtree_fix8_pack != mc_ref.mbtree_fix8_pack )
     {
         set_func_name( "mbtree_fix8_pack" );
@@ -1807,9 +1809,9 @@
         float *fix8_src = (float*)(buf3 + 0x800);
         uint16_t *dstc = (uint16_t*)buf3;
         uint16_t *dsta = (uint16_t*)buf4;
-        for( int i = 0; i < 5; i++ )
+        for( int i = 0; i < ARRAY_ELEMS(mbtree_fix8_counts); i++ )
         {
-            int count = 256 + i;
+            int count = mbtree_fix8_counts[i];
 
             for( int j = 0; j < count; j++ )
                 fix8_src[j] = (int16_t)(rand()) / 256.0f;
@@ -1834,9 +1836,9 @@
         uint16_t *fix8_src = (uint16_t*)(buf3 + 0x800);
         float *dstc = (float*)buf3;
         float *dsta = (float*)buf4;
-        for( int i = 0; i < 5; i++ )
+        for( int i = 0; i < ARRAY_ELEMS(mbtree_fix8_counts); i++ )
         {
-            int count = 256 + i;
+            int count = mbtree_fix8_counts[i];
 
             for( int j = 0; j < count; j++ )
                 fix8_src[j] = rand();
@@ -2036,14 +2038,14 @@
         if( i_cqm == 0 )
         {
             for( int i = 0; i < 6; i++ )
-                h->pps->scaling_list[i] = x264_cqm_flat16;
-            h->param.i_cqm_preset = h->pps->i_cqm_preset = X264_CQM_FLAT;
+                h->sps->scaling_list[i] = x264_cqm_flat16;
+            h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_FLAT;
         }
         else if( i_cqm == 1 )
         {
             for( int i = 0; i < 6; i++ )
-                h->pps->scaling_list[i] = x264_cqm_jvt[i];
-            h->param.i_cqm_preset = h->pps->i_cqm_preset = X264_CQM_JVT;
+                h->sps->scaling_list[i] = x264_cqm_jvt[i];
+            h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_JVT;
         }
         else
         {
@@ -2055,8 +2057,8 @@
                 for( int i = 0; i < 64; i++ )
                     cqm_buf[i] = 1;
             for( int i = 0; i < 6; i++ )
-                h->pps->scaling_list[i] = cqm_buf;
-            h->param.i_cqm_preset = h->pps->i_cqm_preset = X264_CQM_CUSTOM;
+                h->sps->scaling_list[i] = cqm_buf;
+            h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_CUSTOM;
         }
 
         h->param.rc.i_qp_min = 0;
@@ -2599,7 +2601,7 @@
 DECL_CABAC(c)
 #if HAVE_MMX
 DECL_CABAC(asm)
-#elif defined(ARCH_AARCH64)
+#elif HAVE_AARCH64
 DECL_CABAC(asm)
 #else
 #define run_cabac_decision_asm run_cabac_decision_c
@@ -2805,7 +2807,7 @@
     int ret = 0;
     int cpu0 = 0, cpu1 = 0;
     uint32_t cpu_detect = x264_cpu_detect();
-#if ARCH_X86 || ARCH_X86_64
+#if HAVE_MMX
     if( cpu_detect & X264_CPU_AVX512 )
         simd_warmup_func = x264_checkasm_warmup_avx512;
     else if( cpu_detect & X264_CPU_AVX )
@@ -2813,7 +2815,7 @@
 #endif
     simd_warmup();
 
-#if HAVE_MMX
+#if ARCH_X86 || ARCH_X86_64
     if( cpu_detect & X264_CPU_MMX2 )
     {
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_MMX | X264_CPU_MMX2, "MMX" );
@@ -2912,7 +2914,7 @@
     return ret;
 }
 
-int main(int argc, char *argv[])
+REALIGN_STACK int main( int argc, char **argv )
 {
 #ifdef _WIN32
     /* Disable the Windows Error Reporting dialog */
@@ -2971,4 +2973,3 @@
         print_bench();
     return 0;
 }
-
diff -Nru x264-0.152.2854+gite9a5903/tools/gas-preprocessor.pl x264-0.158.2988+git-20191101.7817004/tools/gas-preprocessor.pl
--- x264-0.152.2854+gite9a5903/tools/gas-preprocessor.pl	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/tools/gas-preprocessor.pl	2019-11-09 05:16:29.000000000 +0000
@@ -97,8 +97,12 @@
 if ($as_type eq "armasm") {
 
     $preprocess_c_cmd[0] = "cpp";
-    push(@preprocess_c_cmd, "-U__ELF__");
-    push(@preprocess_c_cmd, "-U__MACH__");
+    push(@preprocess_c_cmd, "-undef");
+    # Normally a preprocessor for windows would predefine _WIN32,
+    # but we're using any generic system-agnostic preprocessor "cpp"
+    # with -undef (to avoid getting predefined variables from the host
+    # system in cross compilation cases), so manually define it here.
+    push(@preprocess_c_cmd, "-D_WIN32");
 
     @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
     # Remove -ignore XX parameter pairs from preprocess_c_cmd
@@ -245,7 +249,7 @@
 
 my %thumb_labels;
 my %call_targets;
-my %mov32_targets;
+my %import_symbols;
 
 my %neon_alias_reg;
 my %neon_alias_type;
@@ -270,7 +274,7 @@
     # the line indicates a comment for all supported archs (aarch64, arm, ppc
     # and x86). Also strips line number comments but since they are off anyway
     # it is no loss.
-    s/^#.*$//;
+    s/^\s*#.*$//;
     # remove all comments (to avoid interfering with evaluating directives)
     s/(?<!\\)$inputcomm.*//x;
     # Strip out windows linefeeds
@@ -626,18 +630,26 @@
     return 0;
 }
 
+sub is_aarch64_register {
+    my $name = $_[0];
+    if ($name =~ /^[xw]\d+$/) {
+        return 1;
+    }
+    return 0;
+}
+
 sub handle_local_label {
     my $line = $_[0];
     my $num  = $_[1];
     my $dir  = $_[2];
     my $target = "$num$dir";
     if ($dir eq "b") {
-        $line =~ s/$target/$last_temp_labels{$num}/g;
+        $line =~ s/\b$target\b/$last_temp_labels{$num}/g;
     } else {
         my $name = "temp_label_$temp_label_next";
         $temp_label_next++;
         push(@{$next_temp_labels{$num}}, $name);
-        $line =~ s/$target/$name/g;
+        $line =~ s/\b$target\b/$name/g;
     }
     return $line;
 }
@@ -677,9 +689,9 @@
     }
 
     # handle GNU as pc-relative relocations for adrp/add
-    if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/) {
+    if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/ and $as_type =~ /^apple-/) {
         $line = "$1 adrp$2, ${3}\@PAGE\n";
-    } elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/) {
+    } elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/ and $as_type =~ /^apple-/) {
         $line = "$1 add$2, $3, ${4}\@PAGEOFF\n";
     }
 
@@ -705,7 +717,7 @@
         my $cond = $3;
         my $label = $4;
         # Don't interpret e.g. bic as b<cc> with ic as conditional code
-        if ($cond =~ /|$arm_cond_codes/) {
+        if ($cond =~ /^(|$arm_cond_codes)$/) {
             if (exists $thumb_labels{$label}) {
                 print ASMFILE ".thumb_func $label\n";
             } else {
@@ -785,24 +797,25 @@
     if ($arch eq "aarch64") {
         # fix missing aarch64 instructions in Xcode 5.1 (beta3)
         # mov with vector arguments is not supported, use alias orr instead
-        if ($line =~ /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
-            $line = "        orr $1, $2, $2\n";
+        if ($line =~ /^(\d+:)?\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
+            $line = "$1        orr $2, $3, $3\n";
         }
         # movi 16, 32 bit shifted variant, shift is optional
-        if ($line =~ /^\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) {
-            $line = "        movi $1, $2, lsl #0\n";
+        if ($line =~ /^(\d+:)?\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) {
+            $line = "$1        movi $2, $3, lsl #0\n";
         }
         # Xcode 5 misses the alias uxtl. Replace it with the more general ushll.
         # Clang 3.4 misses the alias sxtl too. Replace it with the more general sshll.
-        if ($line =~ /^\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) {
-            $line = "        $1shll$2 $3, $4, #0\n";
-        }
-        # clang 3.4 does not automatically use shifted immediates in add/sub
-        if ($as_type eq "clang" and
-            $line =~ /^(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) {
-            my $imm = eval $3;
+        # armasm64 also misses these instructions.
+        if ($line =~ /^(\d+:)?\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) {
+            $line = "$1        $2shll$3 $4, $5, #0\n";
+        }
+        # clang 3.4 and armasm64 do not automatically use shifted immediates in add/sub
+        if (($as_type eq "clang" or $as_type eq "armasm") and
+            $line =~ /^(\d+:)?(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) {
+            my $imm = eval $4;
             if ($imm > 4095 and not ($imm & 4095)) {
-                $line = "$1 $2#" . ($imm >> 12) . ", lsl #12\n";
+                $line = "$1 $2 $3#" . ($imm >> 12) . ", lsl #12\n";
             }
         }
         if ($ENV{GASPP_FIX_XCODE5}) {
@@ -853,7 +866,7 @@
             $last_temp_labels{$num} = $name;
         }
 
-        if ($line =~ s/^(\w+):/$1/) {
+        if ($line =~ s/^\s*(\w+):/$1/) {
             # Skip labels that have already been declared with a PROC,
             # labels must not be declared multiple times.
             return if (defined $labels_seen{$1});
@@ -866,21 +879,40 @@
 
 
         # Check branch instructions
-        if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?(..)?(\.w)?)\s+(\w+)/) {
+        if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?\.?(..)?(\.w)?)\s+(\w+)/) {
             my $instr = $2;
             my $cond = $3;
             my $width = $4;
             my $target = $5;
             # Don't interpret e.g. bic as b<cc> with ic as conditional code
-            if ($cond !~ /|$arm_cond_codes/) {
+            if ($cond !~ /^(|$arm_cond_codes)$/) {
                 # Not actually a branch
             } elsif ($target =~ /^(\d+)([bf])$/) {
                 # The target is a local label
                 $line = handle_local_label($line, $1, $2);
-                $line =~ s/\b$instr\b/$&.w/ if $width eq "";
-            } elsif (!is_arm_register($target)) {
+                $line =~ s/\b$instr\b/$&.w/ if $width eq "" and $arch eq "arm";
+            } elsif (($arch eq "arm" and !is_arm_register($target)) or
+                     ($arch eq "aarch64" and !is_aarch64_register($target))) {
+                $call_targets{$target}++;
+            }
+        } elsif ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(cbn?z|adr|tbz)\s+(\w+)\s*,(\s*#\d+\s*,)?\s*(\w+)/) {
+            my $instr = $2;
+            my $reg = $3;
+            my $bit = $4;
+            my $target = $5;
+            if ($target =~ /^(\d+)([bf])$/) {
+                # The target is a local label
+                $line = handle_local_label($line, $1, $2);
+            } else {
                 $call_targets{$target}++;
             }
+            # Convert tbz with a wX register into an xX register,
+            # due to armasm64 bugs/limitations.
+            if ($instr eq "tbz" and $reg =~ /w\d+/) {
+                my $xreg = $reg;
+                $xreg =~ s/w/x/;
+                $line =~ s/\b$reg\b/$xreg/;
+            }
         } elsif ($line =~ /^\s*.h?word.*\b\d+[bf]\b/) {
             while ($line =~ /\b(\d+)([bf])\b/g) {
                 $line = handle_local_label($line, $1, $2);
@@ -918,19 +950,106 @@
             $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/;
         }
 
-        # Change a movw... #:lower16: into a mov32 pseudoinstruction
-        $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
-        # and remove the following, matching movt completely
-        $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
-
-        if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
-            $mov32_targets{$1}++;
-        }
+        if ($arch eq "arm") {
+            # Change a movw... #:lower16: into a mov32 pseudoinstruction
+            $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
+            # and remove the following, matching movt completely
+            $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
+
+            if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
+                $import_symbols{$1}++;
+            }
+
+            # Misc bugs/deficiencies:
+            # armasm seems unable to parse e.g. "vmov s0, s1" without a type
+            # qualifier, thus add .f32.
+            $line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/;
+        } elsif ($arch eq "aarch64") {
+            # Convert ext into ext8; armasm64 seems to require it named as ext8.
+            $line =~ s/^(\s+)ext(\s+)/$1ext8$2/;
+
+            # Pick up targets from ldr x0, =sym+offset
+            if ($line =~ /^\s*ldr\s+(\w+)\s*,\s*=([a-zA-Z]\w*)(.*)$/) {
+                my $reg = $1;
+                my $sym = $2;
+                my $offset = eval_expr($3);
+                if ($offset < 0) {
+                    # armasm64 is buggy with ldr x0, =sym+offset where the
+                    # offset is a negative value; it does write a negative
+                    # offset into the literal pool as it should, but the
+                    # negative offset only covers the lower 32 bit of the 64
+                    # bit literal/relocation.
+                    # Thus remove the offset and apply it manually with a sub
+                    # afterwards.
+                    $offset = -$offset;
+                    $line = "\tldr $reg, =$sym\n\tsub $reg, $reg, #$offset\n";
+                }
+                $import_symbols{$sym}++;
+            }
+
+            # armasm64 (currently) doesn't support offsets on adrp targets,
+            # even though the COFF format relocations (and the linker)
+            # supports it. Therefore strip out the offsets from adrp and
+            # add :lo12: (in case future armasm64 would start handling it)
+            # and add an extra explicit add instruction for the offset.
+            if ($line =~ s/(adrp\s+\w+\s*,\s*(\w+))([\d\+\-\*\/\(\) <>]+)?/\1/) {
+                $import_symbols{$2}++;
+            }
+            if ($line =~ s/(add\s+(\w+)\s*,\s*\w+\s*,\s*):lo12:(\w+)([\d\+\-\*\/\(\) <>]+)?/\1\3/) {
+                my $reg = $2;
+                my $sym = $3;
+                my $offset = eval_expr($4);
+                $line .= "\tadd $reg, $reg, #$offset\n" if $offset > 0;
+                $import_symbols{$sym}++;
+            }
+
+            # Convert e.g. "add x0, x0, w0, uxtw" into "add x0, x0, w0, uxtw #0",
+            # or "ldr x0, [x0, w0, uxtw]" into "ldr x0, [x0, w0, uxtw #0]".
+            $line =~ s/(uxtw|sxtw)(\s*\]?\s*)$/\1 #0\2/i;
+
+            # Convert "mov x0, v0.d[0]" into "umov x0, v0.d[0]"
+            $line =~ s/\bmov\s+[xw]\d+\s*,\s*v\d+\.[ds]/u$&/i;
+
+            # Convert "ccmp w0, #0, #0, ne" into "ccmpne w0, #0, #0",
+            # and "csel w0, w0, w0, ne" into "cselne w0, w0, w0".
+            $line =~ s/(ccmp|csel)\s+([xw]\w+)\s*,\s*([xw#]\w+)\s*,\s*([xw#]\w+)\s*,\s*($arm_cond_codes)/\1\5 \2, \3, \4/;
+
+            # Convert "cinc w0, w0, ne" into "cincne w0, w0".
+            $line =~ s/(cinc)\s+([xw]\w+)\s*,\s*([xw]\w+)\s*,\s*($arm_cond_codes)/\1\4 \2, \3/;
+
+            # Convert "cset w0, lo" into "csetlo w0"
+            $line =~ s/(cset)\s+([xw]\w+)\s*,\s*($arm_cond_codes)/\1\3 \2/;
+
+            # Strip out prfum; armasm64 fails to assemble any
+            # variant/combination of prfum tested so far, but it can be
+            # left out without any functional change (it is only a prefetch hint).
+            $line =~ s/prfum.*\]//;
+
+            # Convert "ldrb w0, [x0, #-1]" into "ldurb w0, [x0, #-1]".
+            # Don't do this for forms with writeback though.
+            if ($line =~ /(ld|st)(r[bh]?)\s+(\w+)\s*,\s*\[\s*(\w+)\s*,\s*#([^\]]+)\s*\][^!]/) {
+                my $instr = $1;
+                my $suffix = $2;
+                my $target = $3;
+                my $base = $4;
+                my $offset = eval_expr($5);
+                if ($offset < 0) {
+                    $line =~ s/$instr$suffix/${instr}u$suffix/;
+                }
+            }
 
-        # Misc bugs/deficiencies:
-        # armasm seems unable to parse e.g. "vmov s0, s1" without a type
-        # qualifier, thus add .f32.
-        $line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/;
+            if ($ENV{GASPP_ARMASM64_INVERT_SCALE}) {
+                # Instructions like fcvtzs and scvtf store the scale value
+                # inverted in the opcode (stored as 64 - scale), but armasm64
+                # in early versions stores it as-is. Thus convert from
+                # "fcvtzs w0, s0, #8" into "fcvtzs w0, s0, #56".
+                if ($line =~ /(?:fcvtzs|scvtf)\s+(\w+)\s*,\s*(\w+)\s*,\s*#(\d+)/) {
+                    my $scale = $3;
+                    my $inverted_scale = 64 - $3;
+                    $line =~ s/#$scale/#$inverted_scale/;
+                }
+            }
+        }
         # armasm is unable to parse &0x - add spacing
         $line =~ s/&0x/& 0x/g;
     }
@@ -944,7 +1063,7 @@
         # Convert "mov pc, lr" into "bx lr", since the former only works
         # for switching from arm to thumb (and only in armv7), but not
         # from thumb to arm.
-        s/mov\s*pc\s*,\s*lr/bx lr/g;
+        $line =~ s/mov\s*pc\s*,\s*lr/bx lr/g;
 
         # Convert stmdb/ldmia/stmfd/ldmfd/ldm with only one register into a plain str/ldr with post-increment/decrement.
         # Wide thumb2 encoding requires at least two registers in register list while all other encodings support one register too.
@@ -1013,11 +1132,16 @@
         $line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=4, CODEALIGN/;
         $line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/;
         $line =~ s/\.data/AREA |.data|, DATA, ALIGN=5/;
-
+    }
+    if ($as_type eq "armasm" and $arch eq "arm") {
         $line =~ s/fmxr/vmsr/;
         $line =~ s/fmrx/vmrs/;
         $line =~ s/fadds/vadd.f32/;
     }
+    if ($as_type eq "armasm" and $arch eq "aarch64") {
+        # Convert "b.eq" into "beq"
+        $line =~ s/\bb\.($arm_cond_codes)\b/b\1/;
+    }
 
     # catch unknown section names that aren't mach-o style (with a comma)
     if ($as_type =~ /apple-/ and $line =~ /.section ([^,]*)$/) {
@@ -1038,7 +1162,7 @@
         grep exists $thumb_labels{$_}, keys %call_targets;
 } else {
     map print(ASMFILE "\tIMPORT $_\n"),
-        grep ! exists $labels_seen{$_}, (keys %call_targets, keys %mov32_targets);
+        grep ! exists $labels_seen{$_}, (keys %call_targets, keys %import_symbols);
 
     print ASMFILE "\tEND\n";
 }
diff -Nru x264-0.152.2854+gite9a5903/version.sh x264-0.158.2988+git-20191101.7817004/version.sh
--- x264-0.152.2854+gite9a5903/version.sh	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/version.sh	2019-11-09 05:16:29.000000000 +0000
@@ -1,5 +1,28 @@
 #!/bin/sh
-# Script modified from upstream source for Debian packaging since packaging
-# won't include .git repository.
-echo '#define X264_VERSION " r2854 e9a5903"'
-echo '#define X264_POINTVER "0.152.2854 e9a5903"'
+
+cd "$(dirname "$0")" >/dev/null && [ -f x264.h ] || exit 1
+
+api="$(grep '#define X264_BUILD' < x264.h | sed 's/^.* \([1-9][0-9]*\).*$/\1/')"
+ver="x"
+version=""
+
+if [ -d .git ] && command -v git >/dev/null 2>&1 ; then
+    localver="$(($(git rev-list HEAD | wc -l)))"
+    if [ "$localver" -gt 1 ] ; then
+        ver_diff="$(($(git rev-list origin/master..HEAD | wc -l)))"
+        ver="$((localver-ver_diff))"
+        echo "#define X264_REV $ver"
+        echo "#define X264_REV_DIFF $ver_diff"
+        if [ "$ver_diff" -ne 0 ] ; then
+            ver="$ver+$ver_diff"
+        fi
+        if git status | grep -q "modified:" ; then
+            ver="${ver}M"
+        fi
+        ver="$ver $(git rev-list -n 1 HEAD | cut -c 1-7)"
+        version=" r$ver"
+    fi
+fi
+
+echo "#define X264_VERSION \"$version\""
+echo "#define X264_POINTVER \"0.$api.$ver\""
diff -Nru x264-0.152.2854+gite9a5903/x264.c x264-0.158.2988+git-20191101.7817004/x264.c
--- x264-0.152.2854+gite9a5903/x264.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/x264.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264: top-level x264cli functions
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -39,12 +39,14 @@
 
 #include <signal.h>
 #include <getopt.h>
-#include "common/common.h"
 #include "x264cli.h"
 #include "input/input.h"
 #include "output/output.h"
 #include "filters/filters.h"
 
+#define QP_MAX_SPEC (51+6*2)
+#define QP_MAX (QP_MAX_SPEC+18)
+
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
 
 #if HAVE_LAVF
@@ -164,11 +166,52 @@
 /* video filter operation struct */
 static cli_vid_filter_t filter;
 
-static const char * const demuxer_names[] =
+const char * const x264_avcintra_class_names[] = { "50", "100", "200", 0 };
+const char * const x264_cqm_names[] = { "flat", "jvt", 0 };
+const char * const x264_log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
+const char * const x264_partition_names[] = { "p8x8", "p4x4", "b8x8", "i8x8", "i4x4", "none", "all", 0 };
+const char * const x264_pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
+const char * const x264_range_names[] = { "auto", "tv", "pc", 0 };
+
+const char * const x264_output_csp_names[] =
+{
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I400
+    "i400",
+#endif
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420
+    "i420",
+#endif
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422
+    "i422",
+#endif
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I444
+    "i444", "rgb",
+#endif
+    0
+};
+
+const char * const x264_valid_profile_names[] =
+{
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT <= X264_CSP_I420
+#if HAVE_BITDEPTH8
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420
+    "baseline", "main",
+#endif
+    "high",
+#endif
+#if HAVE_BITDEPTH10
+   "high10",
+#endif
+#endif
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422
+   "high422",
+#endif
+   "high444", 0
+};
+
+const char * const x264_demuxer_names[] =
 {
-    "auto",
-    "raw",
-    "y4m",
+    "auto", "raw", "y4m",
 #if HAVE_AVS
     "avs",
 #endif
@@ -181,43 +224,24 @@
     0
 };
 
-static const char * const muxer_names[] =
+const char * const x264_muxer_names[] =
 {
-    "auto",
-    "raw",
-    "mkv",
-    "flv",
+    "auto", "raw", "mkv", "flv",
 #if HAVE_GPAC || HAVE_LSMASH
     "mp4",
 #endif
     0
 };
 
-static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
-static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
-static const char * const output_csp_names[] =
-{
-#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420
-    "i420",
-#endif
-#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422
-    "i422",
-#endif
-#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I444
-    "i444", "rgb",
-#endif
-    0
-};
 static const char * const chroma_format_names[] =
 {
     [0] = "all",
+    [X264_CSP_I400] = "i400",
     [X264_CSP_I420] = "i420",
     [X264_CSP_I422] = "i422",
     [X264_CSP_I444] = "i444"
 };
 
-static const char * const range_names[] = { "auto", "tv", "pc", 0 };
-
 typedef struct
 {
     int mod;
@@ -331,8 +355,8 @@
 #else
     printf( "using an unknown compiler\n" );
 #endif
-    printf( "x264 configuration: --bit-depth=%d --chroma-format=%s\n", X264_BIT_DEPTH, chroma_format_names[X264_CHROMA_FORMAT] );
-    printf( "libx264 configuration: --bit-depth=%d --chroma-format=%s\n", x264_bit_depth, chroma_format_names[x264_chroma_format] );
+    printf( "x264 configuration: --chroma-format=%s\n", chroma_format_names[X264_CHROMA_FORMAT] );
+    printf( "libx264 configuration: --chroma-format=%s\n", chroma_format_names[x264_chroma_format] );
     printf( "x264 license: " );
 #if HAVE_GPL
     printf( "GPL version 2 or later\n" );
@@ -349,8 +373,11 @@
 #endif
 }
 
-int main( int argc, char **argv )
+REALIGN_STACK int main( int argc, char **argv )
 {
+    if( argc == 4 && !strcmp( argv[1], "--autocomplete" ) )
+        return x264_cli_autocomplete( argv[2], argv[3] );
+
     x264_param_t param;
     cli_opt_t opt = {0};
     int ret = 0;
@@ -483,7 +510,7 @@
         " .mkv -> Matroska\n"
         " .flv -> Flash Video\n"
         " .mp4 -> MP4 if compiled with GPAC or L-SMASH support (%s)\n"
-        "Output bit depth: %d (configured at compile time)\n"
+        "Output bit depth: %s\n"
         "\n"
         "Options:\n"
         "\n"
@@ -514,7 +541,15 @@
 #else
         "no",
 #endif
-        x264_bit_depth
+#if HAVE_BITDEPTH8 && HAVE_BITDEPTH10
+        "8/10"
+#elif HAVE_BITDEPTH8
+        "8"
+#elif HAVE_BITDEPTH10
+        "10"
+#else
+        "none"
+#endif
       );
     H0( "Example usage:\n" );
     H0( "\n" );
@@ -539,8 +574,9 @@
     H0( "      --profile <string>      Force the limits of an H.264 profile\n"
         "                                  Overrides all settings.\n" );
     H2(
-#if X264_CHROMA_FORMAT <= X264_CSP_I420
-#if X264_BIT_DEPTH==8
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT <= X264_CSP_I420
+#if HAVE_BITDEPTH8
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420
         "                                  - baseline:\n"
         "                                    --no-8x8dct --bframes 0 --no-cabac\n"
         "                                    --cqm flat --weightp 0\n"
@@ -549,14 +585,17 @@
         "                                  - main:\n"
         "                                    --no-8x8dct --cqm flat\n"
         "                                    No lossless.\n"
+#endif
         "                                  - high:\n"
         "                                    No lossless.\n"
 #endif
+#if HAVE_BITDEPTH10
         "                                  - high10:\n"
         "                                    No lossless.\n"
         "                                    Support for bit depth 8-10.\n"
 #endif
-#if X264_CHROMA_FORMAT <= X264_CSP_I422
+#endif
+#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422
         "                                  - high422:\n"
         "                                    No lossless.\n"
         "                                    Support for bit depth 8-10.\n"
@@ -565,19 +604,7 @@
         "                                  - high444:\n"
         "                                    Support for bit depth 8-10.\n"
         "                                    Support for 4:2:0/4:2:2/4:4:4 chroma subsampling.\n" );
-        else H0(
-        "                                  - "
-#if X264_CHROMA_FORMAT <= X264_CSP_I420
-#if X264_BIT_DEPTH==8
-        "baseline,main,high,"
-#endif
-        "high10,"
-#endif
-#if X264_CHROMA_FORMAT <= X264_CSP_I422
-        "high422,"
-#endif
-        "high444\n"
-               );
+    else H0( "                                  - %s\n", stringify_names( buf, x264_valid_profile_names ) );
     H0( "      --preset <string>       Use a preset to select encoding settings [medium]\n"
         "                                  Overridden by user settings.\n" );
     H2( "                                  - ultrafast:\n"
@@ -726,7 +753,7 @@
     H2( "      --crf-max <float>       With CRF+VBV, limit RF to this value\n"
         "                                  May cause VBV underflows!\n" );
     H2( "      --qpmin <integer>       Set min QP [%d]\n", defaults->rc.i_qp_min );
-    H2( "      --qpmax <integer>       Set max QP [%d]\n", defaults->rc.i_qp_max );
+    H2( "      --qpmax <integer>       Set max QP [%d]\n", X264_MIN( defaults->rc.i_qp_max, QP_MAX ) );
     H2( "      --qpstep <integer>      Set max QP step [%d]\n", defaults->rc.i_qp_step );
     H2( "      --ratetol <float>       Tolerance of ABR ratecontrol and VBV [%.1f]\n", defaults->rc.f_rate_tolerance );
     H2( "      --ipratio <float>       QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor );
@@ -764,9 +791,8 @@
     H1( "Analysis:\n" );
     H1( "\n" );
     H1( "  -A, --partitions <string>   Partitions to consider [\"p8x8,b8x8,i8x8,i4x4\"]\n"
-        "                                  - p8x8, p4x4, b8x8, i8x8, i4x4\n"
-        "                                  - none, all\n"
-        "                                  (p4x4 requires p8x8. i8x8 requires --8x8dct.)\n" );
+        "                                  - %s\n"
+        "                                  (p4x4 requires p8x8. i8x8 requires --8x8dct.)\n", stringify_names( buf, x264_partition_names ) );
     H1( "      --direct <string>       Direct MV prediction mode [\"%s\"]\n"
         "                                  - none, spatial, temporal, auto\n",
                                        strtable_lookup( x264_direct_pred_names, defaults->analyse.i_direct_mv_pred ) );
@@ -818,8 +844,8 @@
     H2( "      --deadzone-inter <int>  Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
     H2( "      --deadzone-intra <int>  Set the size of the intra luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[1] );
     H2( "                                  Deadzones should be in the range 0 - 32.\n" );
-    H2( "      --cqm <string>          Preset quant matrices [\"flat\"]\n"
-        "                                  - jvt, flat\n" );
+    H2( "      --cqm <string>          Preset quant matrices [\"%s\"]\n"
+        "                                  - %s\n", x264_cqm_names[0], stringify_names( buf, x264_cqm_names ) );
     H1( "      --cqmfile <string>      Read custom quant matrices from a JM-compatible file\n" );
     H2( "                                  Overrides any other --cqm* options.\n" );
     H2( "      --cqm4 <list>           Set all 4x4 quant matrices\n"
@@ -842,7 +868,7 @@
         "                                  - component, pal, ntsc, secam, mac, undef\n",
                                        strtable_lookup( x264_vidformat_names, defaults->vui.i_vidformat ) );
     H2( "      --range <string>        Specify color range [\"%s\"]\n"
-        "                                  - %s\n", range_names[0], stringify_names( buf, range_names ) );
+        "                                  - %s\n", x264_range_names[0], stringify_names( buf, x264_range_names ) );
     H2( "      --colorprim <string>    Specify color primaries [\"%s\"]\n"
         "                                  - undef, bt709, bt470m, bt470bg, smpte170m,\n"
         "                                    smpte240m, film, bt2020, smpte428,\n"
@@ -852,16 +878,21 @@
         "                                  - undef, bt709, bt470m, bt470bg, smpte170m,\n"
         "                                    smpte240m, linear, log100, log316,\n"
         "                                    iec61966-2-4, bt1361e, iec61966-2-1,\n"
-        "                                    bt2020-10, bt2020-12, smpte2084, smpte428\n",
+        "                                    bt2020-10, bt2020-12, smpte2084, smpte428,\n"
+        "                                    arib-std-b67\n",
                                        strtable_lookup( x264_transfer_names, defaults->vui.i_transfer ) );
     H2( "      --colormatrix <string>  Specify color matrix setting [\"%s\"]\n"
         "                                  - undef, bt709, fcc, bt470bg, smpte170m,\n"
         "                                    smpte240m, GBR, YCgCo, bt2020nc, bt2020c,\n"
-        "                                    smpte2085\n",
+        "                                    smpte2085, chroma-derived-nc,\n"
+        "                                    chroma-derived-c, ICtCp\n",
                                        strtable_lookup( x264_colmatrix_names, defaults->vui.i_colmatrix ) );
     H2( "      --chromaloc <integer>   Specify chroma sample location (0 to 5) [%d]\n",
                                        defaults->vui.i_chroma_loc );
-
+    H2( "      --alternative-transfer <string> Specify an alternative transfer\n"
+        "                              characteristics [\"%s\"]\n"
+        "                                  - same values as --transfer\n",
+                                       strtable_lookup( x264_transfer_names, defaults->i_alternative_transfer ) );
     H2( "      --nal-hrd <string>      Signal HRD information (requires vbv-bufsize)\n"
         "                                  - none, vbr, cbr (cbr not allowed in .mp4)\n" );
     H2( "      --filler                Force hard-CBR and generate filler (implied by\n"
@@ -875,17 +906,24 @@
     H0( "\n" );
     H0( "  -o, --output <string>       Specify output file\n" );
     H1( "      --muxer <string>        Specify output container format [\"%s\"]\n"
-        "                                  - %s\n", muxer_names[0], stringify_names( buf, muxer_names ) );
+        "                                  - %s\n", x264_muxer_names[0], stringify_names( buf, x264_muxer_names ) );
     H1( "      --demuxer <string>      Specify input container format [\"%s\"]\n"
-        "                                  - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) );
+        "                                  - %s\n", x264_demuxer_names[0], stringify_names( buf, x264_demuxer_names ) );
     H1( "      --input-fmt <string>    Specify input file format (requires lavf support)\n" );
     H1( "      --input-csp <string>    Specify input colorspace format for raw input\n" );
     print_csp_names( longhelp );
     H1( "      --output-csp <string>   Specify output colorspace [\"%s\"]\n"
-        "                                  - %s\n", output_csp_names[0], stringify_names( buf, output_csp_names ) );
+        "                                  - %s\n",
+#if X264_CHROMA_FORMAT
+        x264_output_csp_names[0],
+#else
+        "i420",
+#endif
+        stringify_names( buf, x264_output_csp_names ) );
     H1( "      --input-depth <integer> Specify input bit depth for raw input\n" );
+    H1( "      --output-depth <integer> Specify output bit depth\n" );
     H1( "      --input-range <string>  Specify input color range [\"%s\"]\n"
-        "                                  - %s\n", range_names[0], stringify_names( buf, range_names ) );
+        "                                  - %s\n", x264_range_names[0], stringify_names( buf, x264_range_names ) );
     H1( "      --input-res <intxint>   Specify input resolution (width x height)\n" );
     H1( "      --index <string>        Filename for input index file\n" );
     H0( "      --sar width:height      Specify Sample Aspect Ratio\n" );
@@ -895,7 +933,9 @@
     H0( "      --level <string>        Specify level (as defined by Annex A)\n" );
     H1( "      --bluray-compat         Enable compatibility hacks for Blu-ray support\n" );
     H1( "      --avcintra-class <integer> Use compatibility hacks for AVC-Intra class\n"
-        "                                  - 50, 100, 200\n" );
+        "                                  - %s\n", stringify_names( buf, x264_avcintra_class_names ) );
+    H1( "      --avcintra-flavor <string> AVC-Intra flavor [\"%s\"]\n"
+        "                                  - %s\n", x264_avcintra_flavor_names[0], stringify_names( buf, x264_avcintra_flavor_names ) );
     H1( "      --stitchable            Don't optimize headers based on video content\n"
         "                              Ensures ability to recombine a segmented encode\n" );
     H1( "\n" );
@@ -903,8 +943,8 @@
     H1( "      --no-progress           Don't show the progress indicator while encoding\n" );
     H0( "      --quiet                 Quiet Mode\n" );
     H1( "      --log-level <string>    Specify the maximum level of logging [\"%s\"]\n"
-        "                                  - %s\n", strtable_lookup( log_level_names, cli_log_level - X264_LOG_NONE ),
-                                       stringify_names( buf, log_level_names ) );
+        "                                  - %s\n", strtable_lookup( x264_log_level_names, cli_log_level - X264_LOG_NONE ),
+                                       stringify_names( buf, x264_log_level_names ) );
     H1( "      --psnr                  Enable PSNR computation\n" );
     H1( "      --ssim                  Enable SSIM computation\n" );
     H1( "      --threads <integer>     Force a specific number of threads\n" );
@@ -972,6 +1012,7 @@
     OPT_INPUT_RES,
     OPT_INPUT_CSP,
     OPT_INPUT_DEPTH,
+    OPT_OUTPUT_DEPTH,
     OPT_DTS_COMPRESSION,
     OPT_OUTPUT_CSP,
     OPT_INPUT_RANGE,
@@ -998,6 +1039,7 @@
     { "open-gop",          no_argument, NULL, 0 },
     { "bluray-compat",     no_argument, NULL, 0 },
     { "avcintra-class", required_argument, NULL, 0 },
+    { "avcintra-flavor", required_argument, NULL, 0 },
     { "min-keyint",  required_argument, NULL, 'i' },
     { "keyint",      required_argument, NULL, 'I' },
     { "intra-refresh",     no_argument, NULL, 0 },
@@ -1133,12 +1175,14 @@
     { "pulldown",    required_argument, NULL, OPT_PULLDOWN },
     { "fake-interlaced",   no_argument, NULL, 0 },
     { "frame-packing",     required_argument, NULL, 0 },
+    { "alternative-transfer", required_argument, NULL, 0 },
     { "vf",          required_argument, NULL, OPT_VIDEO_FILTER },
     { "video-filter", required_argument, NULL, OPT_VIDEO_FILTER },
     { "input-fmt",   required_argument, NULL, OPT_INPUT_FMT },
     { "input-res",   required_argument, NULL, OPT_INPUT_RES },
     { "input-csp",   required_argument, NULL, OPT_INPUT_CSP },
     { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
+    { "output-depth", required_argument, NULL, OPT_OUTPUT_DEPTH },
     { "dts-compress",      no_argument, NULL, OPT_DTS_COMPRESSION },
     { "output-csp",  required_argument, NULL, OPT_OUTPUT_CSP },
     { "input-range", required_argument, NULL, OPT_INPUT_RANGE },
@@ -1298,7 +1342,9 @@
     /* force the output csp to what the user specified (or the default) */
     param->i_csp = info->csp;
     int csp = info->csp & X264_CSP_MASK;
-    if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp >= X264_CSP_I422) )
+    if( output_csp == X264_CSP_I400 && csp != X264_CSP_I400 )
+        param->i_csp = X264_CSP_I400;
+    else if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp >= X264_CSP_I422) )
         param->i_csp = X264_CSP_I420;
     else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp >= X264_CSP_I444) )
         param->i_csp = X264_CSP_I422;
@@ -1314,10 +1360,11 @@
     if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) )
         return -1;
 
-    char args[20];
-    sprintf( args, "bit_depth=%d", x264_bit_depth );
+    char args[20], name[20]; /* i_bitdepth can be an arbitrary --output-depth value, so bound the writes */
+    snprintf( args, sizeof(args), "bit_depth=%d", param->i_bitdepth );
+    snprintf( name, sizeof(name), "depth_%d", param->i_bitdepth );
 
-    if( x264_init_vid_filter( "depth", handle, &filter, info, param, args ) )
+    if( x264_init_vid_filter( name, handle, &filter, info, param, args ) )
         return -1;
 
     return 0;
@@ -1348,9 +1395,9 @@
 static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
 {
     char *input_filename = NULL;
-    const char *demuxer = demuxer_names[0];
+    const char *demuxer = x264_demuxer_names[0];
     char *output_filename = NULL;
-    const char *muxer = muxer_names[0];
+    const char *muxer = x264_muxer_names[0];
     char *tcfile_name = NULL;
     x264_param_t defaults;
     char *profile = NULL;
@@ -1432,10 +1479,10 @@
                 output_filename = optarg;
                 break;
             case OPT_MUXER:
-                FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg );
+                FAIL_IF_ERROR( parse_enum_name( optarg, x264_muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg );
                 break;
             case OPT_DEMUXER:
-                FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg );
+                FAIL_IF_ERROR( parse_enum_name( optarg, x264_demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg );
                 break;
             case OPT_INDEX:
                 input_opt.index_file = optarg;
@@ -1460,7 +1507,7 @@
                 cli_log_level = param->i_log_level = X264_LOG_DEBUG;
                 break;
             case OPT_LOG_LEVEL:
-                if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) )
+                if( !parse_enum_value( optarg, x264_log_level_names, &cli_log_level ) )
                     cli_log_level += X264_LOG_NONE;
                 else
                     cli_log_level = atoi( optarg );
@@ -1499,7 +1546,7 @@
                 input_opt.timebase = optarg;
                 break;
             case OPT_PULLDOWN:
-                FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg );
+                FAIL_IF_ERROR( parse_enum_value( optarg, x264_pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg );
                 break;
             case OPT_VIDEO_FILTER:
                 vid_filters = optarg;
@@ -1516,25 +1563,28 @@
             case OPT_INPUT_DEPTH:
                 input_opt.bit_depth = atoi( optarg );
                 break;
+            case OPT_OUTPUT_DEPTH:
+                param->i_bitdepth = atoi( optarg );
+                break;
             case OPT_DTS_COMPRESSION:
                 output_opt.use_dts_compress = 1;
                 break;
             case OPT_OUTPUT_CSP:
-                FAIL_IF_ERROR( parse_enum_value( optarg, output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg );
+                FAIL_IF_ERROR( parse_enum_value( optarg, x264_output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg );
                 // correct the parsed value to the libx264 csp value
 #if X264_CHROMA_FORMAT
                 static const uint8_t output_csp_fix[] = { X264_CHROMA_FORMAT, X264_CSP_RGB };
 #else
-                static const uint8_t output_csp_fix[] = { X264_CSP_I420, X264_CSP_I422, X264_CSP_I444, X264_CSP_RGB };
+                static const uint8_t output_csp_fix[] = { X264_CSP_I400, X264_CSP_I420, X264_CSP_I422, X264_CSP_I444, X264_CSP_RGB };
 #endif
                 param->i_csp = output_csp = output_csp_fix[output_csp];
                 break;
             case OPT_INPUT_RANGE:
-                FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg );
+                FAIL_IF_ERROR( parse_enum_value( optarg, x264_range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg );
                 input_opt.input_range += RANGE_AUTO;
                 break;
             case OPT_RANGE:
-                FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &param->vui.b_fullrange ), "Unknown range `%s'\n", optarg );
+                FAIL_IF_ERROR( parse_enum_value( optarg, x264_range_names, &param->vui.b_fullrange ), "Unknown range `%s'\n", optarg );
                 input_opt.output_range = param->vui.b_fullrange += RANGE_AUTO;
                 break;
             default:
@@ -1627,15 +1677,23 @@
 
     /* init threaded input while the information about the input video is unaltered by filtering */
 #if HAVE_THREAD
-    if( info.thread_safe && (b_thread_input || param->i_threads > 1
+    const cli_input_t *thread_input;
+    if( HAVE_BITDEPTH8 && param->i_bitdepth == 8 )
+        thread_input = &thread_8_input;
+    else if( HAVE_BITDEPTH10 && param->i_bitdepth == 10 )
+        thread_input = &thread_10_input;
+    else
+        thread_input = NULL;
+
+    if( thread_input && info.thread_safe && (b_thread_input || param->i_threads > 1
         || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1)) )
     {
-        if( thread_input.open_file( NULL, &opt->hin, &info, NULL ) )
+        if( thread_input->open_file( NULL, &opt->hin, &info, NULL ) )
         {
             fprintf( stderr, "x264 [error]: threaded input failed\n" );
             return -1;
         }
-        cli_input = thread_input;
+        cli_input = *thread_input;
     }
 #endif
 
diff -Nru x264-0.152.2854+gite9a5903/x264cli.h x264-0.158.2988+git-20191101.7817004/x264cli.h
--- x264-0.152.2854+gite9a5903/x264cli.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/x264cli.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264cli.h: x264cli common
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,13 +27,24 @@
 #ifndef X264_CLI_H
 #define X264_CLI_H
 
-#include "common/common.h"
+#include "common/base.h"
 
 /* In microseconds */
 #define UPDATE_INTERVAL 250000
 
 typedef void *hnd_t;
 
+extern const char * const x264_avcintra_class_names[];
+extern const char * const x264_cqm_names[];
+extern const char * const x264_log_level_names[];
+extern const char * const x264_partition_names[];
+extern const char * const x264_pulldown_names[];
+extern const char * const x264_range_names[];
+extern const char * const x264_output_csp_names[];
+extern const char * const x264_valid_profile_names[];
+extern const char * const x264_demuxer_names[];
+extern const char * const x264_muxer_names[];
+
 static inline uint64_t gcd( uint64_t a, uint64_t b )
 {
     while( 1 )
@@ -62,6 +73,7 @@
 
 void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
 void x264_cli_printf( int i_level, const char *fmt, ... );
+int x264_cli_autocomplete( const char *prev, const char *cur );
 
 #ifdef _WIN32
 void x264_cli_set_console_title( const char *title );
diff -Nru x264-0.152.2854+gite9a5903/x264dll.c x264-0.158.2988+git-20191101.7817004/x264dll.c
--- x264-0.152.2854+gite9a5903/x264dll.c	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/x264dll.c	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264dll: x264 DLLMain for win32
  *****************************************************************************
- * Copyright (C) 2009-2017 x264 project
+ * Copyright (C) 2009-2019 x264 project
  *
  * Authors: Anton Mitrofanov <BugMaster@narod.ru>
  *
@@ -23,7 +23,7 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
-#include "common/common.h"
+#include "common/base.h"
 #include <windows.h>
 
 /* Callback for our DLL so we can initialize pthread */
diff -Nru x264-0.152.2854+gite9a5903/x264.h x264-0.158.2988+git-20191101.7817004/x264.h
--- x264-0.152.2854+gite9a5903/x264.h	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/x264.h	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264.h: x264 public header
  *****************************************************************************
- * Copyright (C) 2003-2017 x264 project
+ * Copyright (C) 2003-2019 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -45,7 +45,20 @@
 
 #include "x264_config.h"
 
-#define X264_BUILD 152
+#define X264_BUILD 158
+
+#ifdef _WIN32
+#   define X264_DLL_IMPORT __declspec(dllimport)
+#   define X264_DLL_EXPORT __declspec(dllexport)
+#else
+#   if defined(__GNUC__) && (__GNUC__ >= 4)
+#       define X264_DLL_IMPORT
+#       define X264_DLL_EXPORT __attribute__((visibility("default")))
+#   else
+#       define X264_DLL_IMPORT
+#       define X264_DLL_EXPORT
+#   endif
+#endif
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment
@@ -53,9 +66,13 @@
  * This clause does not apply to MinGW, similar development environments, or non
  * Windows platforms. */
 #ifdef X264_API_IMPORTS
-#define X264_API __declspec(dllimport)
+#   define X264_API X264_DLL_IMPORT
 #else
-#define X264_API
+#   ifdef X264_API_EXPORTS
+#       define X264_API X264_DLL_EXPORT
+#   else
+#       define X264_API
+#   endif
 #endif
 
 /* x264_t:
@@ -170,6 +187,7 @@
 #define X264_ANALYSE_PSUB16x16  0x0010  /* Analyse p16x8, p8x16 and p8x8 */
 #define X264_ANALYSE_PSUB8x8    0x0020  /* Analyse p8x4, p4x8, p4x4 */
 #define X264_ANALYSE_BSUB16x16  0x0100  /* Analyse b16x8, b8x16 and b8x8 */
+
 #define X264_DIRECT_PRED_NONE        0
 #define X264_DIRECT_PRED_SPATIAL     1
 #define X264_DIRECT_PRED_TEMPORAL    2
@@ -202,6 +220,10 @@
 #define X264_KEYINT_MIN_AUTO         0
 #define X264_KEYINT_MAX_INFINITE     (1<<30)
 
+/* AVC-Intra flavors */
+#define X264_AVCINTRA_FLAVOR_PANASONIC 0
+#define X264_AVCINTRA_FLAVOR_SONY      1
+
 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
 static const char * const x264_b_pyramid_names[] = { "none", "strict", "normal", 0 };
@@ -211,30 +233,32 @@
 static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "film", "bt2020", "smpte428",
                                                      "smpte431", "smpte432", 0 };
 static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316",
-                                                    "iec61966-2-4", "bt1361e", "iec61966-2-1", "bt2020-10", "bt2020-12", "smpte2084", "smpte428", 0 };
+                                                    "iec61966-2-4", "bt1361e", "iec61966-2-1", "bt2020-10", "bt2020-12", "smpte2084", "smpte428", "arib-std-b67", 0 };
 static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", "bt2020nc", "bt2020c",
-                                                     "smpte2085", 0 };
+                                                     "smpte2085", "chroma-derived-nc", "chroma-derived-c", "ICtCp", 0 };
 static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
+static const char * const x264_avcintra_flavor_names[] = { "panasonic", "sony", 0 };
 
 /* Colorspace type */
 #define X264_CSP_MASK           0x00ff  /* */
 #define X264_CSP_NONE           0x0000  /* Invalid mode     */
-#define X264_CSP_I420           0x0001  /* yuv 4:2:0 planar */
-#define X264_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
-#define X264_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
-#define X264_CSP_NV21           0x0004  /* yuv 4:2:0, with one y plane and one packed v+u */
-#define X264_CSP_I422           0x0005  /* yuv 4:2:2 planar */
-#define X264_CSP_YV16           0x0006  /* yvu 4:2:2 planar */
-#define X264_CSP_NV16           0x0007  /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_YUYV           0x0008  /* yuyv 4:2:2 packed */
-#define X264_CSP_UYVY           0x0009  /* uyvy 4:2:2 packed */
-#define X264_CSP_V210           0x000a  /* 10-bit yuv 4:2:2 packed in 32 */
-#define X264_CSP_I444           0x000b  /* yuv 4:4:4 planar */
-#define X264_CSP_YV24           0x000c  /* yvu 4:4:4 planar */
-#define X264_CSP_BGR            0x000d  /* packed bgr 24bits */
-#define X264_CSP_BGRA           0x000e  /* packed bgr 32bits */
-#define X264_CSP_RGB            0x000f  /* packed rgb 24bits */
-#define X264_CSP_MAX            0x0010  /* end of list */
+#define X264_CSP_I400           0x0001  /* monochrome 4:0:0 */
+#define X264_CSP_I420           0x0002  /* yuv 4:2:0 planar */
+#define X264_CSP_YV12           0x0003  /* yvu 4:2:0 planar */
+#define X264_CSP_NV12           0x0004  /* yuv 4:2:0, with one y plane and one packed u+v */
+#define X264_CSP_NV21           0x0005  /* yuv 4:2:0, with one y plane and one packed v+u */
+#define X264_CSP_I422           0x0006  /* yuv 4:2:2 planar */
+#define X264_CSP_YV16           0x0007  /* yvu 4:2:2 planar */
+#define X264_CSP_NV16           0x0008  /* yuv 4:2:2, with one y plane and one packed u+v */
+#define X264_CSP_YUYV           0x0009  /* yuyv 4:2:2 packed */
+#define X264_CSP_UYVY           0x000a  /* uyvy 4:2:2 packed */
+#define X264_CSP_V210           0x000b  /* 10-bit yuv 4:2:2 packed in 32 */
+#define X264_CSP_I444           0x000c  /* yuv 4:4:4 planar */
+#define X264_CSP_YV24           0x000d  /* yvu 4:4:4 planar */
+#define X264_CSP_BGR            0x000e  /* packed bgr 24bits */
+#define X264_CSP_BGRA           0x000f  /* packed bgr 32bits */
+#define X264_CSP_RGB            0x0010  /* packed rgb 24bits */
+#define X264_CSP_MAX            0x0011  /* end of list */
 #define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
 #define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
 
@@ -292,6 +316,7 @@
     int         i_width;
     int         i_height;
     int         i_csp;         /* CSP of encoded bitstream */
+    int         i_bitdepth;
     int         i_level_idc;
     int         i_frame_total; /* number of frames to encode if known, else 0 */
 
@@ -336,6 +361,7 @@
     int         b_open_gop;
     int         b_bluray_compat;
     int         i_avcintra_class;
+    int         i_avcintra_flavor;
 
     int         b_deblocking_filter;
     int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
@@ -407,7 +433,7 @@
     {
         int         i_rc_method;    /* X264_RC_* */
 
-        int         i_qp_constant;  /* 0 to (51 + 6*(x264_bit_depth-8)). 0=lossless */
+        int         i_qp_constant;  /* 0=lossless */
         int         i_qp_min;       /* min allowed QP value */
         int         i_qp_max;       /* max allowed QP value */
         int         i_qp_step;      /* max QP step between frames */
@@ -459,6 +485,9 @@
     /* frame packing arrangement flag */
     int i_frame_packing;
 
+    /* alternative transfer SEI */
+    int i_alternative_transfer;
+
     /* Muxing parameters */
     int b_aud;                  /* generate access unit delimiters */
     int b_repeat_headers;       /* put SPS/PPS before each keyframe */
@@ -556,7 +585,7 @@
     void (*nalu_process)( x264_t *h, x264_nal_t *nal, void *opaque );
 } x264_param_t;
 
-void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
+X264_API void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
 
 /****************************************************************************
  * H.264 level restriction information
@@ -588,7 +617,7 @@
 
 /* x264_param_default:
  *      fill x264_param_t with default values and do CPU detection */
-void    x264_param_default( x264_param_t * );
+X264_API void x264_param_default( x264_param_t * );
 
 /* x264_param_parse:
  *  set one parameter by name.
@@ -599,7 +628,7 @@
  *  value=NULL means "true" for boolean options, but is a BAD_VALUE for non-booleans. */
 #define X264_PARAM_BAD_NAME  (-1)
 #define X264_PARAM_BAD_VALUE (-2)
-int x264_param_parse( x264_param_t *, const char *name, const char *value );
+X264_API int x264_param_parse( x264_param_t *, const char *name, const char *value );
 
 /****************************************************************************
  * Advanced parameter handling functions
@@ -643,13 +672,13 @@
  *      film, animation, grain, stillimage, psnr, and ssim are psy tunings.
  *
  *      returns 0 on success, negative on failure (e.g. invalid preset/tune name). */
-int     x264_param_default_preset( x264_param_t *, const char *preset, const char *tune );
+X264_API int x264_param_default_preset( x264_param_t *, const char *preset, const char *tune );
 
 /* x264_param_apply_fastfirstpass:
  *      If first-pass mode is set (rc.b_stat_read == 0, rc.b_stat_write == 1),
  *      modify the encoder settings to disable options generally not useful on
  *      the first pass. */
-void    x264_param_apply_fastfirstpass( x264_param_t * );
+X264_API void x264_param_apply_fastfirstpass( x264_param_t * );
 
 /* x264_param_apply_profile:
  *      Applies the restrictions of the given profile.
@@ -664,21 +693,12 @@
  *      decrease them.
  *
  *      returns 0 on success, negative on failure (e.g. invalid profile name). */
-int     x264_param_apply_profile( x264_param_t *, const char *profile );
+X264_API int x264_param_apply_profile( x264_param_t *, const char *profile );
 
 /****************************************************************************
  * Picture structures and functions
  ****************************************************************************/
 
-/* x264_bit_depth:
- *      Specifies the number of bits per pixel that x264 uses. This is also the
- *      bit depth that x264 encodes in. If this value is > 8, x264 will read
- *      two bytes of input data for each pixel sample, and expect the upper
- *      (16-x264_bit_depth) bits to be zero.
- *      Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the
- *      colorspace depth as well. */
-X264_API extern const int x264_bit_depth;
-
 /* x264_chroma_format:
  *      Specifies the chroma formats that x264 supports encoding. When this
  *      value is non-zero, then it represents a X264_CSP_* that is the only
@@ -843,17 +863,17 @@
 /* x264_picture_init:
  *  initialize an x264_picture_t.  Needs to be done if the calling application
  *  allocates its own x264_picture_t as opposed to using x264_picture_alloc. */
-void x264_picture_init( x264_picture_t *pic );
+X264_API void x264_picture_init( x264_picture_t *pic );
 
 /* x264_picture_alloc:
  *  alloc data for a picture. You must call x264_picture_clean on it.
  *  returns 0 on success, or -1 on malloc failure or invalid colorspace. */
-int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height );
+X264_API int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height );
 
 /* x264_picture_clean:
  *  free associated resource for a x264_picture_t allocated with
  *  x264_picture_alloc ONLY */
-void x264_picture_clean( x264_picture_t *pic );
+X264_API void x264_picture_clean( x264_picture_t *pic );
 
 /****************************************************************************
  * Encoder functions
@@ -868,7 +888,7 @@
 
 /* x264_encoder_open:
  *      create a new encoder handler, all parameters from x264_param_t are copied */
-x264_t *x264_encoder_open( x264_param_t * );
+X264_API x264_t *x264_encoder_open( x264_param_t * );
 
 /* x264_encoder_reconfig:
  *      various parameters from x264_param_t are copied.
@@ -883,7 +903,7 @@
  *      more so than for other presets, many of the speed shortcuts used in ultrafast cannot be
  *      switched out of; using reconfig to switch between ultrafast and other presets is not
  *      recommended without a more fine-grained breakdown of parameters to take this into account. */
-int     x264_encoder_reconfig( x264_t *, x264_param_t * );
+X264_API int x264_encoder_reconfig( x264_t *, x264_param_t * );
 /* x264_encoder_parameters:
  *      copies the current internal set of parameters to the pointer provided
  *      by the caller.  useful when the calling application needs to know
@@ -891,32 +911,32 @@
  *      of the encoder after multiple x264_encoder_reconfig calls.
  *      note that the data accessible through pointers in the returned param struct
  *      (e.g. filenames) should not be modified by the calling application. */
-void    x264_encoder_parameters( x264_t *, x264_param_t * );
+X264_API void x264_encoder_parameters( x264_t *, x264_param_t * );
 /* x264_encoder_headers:
  *      return the SPS and PPS that will be used for the whole stream.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
  *      returns the number of bytes in the returned NALs.
  *      returns negative on error.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
-int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
+X264_API int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
 /* x264_encoder_encode:
  *      encode one picture.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
  *      returns the number of bytes in the returned NALs.
  *      returns negative on error and zero if no NAL units returned.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
-int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
+X264_API int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
 /* x264_encoder_close:
  *      close an encoder handler */
-void    x264_encoder_close( x264_t * );
+X264_API void x264_encoder_close( x264_t * );
 /* x264_encoder_delayed_frames:
  *      return the number of currently delayed (buffered) frames
  *      this should be used at the end of the stream, to know when you have all the encoded frames. */
-int     x264_encoder_delayed_frames( x264_t * );
-/* x264_encoder_maximum_delayed_frames( x264_t *h ):
+X264_API int x264_encoder_delayed_frames( x264_t * );
+/* x264_encoder_maximum_delayed_frames( x264_t * ):
  *      return the maximum number of delayed (buffered) frames that can occur with the current
  *      parameters. */
-int     x264_encoder_maximum_delayed_frames( x264_t *h );
+X264_API int x264_encoder_maximum_delayed_frames( x264_t * );
 /* x264_encoder_intra_refresh:
  *      If an intra refresh is not in progress, begin one with the next P-frame.
  *      If an intra refresh is in progress, begin one as soon as the current one finishes.
@@ -930,7 +950,7 @@
  *      behavior is undefined.
  *
  *      Should not be called during an x264_encoder_encode. */
-void    x264_encoder_intra_refresh( x264_t * );
+X264_API void x264_encoder_intra_refresh( x264_t * );
 /* x264_encoder_invalidate_reference:
  *      An interactive error resilience tool, designed for use in a low-latency one-encoder-few-clients
  *      system.  When the client has packet loss or otherwise incorrectly decodes a frame, the encoder
@@ -953,7 +973,7 @@
  *      Should not be called during an x264_encoder_encode, but multiple calls can be made simultaneously.
  *
  *      Returns 0 on success, negative on failure. */
-int x264_encoder_invalidate_reference( x264_t *, int64_t pts );
+X264_API int x264_encoder_invalidate_reference( x264_t *, int64_t pts );
 
 #ifdef __cplusplus
 }
diff -Nru x264-0.152.2854+gite9a5903/x264res.rc x264-0.158.2988+git-20191101.7817004/x264res.rc
--- x264-0.152.2854+gite9a5903/x264res.rc	2017-12-31 12:50:51.000000000 +0000
+++ x264-0.158.2988+git-20191101.7817004/x264res.rc	2019-11-09 05:16:29.000000000 +0000
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264res.rc: windows resource file
  *****************************************************************************
- * Copyright (C) 2012-2017 x264 project
+ * Copyright (C) 2012-2019 x264 project
  *
  * Authors: Henrik Gramner <henrik@gramner.com>
  *
@@ -64,7 +64,7 @@
 #endif
             VALUE "FileVersion",      X264_POINTVER
             VALUE "InternalName",     "x264"
-            VALUE "LegalCopyright",   "Copyright (C) 2003-2017 x264 project"
+            VALUE "LegalCopyright",   "Copyright (C) 2003-2019 x264 project"
 #ifdef DLL
             VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
 #else